Mercurial > repos > devteam > ncbi_blast_plus
diff tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 22:6f386c5dc4fb draft
v0.2.01 add -max_hsps, -use_sw_tback; lists args; internal updates
author | peterjc |
---|---|
date | Mon, 18 Sep 2017 06:21:27 -0400 |
parents | 3034ce97dd33 |
children | 31e517610e1f |
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Wed Apr 19 05:27:19 2017 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 18 06:21:27 2017 -0400 @@ -5,23 +5,24 @@ <import>ncbi_macros.xml</import> </macros> <expand macro="preamble" /> - <command interpreter="python">check_no_duplicates.py + <command detect_errors="aggressive" strict="true"> +python $__tool_directory__/check_no_duplicates.py ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) ##and abort (via the ampersand ampersand trick) if any are found. -#for i in $input_file#"${i}" #end for# +#for i in $input_file#'${i}' #end for# && -makeblastdb -out "${os.path.join($outfile.files_path,'blastdb')}" +makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' $parse_seqids $hash_index ## Single call to -in with multiple filenames space separated with outer quotes ## (presumably any filenames with spaces would be a problem). Note this gives ## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy: --in "#for i in $input_file#${i} #end for#" +-in '#for i in $input_file#${i} #end for#' #if $title: --title "$title" +-title '$title' #else: ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful --title "BLAST Database" +-title 'BLAST Database' #end if -dbtype $dbtype ## -------------------------------------------------------------------- @@ -31,7 +32,7 @@ ## See Trello issue https://trello.com/c/lp5YmA1O #if ' '.join( map(str, $mask_data_file) ) != 'None': #for i in $mask_data_file: --mask_data "${i}" +-mask_data '${i}' #end for #end if ## -------------------------------------------------------------------- @@ -48,7 +49,7 @@ > "$outfile" </command> <inputs> - <param name="dbtype" type="select" display="radio" label="Molecule type of input"> + <param argument="-dbtype" type="select" display="radio" label="Molecule type of input"> <option value="prot">protein</option> <option value="nucl">nucleotide</option> </param> @@ -56,13 +57,13 @@ NOTE Double check the new database would be self contained first --> <!-- Note this is a mandatory parameter - default should be most recent FASTA file --> - <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" /> - <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> - <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> - <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> + <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" /> + <param argument="-title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> + <param argument="-parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="false" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> + <param argument="-hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> <!-- SEQUENCE MASKING OPTIONS --> <!-- Note this is an optional parameter - default should be NO files --> - <param name="mask_data_file" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> + <param name="mask_data_file" argument="-mask_data" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> <!-- TODO - Option to create GI indexed masking data? via -gi_mask and -gi_mask_name? --> <!-- TAXONOMY OPTIONS --> <conditional name="tax"> @@ -76,11 +77,11 @@ <when value=""> </when> <when value="id"> - <param name="taxid" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0, e.g. 9606 for Homo sapiens" min="0" /> + <param argument="-taxid" type="integer" min="0" value="" label="NCBI taxonomy ID" help="Integer >=0, e.g. 9606 for Homo sapiens" /> </when> <!-- TODO: File format? <when value="map"> - <param name="taxmap" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> + <param name="taxmap" argument="-taxid_map" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> </when> --> </conditional> @@ -107,7 +108,7 @@ <param name="title" value="Just 4 human proteins" /> <param name="parse_seqids" value="" /> <param name="hash_index" value="true" /> - <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> + <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> @@ -126,7 +127,7 @@ <param name="hash_index" value="true" /> <param name="taxselect" value="id" /> <param name="taxid" value="9606" /> - <output name="out_file" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp"> + <output name="outfile" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" /> <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" /> <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" /> @@ -144,7 +145,7 @@ <param name="parse_seqids" value="" /> <param name="hash_index" value="true" /> <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" /> - <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> + <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> @@ -163,7 +164,7 @@ <param name="hash_index" value="true" /> <param name="taxselect" value="id" /> <param name="taxid" value="9606" /> - <output name="out_file" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn"> + <output name="outfile" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn"> <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" /> <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" /> <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" />