Mercurial > repos > devteam > ncbi_blast_plus
diff tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 23:31e517610e1f draft
v0.3.0 Updated for NCBI BLAST+ 2.7.1
author | peterjc |
---|---|
date | Sat, 30 Jun 2018 17:22:46 -0400 |
parents | 6f386c5dc4fb |
children | e25d3acf6e68 |
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 18 06:21:27 2017 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Sat Jun 30 17:22:46 2018 -0400 @@ -5,21 +5,30 @@ <import>ncbi_macros.xml</import> </macros> <expand macro="preamble" /> - <command detect_errors="aggressive" strict="true"> + <command detect_errors="aggressive" strict="true"><![CDATA[ python $__tool_directory__/check_no_duplicates.py ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) ##and abort (via the ampersand ampersand trick) if any are found. #for i in $input_file#'${i}' #end for# -&& -makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' +&& +##makeblastdb does not like input redirects of the sort +##makeblastdb -in <(gunzip -c gzipped_fasta_file) +##therefore we're cramming everything +##into a single cat command below +cat +#for i in $input_file: + #if $i.is_of_type('fasta.gz'): + <(gunzip -c ${i}) + #else: + ${i} + #end if +#end for +| makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' $parse_seqids $hash_index -## Single call to -in with multiple filenames space separated with outer quotes -## (presumably any filenames with spaces would be a problem). Note this gives -## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy: --in '#for i in $input_file#${i} #end for#' +-in - #if $title: --title '$title' +-title '${title}' #else: ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful -title 'BLAST Database' @@ -46,8 +55,8 @@ #end if ## -------------------------------------------------------------------- ## Capture the stdout log information to the primary file (plain text): -> "$outfile" - </command> +> '$outfile' + ]]></command> <inputs> <param argument="-dbtype" type="select" display="radio" label="Molecule type of input"> <option value="prot">protein</option> @@ -57,7 +66,7 @@ NOTE Double check the new database would be self contained first --> <!-- Note this is a mandatory parameter - default should be most recent FASTA file --> - <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" /> + <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta,fasta.gz" label="Input FASTA files(s)" help="One or more FASTA files" /> <param argument="-title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> <param argument="-parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="false" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> <param argument="-hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> @@ -110,7 +119,7 @@ <param name="hash_index" value="true" /> <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> - <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" compare="sim_size" delta="0" /> <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" /> <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" /> @@ -129,7 +138,7 @@ <param name="taxid" value="9606" /> <output name="outfile" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" /> - <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" compare="sim_size" delta="0" /> <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" /> <extra_files type="file" value="four_human_proteins_taxid.fasta.pog" name="blastdb.pog" /> <extra_files type="file" value="four_human_proteins_taxid.fasta.phd" name="blastdb.phd" /> @@ -147,7 +156,7 @@ <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" /> <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> - <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" compare="sim_size" delta="0" /> <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" /> <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" /> @@ -158,7 +167,7 @@ </test> <test> <param name="dbtype" value="nucl" /> - <param name="input_file" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="input_file" value="three_human_mRNA.fasta.gz" ftype="fasta.gz" /> <param name="title" value="Just 3 human mRNA sequences" /> <param name="parse_seqids" value="" /> <param name="hash_index" value="true" /> @@ -166,7 +175,7 @@ <param name="taxid" value="9606" /> <output name="outfile" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn"> <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" /> - <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" /> + <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" compare="sim_size" delta="8" /> <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" /> <extra_files type="file" value="three_human_mRNA.fasta.nog" name="blastdb.nog" /> <extra_files type="file" value="three_human_mRNA.fasta.nhd" name="blastdb.nhd" /> @@ -184,15 +193,16 @@ This is a wrapper for the NCBI BLAST+ tool 'makeblastdb', which is the replacement for the 'formatdb' tool in the NCBI 'legacy' BLAST suite. +More information about makeblastdb can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/ + + <!-- Applying masks to an existing BLAST database will not change the original database; a new database will be created. For this reason, it's best to apply all masks at once to minimize the number of unnecessary intermediate databases. --> -**Documentation** - -https://www.ncbi.nlm.nih.gov/books/NBK279690/ - **References** If you use this Galaxy tool in work leading to a scientific publication please