Mercurial > repos > devteam > ncbi_blast_plus
changeset 30:acf4e47b734c draft
"2.10.1+galaxy1 with taxid improvements"
author | peterjc |
---|---|
date | Tue, 29 Mar 2022 14:54:02 +0000 |
parents | 5edc472ec434 |
children | 0e3cf9594bb7 |
files | tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/get_species_taxids.xml tools/ncbi_blast_plus/ncbi_macros.xml |
diffstat | 3 files changed, 144 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/README.rst Thu Sep 10 13:23:34 2020 +0000 +++ b/tools/ncbi_blast_plus/README.rst Tue Mar 29 14:54:02 2022 +0000 @@ -136,6 +136,15 @@ ============== =============================================================== Version Changes -------------- --------------------------------------------------------------- +2.10.1+galaxy1 - Add tool `NCBI get species taxids` that wraps NCBI's + `get_species_taxids.sh` script + (https://www.ncbi.nlm.nih.gov/books/NBK546209/). + It determines all species taxids below a given + taxon. + - Add the possibility to restrict BLAST searches taxonomically + by species taxids given in a file. + - Properly quote cached databases. + - Make locally installed database selector non-optional. 2.10.1+galaxy0 - Updated for NCBI BLAST+ 2.10.1 release. - Supports locally installed v4 or v5 format BLAST databases (listed in the ``blastdb*.loc`` files). @@ -248,6 +257,15 @@ - Supports setting a taxonomy ID in ``makeblastdb`` wrapper. - Subtle changes like new conditional settings will require some old workflows be updated to cope. +v0.0.22 - More use of macros to simplify the wrappers. + - Set number of threads via ``$GALAXY_SLOTS`` environment variable. + - More descriptive default output names. + - Tests require updated BLAST DB definitions (``blast_datatypes`` + v0.0.18). + - Pre-check for duplicate identifiers in ``makeblastdb`` wrapper. + - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. + - Now depends on ``package_blast_plus_2_2_28`` in ToolShed. + - Extended tabular output includes 'salltitles' as column 25. v0.0.21 - Use macros to simplify the XML wrappers (by John Chilton). - Added wrapper for dustmasker. - Enabled masking for makeblastdb (Nicola Soranzo). @@ -288,15 +306,6 @@ e-values v0.0.11 - Final revision as part of the Galaxy main repository, and the first release via the Tool Shed -v0.0.22 - More use of macros to simplify the wrappers. 
- - Set number of threads via ``$GALAXY_SLOTS`` environment variable. - - More descriptive default output names. - - Tests require updated BLAST DB definitions (``blast_datatypes`` - v0.0.18). - - Pre-check for duplicate identifiers in ``makeblastdb`` wrapper. - - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. - - Now depends on ``package_blast_plus_2_2_28`` in ToolShed. - - Extended tabular output includes 'salltitles' as column 25. ======= ======================================================================
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/get_species_taxids.xml Tue Mar 29 14:54:02 2022 +0000 @@ -0,0 +1,106 @@ +<tool id="get_species_taxids" name="NCBI get species taxids" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description></description> + <macros> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="preamble"/> + <command detect_errors="aggressive"><![CDATA[ +#if $type_cond.type_sel == 'names' + #for name in $type_cond.names.split(',') + taxid=\$(get_species_taxids.sh -n '$name' | grep Taxid | sed 's/Taxid://') && + if [ -z "\$taxid" ]; then + >&2 echo "could not find taxid for $name" && exit 1; + else + echo " $name -> \$taxid"; + fi && + get_species_taxids.sh -t \$taxid >> species_ids.txt && + #end for +#else + #for taxid in $type_cond.ids.split(',') + get_species_taxids.sh -t $taxid >> species_ids.txt && + #end for +#end if +sort -n -u species_ids.txt > '$output' + ]]></command> + <inputs> + <conditional name="type_cond"> + <param name="type_sel" type="select" label="Get taxids by"> + <option value="names">Taxon names</option> + <option value="ids">Taxon ids</option> + </param> + <when value="names"> + <param name="names" type="text" label="Taxon names" help="comma separated"> + <validator type="regex" message="Enter a comma separated list of taxon names">[a-zA-Z ,]+$</validator> + </param> + </when> + <when value="ids"> + <param name="ids" type="text" label="Taxon ids" help="comma separated"> + <validator type="regex" message="Enter a comma separated list of taxids">[0-9,]+$</validator> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data format="txt" name="output"/> + </outputs> + <tests> + <test> + <param name="type_cond|type_sel" value="names"/> + <param name="type_cond|names" value="Enterobacterales"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_line line="9"/> + <has_line line="2791989"/> + </assert_contents> + </output> + </test> + <test> + 
<param name="type_cond|type_sel" value="ids"/> + <param name="type_cond|ids" value="91347"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_line line="9"/> + <has_line line="2791989"/> + </assert_contents> + </output> + </test> + <test> + <param name="type_cond|type_sel" value="names"/> + <param name="type_cond|names" value="Enterobacterales,Hominidae"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_line line="9"/> + <has_line line="9606"/> + <has_line line="2791989"/> + </assert_contents> + </output> + </test> + <test> + <param name="type_cond|type_sel" value="ids"/> + <param name="type_cond|ids" value="91347,9604"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_line line="9"/> + <has_line line="9606"/> + <has_line line="2791989"/> + </assert_contents> + </output> + </test> + </tests> + <help> + +**What it does** + +Returns a list of species taxids for a taxon. It relies on the get_species_taxids.sh script of the BLAST+ package https://www.ncbi.nlm.nih.gov/books/NBK546209/ + +------- + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +@REFERENCES@ + </help> + <expand macro="blast_citations"/> +</tool>
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Thu Sep 10 13:23:34 2020 +0000 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Tue Mar 29 14:54:02 2022 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">2.10.1</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">16.10</token> <xml name="parallelism"> <!-- If job splitting is enabled, break up the query file into parts --> @@ -353,7 +353,7 @@ <option value="file">FASTA file from your history (see warning note below)</option> </param> <when value="db"> - <param name="database" type="select" multiple="true" label="Nucleotide BLAST database"> + <param name="database" type="select" multiple="true" optional="false" label="Nucleotide BLAST database"> <options from_data_table="blastdb" /> </param> <param name="histdb" type="hidden" value="" /> @@ -381,7 +381,7 @@ <option value="file">FASTA file from your history (see warning note below)</option> </param> <when value="db"> - <param name="database" type="select" multiple="true" label="Protein BLAST database"> + <param name="database" type="select" multiple="true" optional="false" label="Protein BLAST database"> <options from_data_table="blastdb_p" /> </param> <param name="histdb" type="hidden" value="" /> @@ -452,7 +452,7 @@ <option value="histdb">BLAST database from your history</option> </param> <when value="db"> - <param name="database" argument="-db" type="select" multiple="true" label="Protein BLAST database"> + <param name="database" argument="-db" type="select" multiple="true" optional="false" label="Protein BLAST database"> <options from_data_table="blastdb_p" /> </param> <param name="histdb" type="hidden" value="" /> @@ -558,6 +558,8 @@ <option value="gilist">GI identifiers</option> <option value="negative_gilist">Negative GI identifiers</option> <option value="seqidlist">Sequence identifiers (SeqId's)</option> + <option value="taxidlist">Taxonomy identifiers (TaxId's)</option> + <option 
value="negative_taxidlist">Negative taxonomy identifiers (TaxId's)</option> </param> <when value="none" /> <when value="gilist"> @@ -572,8 +574,17 @@ <param argument="-seqidlist" type="data" format="txt" label=" Restrict search of database to list of SeqId's" help="This option is only available for database searches."/> </when> + <when value="taxidlist"> + <param argument="-taxidlist" type="data" format="txt" label="Restrict search of database to list of TaxId's" + help="This option is only available for database searches."/> + </when> + <when value="negative_taxidlist"> + <param argument="-negative_taxidlist" type="data" format="txt" label="Restrict search of database to everything except the listed TaxId's" + help="This option is only available for database searches."/> + </when> </conditional> </xml> + <!--Tokens--> <token name="@ADV_MATRIX_GAPCOSTS@"><![CDATA[ #if str($adv_opts.matrix_gapcosts.matrix): @@ -595,6 +606,10 @@ -gilist '${adv_opts.adv_optional_id_files_opts.gilist}' #elif $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'seqidlist': -seqidlist '${adv_opts.adv_optional_id_files_opts.seqidlist}' +#elif $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'taxidlist': + -taxidlist '${adv_opts.adv_optional_id_files_opts.taxidlist}' +#elif $adv_opts.adv_optional_id_files_opts.adv_optional_id_files_opts_selector == 'negative_taxidlist': + -negative_taxidlist '${adv_opts.adv_optional_id_files_opts.negative_taxidlist}' #end if ]]></token> @@ -621,7 +636,7 @@ <!-- Implement -db ... / -subject ... command line options --> <token name="@BLAST_DB_SUBJECT@"><![CDATA[ #if $db_opts.db_opts_selector == "db": - -db '${" ".join(str($db_opts.database.fields.path).split(","))}' + -db '"${'" "'.join(str($db_opts.database.fields.path).split(","))}"' #elif $db_opts.db_opts_selector == "histdb": -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}' #else: