Mercurial > repos > bgruening > diamond
changeset 8:54f751e413f4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diamond commit 75abf7d4b23ed7ae8abce80609d81b20bc882863"
author | iuc |
---|---|
date | Mon, 22 Mar 2021 13:21:23 +0000 (2021-03-22) |
parents | 62c9df8382c2 |
children | f921014aba5a |
files | diamond.xml diamond_makedb.xml diamond_view.xml macros.xml repository_dependencies.xml test-data/db-wtax.dmnd test-data/db.dmnd test-data/diamond_database.loc test-data/diamond_results.daa test-data/diamond_results.pairwise test-data/diamond_results.sam test-data/diamond_results.tabular test-data/diamond_results.wtax.tabular test-data/diamond_results.xml test-data/diamond_view_results.tabular test-data/names.dmp tool_data_table_conf.xml.test |
diffstat | 17 files changed, 195 insertions(+), 73 deletions(-) [+] |
line wrap: on
line diff
--- a/diamond.xml Tue Dec 03 17:40:05 2019 -0500 +++ b/diamond.xml Mon Mar 22 13:21:23 2021 +0000 @@ -6,7 +6,7 @@ <expand macro="requirements" /> <expand macro="stdio" /> <expand macro="version_command" /> - <command> + <command detect_errors="aggressive"> <![CDATA[ #if $ref_db_source.db_source == "history": @@ -19,27 +19,26 @@ diamond $method_cond.method_select + --quiet --threads "\${GALAXY_SLOTS:-12}" --db ./database --query '$query' #if $method_cond.method_select == "blastx" --query-gencode '$method_cond.query_gencode' --strand '$method_cond.query_strand' - --min-orf $method_cond.min_orf - #if $method_cond.frameshift_cond.frameshift_select == 'yes' - --frameshift $method_cond.frameshift_cond.frameshift - $method_cond.frameshift_cond.range_culling + --min-orf $method_cond.min_orf + #if $method_cond.frameshift_cond.frameshift_select == 'yes' + --frameshift $method_cond.frameshift_cond.frameshift + $method_cond.frameshift_cond.range_culling #end if + #else if $method_cond.method_select == "blastp" + $method_cond.no_self_hits #end if @OUTPUT_ARGS@ --compress '0' - #if $sensitivity == "1" - --sensitive - #else if $sensitivity == "2" - --more-sensitive - #end if + $sens_cond.sensitivity #if str($gapopen) != "": --gapopen '$gapopen' @@ -48,7 +47,7 @@ --gapextend '$gapextend' #end if --matrix '$matrix' - --comp-based-stats '$comp_based_stats' + --comp-based-stats '$method_cond.comp_based_stats' --masking '$masking' @HITFILTER_ARGS@ @@ -62,11 +61,10 @@ --id '$id' --query-cover '$query_cover' --subject-cover '$subject_cover' - --block-size '$block_size' + --block-size '$sens_cond.block_size' #if str($unal) == '1': --unal 1 --un '$unalqueries' #end if - $no_self_hits #if $tax_cond.tax_select == 'file': --taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'` #else if $tax_cond.tax_select == 'list': @@ -120,8 +118,22 @@ </when> <when value="no"/> </conditional> + + <param name="comp_based_stats" argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> + <option value="0">Disable</option> + <option value="1" selected="True">Default mode (Hauser, 2016)</option> + </param> </when> <when value="blastp"> + <param name="no_self_hits" argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/> + + <param name="comp_based_stats" argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> + <option value="0">Disable</option> + <option value="1" selected="True">Default mode (Hauser, 2016)</option> + <option value="2">Compositional matrix adjust conditioned on sequence properties, simplified (Yu, 2005)</option> + <option value="3">Compositional matrix adjust conditioned on sequence properties (Yu, 2005)</option> + <option value="4">Compositional matrix adjust unconditionally (Yu, 2005)</option> + </param> </when> </conditional> <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format" /> @@ -139,16 +151,45 @@ </param> </when> <when value="history"> - <param name="reference_database" type="data" format="dmnd" label="Select the reference database" /> + <param name="reference_database" argument="--db" type="data" format="dmnd" label="Select the reference database" /> </when> </conditional> - <expand macro="output_type_macro" /> - <param name="no_self_hits" argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/> - <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time."> - <option value="0" selected="True">Default</option> - <option value="1">Sensitive</option> - <option value="2">More Sensitive</option> - </param> + <expand macro="output_type_macro"> + <!-- Taxonomy features are not supported for the DAA format (i.e. + can't be used in diamond view) --> + <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option> + <option value="sskingdoms">Subject super kingdoms</option> + <option value="skingdoms">Subject kingdoms</option> + <option value="sphylums">Subject phylums</option> + </expand> + <conditional name="sens_cond"> + <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time."> + <option value="" selected="True">Default</option> + <option value="--mid-sensitive">Mid Sensitive (--mid-sensitive)</option> + <option value="--sensitive">Sensitive (--sensitive)</option> + <option value="--more-sensitive">More Sensitive (--more-sensitive)</option> + <option value="--very-sensitive">Very Sensitive (--very-sensitive)</option> + <option value="--ultra-sensitive">Ultra Sensitive (--ultra-sensitive)</option> + </param> + <when value=""> + <expand macro="block_size_low_sens"/> + </when> + <when value="--mid-sensitive"> + <expand macro="block_size_low_sens"/> + </when> + <when value="--sensitive"> + <expand macro="block_size_low_sens"/> + </when> + <when value="--more-sensitive"> + <expand macro="block_size_low_sens"/> + </when> + <when value="--very-sensitive"> + <expand macro="block_size_hi_sens"/> + </when> + <when value="--ultra-sensitive"> + <expand macro="block_size_hi_sens"/> + </when> + </conditional> <param argument="--matrix" type="select" label="Scoring matrix" help="In parentheses are the supported values for (gap open)/(gap extend). In brackets are default gap penalties"> <option value="BLOSUM45">BLOSUM45 ((10-13)/3; (12-16)/2; (16-19)/1) [14/2]</option> <option value="BLOSUM50">BLOSUM50 ((9-13)/3; (12-16)/2; (15-19)/1) [13/2]</option> @@ -161,7 +202,6 @@ </param> <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="leave empty for default (see scoring matrix)" /> <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="leave empty for default (see scoring matrix)" /> - <param name="comp_based_stats" argument="--comp-based-stats" type="boolean" truevalue="1" falsevalue="0" checked="true" label="enable composition based statistics?" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"/> <param argument="--masking" type="boolean" truevalue="1" falsevalue="0" checked="true" label="enable masking of low complexity regions?" help="Masked residues appear in the output as X"/> <conditional name="tax_cond"> <param name="tax_select" type="select" label="Restrict search taxonomically?" help="Any taxonomic rank can be used, and only reference sequences matching one of the specified taxon ids will be searched against"> @@ -171,7 +211,10 @@ </param> <when value="no"/> <when value="list"> - <param name="taxonlist" argument="--taxonlist" type="text" value="" label="comma separated list of taxon ids" help="" /> + <param name="taxonlist" argument="--taxonlist" type="text" value="" label="comma separated list of taxon ids" help=""> + <validator type="regex" message="taxonlist needs to be a comma separated list of integers">[0-9,]*</validator> + <validator type="expression" message="taxon ids 0 and 1 are not allowed">not ("0" in value.split(",") or "1" in value.split(","))</validator> + </param> </when> <when value="file"> <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Keep alignments within the given percentage range of the top alignment score for a quer" help="" /> @@ -193,7 +236,6 @@ <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="" /> <param name="query_cover" argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" /> <param name="subject_cover" argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" /> - <param name="block_size" argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" /> <param argument="--unal" type="boolean" truevalue="1" falsevalue="0" checked="false" label="report unaligned queries" help=""/> </inputs> <outputs> @@ -210,13 +252,15 @@ <param name="query" value="protein.fasta" ftype="fasta"/> <conditional name="ref_db_source"> <param name="db_source" value="history"/> - <param name="reference_database" value="db.dmnd"/> + <param name="reference_database" value="db-wtax.dmnd"/> </conditional> <conditional name="output"> <param name="outfmt" value="6"/> - <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp,sskingdoms,skingdoms,sphylums"/> </conditional> - <param name="sensitivity" value="0"/> + <conditional name="sens_cond"> + <param name="sensitivity" value=""/> + </conditional> <param name="matrix" value="BLOSUM62"/> <param name="comp-based-stat" value="1"/> <param name="masking" value="1"/> @@ -230,7 +274,9 @@ </conditional> <param name="id" value="0"/> <param name="query_cover" value="0"/> - <param name="block_size" value="2"/> + <conditional name="sens_cond"> + <param name="block_size" value="2"/> + </conditional> <output name="blast_tabular" file="diamond_results.tabular"/> </test> <test> @@ -243,14 +289,16 @@ <param name="reference_database" value="db-wtax.dmnd"/> </conditional> <conditional name="tax_cond"> - <param name="tax_select" value="list"/> - <param name="taxonlist" value="2" /> + <param name="tax_select" value="list"/> + <param name="taxonlist" value="2" /> </conditional> <conditional name="output"> <param name="outfmt" value="6"/> <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> </conditional> - <param name="sensitivity" value="0"/> + <conditional name="sens_cond"> + <param name="sensitivity" value=""/> + </conditional> <param name="matrix" value="BLOSUM62"/> <param name="comp-based-stat" value="1"/> <param name="masking" value="1"/> @@ -264,7 +312,9 @@ </conditional> <param name="id" value="0"/> <param name="query_cover" value="0"/> - <param name="block_size" value="2"/> + <conditional name="sens_cond"> + <param name="block_size" value="2"/> + </conditional> <output name="blast_tabular" file="diamond_results.wtax.tabular"/> </test> <test> @@ -282,7 +332,9 @@ <conditional name="output"> <param name="outfmt" value="0"/> </conditional> - <param name="sensitivity" value="0"/> + <conditional name="sens_cond"> + <param name="sensitivity" value=""/> + </conditional> <param name="matrix" value="BLOSUM62"/> <param name="comp-based-stat" value="1"/> <param name="masking" value="1"/> @@ -291,12 +343,14 @@ <param name="top" value="10" /> </conditional> <conditional name="filter_score"> - <param name="filter_score_select" value="score"/> - <param name="evalue" value="1" /> + <param name="filter_score_select" value="min-score"/> + <param name="min_score" value="1" /> </conditional> <param name="id" value="0"/> <param name="query_cover" value="0"/> - <param name="block_size" value="2"/> + <conditional name="sens_cond"> + <param name="block_size" value="2"/> + </conditional> <output name="blast_tabular" file="diamond_results.pairwise"/> </test> <test> @@ -313,6 +367,42 @@ </conditional> <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/> </test> + <test> + <conditional name="method_cond"> + <param name="method_select" value="blastx" /> + <conditional name="frameshift_cond"> + <param name="frameshift_select" value="yes"/> + </conditional> + </conditional> + <param name="query" value="nucleotide.fasta" ftype="fasta"/> + <conditional name="ref_db_source"> + <param name="db_source" value="indexed"/> + <param name="index" value="testDb"/> + </conditional> + <conditional name="output"> + <param name="outfmt" value="0"/> + </conditional> + <conditional name="sens_cond"> + <param name="sensitivity" value=""/> + </conditional> + <param name="matrix" value="BLOSUM62"/> + <param name="comp-based-stat" value="1"/> + <param name="masking" value="1"/> + <conditional name="hit_filter"> + <param name="hit_filter_select" value="top"/> + <param name="top" value="10" /> + </conditional> + <conditional name="filter_score"> + <param name="filter_score_select" value="min-score"/> + <param name="min_score" value="1" /> + </conditional> + <param name="id" value="0"/> + <param name="query_cover" value="0"/> + <conditional name="sens_cond"> + <param name="block_size" value="2"/> + </conditional> + <output name="blast_tabular" file="diamond_results.pairwise"/> + </test> </tests> <help> <![CDATA[
--- a/diamond_makedb.xml Tue Dec 03 17:40:05 2019 -0500 +++ b/diamond_makedb.xml Mon Mar 22 13:21:23 2021 +0000 @@ -8,7 +8,7 @@ <expand macro="stdio" /> <expand macro="version_command" /> - <command> + <command detect_errors="aggressive"> <!-- DB has two files, *.dmnd and *.tx --> <![CDATA[ diamond makedb @@ -19,6 +19,7 @@ #if str($tax_cond.tax_select) == 'yes': --taxonmap '$tax_cond.taxonmap' --taxonnodes '$tax_cond.taxonnodes' + --taxonnames '$tax_cond.taxonnames' #end if ]]> </command> @@ -33,6 +34,7 @@ <when value="yes"> <param argument="--taxonmap" type="data" format="tabular" label="protein accession to taxid mapping file" help="" /> <param argument="--taxonnodes" type="data" format="tabular" label="taxonomy nodes.dmp from NCBI" help="" /> + <param argument="--taxonnames" type="data" format="tabular" label="taxonomy names.dmp from NCBI" help="" /> </when> <when value="no"/> </conditional> @@ -53,6 +55,7 @@ <param name="tax_select" value="yes"/> <param name="taxonmap" ftype="tabular" value="prot.accession2taxid" /> <param name="taxonnodes" ftype="tabular" value="nodes.dmp" /> + <param name="taxonnames" ftype="tabular" value="names.dmp" /> </conditional> <output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/> </test> @@ -73,7 +76,9 @@ .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/ -- taxonmap: Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. The file can be downloaded from NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz +- taxonmap: Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. The file can be downloaded from NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.FULL.gz + +- taxonnames: Path to the names.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip - taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip ]]>
--- a/diamond_view.xml Tue Dec 03 17:40:05 2019 -0500 +++ b/diamond_view.xml Mon Mar 22 13:21:23 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="bg_diamond_view" name="Diamond" version="@VERSION@" profile="19.01"> +<tool id="bg_diamond_view" name="Diamond view" version="@VERSION@" profile="19.01"> <description>generate formatted output from DAA files</description> <macros> <import>macros.xml</import> @@ -6,11 +6,12 @@ <expand macro="requirements" /> <expand macro="stdio" /> <expand macro="version_command" /> - <command><![CDATA[ + <command detect_errors="aggressive"><![CDATA[ ## need to link because diamont tries to open dataset_xxx.dat.daa ln -s '$daa' input.daa && diamond view + --threads \${GALAXY_SLOTS:-1} --daa input.daa @OUTPUT_ARGS@ @HITFILTER_ARGS@ @@ -43,9 +44,9 @@ <param name="daa" ftype="daa" value="diamond_results.daa" /> <conditional name="output"> <param name="outfmt" value="6"/> - <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp"/> </conditional> - <output name="blast_tabular" file="diamond_results.tabular"/> + <output name="blast_tabular" file="diamond_view_results.tabular"/> </test> <test> <param name="daa" ftype="daa" value="diamond_results.daa" />
--- a/macros.xml Tue Dec 03 17:40:05 2019 -0500 +++ b/macros.xml Mon Mar 22 13:21:23 2021 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@VERSION@">0.9.29</token> + <token name="@VERSION@">2.0.8</token> <xml name="requirements"> <requirements> @@ -9,14 +9,12 @@ <xml name="stdio"> <stdio> - <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> - <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> - <regex match="Failed to allocate sufficient memory." source="stderr" level="fatal_oom" /> + <regex match="Failed to allocate" source="stderr" level="fatal_oom" /> </stdio> </xml> <xml name="version_command"> - <version_command>diamond version</version_command> + <version_command>diamond version | cut -d" " -f 3</version_command> </xml> <xml name="output_type_macro"> @@ -30,7 +28,7 @@ <option value="102">Taxonomic classification</option> </param> <when value="0"/> - <when value="5"/> + <when value="5"/> <when value="6"> <param name="fields" type="select" label="Tabular fields" help="" multiple="true"> <option value="qseqid" selected="true">Query Seq - id</option> @@ -52,12 +50,13 @@ <option value="send" selected="true">End of alignment in subject</option> <option value="qseq">Aligned part of query sequence</option> <option value="sseq">Aligned part of subject sequence</option> + <option value="qseq_translated">Translation of the aligned part of query sequence</option> <option value="evalue" selected="true">Expect value</option> <option value="bitscore" selected="true">Bit score</option> <option value="score">Raw score</option> <option value="qframe">Query frame</option> <option value="btop">Blast traceback operations(BTOP)</option> - <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option> + <option value="scovhsp">Subject coverage per HSP</option> <option value="stitle">Subject Title</option> <option value="salltitles">All Subject Title(s)</option> <option value="qcovhsp">Query Coverage Per HSP</option> @@ -67,13 +66,15 @@ <option value="qqual">Query quality values for the aligned part of the query</option> <option value="full_qqual">Query quality values</option> <option value="qstrand">Query strand</option> + <option value="cigar">Cigar</option> + <yield/> </param> </when> <when value="100"> <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/> <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/> </when> - <when value="101"> + <when value="101"> <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/> <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/> </when> @@ -97,6 +98,14 @@ </conditional> </xml> + <xml name="block_size_low_sens"> + <param name="block_size" argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" /> + </xml> + + <xml name="block_size_hi_sens"> + <param name="block_size" argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" help="" /> + </xml> + <xml name="citations"> <citations> <citation type="doi">10.1038/nmeth.3176</citation> @@ -115,7 +124,7 @@ <filter>output["outfmt"] == "6"</filter> </data> <!-- for daa diamond appends the .daa extension -> hence from_work_dir --> - <data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa"> + <data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa"> <filter>output["outfmt"] == "100"</filter> </data> <data format="sam" name="sam_output" label="${tool.name} on ${on_string}"> @@ -153,6 +162,6 @@ --max-target-seqs '$hit_filter.max_target_seqs' #else: --top '$hit_filter.top' - #end if + #end if </token> </macros>
--- a/repository_dependencies.xml Tue Dec 03 17:40:05 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -<?xml version="1.0" ?> -<repositories description="This requires the Diamond data manager."> - <repository changeset_revision="5a0d0bee4f8d" name="data_manager_diamond_database_builder" owner="bgruening" toolshed="https://toolshed.g2.bx.psu.edu"/> -</repositories> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/diamond_database.loc Mon Mar 22 13:21:23 2021 +0000 @@ -0,0 +1,1 @@ +testDb test_index ${__HERE__}/db.dmnd
--- a/test-data/diamond_results.pairwise Tue Dec 03 17:40:05 2019 -0500 +++ b/test-data/diamond_results.pairwise Mon Mar 22 13:21:23 2021 +0000 @@ -8,7 +8,7 @@ >gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus] Length=284 - Score = 547.4 bits (1409), Expect = 2.7e-160 + Score = 550 bits (1417), Expect = 1.44e-205 Identities = 283/284 (99%), Positives = 283/284 (99%), Gaps = 1/284 (0%) Frame = 1 @@ -20,9 +20,9 @@ AIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL FTMVALAGVHLTFLHETGSNNPLGL Sbjct 61 AIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGL 120 -Query 358 TSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPE 537 - TSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPE -Sbjct 121 TSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPE 180 +Query 358 TSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPE 537 + TSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPE +Sbjct 121 TSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPE 180 Query 538 WYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMD 717 WYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMD
--- a/test-data/diamond_results.sam Tue Dec 03 17:40:05 2019 -0500 +++ b/test-data/diamond_results.sam Mon Mar 22 13:21:23 2021 +0000 @@ -1,6 +1,6 @@ @HD VN:1.5 SO:query -@PG PN:DIAMOND VN:0.9.29 CL:diamond view --daa input.daa --outfmt 101 --out /tmp/tmpgnYFop/files/3/3/7/dataset_3370e74a-4639-49d2-b5de-fe40b7c3b66a.dat --top 0 --forwardonly --compress 0 +@PG PN:DIAMOND VN:2.0.8 CL:diamond view --threads 1 --daa input.daa --outfmt 101 --out /tmp/tmpz1aqzru3/files/3/f/6/dataset_3f6f43ac-3af2-4ec2-93be-9ced0e692b43.dat --top 0 --forwardonly --compress 0 @mm BlastP @CO BlastP-like alignments @CO Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate -sequence 0 gi|5524211|gb|AAD44166.1| 1 255 94M1D189M * 0 0 LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY * AS:i:547 NM:i:1 ZL:i:284 ZR:i:1409 ZE:f:2.7e-160 ZI:i:99 ZF:i:1 ZS:i:1 MD:Z:94^P189 +sequence 0 gi|5524211|gb|AAD44166.1| 1 255 94M1D189M * 0 0 LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY * AS:i:550 NM:i:1 ZL:i:284 ZR:i:1417 ZE:f:1.44e-205 ZI:i:99 ZF:i:1 ZS:i:1 MD:Z:94^P189
--- a/test-data/diamond_results.tabular Tue Dec 03 17:40:05 2019 -0500 +++ b/test-data/diamond_results.tabular Mon Mar 22 13:21:23 2021 +0000 @@ -1,2 +1,2 @@ -sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 2.7e-160 547.4 -sequence gi|5524212|gb|AAD44167.1| 79.2 284 58 1 1 283 1 284 1.1e-116 402.5 +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 94M1D189M 100 0 0 0 +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 105M1D178M 100 0 0 0
--- a/test-data/diamond_results.wtax.tabular Tue Dec 03 17:40:05 2019 -0500 +++ b/test-data/diamond_results.wtax.tabular Mon Mar 22 13:21:23 2021 +0000 @@ -1,1 +1,1 @@ -sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 2.7e-160 547.4 +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550
--- a/test-data/diamond_results.xml Tue Dec 03 17:40:05 2019 -0500 +++ b/test-data/diamond_results.xml Mon Mar 22 13:21:23 2021 +0000 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastp</BlastOutput_program> - <BlastOutput_version>diamond 0.9.29</BlastOutput_version> + <BlastOutput_version>diamond 2.0.8</BlastOutput_version> <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), "Fast and sensitive protein alignment using DIAMOND", Nature Methods 12:59-60.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>Query_1</BlastOutput_query-ID> @@ -33,11 +33,11 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>547.4</Hsp_bit-score> - <Hsp_score>1409</Hsp_score> - <Hsp_evalue>2.7e-160</Hsp_evalue> + <Hsp_bit-score>550</Hsp_bit-score> + <Hsp_score>1417</Hsp_score> + <Hsp_evalue>1.44e-205</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> - <Hsp_query-to>284</Hsp_query-to> + <Hsp_query-to>283</Hsp_query-to> <Hsp_hit-from>1</Hsp_hit-from> <Hsp_hit-to>284</Hsp_hit-to> <Hsp_query-frame>0</Hsp_query-frame> @@ -46,9 +46,9 @@ <Hsp_positive>283</Hsp_positive> <Hsp_gaps>1</Hsp_gaps> <Hsp_align-len>284</Hsp_align-len> - <Hsp_qseq>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL-FTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_qseq> - <Hsp_hseq>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_hseq> - <Hsp_midline>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL FTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_midline> + <Hsp_qseq>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL-FTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_qseq> + <Hsp_hseq>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_hseq> + <Hsp_midline>LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFIL FTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLLILXXXXXXXALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY</Hsp_midline> </Hsp> </Hit_hsps> </Hit>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/diamond_view_results.tabular Mon Mar 22 13:21:23 2021 +0000 @@ -0,0 +1,2 @@ +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 94M1D189M 100 +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 83M1D200M 100
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/names.dmp Mon Mar 22 13:21:23 2021 +0000 @@ -0,0 +1,11 @@ +1 | all | | synonym | +1 | root | | scientific name | +2 | Bacteria | Bacteria <bacteria> | scientific name | +2 | bacteria | | blast name | +2 | eubacteria | | genbank common name | +2 | Monera | Monera <bacteria> | in-part | +3 | Procaryotae | Procaryotae <bacteria> | in-part | +3 | Prokaryotae | Prokaryotae <bacteria> | in-part | +3 | Prokaryota | Prokaryota <bacteria> | in-part | +3 | prokaryote | prokaryote <bacteria> | in-part | +3 | prokaryotes | prokaryotes <bacteria> | in-part |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Mar 22 13:21:23 2021 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files required to build Diamond databases --> + <table name="diamond_database" comment_char="#"> + <columns>value, name, db_path</columns> + <file path="${__HERE__}/test-data/diamond_database.loc" /> + </table> +</tables>