Mercurial > repos > iuc > qiime_assign_taxonomy
changeset 3:ec3c4654eacc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit a831282140ce160035a4ce984f48cc20198ed0a1
author | iuc |
---|---|
date | Thu, 22 Jun 2017 06:57:54 -0400 |
parents | 70206002b220 |
children | 01151a09513f |
files | assign_taxonomy.xml generate_test_data.sh macros.xml test-data/assign_taxonomy/mothur_id_to_taxonomy.txt test-data/assign_taxonomy/mothur_repr_set_seqs.fasta test-data/assign_taxonomy/sortmerna_input_seqs.fasta test-data/assign_taxonomy/sortmerna_map.blast test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt test-data/assign_taxonomy/uclust_taxonomic_assignation.txt |
diffstat | 9 files changed, 174 insertions(+), 104 deletions(-) [+] |
line wrap: on
line diff
--- a/assign_taxonomy.xml Fri May 19 04:09:30 2017 -0400 +++ b/assign_taxonomy.xml Thu Jun 22 06:57:54 2017 -0400 @@ -4,7 +4,7 @@ <import>macros.xml</import> </macros> <expand macro="requirements"> - <requirement type="package" version="2.0.2">rdptools</requirement> + <!--<requirement type="package" version="2.0.2">rdptools</requirement>--> <requirement type="package" version="2.2.22">blast-legacy</requirement> <requirement type="package" version="2.3.4">vsearch</requirement> <requirement type="package" version="1.36.1">mothur</requirement> @@ -13,8 +13,10 @@ <command detect_errors="aggressive"><![CDATA[ assign_taxonomy.py --input_fasta_fp '$input_fasta_fp' - #if $id_to_taxonomy_fp - --id_to_taxonomy_fp '$id_to_taxonomy_fp' + #if $id_to_taxonomy_condition.source_selector == 'history' + --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp' + #else if $id_to_taxonomy_condition.source_selector == 'cached' + --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp.fields.path' #end if --assignment_method '$methodcond.assignment_method' #if $methodcond.assignment_method == "uclust" @@ -22,12 +24,18 @@ --similarity '$methodcond.similarity' --uclust_max_accepts '$methodcond.uclust_max_accepts' #else if $methodcond.assignment_method == "rdp" - #if $methodcond.reference_seqs_fp - --reference_seqs_fp '$methodcond.reference_seqs_fp' + #if $methodcond.references.source_selector == 'history' + --reference_seqs_fp '$methodcond.references.reference_seqs_fp' + #else if $methodcond.references.source_selector == 'cached' + --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path' #end if --confidence '$methodcond.confidence' #else if $methodcond.assignment_method == "blast" - --reference_seqs_fp '$methodcond.reference_seqs_fp' + #if $methodcond.references.source_selector == 'history' + --reference_seqs_fp '$methodcond.references.reference_seqs_fp' + #else if $methodcond.references.source_selector == 'cached' + --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path' + #end if --blast_e_value '$methodcond.blast_e_value' #else if $methodcond.assignment_method == "rtax" --read_1_seqs_fp '$methodcond.read_1_seqs_fp' @@ -38,6 +46,11 @@ --amplicon_id_regex '$methodcond.amplicon_id_regex' --header_id_rege '$methodcond.header_id_regex' #else if $methodcond.assignment_method == "mothur" + #if $methodcond.references.source_selector == 'history' + --reference_seqs_fp '$methodcond.references.reference_seqs_fp' + #else if $methodcond.references.source_selector == 'cached' + --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path' + #end if --confidence '$methodcond.confidence' #else if $methodcond.assignment_method == "sortmerna" --sortmerna_threads \${GALAXY_SLOTS:-1} @@ -54,15 +67,32 @@ ]]></command> <inputs> <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" /> - <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep" optional="True"/> + + <conditional name="id_to_taxonomy_condition"> + <param name="source_selector" type="select" label="Do you want to use a taxonomy reference ?"> + <option value="cached">Yes (from the local cache)</option> + <option value="history">Yes (from the active history)</option> + <option value="void" selected="true">No</option> + </param> + <when value="cached"> + <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="select" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep"> + <options from_data_table="qiime_taxonomy"/> + </param> + </when> + <when value="history"> + <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep"/> + </when> + <when value="void"/> + </conditional> + <conditional name="methodcond"> <param argument="--assignment_method" label="Taxon assignment method" type="select"> <option selected="True" value="uclust">uclust</option> - <!--<option value="rdp">rdp</option> + <!--<option value="rdp">rdp</option>--> <option value="blast">blast</option> - <option value="rtax">rtax</option> + <!--<option value="rtax">rtax</option>--> <option value="mothur">mothur</option> - <option value="sortmerna">sortmerna</option>--> + <option value="sortmerna">sortmerna</option> </param> <when value="uclust"> <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/> @@ -70,11 +100,11 @@ <param argument="uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/> </when> <when value="rdp"> - <param argument="--reference_seqs_fp" label="Reference sequences used as training sequences for the classifier" type="data" format="fasta" optional="True"/> + <expand macro="assign_taxonomy_reference_source"/> <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/> </when> <when value="blast"> - <param argument="--reference_seqs_fp" label="Reference sequences used to generate a blast database" type="data" format="fasta" optional="True"/> + <expand macro="assign_taxonomy_reference_source"/> <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/> </when> <when value="rtax"> @@ -87,10 +117,11 @@ <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" /> </when> <when value="mothur"> + <expand macro="assign_taxonomy_reference_source"/> <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/> </when> <when value="sortmerna"> - <param argument="--sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/> + <!--<param argument="- -sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>--> <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/> <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/> <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/> @@ -100,20 +131,28 @@ </conditional> </inputs> <outputs> - <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log"/> + <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log"> + <filter>methodcond['assignment_method']!="mothur"</filter> + </data> <data name="tax_assignments" format="txt" from_work_dir="assign_taxonomy/*.txt" label="${tool.name} on ${on_string}: Taxonomic assignment"/> <data name="sortmerna_map" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast" label="${tool.name} on ${on_string}: SortMeRNA Blast"> <filter>methodcond['assignment_method']=="sortmerna"</filter> </data> </outputs> <tests> + <!-- Uclust assignment method --> <test> <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/> - <param name="assignment_method" value="uclust"/> - <param name="min_consensus_fraction" value="0.51"/> - <param name="similarity" value="0.9"/> - <param name="uclust_max_accepts" value="3" /> - <output name="tax_assignments" value="assign_taxonomy/uclust_taxonomic_assignation.txt"/> + <conditional name="id_to_taxonomy_condition"> + <param name="source_selector" value="void" /> + </conditional> + <conditional name="methodcond"> + <param name="assignment_method" value="uclust"/> + <param name="min_consensus_fraction" value="0.51"/> + <param name="similarity" value="0.9"/> + <param name="uclust_max_accepts" value="3" /> + </conditional> + <output name="tax_assignments" md5="57b0cf51fc0142f369134ea923d78d99"/> <output name="log"> <assert_contents> <has_text text="UclustConsensusTaxonAssigner" /> @@ -121,22 +160,73 @@ </assert_contents> </output> </test> + <!-- Mothur assignment method --> + <!-- Note: there is variability in the assignment results with this method so the md5 checksum comparison is not possible --> + <test> + <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/> + <conditional name="id_to_taxonomy_condition"> + <param name="source_selector" value="history" /> + <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/> + </conditional> + <conditional name="methodcond"> + <param name="assignment_method" value="mothur"/> + <conditional name="references"> + <param name="source_selector" value="history" /> + <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" /> + </conditional> + <param name="confidence" value="0.5"/> + </conditional> + <output name="tax_assignments"> + <assert_contents> + <has_text text="X67228" /> + <has_text text="Rhizobium" /> + <has_text text="EF503697" /> + </assert_contents> + </output> + </test> + <!-- Blast assignment method --> + <test> + <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/> + <conditional name="id_to_taxonomy_condition"> + <param name="source_selector" value="history" /> + <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/> + </conditional> + <conditional name="methodcond"> + <param name="assignment_method" value="blast"/> + <conditional name="references"> + <param name="source_selector" value="history" /> + <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" /> + </conditional> + <param name="blast_e_value" value="0.001"/> + </conditional> + <output name="tax_assignments" md5="5ab8d28f67bcbf828937d222b2ab9c6e"/> + <output name="log"> + <assert_contents> + <has_text text="BlastTaxonAssigner" /> + <has_text text="inspected: 2" /> + </assert_contents> + </output> + </test> + <!-- SortMeRNA assignment method --> + <!-- Note: The input file has been reduced to only 1 sequence but this test is still quite long to execute (more than 10min) --> <!--<test> - <param name="input_fasta_fp" value="assign_taxonomy/mothur_ref_seq_set.fna"/> - <param name="assignment_method" value="sortmerna"/> - <param name="min_consensus_fraction" value="0.51" /> - <param name="similarity" value="0.9" /> - <param name="sortmerna_e_value" value="1.0" /> - <param name="sortmerna_coverage" value="0.9" /> - <param name="sortmerna_best_N_alignments" value="5" /> + <param name="input_fasta_fp" value="assign_taxonomy/sortmerna_input_seqs.fasta"/> + <conditional name="methodcond"> + <param name="assignment_method" value="sortmerna"/> + <param name="min_consensus_fraction" value="0.51" /> + <param name="similarity" value="0.9" /> + <param name="sortmerna_e_value" value="1.0" /> + <param name="sortmerna_coverage" value="0.9" /> + <param name="sortmerna_best_N_alignments" value="5" /> + </conditional> <output name="log"> <assert_contents> <has_text text="Application:SortMeRNA" /> <has_text text="min_consensus_fraction" /> </assert_contents> </output> - <output name="tax_assignments" value="assign_taxonomy/sortmerna_taxonomic_assignation.txt"/> - <output name="sortmerna_map" value="assign_taxonomy/sortmerna_map.blast"/> + <output name="tax_assignments" md5="0da68ab9762b677a00f34051eadad68c"/> + <output name="sortmerna_map" md5="16e349be29f121fca741d6294f79ce7c"/> </test>--> </tests> <help><![CDATA[
--- a/generate_test_data.sh Fri May 19 04:09:30 2017 -0400 +++ b/generate_test_data.sh Thu Jun 22 06:57:54 2017 -0400 @@ -92,9 +92,32 @@ --similarity '0.9' \ --uclust_max_accepts '3' \ -o assign_taxonomy_uclust -cp assign_taxonomy_uclust/uclust_input_seqs_tax_assignments.txt 'test-data/assign_taxonomy/uclust_taxonomic_assignation.txt' +ls assign_taxonomy_uclust +md5sum 'assign_taxonomy_uclust/uclust_input_seqs_tax_assignments.txt' rm -rf assign_taxonomy_uclust +assign_taxonomy.py \ + --input_fasta_fp 'test-data/assign_taxonomy/mothur_repr_set_seqs.fasta' \ + --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \ + --assignment_method 'mothur' \ + --reference_seqs_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \ + --confidence '0.5' \ + -o assign_taxonomy_mothur +ls assign_taxonomy_mothur +md5sum 'assign_taxonomy_mothur/mothur_repr_set_seqs_tax_assignments.txt' +rm -rf assign_taxonomy_mothur + +assign_taxonomy.py \ + --input_fasta_fp 'test-data/assign_taxonomy/mothur_repr_set_seqs.fasta' \ + --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \ + --assignment_method 'mothur' \ + --reference_seqs_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \ + --blast_e_value '0.001' \ + -o assign_taxonomy_blast +ls assign_taxonomy_blast +md5sum 'assign_taxonomy_blast/mothur_repr_set_seqs_tax_assignments.txt' +rm -rf assign_taxonomy_blast + #assign_taxonomy.py \ # --input_fasta_fp 'test-data/assign_taxonomy/rdp_input_seqs.fasta' \ # --id_to_taxonomy_fp 'test-data/assign_taxonomy/rdp_id_to_taxonomy.txt' \ @@ -116,14 +139,6 @@ # -o assign_taxonomy_rtax #ls assign_taxonomy_rtax -#assign_taxonomy.py \ -# --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \ -# --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \ -# --assignment_method 'mothur' \ -# --confidence 0.5 \ -# -o assign_taxonomy_mothur -#ls assign_taxonomy_mothur - assign_taxonomy.py \ --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \ --assignment_method 'sortmerna' \ @@ -133,8 +148,9 @@ --sortmerna_coverage "0.9" \ --sortmerna_best_N_alignments "5" \ -o assign_taxonomy_sortmerna -cp assign_taxonomy_sortmerna/sortmerna_map.blast 'test-data/assign_taxonomy/sortmerna_map.blast' -cp assign_taxonomy_sortmerna/mothur_ref_seq_set_tax_assignments.txt 'test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt' +ls assign_taxonomy_sortmerna +md5sum 'assign_taxonomy_sortmerna/mothur_ref_seq_set_tax_assignments.txt' +md5sum 'assign_taxonomy_sortmerna/sortmerna_map.blast' rm -rf assign_taxonomy_sortmerna #beta_diversity @@ -1105,22 +1121,3 @@ cp validate_mapping_file_output/*.log 'test-data/validate_mapping_file/map.tsv.log' cp validate_mapping_file_output/*corrected.txt 'test-data/validate_mapping_file/map.tsv_corrected.txt' rm -rf validate_mapping_file_output - - - - - - - - - - - - - - - - - - -
--- a/macros.xml Fri May 19 04:09:30 2017 -0400 +++ b/macros.xml Thu Jun 22 06:57:54 2017 -0400 @@ -29,6 +29,22 @@ </when> </conditional> </xml> + <xml name="assign_taxonomy_reference_source"> + <conditional name="references"> + <param name="source_selector" type="select" label="Select a reference sequence file from"> + <option value="cached">The local cache</option> + <option value="history">The active history</option> + </param> + <when value="cached"> + <param argument="--reference_seqs_fp" label="Reference sequences either used to generate a blast database (Blast) or used as training sequences for the selected classifier (RDP, Mothur)" type="select"> + <options from_data_table="qiime_rep_set"/> + </param> + </when> + <when value="history"> + <param argument="--reference_seqs_fp" type="data" format="fasta" label="Reference sequences to search against"/> + </when> + </conditional> + </xml> <xml name="pick_otus_similarity"> <param argument="--similarity" type="float" value="0.97" label="Sequence similarity threshold"/> </xml>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/assign_taxonomy/mothur_id_to_taxonomy.txt Thu Jun 22 06:57:54 2017 -0400 @@ -0,0 +1,7 @@ +X67228 Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiaceae;Rhizobium +X73443 Bacteria;Firmicutes;Clostridia;Clostridiales;Clostridiaceae;Clostridium +AB004750 Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales;Enterobacteriaceae;Enterobacter +xxxxxx Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Pseudomonadaceae;Pseudomonas +AB004748 Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales;Enterobacteriaceae;Enterobacter +AB000278 Bacteria;Proteobacteria;Gammaproteobacteria;Vibrionales;Vibrionaceae;Photobacterium +AB000390 Bacteria;Proteobacteria;Gammaproteobacteria;Vibrionales;Vibrionaceae;Vibrio
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/assign_taxonomy/mothur_repr_set_seqs.fasta Thu Jun 22 06:57:54 2017 -0400 @@ -0,0 +1,4 @@ +>X67228 some description +aacgaacgctggcggcaggcttaacacatgcaagtcgaacgctccgcaaggagagtggcagacgggtgagtaacgcgtgggaatctacccaaccctgcggaatagctctgggaaactggaattaataccgcatacgccctacgggggaaagatttatcggggatggatgagcccgcgttggattagctagttggtggggtaaaggcctaccaaggcgacgatccatagctggtctgagaggatgatcagccacattgggactgagacacggcccaaa +>EF503697 +TAAAATGACTAGCCTGCGAGTCACGCCGTAAGGCGTGGCATACAGGCTCAGTAACACGTAGTCAACATGCCCAAAGGACGTGGATAACCTCGGGAAACTGAGGATAAACCGCGATAGGCCAAGGTTTCTGGAATGAGCTATGGCCGAAATCTATATGGCCTTTGGATTGGACTGCGGCCGATCAGGCTGTTGGTGAGGTAATGGCCCACCAAACCTGTAACCGGTACGGGCTTTGAGAGAAGTAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTATGGGGCGCAGCAGGCGCGAAACCTCTGCAATAGGCGAAAGCCTGACAGGGTTACTCTGAGTGATGCCCGCTAAGGGTATCTTTTGGCACCTCTAAAAATGGTGCAGAATAAGGGGTGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCACCCCGAGTTGTCGGGACGATTATTGGGCCTAAAGCATCCGTAGCCTGTTCTGCAAGTCCTCCGTTAAATCCACCTGCTCAACGGATGGGCTGCGGAGGATACCGCAGAGCTAGGAGGCGGGAGAGGCAAACGGTACTCAGTGGGTAGGGGTAAAATCCATTGATCTACTGAAGACCACCAGTGGCGAAGGCGGTTTGCCAGAACGCGCTCGACGGTGAGGGATGAAAGCTGGGGGAGCAAACCGGATTAGATACCCGGGGTAGTCCCAGCTGTAAACGGATGCAGACTCGGGTGATGGGGTTGGCTTCCGGCCCAACCCCAATTGCCCCCAGGCGAAGCCCGTTAAGATCTTGCCGCCCTGTCAGATGTCAGGGCCGCCAATACTCGAAACCTTAAAAGGAAATTGGGCGCGGGAAAAGTCACCAAAAGGGGGTTGAAACCCTGCGGGTTATATATTGTAAACC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/assign_taxonomy/sortmerna_input_seqs.fasta Thu Jun 22 06:57:54 2017 -0400 @@ -0,0 +1,2 @@ +>X67228 +aacgaacgctggcggcaggcttaacacatgcaagtcgaacgctccgcaaggagagtggcagacgggtgagtaacgcgtgggaatctacccaaccctgcggaatagctctgggaaactggaattaataccgcatacgccctacgggggaaagatttatcggggatggatgagcccgcgttggattagctagttggtggggtaaaggcctaccaaggcgacgatccatagctggtctgagaggatgatcagccacattgggactgagacacggcccaaa
--- a/test-data/assign_taxonomy/sortmerna_map.blast Fri May 19 04:09:30 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -X67228 152350 98.6 277 4 0 1 277 22 298 5.76e-129 464 277M 100 -X67228 558499 97.1 275 8 0 1 275 2 276 1.05e-122 443 275M2S 99.3 -X67228 553706 97.5 277 7 0 1 277 1 277 4.7e-125 451 277M 100 -X67228 553981 95.7 277 12 0 1 277 2 278 1.55e-118 429 277M 100 -X67228 4423084 98.6 277 4 0 1 277 21 297 5.76e-129 464 277M 100 -X73443 179865 96.3 269 2 8 8 276 2 268 2.31e-114 415 7S3M1I28M1I3M1I7M1D8M1D20M1I26M1I115M1D54M 97.5 -X73443 181718 96 269 3 8 8 276 2 268 4.66e-113 411 7S3M1I28M1I3M1I7M1D8M1D20M1I26M1I115M1D54M 97.5 -X73443 193551 96.3 269 2 8 8 276 2 268 2.31e-114 415 7S3M1I28M1I3M1I7M1D8M1D21M1I26M1I114M1D54M 97.5 -X73443 212341 96.3 269 2 8 8 276 2 268 2.31e-114 415 7S3M1I28M1I3M1I7M1D8M1D21M1I26M1I114M1D54M 97.5 -X73443 175883 96 269 3 8 8 276 2 268 4.66e-113 411 7S3M1I28M1I3M1I7M1D8M1D21M1I26M1I114M1D54M 97.5 -AB004750 3888577 100 339 0 0 1 339 26 364 1.61e-166 588 339M 100 -AB004750 581782 97.6 339 8 0 1 339 27 365 4.36e-156 554 339M 100 -AB004750 1108679 97.9 339 7 0 1 339 26 364 2.16e-157 558 339M 100 -AB004750 1109844 97.9 339 7 0 1 339 26 364 2.16e-157 558 339M 100 -AB004750 4418165 99.7 339 1 0 1 339 28 366 3.25e-165 584 339M 100 -xxxxxx 1102995 97.5 361 8 1 1 361 22 383 2.94e-166 588 174M1D187M 100 -xxxxxx 340031 95.6 361 13 3 1 361 23 386 1.07e-158 562 169M3D192M 100 -xxxxxx 340031 95.6 361 13 3 1 361 23 386 1.07e-158 562 169M3D192M 100 -AB004748 581782 98 396 8 0 1 396 27 422 8.13e-186 653 396M 100 -AB004748 1108679 98.2 396 7 0 1 396 26 421 4.04e-187 657 396M 100 -AB004748 1109844 98.2 396 7 0 1 396 26 421 4.04e-187 657 396M 100 -AB004748 3888577 100 396 0 0 1 396 26 421 3.01e-196 687 396M 100 -AB004748 561327 97.5 396 10 0 1 396 1 396 3.3e-183 644 396M 100 -AB000278 554346 98.6 368 5 0 1 368 6 373 4e-175 617 368M 100 -AB000278 160928 97 368 7 4 1 368 33 400 2.94e-166 588 33M1D5M1I8M1I2M1D318M 100 -AB000390 4433053 98.1 317 6 0 1 317 13 329 3.2e-147 524 317M 100 -AB000390 19456 94.4 317 14 4 1 317 12 328 4.28e-132 474 77M2D4M2I234M 100 -AB000390 4432126 94.4 317 14 4 1 317 13 329 4.28e-132 474 77M2D4M2I234M 100
--- a/test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt Fri May 19 04:09:30 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -#OTU ID taxonomy confidence num hits -AB004750 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__; s__ 0.60 5 -AB000390 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Vibrionales; f__Vibrionaceae 1.00 3 -xxxxxx k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Alteromonadales; f__Alteromonadaceae; g__Marinobacter; s__ 1.00 3 -X67228 k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Rhizobiaceae 0.60 5 -AB000278 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Vibrionales; f__Vibrionaceae; g__Photobacterium 1.00 2 -AB004748 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__; s__ 0.60 5 -X73443 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae 1.00 5
--- a/test-data/assign_taxonomy/uclust_taxonomic_assignation.txt Fri May 19 04:09:30 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -11469739 k__Bacteria; p__OP9; c__JS1; o__SB-45; f__; g__; s__ 1.00 3 -11480235 k__Bacteria; p__OD1; c__; o__; f__; g__; s__ 1.00 1 -11460543 k__Bacteria; p__OP9; c__JS1; o__SB-45; f__; g__; s__ 1.00 3 -11460523 k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Desulfobacterales; f__Desulfobulbaceae; g__; s__ 1.00 3 -11472286 k__Bacteria; p__WS5; c__; o__; f__; g__; s__ 1.00 1 -11458037 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptococcaceae; g__Desulfosporosinus; s__meridiei 1.00 3 -11472384 k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia; s__ 0.67 3 -11469752 k__Bacteria; p__TM7; c__TM7-1; o__; f__; g__; s__ 1.00 3 -11480408 k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__; g__; s__ 1.00 3 -11468680 k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia; s__ 1.00 3