comparison assign_taxonomy.xml @ 3:ec3c4654eacc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit a831282140ce160035a4ce984f48cc20198ed0a1
author iuc
date Thu, 22 Jun 2017 06:57:54 -0400
parents fa330c61c0a5
children b4170e1a3b85
comparison
legend: equal | deleted | inserted | replaced
left column: 2:70206002b220 (old) | right column: 3:ec3c4654eacc (new)
2 <description>to each sequence</description> 2 <description>to each sequence</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
7 <requirement type="package" version="2.0.2">rdptools</requirement> 7 <!--<requirement type="package" version="2.0.2">rdptools</requirement>-->
8 <requirement type="package" version="2.2.22">blast-legacy</requirement> 8 <requirement type="package" version="2.2.22">blast-legacy</requirement>
9 <requirement type="package" version="2.3.4">vsearch</requirement> 9 <requirement type="package" version="2.3.4">vsearch</requirement>
10 <requirement type="package" version="1.36.1">mothur</requirement> 10 <requirement type="package" version="1.36.1">mothur</requirement>
11 </expand> 11 </expand>
12 <version_command>assign_taxonomy.py --version</version_command> 12 <version_command>assign_taxonomy.py --version</version_command>
13 <command detect_errors="aggressive"><![CDATA[ 13 <command detect_errors="aggressive"><![CDATA[
14 assign_taxonomy.py 14 assign_taxonomy.py
15 --input_fasta_fp '$input_fasta_fp' 15 --input_fasta_fp '$input_fasta_fp'
16 #if $id_to_taxonomy_fp 16 #if $id_to_taxonomy_condition.source_selector == 'history'
17 --id_to_taxonomy_fp '$id_to_taxonomy_fp' 17 --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp'
18 #else if $id_to_taxonomy_condition.source_selector == 'cached'
19 --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp.fields.path'
18 #end if 20 #end if
19 --assignment_method '$methodcond.assignment_method' 21 --assignment_method '$methodcond.assignment_method'
20 #if $methodcond.assignment_method == "uclust" 22 #if $methodcond.assignment_method == "uclust"
21 --min_consensus_fraction '$methodcond.min_consensus_fraction' 23 --min_consensus_fraction '$methodcond.min_consensus_fraction'
22 --similarity '$methodcond.similarity' 24 --similarity '$methodcond.similarity'
23 --uclust_max_accepts '$methodcond.uclust_max_accepts' 25 --uclust_max_accepts '$methodcond.uclust_max_accepts'
24 #else if $methodcond.assignment_method == "rdp" 26 #else if $methodcond.assignment_method == "rdp"
25 #if $methodcond.reference_seqs_fp 27 #if $methodcond.references.source_selector == 'history'
26 --reference_seqs_fp '$methodcond.reference_seqs_fp' 28 --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
29 #else if $methodcond.references.source_selector == 'cached'
30 --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
27 #end if 31 #end if
28 --confidence '$methodcond.confidence' 32 --confidence '$methodcond.confidence'
29 #else if $methodcond.assignment_method == "blast" 33 #else if $methodcond.assignment_method == "blast"
30 --reference_seqs_fp '$methodcond.reference_seqs_fp' 34 #if $methodcond.references.source_selector == 'history'
35 --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
36 #else if $methodcond.references.source_selector == 'cached'
37 --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
38 #end if
31 --blast_e_value '$methodcond.blast_e_value' 39 --blast_e_value '$methodcond.blast_e_value'
32 #else if $methodcond.assignment_method == "rtax" 40 #else if $methodcond.assignment_method == "rtax"
33 --read_1_seqs_fp '$methodcond.read_1_seqs_fp' 41 --read_1_seqs_fp '$methodcond.read_1_seqs_fp'
34 --read_2_seqs_fp '$methodcond.read_2_seqs_fp' 42 --read_2_seqs_fp '$methodcond.read_2_seqs_fp'
35 $methodcond.single_ok 43 $methodcond.single_ok
36 $methodcond.no_single_ok_generic 44 $methodcond.no_single_ok_generic
37 --read_id_regex '$methodcond.read_id_regex' 45 --read_id_regex '$methodcond.read_id_regex'
38 --amplicon_id_regex '$methodcond.amplicon_id_regex' 46 --amplicon_id_regex '$methodcond.amplicon_id_regex'
39 --header_id_rege '$methodcond.header_id_regex' 47 --header_id_rege '$methodcond.header_id_regex'
40 #else if $methodcond.assignment_method == "mothur" 48 #else if $methodcond.assignment_method == "mothur"
49 #if $methodcond.references.source_selector == 'history'
50 --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
51 #else if $methodcond.references.source_selector == 'cached'
52 --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
53 #end if
41 --confidence '$methodcond.confidence' 54 --confidence '$methodcond.confidence'
42 #else if $methodcond.assignment_method == "sortmerna" 55 #else if $methodcond.assignment_method == "sortmerna"
43 --sortmerna_threads \${GALAXY_SLOTS:-1} 56 --sortmerna_threads \${GALAXY_SLOTS:-1}
44 #if $methodcond.sortmerna_db 57 #if $methodcond.sortmerna_db
45 --sortmerna_db '$methodcond.sortmerna_db' 58 --sortmerna_db '$methodcond.sortmerna_db'
52 #end if 65 #end if
53 -o assign_taxonomy 66 -o assign_taxonomy
54 ]]></command> 67 ]]></command>
55 <inputs> 68 <inputs>
56 <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" /> 69 <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" />
57 <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep" optional="True"/> 70
71 <conditional name="id_to_taxonomy_condition">
72 <param name="source_selector" type="select" label="Do you want to use a taxonomy reference ?">
73 <option value="cached">Yes (from the local cache)</option>
74 <option value="history">Yes (from the active history)</option>
75 <option value="void" selected="true">No</option>
76 </param>
77 <when value="cached">
78 <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="select" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep">
79 <options from_data_table="qiime_taxonomy"/>
80 </param>
81 </when>
82 <when value="history">
83 <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep"/>
84 </when>
85 <when value="void"/>
86 </conditional>
87
58 <conditional name="methodcond"> 88 <conditional name="methodcond">
59 <param argument="--assignment_method" label="Taxon assignment method" type="select"> 89 <param argument="--assignment_method" label="Taxon assignment method" type="select">
60 <option selected="True" value="uclust">uclust</option> 90 <option selected="True" value="uclust">uclust</option>
61 <!--<option value="rdp">rdp</option> 91 <!--<option value="rdp">rdp</option>-->
62 <option value="blast">blast</option> 92 <option value="blast">blast</option>
63 <option value="rtax">rtax</option> 93 <!--<option value="rtax">rtax</option>-->
64 <option value="mothur">mothur</option> 94 <option value="mothur">mothur</option>
65 <option value="sortmerna">sortmerna</option>--> 95 <option value="sortmerna">sortmerna</option>
66 </param> 96 </param>
67 <when value="uclust"> 97 <when value="uclust">
68 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/> 98 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
69 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/> 99 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
70 <param argument="uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/> 100 <param argument="uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/>
71 </when> 101 </when>
72 <when value="rdp"> 102 <when value="rdp">
73 <param argument="--reference_seqs_fp" label="Reference sequences used as training sequences for the classifier" type="data" format="fasta" optional="True"/> 103 <expand macro="assign_taxonomy_reference_source"/>
74 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/> 104 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
75 </when> 105 </when>
76 <when value="blast"> 106 <when value="blast">
77 <param argument="--reference_seqs_fp" label="Reference sequences used to generate a blast database" type="data" format="fasta" optional="True"/> 107 <expand macro="assign_taxonomy_reference_source"/>
78 <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/> 108 <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/>
79 </when> 109 </when>
80 <when value="rtax"> 110 <when value="rtax">
81 <param argument="--read_1_seqs_fp" type="data" format="fasta" label="First reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/> 111 <param argument="--read_1_seqs_fp" type="data" format="fasta" label="First reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
82 <param argument="--read_2_seqs_fp" type="data" format="fasta" label="Second reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/> 112 <param argument="--read_2_seqs_fp" type="data" format="fasta" label="Second reads from paired-end sequencing, prior to OTU clustering" help="This file is the result of split_illumina_fastq"/>
85 <param argument="--read_id_regex" type="text" value="\S+\s+(\S+)" label="Regex used to parse the result of OTU clustering, to get the read_1_id for each clusterID" help="The clusterID itself is assumed to be the first field, and is not captured by the regex"/> 115 <param argument="--read_id_regex" type="text" value="\S+\s+(\S+)" label="Regex used to parse the result of OTU clustering, to get the read_1_id for each clusterID" help="The clusterID itself is assumed to be the first field, and is not captured by the regex"/>
86 <param argument="--amplicon_id_regex" type="text" value="(\S+)\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the ampliconID for each read_1_id" help="Two groups capture read_1_id and ampliconID, respectively."/> 116 <param argument="--amplicon_id_regex" type="text" value="(\S+)\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the ampliconID for each read_1_id" help="Two groups capture read_1_id and ampliconID, respectively."/>
87 <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" /> 117 <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" />
88 </when> 118 </when>
89 <when value="mothur"> 119 <when value="mothur">
120 <expand macro="assign_taxonomy_reference_source"/>
90 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/> 121 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
91 </when> 122 </when>
92 <when value="sortmerna"> 123 <when value="sortmerna">
93 <param argument="--sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/> 124 <!--<param argument="- -sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>-->
94 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/> 125 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
95 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/> 126 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
96 <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/> 127 <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/>
97 <param argument="--sortmerna_coverage" type="float" value="0.9" label="Mininum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1"/> 128 <param argument="--sortmerna_coverage" type="float" value="0.9" label="Mininum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1"/>
98 <param argument="--sortmerna_best_N_alignments" type="integer" value="5" label="Number best alignments per read to be written"/> 129 <param argument="--sortmerna_best_N_alignments" type="integer" value="5" label="Number best alignments per read to be written"/>
99 </when> 130 </when>
100 </conditional> 131 </conditional>
101 </inputs> 132 </inputs>
102 <outputs> 133 <outputs>
103 <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log"/> 134 <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log">
135 <filter>methodcond['assignment_method']!="mothur"</filter>
136 </data>
104 <data name="tax_assignments" format="txt" from_work_dir="assign_taxonomy/*.txt" label="${tool.name} on ${on_string}: Taxonomic assignment"/> 137 <data name="tax_assignments" format="txt" from_work_dir="assign_taxonomy/*.txt" label="${tool.name} on ${on_string}: Taxonomic assignment"/>
105 <data name="sortmerna_map" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast" label="${tool.name} on ${on_string}: SortMeRNA Blast"> 138 <data name="sortmerna_map" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast" label="${tool.name} on ${on_string}: SortMeRNA Blast">
106 <filter>methodcond['assignment_method']=="sortmerna"</filter> 139 <filter>methodcond['assignment_method']=="sortmerna"</filter>
107 </data> 140 </data>
108 </outputs> 141 </outputs>
109 <tests> 142 <tests>
143 <!-- Uclust assignment method -->
110 <test> 144 <test>
111 <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/> 145 <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/>
112 <param name="assignment_method" value="uclust"/> 146 <conditional name="id_to_taxonomy_condition">
113 <param name="min_consensus_fraction" value="0.51"/> 147 <param name="source_selector" value="void" />
114 <param name="similarity" value="0.9"/> 148 </conditional>
115 <param name="uclust_max_accepts" value="3" /> 149 <conditional name="methodcond">
116 <output name="tax_assignments" value="assign_taxonomy/uclust_taxonomic_assignation.txt"/> 150 <param name="assignment_method" value="uclust"/>
151 <param name="min_consensus_fraction" value="0.51"/>
152 <param name="similarity" value="0.9"/>
153 <param name="uclust_max_accepts" value="3" />
154 </conditional>
155 <output name="tax_assignments" md5="57b0cf51fc0142f369134ea923d78d99"/>
117 <output name="log"> 156 <output name="log">
118 <assert_contents> 157 <assert_contents>
119 <has_text text="UclustConsensusTaxonAssigner" /> 158 <has_text text="UclustConsensusTaxonAssigner" />
120 <has_text text="2751331" /> 159 <has_text text="2751331" />
121 </assert_contents> 160 </assert_contents>
122 </output> 161 </output>
123 </test> 162 </test>
163 <!-- Mothur assignment method -->
164 <!-- Note: there is variability in the assignment results with this method so the md5 checksum comparison is not possible -->
165 <test>
166 <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
167 <conditional name="id_to_taxonomy_condition">
168 <param name="source_selector" value="history" />
169 <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
170 </conditional>
171 <conditional name="methodcond">
172 <param name="assignment_method" value="mothur"/>
173 <conditional name="references">
174 <param name="source_selector" value="history" />
175 <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
176 </conditional>
177 <param name="confidence" value="0.5"/>
178 </conditional>
179 <output name="tax_assignments">
180 <assert_contents>
181 <has_text text="X67228" />
182 <has_text text="Rhizobium" />
183 <has_text text="EF503697" />
184 </assert_contents>
185 </output>
186 </test>
187 <!-- Blast assignment method -->
188 <test>
189 <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
190 <conditional name="id_to_taxonomy_condition">
191 <param name="source_selector" value="history" />
192 <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
193 </conditional>
194 <conditional name="methodcond">
195 <param name="assignment_method" value="blast"/>
196 <conditional name="references">
197 <param name="source_selector" value="history" />
198 <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
199 </conditional>
200 <param name="blast_e_value" value="0.001"/>
201 </conditional>
202 <output name="tax_assignments" md5="5ab8d28f67bcbf828937d222b2ab9c6e"/>
203 <output name="log">
204 <assert_contents>
205 <has_text text="BlastTaxonAssigner" />
206 <has_text text="inspected: 2" />
207 </assert_contents>
208 </output>
209 </test>
210 <!-- SortMeRNA assignment method -->
211 <!-- Note: The input file has been reduced to only 1 sequence but this test is still quite long to execute (more than 10min) -->
124 <!--<test> 212 <!--<test>
125 <param name="input_fasta_fp" value="assign_taxonomy/mothur_ref_seq_set.fna"/> 213 <param name="input_fasta_fp" value="assign_taxonomy/sortmerna_input_seqs.fasta"/>
126 <param name="assignment_method" value="sortmerna"/> 214 <conditional name="methodcond">
127 <param name="min_consensus_fraction" value="0.51" /> 215 <param name="assignment_method" value="sortmerna"/>
128 <param name="similarity" value="0.9" /> 216 <param name="min_consensus_fraction" value="0.51" />
129 <param name="sortmerna_e_value" value="1.0" /> 217 <param name="similarity" value="0.9" />
130 <param name="sortmerna_coverage" value="0.9" /> 218 <param name="sortmerna_e_value" value="1.0" />
131 <param name="sortmerna_best_N_alignments" value="5" /> 219 <param name="sortmerna_coverage" value="0.9" />
220 <param name="sortmerna_best_N_alignments" value="5" />
221 </conditional>
132 <output name="log"> 222 <output name="log">
133 <assert_contents> 223 <assert_contents>
134 <has_text text="Application:SortMeRNA" /> 224 <has_text text="Application:SortMeRNA" />
135 <has_text text="min_consensus_fraction" /> 225 <has_text text="min_consensus_fraction" />
136 </assert_contents> 226 </assert_contents>
137 </output> 227 </output>
138 <output name="tax_assignments" value="assign_taxonomy/sortmerna_taxonomic_assignation.txt"/> 228 <output name="tax_assignments" md5="0da68ab9762b677a00f34051eadad68c"/>
139 <output name="sortmerna_map" value="assign_taxonomy/sortmerna_map.blast"/> 229 <output name="sortmerna_map" md5="16e349be29f121fca741d6294f79ce7c"/>
140 </test>--> 230 </test>-->
141 </tests> 231 </tests>
142 <help><![CDATA[ 232 <help><![CDATA[
143 **What it does** 233 **What it does**
144 234