comparison cd_hit.xml @ 1:7807800a3d03 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cdhit commit 86f060dc1ab97b56dc8502ad30d1b01343554329"
author iuc
date Fri, 05 Nov 2021 08:23:26 +0000
parents e0da3400ac2f
children
comparison
equal deleted inserted replaced
0:e0da3400ac2f 1:7807800a3d03
1 <tool id="cd_hit" name="cd-hit" version="4.6.8.1"> 1 <tool id="cd_hit" name="cd-hit" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>Cluster or compare biological sequence datasets</description> 2 <description>Cluster or compare biological sequence datasets</description>
3 <requirements> 3 <macros>
4 <requirement type="package" version="4.6.8">cd-hit</requirement> 4 <import>macros.xml</import>
5 </requirements> 5 </macros>
6 <expand macro="requirements" />
7 <expand macro="xrefs"/>
6 <version_command><![CDATA[ 8 <version_command><![CDATA[
7 cd-hit | grep "CD-HIT version" | cut -d" " -f 4 9 cd-hit | grep "CD-HIT version" | cut -d" " -f 4
8 ]]></version_command> 10 ]]></version_command>
9 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
10 cd-hit$est.est_select$twod.twod_select 12 cd-hit$est.est_select$twod.twod_select
47 -uL $advanced.max_unmatched_per_l 49 -uL $advanced.max_unmatched_per_l
48 -uS $advanced.max_unmatched_per_s 50 -uS $advanced.max_unmatched_per_s
49 -U $advanced.max_unmatched_len 51 -U $advanced.max_unmatched_len
50 $advanced.accurate 52 $advanced.accurate
51 $advanced.inram 53 $advanced.inram
54 $advanced.sort_cluster
55 $advanced.sort_fasta
52 #if $print_alnovl.print_alnovl_select == "yes": 56 #if $print_alnovl.print_alnovl_select == "yes":
53 -p 1 57 -p 1
54 -d $print_alnovl.desclen 58 -d $print_alnovl.desclen
55 #end if 59 #end if
56 60
135 <param name="max_unmatched_per_l" argument="-uL" type="float" min="0.0" max="1.0" value="1.0" label="Maximum unmatched percentage for the longer sequence" help="If set to 0.1, the unmatched region (excluding leading and tailing gaps) must not be more than 10% of the sequence"/> 139 <param name="max_unmatched_per_l" argument="-uL" type="float" min="0.0" max="1.0" value="1.0" label="Maximum unmatched percentage for the longer sequence" help="If set to 0.1, the unmatched region (excluding leading and tailing gaps) must not be more than 10% of the sequence"/>
136 <param name="max_unmatched_per_s" argument="-uS" type="float" min="0.0" max="1.0" value="1.0" label="Maximum unmatched percentage for the shorter sequence" help="If set to 0.1, the unmatched region (excluding leading and tailing gaps) must not be more than 10% of the sequence"/> 140 <param name="max_unmatched_per_s" argument="-uS" type="float" min="0.0" max="1.0" value="1.0" label="Maximum unmatched percentage for the shorter sequence" help="If set to 0.1, the unmatched region (excluding leading and tailing gaps) must not be more than 10% of the sequence"/>
137 <param name="max_unmatched_len" argument="-U" type="integer" min="0" value="99999999" label="Maximum unmatched length" help="If set to 10, the unmatched region (excluding leading and tailing gaps) must not be more than 10 bases"/> 141 <param name="max_unmatched_len" argument="-U" type="integer" min="0" value="99999999" label="Maximum unmatched length" help="If set to 10, the unmatched region (excluding leading and tailing gaps) must not be more than 10 bases"/>
138 <param name="inram" argument="-B" type="boolean" truevalue="-B 0" falsevalue="-B 1" checked="true" label="Sequences are stored in RAM" help="If false: sequences are stored on hard drive - use for huge data sets"/> 142 <param name="inram" argument="-B" type="boolean" truevalue="-B 0" falsevalue="-B 1" checked="true" label="Sequences are stored in RAM" help="If false: sequences are stored on hard drive - use for huge data sets"/>
139 <param name="accurate" argument="-g" type="boolean" truevalue="-g 1" falsevalue="-g 0" checked="false" label="Accurate but slow mode" help="By cd-hit's default algorithm, a sequence is clustered to the first cluster that meets the threshold (fast cluster). If set to true, the program will cluster it into the most similar cluster that meets the threshold (accurate but slow mode)"/> 143 <param name="accurate" argument="-g" type="boolean" truevalue="-g 1" falsevalue="-g 0" checked="false" label="Accurate but slow mode" help="By cd-hit's default algorithm, a sequence is clustered to the first cluster that meets the threshold (fast cluster). If set to true, the program will cluster it into the most similar cluster that meets the threshold (accurate but slow mode)"/>
144 <param name="sort_cluster" argument="-sc" type="boolean" truevalue="-sc 1" falsevalue="-sc 0" label="Sort clusters by size" help="When disabled, clusters are sorted by decreasing length; if enabled, clusters are sorted by decreasing size" />
145 <param name="sort_fasta" argument="-sf" type="boolean" truevalue="-sf 1" falsevalue="-sf 0" label="Sort FASTA/FASTQ by cluster size" help="When enabled, output sequences are sorted by decreasing cluster size" />
140 </section> 146 </section>
141 147
142 <conditional name="print_alnovl"> 148 <conditional name="print_alnovl">
143 <param name="print_alnovl_select" type="select" label="Print alignment overlap in .clstr file?"> 149 <param name="print_alnovl_select" type="select" label="Print alignment overlap in .clstr file?">
144 <option value="no" selected="true">No</option> 150 <option value="no" selected="true">No</option>
223 <param name="strand" value="true"/> 229 <param name="strand" value="true"/>
224 </conditional> 230 </conditional>
225 <param name="similarity" value="0.9"/> 231 <param name="similarity" value="0.9"/>
226 <output name="clusters_out" file="est-2d.txt.clstr"/> 232 <output name="clusters_out" file="est-2d.txt.clstr"/>
227 <output name="fasta_out" file="est-2d.txt"/> 233 <output name="fasta_out" file="est-2d.txt"/>
234 </test>
235 <!-- Test sort cluster parameter -->
236 <test>
237 <param name="fasta_in" value="cd_hit_est_in.fa" />
238 <conditional name="twod">
239 <param name="twod_select" value="" />
240 </conditional>
241 <conditional name="est">
242 <param name="est_select" value="-est" />
243 <param name="wordsize" value="8"/>
244 <param name="strand" value="false"/>
245 </conditional>
246 <param name="similarity" value="0.9"/>
247 <section name="advanced">
248 <param name="sort_cluster" value="true"/>
249 </section>
250 <output name="clusters_out" file="est_clusters_sorted.txt"/>
251 <output name="fasta_out" file="est_fasta_output.fasta"/>
252 </test>
253 <!-- Test sort fasta parameter -->
254 <test>
255 <param name="fasta_in" value="cd_hit_est_in.fa" />
256 <conditional name="twod">
257 <param name="twod_select" value="" />
258 </conditional>
259 <conditional name="est">
260 <param name="est_select" value="-est" />
261 <param name="wordsize" value="8"/>
262 <param name="strand" value="false"/>
263 </conditional>
264 <param name="similarity" value="0.9"/>
265 <section name="advanced">
266 <param name="sort_fasta" value="true"/>
267 </section>
268 <output name="clusters_out" file="est_clusters_output.txt"/>
269 <output name="fasta_out" file="est_fasta_sorted.fasta"/>
228 </test> 270 </test>
229 </tests> 271 </tests>
230 <help><![CDATA[ 272 <help><![CDATA[
231 **What it does** 273 **What it does**
232 274
274 316
275 1. The first output is a FASTA file of sequences in db2 that are not similar to db1. 317 1. The first output is a FASTA file of sequences in db2 that are not similar to db1.
276 318
277 2. The second output is a text file that lists similar sequences between db1 & db2 319 2. The second output is a text file that lists similar sequences between db1 & db2
278 ]]></help> 320 ]]></help>
279 <citations> 321 <expand macro="citations" />
280 <citation type="doi">10.1093/bioinformatics/btl158</citation>
281 <citation type="doi">10.1093/bioinformatics/bts565</citation>
282 </citations>
283 </tool> 322 </tool>