Mercurial > repos > devteam > cd_hit_dup
annotate cd_hit_dup.xml @ 1:0fb894bd8eba draft default tip
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
author | devteam |
---|---|
date | Tue, 21 Jul 2015 14:14:43 -0400 |
parents | 2e150ed1b76e |
children |
rev | line source |
---|---|
0 | 1 <tool id="cd_hit_dup" name="cd-hit-dup" version="0.0.1"> |
1
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
2 <description> |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
3 remove duplicates and detect chimaeras in sequencing reads |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
4 </description> |
<!-- Declares the tool's single package dependency: cd-hit-auxtools, pinned to the exact version string below. -->
0 | 5 <requirements> |
6 <requirement type="package" version="0.5-2012-03-07-fix-dan-gh-0.0.1">cd-hit-auxtools</requirement> | |
7 </requirements> | |
<!--
  Exit-code rules. The two ranges together match every non-zero exit status:
  "1:" covers 1 and above, ":-1" covers -1 and below.
  No level attribute is set on either rule.
  NOTE(review): presumably Galaxy then treats a match as a fatal job error; confirm against the tool schema.
-->
8 <stdio> | |
9 <exit_code range="1:" /> | |
10 <exit_code range=":-1" /> | |
11 </stdio> | |
12 | |
<!--
  Command template (Cheetah) that assembles the cd-hit-dup invocation.

  * -i is always passed, using fastq_input1 from whichever branch is selected.
  * Paired mode adds -i2 with the second read set.
  * Because the chimera test is an elif of the paired test, the chimera flags
    (-f, -s, -a, -b, -p) are only ever emitted when the selector is not "paired".
    The filter_chimeras conditional is defined only under the single-reads branch
    of the inputs, so it is never dereferenced in paired mode.
  * -u, -m, -d are always passed from the top-level parameters.
  * -e is emitted only when mismatches_allowed is non-empty. A whole-number value
    (for example 2.0) is converted to an integer first; any other value is passed as entered.
    NOTE(review): presumably this lets cd-hit-dup distinguish a mismatch count from a
    fractional percentage; confirm against the cd-hit-dup documentation.
  * Output is written under the fixed name "output"; the outputs section collects
    files by that name from the working directory.
-->
13 <command><![CDATA[ | |
14 cd-hit-dup | |
15 -i "${ fastq_input.fastq_input1 }" | |
16 #if str( $fastq_input.fastq_input_selector ) == "paired": | |
17 -i2 "${ fastq_input.fastq_input2 }" | |
18 #elif str( $fastq_input.filter_chimeras.filter_chimeras_selector ) == "true": | |
19 -f "true" | |
20 -s "${ fastq_input.filter_chimeras.min_chimeric_length }" | |
21 -a "${ fastq_input.filter_chimeras.abundance_cutoff }" | |
22 -b "${ fastq_input.filter_chimeras.abundance_ratio }" | |
23 -p "${ fastq_input.filter_chimeras.dissimilarity_control }" | |
24 #end if | |
25 -u "${ prefix_length }" | |
26 -m "${ match_length }" | |
27 #if str( $mismatches_allowed ) != "": | |
28 #if float( str( $mismatches_allowed ) ) == int( float( str( $mismatches_allowed ) ) ): | |
29 -e "${ int( float( str( $mismatches_allowed ) ) ) }" | |
30 #else: | |
31 -e "${ mismatches_allowed }" | |
32 #end if | |
33 #end if | |
34 -d "${ description_length }" | |
35 -o "output" | |
36 ]]> | |
37 </command> | |
<!--
  Tool inputs. The outer fastq_input conditional switches between paired and single reads
  (single is preselected). Chimera filtering options are nested inside the single branch only,
  so they are not available when paired reads are chosen.
-->
38 <inputs> | |
39 <conditional name="fastq_input"> | |
40 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="For joined Paired-end reads choose Single."> | |
41 <option value="paired">Paired</option> | |
42 <option value="single" selected="True">Single</option> | |
43 </param> | |
<!-- Paired branch: two datasets, each accepting fastqsanger or fasta. -->
44 <when value="paired"> | |
45 <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/> | |
46 <param name="fastq_input2" type="data" format="fastqsanger,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/> | |
47 </when> | |
48 <when value="single"> | |
1
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
49 <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select reads" help="Specify dataset with single reads"/> |
<!--
  Optional chimera filtering (off by default). Each sub-option's help text names the
  cd-hit-dup flag that the command template passes it as.
-->
0 | 50 <conditional name="filter_chimeras"> |
51 <param name="filter_chimeras_selector" type="select" label="Filter out chimeric clusters"> | |
52 <option value="true">Yes</option> | |
53 <option value="false" selected="True">No</option> | |
54 </param> | |
55 <when value="true"> | |
56 <param name="min_chimeric_length" type="integer" value="30" min="20" label="Minimum length of common sequence shared between a chimeric read and each of its parents" help="-s"/> | |
57 <param name="abundance_cutoff" type="integer" value="1" min="1" label="Abundance cutoff" help="-a; Tool Author recommend default of 2, but this would require the chimera itself to need 2 copies"/> | |
58 <param name="abundance_ratio" type="integer" value="1" min="1" label="Abundance ratio between a parent read and a chimeric read" help="-b"/> | |
59 <param name="dissimilarity_control" type="integer" value="1" min="1" label="Dissimilarity control for chimeric filtering" help="-p"/> | |
60 </when> | |
61 <when value="false"> | |
62 <!-- do nothing here --> | |
63 </when> | |
64 </conditional> | |
65 </when> | |
66 </conditional> | |
<!-- Options that apply in both paired and single mode. -->
67 <param name="prefix_length" type="integer" value="0" min="0" label="Length of prefix to be used in the analysis" help="-u"/> | |
68 <param name="match_length" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Match length" help="-m; specifies whether the lengths of two reads should be exactly the same to be considered as duplicates. "/> | |
<!-- Optional with an empty default: the command template omits the -e flag entirely when this is left blank. -->
69 <param name="mismatches_allowed" type="float" optional="True" value="" min="0" label="Maximum number/percent of mismatches allowed" help="-e"/> | |
70 <param name="description_length" type="integer" value="0" min="0" label="Description length" help="-d; 0 means truncate at the first whitespace character"/> | |
71 </inputs> | |
<!--
  Tool outputs, each collected from a fixed file name in the job working directory
  (the command template writes to the prefix "output"):
    output_reads              from "output"
    output_duplicate_clusters from "output.clstr"
    output_chimeric_clusters  from "output2.clstr", only when chimera filtering is enabled
  output_reads declares format_source="fastq_input1".
  NOTE(review): presumably its datatype then follows the input dataset (which may be fasta),
  with fastqsanger as the fallback; confirm against the Galaxy tool schema.
-->
72 <outputs> | |
73 <data format="fastqsanger" format_source="fastq_input1" name="output_reads" label="${tool.name} on ${on_string} (filtered reads)" from_work_dir="output"/> | |
74 <data format="tabular" name="output_duplicate_clusters" label="${tool.name} on ${on_string} (duplicate clusters)" from_work_dir="output.clstr"/> | |
75 <data format="tabular" name="output_chimeric_clusters" label="${tool.name} on ${on_string} (chimeric clusters)" from_work_dir="output2.clstr"> | |
<!--
  The filter_chimeras conditional exists only under the single-reads branch, so the selector
  is checked first. This keeps the expression from raising on the missing key in paired mode
  and hides the chimeric-clusters output there, where no output2.clstr is requested.
-->
76 <filter>str( fastq_input['fastq_input_selector'] ) == "single" and str( fastq_input['filter_chimeras']['filter_chimeras_selector'] ) == "true"</filter> | |
77 </data> | |
78 </outputs> | |
<!--
  Functional tests. Both run in single-reads mode on the same input file,
  cd-hit-dup_in.fastqsanger; there is no paired-mode test here.
-->
79 <tests> | |
<!-- Test 1: defaults (chimera filtering left off); checks the filtered reads and the duplicate clusters. -->
80 <test> | |
81 <param name="fastq_input|fastq_input_selector" value="single" /> | |
82 <param name="fastq_input|fastq_input1" ftype="fastqsanger" value="cd-hit-dup_in.fastqsanger"/> | |
83 <output name="output_reads" ftype="fastqsanger" file="cd-hit-dup_out.fastqsanger" /> | |
84 <output name="output_duplicate_clusters" ftype="tabular" file="cd-hit-dup_out.dup_clusters.tabular" /> | |
85 </test> | |
<!-- Test 2: chimera filtering switched on with its default sub-options; additionally checks the chimeric clusters output. -->
86 <test> | |
87 <param name="fastq_input|fastq_input_selector" value="single" /> | |
88 <param name="fastq_input|fastq_input1" ftype="fastqsanger" value="cd-hit-dup_in.fastqsanger"/> | |
89 <param name="fastq_input|filter_chimeras|filter_chimeras_selector" value="true"/> | |
90 <output name="output_reads" ftype="fastqsanger" file="cd-hit-dup_out_chimera.fastqsanger" /> | |
91 <output name="output_duplicate_clusters" ftype="tabular" file="cd-hit-dup_out_chimera.dup_clusters.tabular" /> | |
92 <output name="output_chimeric_clusters" ftype="tabular" file="cd-hit-dup_out_chimera.chimeric_clusters.tabular" /> | |
93 </test> | |
94 </tests> | |
95 <help> | |
96 <![CDATA[ | |
1
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
97 **What it does** |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
98 |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
99 cd-hit-dup is a simple tool for removing duplicates from sequencing reads, with an optional step to detect and remove chimeric reads. |
0 | 100 |
1
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
101 **Options** |
0 | 102 |
1
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
103 cd-hit-dup provides a number of options to tune how the duplicates are removed:: |
0 | 104 |
1
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
105 -d Description length (default 0, truncate at the first whitespace character) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
106 -u Length of prefix to be used in the analysis (default 0, for full/maximum length) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
107 -m Match length (true/false, default true) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
108 -e Maximum number/percent of mismatches allowed |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
109 -f Filter out chimeric clusters (true/false, default false) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
110 -s Minimum length of common sequence shared between a chimeric read and each of |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
111 its parents (default 30, minimum 20) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
112 -a Abundance cutoff (default 1 without chimeric filtering, 2 with chimeric filtering) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
113 -b Abundance ratio between a parent read and a chimeric read (default 1) |
0fb894bd8eba
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
0
diff
changeset
|
114 -p Dissimilarity control for chimeric filtering (default 1) |
0 | 115 |
116 | |
117 ]]> | |
118 </help> | |
<!-- Publication to cite for this tool, given as a single DOI. -->
119 <citations> | |
120 <citation type="doi">10.1093/bioinformatics/bts565</citation> | |
121 </citations> | |
122 </tool> |