annotate cd_hit_dup.xml @ 0:2e150ed1b76e draft

Uploaded
author devteam
date Wed, 29 Apr 2015 12:06:47 -0400
parents
children 0fb894bd8eba
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
1 <tool id="cd_hit_dup" name="cd-hit-dup" version="0.0.1">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
2 <requirements>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
3 <requirement type="package" version="0.5-2012-03-07-fix-dan-gh-0.0.1">cd-hit-auxtools</requirement>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
4 </requirements>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
5 <stdio>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
6 <exit_code range="1:" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
7 <exit_code range=":-1" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
8 </stdio>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
9
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
10 <command><![CDATA[
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
11 cd-hit-dup
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
12 -i "${ fastq_input.fastq_input1 }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
13 #if str( $fastq_input.fastq_input_selector ) == "paired":
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
14 -i2 "${ fastq_input.fastq_input2 }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
15 #elif str( $fastq_input.filter_chimeras.filter_chimeras_selector ) == "true":
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
16 -f "true"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
17 -s "${ fastq_input.filter_chimeras.min_chimeric_length }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
18 -a "${ fastq_input.filter_chimeras.abundance_cutoff }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
19 -b "${ fastq_input.filter_chimeras.abundance_ratio }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
20 -p "${ fastq_input.filter_chimeras.dissimilarity_control }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
21 #end if
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
22 -u "${ prefix_length }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
23 -m "${ match_length }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
24 #if str( $mismatches_allowed ) != "":
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
25 #if float( str( $mismatches_allowed ) ) == int( float( str( $mismatches_allowed ) ) ):
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
26 -e "${ int( float( str( $mismatches_allowed ) ) ) }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
27 #else:
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
28 -e "${ mismatches_allowed }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
29 #end if
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
30 #end if
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
31 -d "${ description_length }"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
32 -o "output"
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
33 ]]>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
34 </command>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
35 <inputs>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
36 <conditional name="fastq_input">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
37 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="For joined Paired-end reads choose Single.">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
38 <option value="paired">Paired</option>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
39 <option value="single" selected="True">Single</option>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
40 </param>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
41 <when value="paired">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
42 <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
43 <param name="fastq_input2" type="data" format="fastqsanger,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
44 </when>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
45 <when value="single">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
46 <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select read dataset" help="Specify dataset with single reads"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
47 <conditional name="filter_chimeras">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
48 <param name="filter_chimeras_selector" type="select" label="Filter out chimeric clusters">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
49 <option value="true">Yes</option>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
50 <option value="false" selected="True">No</option>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
51 </param>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
52 <when value="true">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
53 <param name="min_chimeric_length" type="integer" value="30" min="20" label="Minimum length of common sequence shared between a chimeric read and each of its parents" help="-s"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
54 <param name="abundance_cutoff" type="integer" value="1" min="1" label="Abundance cutoff" help="-a; Tool Author recommend default of 2, but this would require the chimera itself to need 2 copies"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
55 <param name="abundance_ratio" type="integer" value="1" min="1" label="Abundance ratio between a parent read and a chimeric read" help="-b"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
56 <param name="dissimilarity_control" type="integer" value="1" min="1" label="Dissimilarity control for chimeric filtering" help="-p"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
57 </when>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
58 <when value="false">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
59 <!-- do nothing here -->
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
60 </when>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
61 </conditional>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
62 </when>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
63 </conditional>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
64 <param name="prefix_length" type="integer" value="0" min="0" label="Length of prefix to be used in the analysis" help="-u"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
65 <param name="match_length" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Match length" help="-m; specifies whether the lengths of two reads should be exactly the same to be considered as duplicates. "/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
66 <param name="mismatches_allowed" type="float" optional="True" value="" min="0" label="Maximum number/percent of mismatches allowed" help="-e"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
67 <param name="description_length" type="integer" value="0" min="0" label="Description length" help="-d; 0 means truncate at the first whitespace character"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
68 </inputs>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
69 <outputs>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
70 <data format="fastqsanger" format_source="fastq_input1" name="output_reads" label="${tool.name} on ${on_string} (filtered reads)" from_work_dir="output"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
71 <data format="tabular" name="output_duplicate_clusters" label="${tool.name} on ${on_string} (duplicate clusters)" from_work_dir="output.clstr"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
72 <data format="tabular" name="output_chimeric_clusters" label="${tool.name} on ${on_string} (chimeric clusters)" from_work_dir="output2.clstr">
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
73 <!-- filter_chimeras exists only under the "single" branch of fastq_input; test the selector first so paired runs evaluate to False instead of raising on the missing key. --><filter>str( fastq_input['fastq_input_selector'] ) == "single" and str( fastq_input['filter_chimeras']['filter_chimeras_selector'] ) == "true"</filter>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
74 </data>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
75 </outputs>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
76 <tests>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
77 <test>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
78 <param name="fastq_input|fastq_input_selector" value="single" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
79 <param name="fastq_input|fastq_input1" ftype="fastqsanger" value="cd-hit-dup_in.fastqsanger"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
80 <output name="output_reads" ftype="fastqsanger" file="cd-hit-dup_out.fastqsanger" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
81 <output name="output_duplicate_clusters" ftype="tabular" file="cd-hit-dup_out.dup_clusters.tabular" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
82 </test>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
83 <test>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
84 <param name="fastq_input|fastq_input_selector" value="single" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
85 <param name="fastq_input|fastq_input1" ftype="fastqsanger" value="cd-hit-dup_in.fastqsanger"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
86 <param name="fastq_input|filter_chimeras|filter_chimeras_selector" value="true"/>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
87 <output name="output_reads" ftype="fastqsanger" file="cd-hit-dup_out_chimera.fastqsanger" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
88 <output name="output_duplicate_clusters" ftype="tabular" file="cd-hit-dup_out_chimera.dup_clusters.tabular" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
89 <output name="output_chimeric_clusters" ftype="tabular" file="cd-hit-dup_out_chimera.chimeric_clusters.tabular" />
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
90 </test>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
91 </tests>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
92 <help>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
93 <![CDATA[
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
94 **What it does**
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
95
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
96 cd-hit-dup is a simple tool for removing duplicates from sequencing reads, with an optional step to detect and remove chimeric reads. A number of options are provided to tune how the duplicates are removed.
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
97
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
98
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
99 **Options**
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
100
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
101 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
102 | Option | Description |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
103 +========+===================================================================================================================+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
104 |-i | Input file |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
105 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
106 |-i2 | Second input file |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
107 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
108 |-o | Output file |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
109 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
110 |-d | Description length (default 0, truncate at the first whitespace character) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
111 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
112 |-u | Length of prefix to be used in the analysis (default 0, for full/maximum length) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
113 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
114 |-m | Match length (true/false, default true) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
115 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
116 |-e | Maximum number/percent of mismatches allowed |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
117 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
118 |-f | Filter out chimeric clusters (true/false, default false) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
119 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
120 |-s | Minimum length of common sequence shared between a chimeric read and each of its parents (default 30, minimum 20) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
121 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
122 |-a | Abundance cutoff (default 1 without chimeric filtering, 2 with chimeric filtering) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
123 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
124 |-b | Abundance ratio between a parent read and a chimeric read (default 1) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
125 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
126 |-p | Dissimilarity control for chimeric filtering (default 1) |
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
127 +--------+-------------------------------------------------------------------------------------------------------------------+
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
128
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
129
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
130 ]]>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
131 </help>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
132 <citations>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
133 <citation type="doi">10.1093/bioinformatics/bts565</citation>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
134 </citations>
2e150ed1b76e Uploaded
devteam
parents:
diff changeset
135 </tool>