annotate cd_hit_est.xml @ 1:34a799d173f7

Add tool_dependencies and functional test
author Jim Johnson <jj@umn.edu>
date Fri, 07 Sep 2012 13:52:03 -0500
parents 23f5701549b1
children cca0838c1597
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
1 <tool id="cd_hit_est" name="CD-HIT-EST" version="1.1">
0
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
2 <description>Cluster a nucleotide dataset into representative sequences</description>
1
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
3 <requirements>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
4 <requirement type="package" version="4.6.1">cd-hit</requirement>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
5 </requirements>
0
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
6 <command>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
7 cd-hit-est -i '$fasta_in' -o rep_seq -c $similarity -n $wordsize $strand
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
8 </command>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
9 <inputs>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
10 <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
11 <param name="similarity" type="float" value="0.9" label="similarity threshold: .75 - 1.0, default is .9">
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
12 <validator type="in_range" message="sequence similarity threshold should be .75 - 1.0" min=".75" max="1.0"/>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
13 </param>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
14 <param name="wordsize" type="integer" value="8" label="word size">
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
15 <help> Suggested word size:
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
16 8,9,10 for thresholds 0.90 ~ 1.0
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
17 7 for thresholds 0.88 ~ 0.9
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
18 6 for thresholds 0.85 ~ 0.88
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
19 5 for thresholds 0.80 ~ 0.85
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
20 4 for thresholds 0.75 ~ 0.8
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
21 </help>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
22 <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
23 </param>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
24 <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
25 </inputs>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
26 <outputs>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
27 <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
28 <data format="fasta" name="fasta_out" label="${tool.name} on ${on_string}: representatives.fasta" from_work_dir="rep_seq"/>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
29 </outputs>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
30 <tests>
1
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
31 <test>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
32 <param name="fasta_in" value="cd_hit_est_in.fa" />
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
33 <param name="similarity" value="0.9"/>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
34 <param name="wordsize" value="8"/>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
35 <output name="clusters_out">
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
36 <assert_contents>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
37 <has_text text=">Cluster" />
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
38 <has_text_matching expression=">F12Fcsw_481739" />
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
39 </assert_contents>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
40 </output>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
41 <output name="fasta_out">
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
42 <assert_contents>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
43 <has_text_matching expression="^>[MF]\d\dFcsw_\d*" />
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
44 </assert_contents>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
45 </output>
34a799d173f7 Add tool_dependencies and functional test
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
46 </test>
0
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
47 </tests>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
48 <help>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
49 **CD-HIT-EST**
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
50
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
51 CD-HIT_ stands for Cluster Database at High Identity with Tolerance. The program (cd-hit) takes a fasta format sequence database as input and produces a set of 'non-redundant' (nr) representative sequences as output. In addition, cd-hit outputs a cluster file documenting the sequence 'groupies' for each nr sequence representative. The idea is to reduce the overall size of the database without removing any sequence information by only removing 'redundant' (or highly similar) sequences. This is why the resulting database is called non-redundant (nr). Essentially, cd-hit produces a set of closely related protein families from a given fasta sequence database. cd-hit-est, the program this tool runs, clusters a nucleotide dataset into representative sequences.
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
52
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
53 .. _CD-HIT: http://www.bioinformatics.org/cd-hit/
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
54
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
55 ------
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
56
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
57 **Inputs**
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
58
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
59 cd-hit-est requires a fasta dataset as input.
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
60
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
61 ------
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
62
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
63 **Outputs**
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
64
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
65 A fasta dataset containing representative sequences.
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
66
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
67 A text file listing the mapping of sequences to the representative sequences::
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
68
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
69 >Cluster 0
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
70 0 2799aa, >PF04998.6|RPOC2_CHLRE/275-3073... *
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
71 >Cluster 1
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
72 0 2214aa, >PF06317.1|Q6Y625_9VIRU/1-2214... at 80%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
73 1 2215aa, >PF06317.1|O09705_9VIRU/1-2215... at 84%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
74 2 2217aa, >PF06317.1|Q6Y630_9VIRU/1-2217... *
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
75 3 2216aa, >PF06317.1|Q6GWS6_9VIRU/1-2216... at 84%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
76 4 527aa, >PF06317.1|Q67E14_9VIRU/6-532... at 63%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
77 >Cluster 2
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
78 0 2202aa, >PF06317.1|Q6UY61_9VIRU/8-2209... at 60%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
79 1 2208aa, >PF06317.1|Q6IVU4_JUNIN/1-2208... *
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
80 2 2207aa, >PF06317.1|Q6IVU0_MACHU/1-2207... at 73%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
81 3 2208aa, >PF06317.1|RRPO_TACV/1-2208... at 69%
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
82
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
83
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
84 </help>
23f5701549b1 Uploaded
jjohnson
parents:
diff changeset
85 </tool>