annotate fasta_to_tabular.xml @ 0:9d189d08f2ad draft

Imported from capsule None
author devteam
date Mon, 19 May 2014 12:34:27 -0400
parents
children 7e801ab2b70e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
1 <tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.0">
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
2 <description>converter</description>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
3 <command interpreter="python">fasta_to_tabular.py $input $output $keep_first $descr_columns</command>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
4 <inputs>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
5 <param name="input" type="data" format="fasta" label="Convert these sequences"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
6 <param name="descr_columns" type="integer" size="2" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and description (rest) as two columns, or 1 to give a single column">
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
7 <validator type="in_range" min="1" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
8 </param>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
9 <param name="keep_first" type="integer" size="5" value="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
10 <validator type="in_range" min="0" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
11 </param>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
12 </inputs>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
13 <outputs>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
14 <data name="output" format="tabular"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
15 </outputs>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
16 <tests>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
17 <test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
18 <param name="input" value="454.fasta" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
19 <param name="descr_columns" value="1"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
20 <param name="keep_first" value="0"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
21 <output name="output" file="fasta_to_tabular_out1.tabular" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
22 </test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
23
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
24 <test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
25 <param name="input" value="4.fasta" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
26 <param name="descr_columns" value="1"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
27 <param name="keep_first" value="0"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
28 <output name="output" file="fasta_to_tabular_out2.tabular" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
29 </test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
30
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
31 <test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
32 <param name="input" value="454.fasta" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
33 <param name="descr_columns" value="1"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
34 <param name="keep_first" value="14"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
35 <output name="output" file="fasta_to_tabular_out3.tabular" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
36 </test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
37
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
38 <test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
39 <param name="input" value="454.fasta" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
40 <param name="descr_columns" value="2"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
41 <param name="keep_first" value="0"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
42 <output name="output" file="fasta_to_tabular_out4.tabular" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
43 </test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
44
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
45 <test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
46 <param name="input" value="454.fasta" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
47 <param name="descr_columns" value="5"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
48 <param name="keep_first" value="0"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
49 <output name="output" file="fasta_to_tabular_out5.tabular" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
50 </test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
51
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
52 <test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
53 <param name="input" value="454.fasta" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
54 <param name="descr_columns" value="5"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
55 <param name="keep_first" value="10"/>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
56 <output name="output" file="fasta_to_tabular_out6.tabular" />
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
57 </test>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
58
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
59 </tests>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
60 <help>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
61
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
62 **What it does**
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
63
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
64 This tool converts FASTA formatted sequences to TAB-delimited format.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
65
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
66 Many tools consider the first word of the FASTA "&gt;" title line to be an identifier, and any remaining text to be a free form description.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
67 It is therefore useful to split this text into two columns in Galaxy (identifier and any description) by setting **How many columns to divide title string into?** to **2**.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
68 In some cases the description can be usefully broken up into more columns -- see the examples below.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
69
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
70 The option *How many title characters to keep?* allows you to select a specified number of letters from the beginning of each FASTA entry.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
71 With the introduction of the **How many columns to divide title string into?** option this setting is of limited use, but does still allow you to truncate the identifier.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
72
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
73 -----
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
74
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
75 **Example**
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
76
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
77 Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
78
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
79 &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
80 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
81 TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
82 &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
83 AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
84
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
85 Running this tool with the default settings will produce this (2 column output):
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
86
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
87 ========================================================================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
88 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
89 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
90 ========================================================================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
91
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
92 Having the full title line (the FASTA "&gt;" line text) as a column is not always ideal.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
93
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
94 The **How many title characters to keep?** option is useful if your identifiers are all the same length.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
95 In this example the identifier is 14 characters, so setting **How many title characters to keep?** to **14** (and leaving **How many columns to divide title string into?** as the default, **1**) will produce this (2 column output):
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
96
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
97 ============== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
98 EYKX4VC02EQLO5 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
99 EYKX4VC02D4GS2 AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
100 ============== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
101
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
102 If however your FASTA file has identifiers of variable length, it is better to split the text into at least two columns.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
103 Running this tool with **How many columns to divide title string into?** set to **2** will produce this (3 column output):
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
104
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
105 ============== =========================================================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
106 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
107 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
108 ============== =========================================================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
109
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
110 Running this tool with **How many columns to divide title string into?** set to **5** will produce this (6 column output):
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
111
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
112 ============== ========== ============ ======== ========================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
113 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
114 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
115 ============== ========== ============ ======== ========================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
116
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
117 Running this tool with **How many columns to divide title string into?** set to **5** and **How many title characters to keep?** set to **10** will produce this (6 column output).
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
118 Notice that only the first column is truncated to 10 characters -- and be careful not to trim your sequence names too much (generally they should be unique):
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
119
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
120 ========== ========== ============ ======== ========================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
121 EYKX4VC02E length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
122 EYKX4VC02D length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
123 ========== ========== ============ ======== ========================== =======================================
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
124
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
125 Note the sequences have been truncated for display purposes in the above tables.
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
126
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
127 </help>
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
128 </tool>