annotate SMART/DiffExpAnal/fastq_groomer_parallel.xml @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 <tool id="fastq_groomer_parallel" name="FASTQ Groomer (for DEA)" version="1.0.0">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 <description>convert between various FASTQ quality formats for a list of inputs.</description>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 <command interpreter="python">fastq_groomer_parallel.py '$input_file' '$input_type' '$output_file'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 #if str( $options_type['options_type_selector'] ) == 'basic':
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 #if str( $input_type ) == 'cssanger':
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 'cssanger'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 #else:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 'sanger'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 #end if
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 'ascii' 'summarize_input'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 #else:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 '${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 #end if
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 #if str( $OptionPairedEnd.pairedEnd ) == "Yes":
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 '$OptionPairedEnd.pairedEnd_input' '$output_pairedEndFile'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 #else:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 'None' 'None'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 #end if
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 </command>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 <inputs>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 <param name="input_file" type="data" format="txt" label="The File list to groom" />
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 <param name="input_type" type="select" label="Input FASTQ quality scores type">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 <option value="solexa">Solexa</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 <option value="illumina">Illumina 1.3-1.7</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 <option value="sanger" selected="True">Sanger</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 <option value="cssanger">Color Space Sanger</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 </param>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 <conditional name="options_type">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 <param name="options_type_selector" type="select" label="Advanced Options">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 <option value="basic" selected="True">Hide Advanced Options</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 <option value="advanced">Show Advanced Options</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 </param>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 <when value="basic">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 <!-- no options -->
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 </when>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 <when value="advanced">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 <option value="solexa">Solexa</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 <option value="illumina">Illumina 1.3+</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 <option value="sanger" selected="True">Sanger (recommended)</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 <option value="cssanger">Color Space Sanger</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 </param>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44 <option value="None">Use Source Encoding</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 <option value="ascii" selected="True">ASCII</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46 <option value="decimal">Decimal</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 </param>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48 <param name="summarize_input" type="select" label="Summarize input data">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49 <option value="summarize_input" selected="True">Summarize Input</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50 <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 </param>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52 </when>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53 </conditional>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
54
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
55 <conditional name="OptionPairedEnd">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
56 <param name="pairedEnd" type="select" label="For paired-end analysis.">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
57 <option value="Yes">Yes</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
58 <option value="No" selected="true">No</option>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
59 </param>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
60 <when value="Yes">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
61 <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
62 </when>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
63 <when value="No">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
64 </when>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
65 </conditional>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
66
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
67 </inputs>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
68
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
69 <outputs>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
70 <data name="output_file" format="txt">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
71 </data>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
72 <data format="txt" name="output_pairedEndFile" label="output Paired-end fastq files">
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
73 <filter>(OptionPairedEnd['pairedEnd']=='Yes')</filter>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
74 </data>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
75 </outputs>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
76 <help>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
77 **What it does**
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
78
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
79 This tool offers several conversion options relating to the FASTQ format.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
80
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
81 When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
82
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
83 When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum).
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
84
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
85 When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
86
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
87 When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
88
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
89 -----
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
90
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
91 **Quality Score Comparison**
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
92
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
93 ::
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
94
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
95 SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
96 ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
97 ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
98 !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
99 | | | | | |
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
100 33 59 64 73 104 126
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
101
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
102 S - Sanger Phred+33, 93 values (0, 93) (0 to 60 expected in raw reads)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
103 I - Illumina 1.3 Phred+64, 62 values (0, 62) (0 to 40 expected in raw reads)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
104 X - Solexa Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
105
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
106 Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
107
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
108 .. class:: infomark
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
109
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
110 Output from Illumina 1.8+ pipelines is Sanger encoded.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
111
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
112 ------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
113
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
114 **Citation**
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
115
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
116 If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
117
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
118
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
119 .. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
120
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
121 </help>
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
122 </tool>