comparison seqPrep.xml @ 0:c0ecd158e2a7 draft default tip

Initial public release of the seqprep wrapper, v0.1
author lionelguy
date Wed, 23 Oct 2013 09:44:03 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c0ecd158e2a7
1 <tool id="seqprep" name="SeqPrep" version="0.1">
2 <description>merge paired end Illumina reads</description>
3 <requirements> <!-- External package providing the SeqPrep executable called in the command section; the version string looks like a source revision hash (assumption, to be confirmed). -->
4 <requirement version="11301c4a5f3222bdb4d0ec30ae5e192e609c0154" type="package">SeqPrep</requirement>
5 </requirements>
6 <command>
7 SeqPrep
8 ## Required arguments: forward/reverse input reads (-f/-r) and the trimmed, unmerged read outputs (-1/-2)
9 -f $f
10 -r $r
11 -1 $one
12 -2 $two
13 ## General arguments. -6 declares phred+64 input, which covers both fastqillumina and fastqsolexa. Optional numeric values are tested with str(...) != "" so that an explicitly entered 0 is still passed to SeqPrep.
14 #if $save_discarded
15 -3 $three
16 -4 $four
17 #end if
18 #if $f.extension in ("fastqillumina", "fastqsolexa")
19 -6
20 #end if
21 #if str($q) != ""
22 -q $q
23 #end if
24 #if str($L) != ""
25 -L $L
26 #end if
27 ## Arguments for Adapter/Primer Trimming (Optional)
28 #if $A
29 -A $A
30 #end if
31 #if $B
32 -B $B
33 #end if
34 #if $extended_options.extended_select
35 #if str($extended_options.O) != ""
36 -O $extended_options.O
37 #end if
38 #if str($extended_options.M) != ""
39 -M $extended_options.M
40 #end if
41 #if str($extended_options.N) != ""
42 -N $extended_options.N
43 #end if
44 #if str($extended_options.b) != ""
45 -b $extended_options.b
46 #end if
47 #if str($extended_options.Q) != ""
48 -Q $extended_options.Q
49 #end if
50 #if str($extended_options.t) != ""
51 -t $extended_options.t
52 #end if
53 #if str($extended_options.e) != ""
54 -e $extended_options.e
55 #end if
56 #if str($extended_options.Z) != ""
57 -Z $extended_options.Z
58 #end if
59 #if str($extended_options.w) != ""
60 -w $extended_options.w
61 #end if
62 #if str($extended_options.W) != ""
63 -W $extended_options.W
64 #end if
65 #if str($extended_options.p) != ""
66 -p $extended_options.p
67 #end if
68 #if str($extended_options.P) != ""
69 -P $extended_options.P
70 #end if
71 #if str($extended_options.X) != ""
72 -X $extended_options.X
73 #end if
74 #end if
75 ## Optional Arguments for Merging: -s receives the merged reads; when requested, their lengths are then piped through seqlens.py into the lengths output
76 #if $merge.merge_select
77 #if $merge.y
78 -y $merge.y
79 #end if
80 #if str($merge.o) != ""
81 -o $merge.o
82 #end if
83 #if str($merge.m) != ""
84 -m $merge.m
85 #end if
86 #if str($merge.n) != ""
87 -n $merge.n
88 #end if
89 -s $s
90 #if $merge.lengths_select
91 &amp;&amp; zcat -f $s | seqlens.py &gt; $s_lengths
92 #end if
93 #end if
94 </command>
95 <inputs> <!-- User-facing parameters; each name matches the SeqPrep command-line flag it feeds in the command section. -->
96 <param name="f" type="data" format="fastq,fastqillumina,fastqsanger,fastqsolexa" label="Forward reads" />
97 <param name="r" type="data" format="fastq,fastqillumina,fastqsanger,fastqsolexa" label="Reverse reads" />
98 <param name="save_discarded" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Save discarded reads?"/>
99 <param name="A" type="text" value="" optional="true" label="Forward read primer/adapter sequence to trim as it would appear at the end of a read" help="
100 default (genomic non-multiplexed adapter1) = AGATCGGAAGAGCGGTTCAG. See help for more details" />
101 <param name="B" type="text" value="" optional="true" label="Reverse read primer/adapter sequence to trim as it would appear at the end of a read" help="
102 default (genomic non-multiplexed adapter2) = AGATCGGAAGAGCGTCGTGT. See help for more details" />
103 <param name="q" type="integer" min="1" value="" optional="true" label="Quality score cutoff for mismatches to be counted in overlap" help="Default = 13" />
104 <param name="L" type="integer" min="1" value="" optional="true" label="Minimum length of a trimmed or merged read to print it" help="Default = 30" />
105 <conditional name="extended_options"> <!-- Adapter and read alignment tuning; all values are optional and left to SeqPrep when empty. -->
106 <param name="extended_select" type="boolean" checked="false" truevalue="True" falsevalue="" label="Show extended options?" />
107 <when value="True">
108 <param name="O" type="integer" min="1" value="" optional="true" label="Minimum overall base pair overlap with adapter sequence to trim" help="Default = 10" />
109 <param name="M" type="float" min="0" max="1" value="" optional="true" label="maximum fraction of good quality mismatching bases for primer/adapter overlap" help="Default = 0.02" />
110 <param name="N" type="float" min="0" max="1" value="" optional="true" label="Minimum fraction of matching bases for primer/adapter overlap" help="Default = 0.87" />
111 <param name="b" type="integer" min="1" value="" optional="true" label="Adapter alignment band-width" help="Default = 50" />
112 <param name="Q" type="integer" min="0" value="" optional="true" label="Adapter alignment gap-open" help="Default = 8" />
113 <param name="t" type="integer" min="0" value="" optional="true" label="Adapter alignment gap-extension" help="Default = 2" />
114 <param name="e" type="integer" min="0" value="" optional="true" label="Adapter alignment gap-end" help="Default = 2" />
115 <param name="Z" type="integer" min="0" value="" optional="true" label="Adapter alignment minimum local alignment score cutoff" help="Roughly (2*num_hits) - (num_gaps*gap_open) - (num_gaps*gap_close) - (gap_len*gap_extend) - (2*num_mismatches)]. Default = 26" />
116 <param name="w" type="integer" min="1" value="" optional="true" label="Read alignment band-width" help="Default = 50" />
117 <param name="W" type="integer" min="0" value="" optional="true" label="Read alignment gap-open" help="Default = 26" />
118 <param name="p" type="integer" min="0" value="" optional="true" label="Read alignment gap-extension" help="Default = 9" />
119 <param name="P" type="integer" min="0" value="" optional="true" label="Read alignment gap-end" help="Default = 5" />
120 <param name="X" type="float" min="0" max="1" value="" optional="true" label="Read alignment maximum fraction gap cutoff" help="Default = 0.125" />
121 </when>
122 </conditional>
123 <conditional name="merge"> <!-- Merging options; these parameters only exist when merge_select is checked. -->
124 <param name="merge_select" type="boolean" checked="true" truevalue="True" falsevalue="" label="Merge overlapping reads and output merged reads?" />
125 <when value="True">
126 <param name="y" type="text" value="" optional="true" label="Maximum quality score in merged output" help="In phred 33. Default (']') is 60" />
127 <param name="o" type="integer" min="1" value="" optional="true" label="Minimum overall base pair overlap to merge two reads" help="Default = 15" />
128 <param name="m" type="float" value="" min="0" max="1" optional="true" label="Maximum fraction of good quality mismatching bases to overlap reads" help="Default = 0.02" />
129 <param name="n" type="float" value="" min="0" max="1" optional="true" label="Minimum fraction of matching bases to overlap reads" help="Default = 0.9" /> <!-- A fraction like m, so it is typed as float to make the min/max bounds apply. -->
130 <param name="lengths_select" type="boolean" checked="true" truevalue="True" falsevalue="" label="Output stats of sequence lengths?" />
131 </when>
132 </conditional>
133 </inputs>
134 <outputs> <!-- Datasets written by the command; an output with a filter is only created when its filter expression is true. -->
135 <data name="one" format="fastqsanger" label="${tool.name} on ${on_string}: forward reads" />
136 <data name="two" format="fastqsanger" label="${tool.name} on ${on_string}: reverse reads" />
137 <data name="s" format="fastqsanger" label="${tool.name} on ${on_string}: merged reads">
138 <filter>merge["merge_select"] is True</filter>
139 </data>
140 <data name="s_lengths" format="tabular" label="${tool.name} on ${on_string}: histogram of lengths"> <!-- lengths_select is only defined inside the merge branch, so merge_select is tested first to avoid a failed lookup when merging is off. -->
141 <filter>merge["merge_select"] is True and merge["lengths_select"] is True</filter>
142 </data>
143 <data name="three" format="fastqsanger" label="${tool.name} on ${on_string}: discarded forward reads">
144 <filter>save_discarded is True</filter>
145 </data>
146 <data name="four" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reverse reads">
147 <filter>save_discarded is True</filter>
148 </data>
149 </outputs>
150 <stdio> <!-- Any exit code of 1 or higher is reported as a fatal error. -->
151 <exit_code level="fatal" range="1:" description="Error" />
152 </stdio>
153 <help>
154 **What it does**
155
156 SeqPrep is a program to merge paired end Illumina reads that are overlapping into a single longer read. It may also just be used for its adapter trimming feature without doing any paired end overlap. When an adapter sequence is present, that means that the two reads must overlap (in most cases) so they are forcefully merged. When reads do not have adapter sequence they must be treated with care when doing the merging, so a much more specific approach is taken. The default parameters were chosen with specificity in mind, so that they could be run on libraries where very few reads are expected to overlap. It is always safest though to save the overlapping procedure for libraries where you have some prior knowledge that a significant portion of the reads will have some overlap.
157
158 Before running SeqPrep, make sure to check that the program's defaults are indeed the adapters you are looking for. Try copying the default forward adapter from this file and grepping for it in your reads with a word count; then try the same with the reverse adapter. You should see some hits. You can also try using (and validating with grep) -A GATCGGAAGAGCACACG -B AGATCGGAAGAGCGTCGT as parameters. To find a list of Illumina adapter sequences, you should write to Illumina tech support at TechSupport@illumina.com (they do not like people sharing the list of sequences outside of their institution).
159
160 You can also try to search online for Illumina read primers, of course.
161
162 See more details in the SeqPrep GitHub repository: https://github.com/jstjohn/SeqPrep
163
164 **License**
165
166 SeqPrep is copyrighted by John St. John.
167
168 This wrapper is copyrighted by Lionel Guy, and is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
169
170 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
171
172 You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
173 </help>
174 </tool>