comparison bsmap.xml @ 0:b35020882aad draft

Uploaded
author eiriche
date Thu, 29 Nov 2012 10:09:10 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b35020882aad
1 <tool id="bsmap" name="BSMAP Mapper">
2 <requirements> <!-- external dependency declared for this tool: the "bsmap" package -->
3 <requirement type="package">
4 bsmap
5 </requirement>
6 </requirements>
7 <!-- Cheetah template that builds the key=value argument list passed to bsmap_wrapper.sh. Advanced options are emitted only when the "full" settings type is selected. Free-text parameters (adapter, rrbs, align_info) are double-quoted so that a value containing spaces reaches the wrapper as a single argument. --> <command interpreter="bash">
8 bsmap_wrapper.sh
9 ##Reference genome
10 ref="${reference.fields.path}"
11 ##Output files (SAM output, BSMAP summary)
12 mapped=$mapped
13 ##Temp directory
14 tempdir=$mapped.files_path
15 summary=$summary
16 #if str($singlePaired.sPaired) == "single":
17 library="single"
18 mate1=$singlePaired.sInput1
19 #if str($singlePaired.sParams.sSettingsType) == "full":
20 fullparam=true
21 qual=$singlePaired.sParams.qual
22 threshold=$singlePaired.sParams.threshold
23 lowqual=$singlePaired.sParams.lowqual
24 adapter="$singlePaired.sParams.adapter"
25 firstn=$singlePaired.sParams.firstn
26 repeat_reads=$singlePaired.sParams.repeat_reads
27 seed_size=$singlePaired.sParams.seed_size
28 mismatch=$singlePaired.sParams.mismatch
29 equal_best=$singlePaired.sParams.equal_best
30 start=$singlePaired.sParams.start
31 end=$singlePaired.sParams.end
32 index_interval=$singlePaired.sParams.index_interval
33 seed_random=$singlePaired.sParams.seed_random
34 rrbs="$singlePaired.sParams.rrbs"
35 mode=$singlePaired.sParams.mode
36 align_info="$singlePaired.sParams.align_info"
37 #end if
38 #else:
39 library="paired"
40 mate1=$singlePaired.pInput1
41 mate2=$singlePaired.pInput2
42 unpaired=$unpaired
43 #if str($singlePaired.pParams.pSettingsType) == "full":
44 fullparam=true
45 qual=$singlePaired.pParams.qual
46 threshold=$singlePaired.pParams.threshold
47 lowqual=$singlePaired.pParams.lowqual
48 adapter="$singlePaired.pParams.adapter"
49 firstn=$singlePaired.pParams.firstn
50 repeat_reads=$singlePaired.pParams.repeat_reads
51 seed_size=$singlePaired.pParams.seed_size
52 mismatch=$singlePaired.pParams.mismatch
53 equal_best=$singlePaired.pParams.equal_best
54 start=$singlePaired.pParams.start
55 end=$singlePaired.pParams.end
56 index_interval=$singlePaired.pParams.index_interval
57 seed_random=$singlePaired.pParams.seed_random
58 rrbs="$singlePaired.pParams.rrbs"
59 mode=$singlePaired.pParams.mode
60 align_info="$singlePaired.pParams.align_info"
61 maxinsert=$singlePaired.pParams.maxinsert
62 mininsert=$singlePaired.pParams.mininsert
63 #end if
64 #end if
65 </command>
66 <inputs>
67 <param name="reference" type="select" label="Select a reference genome"> <!-- reference genome selector; the command template reads this entry's "path" field -->
68 <options from_data_table="all_fasta"> <!-- choices are loaded from the all_fasta data table -->
69 <filter type="sort_by" column="2" /> <!-- order the choices by column 2 of the table -->
70 <validator type="no_options" message="No reference genomes are available" /> <!-- reject the form when the table yields no entries -->
71 </options>
72 </param>
73
74 <conditional name="singlePaired">
75 <param name="sPaired" type="select" label="Is this library mate-paired?"> <!-- test parameter of the singlePaired conditional; its value selects the "single" or "paired" branch -->
76 <option value="single">Single-end</option>
77 <option value="paired">Paired-end</option>
78 </param>
79 <when value="single"> <!-- single-end branch: one read file plus the sParams settings conditional -->
80 <param name="sInput1" type="data" format="fastq,fasta" label="FASTQ file" help="Must have ASCII encoded quality scores"/>
81 <conditional name="sParams"> <!-- chooses between preset settings and the full parameter list -->
82 <param name="sSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
83 <option value="preSet">Commonly used</option>
84 <option value="full">Full parameter list</option>
85 </param>
86 <when value="preSet" /> <!-- preset mode exposes no additional inputs -->
87 <when value="full"> <!-- every parameter below is forwarded to the wrapper by the command template -->
88 <param name="qual" type="select" label="Select the type of FastQ qualities">
89 <option value="33">phred33-quals</option>
90 <option value="64">phred64-quals</option>
91 </param>
92 <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" />
93 <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing >n Ns" help="default=5" />
94 <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" /> <!-- free text; the literal "none" is the default value -->
95 <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" />
96
97 <param name="repeat_reads" type="select" label="How to report repeat hits">
98 <option value="0">none(unique hit only)</option>
99 <option value="1">random one</option>
100 </param>
101
102 <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" />
103 <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" />
104 <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" />
105 <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" />
106 <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" />
107 <param name="index_interval" type="integer" value="4" label="Index interval" />
108 <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" />
109 <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" /> <!-- free text; the literal "none" is the default value -->
110 <param name="mode" type="select" label="Set mapping strand information">
111 <option value="0">only map to 2 forward strands</option>
112 <option value="1">map SE or PE reads to all 4 strands</option>
113 </param>
114 <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=>I(G) transition in RNA editing." /> <!-- free text; the literal "none" is the default value -->
115 </when> <!-- full -->
116 </conditional> <!-- sParams -->
117 </when> <!-- single -->
118
119 <when value="paired"> <!-- paired-end branch: forward and reverse read files plus the pParams settings conditional -->
120 <param name="pInput1" type="data" format="fastq,fasta" label="Forward FASTQ file" />
121 <param name="pInput2" type="data" format="fastq,fasta" label="Reverse FASTQ file" />
122
123 <conditional name="pParams"> <!-- chooses between preset settings and the full parameter list -->
124 <param name="pSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
125 <option value="preSet">Commonly used</option>
126 <option value="full">Full parameter list</option>
127 </param>
128 <when value="preSet" /> <!-- preset mode exposes no additional inputs -->
129 <when value="full"> <!-- every parameter below is forwarded to the wrapper by the command template -->
130 <param name="qual" type="select" label="Select the type of FastQ qualities">
131 <option value="33">phred33-quals</option>
132 <option value="64">phred64-quals</option>
133 </param>
134
135 <param name="mininsert" type="integer" value="28" label="Minimal insert size allowed" /> <!-- insert-size bounds are offered only in this paired-end branch -->
136 <param name="maxinsert" type="integer" value="500" label="Maximal insert size allowed" />
137
138 <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" />
139 <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing >n Ns" help="default=5" />
140 <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" /> <!-- free text; the literal "none" is the default value -->
141 <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" />
142
143 <param name="repeat_reads" type="select" label="How to report repeat hits">
144 <option value="0">none(unique hit only)</option>
145 <option value="1">random one</option>
146 </param>
147
148 <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" />
149 <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" />
150 <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" />
151 <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" />
152 <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" />
153 <param name="index_interval" type="integer" value="4" label="Index interval" />
154 <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" />
155 <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" /> <!-- free text; the literal "none" is the default value -->
156 <param name="mode" type="select" label="Set mapping strand information">
157 <option value="0">only map to 2 forward strands</option>
158 <option value="1">map SE or PE reads to all 4 strands</option>
159 </param>
160 <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=>I(G) transition in RNA editing." /> <!-- free text; the literal "none" is the default value -->
161
162
163 </when> <!-- full -->
164 </conditional> <!-- pParams -->
165 </when> <!-- paired -->
166 </conditional> <!-- singlePaired -->
167
168
169 </inputs>
170 <outputs>
171 <data name="mapped" format="sam" label="BSMAP Mapped Reads"> <!-- SAM alignments; the dbkey metadata is copied from the selected reference entry -->
172 <actions>
173 <action type="metadata" name="dbkey">
174 <option type="from_data_table" name="all_fasta" column="1" offset="0"> <!-- must be the same data table that populates the "reference" select, otherwise the lookup below cannot match -->
175 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- drop rows whose first column starts with "#" -->
176 <filter type="param_value" ref="reference" column="0"/> <!-- keep the row whose first column equals the chosen reference value -->
177 </option>
178 </action>
179 </actions>
180 </data>
181 <data name="summary" format="txt" label="BSMAP Mapping Summary" />
182 <data name="unpaired" format="sam" label="BSMAP Unpaired Hits"> <!-- produced only for paired-end runs, see the filter below -->
183 <filter>(singlePaired['sPaired'] == 'paired')</filter>
184 </data>
185
186 </outputs>
187 <help>
188 **What it does**
189
190 BSMAP_ is short-read mapping software for bisulfite sequencing reads. It has the following features:
191
192 - read length up to 144 nt, allow up to 15 mismatches, gap size up to 3 bp.
193
194 - support single-end and paired-end mapping; support multi-threaded mapping.
195
196 - support both "Lister protocol" (sequence 2 forward strands only) and "Cokus protocol" (sequence all 4 bisulfite converted strands)
197
198 - reads are directly mapped to original reference genome sequence, no need to preprocess the reads and reference genome to convert C to T.
199
200 - support both whole genome bisulfite sequencing (WGBS) mode and reduced representation bisulfite sequencing (RRBS) mode, allow changing the digestion site information to support different digestion enzymes for RRBS.
201
202 - allow trimming adapter sequences and low quality nucleotides from the 3'end of reads
203
204 - allow trade off between speed/memory usage/mapping sensitivity. For human genome, the RRBS mode uses ~3GB. In WGBS mode, the typical memory usage is ~9GB, but can be as low as 5GB.
205
206 - allow alignment for other nucleotide transitions, for example, can be set to detect the A=>I(G) transition in RNA editing.
207
208 .. _BSMAP: http://code.google.com/p/bsmap/
209
210 **Input formats**
211
212 BSMAP accepts files in FASTA/FASTQ format.
213
214 **Outputs**
215
216 The output contains the following files:
217
218 - mapped reads in SAM format
219
220 - mapping summary
221
222 - unpaired hits (only for paired-end mapping)
223
224 </help>
225
226 <tests> <!-- NOTE(review): the reference value and fixture file names below mention "bismark"; confirm they exist for this tool -->
227 <test> <!-- single-end run using the preset ("Commonly used") settings -->
228 <param name="sPaired" value="single" />
229 <param name="reference" value="bismark" />
230 <param name="sInput1" ftype="fastq" value="bismark_test_single.fastq" />
231 <param name="sSettingsType" value="preSet" />
232 <output name="mapped" ftype="sam" file="bismark_result_single_1.SAM" />
233 <output name="summary" ftype="txt" file="bismark_result_single_2.txt" />
234 </test>
235 </tests>
236 </tool>
237