Mercurial > repos > eiriche > bsmap
comparison bsmap.xml @ 11:413c742682f7 draft
Uploaded
author | eiriche |
---|---|
date | Fri, 30 Nov 2012 09:14:33 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
10:4f9b7eaecbd4 | 11:413c742682f7 |
---|---|
1 <tool id="bsmap" name="BSMAP Mapper"> | |
2 <requirements> <!-- external software this tool depends on --> | |
3 <requirement type="package"> | |
4 bsmap | |
5 </requirement> | |
6 </requirements> | |
7 <command interpreter="bash"> | |
8 bsmap_wrapper.sh | |
9 ##Reference genome: a FASTA dataset from the history, or the path field of the selected bsmap_fasta data table entry | |
10 ##Free-text values (adapter, rrbs, align_info) are double-quoted so that a value containing spaces reaches the wrapper as one key=value argument | |
11 #if $refGenomeSource.genomeSource == "history": | |
12 ref="${refGenomeSource.myFile}" | |
13 #else | |
14 ref="${refGenomeSource.builtinFile.fields.path}" | |
15 #end if | |
16 ##Output files (SAM output, BSMAP summary) | |
17 mapped=$mapped | |
18 ##Temp directory (the files_path of the mapped output) | |
19 tempdir=$mapped.files_path | |
20 summary=$summary | |
21 #if str($singlePaired.sPaired) == "single": | |
22 library="single" | |
23 mate1=$singlePaired.sInput1 | |
24 #if str($singlePaired.sParams.sSettingsType) == "full": | |
25 fullparam=true | |
26 qual=$singlePaired.sParams.qual | |
27 threshold=$singlePaired.sParams.threshold | |
28 lowqual=$singlePaired.sParams.lowqual | |
29 adapter="${singlePaired.sParams.adapter}" | |
30 firstn=$singlePaired.sParams.firstn | |
31 repeat_reads=$singlePaired.sParams.repeat_reads | |
32 seed_size=$singlePaired.sParams.seed_size | |
33 mismatch=$singlePaired.sParams.mismatch | |
34 equal_best=$singlePaired.sParams.equal_best | |
35 start=$singlePaired.sParams.start | |
36 end=$singlePaired.sParams.end | |
37 index_interval=$singlePaired.sParams.index_interval | |
38 seed_random=$singlePaired.sParams.seed_random | |
39 rrbs="${singlePaired.sParams.rrbs}" | |
40 mode=$singlePaired.sParams.mode | |
41 align_info="${singlePaired.sParams.align_info}" | |
42 #end if | |
43 #else: | |
44 library="paired" | |
45 mate1=$singlePaired.pInput1 | |
46 mate2=$singlePaired.pInput2 | |
47 unpaired=$unpaired | |
48 #if str($singlePaired.pParams.pSettingsType) == "full": | |
49 fullparam=true | |
50 qual=$singlePaired.pParams.qual | |
51 threshold=$singlePaired.pParams.threshold | |
52 lowqual=$singlePaired.pParams.lowqual | |
53 adapter="${singlePaired.pParams.adapter}" | |
54 firstn=$singlePaired.pParams.firstn | |
55 repeat_reads=$singlePaired.pParams.repeat_reads | |
56 seed_size=$singlePaired.pParams.seed_size | |
57 mismatch=$singlePaired.pParams.mismatch | |
58 equal_best=$singlePaired.pParams.equal_best | |
59 start=$singlePaired.pParams.start | |
60 end=$singlePaired.pParams.end | |
61 index_interval=$singlePaired.pParams.index_interval | |
62 seed_random=$singlePaired.pParams.seed_random | |
63 rrbs="${singlePaired.pParams.rrbs}" | |
64 mode=$singlePaired.pParams.mode | |
65 align_info="${singlePaired.pParams.align_info}" | |
66 maxinsert=$singlePaired.pParams.maxinsert | |
67 mininsert=$singlePaired.pParams.mininsert | |
68 #end if | |
69 #end if | |
70 </command> | |
71 <inputs> | |
72 | |
73 <conditional name="refGenomeSource"> <!-- selects where the reference genome comes from; the command template branches on genomeSource --> | |
74 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in reference?"> | |
75 <option value="builtin">Use a built-in reference</option> | |
76 <option value="history">Use one from the history</option> | |
77 </param> | |
78 <when value="builtin"> <!-- choices are read from the bsmap_fasta data table --> | |
79 <param name="builtinFile" type="select" label="Select the reference genome"> | |
80 <options from_data_table="bsmap_fasta"> | |
81 <filter type="sort_by" column="2" /> | |
82 <validator type="no_options" message="No reference genomes are available" /> | |
83 </options> | |
84 </param> | |
85 </when> | |
86 <when value="history"> <!-- a FASTA dataset picked from the history --> | |
87 <param name="myFile" type="data" format="fasta" label="Select the reference genome" /> | |
88 </when> | |
89 </conditional> | |
90 | |
91 <conditional name="singlePaired"> <!-- library type switch; each branch holds its read inputs and a nested settings conditional --> | |
92 <param name="sPaired" type="select" label="Is this library mate-paired?"> | |
93 <option value="single">Single-end</option> | |
94 <option value="paired">Paired-end</option> | |
95 </param> | |
96 <when value="single"> | |
97 <param name="sInput1" type="data" format="fastq,fasta" label="FASTQ file" help="Must have ASCII encoded quality scores"/> | |
98 <conditional name="sParams"> | |
99 <param name="sSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list"> | |
100 <option value="preSet">Commonly used</option> | |
101 <option value="full">Full parameter list</option> | |
102 </param> | |
103 <when value="preSet" /> <!-- commonly used settings expose no extra parameters --> | |
104 <when value="full"> | |
105 <param name="qual" type="select" label="Select the type of FastQ qualities"> | |
106 <option value="33">phred33-quals</option> | |
107 <option value="64">phred64-quals</option> | |
108 </param> | |
109 <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" /> | |
110 <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing >n Ns" help="default=5" /> | |
111 <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" /> | |
112 <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" /> | |
113 | |
114 <param name="repeat_reads" type="select" label="How to report repeat hits"> | |
115 <option value="0">none(unique hit only)</option> | |
116 <option value="1">random one</option> | |
117 </param> | |
118 | |
119 <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" /> | |
120 <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" /> | |
121 <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" /> | |
122 <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" /> | |
123 <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" /> | |
124 <param name="index_interval" type="integer" value="4" label="Index interval" /> | |
125 <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" /> | |
126 <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" /> | |
127 <param name="mode" type="select" label="Set mapping strand information"> | |
128 <option value="0">only map to 2 forward strands</option> | |
129 <option value="1">map SE or PE reads to all 4 strands</option> | |
130 </param> | |
131 <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=>I(G) transition in RNA editing." /> | |
132 </when> <!-- full --> | |
133 </conditional> <!-- sParams --> | |
134 </when> <!-- single --> | |
135 | |
136 <when value="paired"> | |
137 <param name="pInput1" type="data" format="fastq,fasta" label="Forward FASTQ file" /> | |
138 <param name="pInput2" type="data" format="fastq,fasta" label="Reverse FASTQ file" /> | |
139 | |
140 <conditional name="pParams"> | |
141 <param name="pSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list"> | |
142 <option value="preSet">Commonly used</option> | |
143 <option value="full">Full parameter list</option> | |
144 </param> | |
145 <when value="preSet" /> <!-- commonly used settings expose no extra parameters --> | |
146 <when value="full"> | |
147 <param name="qual" type="select" label="Select the type of FastQ qualities"> | |
148 <option value="33">phred33-quals</option> | |
149 <option value="64">phred64-quals</option> | |
150 </param> | |
151 <!-- insert size limits are offered only in this paired-end parameter list --> | |
152 <param name="mininsert" type="integer" value="28" label="Minimal insert size allowed" /> | |
153 <param name="maxinsert" type="integer" value="500" label="Maximal insert size allowed" /> | |
154 | |
155 <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" /> | |
156 <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing >n Ns" help="default=5" /> | |
157 <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" /> | |
158 <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" /> | |
159 | |
160 <param name="repeat_reads" type="select" label="How to report repeat hits"> | |
161 <option value="0">none(unique hit only)</option> | |
162 <option value="1">random one</option> | |
163 </param> | |
164 | |
165 <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" /> | |
166 <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" /> | |
167 <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" /> | |
168 <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" /> | |
169 <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" /> | |
170 <param name="index_interval" type="integer" value="4" label="Index interval" /> | |
171 <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" /> | |
172 <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" /> | |
173 <param name="mode" type="select" label="Set mapping strand information"> | |
174 <option value="0">only map to 2 forward strands</option> | |
175 <option value="1">map SE or PE reads to all 4 strands</option> | |
176 </param> | |
177 <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=>I(G) transition in RNA editing." /> | |
178 | |
179 | |
180 </when> <!-- full --> | |
181 </conditional> <!-- pParams --> | |
182 </when> <!-- paired --> | |
183 </conditional> <!-- singlePaired --> | |
184 | |
185 | |
186 </inputs> | |
187 <outputs> <!-- datasets written by the wrapper; names match the $mapped, $summary and $unpaired placeholders in the command --> | |
188 <data format="sam" name="mapped" label="BSMAP Mapped Reads" /> | |
189 <data format="txt" name="summary" label="BSMAP Mapping Summary" /> | |
190 <data format="sam" name="unpaired" label="BSMAP Unpaired Hits"> <!-- kept only when the filter below is true, i.e. for paired-end libraries --> | |
191 <filter>(singlePaired['sPaired'] == 'paired')</filter> | |
192 </data> | |
193 | |
194 </outputs> | |
195 <help> | |
196 **What it does** | |
197 | |
198 BSMAP_ is a short-read mapping program for bisulfite sequencing reads. It has the following features: | |
199 | |
200 - supports read lengths up to 144 nt, up to 15 mismatches, and gap sizes up to 3 bp. | |
201 | |
202 - supports single-end and paired-end mapping, as well as multi-threaded mapping. | |
203 | |
204 - supports both the "Lister protocol" (sequencing the 2 forward strands only) and the "Cokus protocol" (sequencing all 4 bisulfite-converted strands). | |
205 | |
206 - reads are mapped directly to the original reference genome sequence; there is no need to preprocess the reads and reference genome to convert C to T. | |
207 | |
208 - supports both whole genome bisulfite sequencing (WGBS) mode and reduced representation bisulfite sequencing (RRBS) mode; the digestion site information can be changed to support different digestion enzymes for RRBS. | |
209 | |
210 - allows trimming of adapter sequences and low-quality nucleotides from the 3' end of reads. | |
211 | |
212 - allows trade-offs between speed, memory usage, and mapping sensitivity. For the human genome, the RRBS mode uses ~3GB. In WGBS mode, the typical memory usage is ~9GB, but can be as low as 5GB. | |
213 | |
214 - allows alignment for other nucleotide transitions; for example, it can be set to detect the A=>I(G) transition in RNA editing. | |
215 | |
216 .. _BSMAP: http://code.google.com/p/bsmap/ | |
217 | |
218 **Input formats** | |
219 | |
220 BSMAP accepts files in FASTA/FASTQ format. | |
221 | |
222 **Outputs** | |
223 | |
224 The output contains the following files: | |
225 | |
226 - mapped reads in SAM format | |
227 | |
228 - mapping summary | |
229 | |
230 - unpaired hits (only for paired-end mapping) | |
231 | |
232 </help> | |
233 | |
234 <tests> <!-- no functional tests are defined for this tool --> | |
235 </tests> | |
236 </tool> | |
237 |