annotate methylation_analysis_bismark/methylation_analysis/bismark.xml @ 9:5b208d4d89e5 draft

Uploaded
author fcaramia
date Tue, 04 Dec 2012 20:15:26 -0500
parents d15b4a2e3bdc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
1 <tool id="bismark_tool" name="Bismark" version="0.7.6">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
2 <description>: A bisulfite read mapper and methylation caller</description>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
3 <requirements>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
4 <requirement type="package" version="0.1.16">samtools</requirement>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
5 <requirement type="package" version="0.12.7">bowtie2</requirement>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
6 <requirement type="package" version="0.7.6">bismark</requirement>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
7 </requirements>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
8 <command interpreter="perl">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
9
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
10 bismark_wrapper.pl
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
11
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
12
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
13 "GENOME::${genome.fields.path}"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
14
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
15
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
16
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
17 #if str($option_input.input_option) == "mates":
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
18 "MATES::$option_input.file_mate1::$option_input.file_mate2"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
19 #else
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
20 "SINGLES::$option_input.file_single"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
21 #end if
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
22
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
23 #if str($format_option) == "fasta":
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
24 "FORMAT::--fasta"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
25 #else
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
26 "FORMAT::--fastq"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
27 #end if
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
28
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
29 #if str($non_directional) == "ON":
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
30 "DIRECTIONAL::--non_directional"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
31 #end if
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
32
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
33 "OUTPUT::$output"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
34 "SUMMARY::$summary"
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
35
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
36 </command>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
37 <inputs>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
38
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
39 <param name="genome" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
40 <options from_data_table="bismark_indexes">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
41 <filter type="sort_by" column="2"/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
42 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
43 </options>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
44 </param>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
45
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
46 <param name="format_option" type="select" label="sample format">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
47 <option value="fastq" selected="true">fastq</option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
48 <option value="fasta">fasta</option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
49 </param>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
50
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
51
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
52 <conditional name="option_input">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
53 <param name="input_option" type="select" label="Input files">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
54 <option value="mates" selected="true">mates</option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
55 <option value="singles">singles</option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
56 </param>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
57 <when value="mates">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
58 <param format="fasta, fastq" name="file_mate1" type="data" label="Mate 1" help=""/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
59 <param format="fasta, fastq" name="file_mate2" type="data" label="Mate 2" help=""/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
60 </when>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
61 <when value="singles">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
62 <param format="fasta, fastq" name="file_single" type="data" label="Single" help=""/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
63 </when>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
64 </conditional>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
65
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
66 <param name="non_directional" type="select" label="non-directional" help="" optional="true">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
67 <option value="ON" selected="true">ON</option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
68 <option value="OFF">OFF</option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
69 </param>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
70
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
71 </inputs>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
72 <outputs>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
73 <data name="summary" format="txt" label="Bismark Summary" /> <!-- plain-text summary output; its path is handed to the wrapper as SUMMARY::$summary -->
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
74 <data format="bam" name="output" label="${tool.name} on ${on_string}">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
75 <actions>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
76 <action type="metadata" name="dbkey">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
77 <option type="from_data_table" name="bismark_indexes" column="1" offset="0">
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
78 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
79 <filter type="param_value" ref="genome" column="0"/>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
80 </option>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
81 </action>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
82 </actions>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
83 </data>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
84 </outputs>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
85 <help>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
86 |
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
87
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
88
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
89 **Reference**
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
90
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
91 http://www.bioinformatics.babraham.ac.uk/projects/bismark/
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
92
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
93 -----
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
94
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
95 **What it does**
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
96
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
97 Bismark takes in FastA or FastQ files and aligns the reads to a specified bisulfite genome.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
98 Sequence reads are transformed into a bisulfite converted forward strand version (C->T conversion)
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
99 or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
100 Each of these reads is then aligned to a bisulfite treated forward strand index of a reference genome
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
101 (C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
102 forward strand; by doing this, alignments will produce the same positions). These 4 instances of
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
103 Bowtie (1 or 2) are run in parallel. The sequence file(s) are then read in again sequence by sequence
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
104 to pull out the original sequence from the genome and determine if there were any protected C's present or not.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
105
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
106 As of version 0.7.0 Bismark will only run 2 alignment threads for OT and OB in parallel, the 4 strand mode can be
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
107 re-enabled by using --non_directional.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
108
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
109 The final output of Bismark is in SAM format by default, but to save storage space the output is compressed (BAM).
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
110
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
111
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
112 -----
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
113
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
114 **Required Parameters**
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
115
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
116 ::
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
117
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
118 -q/--fastq The query input files (specified as mate1,mate2 or singles) are FASTQ
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
119 files (usually having extension .fq or .fastq). This is the default. See also
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
120 --solexa-quals.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
121
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
122 -f/--fasta The query input files (specified as mate1,mate2 or singles) are FASTA
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
123 files (usually having extension .fa, .mfa, .fna or similar). All quality values
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
124 are assumed to be 40 on the Phred scale.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
125
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
126 -1 mates1 List of files containing the #1 mates (filename usually includes
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
127 "_1", e.g. flyA_1.fq,flyB_1.fq). Sequences specified with this option must
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
128 correspond file-for-file and read-for-read with those specified in mates2.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
129 Reads may be a mix of different lengths. Bismark will produce one mapping result
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
130 and one report file per paired-end input file pair.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
131
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
132 -2 mates2 List of files containing the #2 mates (filename usually includes
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
133 "_2", e.g. flyA_2.fq,flyB_2.fq). Sequences specified with this option must
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
134 correspond file-for-file and read-for-read with those specified in mates1.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
135 Reads may be a mix of different lengths.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
136
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
137 singles List of files containing the reads to be aligned (e.g.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
138 lane1.fq,lane2.fq,lane3.fq). Reads may be a mix of different lengths. Bismark will
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
139 produce one mapping result and one report file per input file.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
140
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
141 --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
142 bisulfite strands will be reported. Default: ON.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
143
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
144 (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
145 to the original strands are merely theoretical and should not exist in reality. Specifying directional
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
146 alignments (which is the default) will only run 2 alignment threads to the original top (OT)
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
147 or bottom (OB) strands in parallel and report these alignments. This is the recommended option
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
148 for strand-specific libraries).
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
149
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
150 -----
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
151
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
152 **Default Parameters**
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
153
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
154 ::
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
155
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
156 --bowtie2 Uses Bowtie 2 instead of Bowtie 1. Bismark limits Bowtie 2 to only perform end-to-end
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
157 alignments, i.e. searches for alignments involving all read characters (also called
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
158 untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
159 and/or quality trimmed where appropriate. Default: on.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
160
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
161
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
162
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
163
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
164 -p NTHREADS Launch NTHREADS parallel search threads (default: 4). Threads will run on separate processors/cores
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
165 and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
166 parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
167 E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
168 by a few hundred megabytes. This option is only available if bowtie is linked with the pthreads
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
169 library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
170 automatically use the option '--reorder', which guarantees that output SAM records are printed in
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
171 an order corresponding to the order of the reads in the original input file, even when -p is set
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
172 greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
173 setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
174 if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
175 correspond to input order in that case.
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
176
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
177
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
178
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
179 </help>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
180 </tool>
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
181
d15b4a2e3bdc Uploaded
fcaramia
parents:
diff changeset
182