comparison minfi_pipeline.xml @ 0:84361ce36a11 draft

planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
author nturaga
date Tue, 19 Apr 2016 11:10:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:84361ce36a11
1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="minfi_pipeline" name="Minfi pipeline" version="1.0">
3 <description>to Analyze Illumina 450k data</description>
4 <macros>
5 <import>minfi_macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <command detect_errors="exit_code"><![CDATA[mkdir minfi_temp ## scratch directory that receives the generated sample sheet
9 &&
10 cp "${minfi_get_files}" ./minfi_temp/minfi_config.txt
11 &&
12 echo \$GALAXY_SLOTS ## writes the allocated slot count to the job's stdout
13 &&
14 Rscript "${__tool_directory__}/minfi_pipeline.R" ## quoted so a tool directory containing spaces still resolves
15 --quiet="TRUE"
16 --preprocess="${preprocess.preprocess_method}"
17 --cores="\${GALAXY_SLOTS:-4}" ## shell fallback: 4 cores when GALAXY_SLOTS is unset or empty
18 #if str( $minfi_param_type.minfi_param_type_selector ) == "advanced":
19 --numPositions=${minfi_param_type.numPositions}
20 --shrinkVar=${minfi_param_type.shrinkVar}
21 --b_permutations=${minfi_param_type.b_permutations}
22 --smooth=${minfi_param_type.smooth}
23 --cutoff=${minfi_param_type.cutoff}
24 --l_value=${minfi_param_type.l_value}
25 #else:
26 --numPositions=1000 ## basic mode: fixed values, nothing is read from the form
27 --shrinkVar=TRUE
28 --b_permutations=25
29 --smooth=FALSE
30 --cutoff=0.3
31 --l_value=4
32 #end if]]></command>
33 <configfiles> <!-- minfi_get_files: sample sheet with one whitespace-separated row per collection element: group tag (control|case), forward dataset, reverse dataset, element name. The command block copies it to ./minfi_temp/minfi_config.txt. -->
34 <configfile name="minfi_get_files"><![CDATA[### Parse the HDA's to get the path of each forward and reverse dataset
35 #for $key in $control.keys()
36 control $control[$key].forward $control[$key].reverse $control[$key].name
37 #end for
38 #for $key in $case.keys()
39 case $case[$key].forward $case[$key].reverse $case[$key].name
40 #end for]]></configfile>
41 </configfiles>
42 <inputs> <!-- Two list:paired IDAT collections (control and case), a preprocessing method selector, and optional advanced parameters. -->
43 <!--<param name="experiment" size="30" type="text" value="Experiment" label="Label your experiment/analysis"/>-->
44 <param name="control" type="data_collection" format="idat" label="Control (Condition 1 / Wildtype)" collection_type="list:paired" help="Input data needs to be a list of dataset pairs, where the files are in IDAT format" />
45 <param name="case" type="data_collection" format="idat" label="Case (Condition 2 / Treatment)" collection_type="list:paired" help="Input data needs to be a list of dataset pairs, where the files are in IDAT format" />
46 <conditional name="preprocess">
47 <param name="preprocess_method" type="select" label="Select Preprocessing Method">
48 <option value="quantile">Quantile Normalization (Recommended)</option>
49 <option value="funnorm">Functional Normalization (Recommended)</option>
50 <option value="illumina">Illumina:Genome Studio Normalization</option>
51 <option value="swan">Subset-quantile Within Array Normalisation</option>
52 <option value="noob">Noob background correction method or Noob Normalization</option>
53 </param>
54 <when value="quantile" />
55 <when value="funnorm" />
56 <when value="illumina" />
57 <when value="swan" />
58 <when value="noob" />
59 </conditional>
60 <conditional name="minfi_param_type">
61 <param name="minfi_param_type_selector" type="select" label="Basic or Advanced Minfi Parameters">
62 <option value="basic" selected="True">Basic Default settings</option>
63 <option value="advanced">Advanced</option>
64 </param>
65 <when value="basic">
66 <!-- Basic mode exposes no parameters; the command block passes fixed values instead. -->
67 </when>
68 <when value="advanced">
69 <!-- numPositions: passed to the R script as numPositions (MDS plot option) -->
70 <param name="numPositions" type="integer" value="1000" label="numPositions" help="Refer the tool's help section" />
71 <!-- Give options for estimating cell counts here -->
72 <!-- shrinkVar (DMP finder). NOTE(review): unchecked here (FALSE) while basic mode passes shrinkVar=TRUE; confirm which default is intended. -->
73 <param name="shrinkVar" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="(ShrinkVar) Should variance shrinkage be used?" help="Refer the tool's help section" />
74 <!-- Bumphunter options: B (resamples), smooth, cutoff, length of DMRs -->
75 <param name="b_permutations" type="integer" value="25" label="Number of times resampled" help="Refer the tool's help section" />
76 <param name="smooth" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Should a smoothing function be used?" help="Refer the tool's help section" />
77 <param name="cutoff" type="float" value="0.3" label="Cut off for selecting candidate regions" help="Refer the tool's help section" />
78 <param name="l_value" type="integer" value="4" label="Length of DMRs to be selected" help="Refer the tool's help section" />
79 </when>
80 </conditional>
81 </inputs>
82 <outputs> <!-- Output names are plain identifiers (no dots) so they are valid Cheetah placeholders and match the names asserted in the tests section; files are still collected via from_work_dir. -->
83 <!-- PLOT OUTPUTS -->
84 <data name="qc_report" from_work_dir="qc_report.pdf" format="pdf" label="QC report" />
85 <data name="mds_plot" from_work_dir="mds_plot.pdf" format="pdf" label="MDS plot" />
86 <!-- CSV outputs -->
87 <data name="dmps" from_work_dir="dmps.csv" format="csv" label="Differentially Methylated positions" />
88 <data name="dmrs" from_work_dir="dmrs.csv" format="csv" label="Differentially Methylated Regions using Bumphunter" />
89 </outputs>
90 <tests>
91 <test> <!-- Three case and three control IDAT pairs, quantile preprocessing, basic parameters. -->
92 <param name="case">
93 <collection type="list:paired">
94 <element name="5723646052_R02C02">
95 <collection type="paired">
96 <element name="forward" value="5723646052/5723646052_R02C02_Grn.idat" />
97 <element name="reverse" value="5723646052/5723646052_R02C02_Red.idat" />
98 </collection>
99 </element>
100 <element name="5723646052_R04C01">
101 <collection type="paired">
102 <element name="forward" value="5723646052/5723646052_R04C01_Grn.idat" />
103 <element name="reverse" value="5723646052/5723646052_R04C01_Red.idat" />
104 </collection>
105 </element>
106 <element name="5723646052_R05C02">
107 <collection type="paired">
108 <element name="forward" value="5723646052/5723646052_R05C02_Grn.idat" />
109 <element name="reverse" value="5723646052/5723646052_R05C02_Red.idat" />
110 </collection>
111 </element>
112 </collection>
113 </param>
114 <param name="control">
115 <collection type="list:paired">
116 <element name="5723646053_R04C02">
117 <collection type="paired">
118 <element name="forward" value="5723646053/5723646053_R04C02_Grn.idat" />
119 <element name="reverse" value="5723646053/5723646053_R04C02_Red.idat" />
120 </collection>
121 </element>
122 <element name="5723646053_R05C02">
123 <collection type="paired">
124 <element name="forward" value="5723646053/5723646053_R05C02_Grn.idat" />
125 <element name="reverse" value="5723646053/5723646053_R05C02_Red.idat" />
126 </collection>
127 </element>
128 <element name="5723646053_R06C02">
129 <collection type="paired">
130 <element name="forward" value="5723646053/5723646053_R06C02_Grn.idat" />
131 <element name="reverse" value="5723646053/5723646053_R06C02_Red.idat" />
132 </collection>
133 </element>
134 </collection>
135 </param>
136 <param name="preprocess|preprocess_method" value="quantile" /> <!-- conditional members are addressed as cond|param in tests -->
137 <param name="minfi_param_type|minfi_param_type_selector" value="basic" />
138 <output name="qc_report" file="qc_report.pdf" ftype="pdf" compare="sim_size" /> <!-- PDFs embed creation timestamps, so compare by size instead of by content -->
139 <output name="mds_plot" file="mds_plot.pdf" ftype="pdf" compare="sim_size" />
140 <output name="dmps" file="dmps.csv" ftype="csv" />
141 <output name="dmrs" file="dmrs.csv" ftype="csv" />
142 </test>
143 </tests>
144 <help><![CDATA[.. class:: infomark
145
146 **What it does**
147
148 The minfi package provides tools for analyzing Illumina’s Methylation arrays, with a special
149 focus on the new 450k array for humans. The functionality addressed in this wrapper includes preprocessing, QC assessments, identification of interesting methylation loci and plotting functionality.
150
151
152 **INPUTS**:
153
154 *Case* : Dataset collection with all samples which are of one phenotype (Example: Cancer, Disease state, Phenotype 1)
155
156 *Control* : Dataset collection with all samples which are of base normal phenotype (Example: Normals, Non-Disease state, Phenotype 2)
157
158 *Select Preprocessing Method*:
159
160 Choose one of the many preprocessing methods available. For more information on the different preprocessing methods refer to the minfi manual_, https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf
161
162 *NOTE*
163 Many people ask us which normalization they should apply to their dataset. A good rule recommended by the authors of the package is: if there exist global biological methylation differences between your samples, as for instance in a dataset with cancer and normal samples, or a dataset with different tissues/cell types, use the preprocessFunnorm function, as it is aimed at such datasets. On the other hand, if you do not expect global differences between your samples, for instance in a blood dataset or a one-tissue dataset, use the preprocessQuantile function. In our experience, these two normalization procedures always perform better than the functions preprocessNoob, preprocessIllumina and preprocessSWAN discussed below. For convenience, these functions are still implemented in the minfi package. This section is taken from the excellent guide_ provided by Jean-Philippe Fortin and Kasper Daniel Hansen.
164
165
166 **OUTPUTS**:
167
168 Plots:
169
170 Output 1: PDF file of the QC Report.
171 Output 2: PDF file of the MDS plot.
172
173 CSV files:
174
175 Output 1: CSV file containing Differentially Methylated Positions.
176 Output 2: CSV file containing Differentially Methylated Regions calculated using Bumphunter.
177 Output 3: CSV file containing large-scale Differentially Methylated Regions (note: this wrapper currently declares only the two CSV outputs above as Galaxy datasets).
178
179
180 **HOW TO USE**
181
182 IDAT files (Both Red and Green channel). Make paired dataset collections, with RED and GREEN channel IDAT files.
183
184 Step 1: Upload IDAT(Both Red and green channel) files using the upload tool in Galaxy.
185
186 Step 2: Once the upload is completed, select the "Operations on Multiple Datasets" in the history panel.
187
188 Step 3: Select the list of IDAT files to be analyzed, and click "For all selected".
189
190 Step 4:
191
192 Choose the "Build List of Dataset pairs". Make the pairs and label the dataset collections. Once you enter the "Create a collection of paired datasets" dialogue box, click on "Clear filters" and then choose the "Forward" == Green channel, and "Reverse" == Red channel files. You should see the pairs in green color in the bottom panel.
193
194 Rename your common prefix for the file, by removing the trailing underscore "_", and name your collection. You should have one dataset collection for "Case" and another with "Control" (Normal vs Cancer or Treatment vs Wildtype)
195
196 Step 5: Once the two dataset collections are prepared, run the tool to run a minfi pipeline.
197
198
199 **ADVANCED PARAMETERS:**
200
201 Variance shrinkage (‘shrinkVar=TRUE’) is recommended when sample sizes are small (<10).
202 The sample variances are squeezed by computing empirical Bayes posterior means using
203 the ‘limma’ package.
204
205
206 B: An integer denoting the number of resamples to use when computing null distributions.
207 This defaults to 0 in bumphunter itself; this wrapper passes 25 unless changed under Advanced. If ‘permutations’ is supplied that defines the number of
208 permutations/bootstraps and ‘B’ is ignored.
209
210
211 smooth: A logical value. If TRUE the estimated profile will be smoothed with the smoother
212 defined by ‘smoothFunction’
213
214
215 cutoff: A numeric value. Values of the estimate of the genomic profile above the cutoff
216 or below the negative of the cutoff will be used as candidate regions. It is possible
217 to give two separate values (upper and lower bounds). If one value is given, the lower
218 bound is minus the value.
219
220 .. _manual: https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf
221 .. _guide: https://www.bioconductor.org/help/course-materials/2015/BioC2015/methylation450k.html]]></help>
222 <expand macro="citations" />
223 </tool>