comparison minfi_analysis.xml @ 78:e67f424d3f42 draft

Uploaded Analysis Pipeline
author kpbioteam
date Sun, 26 Jan 2020 16:55:54 -0500
parents
children 8ab24a5229bd
comparison
equal deleted inserted replaced
77:c7e8fdb9f1db 78:e67f424d3f42
1 <tool id="minfi_analysis" name="Infinium Human Methylation BeadChip" version="2.1.0">
2 <description>Determines differentially methylated regions and positions from idat files</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements">
7 <requirement type="package" version="0.6.0">bioconductor-illuminahumanmethylation450kanno.ilmn12.hg19</requirement>
8 </expand>
9 <command detect_errors="exit_code"><![CDATA[
10 #for $input in $files_red:
11 #set $redname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( "/", '-' ).replace( "\t", "-" )
12 ln -s '$input' './${redname}' &&
13 #end for
14 #for $input in $files_grn:
15 #set $grnname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( "/", '-' ).replace( "\t", "-" )
16 ln -s '$input' './${grnname}' &&
17 #end for
18 Rscript '$minfi_analysis_script'
19 ]]></command>
20 <configfiles>
21 <configfile name="minfi_analysis_script"><![CDATA[
22 require("minfi", quietly = TRUE)
23 require("IlluminaHumanMethylation27kanno.ilmn12.hg19", quietly = TRUE)
24 require("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)
25 require("IlluminaHumanMethylationEPICanno.ilm10b4.hg19", quietly = TRUE)
26 options(warn = -1)
27 RGSet <- read.metharray(list.files(pattern="_Red.idat")) #load .IDAT files linked into the working directory
28
29 MSet <- preprocessRaw(RGSet) #create objects contains CpGs signals
30
31 qc <- getQC(MSet)
32 write.table(qc, '$qctab') #optional - provides a simple quality control matrix and plot
33 png('$qcpng')
34 plotQC(qc)
35 dev.off()
36
37 RSet <- ratioConvert(MSet, what = "both", keepCN = TRUE) #store Beta values and/or M values
38 GRSet <- mapToGenome(RSet) #default mapping, also the starting point of the "ppsnp" branch below
39
40
41 if ('$optpp' == "na" ) {
42 GRSet <- mapToGenome(RSet) #mapping Illumina methylation array data to the genome
43 } else if ('$optpp' == "ppfun" ) {
44 GRSet <- preprocessFunnorm(RGSet) #optional - implements the functional normalization algorithm
45 } else if ('$optpp' == "ppq" ) {
46 GRSet <- preprocessQuantile(RGSet, fixOutliers = TRUE,
47 removeBadSamples = TRUE, badSampleCutoff = 10.5,
48 quantileNormalize = TRUE, stratified = TRUE,
49 mergeManifest = FALSE, sex = NULL) #optional - implements stratified quantile normalization preprocessing
50 } else if ('$optpp' == "ppsnp" ) {
51 snps <- getSnpInfo(GRSet) #optional - retrieve the chromosome and the position of each SNP
52 write.table(snps, '$table')
53 GRSet <- dropLociWithSnps(GRSet, snps=c("SBE","CpG"), maf=0) #optional - drop the probes that contain either a SNP at the CpG interrogation or at the single nucleotide extensions
54 }
55 pheno <- read.table('$phenotype_table',skip = 1) #skip the first (header) line
56 group <- pheno\$V2 #second column is used as the phenotype covariate
57 pair <- factor(pheno\$V3) #third column is used as the pairing factor
58
59 design.matrix <- model.matrix(~ group + pair)
60
61 maxGap <- as.numeric('$maxgap_size')
62 if(is.null(GRSet\$cluster)){ #no cluster annotation available: cluster by maxGap
63 cluster = NULL
64 maxGap = maxGap
65 } else {
66 cluster = GRSet\$cluster
67 maxGap = NULL
68 }
69
70 dmrs <- bumphunter(GRSet,
71 design = design.matrix,
72 cluster = cluster,
73 maxGap = maxGap,
74 cutoff = as.numeric('$cutoff_size'),
75 nullMethod = '$null_method',
76 B = as.numeric('$number_of_resamples'))
77 dmrGR <- dmrs\$table[,c(1,2,3)] #first three columns are written as chr, start, end
78 colnames(dmrGR) <- c("chr","start","end")
79 write.table(dmrGR, file= '$dmr', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = "\t")
80
81 tab <- read.table('$ucsc_genome')
82 tab <- tab[,-(11:14),drop=FALSE]
83 tab <- tab[,c(1,4,5,10)] #keep the columns used as chr, start, end and probe name
84 colnames(tab) <- c('chr','start','end','names')
85
86 dmp <- dmpFinder(dat = getBeta(GRSet),pheno = read.table('$phenotype_table',skip=1)[,"V2"], type = '$phenotype', qCutoff = as.numeric('$q_cutoff'), shrinkVar = as.logical('$variance_shrinkage')) #pass shrinkVar as a logical, not as a string
87 dmp[,"names"] <- rownames(dmp)
88 data <- merge(dmp, tab, by="names",sort = TRUE)
89 data <- data[,c("chr","start","end","names","pval","qval")] #select by name: the continuous dmpFinder result has one more column than the categorical one, so positional indices would shift
90 write.table(data, file= '$dmp', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = "\t")
91
92 ]]> </configfile>
93 </configfiles>
94 <inputs>
95 <param type="data" name="files_red" multiple="true" format="idat" label="Red .IDAT files" help="Red .IDAT files extension is followed by the unmethylated signal intensity read in the red channel."/>
96 <param type="data" name="files_grn" multiple="true" format="idat" label="Green .IDAT files" help="Green .IDAT files extension is followed by the methylated signal intensity read in the green channel."/>
97 <param name="optpp" type="select" label="(Optional) Preprocessing Method" help="Mapping Illumina methylation array data to the genome with or without additional preprocessing.">
98 <option value="na">No Selection (use default)</option>
99 <option value="ppfun">Preprocess Funnorm</option>
100 <option value="ppq">Preprocess Quantile</option>
101 <option value="ppsnp">Remove SNPs</option>
102 </param>
103 <param type="data" name="phenotype_table" format="tabular" label="Phenotype Table"
104 help="Phenotype Table must include the following information: sampleID, phenotype and paired or unpaired samples column. The phenotype may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure)."/>
105 <param name="maxgap_size" type="integer" value="250" label="maxGap Size"
106 help="If cluster is not provided this maximum location gap will be used to define cluster."/>
107 <param name="cutoff_size" type="float" value="0.1" label="Cutoff Size"
108 help="A numeric value. Values of the estimate of the genomic profile above the cutoff or below the negative of the cutoff will be used as candidate regions. It is possible to give two separate values (upper and lower bounds). If one value is given, the lower bound is minus the value."/>
109 <param name="number_of_resamples" type="integer" value="0" label="Number of Resamples"
110 help="An integer denoting the number of resamples to use when computing null distributions. This defaults to 0. If permutations is supplied that defines the number of permutations/bootstraps and B is ignored."/>
111 <param name="null_method" type="select" label="null Method" help="Method used to generate null candidate regions (defaults to ‘permutation’). Note that for cases with more than one covariate the permutation approach is not generally recommended. ">
112 <option value="permutation" selected="True">permutation</option>
113 <option value="bootstrap">bootstrap</option>
114 </param>
116 <param name="phenotype" type="select" label="Phenotype Type">
117 <option value="categorical">categorical</option>
118 <option value="continuous">continuous</option>
119 </param>
120 <param name="q_cutoff" type="float" value="1" label="qCutoff Size" help="DMPs with an FDR q-value greater than this will not be returned."/>
121 <param name="variance_shrinkage" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Variance Shrinkage"
122 help="Enabling variance shrinkage is recommended when sample sizes are small."/>
123 <param type="data" name="ucsc_genome" format="gtf" label="Genome Table" help="Reference Sequence e.g. wgEncodeHaibMethyl450Gm12878SitesRep1."/>
124 </inputs>
125 <outputs>
126 <data name="qctab" format="txt" label="Quality Control Report"/>
127 <data name="qcpng" format="png" label="Quality Control Plot"/>
128 <data name="table" format="txt" label="SNPInfo Table"/>
129 <data name="dmr" format="bed" label="Differentially Methylated Regions"/>
130 <data name="dmp" format="bed" label="Differentially Methylated Positions"/>
131 </outputs>
132 <tests>
133 <test>
134 <param name="files_red" value="GSM1588707_8795207119_R06C02_Red.idat,GSM1588706_8795207135_R02C02_Red.idat,GSM1588705_8795207119_R05C02_Red.idat,GSM1588704_8795207135_R01C02_Red.idat" ftype="idat"/>
135 <param name="files_grn" value="GSM1588707_8795207119_R06C02_Grn.idat,GSM1588706_8795207135_R02C02_Grn.idat,GSM1588705_8795207119_R05C02_Grn.idat,GSM1588704_8795207135_R01C02_Grn.idat" ftype="idat"/>
136 <param name="optpp" value="ppsnp"/>
138 <param name="phenotype_table" value="phenotypeTable.txt"/>
139 <param name="maxgap_size" value="250"/>
140 <param name="cutoff_size" value="0.1"/>
141 <param name="number_of_resamples" value="0"/>
142 <param name="null_method" value="permutation"/>
145 <param name="phenotype" value="categorical"/>
146 <param name="q_cutoff" value="1"/>
147 <param name="variance_shrinkage" value="FALSE"/>
148 <param name="ucsc_genome" value="ucsc.gtf"/>
149 <output name="qctab" file="Quality_Control_Report.txt"/>
150 <output name="qcpng" file="Quality_Control_Plot.png" compare="sim_size"/>
151 <output name="table" file="SNPInfo_Table.txt"/>
152 <output name="dmr" file="Differentially_Methylated_Regions.bed"/>
153 <output name="dmp" file="Differentially_Methylated_Positions.bed"/>
154 </test>
155 </tests>
156 <help><![CDATA[
157
158 .. class:: infomark
159
160 **What it does**
161
162 The workflow combines 5 main steps, starting with raw intensity data loading (.idat) and then optional preprocessing and normalisation of the data. The next quality control step performs an additional sample check to remove low-quality data, which normalisation cannot detect. The workflow gives the user the opportunity to perform any of these preparation and data cleaning steps, including the highly recommended genetic variation annotation step, which results in single nucleotide polymorphism identification and removal. Finally, the dataset generated through all of these steps can be used to hunt (find) differentially-methylated positions (DMP) and regions (DMR) with respect to a phenotype covariate.
163
164 ***Inputs***
165
166 *Series of .IDAT files*: red and green .idat file for each sample on the chip intensity data.
167
168 *(optional) Preprocessing Methods*: by this step probes can be stratified by region via quantile normalization or by extended implementation of functional normalisation recommended for cases where global changes are expected such as in cancer-normal comparisons. In addition unwanted probes containing either a SNP at the CpG interrogation or at the single nucleotide extension can be removed (recommended).
169
170 *Phenotype Table*: table of compared probes and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure).
171
172 ========== ============== ===============
173 Accession Sensitivity Treatment
174 ---------- -------------- ---------------
175 GSM1588704 sensitive MAPKi
176 ---------- -------------- ---------------
177 GSM1588705 sensitive MAPKi
178 ---------- -------------- ---------------
179 GSM1588706 resistant BRAFi
180 ========== ============== ===============
181
182 *Note*: phenotype covariate table must include the following information:
183 sampleID/Accession, phenotype and paired or unpaired samples column
184
185 *Genome Table*: a reference genome that contains the nucleotide sequence of the chromosomes. It is representative of a specific genome build and release.
186
187 ***Outputs***
188
189 *Quality Control Report and Plot*: quality control (QC) outputs plot of the log median intensity in both the methylated (M) and unmethylated (U) channels. When plotting these two medians against each other the good samples cluster together, while failed samples tend to separate and have lower median intensities.
190
191 *(optional) SNPInfo Table*: matrix of the chromosome and the position of each SNP on a given Affymetrix SNP Array.
192
193 *Differentially Methylated Regions*: consecutive genomic locations differentially methylated in the same direction, saved as multiple track lines in a single BED file.
194
195 *Differentially Methylated Positions*: single genomic positions that have a different methylation level in two different groups of samples (or conditions), saved as multiple track lines in a single BED file.
196 ]]></help>
197 <citations>
198 <citation type="doi">10.18129/B9.bioc.illuminaio</citation>
199 </citations>
200 </tool>