annotate minfi_analysis.xml @ 78:e67f424d3f42 draft

Uploaded Analysis Pipeline
author kpbioteam
date Sun, 26 Jan 2020 16:55:54 -0500
parents
children 8ab24a5229bd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
78
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
1 <tool id="minfi_analysis" name="Infinium Human Methylation BeadChip" version="2.1.0">
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
2 <description>Determines differentially methylated regions and positions from idat files</description>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
3 <macros>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
4 <import>macros.xml</import>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
5 </macros>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
6 <expand macro="requirements">
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
7 <requirement type="package" version="0.6.0">bioconductor-illuminahumanmethylation450kanno.ilmn12.hg19</requirement>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
8 </expand>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
9 <command detect_errors="exit_code"><![CDATA[
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
10 #for $counter, $input in enumerate($files_red):
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
11 #set $redname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( "/", '-' ).replace( "\t", "-" )
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
12 ln -s $input ./${redname} &&
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
13 #end for
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
14 #for $counter, $input in enumerate($files_grn):
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
15 #set $grnname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( "/", '-' ).replace( "\t", "-" )
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
16 ln -s $input ./${grnname} &&
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
17 #end for
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
18 Rscript '$minfi_analysis_script'
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
19 ]]></command>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
20 <configfiles>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
21 <configfile name="minfi_analysis_script"><![CDATA[
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
22 require("minfi", quietly = TRUE)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
23 require("IlluminaHumanMethylation27kanno.ilmn12.hg19", quietly = TRUE)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
24 require("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
25 require("IlluminaHumanMethylationEPICanno.ilm10b4.hg19", quietly = TRUE)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
26 options(warn = -1) # negative warn level: all R warnings are ignored from this point on
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
27 RGSet <- read.metharray(list.files(pattern="_Red.idat")) #load .IDAT files
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
28
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
29 MSet <- preprocessRaw(RGSet) #create objects contains CpGs signals
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
30
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
31 qc <- getQC(MSet)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
32 write.table(qc, '$qctab') #optional - provides a simple quality control matrix and plot
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
33 png('$qcpng')
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
34 plotQC(qc)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
35 dev.off()
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
36
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
37 RSet <- ratioConvert(MSet, what = "both", keepCN = TRUE) #store Beta values and/or M values
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
38 GRSet <- mapToGenome(RSet)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
39
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
40
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
41 if ('$optpp' == "na" ) {
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
42 GRSet <- mapToGenome(RSet) #mapping Illumina methylation array data to the genome
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
43 } else if ('$optpp' == "ppfun" ) {
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
44 GRSet <- preprocessFunnorm(RGSet) #optional - implements the functional normalization algorithm
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
45 } else if ('$optpp' == "ppq" ) {
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
46 GRSet <- preprocessQuantile(RGSet, fixOutliers = TRUE,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
47 removeBadSamples = TRUE, badSampleCutoff = 10.5,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
48 quantileNormalize = TRUE, stratified = TRUE,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
49 mergeManifest = FALSE, sex = NULL) #optional - implements stratified quantile normalization preprocessing
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
50 } else if ('$optpp' == "ppsnp" ) {
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
51 snps <- getSnpInfo(GRSet) #optional - retrieve the chromosome and the position of each SNP
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
52 write.table(snps, '$table')
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
53 GRSet <- dropLociWithSnps(GRSet, snps=c("SBE","CpG"), maf=0) #optional - drop the probes that contain either a SNP at the CpG interrogation or at the single nucleotide extensions
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
54 }
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
55 pheno <- read.table('$phenotype_table',skip = 1)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
56 group <- pheno\$V2
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
57 pair <- factor(pheno\$V3)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
58
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
59 design.matrix <- model.matrix(~ group + pair)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
60
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
61 maxGap <- as.numeric('$maxgap_size')
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
62 if(is.null(GRSet\$cluster)){
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
63 cluster = NULL
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
64 maxGap = maxGap
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
65 } else {
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
66 cluster = GRSet\$cluster
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
67 maxGap = NULL
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
68 }
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
69
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
70 dmrs <- bumphunter(GRSet,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
71 design = design.matrix,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
72 cluster = cluster,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
73 maxGap = maxGap,
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
74 cutoff = as.numeric('$cutoff_size'),
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
75 nullMethod = '$null_method',
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
76 B = as.numeric('$number_of_resamples'))
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
77 dmrGR <- dmrs\$table[,c(1,2,3)]
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
78 colnames(dmrGR) <- c("chr","start","end")
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
79 write.table(dmrGR, file= '$dmr', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = "\t")
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
80
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
81 tab <- read.table('$ucsc_genome')
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
82 tab <- tab[,-(11:14),drop=FALSE]
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
83 tab <- tab[,c(1,4,5,10)]
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
84 colnames(tab) <- c('chr','start','end','names')
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
85
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
86 dmp <- dmpFinder(dat = getBeta(GRSet), pheno = group, type = '$phenotype', qCutoff = as.numeric('$q_cutoff'), shrinkVar = as.logical('$variance_shrinkage')) # find differentially methylated positions; reuses the phenotype column already read into group and passes shrinkVar as a logical instead of a string
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
87 dmp[,"names"] <- rownames(dmp)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
88 data <- merge(dmp, tab, by="names",sort = TRUE)
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
89 data <- data[, c("chr", "start", "end", "names", "pval", "qval")] # select by name, not position: dmpFinder returns an extra column for continuous phenotypes, which shifts positional indices
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
90 write.table(data, file= '$dmp', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = "\t")
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
91
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
92 ]]> </configfile>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
93 </configfiles>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
94 <inputs>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
95 <param type="data" name="files_red" multiple="true" format="idat" label="Red .IDAT files" help="Red .IDAT files extension is followed by the unmethylated signal intensity read in the red channel."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
96 <param type="data" name="files_grn" multiple="true" format="idat" label="Green .IDAT files" help="Green .IDAT files extension is followed by the methylated signal intensity read in the green channel."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
97 <param name="optpp" type="select" label="(Optional) Preprocessing Method" help="Mapping Illumina methylation array data to the genome with or without additional preprocessing.">
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
98 <option value="na">No Selection (use default)</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
99 <option value="ppfun">Preprocess Funnorm</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
100 <option value="ppq">Preprocess Quantile</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
101 <option value="ppsnp">Remove SNPs</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
102 </param>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
103 <param type="data" name="phenotype_table" format="tabular" label="Phenotype Table"
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
104 help="Phenotype Table must include the following information: sampleID, phenotype and paired or unpaired samples column."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
105 <param name="maxgap_size" type="integer" value="250" label="maxGap Size"
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
106 help="If cluster is not provided this maximum location gap will be used to define cluster."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
107 <param name="cutoff_size" type="float" value="0.1" label="Cutoff Size"
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
108 help="A numeric value. Values of the estimate of the genomic profile above the cutoff or below the negative of the cutoff will be used as candidate regions. It is possible to give two separate values (upper and lower bounds). If one value is given, the lower bound is minus the value."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
109 <param name="number_of_resamples" type="integer" value="0" label="Number of Resamples"
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
110 help="An integer denoting the number of resamples to use when computing null distributions. This defaults to 0. If permutations is supplied that defines the number of permutations/bootstraps and B is ignored."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
111 <param name="null_method" type="select" label="null Method" help="Method used to generate null candidate regions (defaults to ‘permutation’). Note that for cases with more than one covariate the permutation approach is not generally recommended. ">
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
112 <option value="permutation" selected="True">permutation</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
113 <option value="bootstrap">bootstrap</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
114 </param>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
115 <param type="data" name="phenotype_table" format="tabular" label="Phenotype Table" help="Table of compared probes and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure)."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
116 <param name="phenotype" type="select" label="Phenotype Type">
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
117 <option value="categorical">categorical</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
118 <option value="continuous">continuous</option>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
119 </param>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
120 <param name="q_cutoff" type="float" value="1" label="qCutoff Size" help="DMPs with an FDR q-value greater than this will not be returned."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
121 <param name="variance_shrinkage" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Variance Shrinkage"
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
122 help="Enable variance shrinkage is recommended when sample sizes are small."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
123 <param type="data" name="ucsc_genome" format="gtf" label="Genome Table" help="Reference Sequence e.g. wgEncodeHaibMethyl450Gm12878SitesRep1."/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
124 </inputs>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
125 <outputs>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
126 <data name="qctab" format="txt" label="Quality Control Report"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
127 <data name="qcpng" format="png" label="Quality Control Plot"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
128 <data name="table" format="txt" label="SNPInfo Table"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
129 <data name="dmr" format="bed" label="Differentially Methylated Regions"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
130 <data name="dmp" format="bed" label="Differentially Methylated Positions"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
131 </outputs>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
132 <tests>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
133 <test>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
134 <param name="files_red" value="GSM1588707_8795207119_R06C02_Red.idat,GSM1588706_8795207135_R02C02_Red.idat,GSM1588705_8795207119_R05C02_Red.idat,GSM1588704_8795207135_R01C02_Red.idat" ftype="idat"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
135 <param name="files_grn" value="GSM1588707_8795207119_R06C02_Grn.idat,GSM1588706_8795207135_R02C02_Grn.idat,GSM1588705_8795207119_R05C02_Grn.idat,GSM1588704_8795207135_R01C02_Grn.idat" ftype="idat"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
136 <param name="optpp" value="ppsnp"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
137 <param name="grset" value="GRSet_without_SNPs.rdata"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
138 <param name="phenotype_table" value="phenotypeTable.txt"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
139 <param name="maxgap_size" value="250"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
140 <param name="cutoff_size" value="0.1"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
141 <param name="number_of_resamples" value="0"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
142 <param name="null_method" value="permutation"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
143 <param name="grset" value="GRSet_without_SNPs.rdata"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
144 <param name="phenotype_table" value="phenotypeTable.txt"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
145 <param name="phenotype" value="categorical"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
146 <param name="q_cutoff" value="1"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
147 <param name="variance_shrinkage" value="FALSE"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
148 <param name="ucsc_genome" value="ucsc.gtf"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
149 <output name="qctab" file="Quality_Control_Report.txt"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
150 <output name="qcpng" file="Quality_Control_Plot.png" compare="sim_size"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
151 <output name="table" file="SNPInfo_Table.txt"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
152 <output name="dmr" file="Differentially_Methylated_Regions.bed"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
153 <output name="dmp" file="Differentially_Methylated_Positions.bed"/>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
154 </test>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
155 </tests>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
156 <help><![CDATA[
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
157
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
158 .. class:: infomark
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
159
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
160 **What it does**
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
161
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
162 The workflow combines 5 main steps, starting with raw intensity data loading (.idat) and then optional preprocessing and normalisation of the data. The next quality control step performs an additional sample check to remove low-quality data, which normalisation cannot detect. The workflow gives the user the opportunity to perform any of these preparation and data cleaning steps, including the highly recommended genetic variation annotation step resulting in single nucleotide polymorphism identification and removal. Finally, the dataset generated through all of these steps can be used to hunt (find) differentially-methylated positions (DMP) and regions (DMR) with respect to a phenotype covariate.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
163
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
164 ***Inputs***
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
165
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
166 *Series of .IDAT files*: red and green .idat file for each sample on the chip intensity data.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
167
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
168 *(optional) Preprocessing Methods*: by this step probes can be stratified by region via quantile normalization or by extended implementation of functional normalisation recommended for cases where global changes are expected such as in cancer-normal comparisons. In addition unwanted probes containing either a SNP at the CpG interrogation or at the single nucleotide extension can be removed (recommended).
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
169
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
170 *Phenotype Table*: table of compared probes and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure).
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
171
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
172 ========== ============== ===============
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
173 Accession Sensitivity Treatment
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
174 ---------- -------------- ---------------
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
175 GSM1588704 sensitive MAPKi
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
176 ---------- -------------- ---------------
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
177 GSM1588705 sensitive MAPKi
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
178 ---------- -------------- ---------------
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
179 GSM1588706 resistant BRAFi
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
180 ========== ============== ===============
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
181
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
182 *Note*: phenotype covariate table must include the following information:
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
183 sampleID/Accession, phenotype and paired or unpaired samples column
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
184
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
185 *Genome Table*: a reference genome that contains the nucleotide sequence of the chromosomes. It is representative of a specific genome build and release.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
186
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
187 ***Outputs***
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
188
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
189 *Quality Control Report and Plot*: quality control (QC) outputs plot of the log median intensity in both the methylated (M) and unmethylated (U) channels. When plotting these two medians against each other the good samples cluster together, while failed samples tend to separate and have lower median intensities.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
190
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
191 *(optional) SNPInfo Table*: matrix of the chromosome and the position of each SNP on a given Affymetrix SNP Array.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
192
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
193 *Differentially Methylated Regions*: consecutive genomic locations differentially methylated in the same direction, saved as multiple track lines in a single BED file.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
194
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
195 *Differentially Methylated Positions*: single genomic positions that have a different methylation level in two different groups of samples (or conditions), saved as multiple track lines in a single BED file.
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
196 ]]></help>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
197 <citations>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
198 <citation type="doi">10.18129/B9.bioc.illuminaio</citation>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
199 </citations>
e67f424d3f42 Uploaded Analysis Pipeline
kpbioteam
parents:
diff changeset
200 </tool>