diff minfi_analysis.xml @ 81:8ab24a5229bd draft

"planemo upload for repository https://github.com/kpbioteam/ewas_galaxy commit 323de18b21453fa652ae3abb7e847704e13a65ed"
author kpbioteam
date Tue, 11 Feb 2020 09:14:55 -0500
parents e67f424d3f42
children 369fef559cfc
line wrap: on
line diff
--- a/minfi_analysis.xml	Sun Jan 26 16:58:09 2020 -0500
+++ b/minfi_analysis.xml	Tue Feb 11 09:14:55 2020 -0500
@@ -1,30 +1,31 @@
-<tool id="minfi_analysis" name="Infinium Human Methylation BeadChip" version="2.1.0">
-    <description>Determines differentially methylated regions and positions from idat files</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements">
-  <requirement type="package" version="0.6.0">bioconductor-illuminahumanmethylation450kanno.ilmn12.hg19</requirement>
-    </expand>
-    <command detect_errors="exit_code"><![CDATA[
+<?xml version='1.0'?>
+<tool id='minfi_analysis' name='Infinium Human Methylation BeadChip' version='2.1.0'>
+  <description>Determines differentially methylated regions and positions from Infinium Methylation Assays</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro='requirements'>
+    <requirement type='package' version='0.6.0'>bioconductor-illuminahumanmethylation450kanno.ilmn12.hg19</requirement>
+  </expand>
+  <command detect_errors='exit_code'><![CDATA[
       #for $counter, $input in enumerate($files_red):
-        #set $redname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( "/", '-' ).replace( "\t", "-" )
+        #set $redname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( '/', '-' ).replace( '\t', '-' )
         ln -s $input ./${redname} &&
       #end for
       #for $counter, $input in enumerate($files_grn):
-        #set $grnname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( "/", '-' ).replace( "\t", "-" )
+        #set $grnname = str( getattr( $input, 'element_identifier', 'sample' ) ).replace( '/', '-' ).replace( '\t', '-' )
         ln -s $input ./${grnname} &&
       #end for
       Rscript '$minfi_analysis_script'
       ]]></command>
-      <configfiles>
-      <configfile name="minfi_analysis_script"><![CDATA[
-	require("minfi", quietly = TRUE)
-	require("IlluminaHumanMethylation27kanno.ilmn12.hg19", quietly = TRUE)
-	require("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)
-        require("IlluminaHumanMethylationEPICanno.ilm10b4.hg19", quietly = TRUE)
+  <configfiles>
+    <configfile name='minfi_analysis_script'><![CDATA[
+	require('minfi', quietly = TRUE)
+	require('IlluminaHumanMethylation27kanno.ilmn12.hg19', quietly = TRUE)
+	require('IlluminaHumanMethylation450kanno.ilmn12.hg19', quietly = TRUE)
+        require('IlluminaHumanMethylationEPICanno.ilm10b4.hg19', quietly = TRUE)
 	options(warn = -1)
-	RGSet <- read.metharray(list.files(pattern="_Red.idat")) #load .IDAT files
+	RGSet <- read.metharray(list.files(pattern='_Red.idat')) #load .IDAT files
 
 	MSet <- preprocessRaw(RGSet) #create objects contains CpGs signals
 
@@ -34,23 +35,23 @@
         plotQC(qc)
 	dev.off()
 
-	RSet <- ratioConvert(MSet, what = "both", keepCN = TRUE) #store Beta values and/or M values
+	RSet <- ratioConvert(MSet, what = 'both', keepCN = TRUE) #store Beta values and/or M values
 	GRSet <- mapToGenome(RSet)
 
 
-	if ('$optpp' == "na" ) {
+	if ('$optpp' == 'na' ) {
 	GRSet <- mapToGenome(RSet) #mapping Ilumina methylation array data to the genome
-	} else if ('$optpp' == "ppfun"  ) {
+	} else if ('$optpp' == 'ppfun'  ) {
 	GRSet <- preprocessFunnorm(RGSet) #optional - implements the functional normalization algorithm
-	} else  if ('$optpp' == "ppq" ) {
+	} else  if ('$optpp' == 'ppq' ) {
 	GRSet <- preprocessQuantile(RGSet, fixOutliers = TRUE,
         removeBadSamples = TRUE, badSampleCutoff = 10.5,
         quantileNormalize = TRUE, stratified = TRUE,
         mergeManifest = FALSE, sex = NULL) #optional - implements stratified quantile normalization preprocessing
-	}  else if ('$optpp' == "ppsnp" ) {
+	}  else if ('$optpp' == 'ppsnp' ) {
 	snps <- getSnpInfo(GRSet) #optional - retrieve the chromosome and the position of each SNP
 	write.table(snps, '$table')
-	GRSet <- dropLociWithSnps(GRSet, snps=c("SBE","CpG"), maf=0) #optional - drop the probes that contain either a SNP at the CpG interrogation or at the single nucleotide extensions
+	GRSet <- dropLociWithSnps(GRSet, snps=c('SBE','CpG'), maf=0) #optional - drop the probes that contain either a SNP at the CpG interrogation or at the single nucleotide extensions
 	}
 	pheno <- read.table('$phenotype_table',skip = 1)
         group <- pheno\$V2
@@ -75,85 +76,79 @@
         nullMethod = '$null_method',
         B = as.numeric('$number_of_resamples'))
         dmrGR <- dmrs\$table[,c(1,2,3)] 
-        colnames(dmrGR) <- c("chr","start","end")
-	write.table(dmrGR, file= '$dmr', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = "\t")
+        colnames(dmrGR) <- c('chr','start','end')
+	write.table(dmrGR, file= '$dmr', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = '\t')
       
         tab <- read.table('$ucsc_genome')
         tab <- tab[,-(11:14),drop=FALSE]
         tab <- tab[,c(1,4,5,10)]
         colnames(tab) <- c('chr','start','end','names')
 
-        dmp <- dmpFinder(dat = getBeta(GRSet),pheno =  read.table('$phenotype_table',skip=1)[,"V2"], type = '$phenotype', qCutoff = as.numeric('$q_cutoff'), shrinkVar = '$variance_shrinkage')
-        dmp[,"names"] <- rownames(dmp)
-        data <- merge(dmp, tab, by="names",sort = TRUE)
+        dmp <- dmpFinder(dat = getBeta(GRSet),pheno =  read.table('$phenotype_table',skip=1)[,'V2'], type = '$phenotype', qCutoff = as.numeric('$q_cutoff'), shrinkVar = '$variance_shrinkage')
+        dmp[,'names'] <- rownames(dmp)
+        data <- merge(dmp, tab, by='names',sort = TRUE)
         data <- data[,c(6,7,8,1,4,5)]
-        write.table(data, file= '$dmp', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = "\t")
+        write.table(data, file= '$dmp', quote = FALSE,col.names = TRUE, row.names = FALSE, sep = '\t')
 
-	]]> </configfile>
-        </configfiles>
-	<inputs>	
- 	<param type="data" name="files_red" multiple="true" format="idat" label="Red .IDAT files" help="Red .IDAT files extension is followed by the unmethylated signal intensity read in the red channel."/>
-	<param type="data" name="files_grn" multiple="true" format="idat" label="Green .IDAT files" help="Green .IDAT files extension is followed by the methylated signal intensity read in the green channel."/>
-            <param name="optpp" type="select" label="(Optional) Preprocessing Method" help="Mapping Ilumina methylation array data to the genome with or without additional preprocess.">
-            <option value="na">No Selection (use default)</option>
-	    <option value="ppfun">Preprocess Funnorm</option>
-	    <option value="ppq">Preprocess Quantile</option>
-            <option value="ppsnp">Remove SNPs</option>
-            </param> 
-            <param type="data" name="phenotype_table" format="tabular" label="Phenotype Table"
-            help="Phenotype Table must include the following information: sampleID, phenotype and paird or unpaired samples column."/>
-            <param name="maxgap_size" type="integer" value="250" label="maxGap Size"
-            help="If cluster is not provided this maximum location gap will be used to define cluster."/>
-            <param name="cutoff_size" type="float" value="0.1" label="Cutoff Size"
-            help="A numeric value. Values of the estimate of the genomic profile above the cutoff or below the negative of the cutoff will be used as candidate regions. It is possible to give two separate values (upper and lower bounds). If one value is given, the lower bound is minus the value."/> 
-            <param name="number_of_resamples" type="integer" value="0" label="Number of Resamples"
-            help="An integer denoting the number of resamples to use when computing null distributions. This defaults to 0. If permutations is supplied that defines the number of permutations/bootstraps and B is ignored."/>
-            <param name="null_method" type="select" label="null Method" help="Method used to generate null candidate regions (defaults to ‘permutation’). Note that for cases with more than one covariate the permutation approach is not generally recommended. ">
-            <option value="permutation" selected="True">permutation</option>
-            <option value="bootstrap">bootstrap</option>
-            </param>
-            <param type="data" name="phenotype_table" format="tabular" label="Phenotype Table" help="Table of compared probes and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure)."/>
-            <param name="phenotype" type="select" label="Phenotype Type">
-            <option value="categorical">categorical</option>
-            <option value="continuous">continuous</option>
-            </param> 
-            <param name="q_cutoff" type="float" value="1" label="qCutoff Size" help="DMPs with an FDR q-value greater than this will not be returned."/>
-            <param name="variance_shrinkage" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Variance Shrinkage"
-            help="Enable variance shrinkage is recommended when sample sizes are small."/>
-            <param type="data" name="ucsc_genome" format="gtf" label="Genome Table" help="Reference Sequence e.g. wgEncodeHaibMethyl450Gm12878SitesRep1."/>
-        </inputs>
-	<outputs>
-		<data name="qctab" format="txt" label="Quality Control Report"/>
-		<data name="qcpng" format="png" label="Quality Control Plot"/>
-		<data name="table" format="txt" label="SNPInfo Table"/>
-		<data name="dmr" format="bed" label="Differentially Methylated Regions"/>
-	        <data name="dmp" format="bed" label="Differentially Methylated Positions"/>
-	</outputs>
-              <tests>
-		<test>
-		<param name="files_red" value="GSM1588707_8795207119_R06C02_Red.idat,GSM1588706_8795207135_R02C02_Red.idat,GSM1588705_8795207119_R05C02_Red.idat,GSM1588704_8795207135_R01C02_Red.idat" ftype="idat"/>
-		<param name="files_grn" value="GSM1588707_8795207119_R06C02_Grn.idat,GSM1588706_8795207135_R02C02_Grn.idat,GSM1588705_8795207119_R05C02_Grn.idat,GSM1588704_8795207135_R01C02_Grn.idat" ftype="idat"/>
-		<param name="optpp" value="ppsnp"/>
-		<param name="grset" value="GRSet_without_SNPs.rdata"/>
-                <param name="phenotype_table" value="phenotypeTable.txt"/>
-                <param name="maxgap_size" value="250"/>
-                <param name="cutoff_size" value="0.1"/>
-                <param name="number_of_resamples" value="0"/>
-		<param name="null_method" value="permutation"/>
-		<param name="grset" value="GRSet_without_SNPs.rdata"/>
-                <param name="phenotype_table" value="phenotypeTable.txt"/>
-                <param name="phenotype" value="categorical"/>
-                <param name="q_cutoff" value="1"/>
-                <param name="variance_shrinkage" value="FALSE"/>
-                <param name="ucsc_genome" value="ucsc.gtf"/>
-		<output name="qctab" file="Quality_Control_Report.txt"/>
-		<output name="qcpng" file="Quality_Control_Plot.png" compare="sim_size"/>
-		<output name="table" file="SNPInfo_Table.txt"/>
-		<output name="dmr" file="Differentially_Methylated_Regions.bed"/>
-		<output name="dmp" file="Differentially_Methylated_Positions.bed"/>
-             </test>
-          </tests>
-<help><![CDATA[
+	]]></configfile>
+  </configfiles>
+  <inputs>
+    <param type='data' name='files_red' multiple='true' format='idat' label='Red .IDAT files' help='Red .IDAT files extension is followed by the unmethylated signal intensity read in the red channel.'/>
+    <param type='data' name='files_grn' multiple='true' format='idat' label='Green .IDAT files' help='Green .IDAT files extension is followed by the methylated signal intensity read in the green channel.'/>
+    <param name='optpp' type='select' label='(Optional) Preprocessing Method' help='Mapping Illumina methylation array data to the genome with or without additional preprocessing methods.'>
+      <option value='na'>No Selection (use default)</option>
+      <option value='ppfun'>Preprocess Funnorm</option>
+      <option value='ppq'>Preprocess Quantile</option>
+      <option value='ppsnp'>Remove SNPs</option>
+    </param>
+    <param type='data' name='phenotype_table' format='tabular' label='Phenotype Table' help='Phenotype Table must include the following information: sampleID, phenotype and paired or unpaired samples column.'/>
+    <param name='maxgap_size' type='integer' value='250' label='maxGap Size' help='If cluster is not provided this maximum location gap will be used to define cluster.'/>
+    <param name='cutoff_size' type='float' value='0.1' label='Cutoff Size' help='A numeric value. Values of the estimate of the genomic profile above the cutoff or below the negative of the cutoff will be used as candidate regions. It is possible to give two separate values (upper and lower bounds). If one value is given, the lower bound is minus the value.'/>
+    <param name='number_of_resamples' type='integer' value='0' label='Number of Resamples' help='An integer denoting the number of resamples to use when computing null distributions. This defaults to 0. If permutations is supplied that defines the number of permutations/bootstraps and B is ignored.'/>
+    <param name='null_method' type='select' label='null Method' help='Method used to generate null candidate regions (defaults to &#x2018;permutation&#x2019;). Note that for cases with more than one covariate the permutation approach is not generally recommended. '>
+      <option value='permutation' selected='True'>permutation</option>
+      <option value='bootstrap'>bootstrap</option>
+    </param>
+    <param type='data' name='phenotype_table' format='tabular' label='Phenotype Table' help='Table of compared samples and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure).'/>
+    <param name='phenotype' type='select' label='Phenotype Type'>
+      <option value='categorical'>categorical</option>
+      <option value='continuous'>continuous</option>
+    </param>
+    <param name='q_cutoff' type='float' value='1' label='qCutoff Size' help='DMPs with an FDR q-value greater than this will not be returned.'/>
+    <param name='variance_shrinkage' type='boolean' truevalue='TRUE' falsevalue='FALSE' label='Variance Shrinkage' help='Enabling variance shrinkage is recommended when sample sizes are small.'/>
+    <param type='data' name='ucsc_genome' format='gtf' label='Genome Table' help='Reference Sequence e.g. wgEncodeHaibMethyl450Gm12878SitesRep1.'/>
+  </inputs>
+  <outputs>
+    <data name='qctab' format='txt' label='Quality Control Report'/>
+    <data name='qcpng' format='png' label='Quality Control Plot'/>
+    <data name='table' format='txt' label='SNPInfo Table'/>
+    <data name='dmr' format='bed' label='Differentially Methylated Regions'/>
+    <data name='dmp' format='bed' label='Differentially Methylated Positions'/>
+  </outputs>
+  <tests>
+    <test>
+      <param name='files_red' value='GSM1588707_8795207119_R06C02_Red.idat,GSM1588706_8795207135_R02C02_Red.idat,GSM1588705_8795207119_R05C02_Red.idat,GSM1588704_8795207135_R01C02_Red.idat' ftype='idat'/>
+      <param name='files_grn' value='GSM1588707_8795207119_R06C02_Grn.idat,GSM1588706_8795207135_R02C02_Grn.idat,GSM1588705_8795207119_R05C02_Grn.idat,GSM1588704_8795207135_R01C02_Grn.idat' ftype='idat'/>
+      <param name='optpp' value='ppsnp'/>
+      <param name='grset' value='GRSet_without_SNPs.rdata'/>
+      <param name='phenotype_table' value='phenotypeTable.txt'/>
+      <param name='maxgap_size' value='250'/>
+      <param name='cutoff_size' value='0.1'/>
+      <param name='number_of_resamples' value='0'/>
+      <param name='null_method' value='permutation'/>
+      <param name='grset' value='GRSet_without_SNPs.rdata'/>
+      <param name='phenotype' value='categorical'/>
+      <param name='q_cutoff' value='1'/>
+      <param name='variance_shrinkage' value='FALSE'/>
+      <param name='ucsc_genome' value='ucsc.gtf'/>
+      <output name='qctab' file='Quality_Control_Report.txt'/>
+      <output name='qcpng' file='Quality_Control_Plot.png' compare='sim_size'/>
+      <output name='table' file='SNPInfo_Table.txt'/>
+      <output name='dmr' file='Differentially_Methylated_Regions.bed'/>
+      <output name='dmp' file='Differentially_Methylated_Positions.bed'/>
+    </test>
+  </tests>
+  <help><![CDATA[
 
 .. class:: infomark
 	
@@ -167,7 +162,7 @@
 
 *(optional) Preprocessing Methods*: by this step probes can be stratified by region via quantile normalization or by extended implementation of functional normalisation recommended for cases where global changes are expected such as in cancer-normal comparisons. In addition unwanted probes containing either a SNP at the CpG interrogation or at the single nucleotide extension can be removed (recommended).   
 
-*Phenotype Table*: table of compared probes and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure).   
+*Phenotype Table*: table of compared samples and their characteristics, may be categorical (e.g. cancer vs. normal) or continuous (e.g. blood pressure).   
 
 ========== ============== ===============
 Accession  Sensitivity    Treatment
@@ -177,6 +172,8 @@
 GSM1588705 sensitive      MAPKi
 ---------- -------------- ---------------
 GSM1588706 resistant      BRAFi
+---------- -------------- ---------------
+GSM1588707 resistant      BRAFi
 ========== ============== ===============    
 
 *Note*: phenotype covariate table must include the following information:
@@ -194,7 +191,7 @@
 
 *Differentially Methylated Positions*: single genomic position that has a different methylated level in two different groups of samples (or conditions) save as multiple track lines in a single BED file.
   ]]></help>
-    <citations>
-        <citation type="doi">10.18129/B9.bioc.illuminaio</citation>
-</citations>
-</tool>	
+  <citations>
+    <citation type='doi'>10.18129/B9.bioc.illuminaio</citation>
+  </citations>
+</tool>