Mercurial > repos > nturaga > minfi_pipeline

<?xml version="1.0" encoding="UTF-8"?>
<tool id="minfi_pipeline" name="Minfi pipeline" version="1.0">
    <description>to analyze Illumina 450k data</description>
    <macros>
        <import>minfi_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Job command line (Cheetah template). It stages the rendered
        minfi_get_files configfile as minfi_temp/minfi_config.txt and then runs
        minfi_pipeline.R from the tool directory. The "basic" branch passes
        fixed values; the "advanced" branch forwards the values chosen in the
        tool form.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## -p keeps this step from failing when the directory already exists.
        mkdir -p minfi_temp
        &&
        cp "${minfi_get_files}" ./minfi_temp/minfi_config.txt
        &&
        ## Prints the slot count of the job to stdout.
        echo \$GALAXY_SLOTS
        &&
        ## The script path is quoted so a tool directory containing spaces still works.
        Rscript '${__tool_directory__}/minfi_pipeline.R'
        --quiet="TRUE"
        --preprocess="${preprocess.preprocess_method}"
        --cores="\${GALAXY_SLOTS:-4}"
        #if str( $minfi_param_type.minfi_param_type_selector ) == "advanced":
            --numPositions=${minfi_param_type.numPositions}
            --shrinkVar=${minfi_param_type.shrinkVar}
            --b_permutations=${minfi_param_type.b_permutations}
            --smooth=${minfi_param_type.smooth}
            --cutoff=${minfi_param_type.cutoff}
            --l_value=${minfi_param_type.l_value}
        #else:
            --numPositions=1000
            --shrinkVar=TRUE
            --b_permutations=25
            --smooth=FALSE
            --cutoff=0.3
            --l_value=4
        #end if
    ]]></command>
    <!--
        Sample sheet template. For every element of the "control" and "case"
        collections it emits one whitespace-separated line: group label,
        forward dataset, reverse dataset, element name. The command section
        copies the rendered file to minfi_temp/minfi_config.txt (presumably
        read from there by minfi_pipeline.R; confirm against the script).
        NOTE(review): the first template line begins with the Cheetah comment
        marker, so it is presumably absent from the rendered file; confirm.
    -->
    <configfiles>
        <configfile name="minfi_get_files"><![CDATA[### Parse the HDA's to get the path of each forward and reverse dataset
          #for $key in $control.keys()
            control $control[$key].forward  $control[$key].reverse $control[$key].name
          #end for
          #for $key in $case.keys()
            case  $case[$key].forward $case[$key].reverse $case[$key].name
          #end for]]></configfile>
    </configfiles>
    <!--
        Tool form. Two list:paired IDAT collections (control and case), the
        preprocessing method, and an optional set of advanced parameters whose
        defaults mirror the fixed values passed by the "basic" branch of the
        command.
    -->
    <inputs>
        <!--<param name="experiment" size="30" type="text" value="Experiment" label="Label your experiment/analysis"/>-->
        <!-- Labels name the group explicitly so they agree with the param names and the help text. -->
        <param name="control" type="data_collection" format="idat" label="Control samples (e.g. normal / wildtype)" collection_type="list:paired" help="Input data needs to be a list of dataset pairs, where the files are in IDAT format" />
        <param name="case" type="data_collection" format="idat" label="Case samples (e.g. disease / treatment)" collection_type="list:paired" help="Input data needs to be a list of dataset pairs, where the files are in IDAT format" />
        <!-- The conditional wrapper is kept so the parameter path preprocess.preprocess_method stays unchanged. -->
        <conditional name="preprocess">
            <param name="preprocess_method" type="select" label="Select Preprocessing Method">
                <option value="quantile">Quantile Normalization (Recommended)</option>
                <option value="funnorm">Functional Normalization (Recommended)</option>
                <option value="illumina">Illumina:Genome Studio Normalization</option>
                <option value="swan">Subset-quantile Within Array Normalisation</option>
                <option value="noob">Noob background correction method or Noob Normalization</option>
            </param>
            <when value="quantile" />
            <when value="funnorm" />
            <when value="illumina" />
            <when value="swan" />
            <when value="noob" />
        </conditional>
        <conditional name="minfi_param_type">
            <param name="minfi_param_type_selector" type="select" label="Basic or Advanced Minfi Parameters">
                <option value="basic" selected="True">Basic Default settings</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic">
                <!-- No extra parameters: the command passes fixed defaults in this case. -->
            </when>
            <when value="advanced">
                <!-- Give options for choosing "numPositions in MDS plot here -->
                <param name="numPositions" type="integer" value="1000" label="numPositions" help="Refer the tool's help section" />
                <!-- Give options for estimating cell counts here -->
                <!-- Give options for Shrink Var in DMP finder here -->
                <!-- Checked by default so the advanced form starts from the same value (TRUE) the basic branch passes. -->
                <param name="shrinkVar" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="(ShrinkVar) Should variance shrinkage be used?" help="Refer the tool's help section" />
                <!-- Give Bumphunter options here like B,smooth,cutoff, length of dmrs-->
                <param name="b_permutations" type="integer" value="25" label="Number of times resampled" help="Refer the tool's help section" />
                <param name="smooth" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Should a smoothing function be used?" help="Refer the tool's help section" />
                <param name="cutoff" type="float" value="0.3" label="Cut off for selecting candidate regions" help="Refer the tool's help section" />
                <param name="l_value" type="integer" value="4" label="Length of DMRs to be selected" help="Refer the tool's help section" />
            </when>
        </conditional>
    </inputs>
    <!--
        Output datasets, each collected from a file of the given name in the
        job working directory. The names are plain identifiers (no dots) so
        they are valid template placeholders and match the names referenced by
        the test section of this tool.
    -->
    <outputs>
        <!-- PLOT OUTPUTS -->
        <data name="qc_report" from_work_dir="qc_report.pdf" format="pdf" label="QC report" />
        <data name="mds_plot" from_work_dir="mds_plot.pdf" format="pdf" label="MDS plot" />
        <!-- CSV outputs -->
        <data name="dmps" from_work_dir="dmps.csv" format="csv" label="Differentially Methylated positions" />
        <data name="dmrs" from_work_dir="dmrs.csv" format="csv" label="Differentially Methylated Regions using Bumphunter" />
    </outputs>
    <!--
        Functional test: three case pairs and three control pairs, quantile
        preprocessing, basic parameters. Each pair uses the Grn file as the
        forward element and the Red file as the reverse element.
    -->
    <tests>
        <test>
            <param name="case">
                <collection type="list:paired">
                    <element name="5723646052_R02C02">
                        <collection type="paired">
                            <element name="forward" value="5723646052/5723646052_R02C02_Grn.idat" />
                            <element name="reverse" value="5723646052/5723646052_R02C02_Red.idat" />
                        </collection>
                    </element>
                    <element name="5723646052_R04C01">
                        <collection type="paired">
                            <element name="forward" value="5723646052/5723646052_R04C01_Grn.idat" />
                            <element name="reverse" value="5723646052/5723646052_R04C01_Red.idat" />
                        </collection>
                    </element>
                    <element name="5723646052_R05C02">
                        <collection type="paired">
                            <element name="forward" value="5723646052/5723646052_R05C02_Grn.idat" />
                            <element name="reverse" value="5723646052/5723646052_R05C02_Red.idat" />
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="control">
                <collection type="list:paired">
                    <element name="5723646053_R04C02">
                        <collection type="paired">
                            <element name="forward" value="5723646053/5723646053_R04C02_Grn.idat" />
                            <element name="reverse" value="5723646053/5723646053_R04C02_Red.idat" />
                        </collection>
                    </element>
                    <element name="5723646053_R05C02">
                        <collection type="paired">
                            <element name="forward" value="5723646053/5723646053_R05C02_Grn.idat" />
                            <element name="reverse" value="5723646053/5723646053_R05C02_Red.idat" />
                        </collection>
                    </element>
                    <element name="5723646053_R06C02">
                        <collection type="paired">
                            <element name="forward" value="5723646053/5723646053_R06C02_Grn.idat" />
                            <element name="reverse" value="5723646053/5723646053_R06C02_Red.idat" />
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="preprocess.preprocess_method" value="quantile" />
            <param name="minfi_param_type.minfi_param_type_selector" value="basic" />
            <!-- PDFs are compared by size: generated PDFs typically embed metadata that defeats an exact diff. -->
            <output name="qc_report" file="qc_report.pdf" ftype="pdf" compare="sim_size" />
            <output name="mds_plot" file="mds_plot.pdf" ftype="pdf" compare="sim_size" />
            <output name="dmps" file="dmps.csv" ftype="csv" />
            <output name="dmrs" file="dmrs.csv" ftype="csv" />
        </test>
    </tests>
    <help><![CDATA[.. class:: infomark

**What it does**

The minfi package provides tools for analyzing Illumina’s Methylation arrays, with a special
focus on the new 450k array for humans. The functionality addressed in this wrapper includes preprocessing, QC assessments, identification of interesting methylation loci and plotting functionality.


**INPUTS**:

*Case* : Dataset collection with all samples which are of one phenotype (Example: Cancer, Disease state, Phenotype 1)

*Control* : Dataset collection with all samples which are of base normal phenotype (Example: Normals, Non-Disease state, Phenotype 2)

*Select Preprocessing Method*:

Choose one of the many preprocessing methods available. For more information on the different preprocessing methods refer to the minfi manual_, https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf

*NOTE*
Many people ask us which normalization they should apply to their dataset. A good rule recommended by the authors of the package is: if there exist global biological methylation differences between your samples, as for instance in a dataset with cancer and normal samples, or a dataset with different tissues/cell types, use the preprocessFunnorm function, as it is aimed at such datasets. On the other hand, if you do not expect global differences between your samples, for instance in a blood dataset or a one-tissue dataset, use the preprocessQuantile function. In our experience, these two normalization procedures always perform better than the functions preprocessNoob, preprocessIllumina and preprocessSWAN discussed below. For convenience, these functions are still implemented in the minfi package. This section is taken from the excellent guide_ provided by Jean-Philippe Fortin and Kasper Daniel Hansen.


**OUTPUTS**:

Plots:

- Output 1: PDF file of the QC Report.
- Output 2: PDF file of the MDS plot.

CSV files:

- Output 1: CSV file containing Differentially Methylated Positions.
- Output 2: CSV file containing Differentially Methylated Regions calculated using Bumphunter.
- Output 3: CSV file containing Large scale Differentially methylated regions (note: this wrapper currently declares no output dataset for this file).


**HOW TO USE**

IDAT files (Both Red and Green channel). Make paired dataset collections, with RED and GREEN channel IDAT files.

Step 1: Upload IDAT(Both Red and green channel) files using the upload tool in Galaxy.

Step 2: Once the upload is completed, select the "Operations on Multiple Datasets" in the history panel.

Step 3: Select the list of IDAT files to be analyzed, and click "For all selected".

Step 4:

Choose the "Build List of Dataset pairs". Make the pairs and label the dataset collections. Once you enter the "Create a collection of paired datasets" dialogue box, click on "Clear filters" and then choose the "Forward" == Green channel, and "Reverse" == Red channel files. You should see the pairs in green color in the bottom panel.

Rename your common prefix for the file, by removing the trailing underscore "_", and name your collection. You should have one dataset collection for "Case" and another with "Control" (Normal vs Cancer or Treatment vs Wildtype)

Step 5: Once the two dataset collections are prepared, run the tool to run a minfi pipeline.


**ADVANCED PARAMETERS:**

Variance shrinkage (‘shrinkVar=TRUE’) is recommended when sample sizes are small (<10).
The sample variances are squeezed by computing empirical Bayes posterior means using
the ‘limma’ package.


B: An integer denoting the number of resamples to use when computing null distributions.
The underlying default is 0, but this tool passes 25 unless another value is set under the
advanced parameters. If ‘permutations’ is supplied that defines the number of
permutations/bootstraps and ‘B’ is ignored.


smooth: A logical value. If TRUE the estimated profile will be smoothed with the smoother
defined by ‘smoothFunction’


cutoff: A numeric value. Values of the estimate of the genomic profile above the cutoff
or below the negative of the cutoff will be used as candidate regions. It is possible
to give two separate values (upper and lower bounds). If one value is given, the lower
bound is minus the value.

.. _manual: https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf
.. _guide: https://www.bioconductor.org/help/course-materials/2015/BioC2015/methylation450k.html]]></help>
    <expand macro="citations" />
</tool>
author	nturaga
date	Tue, 19 Apr 2016 12:20:29 -0400
parents	84361ce36a11
children