view methylkit.xml @ 0:a8705df7c57f draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/methylkit commit 14f0b39f64982773ef0367379b915f742eabcc1b
author rnateam
date Wed, 21 Dec 2016 17:30:57 -0500
parents
children
line wrap: on
line source

<tool id="methylkit" name="methylKit" version="0.99.2">
  <description>DNA methylation analysis and annotation</description>
  <macros>
        <macro name="differential_methylation">
          <!-- Settings for the differential-methylation step. The first four
               selects are passed to calculateDiffMeth(); the two cutoffs and
               "type" are used when reporting and subsetting the results.
               Option labels are kept on one line so they do not carry
               embedded newlines and indentation. -->
          <param name="overdispersion" type="select"
              label="overdispersion"
              help="if set to 'none' (default), no overdispersion
              correction will be attempted. 'MN' applies a scaling
              parameter to variance estimated by the model.
              If set to 'shrinkMN' (experimental parameter),
              scaling parameter will be shrunk towards a common value">
              <option value="none" selected="True">none</option>
              <option value="MN">MN</option>
              <option value="shrinkMN">shrinkMN</option>
          </param>
          <param name="adjust" type="select"
              label="adjust"
              help="different methods to correct the p-values
              for multiple testing (default: SLIM).">
              <option value="SLIM" selected="True">SLIM</option>
              <option value="holm">holm</option>
              <option value="hochberg">hochberg</option>
              <option value="hommel">hommel</option>
              <option value="bonferroni">bonferroni</option>
              <option value="BH">BH</option>
              <option value="BY">BY</option>
              <option value="fdr">fdr</option>
              <option value="none">none</option>
              <option value="qvalue">qvalue</option>
          </param>
          <param name="effect" type="select"
              label="effect"
              help="method to calculate the mean methylation difference
              between groups using read coverage as weights (default: wmean).
              When set to 'mean', the generic mean is applied and when
              set to 'predicted', predicted means from the logistic
              regression model are used for calculating the effect.">
              <option value="wmean" selected="True">wmean</option>
              <option value="mean">mean</option>
              <option value="predicted">predicted</option>
          </param>
          <param name="test" type="select"
              label="test"
              help="the statistical test used to determine
              the methylation differences (default: Chisq-test).
              The F-test can be chosen
              if overdispersion control is applied.">
              <option value="Chisq" selected="True">Chisq</option>
              <option value="F">F</option>
          </param>
          <!-- Thresholds shared by diffMethPerChr() and getMethylDiff(). -->
          <param name="qvalue_cutoff" type="float"
              value="0.01" label="qvalue.cutoff"
              help="cutoff for qvalue of differential methylation statistic (default:0.01)">
              <validator type="in_range"
                  message="Minimum 0 and maximum 1" min="0" max="1"/>
          </param>
          <param name="meth_cutoff" type="float"
              value="25" label="meth.cutoff"
              help="cutoff for absolute value of methylation percentage change between test and control (default:25)">
              <validator type="in_range"
                  message="Minimum 0 and maximum 100" min="0" max="100"/>
          </param>
          <!-- Direction filter handed to getMethylDiff(). -->
          <param name="type" type="select"
              label="type"
              help="For retrieving
              hyper-methylated regions/bases type='hyper',
              for hypo-methylated type='hypo' (default:'all')">
              <option value="all" selected="True">all</option>
              <option value="hyper">hyper</option>
              <option value="hypo">hypo</option>
          </param>
        </macro>
        <macro name="clustering">
          <!-- Both selects are forwarded to clusterSamples() in the generated
               R script. Option labels are kept on one line so they do not
               carry embedded newlines and indentation. -->
          <param name="dist" type="select"
              label="dist"
              help="the distance measure to be used.
              (default: correlation)">
              <option value="correlation" selected="True">correlation</option>
              <option value="euclidean">euclidean</option>
              <option value="maximum">maximum</option>
              <option value="manhattan">manhattan</option>
              <option value="canberra">canberra</option>
              <option value="binary">binary</option>
              <option value="minkowski">minkowski</option>
          </param>
          <param name="method" type="select"
              label="method"
              help="the agglomeration method to be used.
              (default: ward)">
              <option value="ward" selected="True">ward</option>
              <option value="single">single</option>
              <option value="complete">complete</option>
              <option value="average">average</option>
              <option value="mcquitty">mcquitty</option>
              <option value="median">median</option>
              <option value="centroid">centroid</option>
          </param>
        </macro>
  </macros>
  <!-- Runtime dependency: the methylKit package that the generated R script
       loads. The pinned version equals the tool's own version attribute. -->
  <requirements>
    <requirement type="package" version="0.99.2">bioconductor-methylkit</requirement>
  </requirements>
  <stdio>
        <!-- Each pattern is searched for in both stdout and stderr; a match
             marks the job as fatally failed with the given description. -->
        <regex match="Execution halted"
           source="both"
           level="fatal"
           description="Execution halted." />
        <regex match="Input-Error 01"
           source="both"
           level="fatal"
           description="Error in your input parameters: Make sure you only apply factors to selected samples." />
        <!-- Generic catch-all for R error messages. -->
        <regex match="Error in"
           source="both"
           level="fatal"
           description="An undefined error occurred, please check your input carefully and contact your administrator." />
  </stdio>
  <!-- Runs the R script rendered from the "script_file" configfile. The path
       is quoted so that it is passed to Rscript as a single argument. -->
  <command>
<![CDATA[
      Rscript '$script_file'
]]>
  </command>
  <configfiles>
    <configfile name="script_file">
        library("methylKit")

        ## Sample labels and file paths, collected separately per group.
        test_ids = list()
        test_files = list()
        control_ids = list()
        control_files = list()

        #for $idx, $rep in enumerate( $test_series )
          test_ids[${idx}+1] = paste0("test ", ${idx}+1)
          test_files[${idx}+1] = "${rep.input.file_name}"
        #end for

        #for $idx, $rep in enumerate( $control_series )
          control_ids[${idx}+1] = paste0("control ", ${idx}+1)
          control_files[${idx}+1] = "${rep.input.file_name}"
        #end for

        ## Test samples come first and are flagged 1, controls follow with 0.
        all_files = c(test_files, control_files)
        all_ids = c(test_ids, control_ids)
        group_flag = c(rep.int(1, length(test_ids)), rep.int(0, length(control_ids)))

        meth_raw = methRead(all_files, sample.id=all_ids, assembly="${assembly}",
                            pipeline="${pipeline}", treatment=group_flag)

        ## Methylation and coverage statistics for every sample, all in one PDF.
        pdf("output_statistics.pdf")
        for (one_sample in meth_raw){
          getMethylationStats(one_sample, plot=TRUE, both.strands=FALSE)
          getCoverageStats(one_sample, plot=TRUE, both.strands=FALSE)
        }
        closed = dev.off()

        ## Merge the samples into a single object used by the later steps.
        meth_united = unite(meth_raw)

        pdf("output_correlation.pdf")
        getCorrelation(object = meth_united, plot=TRUE, method = "${correlation}")
        closed = dev.off()

        #if $input_type.choice in ["all", "differential_methylation"]:
            ## slim and weighted.mean duplicate what adjust and effect
            ## already select, so both are switched off to avoid a
            ## possible conflict.
            diff_all = calculateDiffMeth(meth_united,
                                         overdispersion="${input_type.overdispersion}",
                                         adjust="${input_type.adjust}",
                                         effect="${input_type.effect}",
                                         test="${input_type.test}",
                                         slim=FALSE, weighted.mean=FALSE)

            bedgraph(diff_all, file.name="output_myDiff.bedgraph", col.name="meth.diff",
                     unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="")

            per_chr = diffMethPerChr(diff_all, plot=FALSE,
                                     qvalue.cutoff=${input_type.qvalue_cutoff},
                                     meth.cutoff=${input_type.meth_cutoff})
            write.table(per_chr, sep="\t", row.names=FALSE, quote=FALSE, file="output_MethPerChr.tsv")

            ## Subset selected by the user-supplied cutoffs and direction.
            diff_subset = getMethylDiff(diff_all, difference=${input_type.meth_cutoff},
                                        qvalue=${input_type.qvalue_cutoff}, type="${input_type.type}")
            bedgraph(diff_subset, file.name="output_MethylDiff.bedgraph", col.name="meth.diff",
                     unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="")
        #end if

        #if $input_type.choice in ["all", "clustering"]:
            pdf( "output_clustering.pdf" )
            sample_tree = clusterSamples(meth_united, dist="${input_type.dist}", method="${input_type.method}")
            closed = dev.off()

            pdf( "output_PCA.pdf" )
            PCASamples(meth_united)
            closed = dev.off()
        #end if

        #if $input_type.choice in ["all", "segmentation"]:
            ## methSeg works for methylRaw or methylDiff with resolution region,
            ## so the united object is tiled before the differential call.
            tiled_raw = tileMethylCounts(meth_raw[[1]])
            tiled_united = tileMethylCounts(meth_united)
            tiled_diff = calculateDiffMeth(tiled_united)

            ## methSeg generates GRanges
            seg_raw = methSeg(tiled_raw, diagnostic.plot = FALSE)
            seg_diff = methSeg(tiled_diff, diagnostic.plot = FALSE)

            ## which are then exported as BED
            methSeg2bed(segments = seg_raw, filename = "output_seg_raw.bed")
            methSeg2bed(segments = seg_diff, filename = "output_seg_diff.bed")
        #end if
    </configfile>
  </configfiles>
  <inputs>
    <!-- Files added here are labelled "test N" and given treatment flag 1
         by the generated script. -->
    <repeat name="test_series" title="Test samples" min="1">
        <param name="input" type="data" format="tabular" label="Add a file"
            help="Such input file may be obtained from AMP pipeline for aligning RRBS reads.">
            <validator type="unspecified_build" />
        </param>
    </repeat>
    <!-- Files added here are labelled "control N" and given treatment flag 0. -->
    <repeat name="control_series" title="Control samples" min="1">
        <param name="input" type="data" format="tabular" label="Add a file"
            help="Such input file may be obtained from AMP pipeline for aligning RRBS reads." >
            <validator type="unspecified_build" />
        </param>
    </repeat>
    <!-- assembly and pipeline are handed to methRead(); correlation to
         getCorrelation(). Option labels are kept on one line so they do not
         carry embedded newlines and indentation. -->
    <param name="assembly" type="text"
        value="hg18" label="assembly"
        help="A string that defines the genome assembly such as
        hg18, mm9 (default: hg18).">
    </param>
    <param name="correlation" type="select"
        label="correlation"
        help="correlation method (default: pearson)">
        <option value="pearson" selected="True">pearson</option>
        <option value="kendall">kendall</option>
        <option value="spearman">spearman</option>
    </param>
    <param name="pipeline" type="select"
        label="pipeline"
        help="name of the alignment pipeline (default: amp)">
        <option value="amp" selected="True">amp</option>
        <option value="bismark">bismark</option>
        <option value="bismarkCoverage">bismarkCoverage</option>
        <option value="bismarkCytosineReport">bismarkCytosineReport</option>
    </param>
    <!-- Chooses which analyses the script runs and which of the two
         parameter macros are shown; segmentation has no extra parameters. -->
    <conditional name="input_type">
        <param name="choice" type="select"
            label="analysis to carry out:"
            help="The analysis you wish to carry out.">
            <option value="all" selected="True">All provided analysis</option>
            <option value="differential_methylation">Differential methylation</option>
            <option value="clustering">Clustering</option>
            <option value="segmentation">Segmentation</option>
        </param>
        <when value="all">
          <expand macro="differential_methylation" />
          <expand macro="clustering" />
        </when>
        <when value="differential_methylation">
          <expand macro="differential_methylation" />
        </when>
        <when value="clustering">
          <expand macro="clustering" />
        </when>
        <when value="segmentation" />
    </conditional>
  </inputs>
  <outputs>
      <!-- Produced on every run, whichever analysis is selected. -->
      <data name="output_statistics" format="pdf" from_work_dir="output_statistics.pdf"
            label="${tool.name} on ${on_string}: CpG statistics" />
      <data name="output_correlation" format="pdf" from_work_dir="output_correlation.pdf"
            label="${tool.name} on ${on_string}: correlation between samples" />

      <!-- Differential methylation results. -->
      <data name="output_myDiff" format="bedgraph" from_work_dir="output_myDiff.bedgraph"
            label="${tool.name} on ${on_string}: differential methylation">
          <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
      </data>
      <data name="output_MethylDiff" format="bedgraph" from_work_dir="output_MethylDiff.bedgraph"
            label="${tool.name} on ${on_string}: differential methylation - subset">
          <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
      </data>
      <data name="output_MethPerChr" format="tabular" from_work_dir="output_MethPerChr.tsv"
            label="${tool.name} on ${on_string}: number of hyper/hypo sites">
          <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
      </data>

      <!-- Clustering results. -->
      <data name="output_clustering" format="pdf" from_work_dir="output_clustering.pdf"
            label="${tool.name} on ${on_string}: hierarchical clustering">
          <filter>input_type['choice'] in ['all', 'clustering']</filter>
      </data>
      <data name="output_PCA" format="pdf" from_work_dir="output_PCA.pdf"
            label="${tool.name} on ${on_string}: PCA">
          <filter>input_type['choice'] in ['all', 'clustering']</filter>
      </data>

      <!-- Segmentation results. -->
      <data name="output_seg_raw" format="bed" from_work_dir="output_seg_raw.bed"
            label="${tool.name} on ${on_string}: methylation segment">
          <filter>input_type['choice'] in ['all', 'segmentation']</filter>
      </data>
      <data name="output_seg_diff" format="bed" from_work_dir="output_seg_diff.bed"
            label="${tool.name} on ${on_string}: differential methylation segment">
          <filter>input_type['choice'] in ['all', 'segmentation']</filter>
      </data>
  </outputs>
  <tests>
    <!-- Single end-to-end run: two test and two control samples, every
         analysis enabled, all parameters at their default values. -->
    <test>
        <repeat name="test_series">
            <param name="input" value="input_test1.myCpG.txt" dbkey="hg18" ftype="tabular" />
        </repeat>
        <repeat name="test_series">
            <param name="input" value="input_test2.myCpG.txt" dbkey="hg18" ftype="tabular" />
        </repeat>
        <repeat name="control_series">
            <param name="input" value="input_control1.myCpG.txt" dbkey="hg18" ftype="tabular" />
        </repeat>
        <repeat name="control_series">
            <param name="input" value="input_control2.myCpG.txt" dbkey="hg18" ftype="tabular" />
        </repeat>

        <param name="assembly" value="hg18" />
        <param name="correlation" value="pearson" />
        <param name="pipeline" value="amp" />
        <param name="choice" value="all" />

        <param name="overdispersion" value="none" />
        <param name="adjust" value="SLIM" />
        <param name="effect" value="wmean" />
        <param name="test" value="Chisq" />
        <param name="qvalue_cutoff" value="0.01" />
        <param name="meth_cutoff" value="25" />
        <param name="type" value="all" />

        <param name="dist" value="correlation" />
        <param name="method" value="ward" />

        <!-- PDF outputs are compared by size; text outputs by content. -->
        <output name="output_statistics" file="output_statistics.pdf" ftype="pdf" compare="sim_size"/>
        <output name="output_correlation" file="output_correlation.pdf" ftype="pdf" compare="sim_size"/>
        <output name="output_myDiff" file="output_myDiff.bedgraph" ftype="bedgraph"/>
        <output name="output_MethPerChr" file="output_MethPerChr.tsv" ftype="tabular"/>
        <output name="output_MethylDiff" file="output_MethylDiff.bedgraph" ftype="bedgraph"/>
        <output name="output_clustering" file="output_clustering.pdf" ftype="pdf" compare="sim_size"/>
        <output name="output_PCA" file="output_PCA.pdf" ftype="pdf" compare="sim_size"/>
        <output name="output_seg_raw" file="output_seg_raw.bed" ftype="bed"/>
        <output name="output_seg_diff" file="output_seg_diff.bed" ftype="bed"/>
    </test>
  </tests>
<help>
<![CDATA[
.. class:: infomark

**What it does**

`methylKit`_ is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing.
The package is designed to deal with sequencing data from RRBS and
its variants, but also target-capture methods such as Agilent SureSelect
methyl-seq. In addition, methylKit can deal with base-pair resolution data
for 5hmC obtained from Tab-seq or oxBS-seq. It can also handle whole-genome
bisulfite sequencing data if proper input format is provided.

.. _methylKit: https://github.com/al2na/methylKit

The Galaxy tool enables three types of analysis:
  * differential methylation
  * clustering
  * segmentation

The user can choose to run all provided analyses or an individual one.

.. class:: infomark

**Input**

Typically, bisulfite-converted reads are aligned to the genome and the
% methylation value per base is calculated by processing the alignments.
methylKit takes this per-base % methylation information as input.
Such an input file may be obtained from the `AMP`_ pipeline
for aligning RRBS reads. A typical input file looks like this::


    chrBase       chr   base    strand coverage freqC freqT

    chr21.9764539 chr21 9764539 R      12       25.00 75.00

    chr21.9764513 chr21 9764513 F      12       0.00  100.00


.. _AMP: http://code.google.com/p/amp-errbs/

.. class:: infomark

**Output**

The outputs from differential methylation
  * ``differential methylation``: The `bedgraph`_ file contains differentially methylated bases/regions and the corresponding statistics.
  * ``differential methylation - subset``: The bedgraph file contains the subset of differentially methylated bases/regions that satisfies the user defined thresholds with qvalue.cutoff and meth.cutoff.
  * ``number of hyper/hypo sites``: The tabular file contains number of hyper/hypo methylated regions/bases.

.. _bedgraph: https://genome.ucsc.edu/goldenpath/help/bedgraph.html

The outputs from clustering
  * ``hierarchical clustering``: The figure shows hierarchical clustering using methylation data.
  * ``PCA``: The figure shows principal components analysis of methylation data.

The output from segmentation
  * ``methylation segment``: The `bed`_ file contains the profile of methylation segment.
  * ``differential methylation segment``: The bed file contains the profile of differential methylation segment.

.. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1
]]>
</help>
<!-- Publication shown to users as the reference to cite for this tool. -->
<citations>
    <citation type="doi">10.1186/gb-2012-13-10-r87</citation>
</citations>
</tool>