diff bismark_methylation_extractor.xml @ 0:62c6da72dd4a draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 09:57:36 -0400
parents
children 91f07ff056ca
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bismark_methylation_extractor.xml	Sat Jul 06 09:57:36 2013 -0400
@@ -0,0 +1,306 @@
+<tool id="bismark_methylation_extractor" name="Bismark" version="0.7.12">
+    <!-- Wrapper compatible with Bismark version 0.7.7 -->
+    <description>methylation extractor</description>
+    <!--<version_command>bismark_methylation_extractor version</version_command>-->
+    <requirements>
+        <requirement type="set_environment">SCRIPT_PATH</requirement>
+        <requirement type="package" version="0.12.8">bowtie</requirement>
+        <requirement type="package" version="2.0.0-beta7">bowtie2</requirement>
+    </requirements>
+    <parallelism method="basic"></parallelism>
+    <command interpreter="python">
+        bismark_methylation_extractor.py
+
+        --infile $input
+
+        --bismark_path \$SCRIPT_PATH
+
+        #if $singlePaired.sPaired == "single":
+            --single-end
+        #else:
+            --paired-end
+            $singlePaired.no_overlap
+        #end if
+
+        #if str($ignore_bps) != "0":
+           --ignore $ignore_bps
+        #end if
+
+        #if $report:
+            --report-file $o_report
+        #end if
+
+        #if $comprehensive:
+            --comprehensive
+        #end if
+
+        #if $merge_non_cpg:
+            --merge-non-cpg
+        #end if
+
+        #if $compress:
+            --compress $compressed_output
+        #else:
+            #if $comprehensive == False and $merge_non_cpg == False:
+                ##twelfe files
+                --cpg_ot $cpg_ot
+                --chg_ot $chg_ot
+                --chh_ot $chh_ot
+                --cpg_ctot $cpg_ctot
+                --chg_ctot $chg_ctot
+                --chh_ctot $chh_ctot
+                --cpg_ob $cpg_ob
+                --chg_ob $chg_ob
+                --chh_ob $chh_ob
+                --cpg_ctob $cpg_ctob
+                --chg_ctob $chg_ctob
+                --chh_ctob $chh_ctob
+            #elif $merge_non_cpg and $comprehensive:
+                ## two files
+                --non_cpg_context $non_cpg_context
+                --cpg_context $cpg_context
+            #elif $comprehensive:
+                ## three files
+                --cpg_context $cpg_context
+                --chg_context $chg_context
+                --chh_context $chh_context
+            #elif $merge_non_cpg:
+                ## eight files
+                --non_cpg_context_ctot $non_cpg_context_ctot
+                --non_cpg_context_ot $non_cpg_context_ot
+                --non_cpg_context_ob $non_cpg_context_ob
+                --non_cpg_context_ctob $non_cpg_context_ctob
+                --cpg_ot $cpg_ot
+                --cpg_ctot $cpg_ctot
+                --cpg_ob $cpg_ob
+                --cpg_ctob $cpg_ctob
+            #end if
+        ## end compress
+        #end if
+
+    </command>
+    <inputs>
+        <!-- Input Parameters -->
+        <param name="input" type="data" format="sam" label="SAM file from Bismark bisulfid mapper" />
+        <conditional name="singlePaired">
+            <param name="sPaired" type="select" label="Is this library mate-paired?">
+              <option value="single">Single-end</option>
+              <option value="paired">Paired-end</option>
+            </param>
+            <when value="single" />
+            <when value="paired">
+                <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" />
+            </when>
+        </conditional>
+
+       <param name="ignore_bps" type="integer" value="0" label="Ignore the first N bp when processing the methylation call string" />
+       <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info
+into context-dependent output files" help="" />
+       <param name="merge_non_cpg" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all non-CpG contexts into one file" help="This will produce eight strand-specific output files, or two output files in comprehensive mode." />
+       <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Short methylation summary output" />
+       <param name="compress" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Compress all result files and output one single file" />
+
+    </inputs>
+    <outputs>
+        <!--
+            OT – original top strand
+            CTOT – complementary to original top strand
+            OB – original bottom strand
+            CTOB – complementary to original bottom strand
+        -->
+        <data format="tabular" name="o_report" label="${tool.name} on ${on_string}: Report file">
+          <filter> ( report is True ) </filter>
+        </data>
+
+        <!-- default output 12 files -->
+        <data format="tabular" name="cpg_ot" label="${tool.name} on ${on_string}: CpG original top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chg_ot" label="${tool.name} on ${on_string}: CHG original top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chh_ot" label="${tool.name} on ${on_string}: CHH original top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="cpg_ctot" label="${tool.name} on ${on_string}: CpG complementary to top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chg_ctot" label="${tool.name} on ${on_string}: CHG complementary to top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chh_ctot" label="${tool.name} on ${on_string}: CHH complementary to top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+
+        <data format="tabular" name="cpg_ob" label="${tool.name} on ${on_string}: CpG original bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chg_ob" label="${tool.name} on ${on_string}: CHG original bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chh_ob" label="${tool.name} on ${on_string}: CHH original bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="cpg_ctob" label="${tool.name} on ${on_string}: CpG complementary to bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chg_ctob" label="${tool.name} on ${on_string}: CHG complementary to bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chh_ctob" label="${tool.name} on ${on_string}: CHH complementary to bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_CpG == False) </filter>
+        </data>
+
+        <!-- Context-dependent methylation output files (comprehensive option) -->
+        <data format="tabular" name="cpg_context" label="${tool.name} on ${on_string}: CpG context dependent">
+          <filter> ( compress == False and comprehensive) </filter>
+        </data>
+        <data format="tabular" name="chg_context" label="${tool.name} on ${on_string}: CHG context dependent">
+          <filter> ( compress == False and comprehensive and merge_non_CpG == False) </filter>
+        </data>
+        <data format="tabular" name="chh_context" label="${tool.name} on ${on_string}: CHH context dependent">
+          <filter> ( compress == False and comprehensive and merge_non_CpG == False) </filter>
+        </data>
+
+        <data format="tabular" name="non_cpg_context" label="${tool.name} on ${on_string}: Non CpG context dependent">
+          <filter> ( compress == False and comprehensive and merge_non_cpg) </filter>
+        </data>
+
+        <data format="tabular" name="non_cpg_context_ot" label="${tool.name} on ${on_string}: Non CpG context dependent on original top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter>
+        </data>
+        <data format="tabular" name="non_cpg_context_ctot" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter>
+        </data>
+        <data format="tabular" name="non_cpg_context_ob" label="${tool.name} on ${on_string}: Non CpG context dependent on bottom top strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter>
+        </data>
+        <data format="tabular" name="non_cpg_context_ctob" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to bottom strand">
+          <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter>
+        </data>
+
+        <data format="gzipped" name="compressed_output" label="${tool.name} on ${on_string}: Result archive.">
+          <filter> ( compress ) </filter>
+        </data>
+    </outputs>
+
+    <tests>
+    </tests>
+
+    <help>
+
+**What it does**
+
+The following is a brief description of all options to control the Bismark_
+methylation extractor. The script reads in a bisulfite read alignment results file 
+produced by the Bismark bisulfite mapper and extracts the methylation information
+for individual cytosines. This information is found in the methylation call field
+which can contain the following characters:
+
+
+  - X = for methylated C in CHG context (was protected)
+  - x = for not methylated C CHG (was converted)
+  - H = for methylated C in CHH context (was protected)
+  - h = for not methylated C in CHH context (was converted)
+  - Z = for methylated C in CpG context (was protected)
+  - z = for not methylated C in CpG context (was converted)
+  - . = for any bases not involving cytosines
+
+
+The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
+context (this distinction is actually already made in Bismark itself). As the methylation
+information for every C analysed can produce files which easily have tens or even hundreds of
+millions of lines, file sizes can become very large and more difficult to handle. The C
+methylation info additionally splits cytosine methylation calls up into one of the four possible
+strands a given bisulfite read aligned against:
+
+  - OT = original top strand
+  - CTOT = complementary to original top strand
+
+  - OB = original bottom strand
+  - CTOB = complementary to original bottom strand
+
+Thus, by default twelve individual output files are being generated per input file (unless
+--comprehensive is specified, see below). The output files can be imported into a genome
+viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
+unless the bisulfite reads were generated preserving directionality it doesn't make any
+sense to look at the data in a strand-specific manner). Strand-specific output files can
+optionally be skipped, in which case only three output files for CpG, CHG or CHH context
+will be generated. For both the strand-specific and comprehensive outputs there is also
+the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
+
+
+.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+
+It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2.
+
+-------
+
+**Bismark settings**
+
+All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin.
+
+------
+
+**Outputs**
+
+The output files are in the following format (tab delimited)::
+
+
+    Column  Description
+  --------  --------------------------------------------------------
+      1     seq-ID
+      2     strand
+      3     chromosome
+      4     position
+      5     methylation call
+
+
+  * Methylated cytosines receive a '+' orientation,
+  * Unmethylated cytosines receive a '-' orientation.
+
+------
+
+**OPTIONS**
+
+Input::
+
+  -s/--single-end          Input file(s) are Bismark result file(s) generated from single-end
+                           read data. Specifying either --single-end or --paired-end is
+                           mandatory.
+
+  -p/--paired-end          Input file(s) are Bismark result file(s) generated from paired-end
+                           read data. Specifying either --paired-end or --single-end is
+                           mandatory.
+
+  --no_overlap             For paired-end reads it is theoretically possible that read_1 and
+                           read_2 overlap. This option avoids scoring overlapping methylation
+                           calls twice. Whilst this removes a bias towards more methylation calls
+                           towards the center of sequenced fragments it can de facto remove
+                           a good proportion of the data.
+
+  --ignore INT             Ignore the first INT bp at the 5' end of each read when processing the
+                           methylation call string. This can remove e.g. a restriction enzyme site
+                           at the start of each read.
+
+Output::
+
+  --comprehensive          Specifying this option will merge all four possible strand-specific 
+                           methylation info into context-dependent output files. The default 
+                           contexts are:
+                            - CpG context
+                            - CHG context
+                            - CHH context
+
+  --merge_non_CpG          This will produce two output files (in --comprehensive mode) or eight
+                           strand-specific output files (default) for Cs in
+                            - CpG context
+                            - non-CpG context
+
+  --report                 Prints out a short methylation summary as well as the paramaters used to run
+                           this script.
+
+
+  </help>
+</tool>