Repository 'cytosine_report_to_bedgraph'
hg clone https://toolshed.g2.bx.psu.edu/repos/charles-bernard/cytosine_report_to_bedgraph

Changeset 0:b03e31cab4a6 (2016-11-14)
Next changeset 1:03eefc41396e (2016-11-14)
Commit message:
Uploaded
added:
cytosine_report_to_bedgraph/.shed.yml
cytosine_report_to_bedgraph/bismark2bedgraph.awk
cytosine_report_to_bedgraph/bismark2bedgraph.sh
cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml
cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py
cytosine_report_to_bedgraph/tool-data/igv_genomes_chr_sizes.loc.sample
cytosine_report_to_bedgraph/tool_data_table_conf.xml.sample
cytosine_report_to_bedgraph/tool_dependencies.xml
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/.shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/.shed.yml Mon Nov 14 04:55:30 2016 -0500
b
@@ -0,0 +1,14 @@
+categories:
+- Convert Formats
+description: Converts genome-wide cytosine methylation report to Bedgraphs
+long_description: |
+    This tool takes as input a genome-wide cytosine methylation report (generated by the tool Bismark Meth. Extractor) and converts it into a bedGraph for each cytosine context (CpG, CHG and CHH).
+    These bedGraphs display, for a given context, the ratio of methylation of each covered cytosine in the genome.
+
+    It also produces a bedGraph displaying the coverage count of each cytosine in the genome (non-covered cytosine are ignored).
+
+    The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (Integrative Genomics Viewer).
+    In this respect, the tool can optionally generate a tdf binary file (Tiled Data Format) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph.
+name: bismark
+owner: charles-bernard
+type: unrestricted
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/bismark2bedgraph.awk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/bismark2bedgraph.awk Mon Nov 14 04:55:30 2016 -0500
b
@@ -0,0 +1,29 @@
+#!/usr/bin/awk
+
+BEGIN {
+ FS = "\t";
+}
+
+{
+ if ( $6 ~ context && ($4 > 0 || $5 > 0) ) { 
+
+ chr_name = $1;
+ chr_pos = $2;
+ strand = $3;
+ c_meth_count = $4;
+ c_unmeth_count = $5;
+
+ if ( coverage == "true" ) {
+ nb_reads = c_meth_count + c_unmeth_count
+ printf("%s\t%s\t%s\t%s\n", chr_name, chr_pos, chr_pos, nb_reads)
+ } else {
+ if ( strand == "-") { 
+ s = "-"; 
+ } else { 
+ s = "";
+ }
+ meth_ratio = c_meth_count / (c_meth_count + c_unmeth_count); 
+ printf("%s\t%s\t%s\t%s%s\n", chr_name, chr_pos, chr_pos, s, meth_ratio)
+ }
+ }
+}
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/bismark2bedgraph.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/bismark2bedgraph.sh Mon Nov 14 04:55:30 2016 -0500
[
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+# read the options
+TEMP=`getopt -o e::i:co: --long epi::,infile_cov:,context,output_dir:,tool_dir:,tdf,igv_genome: -n 'report' -- "$@"`
+eval set -- "$TEMP"
+
+# extract options and their arguments into variables.
+while true ; do
+ case "$1" in
+ -e | --epi )
+ case "$2" in
+ "" ) epi="current_job"; shift 2 ;;
+ *) epi=$2; shift 2 ;;
+ esac ;;
+ -i | --infile_cov )
+ case "$2" in
+ *) infile_cov=$2; shift 2 ;;
+ esac ;;
+ -c | --context ) 
+ context=true; shift ;;
+ --tdf )
+ tdf=true; shift ;;
+ --igv_genome )
+ case "$2" in
+ *) igv_genome=$2; shift 2 ;;
+ esac ;;
+ -o | --output_dir )
+ case "$2" in
+ *) output_dir=$2; shift 2 ;;
+ esac ;;
+ --tool_dir )
+ case "$2" in
+ *) tool_dir=$2; shift 2 ;;
+ esac ;;
+ -- ) shift; break ;;
+ * ) echo "Internal error!"; exit 1 ;;
+ esac
+done
+
+# do something with the variables -- in this case the lamest possible one :-)
+echo "epi = $epi"
+echo "infile_cov = $infile_cov"
+echo "output_dir = $output_dir"
+echo "context = $context"
+echo "tool_dir = $tool_dir"
+echo "tdf = $tdf"
+echo "igv_genome = $igv_genome"
+
+#IGV_path
+IGV_path="/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin"
+
+# define outputs according to options
+if [[ "$context" = true ]]; then
+ context_list=("CG" "CHG" "CHH")
+ output_types=("CG" "CHG" "CHH" "coverage")
+ bedgraph_list=("$output_dir""/""$epi""_CpG.bedgraph" "$output_dir""/""$epi""_CHG.bedgraph" "$output_dir""/""$epi""_CHH.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph")
+ tdf_list=("$output_dir""/""$epi""_CpG.tdf" "$output_dir""/""$epi""_CHG.tdf" "$output_dir""/""$epi""_CHH.tdf" "$output_dir""/""$epi""_coverage.tdf")
+ n="4"
+else
+ context_list=(".*")
+ output_types=("CXX" "coverage")
+ bedgraph_list=("$output_dir""/""$epi""_CXX.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph")
+ tdf_list=("$output_dir""/""$epi""_CXX.tdf" "$output_dir""/""$epi""_coverage.tdf")
+ n="2"
+fi
+
+# process
+for (( i=0; i<$n; i++)); do
+ printf "________________________________________________________________________\n"
+ printf "Processing %s\n" ${output_types[$i]}
+ printf "... Converting Cytosine Report to Bedgraph\n" 
+ if (( i < n - 1 )); then
+ #if not coverage:
+ #printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}"
+ printf "#<Chr>\t<Start>\t<End>\t<Strand;Meth_ratio>\n" > "${bedgraph_list[$i]}"
+ awk -v context="${context_list[$i]}" -v coverage="false" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}"
+ else
+ #printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}"
+ printf "#<Chr>\t<Start>\t<End>\t<Coverage>\n" > "${bedgraph_list[$i]}"
+ awk -v context="${context_list[$i]}" -v coverage="true" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}"
+ fi
+ if [[ "$tdf" = true ]]; then
+ printf "... Converting Bedgraph to Tdf\n"
+ "$IGV_path""/"igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file  
+ fi
+done
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml Mon Nov 14 04:55:30 2016 -0500
[
@@ -0,0 +1,112 @@
+<tool id="cytosine_report_to_bedgraph" name="Cytosine Report To Bedgraphs" version="0.0.0">
+
+  <description>Converts genome-wide cytosine methylation report to Bedgraphs</description>
+
+  <requirements>
+   <requirement type="package" version="2.3.32">igvtools</requirement>
+  </requirements>
+
+  <command interpreter="python">
+<![CDATA[
+        cytosine_report_to_bedgraph_wrapper.py
+
+        --tool_dir "$__tool_directory__"
+
+        --cytosine_report "$cytosine_report"
+
+        --coverage_bedgraph "$coverage_bedgraph"
+        #if $tdf['tdf_selector']:
+          --tdf
+          --igv_genome "${tdf.built_in_igv_genomes_chr_sizes.fields.path}"
+          --coverage_tdf "$coverage_tdf"
+        #end if
+
+        #if $context:
+          --context
+          --CpG_bedgraph "$CpG_bedgraph"
+          --CHG_bedgraph "$CHG_bedgraph"
+          --CHH_bedgraph "$CHH_bedgraph"
+          #if $tdf['tdf_selector']:
+            --CpG_tdf "$CpG_tdf"
+            --CHG_tdf "$CHG_tdf"
+            --CHH_tdf "$CHH_tdf"
+          #end if
+        #else
+          --CXX "$CXX_bedgraph"
+          #if $tdf['tdf_selector']:
+            --CXX "$CXX_tdf"
+          #end if
+        #end if  
+
+        --log "$log_report"
+]]>
+  </command>
+  <inputs>
+    <param name="cytosine_report" type="data" format="txt" label="Submit a genome-wide cytosine methylation report" optional="False" />
+    <param name="context" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Create a bedGraph for each cytosine methylation context?" />
+    <conditional name="tdf">
+      <param name="tdf_selector" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a tdf file from each resulting bedGraph?" />
+      <when value="true">
+        <param name="built_in_igv_genomes_chr_sizes" type="select" label="Select Reference Genome (igv genomes chr.sizes)" help="If your genome of interest is not listed, contact your Galaxy admin">
+          <options from_data_table="igv_genomes_chr_sizes">
+            <filter type="sort_by" column="name"/>
+            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+          </options>
+        </param>
+      </when>
+      <when value="false"></when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="log_report" format="txt" label="${tool.name} on ${on_string}: Log Report" />
+    <data name="CpG_bedgraph" format="bedgraph" label="${tool.name} on ${on_string}: CpG context">
+      <filter>context is True</filter>
+    </data>
+    <data name="CHG_bedgraph" format="bedgraph" label="${tool.name} on ${on_string}: CHG context">
+      <filter>context is True</filter>
+    </data>
+    <data name="CHH_bedgraph" format="bedgraph" label="${tool.name} on ${on_string}: CHH context">
+      <filter>context is True</filter>
+    </data>
+    <data name="CXX_bedgraph" format="bedgraph" label="${tool.name} on ${on_string}: CXX">
+      <filter>context is False</filter>
+    </data>
+    <data name="coverage_bedgraph" format="bedgraph" label="${tool.name} on ${on_string}: Coverage" />
+    <data name="CpG_tdf" format="tdf" label="${tool.name} on ${on_string}: CpG context (tdf)">
+      <filter>tdf['tdf_selector'] is True and context is True</filter>
+    </data>
+    <data name="CHG_tdf" format="tdf" label="${tool.name} on ${on_string}: CHG context (tdf)">
+          <filter>tdf['tdf_selector'] is True and context is True</filter>
+    </data>
+    <data name="CHH_tdf" format="tdf" label="${tool.name} on ${on_string}: CHH context (tdf)">
+          <filter>tdf['tdf_selector'] is True and context is True</filter>
+    </data>
+    <data name="CXX_tdf" format="tdf" label="${tool.name} on ${on_string}: CXX (tdf)">
+          <filter>context is False and tdf['tdf_selector'] is True</filter>
+    </data>
+    <data name="coverage_tdf" format="tdf" label="${tool.name} on ${on_string}: Coverage (tdf)" >
+      <filter>tdf['tdf_selector'] is True</filter>
+    </data>
+  </outputs>
+
+  <tests></tests>
+
+  <help>
+<![CDATA[
+
+**What it does**
+
+  | This tool takes as input a genome-wide cytosine methylation report (generated by the tool *Bismark Meth. Extractor*) and converts it into a bedGraph for each cytosine context (CpG, CHG and CHH). 
+  | These bedGraphs display, for a given context, the ratio of methylation of each covered cytosine in the genome.
+  |
+  | It also produces a bedGraph displaying the coverage count of each cytosine in the genome (non-covered cytosine are ignored).
+  | 
+
+  .. class:: infomark
+
+  | The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (*Integrative Genomics Viewer*).
+  | In this respect, the tool can optionally generate a tdf binary file (*Tiled Data Format*) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph.
+]]>
+  </help>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py Mon Nov 14 04:55:30 2016 -0500
[
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from glob import glob
+from optparse import OptionParser
+
+def __main__():
+    parser = OptionParser()
+    parser.add_option("--tool_dir", dest = 'tool_dir', action = 'store', nargs = 1, metavar = 'tool_dir', type = "str")
+    parser.add_option("--cytosine_report", dest='cytosine_report', action='store', nargs=1, metavar='cytosine_report', type="str")
+    parser.add_option("--context", action="store_true")
+    parser.add_option("--CpG_bedgraph", dest='CpG_bedgraph', action="store", nargs = 1, metavar = 'CpG_bedgraph', type='str')
+    parser.add_option("--CHH_bedgraph", dest='CHH_bedgraph', action="store", nargs=1, metavar='CHH_bedgraph', type='str')
+    parser.add_option("--CHG_bedgraph", dest='CHG_bedgraph', action="store", nargs=1, metavar='CHG_bedgraph', type='str')
+    parser.add_option("--CXX_bedgraph", dest='CXX_bedgraph', action="store", nargs=1, metavar='CXX_bedgraph', type='str')
+    parser.add_option("--coverage_bedgraph", dest='coverage_bedgraph', action="store", nargs=1, metavar='coverage_bedgraph', type='str')
+    parser.add_option("--tdf", action="store_true")
+    parser.add_option("--igv_genome", dest='igv_genome', action='store', nargs=1, metavar='igv_genome', type='str')
+    parser.add_option("--CpG_tdf", dest='CpG_tdf', action="store", nargs=1, metavar='CpG_tdf', type='str')
+    parser.add_option("--CHH_tdf", dest='CHH_tdf', action="store", nargs=1, metavar='CHH_tdf', type='str')
+    parser.add_option("--CHG_tdf", dest='CHG_tdf', action="store", nargs=1, metavar='CHG_tdf', type='str')
+    parser.add_option("--CXX_tdf", dest='CXX_tdf', action="store", nargs=1, metavar='CXX_tdf', type='str')
+    parser.add_option("--coverage_tdf", dest='coverage_tdf', action="store", nargs=1, metavar='coverage_tdf', type='str')
+    parser.add_option("--log", dest='log_report', action="store", nargs=1, metavar='log_report')
+
+    options, args = parser.parse_args()
+
+    if options.log_report:
+     logging.basicConfig(level=logging.INFO, filename=options.log_report, filemode="a+", format='%(message)s')
+    else:
+     logging.basicConfig(level=logging.INFO, filename='galaxy_log_report.log', filemode="a+", format='%(message)s')
+    logging.info('_______________________________________________________________\n')
+    logging.info('List of options: %s\n' % options)
+
+    script_path=os.path.join(options.tool_dir, 'bismark2bedgraph.sh')
+    tmp_dir = tempfile.mkdtemp(prefix='tmp')
+
+    if options.context:
+        context_option = '-c'
+    else:
+        context_option = ''
+
+    if options.tdf:
+        tdf_option = '--tdf'
+        genome_option = '--igv_genome'
+        genome = options.igv_genome
+    else:
+        tdf_option = ''
+        genome_option = ''
+        genome = ''
+
+    logging.info('_______________________________________________________________\n')
+    logging.info('COMMAND:\n')
+    logging.info("bash %s -i %s %s %s %s %s -e current_job -o %s\n" % (script_path, options.cytosine_report, context_option, tdf_option, genome_option, genome, tmp_dir))
+
+    proc = subprocess.Popen(['bash', script_path, '--tool_dir', options.tool_dir, '-i', options.cytosine_report,\
+                             context_option, tdf_option, genome_option, genome, '-e', 'current_job', '-o', tmp_dir], \
+                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    proc_out, proc_err = proc.communicate()
+
+    if proc_out:
+     logging.info('_______________________________________________________________\n')
+     logging.info('Tool STDOUT:\n')
+     logging.info(proc_out)
+    if proc_err:
+     logging.info('_______________________________________________________________\n')
+     logging.info('ERROR:\n')
+     msg = 'The tool crashed with the following error:\n%s\n' % proc_err
+     logging.critical(msg)
+     sys.exit(msg)
+
+    if options.context:
+        shutil.move( glob(os.path.join(tmp_dir, '*_CpG.bedgraph'))[0], options.CpG_bedgraph )
+        shutil.move( glob(os.path.join(tmp_dir, '*_CHG.bedgraph'))[0], options.CHG_bedgraph )
+        shutil.move( glob(os.path.join(tmp_dir, '*_CHH.bedgraph'))[0], options.CHH_bedgraph )
+        if options.tdf:
+            shutil.move(glob(os.path.join(tmp_dir, '*_CpG.tdf'))[0], options.CpG_tdf)
+            shutil.move(glob(os.path.join(tmp_dir, '*_CHG.tdf'))[0], options.CHG_tdf)
+            shutil.move(glob(os.path.join(tmp_dir, '*_CHH.tdf'))[0], options.CHH_tdf)
+    else:
+        shutil.move(glob(os.path.join(tmp_dir, '*_CXX.bedgraph'))[0], options.CXX_bedgraph)
+        if options.tdf:
+            shutil.move(glob(os.path.join(tmp_dir, '*_CXX.tdf'))[0], options.CXX_tdf)
+    shutil.move(glob(os.path.join(tmp_dir, '*_coverage.bedgraph'))[0], options.coverage_bedgraph)
+    if options.tdf:
+        shutil.move(glob(os.path.join(tmp_dir, '*_coverage.tdf'))[0], options.coverage_tdf)
+
+    if os.path.exists(tmp_dir):
+        shutil.rmtree(tmp_dir)
+
+if __name__=="__main__": __main__()
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/tool-data/igv_genomes_chr_sizes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/tool-data/igv_genomes_chr_sizes.loc.sample Mon Nov 14 04:55:30 2016 -0500
b
@@ -0,0 +1,2 @@
+#<value> <dbkey> <name> <path>
+Arabidopsis_thaliana_TAIR10 Arabidopsis_thaliana_TAIR10 Arabidopsis thaliana: TAIR10 /export/home1/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin/genomes/tair10.chrom.sizes
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/tool_data_table_conf.xml.sample Mon Nov 14 04:55:30 2016 -0500
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of all igv genomes chr sizes -->
+     <table name="igv_genomes_chr_sizes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/igv_genomes_chr_sizes.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytosine_report_to_bedgraph/tool_dependencies.xml Mon Nov 14 04:55:30 2016 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="igv_tools" version="2.3.32">
+        <repository changeset_revision="3c087cee3b8f" name="package_igvtools_2_3_32" owner="geert-vandeweyer" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>