# HG changeset patch # User charles-bernard # Date 1479117330 18000 # Node ID b03e31cab4a6883573c5e003da0276305d4ae76b Uploaded diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/.shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/.shed.yml Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,14 @@ +categories: +- Convert Formats +description: Converts genome-wide cytosine methylation report to Bedgraphs +long_description: | + This tool takes as input a genome-wide cytosine methylation report (generated by the tool Bismark Meth. Extractor) and converts it into a bedGraph for each cytosine context (CpG, CHG and CHH). + These bedGraphs display, for a given context, the ratio of methylation of each covered cytosine in the genome. + + It also produces a bedGraph displaying the coverage count of each cytosine in the genome (non-covered cytosine are ignored). + + The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (Integrative Genomics Viewer). + In this respect, the tool can optionally generate a tdf binary file (Tiled Data Format) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph. +name: bismark +owner: charles-bernard +type: unrestricted \ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/bismark2bedgraph.awk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/bismark2bedgraph.awk Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,29 @@ +#!/usr/bin/awk + +BEGIN { + FS = "\t"; +} + +{ + if ( $6 ~ context && ($4 > 0 || $5 > 0) ) { + + chr_name = $1; + chr_pos = $2; + strand = $3; + c_meth_count = $4; + c_unmeth_count = $5; + + if ( coverage == "true" ) { + nb_reads = c_meth_count + c_unmeth_count + printf("%s\t%s\t%s\t%s\n", chr_name, chr_pos, chr_pos, nb_reads) + } else { + if ( strand == "-") { + s = "-"; + } else { + s = ""; + } + meth_ratio = c_meth_count / (c_meth_count + c_unmeth_count); + printf("%s\t%s\t%s\t%s%s\n", chr_name, chr_pos, chr_pos, s, meth_ratio) + } + } +} \ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/bismark2bedgraph.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/bismark2bedgraph.sh Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,86 @@ +#!/bin/bash + +# read the options +TEMP=`getopt -o e::i:co: --long epi::,infile_cov:,context,output_dir:,tool_dir:,tdf,igv_genome: -n 'report' -- "$@"` +eval set -- "$TEMP" + +# extract options and their arguments into variables. +while true ; do + case "$1" in + -e | --epi ) + case "$2" in + "" ) epi="current_job"; shift 2 ;; + *) epi=$2; shift 2 ;; + esac ;; + -i | --infile_cov ) + case "$2" in + *) infile_cov=$2; shift 2 ;; + esac ;; + -c | --context ) + context=true; shift ;; + --tdf ) + tdf=true; shift ;; + --igv_genome ) + case "$2" in + *) igv_genome=$2; shift 2 ;; + esac ;; + -o | --output_dir ) + case "$2" in + *) output_dir=$2; shift 2 ;; + esac ;; + --tool_dir ) + case "$2" in + *) tool_dir=$2; shift 2 ;; + esac ;; + -- ) shift; break ;; + * ) echo "Internal error!"; exit 1 ;; + esac +done + +# do something with the variables -- in this case the lamest possible one :-) +echo "epi = $epi" +echo "infile_cov = $infile_cov" +echo "output_dir = $output_dir" +echo "context = $context" +echo "tool_dir = $tool_dir" +echo "tdf = $tdf" +echo "igv_genome = $igv_genome" + +#IGV_path +IGV_path="/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin" + +# define outputs according to options +if [[ "$context" = true ]]; then + context_list=("CG" "CHG" "CHH") + output_types=("CG" "CHG" "CHH" "coverage") + bedgraph_list=("$output_dir""/""$epi""_CpG.bedgraph" "$output_dir""/""$epi""_CHG.bedgraph" "$output_dir""/""$epi""_CHH.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph") + tdf_list=("$output_dir""/""$epi""_CpG.tdf" "$output_dir""/""$epi""_CHG.tdf" "$output_dir""/""$epi""_CHH.tdf" "$output_dir""/""$epi""_coverage.tdf") + n="4" +else + context_list=(".*") + output_types=("CXX" "coverage") + bedgraph_list=("$output_dir""/""$epi""_CXX.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph") + tdf_list=("$output_dir""/""$epi""_CXX.tdf" "$output_dir""/""$epi""_coverage.tdf") + n="2" +fi + +# process +for (( i=0; i<$n; i++)); do + printf "________________________________________________________________________\n" + printf "Processing %s\n" ${output_types[$i]} + printf "... Converting Cytosine Report to Bedgraph\n" + if (( i < n - 1 )); then + #if not coverage: + #printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}" + printf "#\t\t\t\n" > "${bedgraph_list[$i]}" + awk -v context="${context_list[$i]}" -v coverage="false" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}" + else + #printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}" + printf "#\t\t\t\n" > "${bedgraph_list[$i]}" + awk -v context="${context_list[$i]}" -v coverage="true" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}" + fi + if [[ "$tdf" = true ]]; then + printf "... Converting Bedgraph to Tdf\n" + "$IGV_path""/"igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file + fi +done \ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,112 @@ + + + Converts genome-wide cytosine methylation report to Bedgraphs + + + igvtools + + + + + + + + + + + + + + + + + + + + + + + + + + context is True + + + context is True + + + context is True + + + context is False + + + + tdf['tdf_selector'] is True and context is True + + + tdf['tdf_selector'] is True and context is True + + + tdf['tdf_selector'] is True and context is True + + + context is False and tdf['tdf_selector'] is True + + + tdf['tdf_selector'] is True + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +import logging +import os +import shutil +import subprocess +import sys +import tempfile +from glob import glob +from optparse import OptionParser + +def __main__(): + parser = OptionParser() + parser.add_option("--tool_dir", dest = 'tool_dir', action = 'store', nargs = 1, metavar = 'tool_dir', type = "str") + parser.add_option("--cytosine_report", dest='cytosine_report', action='store', nargs=1, metavar='cytosine_report', type="str") + parser.add_option("--context", action="store_true") + parser.add_option("--CpG_bedgraph", dest='CpG_bedgraph', action="store", nargs = 1, metavar = 'CpG_bedgraph', type='str') + parser.add_option("--CHH_bedgraph", dest='CHH_bedgraph', action="store", nargs=1, metavar='CHH_bedgraph', type='str') + parser.add_option("--CHG_bedgraph", dest='CHG_bedgraph', action="store", nargs=1, metavar='CHG_bedgraph', type='str') + parser.add_option("--CXX_bedgraph", dest='CXX_bedgraph', action="store", nargs=1, metavar='CXX_bedgraph', type='str') + parser.add_option("--coverage_bedgraph", dest='coverage_bedgraph', action="store", nargs=1, metavar='coverage_bedgraph', type='str') + parser.add_option("--tdf", action="store_true") + parser.add_option("--igv_genome", dest='igv_genome', action='store', nargs=1, metavar='igv_genome', type='str') + parser.add_option("--CpG_tdf", dest='CpG_tdf', action="store", nargs=1, metavar='CpG_tdf', type='str') + parser.add_option("--CHH_tdf", dest='CHH_tdf', action="store", nargs=1, metavar='CHH_tdf', type='str') + parser.add_option("--CHG_tdf", dest='CHG_tdf', action="store", nargs=1, metavar='CHG_tdf', type='str') + parser.add_option("--CXX_tdf", dest='CXX_tdf', action="store", nargs=1, metavar='CXX_tdf', type='str') + parser.add_option("--coverage_tdf", dest='coverage_tdf', action="store", nargs=1, metavar='coverage_tdf', type='str') + parser.add_option("--log", dest='log_report', action="store", nargs=1, metavar='log_report') + + options, args = parser.parse_args() + + if options.log_report: + logging.basicConfig(level=logging.INFO, filename=options.log_report, filemode="a+", format='%(message)s') + else: + logging.basicConfig(level=logging.INFO, filename='galaxy_log_report.log', filemode="a+", format='%(message)s') + logging.info('_______________________________________________________________\n') + logging.info('List of options: %s\n' % options) + + script_path=os.path.join(options.tool_dir, 'bismark2bedgraph.sh') + tmp_dir = tempfile.mkdtemp(prefix='tmp') + + if options.context: + context_option = '-c' + else: + context_option = '' + + if options.tdf: + tdf_option = '--tdf' + genome_option = '--igv_genome' + genome = options.igv_genome + else: + tdf_option = '' + genome_option = '' + genome = '' + + logging.info('_______________________________________________________________\n') + logging.info('COMMAND:\n') + logging.info("bash %s -i %s %s %s %s %s -e current_job -o %s\n" % (script_path, options.cytosine_report, context_option, tdf_option, genome_option, genome, tmp_dir)) + + proc = subprocess.Popen(['bash', script_path, '--tool_dir', options.tool_dir, '-i', options.cytosine_report,\ + context_option, tdf_option, genome_option, genome, '-e', 'current_job', '-o', tmp_dir], \ + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + proc_out, proc_err = proc.communicate() + + if proc_out: + logging.info('_______________________________________________________________\n') + logging.info('Tool STDOUT:\n') + logging.info(proc_out) + if proc_err: + logging.info('_______________________________________________________________\n') + logging.info('ERROR:\n') + msg = 'The tool crashed with the following error:\n%s\n' % proc_err + logging.critical(msg) + sys.exit(msg) + + if options.context: + shutil.move( glob(os.path.join(tmp_dir, '*_CpG.bedgraph'))[0], options.CpG_bedgraph ) + shutil.move( glob(os.path.join(tmp_dir, '*_CHG.bedgraph'))[0], options.CHG_bedgraph ) + shutil.move( glob(os.path.join(tmp_dir, '*_CHH.bedgraph'))[0], options.CHH_bedgraph ) + if options.tdf: + shutil.move(glob(os.path.join(tmp_dir, '*_CpG.tdf'))[0], options.CpG_tdf) + shutil.move(glob(os.path.join(tmp_dir, '*_CHG.tdf'))[0], options.CHG_tdf) + shutil.move(glob(os.path.join(tmp_dir, '*_CHH.tdf'))[0], options.CHH_tdf) + else: + shutil.move(glob(os.path.join(tmp_dir, '*_CXX.bedgraph'))[0], options.CXX_bedgraph) + if options.tdf: + shutil.move(glob(os.path.join(tmp_dir, '*_CXX.tdf'))[0], options.CXX_tdf) + shutil.move(glob(os.path.join(tmp_dir, '*_coverage.bedgraph'))[0], options.coverage_bedgraph) + if options.tdf: + shutil.move(glob(os.path.join(tmp_dir, '*_coverage.tdf'))[0], options.coverage_tdf) + + if os.path.exists(tmp_dir): + shutil.rmtree(tmp_dir) + +if __name__=="__main__": __main__() \ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/tool-data/igv_genomes_chr_sizes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/tool-data/igv_genomes_chr_sizes.loc.sample Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,2 @@ +# +Arabidopsis_thaliana_TAIR10 Arabidopsis_thaliana_TAIR10 Arabidopsis thaliana: TAIR10 /export/home1/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin/genomes/tair10.chrom.sizes \ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/tool_data_table_conf.xml.sample Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,7 @@ + + + + value, dbkey, name, path + +
+
\ No newline at end of file diff -r 000000000000 -r b03e31cab4a6 cytosine_report_to_bedgraph/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytosine_report_to_bedgraph/tool_dependencies.xml Mon Nov 14 04:55:30 2016 -0500 @@ -0,0 +1,6 @@ + + + + + +