Mercurial > repos > charles-bernard > cytosine_report_to_bedgraph
changeset 5:ee5badb527cd draft default tip
Uploaded
author | charles-bernard |
---|---|
date | Wed, 16 Nov 2016 06:38:59 -0500 |
parents | e989fc4b1b76 |
children | |
files | cytosine_report_to_bedgraph/.shed.yml cytosine_report_to_bedgraph/bismark2bedgraph.awk cytosine_report_to_bedgraph/bismark2bedgraph.sh cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml cytosine_report_to_bedgraph/cytosine_report_to_bedgraph_wrapper.py |
diffstat | 4 files changed, 33 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/cytosine_report_to_bedgraph/.shed.yml Mon Nov 14 05:02:15 2016 -0500 +++ b/cytosine_report_to_bedgraph/.shed.yml Wed Nov 16 06:38:59 2016 -0500 @@ -9,6 +9,7 @@ The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (Integrative Genomics Viewer). In this respect, the tool can optionally generate a tdf binary file (Tiled Data Format) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph. -name: bismark +name: cytosine_report_to_bedgraph +remote_repository_url: https://github.com/charles-bernard/Galaxy_tools/tree/master/cytosine_report_to_bedgraph owner: charles-bernard type: unrestricted \ No newline at end of file
--- a/cytosine_report_to_bedgraph/bismark2bedgraph.awk Mon Nov 14 05:02:15 2016 -0500 +++ b/cytosine_report_to_bedgraph/bismark2bedgraph.awk Wed Nov 16 06:38:59 2016 -0500 @@ -1,11 +1,14 @@ #!/usr/bin/awk +#USAGE: +#awk -v context=<list_of_contexts> -v coverage=<boolean> -f <script_path>/bismark2bedgraph.awk <cytosine_report_name> >> <bedgraph_name> + BEGIN { FS = "\t"; } { - if ( $6 ~ context && ($4 > 0 || $5 > 0) ) { + if ( $6 ~ context && ( $4 > 0 || $5 > 0 ) ) { chr_name = $1; chr_pos = $2; @@ -17,13 +20,13 @@ nb_reads = c_meth_count + c_unmeth_count printf("%s\t%s\t%s\t%s\n", chr_name, chr_pos, chr_pos, nb_reads) } else { - if ( strand == "-") { + if ( strand == "-" ) { s = "-"; } else { s = ""; } - meth_ratio = c_meth_count / (c_meth_count + c_unmeth_count); - printf("%s\t%s\t%s\t%s%s\n", chr_name, chr_pos, chr_pos, s, meth_ratio) + meth_ratio = c_meth_count / ( c_meth_count + c_unmeth_count ); + printf( "%s\t%s\t%s\t%s%s\n", chr_name, chr_pos, chr_pos, s, meth_ratio ) } } -} \ No newline at end of file +}
--- a/cytosine_report_to_bedgraph/bismark2bedgraph.sh Mon Nov 14 05:02:15 2016 -0500 +++ b/cytosine_report_to_bedgraph/bismark2bedgraph.sh Wed Nov 16 06:38:59 2016 -0500 @@ -8,27 +8,34 @@ while true ; do case "$1" in -e | --epi ) + #epi is the prefix of the output files names case "$2" in "" ) epi="current_job"; shift 2 ;; *) epi=$2; shift 2 ;; esac ;; -i | --infile_cov ) + #infile_cov is the filename of the cytosine report taken as input case "$2" in *) infile_cov=$2; shift 2 ;; esac ;; -c | --context ) + #context defines weither 2 or 4 bedgraphs are returned context=true; shift ;; --tdf ) + #tdf defines weither or not bedgraphs have to be converted into tdf files. tdf=true; shift ;; --igv_genome ) + #tdf conversion is achieved by igvtools and requires a file with the chrs_len of the genome case "$2" in *) igv_genome=$2; shift 2 ;; esac ;; -o | --output_dir ) + #output_dir in this galaxy tool is the tmp dir created by the wrapper.py case "$2" in *) output_dir=$2; shift 2 ;; esac ;; --tool_dir ) + #tool_dir is recquired to call other scripts stored in this directory case "$2" in *) tool_dir=$2; shift 2 ;; esac ;; @@ -37,31 +44,26 @@ esac done -# do something with the variables -- in this case the lamest possible one :-) -echo "epi = $epi" -echo "infile_cov = $infile_cov" -echo "output_dir = $output_dir" -echo "context = $context" -echo "tool_dir = $tool_dir" -echo "tdf = $tdf" -echo "igv_genome = $igv_genome" - #IGV_path -IGV_path="/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin" +#IGV_path="/users/biocomp/chbernar/galaxy_testing/database/dependencies/igvtools/2.3.32/geert-vandeweyer/package_igvtools_2_3_32/3c087cee3b8f/bin" # define outputs according to options if [[ "$context" = true ]]; then context_list=("CG" "CHG" "CHH") + n="4" output_types=("CG" "CHG" "CHH" "coverage") bedgraph_list=("$output_dir""/""$epi""_CpG.bedgraph" "$output_dir""/""$epi""_CHG.bedgraph" "$output_dir""/""$epi""_CHH.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph") - tdf_list=("$output_dir""/""$epi""_CpG.tdf" "$output_dir""/""$epi""_CHG.tdf" "$output_dir""/""$epi""_CHH.tdf" "$output_dir""/""$epi""_coverage.tdf") - n="4" + if [[ "$tdf" = true ]]; then + tdf_list=("$output_dir""/""$epi""_CpG.tdf" "$output_dir""/""$epi""_CHG.tdf" "$output_dir""/""$epi""_CHH.tdf" "$output_dir""/""$epi""_coverage.tdf") + fi else context_list=(".*") + n="2" output_types=("CXX" "coverage") bedgraph_list=("$output_dir""/""$epi""_CXX.bedgraph" "$output_dir""/""$epi""_coverage.bedgraph") - tdf_list=("$output_dir""/""$epi""_CXX.tdf" "$output_dir""/""$epi""_coverage.tdf") - n="2" + if [[ "$tdf" = true ]]; then + tdf_list=("$output_dir""/""$epi""_CXX.tdf" "$output_dir""/""$epi""_coverage.tdf") + fi fi # process @@ -69,18 +71,16 @@ printf "________________________________________________________________________\n" printf "Processing %s\n" ${output_types[$i]} printf "... Converting Cytosine Report to Bedgraph\n" - if (( i < n - 1 )); then - #if not coverage: - #printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}" + if (( i < n - 1 )); then #if not coverage: printf "#<Chr>\t<Start>\t<End>\t<Strand;Meth_ratio>\n" > "${bedgraph_list[$i]}" awk -v context="${context_list[$i]}" -v coverage="false" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}" else - #printf "track type=bedGraph name=%s Coverage description=%s Coverage\n" "$epi""_""${context_list[$i]}" "$epi""_""${context_list[$i]}" > "${bedgraph_list[$i]}" printf "#<Chr>\t<Start>\t<End>\t<Coverage>\n" > "${bedgraph_list[$i]}" awk -v context="${context_list[$i]}" -v coverage="true" -f "$tool_dir"/bismark2bedgraph.awk $infile_cov >> "${bedgraph_list[$i]}" fi if [[ "$tdf" = true ]]; then printf "... Converting Bedgraph to Tdf\n" - "$IGV_path""/"igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file + #"$IGV_path""/"igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file + igvtools toTDF "${bedgraph_list[$i]}" "${tdf_list[$i]}" "$igv_genome" > stdout_file fi done \ No newline at end of file
--- a/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml Mon Nov 14 05:02:15 2016 -0500 +++ b/cytosine_report_to_bedgraph/cytosine_report_to_bedgraph.xml Wed Nov 16 06:38:59 2016 -0500 @@ -90,7 +90,7 @@ </data> </outputs> - <tests></tests> + <tests><!--TO DO --></tests> <help> <![CDATA[ @@ -98,7 +98,7 @@ **What it does** | This tool takes as input a genome-wide cytosine methylation report (generated by the tool *Bismark Meth. Extractor*) and converts it into a bedGraph for each cytosine context (CpG, CHG and CHH). - | These bedGraphs display, for a given context, the ratio of methylation of each covered cytosine in the genome. + | These bedGraphs display, for any given context, the ratio of methylation of each covered cytosine in the genome. | | It also produces a bedGraph displaying the coverage count of each cytosine in the genome (non-covered cytosine are ignored). | @@ -106,7 +106,7 @@ .. class:: infomark | The tool outputs offer the possibility to vizualise the methylation signal of covered cytosines thanks to softwares like IGV (*Integrative Genomics Viewer*). - | In this respect, the tool can optionally generate a tdf binary file (*Tiled Data Format*) from each converted bedGraph. Tdf format is indeed better handled by IGV than bedGraph. + | In this respect, the tool can optionally generate a tdf binary file (*Tiled Data Format*) from each converted bedGraph ; tdf format is indeed better handled by IGV than bedGraph. ]]> </help> -</tool> \ No newline at end of file +</tool>