changeset 0:012dc1867dc7 draft

Not_yet_tested_properly
author triasteran
date Tue, 22 Feb 2022 15:38:38 +0000
parents
children 812052306395
files BamToBigWig_planemo/.shed.yml BamToBigWig_planemo/bam2bigwig.xml BamToBigWig_planemo/bam_to_bigwig_py3.py
diffstat 3 files changed, 159 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BamToBigWig_planemo/.shed.yml	Tue Feb 22 15:38:38 2022 +0000
@@ -0,0 +1,4 @@
+categories: [Transcriptomics]
+description: riboseq_bam_to_bigwig_for_gwips
+name: bam_to_bigwig
+owner: triasteran
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BamToBigWig_planemo/bam2bigwig.xml	Tue Feb 22 15:38:38 2022 +0000
@@ -0,0 +1,40 @@
+<tool id="bam2bigwig" name="Convert Bam to BigWig" version="0.1.0" python_template_version="3.5">
+    <requirements>
+        <requirement type="package" version="0.18.0">pysam</requirement>
+        <requirement type="package" version="2.30.0">bedtools</requirement>
+        <requirement type="package" version="377">ucsc-bedgraphtobigwig</requirement>        
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        bam_to_bigwig_py3.py '$input1' --outfile=$output1
+    ]]></command>
+    <inputs>
+        <param type="data" name="input1" format="bam" />
+    </inputs>
+    <outputs>
+        <data name="output1" format="bigwig" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="file.bam"/>
+            <output name="output1" file="out.bigwig"/>
+        </test>
+    </tests>
+    <help>
+      <![CDATA[
+Convert BAM files to BigWig file format in a specified region.
+
+Usage:
+    bam_to_wiggle.py <BAM file> <YAML config> --outfile=<output file name> ]]>
+    </help>
+    <citations>
+        <citation type="bibtex">
+           @misc{githubbam_to_bigwig,
+  author = {Audrey},
+  year = {2022},
+  title = {bam_to_bigwig},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/triasteran/RiboGalaxy_tools}, }
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BamToBigWig_planemo/bam_to_bigwig_py3.py	Tue Feb 22 15:38:38 2022 +0000
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+#Original version copyright Brad Chapman with revisions from Peter Cock
+#and ideas from Lance Parsons
+"""Convert BAM files to BigWig file format in a specified region.
+
+Usage:
+    bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>]
+
+The optional config file (not used by the Galaxy interface) is in YAML format
+and specifies the location of the binary tools.
+
+program:
+  bedtools_genomeCoverageBed: genomeCoverageBed
+  ucsc_bedGraphToBigWig: bedGraphToBigWig
+
+If not specified, these will be assumed to be present in the system path.
+
+The script requires:
+    pysam (http://code.google.com/p/pysam/)
+    genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/)
+    bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
+If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
+"""
+import os
+import sys
+import subprocess
+import tempfile
+from optparse import OptionParser
+from contextlib import contextmanager, closing
+
+import pysam
+
+def main(bam_file, config_file=None, outfile=None, split=False):
+    if config_file:
+        import yaml
+        with open(config_file) as in_handle:
+            config = yaml.load(in_handle)
+    else:
+        config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig",
+                              "bedtools_genomeCoverageBed" : "genomeCoverageBed"}}
+    if outfile is None:
+        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
+    if os.path.abspath(bam_file) == os.path.abspath(outfile):
+        sys.stderr.write("Bad arguments, input and output files are the same.\n")
+        sys.exit(1)
+    if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
+        sys.stderr.write("Warning, output file already exists.\n")
+
+    sizes = get_sizes(bam_file, config)
+    #print ("Have %i references" % len(sizes))
+    if not sizes:
+        sys.stderr.write("Problem reading BAM header.\n")
+        sys.exit(1)
+
+    #Use a temp file to avoid any possiblity of not having write permission
+    temp_handle = tempfile.NamedTemporaryFile(delete=False)
+    temp_file = temp_handle.name
+    with closing(temp_handle):
+        print ("Calculating coverage...")
+        convert_to_graph(bam_file, split, config, temp_handle)
+    try:
+        print ("Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024)))
+        #Can't pipe this as stdin due to converter design,
+        #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
+        convert_to_bigwig(temp_file, sizes, config, outfile)
+    finally:
+        if os.path.isfile(temp_file):
+            os.remove(temp_file)
+    print ("Done")
+
+@contextmanager
+def indexed_bam(bam_file, config):
+    if not os.path.exists(bam_file + ".bai"):
+        pysam.index(bam_file)
+    sam_reader = pysam.Samfile(bam_file, "rb")
+    yield sam_reader
+    sam_reader.close()
+
+def get_sizes(bam_file, config):
+    with indexed_bam(bam_file, config) as work_bam:
+        sizes = zip(work_bam.references, work_bam.lengths)
+    return sizes
+
+def convert_to_graph(bam_file, split, config, out_handle):
+    cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"]
+    if split:
+        cl.append("-split")
+    subprocess.check_call(cl, stdout=out_handle)
+
+def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
+    #This will be fine under Galaxy, but could use temp folder?
+    size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
+    with open(size_file, "w") as out_handle:
+        for chrom, size in chr_sizes:
+            out_handle.write("%s\t%s\n" % (chrom, size))
+    try:
+        cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file]
+        subprocess.check_call(cl)
+    finally:
+        os.remove(size_file)
+    return bw_file
+
+if __name__ == "__main__":
+    parser = OptionParser()
+    parser.add_option("-o", "--outfile", dest="outfile")
+    parser.add_option("-s", "--split", action="store_true", dest="split")
+    (options, args) = parser.parse_args()
+    if len(args) not in [1, 2]:
+        print ("Incorrect arguments")
+        print (__doc__)
+        sys.exit()
+    kwargs = dict(
+        outfile=options.outfile,
+        split=options.split)
+    main(*args, **kwargs)