Mercurial > repos > triasteran > bam_to_bigwig
changeset 2:447324f08654 draft
Uploaded
author | triasteran |
---|---|
date | Tue, 22 Feb 2022 16:14:23 +0000 |
parents | 812052306395 |
children | d6280eee8002 |
files | BamToBigWig_planemo/.shed.yml BamToBigWig_planemo/bam2bigwig.xml BamToBigWig_planemo/bam_to_bigwig_py3.py |
diffstat | 3 files changed, 159 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BamToBigWig_planemo/.shed.yml Tue Feb 22 16:14:23 2022 +0000 @@ -0,0 +1,4 @@ +categories: [Transcriptomics] +description: riboseq_bam_to_bigwig_for_gwips +name: bam_to_bigwig +owner: triasteran
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BamToBigWig_planemo/bam2bigwig.xml Tue Feb 22 16:14:23 2022 +0000 @@ -0,0 +1,40 @@ +<tool id="bam2bigwig" name="Convert Bam to BigWig" version="0.1.0" python_template_version="3.5"> + <requirements> + <requirement type="package" version="0.18.0">pysam</requirement> + <requirement type="package" version="2.30.0">bedtools</requirement> + <requirement type="package" version="377">ucsc-bedgraphtobigwig</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python3 '$__tool_directory__/bam_to_bigwig.py3' '$input1' --outfile=$output1 + ]]></command> + <inputs> + <param type="data" name="input1" format="bam" /> + </inputs> + <outputs> + <data name="output1" format="bigwig" /> + </outputs> + <tests> + <test> + <param name="input1" value="file.bam"/> + <output name="output1" file="out.bigwig"/> + </test> + </tests> + <help> + <![CDATA[ +Convert BAM files to BigWig file format in a specified region. + +Usage: + bam_to_wiggle.py <BAM file> <YAML config> --outfile=<output file name> ]]> + </help> + <citations> + <citation type="bibtex"> + @misc{githubbam_to_bigwig, + author = {Audrey}, + year = {2022}, + title = {bam_to_bigwig}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/triasteran/RiboGalaxy_tools}, } + </citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BamToBigWig_planemo/bam_to_bigwig_py3.py Tue Feb 22 16:14:23 2022 +0000 @@ -0,0 +1,115 @@ +#!/usr/bin/env python +#Original version copyright Brad Chapman with revisions from Peter Cock +#and ideas from Lance Parsons +"""Convert BAM files to BigWig file format in a specified region. + +Usage: + bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>] + +The optional config file (not used by the Galaxy interface) is in YAML format +and specifies the location of the binary tools. + +program: + bedtools_genomeCoverageBed: genomeCoverageBed + ucsc_bedGraphToBigWig: bedGraphToBigWig + +If not specified, these will be assumed to be present in the system path. + +The script requires: + pysam (http://code.google.com/p/pysam/) + genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/) + bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) +If a configuration file is used, then PyYAML is also required (http://pyyaml.org/) +""" +import os +import sys +import subprocess +import tempfile +from optparse import OptionParser +from contextlib import contextmanager, closing + +import pysam + +def main(bam_file, config_file=None, outfile=None, split=False): + if config_file: + import yaml + with open(config_file) as in_handle: + config = yaml.load(in_handle) + else: + config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig", + "bedtools_genomeCoverageBed" : "genomeCoverageBed"}} + if outfile is None: + outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] + if os.path.abspath(bam_file) == os.path.abspath(outfile): + sys.stderr.write("Bad arguments, input and output files are the same.\n") + sys.exit(1) + if os.path.exists(outfile) and os.path.getsize(outfile) > 0: + sys.stderr.write("Warning, output file already exists.\n") + + sizes = get_sizes(bam_file, config) + #print ("Have %i references" % len(sizes)) + if not sizes: + sys.stderr.write("Problem reading BAM header.\n") + sys.exit(1) + + #Use a temp file to avoid any possiblity of not having write permission + temp_handle = tempfile.NamedTemporaryFile(delete=False) + temp_file = temp_handle.name + with closing(temp_handle): + print ("Calculating coverage...") + convert_to_graph(bam_file, split, config, temp_handle) + try: + print ("Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024))) + #Can't pipe this as stdin due to converter design, + #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html + convert_to_bigwig(temp_file, sizes, config, outfile) + finally: + if os.path.isfile(temp_file): + os.remove(temp_file) + print ("Done") + +@contextmanager +def indexed_bam(bam_file, config): + if not os.path.exists(bam_file + ".bai"): + pysam.index(bam_file) + sam_reader = pysam.Samfile(bam_file, "rb") + yield sam_reader + sam_reader.close() + +def get_sizes(bam_file, config): + with indexed_bam(bam_file, config) as work_bam: + sizes = zip(work_bam.references, work_bam.lengths) + return sizes + +def convert_to_graph(bam_file, split, config, out_handle): + cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"] + if split: + cl.append("-split") + subprocess.check_call(cl, stdout=out_handle) + +def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): + #This will be fine under Galaxy, but could use temp folder? + size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) + with open(size_file, "w") as out_handle: + for chrom, size in chr_sizes: + out_handle.write("%s\t%s\n" % (chrom, size)) + try: + cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file] + subprocess.check_call(cl) + finally: + os.remove(size_file) + return bw_file + +if __name__ == "__main__": + parser = OptionParser() + parser.add_option("-o", "--outfile", dest="outfile") + parser.add_option("-s", "--split", action="store_true", dest="split") + (options, args) = parser.parse_args() + if len(args) not in [1, 2]: + print ("Incorrect arguments") + print (__doc__) + sys.exit() + kwargs = dict( + outfile=options.outfile, + split=options.split) + main(*args, **kwargs)