Mercurial > repos > bgruening > bismark
view bismark_methyl_extractor/bismark_methylation_extractor.py @ 7:fcadce4d9a06 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
author | bgruening |
---|---|
date | Sat, 06 May 2017 13:18:09 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python import argparse, os, shutil, subprocess, sys, tempfile, fileinput import zipfile import re from glob import glob def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() def zipper(dir, zip_file): output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*') bedgraph_regex = re.compile('.*bedGraph.gz') zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) root_len = len(os.path.abspath(dir)) for root, dirs, files in os.walk(dir): archive_root = os.path.abspath(root)[root_len:] for f in files: if re.search(output_files_regex, f) or re.search(bedgraph_regex, f): fullpath = os.path.join(root, f) archive_name = os.path.join(archive_root, f) zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED) zip.close() return zip_file def build_genome_dir(genome_file): tmp_genome_dir = tempfile.mkdtemp(prefix='tmp') genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1])) try: """ Create a hard link pointing to genome_file named 'genome_path'.fa. """ os.symlink(genome_file, genome_path + '.fa') except Exception, e: if os.path.exists(tmp_genome_dir): shutil.rmtree(tmp_genome_dir) stop_err('Error in linking the reference database.\n' + str(e)) return tmp_genome_dir def __main__(): #Parse Command Line parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') # input options parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) parser.add_argument( '--infile', help='Input file in SAM or BAM format.' ) parser.add_argument( '--single-end', dest='single_end', action="store_true" ) parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) parser.add_argument('--splitting_report', dest='splitting_report') parser.add_argument('--mbias_report', dest='mbias_report') parser.add_argument('--cytosine_report', dest="cytosine_report") parser.add_argument('--genome_file', dest="genome_file") parser.add_argument('--cx_context', action="store_true" ) parser.add_argument( '--comprehensive', action="store_true" ) parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" ) parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" ) parser.add_argument( '--compress' ) parser.add_argument('--ignore', dest='ignore', type=int) parser.add_argument('--ignore_r2', dest='ignore_r2', type=int) parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int) parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int) args = parser.parse_args() # Build methylation extractor command output_dir = tempfile.mkdtemp() cmd = 'bismark_methylation_extractor --no_header -o %s %s %s' if args.bismark_path: # add the path to the bismark perl scripts, that is needed for galaxy cmd = os.path.join(args.bismark_path, cmd) # Set up all options additional_opts = '' if args.single_end: additional_opts += ' --single-end ' else: additional_opts += ' --paired-end ' if args.no_overlap: additional_opts += ' --no_overlap ' if args.ignore: additional_opts += ' --ignore %s ' % args.ignore if args.ignore_r2: additional_opts += ' --ignore_r2 %s ' % args.ignore_r2 if args.ignore_3prime: additional_opts += ' --ignore_3prime %s ' % args.ignore_3prime if args.ignore_3prime_r2: additional_opts += ' --ignore_3prime_r2 %s ' % args.ignore_3prime_r2 if args.comprehensive: additional_opts += ' --comprehensive ' if args.merge_non_cpg: additional_opts += ' --merge_non_CpG ' if args.splitting_report: additional_opts += ' --report ' if args.cytosine_report: tmp_genome_dir = build_genome_dir(args.genome_file) if args.cx_context: additional_opts += ' --bedgraph --CX_context --cytosine_report --CX_context --genome_folder %s ' % tmp_genome_dir else: additional_opts += ' --bedgraph --cytosine_report --genome_folder %s ' % tmp_genome_dir #detect BAM file, use samtools view if it is a bam file f = open (args.infile, 'rb') sig = f.read(4) f.close() if sig == '\x1f\x8b\x08\x04' : #cmd = cmd % (output_dir, additional_opts, '-') new_infilename = os.path.join(output_dir, 'submitted_bs_mapped_reads.sam') new_sam = open(new_infilename, 'wb') tmp_err = tempfile.NamedTemporaryFile().name tmp_stderr = open(tmp_err, 'wb') proc = subprocess.Popen(['samtools', 'view', args.infile], stdout=new_sam, stderr=tmp_stderr) new_sam.close() tmp_stderr.close() if os.stat(tmp_err).st_size != 0: tmp_sterr = open(tmp_err, 'rb') error_msg = tmp_sterr.read() tmp_sterr.close() sys.exit("error: %s" % error_msg) cmd = cmd % (output_dir, additional_opts, new_infilename) else: cmd = cmd % (output_dir, additional_opts, args.infile) # Run try: tmp_out = tempfile.NamedTemporaryFile().name tmp_stdout = open( tmp_out, 'wb' ) tmp_err = tempfile.NamedTemporaryFile().name tmp_stderr = open( tmp_err, 'wb' ) proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) returncode = proc.wait() tmp_stderr.close() # get stderr, allowing for case where it's very large tmp_stderr = open( tmp_err, 'rb' ) stderr = '' buffsize = 1048576 try: while True: stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass tmp_stdout.close() tmp_stderr.close() if returncode != 0: raise Exception, stderr # TODO: look for errors in program output. except Exception, e: stop_err( 'Error in bismark methylation extractor:\n' + str( e ) ) # collect and copy output files if args.compress: zipper(output_dir, args.compress) # cytosine report if args.cytosine_report: if args.cx_context: shutil.move( glob(os.path.join( output_dir, '*CX_report.txt'))[0], args.cytosine_report ) else: shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report) # splitting report if args.splitting_report: shutil.move( glob(os.path.join( output_dir, '*_splitting_report.txt'))[0], args.splitting_report ) if args.mbias_report: shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report) #Clean up temp dirs if os.path.exists( output_dir ): shutil.rmtree( output_dir ) if args.cytosine_report: if os.path.exists( tmp_genome_dir ): shutil.rmtree( tmp_genome_dir ) if __name__=="__main__": __main__()