Mercurial > repos > bgruening > bismark
comparison bismark_methyl_extractor/bismark_methylation_extractor.py @ 7:fcadce4d9a06 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
author | bgruening |
---|---|
date | Sat, 06 May 2017 13:18:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:0f8646f22b8d | 7:fcadce4d9a06 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse, os, shutil, subprocess, sys, tempfile, fileinput | |
4 import zipfile | |
5 import re | |
6 from glob import glob | |
7 | |
def stop_err(msg):
    """Report a fatal error and terminate the process.

    Writes ``msg`` followed by a newline to standard error, then raises
    ``SystemExit`` with exit status 1 so that callers (and the job runner)
    can tell the failure apart from a successful run.

    :param msg: human-readable description of the failure.
    :raises SystemExit: always.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would report status 0 (success) despite the error.
    sys.exit(1)
11 | |
def zipper(dir, zip_file):
    """Collect the methylation extractor result files into a zip archive.

    Walks ``dir`` recursively and adds every file whose name looks like a
    context output file (``CpG_*``, ``CHG_*``, ``CHH_*``, optionally
    prefixed with ``Non_``) or contains ``bedGraph.gz``. Files are stored
    under their path relative to ``dir``.

    :param dir: directory to search for result files.
    :param zip_file: path of the zip archive to create (overwritten).
    :returns: ``zip_file``, unchanged.
    """
    output_files_regex = re.compile(r'^(Non_)?C[pH][GH]_.*')
    # The dot is escaped so that only a literal ".gz" suffix part matches.
    bedgraph_regex = re.compile(r'bedGraph\.gz')
    # The context manager closes the archive even if walking or writing fails.
    with zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for root, _dirs, files in os.walk(dir):
            for f in files:
                if output_files_regex.search(f) or bedgraph_regex.search(f):
                    fullpath = os.path.join(root, f)
                    archive.write(fullpath, os.path.relpath(fullpath, dir))
    return zip_file
26 | |
def build_genome_dir(genome_file):
    """Create a temporary genome folder containing a ``.fa`` link to a genome.

    A fresh temporary directory is created and a symbolic link named
    ``<basename without extension>.fa`` pointing at ``genome_file`` is
    placed inside it.

    :param genome_file: path to the reference genome FASTA file.
    :returns: path of the temporary directory holding the symlink; the
        caller is responsible for removing it.
    :raises SystemExit: via ``stop_err`` when the link cannot be created
        (the temporary directory is removed first).
    """
    tmp_genome_dir = tempfile.mkdtemp(prefix='tmp')
    # splitext keeps the full name when there is no extension, instead of
    # collapsing it to an empty string (which would yield a hidden ".fa").
    base_name = os.path.splitext(os.path.basename(genome_file))[0]
    genome_path = os.path.join(tmp_genome_dir, base_name)
    try:
        # Create a symbolic link pointing to genome_file named 'genome_path'.fa.
        # The target is made absolute because the link lives in a different
        # directory; a relative target would dangle.
        os.symlink(os.path.abspath(genome_file), genome_path + '.fa')
    except Exception as e:
        if os.path.exists(tmp_genome_dir):
            shutil.rmtree(tmp_genome_dir)
        stop_err('Error in linking the reference database.\n' + str(e))
    return tmp_genome_dir
40 | |
def _build_parser():
    """Return the argument parser describing the wrapper's command line."""
    parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.')

    # input options
    parser.add_argument('--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts')

    parser.add_argument('--infile', required=True, help='Input file in SAM or BAM format.')
    parser.add_argument('--single-end', dest='single_end', action="store_true")
    parser.add_argument('--paired-end', dest='paired_end', action="store_true")

    parser.add_argument('--splitting_report', dest='splitting_report')
    parser.add_argument('--mbias_report', dest='mbias_report')
    parser.add_argument('--cytosine_report', dest="cytosine_report")
    parser.add_argument('--genome_file', dest="genome_file")
    parser.add_argument('--cx_context', action="store_true")

    parser.add_argument('--comprehensive', action="store_true")
    parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true")
    parser.add_argument('--no-overlap', dest='no_overlap', action="store_true")
    parser.add_argument('--compress')
    parser.add_argument('--ignore', dest='ignore', type=int)
    parser.add_argument('--ignore_r2', dest='ignore_r2', type=int)
    parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int)
    parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int)
    return parser


def _to_text(data):
    """Return captured process output as ``str`` on both Python 2 and 3."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def _extractor_options(args, genome_dir):
    """Translate parsed wrapper arguments into extractor command-line flags."""
    opts = ['--single-end' if args.single_end else '--paired-end']
    if args.no_overlap:
        opts.append('--no_overlap')
    for name in ('ignore', 'ignore_r2', 'ignore_3prime', 'ignore_3prime_r2'):
        value = getattr(args, name)
        # A value of 0 means "ignore nothing", so omitting the flag is fine.
        if value:
            opts.extend(['--' + name, str(value)])
    if args.comprehensive:
        opts.append('--comprehensive')
    if args.merge_non_cpg:
        opts.append('--merge_non_CpG')
    if args.splitting_report:
        opts.append('--report')
    if args.cytosine_report:
        opts.append('--bedgraph')
        if args.cx_context:
            opts.append('--CX_context')
        opts.extend(['--cytosine_report', '--genome_folder', genome_dir])
    return opts


def _is_bam(path):
    """Return True when ``path`` starts with the signature used to detect BAM.

    The signature is the gzip magic number with the FEXTRA flag set; it is
    compared as bytes so the check also works on Python 3.
    """
    with open(path, 'rb') as handle:
        return handle.read(4) == b'\x1f\x8b\x08\x04'


def _bam_to_sam(bam_path, sam_path):
    """Convert ``bam_path`` to SAM at ``sam_path`` using ``samtools view``."""
    with open(sam_path, 'wb') as sam, tempfile.TemporaryFile() as err:
        # subprocess.call waits for samtools to finish, so the SAM file is
        # complete before anything reads it.
        returncode = subprocess.call(['samtools', 'view', bam_path], stdout=sam, stderr=err)
        err.seek(0)
        error_msg = _to_text(err.read())
    # Any stderr output is treated as a failure, as is a non-zero exit status.
    if error_msg or returncode != 0:
        sys.exit("error: %s" % (error_msg or 'samtools view exited with status %d' % returncode))


def _run_extractor(cmd):
    """Run the extractor command; raise RuntimeError(stderr) on failure."""
    with open(os.devnull, 'wb') as sink, tempfile.TemporaryFile() as err:
        returncode = subprocess.call(cmd, cwd=".", stdout=sink, stderr=err)
        err.seek(0)
        stderr = _to_text(err.read())
    if returncode != 0:
        raise RuntimeError(stderr)


def _move_output(output_dir, pattern, destination):
    """Move the file in ``output_dir`` matching ``pattern`` to ``destination``."""
    matches = sorted(glob(os.path.join(output_dir, pattern)))
    if not matches:
        stop_err('Expected output file matching "%s" was not produced.' % pattern)
    shutil.move(matches[0], destination)


def __main__():
    """Run bismark_methylation_extractor and collect its outputs.

    Parses the command line, converts BAM input to SAM with ``samtools
    view`` when needed, runs the extractor into a temporary directory,
    then copies the requested results (zip archive, cytosine report,
    splitting report, M-bias report) to the paths given on the command
    line. Temporary directories are removed on success and on failure.

    :raises SystemExit: on invalid arguments or when an external tool fails.
    """
    # Parse Command Line
    parser = _build_parser()
    args = parser.parse_args()
    if args.cytosine_report and not args.genome_file:
        parser.error('--genome_file is required when --cytosine_report is given')

    output_dir = tempfile.mkdtemp()
    tmp_genome_dir = None
    try:
        if args.cytosine_report:
            tmp_genome_dir = build_genome_dir(args.genome_file)

        executable = 'bismark_methylation_extractor'
        if args.bismark_path:
            # add the path to the bismark perl scripts, that is needed for galaxy
            executable = os.path.join(args.bismark_path, executable)

        # detect BAM file, use samtools view if it is a bam file
        infile = args.infile
        if _is_bam(args.infile):
            infile = os.path.join(output_dir, 'submitted_bs_mapped_reads.sam')
            _bam_to_sam(args.infile, infile)

        # Build the command as an argument list (no shell), so paths that
        # contain spaces or shell metacharacters are passed through intact.
        cmd = [executable, '--no_header', '-o', output_dir]
        cmd.extend(_extractor_options(args, tmp_genome_dir))
        cmd.append(infile)

        # Run
        try:
            _run_extractor(cmd)
            # TODO: look for errors in program output.
        except Exception as e:
            stop_err('Error in bismark methylation extractor:\n' + str(e))

        # collect and copy output files
        if args.compress:
            zipper(output_dir, args.compress)

        # cytosine report
        if args.cytosine_report:
            pattern = '*CX_report.txt' if args.cx_context else '*CpG_report.txt'
            _move_output(output_dir, pattern, args.cytosine_report)
        # splitting report
        if args.splitting_report:
            _move_output(output_dir, '*_splitting_report.txt', args.splitting_report)
        if args.mbias_report:
            _move_output(output_dir, '*M-bias.txt', args.mbias_report)
    finally:
        # Clean up temp dirs, also when exiting through sys.exit/stop_err.
        for tmp_dir in (output_dir, tmp_genome_dir):
            if tmp_dir and os.path.exists(tmp_dir):
                shutil.rmtree(tmp_dir)
180 | |
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()