Mercurial > repos > bgruening > bismark
comparison bismark_wrapper.py @ 21:120b7b35e442 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
author | bgruening |
---|---|
date | Thu, 22 Apr 2021 17:05:46 +0000 |
parents | bb74e17e47d7 |
children |
comparison
equal
deleted
inserted
replaced
20:ff6ee551b153 | 21:120b7b35e442 |
---|---|
16 logger.critical(msg) | 16 logger.critical(msg) |
17 sys.exit(1) | 17 sys.exit(1) |
18 | 18 |
19 | 19 |
20 def log_subprocess_output(logger, pipe): | 20 def log_subprocess_output(logger, pipe): |
21 for line in iter(pipe.readline, b''): | 21 for line in iter(pipe.readline, b""): |
22 logger.debug(line.decode().rstrip()) | 22 logger.debug(line.decode().rstrip()) |
23 | 23 |
24 | 24 |
25 def __main__(): | 25 def __main__(): |
26 # Parse Command Line | 26 # Parse Command Line |
27 parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.') | 27 parser = argparse.ArgumentParser( |
28 parser.add_argument('-p', '--num-threads', dest='num_threads', | 28 description="Wrapper for the bismark bisulfite mapper." |
29 type=int, default=4, help='Use this many threads to align reads. The default is 4.') | 29 ) |
30 parser.add_argument( | |
31 "-p", | |
32 "--num-threads", | |
33 dest="num_threads", | |
34 type=int, | |
35 default=4, | |
36 help="Use this many threads to align reads. The default is 4.", | |
37 ) | |
30 | 38 |
31 # input options | 39 # input options |
32 parser.add_argument('--own-file', dest='own_file', help='') | 40 parser.add_argument("--own-file", dest="own_file", help="") |
33 parser.add_argument('-D', '--indexes-path', dest='index_path', | 41 parser.add_argument( |
34 help='Indexes directory; location of .ebwt and .fa files.') | 42 "-D", |
35 parser.add_argument('-O', '--output', dest='output') | 43 "--indexes-path", |
36 | 44 dest="index_path", |
37 parser.add_argument('--output-report-file', dest='output_report_file') | 45 help="Indexes directory; location of .ebwt and .fa files.", |
38 parser.add_argument('--suppress-header', dest='suppress_header', action="store_true") | 46 ) |
39 | 47 parser.add_argument("-O", "--output", dest="output") |
40 parser.add_argument('--mate-paired', dest='mate_paired', action='store_true', help='Reads are mate-paired', | 48 |
41 default=False) | 49 parser.add_argument("--output-report-file", dest="output_report_file") |
42 | 50 parser.add_argument( |
43 parser.add_argument('-1', '--mate1', dest='mate1', | 51 "--suppress-header", dest="suppress_header", action="store_true" |
44 help='The forward reads file in Sanger FASTQ or FASTA format.') | 52 ) |
45 parser.add_argument('-2', '--mate2', dest='mate2', | 53 |
46 help='The reverse reads file in Sanger FASTQ or FASTA format.') | 54 parser.add_argument( |
47 parser.add_argument('--sort-bam', dest='sort_bam', action="store_true") | 55 "--mate-paired", |
48 | 56 dest="mate_paired", |
49 parser.add_argument('--output-unmapped-reads', dest='output_unmapped_reads', | 57 action="store_true", |
50 help='Additional output file with unmapped reads (single-end).') | 58 help="Reads are mate-paired", |
51 parser.add_argument('--output-unmapped-reads-l', dest='output_unmapped_reads_l', | 59 default=False, |
52 help='File name for unmapped reads (left, paired-end).') | 60 ) |
53 parser.add_argument('--output-unmapped-reads-r', dest='output_unmapped_reads_r', | 61 |
54 help='File name for unmapped reads (right, paired-end).') | 62 parser.add_argument( |
55 | 63 "-1", |
56 parser.add_argument('--output-suppressed-reads', dest='output_suppressed_reads', | 64 "--mate1", |
57 help='Additional output file with suppressed reads (single-end).') | 65 dest="mate1", |
58 parser.add_argument('--output-suppressed-reads-l', dest='output_suppressed_reads_l', | 66 help="The forward reads file in Sanger FASTQ or FASTA format.", |
59 help='File name for suppressed reads (left, paired-end).') | 67 ) |
60 parser.add_argument('--output-suppressed-reads-r', dest='output_suppressed_reads_r', | 68 parser.add_argument( |
61 help='File name for suppressed reads (right, paired-end).') | 69 "-2", |
62 parser.add_argument('--stdout', dest='output_stdout', | 70 "--mate2", |
63 help='File name for the standard output of bismark.') | 71 dest="mate2", |
64 | 72 help="The reverse reads file in Sanger FASTQ or FASTA format.", |
65 parser.add_argument('--single-paired', dest='single_paired', | 73 ) |
66 help='The single-end reads file in Sanger FASTQ or FASTA format.') | 74 parser.add_argument("--sort-bam", dest="sort_bam", action="store_true") |
67 | 75 |
68 parser.add_argument('--fastq', action='store_true', help='Query filetype is in FASTQ format') | 76 parser.add_argument( |
69 parser.add_argument('--fasta', action='store_true', help='Query filetype is in FASTA format') | 77 "--output-unmapped-reads", |
70 parser.add_argument('--phred64-quals', dest='phred64', action="store_true") | 78 dest="output_unmapped_reads", |
71 parser.add_argument('--non-directional', dest='non_directional', action="store_true") | 79 help="Additional output file with unmapped reads (single-end).", |
72 parser.add_argument('--pbat', dest='pbat', action="store_true") | 80 ) |
73 | 81 parser.add_argument( |
74 parser.add_argument('--skip-reads', dest='skip_reads', type=int) | 82 "--output-unmapped-reads-l", |
75 parser.add_argument('--score-min', dest='score_min', type=str) | 83 dest="output_unmapped_reads_l", |
76 parser.add_argument('--qupto', type=int) | 84 help="File name for unmapped reads (left, paired-end).", |
85 ) | |
86 parser.add_argument( | |
87 "--output-unmapped-reads-r", | |
88 dest="output_unmapped_reads_r", | |
89 help="File name for unmapped reads (right, paired-end).", | |
90 ) | |
91 | |
92 parser.add_argument( | |
93 "--output-suppressed-reads", | |
94 dest="output_suppressed_reads", | |
95 help="Additional output file with suppressed reads (single-end).", | |
96 ) | |
97 parser.add_argument( | |
98 "--output-suppressed-reads-l", | |
99 dest="output_suppressed_reads_l", | |
100 help="File name for suppressed reads (left, paired-end).", | |
101 ) | |
102 parser.add_argument( | |
103 "--output-suppressed-reads-r", | |
104 dest="output_suppressed_reads_r", | |
105 help="File name for suppressed reads (right, paired-end).", | |
106 ) | |
107 parser.add_argument( | |
108 "--stdout", | |
109 dest="output_stdout", | |
110 help="File name for the standard output of bismark.", | |
111 ) | |
112 | |
113 parser.add_argument( | |
114 "--single-paired", | |
115 dest="single_paired", | |
116 help="The single-end reads file in Sanger FASTQ or FASTA format.", | |
117 ) | |
118 | |
119 parser.add_argument( | |
120 "--fastq", action="store_true", help="Query filetype is in FASTQ format" | |
121 ) | |
122 parser.add_argument( | |
123 "--fasta", action="store_true", help="Query filetype is in FASTA format" | |
124 ) | |
125 parser.add_argument("--phred64-quals", dest="phred64", action="store_true") | |
126 parser.add_argument( | |
127 "--non_directional", dest="non_directional", action="store_true" | |
128 ) | |
129 parser.add_argument("--pbat", dest="pbat", action="store_true") | |
130 | |
131 parser.add_argument("--skip-reads", dest="skip_reads", type=int) | |
132 parser.add_argument("--score-min", dest="score_min", type=str) | |
133 parser.add_argument("--qupto", type=int) | |
77 | 134 |
78 # paired end options | 135 # paired end options |
79 parser.add_argument('-I', '--minins', dest='min_insert') | 136 parser.add_argument("-I", "--minins", dest="min_insert") |
80 parser.add_argument('-X', '--maxins', dest='max_insert') | 137 parser.add_argument("-X", "--maxins", dest="max_insert") |
81 parser.add_argument('--no-mixed', dest='no_mixed', action="store_true") | 138 parser.add_argument("--no-mixed", dest="no_mixed", action="store_true") |
82 parser.add_argument('--no-discordant', dest='no_discordant', action="store_true") | 139 parser.add_argument("--no-discordant", dest="no_discordant", action="store_true") |
83 | 140 |
84 # parse general options | 141 # parse general options |
85 # default 20 | 142 # default 20 |
86 parser.add_argument('--seed-len', dest='seed_len', type=int) | 143 parser.add_argument("--seed-len", dest="seed_len", type=int) |
87 # default 15 | 144 # default 15 |
88 parser.add_argument('--seed-extention-attempts', dest='seed_extention_attempts', type=int) | 145 parser.add_argument( |
146 "--seed-extention-attempts", dest="seed_extention_attempts", type=int | |
147 ) | |
89 # default 0 | 148 # default 0 |
90 parser.add_argument('--seed-mismatches', dest='seed_mismatches', type=int) | 149 parser.add_argument("--seed-mismatches", dest="seed_mismatches", type=int) |
91 # default 2 | 150 # default 2 |
92 parser.add_argument('--max-reseed', dest='max_reseed', type=int) | 151 parser.add_argument("--max-reseed", dest="max_reseed", type=int) |
93 | |
94 | 152 |
95 """ | 153 """ |
96 The number of megabytes of memory a given thread is given to store path | 154 The number of megabytes of memory a given thread is given to store path |
97 descriptors in --best mode. Best-first search must keep track of many paths | 155 descriptors in --best mode. Best-first search must keep track of many paths |
98 at once to ensure it is always extending the path with the lowest cumulative | 156 at once to ensure it is always extending the path with the lowest cumulative |
99 cost. Bowtie tries to minimize the memory impact of the descriptors, but | 157 cost. Bowtie tries to minimize the memory impact of the descriptors, but |
100 they can still grow very large in some cases. If you receive an error message | 158 they can still grow very large in some cases. If you receive an error message |
101 saying that chunk memory has been exhausted in --best mode, try adjusting | 159 saying that chunk memory has been exhausted in --best mode, try adjusting |
102 this parameter up to dedicate more memory to the descriptors. Default: 512. | 160 this parameter up to dedicate more memory to the descriptors. Default: 512. |
103 """ | 161 """ |
104 parser.add_argument('--chunkmbs', type=int, default=512) | 162 parser.add_argument("--chunkmbs", type=int, default=512) |
105 | 163 |
106 args = parser.parse_args() | 164 args = parser.parse_args() |
107 | 165 |
108 logger = logging.getLogger('bismark_wrapper') | 166 logger = logging.getLogger("bismark_wrapper") |
109 logger.setLevel(logging.DEBUG) | 167 logger.setLevel(logging.DEBUG) |
110 ch = logging.StreamHandler(sys.stdout) | 168 ch = logging.StreamHandler(sys.stdout) |
111 if args.output_stdout: | 169 if args.output_stdout: |
112 ch.setLevel(logging.WARNING) | 170 ch.setLevel(logging.WARNING) |
113 handler = logging.FileHandler(args.output_stdout) | 171 handler = logging.FileHandler(args.output_stdout) |
119 | 177 |
120 # Create bismark index if necessary. | 178 # Create bismark index if necessary. |
121 index_dir = "" | 179 index_dir = "" |
122 tmp_index_dir = None | 180 tmp_index_dir = None |
123 if args.own_file: | 181 if args.own_file: |
124 logger.info("Create a temporary index with the offered files from the user. " | 182 logger.info( |
125 "Utilizing the script: bismark_genome_preparation") | 183 "Create a temporary index with the offered files from the user. " |
184 "Utilizing the script: bismark_genome_preparation" | |
185 ) | |
126 tmp_index_dir = tempfile.mkdtemp() | 186 tmp_index_dir = tempfile.mkdtemp() |
127 index_path = os.path.join(tmp_index_dir, '.'.join(os.path.split(args.own_file)[1].split('.')[:-1])) | 187 index_path = os.path.join( |
188 tmp_index_dir, ".".join(os.path.split(args.own_file)[1].split(".")[:-1]) | |
189 ) | |
128 try: | 190 try: |
129 # Create a hard link pointing to args.own_file named 'index_path'.fa. | 191 # Create a hard link pointing to args.own_file named 'index_path'.fa. |
130 os.symlink(args.own_file, index_path + '.fa') | 192 os.symlink(args.own_file, index_path + ".fa") |
131 except Exception as e: | 193 except Exception as e: |
132 if os.path.exists(tmp_index_dir): | 194 if os.path.exists(tmp_index_dir): |
133 shutil.rmtree(tmp_index_dir) | 195 shutil.rmtree(tmp_index_dir) |
134 stop_err(logger, 'Error in linking the reference database!\n%s' % e) | 196 stop_err(logger, "Error in linking the reference database!\n%s" % e) |
135 # bismark_genome_preparation needs the complete path to the folder in which the database is stored | 197 # bismark_genome_preparation needs the complete path to the folder in which the database is stored |
136 cmd_index = ['bismark_genome_preparation', '--bowtie2', tmp_index_dir] | 198 cmd_index = ["bismark_genome_preparation", "--bowtie2", tmp_index_dir] |
137 try: | 199 try: |
138 logger.info("Generating index with: '%s'", " ".join(cmd_index)) | 200 logger.info("Generating index with: '%s'", " ".join(cmd_index)) |
139 process = subprocess.Popen(cmd_index, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 201 process = subprocess.Popen( |
202 cmd_index, stdout=subprocess.PIPE, stderr=subprocess.STDOUT | |
203 ) | |
140 with process.stdout: | 204 with process.stdout: |
141 log_subprocess_output(logger, process.stdout) | 205 log_subprocess_output(logger, process.stdout) |
142 exitcode = process.wait() | 206 exitcode = process.wait() |
143 if exitcode != 0: | 207 if exitcode != 0: |
144 raise Exception(process.stderr) | 208 raise Exception(process.stderr) |
145 except Exception as e: | 209 except Exception as e: |
146 if os.path.exists(tmp_index_dir): | 210 if os.path.exists(tmp_index_dir): |
147 shutil.rmtree(tmp_index_dir) | 211 shutil.rmtree(tmp_index_dir) |
148 stop_err(logger, 'Error indexing reference sequence!\n%s' % e) | 212 stop_err(logger, "Error indexing reference sequence!\n%s" % e) |
149 index_dir = tmp_index_dir | 213 index_dir = tmp_index_dir |
150 else: | 214 else: |
151 # bowtie path is the path to the index directory and the first path of the index file name | 215 # bowtie path is the path to the index directory and the first path of the index file name |
152 index_dir = os.path.dirname(args.index_path) | 216 index_dir = os.path.dirname(args.index_path) |
153 | 217 |
154 # Build bismark command | 218 # Build bismark command |
155 tmp_bismark_dir = tempfile.mkdtemp() | 219 tmp_bismark_dir = tempfile.mkdtemp() |
156 output_dir = os.path.join(tmp_bismark_dir, 'results') | 220 output_dir = os.path.join(tmp_bismark_dir, "results") |
157 cmd = ['bismark', '--bam', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet'] | 221 cmd = [ |
222 "bismark", | |
223 "--bam", | |
224 "--temp_dir", | |
225 tmp_bismark_dir, | |
226 "-o", | |
227 output_dir, | |
228 "--quiet", | |
229 ] | |
158 if not args.pbat: | 230 if not args.pbat: |
159 cmd.append('--gzip') | 231 cmd.append("--gzip") |
160 | 232 |
161 if args.fasta: | 233 if args.fasta: |
162 # the query input files (specified as mate1,mate2 or singles) are FastA | 234 # the query input files (specified as mate1,mate2 or singles) are FastA |
163 cmd.append('--fasta') | 235 cmd.append("--fasta") |
164 elif args.fastq: | 236 elif args.fastq: |
165 cmd.append('--fastq') | 237 cmd.append("--fastq") |
166 | 238 |
167 # alignment options | 239 # alignment options |
168 if args.num_threads > 2: | 240 if args.num_threads > 2: |
169 # divide num_threads by 2 here since bismark will spawn 2 jobs with -p threads each | 241 # divide num_threads by 2 here since bismark will spawn 2 jobs with -p threads each |
170 cmd.extend(['-p', str(int(math.floor(args.num_threads / 2)))]) | 242 cmd.extend(["-p", str(int(math.floor(args.num_threads / 2)))]) |
171 if args.seed_mismatches: | 243 if args.seed_mismatches: |
172 cmd.extend(['-N', str(args.seed_mismatches)]) | 244 cmd.extend(["-N", str(args.seed_mismatches)]) |
173 if args.seed_len: | 245 if args.seed_len: |
174 cmd.extend(['-L', str(args.seed_len)]) | 246 cmd.extend(["-L", str(args.seed_len)]) |
175 if args.seed_extention_attempts: | 247 if args.seed_extention_attempts: |
176 cmd.extend(['-D', str(args.seed_extention_attempts)]) | 248 cmd.extend(["-D", str(args.seed_extention_attempts)]) |
177 if args.max_reseed: | 249 if args.max_reseed: |
178 cmd.extend(['-R', str(args.max_reseed)]) | 250 cmd.extend(["-R", str(args.max_reseed)]) |
179 if args.no_discordant: | 251 if args.no_discordant: |
180 cmd.append('--no-discordant') | 252 cmd.append("--no-discordant") |
181 if args.no_mixed: | 253 if args.no_mixed: |
182 cmd.append('--no-mixed') | 254 cmd.append("--no-mixed") |
183 if args.skip_reads: | 255 if args.skip_reads: |
184 cmd.extend(['--skip', str(args.skip_reads)]) | 256 cmd.extend(["--skip", str(args.skip_reads)]) |
185 if args.score_min: | 257 if args.score_min: |
186 cmd.extend(['--score_min', str(args.score_min)]) | 258 cmd.extend(["--score_min", str(args.score_min)]) |
187 if args.qupto: | 259 if args.qupto: |
188 cmd.extend(['--upto', 'args.qupto']) | 260 cmd.extend(["--upto", "args.qupto"]) |
189 if args.phred64: | 261 if args.phred64: |
190 cmd.append('--phred64-quals') | 262 cmd.append("--phred64-quals") |
191 if args.non_directional: | 263 if args.non_directional: |
192 cmd.append('--non-directional') | 264 cmd.append("--non_directional") |
193 if args.pbat: | 265 if args.pbat: |
194 cmd.append('--pbat') | 266 cmd.append("--pbat") |
195 if args.suppress_header: | 267 if args.suppress_header: |
196 cmd.append('--sam-no-hd') | 268 cmd.append("--sam-no-hd") |
197 if args.output_unmapped_reads or (args.output_unmapped_reads_l and args.output_unmapped_reads_r): | 269 if args.output_unmapped_reads or ( |
198 cmd.append('--un') | 270 args.output_unmapped_reads_l and args.output_unmapped_reads_r |
199 if args.output_suppressed_reads or (args.output_suppressed_reads_l and args.output_suppressed_reads_r): | 271 ): |
200 cmd.append('--ambiguous') | 272 cmd.append("--un") |
273 if args.output_suppressed_reads or ( | |
274 args.output_suppressed_reads_l and args.output_suppressed_reads_r | |
275 ): | |
276 cmd.append("--ambiguous") | |
201 | 277 |
202 cmd.append(index_dir) | 278 cmd.append(index_dir) |
203 # Set up the reads | 279 # Set up the reads |
204 if args.mate_paired: | 280 if args.mate_paired: |
205 # paired-end reads library | 281 # paired-end reads library |
206 cmd.extend(['-1', args.mate1, '-2', args.mate2, '-I', str(args.min_insert), '-X', str(args.max_insert)]) | 282 cmd.extend( |
283 [ | |
284 "-1", | |
285 args.mate1, | |
286 "-2", | |
287 args.mate2, | |
288 "-I", | |
289 str(args.min_insert), | |
290 "-X", | |
291 str(args.max_insert), | |
292 ] | |
293 ) | |
207 else: | 294 else: |
208 # single paired reads library | 295 # single paired reads library |
209 cmd.append(args.single_paired) | 296 cmd.append(args.single_paired) |
210 | 297 |
211 # Run | 298 # Run |
212 try: | 299 try: |
213 logger.info("Running bismark with: '%s'", " ".join(cmd)) | 300 logger.info("Running bismark with: '%s'", " ".join(cmd)) |
214 process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 301 process = subprocess.Popen( |
302 args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT | |
303 ) | |
215 with process.stdout: | 304 with process.stdout: |
216 log_subprocess_output(logger, process.stdout) | 305 log_subprocess_output(logger, process.stdout) |
217 exitcode = process.wait() | 306 exitcode = process.wait() |
218 if exitcode != 0: | 307 if exitcode != 0: |
219 raise Exception(process.stderr) | 308 raise Exception(process.stderr) |
220 except Exception as e: | 309 except Exception as e: |
221 stop_err(logger, 'Error in running bismark!\n%s' % e) | 310 stop_err(logger, "Error in running bismark!\n%s" % e) |
222 | 311 |
223 # collect and copy output files | 312 # collect and copy output files |
224 if args.output_report_file: | 313 if args.output_report_file: |
225 output_report_file = open(args.output_report_file, 'w+') | 314 output_report_file = open(args.output_report_file, "w+") |
226 for line in fileinput.input(glob(os.path.join(output_dir, '*report.txt'))): | 315 for line in fileinput.input(glob(os.path.join(output_dir, "*report.txt"))): |
227 output_report_file.write(line) | 316 output_report_file.write(line) |
228 output_report_file.close() | 317 output_report_file.close() |
229 | 318 |
230 if args.output_suppressed_reads: | 319 if args.output_suppressed_reads: |
231 if glob(os.path.join(output_dir, '*ambiguous_reads.*')): | 320 if glob(os.path.join(output_dir, "*ambiguous_reads.*")): |
232 shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.*'))[0], args.output_suppressed_reads) | 321 shutil.move( |
322 glob(os.path.join(output_dir, "*ambiguous_reads.*"))[0], | |
323 args.output_suppressed_reads, | |
324 ) | |
233 if args.output_suppressed_reads_l: | 325 if args.output_suppressed_reads_l: |
234 if glob(os.path.join(output_dir, '*ambiguous_reads_1.*')): | 326 if glob(os.path.join(output_dir, "*ambiguous_reads_1.*")): |
235 shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.*'))[0], args.output_suppressed_reads_l) | 327 shutil.move( |
328 glob(os.path.join(output_dir, "*ambiguous_reads_1.*"))[0], | |
329 args.output_suppressed_reads_l, | |
330 ) | |
236 if args.output_suppressed_reads_r: | 331 if args.output_suppressed_reads_r: |
237 if glob(os.path.join(output_dir, '*ambiguous_reads_2.*')): | 332 if glob(os.path.join(output_dir, "*ambiguous_reads_2.*")): |
238 shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.*'))[0], args.output_suppressed_reads_r) | 333 shutil.move( |
334 glob(os.path.join(output_dir, "*ambiguous_reads_2.*"))[0], | |
335 args.output_suppressed_reads_r, | |
336 ) | |
239 | 337 |
240 if args.output_unmapped_reads: | 338 if args.output_unmapped_reads: |
241 if glob(os.path.join(output_dir, '*unmapped_reads.*')): | 339 if glob(os.path.join(output_dir, "*unmapped_reads.*")): |
242 shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.*'))[0], args.output_unmapped_reads) | 340 shutil.move( |
341 glob(os.path.join(output_dir, "*unmapped_reads.*"))[0], | |
342 args.output_unmapped_reads, | |
343 ) | |
243 if args.output_unmapped_reads_l: | 344 if args.output_unmapped_reads_l: |
244 if glob(os.path.join(output_dir, '*unmapped_reads_1.*')): | 345 if glob(os.path.join(output_dir, "*unmapped_reads_1.*")): |
245 shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.*'))[0], args.output_unmapped_reads_l) | 346 shutil.move( |
347 glob(os.path.join(output_dir, "*unmapped_reads_1.*"))[0], | |
348 args.output_unmapped_reads_l, | |
349 ) | |
246 if args.output_unmapped_reads_r: | 350 if args.output_unmapped_reads_r: |
247 if glob(os.path.join(output_dir, '*unmapped_reads_2.*')): | 351 if glob(os.path.join(output_dir, "*unmapped_reads_2.*")): |
248 shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.*'))[0], args.output_unmapped_reads_r) | 352 shutil.move( |
353 glob(os.path.join(output_dir, "*unmapped_reads_2.*"))[0], | |
354 args.output_unmapped_reads_r, | |
355 ) | |
249 | 356 |
250 try: | 357 try: |
251 # merge all bam files | 358 # merge all bam files |
252 tmp_res = tempfile.NamedTemporaryFile(dir=output_dir).name | 359 tmp_res = tempfile.NamedTemporaryFile(dir=output_dir).name |
253 | 360 |
254 bam_files = glob(os.path.join(output_dir, '*.bam')) | 361 bam_files = glob(os.path.join(output_dir, "*.bam")) |
255 if len(bam_files) > 1: | 362 if len(bam_files) > 1: |
256 cmd = ['samtools', 'merge', '-@', str(args.num_threads), '-f', tmp_res, ' '.join(bam_files)] | 363 cmd = [ |
364 "samtools", | |
365 "merge", | |
366 "-@", | |
367 str(args.num_threads), | |
368 "-f", | |
369 tmp_res | |
370 ] | |
371 [cmd.append(x) for x in bam_files] | |
257 logger.info("Merging bams with: '%s'", cmd) | 372 logger.info("Merging bams with: '%s'", cmd) |
258 process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 373 process = subprocess.Popen( |
374 args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT | |
375 ) | |
259 with process.stdout: | 376 with process.stdout: |
260 log_subprocess_output(logger, process.stdout) | 377 log_subprocess_output(logger, process.stdout) |
261 exitcode = process.wait() | 378 exitcode = process.wait() |
262 if exitcode != 0: | 379 if exitcode != 0: |
263 raise Exception(process.stderr) | 380 raise Exception(process.stderr) |
266 | 383 |
267 bam_path = "%s" % tmp_res | 384 bam_path = "%s" % tmp_res |
268 | 385 |
269 if os.path.exists(bam_path): | 386 if os.path.exists(bam_path): |
270 if args.sort_bam: | 387 if args.sort_bam: |
271 cmd = ['samtools', 'sort', '-@', str(args.num_threads), bam_path, 'sorted_bam'] | 388 cmd = [ |
389 "samtools", | |
390 "sort", | |
391 "-@", | |
392 str(args.num_threads), | |
393 bam_path, | |
394 "sorted_bam", | |
395 ] | |
272 logger.info("Sorting bam with: '%s'", cmd) | 396 logger.info("Sorting bam with: '%s'", cmd) |
273 process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 397 process = subprocess.Popen( |
398 args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT | |
399 ) | |
274 with process.stdout: | 400 with process.stdout: |
275 log_subprocess_output(logger, process.stdout) | 401 log_subprocess_output(logger, process.stdout) |
276 exitcode = process.wait() | 402 exitcode = process.wait() |
277 if exitcode != 0: | 403 if exitcode != 0: |
278 raise Exception(process.stderr) | 404 raise Exception(process.stderr) |
279 shutil.move('sorted_bam.bam', args.output) | 405 shutil.move("sorted_bam.bam", args.output) |
280 else: | 406 else: |
281 shutil.move(bam_path, args.output) | 407 shutil.move(bam_path, args.output) |
282 else: | 408 else: |
283 stop_err(logger, 'BAM file no found:\n%s' % bam_path) | 409 stop_err(logger, "BAM file no found:\n%s" % bam_path) |
284 | 410 |
285 except Exception as e: | 411 except Exception as e: |
286 stop_err(logger, 'Error in merging bam files!\n%s' % e) | 412 stop_err(logger, "Error in merging bam files!\n%s" % e) |
287 | 413 |
288 # Clean up temp dirs | 414 # Clean up temp dirs |
289 if tmp_index_dir and os.path.exists(tmp_index_dir): | 415 if tmp_index_dir and os.path.exists(tmp_index_dir): |
290 shutil.rmtree(tmp_index_dir) | 416 shutil.rmtree(tmp_index_dir) |
291 if os.path.exists(tmp_bismark_dir): | 417 if os.path.exists(tmp_bismark_dir): |
292 shutil.rmtree(tmp_bismark_dir) | 418 shutil.rmtree(tmp_bismark_dir) |
293 if os.path.exists(output_dir): | 419 if os.path.exists(output_dir): |
294 shutil.rmtree(output_dir) | 420 shutil.rmtree(output_dir) |
295 | 421 |
296 | 422 |
297 if __name__ == "__main__": __main__() | 423 if __name__ == "__main__": |
424 __main__() |