Mercurial > repos > richard-burhans > batched_lastz
comparison run_lastz_tarball.py @ 7:4cd7884635c2 draft
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/batched_lastz commit 869a01abe21b6283d3c83da38fc68761c2e94ba1
author | richard-burhans |
---|---|
date | Tue, 30 Jul 2024 19:53:53 +0000 |
parents | 34d51e66256c |
children | 3a67d287d19f |
Comparison legend: equal, deleted, inserted, replaced
6:34d51e66256c | 7:4cd7884635c2 |
---|---|
89 def __init__(self, pathname: str, debug: bool = False) -> None: | 89 def __init__(self, pathname: str, debug: bool = False) -> None: |
90 self.pathname = pathname | 90 self.pathname = pathname |
91 self.debug = debug | 91 self.debug = debug |
92 self.tarfile = None | 92 self.tarfile = None |
93 self.commands: typing.List[typing.Dict[str, typing.Any]] = [] | 93 self.commands: typing.List[typing.Dict[str, typing.Any]] = [] |
94 self.format_name = "tabular" | |
94 self._extract() | 95 self._extract() |
95 self._load_commands() | 96 self._load_commands() |
97 self._load_format() | |
96 | 98 |
97 def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]: | 99 def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]: |
98 for command in self.commands: | 100 for command in self.commands: |
99 yield command | 101 yield command |
102 | |
103 def final_output_format(self) -> str: | |
104 return self.format_name | |
105 | |
106 def _extract(self) -> None: | |
107 try: | |
108 self.tarball = tarfile.open( | |
109 name=self.pathname, mode="r:*", format=tarfile.GNU_FORMAT | |
110 ) | |
111 except FileNotFoundError: | |
112 sys.exit(f"ERROR: unable to find input tarball: {self.pathname}") | |
113 except tarfile.ReadError: | |
114 sys.exit(f"ERROR: error reading input tarball: {self.pathname}") | |
115 | |
116 begin = time.perf_counter() | |
117 self.tarball.extractall(filter="data") | |
118 self.tarball.close() | |
119 elapsed = time.perf_counter() - begin | |
120 | |
121 if self.debug: | |
122 print( | |
123 f"Extracted tarball in {elapsed} seconds", file=sys.stderr, flush=True | |
124 ) | |
100 | 125 |
101 def _load_commands(self) -> None: | 126 def _load_commands(self) -> None: |
102 try: | 127 try: |
103 f = open("galaxy/commands.json") | 128 f = open("galaxy/commands.json") |
104 except FileNotFoundError: | 129 except FileNotFoundError: |
171 f"ERROR: unexpected json format in line in galaxy/commands.json: {self.pathname}" | 196 f"ERROR: unexpected json format in line in galaxy/commands.json: {self.pathname}" |
172 ) | 197 ) |
173 | 198 |
174 self.commands.append(command_dict) | 199 self.commands.append(command_dict) |
175 | 200 |
176 def _extract(self) -> None: | 201 def _load_format(self) -> None: |
177 try: | 202 try: |
178 self.tarball = tarfile.open( | 203 with open("galaxy/format.txt") as f: |
179 name=self.pathname, mode="r:*", format=tarfile.GNU_FORMAT | 204 format_name = f.readline() |
180 ) | 205 format_name = format_name.rstrip("\n") |
181 except FileNotFoundError: | 206 except FileNotFoundError: |
182 sys.exit(f"ERROR: unable to find input tarball: {self.pathname}") | 207 sys.exit( |
183 except tarfile.ReadError: | 208 f"ERROR: input tarball missing galaxy/format.txt: {self.pathname}" |
184 sys.exit(f"ERROR: error reading input tarball: {self.pathname}") | 209 ) |
185 | 210 |
186 begin = time.perf_counter() | 211 if format_name in ["bam", "maf"]: |
187 self.tarball.extractall(filter="data") | 212 self.format_name = format_name |
188 self.tarball.close() | 213 elif format_name == "differences": |
189 elapsed = time.perf_counter() - begin | 214 self.format_name = "interval" |
190 | |
191 if self.debug: | |
192 print( | |
193 f"Extracted tarball in {elapsed} seconds", file=sys.stderr, flush=True | |
194 ) | |
195 | 215 |
196 | 216 |
197 class TarRunner: | 217 class TarRunner: |
198 def __init__( | 218 def __init__( |
199 self, | 219 self, |
300 | 320 |
301 self._cleanup() | 321 self._cleanup() |
302 | 322 |
303 def _cleanup(self) -> None: | 323 def _cleanup(self) -> None: |
304 num_output_files = len(self.output_files.keys()) | 324 num_output_files = len(self.output_files.keys()) |
325 if num_output_files != 1: | |
326 sys.exit(f"ERROR: expecting a single output file, found {num_output_files}") | |
327 | |
328 final_output_format = self.batch_tar.final_output_format() | |
305 | 329 |
306 for file_type, file_list in self.output_files.items(): | 330 for file_type, file_list in self.output_files.items(): |
307 with open(f"output.{file_type}", "w") as ofh: | 331 with open(f"output.{final_output_format}", "w") as ofh: |
308 print("##maf version=1", file=ofh) | 332 if final_output_format == "maf": |
333 print("##maf version=1", file=ofh) | |
309 for filename in file_list: | 334 for filename in file_list: |
310 with open(f"galaxy/files/{filename}") as ifh: | 335 with open(f"galaxy/files/{filename}") as ifh: |
311 for line in ifh: | 336 for line in ifh: |
312 ofh.write(line) | 337 ofh.write(line) |
313 | 338 |
314 if num_output_files == 1: | 339 src_filename = f"output.{final_output_format}" |
315 file_type = list(self.output_files.keys())[0] | 340 shutil.copy2(src_filename, self.output_pathname) |
316 src_filename = f"output.{file_type}" | 341 |
317 shutil.copy2(src_filename, self.output_pathname) | 342 output_metadata = { |
343 "output": { | |
344 "ext": final_output_format, | |
345 } | |
346 } | |
347 | |
348 with open("galaxy.json", "w") as ofh: | |
349 json.dump(output_metadata, ofh) | |
318 | 350 |
319 | 351 |
320 def main() -> None: | 352 def main() -> None: |
321 if not hasattr(tarfile, "data_filter"): | 353 if not hasattr(tarfile, "data_filter"): |
322 sys.exit("ERROR: extracting may be unsafe; consider updating Python") | 354 sys.exit("ERROR: extracting may be unsafe; consider updating Python") |