comparison run_lastz_tarball.py @ 7:4cd7884635c2 draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/batched_lastz commit 869a01abe21b6283d3c83da38fc68761c2e94ba1
author richard-burhans
date Tue, 30 Jul 2024 19:53:53 +0000
parents 34d51e66256c
children 3a67d287d19f
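comparison (side-by-side; each row below shows the old line number and text, then the new line number and text)
legend: equal deleted inserted replaced
left column: 6:34d51e66256c    right column: 7:4cd7884635c2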
89 def __init__(self, pathname: str, debug: bool = False) -> None: 89 def __init__(self, pathname: str, debug: bool = False) -> None:
90 self.pathname = pathname 90 self.pathname = pathname
91 self.debug = debug 91 self.debug = debug
92 self.tarfile = None 92 self.tarfile = None
93 self.commands: typing.List[typing.Dict[str, typing.Any]] = [] 93 self.commands: typing.List[typing.Dict[str, typing.Any]] = []
94 self.format_name = "tabular"
94 self._extract() 95 self._extract()
95 self._load_commands() 96 self._load_commands()
97 self._load_format()
96 98
97 def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]: 99 def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]:
98 for command in self.commands: 100 for command in self.commands:
99 yield command 101 yield command
102
103 def final_output_format(self) -> str:
104 return self.format_name
105
106 def _extract(self) -> None:
107 try:
108 self.tarball = tarfile.open(
109 name=self.pathname, mode="r:*", format=tarfile.GNU_FORMAT
110 )
111 except FileNotFoundError:
112 sys.exit(f"ERROR: unable to find input tarball: {self.pathname}")
113 except tarfile.ReadError:
114 sys.exit(f"ERROR: error reading input tarball: {self.pathname}")
115
116 begin = time.perf_counter()
117 self.tarball.extractall(filter="data")
118 self.tarball.close()
119 elapsed = time.perf_counter() - begin
120
121 if self.debug:
122 print(
123 f"Extracted tarball in {elapsed} seconds", file=sys.stderr, flush=True
124 )
100 125
101 def _load_commands(self) -> None: 126 def _load_commands(self) -> None:
102 try: 127 try:
103 f = open("galaxy/commands.json") 128 f = open("galaxy/commands.json")
104 except FileNotFoundError: 129 except FileNotFoundError:
171 f"ERROR: unexpected json format in line in galaxy/commands.json: {self.pathname}" 196 f"ERROR: unexpected json format in line in galaxy/commands.json: {self.pathname}"
172 ) 197 )
173 198
174 self.commands.append(command_dict) 199 self.commands.append(command_dict)
175 200
176 def _extract(self) -> None: 201 def _load_format(self) -> None:
177 try: 202 try:
178 self.tarball = tarfile.open( 203 with open("galaxy/format.txt") as f:
179 name=self.pathname, mode="r:*", format=tarfile.GNU_FORMAT 204 format_name = f.readline()
180 ) 205 format_name = format_name.rstrip("\n")
181 except FileNotFoundError: 206 except FileNotFoundError:
182 sys.exit(f"ERROR: unable to find input tarball: {self.pathname}") 207 sys.exit(
183 except tarfile.ReadError: 208 f"ERROR: input tarball missing galaxy/format.txt: {self.pathname}"
184 sys.exit(f"ERROR: error reading input tarball: {self.pathname}") 209 )
185 210
186 begin = time.perf_counter() 211 if format_name in ["bam", "maf"]:
187 self.tarball.extractall(filter="data") 212 self.format_name = format_name
188 self.tarball.close() 213 elif format_name == "differences":
189 elapsed = time.perf_counter() - begin 214 self.format_name = "interval"
190
191 if self.debug:
192 print(
193 f"Extracted tarball in {elapsed} seconds", file=sys.stderr, flush=True
194 )
195 215
196 216
197 class TarRunner: 217 class TarRunner:
198 def __init__( 218 def __init__(
199 self, 219 self,
300 320
301 self._cleanup() 321 self._cleanup()
302 322
303 def _cleanup(self) -> None: 323 def _cleanup(self) -> None:
304 num_output_files = len(self.output_files.keys()) 324 num_output_files = len(self.output_files.keys())
325 if num_output_files != 1:
326 sys.exit(f"ERROR: expecting a single output file, found {num_output_files}")
327
328 final_output_format = self.batch_tar.final_output_format()
305 329
306 for file_type, file_list in self.output_files.items(): 330 for file_type, file_list in self.output_files.items():
307 with open(f"output.{file_type}", "w") as ofh: 331 with open(f"output.{final_output_format}", "w") as ofh:
308 print("##maf version=1", file=ofh) 332 if final_output_format == "maf":
333 print("##maf version=1", file=ofh)
309 for filename in file_list: 334 for filename in file_list:
310 with open(f"galaxy/files/{filename}") as ifh: 335 with open(f"galaxy/files/{filename}") as ifh:
311 for line in ifh: 336 for line in ifh:
312 ofh.write(line) 337 ofh.write(line)
313 338
314 if num_output_files == 1: 339 src_filename = f"output.{final_output_format}"
315 file_type = list(self.output_files.keys())[0] 340 shutil.copy2(src_filename, self.output_pathname)
316 src_filename = f"output.{file_type}" 341
317 shutil.copy2(src_filename, self.output_pathname) 342 output_metadata = {
343 "output": {
344 "ext": final_output_format,
345 }
346 }
347
348 with open("galaxy.json", "w") as ofh:
349 json.dump(output_metadata, ofh)
318 350
319 351
320 def main() -> None: 352 def main() -> None:
321 if not hasattr(tarfile, "data_filter"): 353 if not hasattr(tarfile, "data_filter"):
322 sys.exit("ERROR: extracting may be unsafe; consider updating Python") 354 sys.exit("ERROR: extracting may be unsafe; consider updating Python")