Repository 'batched_lastz'
hg clone https://toolshed.g2.bx.psu.edu/repos/richard-burhans/batched_lastz

Changeset 0:103538753e81 (2024-04-30)
Next changeset 1:ad3554614aad (2024-04-30)
Commit message:
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/batched_lastz commit 7b119b432f721e228a73396d4e8f0d54350b0481
added:
batched_lastz.xml
macros.xml
run_lastz_tarball.py
diff -r 000000000000 -r 103538753e81 batched_lastz.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/batched_lastz.xml Tue Apr 30 21:06:58 2024 +0000
@@ -0,0 +1,20 @@
+<tool id="batched_lastz" name="Batched Lastz" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>: align batches of sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        run_lastz_tarball.py '--input=$tarball' '--output=$output' '--parallel=\${GALAXY_SLOTS:-2}'
+    ]]></command>
+    <inputs>
+        <param argument="--tarball" type="data" format="tgz" label="Tarball"/>
+    </inputs>
+    <outputs>
+        <data name="output" label="Output"/>
+    </outputs>
+    <help><![CDATA[
+    TODO: Fill in help.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
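
At run time Galaxy substitutes the Cheetah variables in the command block above, so the job effectively runs something like the following (dataset paths are illustrative):

    run_lastz_tarball.py --input=/galaxy/datasets/dataset_1.dat --output=/galaxy/datasets/dataset_2.dat --parallel=8

where --parallel comes from GALAXY_SLOTS and falls back to 2 when that variable is unset.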
diff -r 000000000000 -r 103538753e81 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 30 21:06:58 2024 +0000
@@ -0,0 +1,26 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">lastz</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">1.04.22</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+            @misc{
+                githublastz,
+                author = {Harris, Robert},
+                year = {2007},
+                title = {Improved pairwise alignment of genomic DNA},
+                publisher = {The Pennsylvania State University},
+                journal = {Ph. D. Thesis},
+                url = {http://www.bx.psu.edu/~rsharris/rsharris_phd_thesis_2007.pdf},
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
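
With these tokens, the <tool> element in batched_lastz.xml expands to version="1.04.22+galaxy0" with profile="21.05", and the requirements macro pins a dependency on lastz 1.04.22.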
diff -r 000000000000 -r 103538753e81 run_lastz_tarball.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/run_lastz_tarball.py Tue Apr 30 21:06:58 2024 +0000
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+
+import argparse
+import concurrent.futures
+import json
+import multiprocessing
+import os
+import queue
+import re
+import shutil
+import sys
+import subprocess
+import tarfile
+import tempfile
+import typing
+import time
+
+
+lastz_output_format_regex = re.compile(
+    r"^(?:axt\+?|blastn|cigar|differences|general-?.+|lav|lav\+text|maf[-+]?|none|paf(?::wfmash)?|rdotplot|sam-?|softsam-?|text)$",
+    re.IGNORECASE,
+)
+
+
+# Specifies the output format: lav, lav+text, axt, axt+, maf, maf+, maf-, sam, softsam, sam-, softsam-, cigar, BLASTN, PAF, PAF:wfmash, differences, rdotplot, text, general[:<fields>], or general-[:<fields>].
+# --format=none can be used when no alignment output is desired.
+
+
+def run_command(
+    instance: int,
+    input_queue: "queue.Queue[typing.Dict[str, typing.Any]]",
+    output_queue: "queue.Queue[float]",
+    debug: bool = False,
+) -> None:
+    os.chdir("galaxy/files")
+
+    while True:
+        command_dict = input_queue.get()
+
+        if not command_dict:
+            return
+
+        args = ["lastz"]
+        args.extend(command_dict["args"])
+
+        stdin = command_dict["stdin"]
+        if stdin is not None:
+            stdin = open(stdin, "r")
+
+        stdout = command_dict["stdout"]
+        if stdout is not None:
+            stdout = open(stdout, "w")
+
+        stderr = command_dict["stderr"]
+        if stderr is not None:
+            stderr = open(stderr, "w")
+
+        begin = time.perf_counter()
+        p = subprocess.run(args, stdin=stdin, stdout=stdout, stderr=stderr)
+
+        for var in [stdin, stdout, stderr]:
+            if var is not None:
+                var.close()
+
+        if p.returncode != 0:
+            sys.exit(f"command failed: {' '.join(args)}")
+        else:
+            stderr = command_dict["stderr"]
+            if stderr is not None:
+                try:
+                    statinfo = os.stat(stderr, follow_symlinks=False)
+                except:
+                    statinfo = None
+
+                if statinfo is None:
+                    sys.exit(f"unable to stat stderr file: {' '.join(args)}")
+
+                if statinfo.st_size != 0:
+                    sys.exit(f"stderr file is not empty: {' '.join(args)}")
+
+            elapsed = time.perf_counter() - begin
+            output_queue.put(elapsed)
+
+        if debug:
+            print(f"runtime {elapsed}", file=sys.stderr, flush=True)
+
+
+class BatchTar:
+    def __init__(self, pathname: str, debug: bool = False) -> None:
+        self.pathname = pathname
+        self.debug = debug
+        self.tarfile = None
+        self.commands: typing.List[typing.Dict[str, typing.Any]] = []
+        self._extract()
+        self._load_commands()
+
+    def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]:
+        for command in self.commands:
+            yield command
+
+    def _load_commands(self) -> None:
+        try:
+            f = open("galaxy/commands.json")
+        except FileNotFoundError:
+            sys.exit(
+                f"ERROR: input tarball missing galaxy/commands.json: {self.pathname}"
+            )
+
+        begin = time.perf_counter()
+        for json_line in f:
+            json_line = json_line.rstrip("\n")
+            try:
+                command_dict = json.loads(json_line)
+            except json.JSONDecodeError:
+                sys.exit(
+                    f"ERROR: bad json line in galaxy/commands.json: {self.pathname}"
+                )
+
+            self._load_command(command_dict)
+
+        f.close()
+        elapsed = time.perf_counter() - begin
+
+        if self.debug:
+            print(
+                f"loaded {len(self.commands)} commands in {elapsed} seconds ",
+                file=sys.stderr,
+                flush=True,
+            )
+
+    def _load_command(self, command_dict: typing.Dict[str, typing.Any]) -> None:
+        # check command_dict stru
[... diff truncated ...]
+                [...].basename(f.name)
+                f.close()
+                command_dict["args"].append(f"--output={output_file}")
+
+            if output_format is None:
+                output_format = "lav"
+                command_dict["args"].append(f"--format={output_format}")
+
+            if not lastz_output_format_regex.match(output_format):
+                sys.exit(f"ERROR: invalid output format: {output_format}")
+
+            self.output_file_format[output_file] = output_format
+
+        for output_file, output_format in self.output_file_format.items():
+            self.output_files.setdefault(output_format, [])
+            self.output_files[output_format].append(output_file)
+
+    def _set_target_query(self) -> None:
+        for command_dict in self.batch_tar.batch_commands():
+            new_args: typing.List[str] = []
+
+            for arg in command_dict["args"]:
+                if arg.startswith("--target="):
+                    new_args.insert(0, arg[9:])
+                elif arg.startswith("--query="):
+                    new_args.insert(1, arg[8:])
+                else:
+                    new_args.append(arg)
+
+            command_dict["args"] = new_args
+
+    def run(self) -> None:
+        run_times = []
+        begin = time.perf_counter()
+
+        with multiprocessing.Manager() as manager:
+            input_queue: queue.Queue[typing.Dict[str, typing.Any]] = manager.Queue()
+            output_queue: queue.Queue[float] = manager.Queue()
+
+            for command_dict in self.batch_tar.batch_commands():
+                input_queue.put(command_dict)
+
+            # use the empty dict as a sentinel
+            for _ in range(self.parallel):
+                input_queue.put({})
+
+            with concurrent.futures.ProcessPoolExecutor(
+                max_workers=self.parallel
+            ) as executor:
+                futures = [
+                    executor.submit(
+                        run_command,
+                        instance,
+                        input_queue,
+                        output_queue,
+                        debug=self.debug,
+                    )
+                    for instance in range(self.parallel)
+                ]
+
+            for f in concurrent.futures.as_completed(futures):
+                if not f.done() or f.cancelled() or f.exception() is not None:
+                    sys.exit("lastz command failed")
+
+            while not output_queue.empty():
+                run_time = output_queue.get()
+                run_times.append(run_time)
+
+        elapsed = time.perf_counter() - begin
+
+        if self.debug:
+            print(f"elapsed {elapsed}", file=sys.stderr, flush=True)
+
+        self._cleanup()
+
+    def _cleanup(self) -> None:
+        num_output_files = len(self.output_files.keys())
+
+        for file_type, file_list in self.output_files.items():
+            with open(f"output.{file_type}", "w") as ofh:
+                for filename in file_list:
+                    with open(f"galaxy/files/{filename}") as ifh:
+                        for line in ifh:
+                            ofh.write(line)
+
+        if num_output_files == 1:
+            file_type = list(self.output_files.keys())[0]
+            src_filename = f"output.{file_type}"
+            shutil.copy2(src_filename, self.output_pathname)
+
+
+def main() -> None:
+    if not hasattr(tarfile, "data_filter"):
+        sys.exit("ERROR: extracting may be unsafe; consider updating Python")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input", type=str, required=True)
+    parser.add_argument("--output", type=str, required=True)
+    parser.add_argument("--parallel", type=int, default=1, required=False)
+    parser.add_argument("--debug", action="store_true", required=False)
+
+    args = parser.parse_args()
+    runner = TarRunner(args.input, args.output, args.parallel, args.debug)
+    runner.run()
+
+
+if __name__ == "__main__":
+    main()
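
For reference, run_lastz_tarball.py expects its --input tarball to contain galaxy/commands.json (one JSON object per line with "args", "stdin", "stdout", and "stderr" keys) and a galaxy/files/ directory holding the sequences the lastz arguments refer to; --target=/--query= arguments are rewritten into positionals, and a missing --format= is filled in by the runner (the default format is lav). The sketch below assembles a minimal batch tarball in that layout; the file names and lastz arguments are illustrative only, not the layout produced by any particular upstream batching tool.

    #!/usr/bin/env python
    # Illustrative sketch only: build a tiny batch tarball in the layout
    # that run_lastz_tarball.py reads. File names and arguments are made up.

    import json
    import os
    import tarfile
    import tempfile

    batch = [
        {
            # lastz arguments for one alignment job in the batch
            "args": ["--target=chr1.fa", "--query=contigs.fa", "--format=maf"],
            "stdin": None,
            "stdout": None,
            "stderr": None,
        },
    ]

    with tempfile.TemporaryDirectory() as tmp:
        files_dir = os.path.join(tmp, "galaxy", "files")
        os.makedirs(files_dir)

        # Sequences live under galaxy/files, which is where run_command() chdirs.
        for name, fasta in [("chr1.fa", ">chr1\nACGTACGT\n"), ("contigs.fa", ">c1\nACGT\n")]:
            with open(os.path.join(files_dir, name), "w") as fh:
                fh.write(fasta)

        # One JSON object per line, as _load_commands() expects.
        with open(os.path.join(tmp, "galaxy", "commands.json"), "w") as fh:
            for command in batch:
                fh.write(json.dumps(command) + "\n")

        with tarfile.open("batch.tgz", "w:gz") as tar:
            tar.add(os.path.join(tmp, "galaxy"), arcname="galaxy")

Uploading batch.tgz as the tool's tgz input should then leave a single output.maf, which _cleanup() copies to the Galaxy output dataset because only one output format is present.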