Repository 'batched_lastz'
hg clone https://toolshed.g2.bx.psu.edu/repos/richard-burhans/batched_lastz

Changeset 0:103538753e81 (2024-04-30)
Next changeset 1:ad3554614aad (2024-04-30)
Commit message:
planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/batched_lastz commit 7b119b432f721e228a73396d4e8f0d54350b0481
added:
batched_lastz.xml
macros.xml
run_lastz_tarball.py
diff -r 000000000000 -r 103538753e81 batched_lastz.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/batched_lastz.xml Tue Apr 30 21:06:58 2024 +0000
@@ -0,0 +1,20 @@
+<tool id="batched_lastz" name="Batched Lastz" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>: align batches of sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        run_lastz_tarball.py '--input=$tarball' '--output=$output' '--parallel=\${GALAXY_SLOTS:-2}'
+    ]]></command>
+    <inputs>
+        <param argument="--tarball" type="data" format="tgz" label="Tarball"/>
+    </inputs>
+    <outputs>
+        <data name="output" label="Output"/>
+    </outputs>
+    <help><![CDATA[
+    TODO: Fill in help.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
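
At run time Galaxy substitutes the Cheetah variables in the command block above, so the job effectively runs something like the following (dataset paths are illustrative):

    run_lastz_tarball.py --input=/galaxy/datasets/dataset_1.dat --output=/galaxy/datasets/dataset_2.dat --parallel=8

where --parallel comes from GALAXY_SLOTS and falls back to 2 when that variable is unset.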
diff -r 000000000000 -r 103538753e81 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 30 21:06:58 2024 +0000
@@ -0,0 +1,26 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">lastz</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">1.04.22</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+            @misc{
+                githublastz,
+                author = {Harris, Robert},
+                year = {2007},
+                title = {Improved pairwise alignment of genomic DNA},
+                publisher = {The Pennsylvania State University},
+                journal = {Ph. D. Thesis},
+                url = {http://www.bx.psu.edu/~rsharris/rsharris_phd_thesis_2007.pdf},
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
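
With these tokens, the <tool> element in batched_lastz.xml expands to version="1.04.22+galaxy0" with profile="21.05", and the requirements macro pins a dependency on lastz 1.04.22.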
diff -r 000000000000 -r 103538753e81 run_lastz_tarball.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/run_lastz_tarball.py Tue Apr 30 21:06:58 2024 +0000
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+
+import argparse
+import concurrent.futures
+import json
+import multiprocessing
+import os
+import queue
+import re
+import shutil
+import sys
+import subprocess
+import tarfile
+import tempfile
+import typing
+import time
+
+
+lastz_output_format_regex = re.compile(
+    r"^(?:axt\+?|blastn|cigar|differences|general-?.+|lav|lav\+text|maf[-+]?|none|paf(?::wfmash)?|rdotplot|sam-?|softsam-?|text)$",
+    re.IGNORECASE,
+)
+
+
+# Specifies the output format: lav, lav+text, axt, axt+, maf, maf+, maf-, sam, softsam, sam-, softsam-, cigar, BLASTN, PAF, PAF:wfmash, differences, rdotplot, text, general[:<fields>], or general-[:<fields>].
+# --format=none can be used when no alignment output is desired.
+
+
+def run_command(
+    instance: int,
+    input_queue: "queue.Queue[typing.Dict[str, typing.Any]]",
+    output_queue: "queue.Queue[float]",
+    debug: bool = False,
+) -> None:
+    os.chdir("galaxy/files")
+
+    while True:
+        command_dict = input_queue.get()
+
+        if not command_dict:
+            return
+
+        args = ["lastz"]
+        args.extend(command_dict["args"])
+
+        stdin = command_dict["stdin"]
+        if stdin is not None:
+            stdin = open(stdin, "r")
+
+        stdout = command_dict["stdout"]
+        if stdout is not None:
+            stdout = open(stdout, "w")
+
+        stderr = command_dict["stderr"]
+        if stderr is not None:
+            stderr = open(stderr, "w")
+
+        begin = time.perf_counter()
+        p = subprocess.run(args, stdin=stdin, stdout=stdout, stderr=stderr)
+
+        for var in [stdin, stdout, stderr]:
+            if var is not None:
+                var.close()
+
+        if p.returncode != 0:
+            sys.exit(f"command failed: {' '.join(args)}")
+        else:
+            stderr = command_dict["stderr"]
+            if stderr is not None:
+                try:
+                    statinfo = os.stat(stderr, follow_symlinks=False)
+                except:
+                    statinfo = None
+
+                if statinfo is None:
+                    sys.exit(f"unable to stat stderr file: {' '.join(args)}")
+
+                if statinfo.st_size != 0:
+                    sys.exit(f"stderr file is not empty: {' '.join(args)}")
+
+            elapsed = time.perf_counter() - begin
+            output_queue.put(elapsed)
+
+        if debug:
+            print(f"runtime {elapsed}", file=sys.stderr, flush=True)
+
+
+class BatchTar:
+    def __init__(self, pathname: str, debug: bool = False) -> None:
+        self.pathname = pathname
+        self.debug = debug
+        self.tarfile = None
+        self.commands: typing.List[typing.Dict[str, typing.Any]] = []
+        self._extract()
+        self._load_commands()
+
+    def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]:
+        for command in self.commands:
+            yield command
+
+    def _load_commands(self) -> None:
+        try:
+            f = open("galaxy/commands.json")
+        except FileNotFoundError:
+            sys.exit(
+                f"ERROR: input tarball missing galaxy/commands.json: {self.pathname}"
+            )
+
+        begin = time.perf_counter()
+        for json_line in f:
+            json_line = json_line.rstrip("\n")
+            try:
+                command_dict = json.loads(json_line)
+            except json.JSONDecodeError:
+                sys.exit(
+                    f"ERROR: bad json line in galaxy/commands.json: {self.pathname}"
+                )
+
+            self._load_command(command_dict)
+
+        f.close()
+        elapsed = time.perf_counter() - begin
+
+        if self.debug:
+            print(
+                f"loaded {len(self.commands)} commands in {elapsed} seconds ",
+                file=sys.stderr,
+                flush=True,
+            )
+
+    def _load_command(self, command_dict: typing.Dict[str, typing.Any]) -> None:
+        # check command_dict stru
[... diff truncated ...]
+                [...].basename(f.name)
+                f.close()
+                command_dict["args"].append(f"--output={output_file}")
+
+            if output_format is None:
+                output_format = "lav"
+                command_dict["args"].append(f"--format={output_format}")
+
+            if not lastz_output_format_regex.match(output_format):
+                sys.exit(f"ERROR: invalid output format: {output_format}")
+
+            self.output_file_format[output_file] = output_format
+
+        for output_file, output_format in self.output_file_format.items():
+            self.output_files.setdefault(output_format, [])
+            self.output_files[output_format].append(output_file)
+
+    def _set_target_query(self) -> None:
+        for command_dict in self.batch_tar.batch_commands():
+            new_args: typing.List[str] = []
+
+            for arg in command_dict["args"]:
+                if arg.startswith("--target="):
+                    new_args.insert(0, arg[9:])
+                elif arg.startswith("--query="):
+                    new_args.insert(1, arg[8:])
+                else:
+                    new_args.append(arg)
+
+            command_dict["args"] = new_args
+
+    def run(self) -> None:
+        run_times = []
+        begin = time.perf_counter()
+
+        with multiprocessing.Manager() as manager:
+            input_queue: queue.Queue[typing.Dict[str, typing.Any]] = manager.Queue()
+            output_queue: queue.Queue[float] = manager.Queue()
+
+            for command_dict in self.batch_tar.batch_commands():
+                input_queue.put(command_dict)
+
+            # use the empty dict as a sentinel
+            for _ in range(self.parallel):
+                input_queue.put({})
+
+            with concurrent.futures.ProcessPoolExecutor(
+                max_workers=self.parallel
+            ) as executor:
+                futures = [
+                    executor.submit(
+                        run_command,
+                        instance,
+                        input_queue,
+                        output_queue,
+                        debug=self.debug,
+                    )
+                    for instance in range(self.parallel)
+                ]
+
+            for f in concurrent.futures.as_completed(futures):
+                if not f.done() or f.cancelled() or f.exception() is not None:
+                    sys.exit("lastz command failed")
+
+            while not output_queue.empty():
+                run_time = output_queue.get()
+                run_times.append(run_time)
+
+        elapsed = time.perf_counter() - begin
+
+        if self.debug:
+            print(f"elapsed {elapsed}", file=sys.stderr, flush=True)
+
+        self._cleanup()
+
+    def _cleanup(self) -> None:
+        num_output_files = len(self.output_files.keys())
+
+        for file_type, file_list in self.output_files.items():
+            with open(f"output.{file_type}", "w") as ofh:
+                for filename in file_list:
+                    with open(f"galaxy/files/{filename}") as ifh:
+                        for line in ifh:
+                            ofh.write(line)
+
+        if num_output_files == 1:
+            file_type = list(self.output_files.keys())[0]
+            src_filename = f"output.{file_type}"
+            shutil.copy2(src_filename, self.output_pathname)
+
+
+def main() -> None:
+    if not hasattr(tarfile, "data_filter"):
+        sys.exit("ERROR: extracting may be unsafe; consider updating Python")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input", type=str, required=True)
+    parser.add_argument("--output", type=str, required=True)
+    parser.add_argument("--parallel", type=int, default=1, required=False)
+    parser.add_argument("--debug", action="store_true", required=False)
+
+    args = parser.parse_args()
+    runner = TarRunner(args.input, args.output, args.parallel, args.debug)
+    runner.run()
+
+
+if __name__ == "__main__":
+    main()
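
For reference, run_lastz_tarball.py expects its --input tarball to contain galaxy/commands.json (one JSON object per line with "args", "stdin", "stdout", and "stderr" keys) and a galaxy/files/ directory holding the sequences the lastz arguments refer to; --target=/--query= arguments are rewritten into positionals, and a missing --format= is filled in by the runner (the default format is lav). The sketch below assembles a minimal batch tarball in that layout; the file names and lastz arguments are illustrative only, not the layout produced by any particular upstream batching tool.

    #!/usr/bin/env python
    # Illustrative sketch only: build a tiny batch tarball in the layout
    # that run_lastz_tarball.py reads. File names and arguments are made up.

    import json
    import os
    import tarfile
    import tempfile

    batch = [
        {
            # lastz arguments for one alignment job in the batch
            "args": ["--target=chr1.fa", "--query=contigs.fa", "--format=maf"],
            "stdin": None,
            "stdout": None,
            "stderr": None,
        },
    ]

    with tempfile.TemporaryDirectory() as tmp:
        files_dir = os.path.join(tmp, "galaxy", "files")
        os.makedirs(files_dir)

        # Sequences live under galaxy/files, which is where run_command() chdirs.
        for name, fasta in [("chr1.fa", ">chr1\nACGTACGT\n"), ("contigs.fa", ">c1\nACGT\n")]:
            with open(os.path.join(files_dir, name), "w") as fh:
                fh.write(fasta)

        # One JSON object per line, as _load_commands() expects.
        with open(os.path.join(tmp, "galaxy", "commands.json"), "w") as fh:
            for command in batch:
                fh.write(json.dumps(command) + "\n")

        with tarfile.open("batch.tgz", "w:gz") as tar:
            tar.add(os.path.join(tmp, "galaxy"), arcname="galaxy")

Uploading batch.tgz as the tool's tgz input should then leave a single output.maf, which _cleanup() copies to the Galaxy output dataset because only one output format is present.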