Mercurial > repos > peterjc > mira4_assembler
changeset 4:1713289d9908 draft default tip
v0.0.11 tweak for use with bioconda dependencies
author | peterjc |
---|---|
date | Thu, 10 Aug 2017 11:09:10 -0400 |
parents | a4f602cc3aa9 |
children | |
files | tools/mira4_0/README.rst tools/mira4_0/mira4.py tools/mira4_0/mira4_bait.py tools/mira4_0/mira4_bait.xml tools/mira4_0/mira4_convert.py tools/mira4_0/mira4_de_novo.xml tools/mira4_0/mira4_make_bam.py tools/mira4_0/mira4_mapping.xml tools/mira4_0/mira4_validator.py tools/mira4_0/tool_dependencies.xml |
diffstat | 10 files changed, 252 insertions(+), 242 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/mira4_0/README.rst Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/README.rst Thu Aug 10 11:09:10 2017 -0400 @@ -1,7 +1,7 @@ Galaxy wrapper for the MIRA assembly program (v4.0) =================================================== -This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2011-2016 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -106,7 +106,13 @@ - MIRA 4.0.2 dependency now declared via dedicated Tool Shed package. v0.0.8 - Renamed folder now have a MIRA 4.9.x wrapper (internal change only). v0.0.9 - Additional unit tests now covering ``miraconvert``. - - Was missing ``mirabait`` wrapper since move to using Planemo. + - Re-include missing ``mirabait`` wrapper (accidentally left out when + changed to using Planemo for uploading to the Tool Shed). +v0.0.10 - Python 3 compatible syntax (internal change only). + - Use ``<command detect_errors="aggressive">`` (internal change only). + - Single quote command line arguments (internal change only). +v0.0.11 - For compatibility with MIRA installed from the BioConda package, + if ``$MIRA4`` is not set, assume binaries are on the ``$PATH``. ======= ======================================================================
--- a/tools/mira4_0/mira4.py Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4.py Thu Aug 10 11:09:10 2017 -0400 @@ -1,41 +1,42 @@ #!/usr/bin/env python """A simple wrapper script to call MIRA and collect its output. """ + +from __future__ import print_function + import os -import sys +import shutil import subprocess -import shutil +import sys +import tempfile import time -import tempfile + from optparse import OptionParser -#Do we need any PYTHONPATH magic? +# Do we need any PYTHONPATH magic? from mira4_make_bam import make_bam -WRAPPER_VER = "0.0.4" #Keep in sync with the XML file - -def sys_exit(msg, err=1): - sys.stderr.write(msg+"\n") - sys.exit(err) +WRAPPER_VER = "0.0.11" # Keep in sync with the XML file def get_version(mira_binary): - """Run MIRA to find its version number""" + """Run MIRA to find its version number.""" # At the commend line I would use: mira -v | head -n 1 # however there is some pipe error when doing that here. cmd = [mira_binary, "-v"] try: - child = subprocess.Popen(cmd, + child = subprocess.Popen(cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - except Exception, err: + except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) sys.exit(1) ver, tmp = child.communicate() del child return ver.split("\n", 1)[0].strip() -#Parse Command Line + +# Parse Command Line usage = """Galaxy MIRA4 wrapper script v%s - use as follows: $ python mira4.py ... 
@@ -68,35 +69,38 @@ out_fasta = options.fasta out_log = options.log -try: +if "MIRA4" in os.environ: mira_path = os.environ["MIRA4"] -except KeyError: - sys_exit("Environment variable $MIRA4 not set") -mira_binary = os.path.join(mira_path, "mira") -if not os.path.isfile(mira_binary): - sys_exit("Missing mira under $MIRA4, %r\nFolder contained: %s" - % (mira_binary, ", ".join(os.listdir(mira_path)))) -mira_convert = os.path.join(mira_path, "miraconvert") -if not os.path.isfile(mira_convert): - sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" - % (mira_convert, ", ".join(os.listdir(mira_path)))) + mira_binary = os.path.join(mira_path, "mira") + if not os.path.isfile(mira_binary): + sys.exit("Missing mira under $MIRA4, %r\nFolder contained: %s" + % (mira_binary, ", ".join(os.listdir(mira_path)))) + mira_convert = os.path.join(mira_path, "miraconvert") + if not os.path.isfile(mira_convert): + sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" + % (mira_convert, ", ".join(os.listdir(mira_path)))) +else: + sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") + mira_path = None + mira_binary = "mira" + mira_convert = "miraconvert" mira_ver = get_version(mira_binary) if not mira_ver.strip().startswith("4.0"): - sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) + sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) mira_convert_ver = get_version(mira_convert) if not mira_convert_ver.strip().startswith("4.0"): - sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) + sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) if options.version: - print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) + print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)) if mira_ver != mira_convert_ver: - print "WARNING: miraconvert %s" % mira_convert_ver + print("WARNING: miraconvert 
%s" % mira_convert_ver) sys.exit(0) if not manifest: - sys_exit("Manifest is required") + sys.exit("Manifest is required") elif not os.path.isfile(manifest): - sys_exit("Missing input MIRA manifest file: %r" % manifest) + sys.exit("Missing input MIRA manifest file: %r" % manifest) try: @@ -125,12 +129,12 @@ text = handle.read() handle.close() - #At time of writing, this is at the end of a file, - #but could be followed by a space in future... + # At time of writing, this is at the end of a file, + # but could be followed by a space in future... text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) - #Want to try to ensure this gets written to disk before MIRA attempts - #to open it - any networked file system may impose a delay... + # Want to try to ensure this gets written to disk before MIRA attempts + # to open it - any networked file system may impose a delay... handle = open(manifest, "w") handle.write(text) handle.flush() @@ -140,41 +144,39 @@ def log_manifest(manifest): """Write the manifest file to stderr.""" - sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60)) + sys.stderr.write("\n%s\nManifest file\n%s\n" % ("=" * 60, "=" * 60)) with open(manifest) as h: for line in h: sys.stderr.write(line) - sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) + sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("=" * 60, "=" * 60)) def collect_output(temp, name, handle): """Moves files to the output filenames (global variables).""" - n3 = (temp, name, name, name) f = "%s/%s_assembly/%s_d_results" % (temp, name, name) if not os.path.isdir(f): log_manifest(manifest) - sys_exit("Missing output folder") + sys.exit("Missing output folder") if not os.listdir(f): log_manifest(manifest) - sys_exit("Empty output folder") + sys.exit("Empty output folder") missing = [] old_maf = "%s/%s_out.maf" % (f, name) if not os.path.isfile(old_maf): - #Triggered extractLargeContigs.sh? + # Triggered extractLargeContigs.sh? 
old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) - #De novo or single strain mapping, + # De novo or single strain mapping, old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) ref_fasta = "%s/%s_out.padded.fasta" % (f, name) if not os.path.isfile(old_fasta): - #Mapping (StrainX versus reference) or de novo + # Mapping (StrainX versus reference) or de novo old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) if not os.path.isfile(old_fasta): old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) - missing = False for old, new in [(old_maf, out_maf), @@ -192,25 +194,27 @@ for filename in sorted(os.listdir(f)): sys.stderr.write("%s\n" % filename) - #For mapping mode, probably most people would expect a BAM file - #using the reference FASTA file... + # For mapping mode, probably most people would expect a BAM file + # using the reference FASTA file... if out_bam and out_bam != "-": if out_maf and out_maf != "-": msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) else: - #Not collecting the MAF file, use original location + # Not collecting the MAF file, use original location msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) if msg: - sys_exit(msg) + sys.exit(msg) + def clean_up(temp, name): folder = "%s/%s_assembly" % (temp, name) if os.path.isdir(folder): shutil.rmtree(folder) -#TODO - Run MIRA in /tmp or a configurable directory? -#Currently Galaxy puts us somewhere safe like: -#/opt/galaxy-dist/database/job_working_directory/846/ + +# TODO - Run MIRA in /tmp or a configurable directory? +# Currently Galaxy puts us somewhere safe like: +# /opt/galaxy-dist/database/job_working_directory/846/ temp = "." 
name = "MIRA" @@ -223,19 +227,19 @@ assert os.path.isdir(temp) d = "%s_assembly" % name -#This can fail on my development machine if stale folders exist -#under Galaxy's .../database/job_working_directory/ tree: +# This can fail on my development machine if stale folders exist +# under Galaxy's .../database/job_working_directory/ tree: assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) try: - #Check path access + # Check path access os.mkdir(d) -except Exception, err: +except Exception as err: log_manifest(manifest) sys.stderr.write("Error making directory %s\n%s" % (d, err)) sys.exit(1) -#print os.path.abspath(".") -#print cmd +# print(os.path.abspath(".")) +# print(cmd) if out_log and out_log != "-": handle = open(out_log, "w") @@ -251,34 +255,35 @@ handle.write("============================ Starting MIRA now ===============================\n") handle.flush() try: - #Run MIRA + # Run MIRA child = subprocess.Popen(cmd_list, stdout=handle, stderr=subprocess.STDOUT) -except Exception, err: +except Exception as err: log_manifest(manifest) sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) - #TODO - call clean up? + # TODO - call clean up? 
handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) handle.close() sys.exit(1) -#Use .communicate as can get deadlocks with .wait(), +# Use .communicate as can get deadlocks with .wait(), stdout, stderr = child.communicate() -assert not stdout and not stderr #Should be empty as sent to handle +assert not stdout and not stderr # Should be empty as sent to handle run_time = time.time() - start_time return_code = child.returncode handle.write("\n") handle.write("============================ MIRA has finished ===============================\n") handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) if return_code: - print "MIRA took %0.2f hours" % (run_time / 3600.0) + print("MIRA took %0.2f hours" % (run_time / 3600.0)) handle.write("Return error code %i from command:\n" % return_code) handle.write(cmd + "\n") handle.close() clean_up(temp, name) log_manifest(manifest) - sys_exit("Return error code %i from command:\n%s" % (return_code, cmd), - return_code) + sys.stderr.write("Return error code %i from command:\n" % return_code) + sys.stderr.write(cmd + "\n") + sys.exit(return_code) handle.flush() if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): @@ -291,21 +296,23 @@ handle.write("============================ (end of ec.log) =================================\n") handle.flush() -#print "Collecting output..." 
+# print("Collecting output...") start_time = time.time() collect_output(temp, name, handle) collect_time = time.time() - start_time -handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) -print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) +handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" + % (run_time / 3600.0, collect_time / 60.0)) +print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" + % (run_time / 3600.0, collect_time / 60.0)) if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): - #Treat as an error, but doing this AFTER collect_output + # Treat as an error, but doing this AFTER collect_output sys.stderr.write("Extract Large Contigs failed\n") handle.write("Extract Large Contigs failed\n") handle.close() sys.exit(1) -#print "Cleaning up..." +# print "Cleaning up..." clean_up(temp, name) handle.write("\nDone\n")
--- a/tools/mira4_0/mira4_bait.py Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_bait.py Thu Aug 10 11:09:10 2017 -0400 @@ -1,17 +1,16 @@ #!/usr/bin/env python """A simple wrapper script to call MIRA4's mirabait and collect its output. """ + +from __future__ import print_function + import os +import shutil +import subprocess import sys -import subprocess -import shutil import time -WRAPPER_VER = "0.0.5" #Keep in sync with the XML file - -def sys_exit(msg, err=1): - sys.stderr.write(msg+"\n") - sys.exit(err) +WRAPPER_VER = "0.0.11" # Keep in sync with the XML file def get_version(mira_binary): @@ -20,36 +19,40 @@ # however there is some pipe error when doing that here. cmd = [mira_binary, "-v"] try: - child = subprocess.Popen(cmd, + child = subprocess.Popen(cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - except Exception, err: + except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) sys.exit(1) ver, tmp = child.communicate() del child - #Workaround for -v not working in mirabait 4.0RC4 + # Workaround for -v not working in mirabait 4.0RC4 if "invalid option" in ver.split("\n", 1)[0]: for line in ver.split("\n", 1): if " version " in line: line = line.split() - return line[line.index("version")+1].rstrip(")") - sys_exit("Could not determine MIRA version:\n%s" % ver) + return line[line.index("version") + 1].rstrip(")") + sys.exit("Could not determine MIRA version:\n%s" % ver) return ver.split("\n", 1)[0] -try: + +if "MIRA4" in os.environ: mira_path = os.environ["MIRA4"] -except KeyError: - sys_exit("Environment variable $MIRA4 not set") -mira_binary = os.path.join(mira_path, "mirabait") -if not os.path.isfile(mira_binary): - sys_exit("Missing mirabait under $MIRA4, %r\nFolder contained: %s" - % (mira_binary, ", ".join(os.listdir(mira_path)))) + mira_binary = os.path.join(mira_path, "mirabait") + if not os.path.isfile(mira_binary): + sys.exit("Missing mirabait under $MIRA4, 
%r\nFolder contained: %s" + % (mira_binary, ", ".join(os.listdir(mira_path)))) +else: + sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") + mira_path = None + mira_binary = "mirabait" + mira_ver = get_version(mira_binary) if not mira_ver.strip().startswith("4.0"): - sys_exit("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver) + sys.exit("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver) if "-v" in sys.argv or "--version" in sys.argv: - print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) + print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)) sys.exit(0) @@ -60,7 +63,7 @@ elif format == "mira": format = "maf" elif format != "fasta": - sys_exit("Was not expected format %r" % format) + sys.exit("Was not expected format %r" % format) assert out_file.endswith(".dat") out_file_stem = out_file[:-4] @@ -71,44 +74,44 @@ if output_choice == "pos": pass elif output_choice == "neg": - #Invert the selection... + # Invert the selection... cmd_list.insert(1, "-i") else: - sys_exit("Output choice should be 'pos' or 'neg', not %r" % output_choice) + sys.exit("Output choice should be 'pos' or 'neg', not %r" % output_choice) if strand_choice == "both": pass elif strand_choice == "fwd": - #Ingore reverse strand... + # Ingore reverse strand... 
cmd_list.insert(1, "-r") else: - sys_exit("Strand choice should be 'both' or 'fwd', not %r" % strand_choice) + sys.exit("Strand choice should be 'both' or 'fwd', not %r" % strand_choice) cmd = " ".join(cmd_list) -#print cmd +# print cmd start_time = time.time() try: - #Run MIRA - child = subprocess.Popen(cmd_list, + # Run MIRA + child = subprocess.Popen(cmd_list, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) -except Exception, err: +except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) sys.exit(1) -#Use .communicate as can get deadlocks with .wait(), +# Use .communicate as can get deadlocks with .wait(), stdout, stderr = child.communicate() -assert stderr is None # Due to way we ran with subprocess +assert stderr is None # Due to way we ran with subprocess run_time = time.time() - start_time return_code = child.returncode -print "mirabait took %0.2f minutes" % (run_time / 60.0) +print("mirabait took %0.2f minutes" % (run_time / 60.0)) if return_code: sys.stderr.write(stdout) - sys_exit("Return error code %i from command:\n%s" % (return_code, cmd), + sys.exit("Return error code %i from command:\n%s" % (return_code, cmd), return_code) -#Capture output +# Capture output out_tmp = out_file_stem + "." + format if not os.path.isfile(out_tmp): sys.stderr.write(stdout) - sys_exit("Missing output file from mirabait: %s" % out_tmp) + sys.exit("Missing output file from mirabait: %s" % out_tmp) shutil.move(out_tmp, out_file)
--- a/tools/mira4_0/mira4_bait.xml Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_bait.xml Thu Aug 10 11:09:10 2017 -0400 @@ -1,21 +1,17 @@ -<tool id="mira_4_0_bait" name="MIRA v4.0 mirabait" version="0.0.9"> +<tool id="mira_4_0_bait" name="MIRA v4.0 mirabait" version="0.0.11"> <description>Filter reads using kmer matches</description> <requirements> - <requirement type="binary">mirabait</requirement> <requirement type="package" version="4.0.2">MIRA</requirement> </requirements> - <stdio> - <!-- Assume anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> - <version_command interpreter="python">mira4_bait.py --version</version_command> - <command interpreter="python"> -mira4_bait.py $input_reads.ext $output_choice $strand_choice $kmer_length $min_occurence "$bait_file" "$input_reads" "$output_reads" + <version_command> +python $__tool_directory__/mira4_bait.py --version + </version_command> + <command detect_errors="aggressive"> +python $__tool_directory__/mira4_bait.py $input_reads.ext $output_choice $strand_choice $kmer_length $min_occurence '$bait_file' '$input_reads' '$output_reads' </command> <inputs> - <param name="bait_file" type="data" format="fasta,fastq,mira" required="true" label="Bait file (what to look for)" /> - <param name="input_reads" type="data" format="fasta,fastq,mira" required="true" label="Reads to search" /> + <param name="bait_file" type="data" format="fasta,fastq,mira" label="Bait file (what to look for)" /> + <param name="input_reads" type="data" format="fasta,fastq,mira" label="Reads to search" /> <param name="output_choice" type="select" label="Output positive matches, or negative matches?"> <option value="pos">Just positive matches</option> <option value="neg">Just negative matches</option> @@ -32,7 +28,7 @@ </inputs> <outputs> <data name="output_reads" format_source="input_reads" metadata_source="input_reads" - label="$input_reads.name #if str($output_choice)=='pos' then 
'matching' else 'excluding matches to' # $bait_file.name"/> + label="$input_reads.name #if str($output_choice)=='pos' then 'matching' else 'excluding matches to' # $bait_file.name"/> </outputs> <tests> <test>
--- a/tools/mira4_0/mira4_convert.py Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_convert.py Thu Aug 10 11:09:10 2017 -0400 @@ -3,44 +3,46 @@ This focuses on the miraconvert binary. """ + +from __future__ import print_function + import os -import sys -import subprocess import shutil -import time -import tempfile +import subprocess +import sys + from optparse import OptionParser + try: from io import BytesIO except ImportError: - #Should we worry about Python 2.5 or older? + # Should we worry about Python 2.5 or older? from StringIO import StringIO as BytesIO -#Do we need any PYTHONPATH magic? +# Do we need any PYTHONPATH magic? from mira4_make_bam import depad -WRAPPER_VER = "0.0.7" # Keep in sync with the XML file +WRAPPER_VER = "0.0.11" # Keep in sync with the XML file -def sys_exit(msg, err=1): - sys.stderr.write(msg+"\n") - sys.exit(err) def run(cmd): - #Avoid using shell=True when we call subprocess to ensure if the Python - #script is killed, so too is the child process. + # Avoid using shell=True when we call subprocess to ensure if the Python + # script is killed, so too is the child process. 
try: - child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except Exception, err: - sys_exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) - #Use .communicate as can get deadlocks with .wait(), + child = subprocess.Popen(cmd, universal_newlines=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except Exception as err: + sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) + # Use .communicate as can get deadlocks with .wait(), stdout, stderr = child.communicate() return_code = child.returncode if return_code: cmd_str = " ".join(cmd) # doesn't quote spaces etc if stderr and stdout: - sys_exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) + sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) else: - sys_exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) + sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) + def get_version(mira_binary): """Run MIRA to find its version number""" @@ -51,14 +53,15 @@ child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - except Exception, err: + except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) sys.exit(1) ver, tmp = child.communicate() del child return ver.split("\n", 1)[0].strip() -#Parse Command Line + +# Parse Command Line usage = """Galaxy MIRA4 wrapper script v%s - use as follows: $ python mira4_convert.py ... @@ -98,7 +101,7 @@ help="Show version and quit") options, args = parser.parse_args() if args: - sys_exit("Expected options (e.g. --input example.maf), not arguments") + sys.exit("Expected options (e.g. 
--input example.maf), not arguments") input_maf = options.input out_maf = options.maf @@ -107,47 +110,50 @@ out_ace = options.ace out_cstats = options.cstats -try: +if "MIRA4" in os.environ: mira_path = os.environ["MIRA4"] -except KeyError: - sys_exit("Environment variable $MIRA4 not set") -mira_convert = os.path.join(mira_path, "miraconvert") -if not os.path.isfile(mira_convert): - sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" - % (mira_convert, ", ".join(os.listdir(mira_path)))) + mira_convert = os.path.join(mira_path, "miraconvert") + if not os.path.isfile(mira_convert): + sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" + % (mira_convert, ", ".join(os.listdir(mira_path)))) +else: + sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") + mira_path = None + mira_convert = "miraconvert" mira_convert_ver = get_version(mira_convert) if not mira_convert_ver.strip().startswith("4.0"): - sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)) + sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)) if options.version: print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)) sys.exit(0) if not input_maf: - sys_exit("Input MIRA file is required") + sys.exit("Input MIRA file is required") elif not os.path.isfile(input_maf): - sys_exit("Missing input MIRA file: %r" % input_maf) + sys.exit("Missing input MIRA file: %r" % input_maf) if not (out_maf or out_bam or out_fasta or out_ace or out_cstats): - sys_exit("No output requested") + sys.exit("No output requested") def check_min_int(value, name): try: i = int(value) - except: - sys_exit("Bad %s setting, %r" % (name, value)) + except ValueError: + sys.exit("Bad %s setting, %r" % (name, value)) if i < 0: - sys_exit("Negative %s setting, %r" % (name, value)) + sys.exit("Negative %s setting, %r" % (name, value)) return i + min_length = 
check_min_int(options.min_length, "minimum length") min_cover = check_min_int(options.min_cover, "minimum cover") min_reads = check_min_int(options.min_reads, "minimum reads") -#TODO - Run MIRA in /tmp or a configurable directory? -#Currently Galaxy puts us somewhere safe like: -#/opt/galaxy-dist/database/job_working_directory/846/ +# TODO - Run MIRA in /tmp or a configurable directory? +# Currently Galaxy puts us somewhere safe like: +# /opt/galaxy-dist/database/job_working_directory/846/ temp = "." @@ -164,7 +170,7 @@ if out_bam: cmd_list.append("samnbb") if not out_fasta: - #Need this for samtools depad + # Need this for samtools depad out_fasta = os.path.join(temp, "depadded.fasta") if out_fasta: cmd_list.append("fasta") @@ -174,28 +180,30 @@ cmd_list.append("cstats") run(cmd_list) + def collect(old, new): if not os.path.isfile(old): - sys_exit("Missing expected output file %s" % old) + sys.exit("Missing expected output file %s" % old) shutil.move(old, new) + if out_maf: collect(os.path.join(temp, "converted.maf"), out_maf) if out_fasta: - #Can we look at the MAF file to see if there are multiple strains? + # Can we look at the MAF file to see if there are multiple strains? old = os.path.join(temp, "converted_AllStrains.unpadded.fasta") if os.path.isfile(old): collect(old, out_fasta) else: - #Might the output be filtered down to zero contigs? + # Might the output be filtered down to zero contigs? 
old = os.path.join(temp, "converted.fasta") if not os.path.isfile(old): - sys_exit("Missing expected output FASTA file") + sys.exit("Missing expected output FASTA file") elif os.path.getsize(old) == 0: print("Warning - no contigs (harsh filters?)") collect(old, out_fasta) else: - sys_exit("Missing expected output FASTA file (only generic file present)") + sys.exit("Missing expected output FASTA file (only generic file present)") if out_ace: collect(os.path.join(temp, "converted.maf"), out_ace) if out_cstats: @@ -207,7 +215,7 @@ if not os.path.isfile(old): old = os.path.join(temp, "converted.sam") if not os.path.isfile(old): - sys_exit("Missing expected intermediate file %s" % old) + sys.exit("Missing expected intermediate file %s" % old) h = BytesIO() msg = depad(out_fasta, old, out_bam, h) if msg: @@ -217,7 +225,7 @@ sys.exit(1) h.close() if out_fasta == os.path.join(temp, "depadded.fasta"): - #Not asked for by Galaxy, no longer needed + # Not asked for by Galaxy, no longer needed os.remove(out_fasta) if min_length or min_cover or min_reads:
--- a/tools/mira4_0/mira4_de_novo.xml Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_de_novo.xml Thu Aug 10 11:09:10 2017 -0400 @@ -1,29 +1,24 @@ -<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assember" version="0.0.9"> +<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assember" version="0.0.11"> <description>Takes Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description> <requirements> - <requirement type="binary">mira</requirement> - <requirement type="binary">miraconvert</requirement> <requirement type="package" version="4.0.2">MIRA</requirement> - <requirement type="binary">samtools</requirement> <requirement type="package" version="0.1.19">samtools</requirement> </requirements> <code file="mira4_validator.py" /> - <stdio> - <!-- Assume anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> - <version_command interpreter="python">mira4.py --version</version_command> - <command interpreter="python">mira4.py ---manifest "$manifest" + <version_command> +python $__tool_directory__/mira4.py --version + </version_command> + <command detect_errors="aggressive"> +python $__tool_directory__/mira4.py +--manifest '$manifest' #if str($maf_wanted)=="true": ---maf "$out_maf" +--maf '$out_maf' #end if #if str($bam_wanted)=="true": ---bam "$out_bam" +--bam '$out_bam' #end if ---fasta "$out_fasta" ---log "$out_log" +--fasta '$out_fasta' +--log '$out_log' </command> <configfiles> <configfile name="manifest"> @@ -133,11 +128,11 @@ </when> <when value="none" /><!-- no further questions --> </conditional> - <param name="filenames" type="data" format="fastq,mira" multiple="true" required="true" label="Read file(s)" + <param name="filenames" type="data" format="fastq,mira" multiple="true" optional="false" label="Read file(s)" help="Multiple files allowed, for example paired reads can be given as two files (MIRA looks at read names to identify pairs)." 
/> </repeat> - <param name="maf_wanted" type="boolean" label="Output assembly in MIRA's own format?" checked="False" /> - <param name="bam_wanted" type="boolean" label="Convert assembly into BAM format?" checked="True" /> + <param name="maf_wanted" type="boolean" label="Output assembly in MIRA's own format?" checked="false" /> + <param name="bam_wanted" type="boolean" label="Convert assembly into BAM format?" checked="true" /> </inputs> <outputs> <data name="out_fasta" format="fasta" label="MIRA de novo contigs (FASTA)" />
--- a/tools/mira4_0/mira4_make_bam.py Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_make_bam.py Thu Aug 10 11:09:10 2017 -0400 @@ -1,41 +1,41 @@ #!/usr/bin/env python """Wrapper script using miraconvert & samtools to get BAM from MIRA. """ + import os -import sys import shutil import subprocess +import sys import tempfile -def sys_exit(msg, err=1): - sys.stderr.write(msg+"\n") - sys.exit(err) def run(cmd, log_handle): try: child = subprocess.Popen(cmd, shell=True, + universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - except Exception, err: + except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) - #TODO - call clean up? + # TODO - call clean up? log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) sys.exit(1) - #Use .communicate as can get deadlocks with .wait(), + # Use .communicate as can get deadlocks with .wait(), stdout, stderr = child.communicate() - assert not stderr #Should be empty as sent to stdout + assert not stderr # Should be empty as sent to stdout if len(stdout) > 10000: - #miraconvert can be very verbose (is holding stdout in RAM a problem?) + # miraconvert can be very verbose (is holding stdout in RAM a problem?) 
stdout = stdout.split("\n") stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:] stdout = "\n".join(stdout) log_handle.write(stdout) return child.returncode + def depad(fasta_file, sam_file, bam_file, log_handle): log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n") - #Also doing SAM to (uncompressed) BAM during depad - bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder + # Also doing SAM to (uncompressed) BAM during depad + bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem) return_code = run(cmd, log_handle) if return_code: @@ -52,12 +52,10 @@ return "samtools indexing of BAM file failed to produce BAI file" shutil.move(bam_stem + ".bam", bam_file) - os.remove(bam_stem + ".bam.bai") #Let Galaxy handle that... + os.remove(bam_stem + ".bam.bai") # Let Galaxy handle that... def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle): - if not os.path.isfile(mira_convert): - return "Missing binary %r" % mira_convert if not os.path.isfile(maf_file): return "Missing input MIRA file: %r" % maf_file if not os.path.isfile(fasta_file): @@ -75,7 +73,7 @@ if not os.path.isfile(sam_file): return "Conversion from MIRA to SAM failed" - #Also doing SAM to (uncompressed) BAM during depad + # Also doing SAM to (uncompressed) BAM during depad msg = depad(fasta_file, sam_file, bam_file, log_handle) if msg: return msg @@ -83,10 +81,11 @@ os.remove(sam_file) os.rmdir(tmp_dir) - return None #Good :) + return None # Good :) + if __name__ == "__main__": mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:] msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout) if msg: - sys_exit(msg) + sys.exit(msg)
--- a/tools/mira4_0/mira4_mapping.xml Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_mapping.xml Thu Aug 10 11:09:10 2017 -0400 @@ -1,28 +1,23 @@ -<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.9"> +<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.11"> <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description> <requirements> - <requirement type="binary">mira</requirement> - <requirement type="binary">miraconvert</requirement> <requirement type="package" version="4.0.2">MIRA</requirement> - <requirement type="binary">samtools</requirement> <requirement type="package" version="0.1.19">samtools</requirement> </requirements> - <stdio> - <!-- Assume anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> - <version_command interpreter="python">mira4.py --version</version_command> - <command interpreter="python">mira4.py ---manifest "$manifest" + <version_command> +python $__tool_directory__/mira4.py --version + </version_command> + <command detect_errors="aggressive"> +python $__tool_directory__/mira4.py +--manifest '$manifest' #if str($maf_wanted) == "true": ---maf "$out_maf" +--maf '$out_maf' #end if #if str($bam_wanted) == "true": ---bam "$out_bam" +--bam '$out_bam' #end if ---fasta "$out_fasta" ---log "$out_log" +--fasta '$out_fasta' +--log '$out_log' </command> <configfiles> <configfile name="manifest"> @@ -121,7 +116,7 @@ <!-- TODO? Allow strain settings for reference(s) and reads? --> <!-- TODO? Use a repeat to allow for multi-strain references? --> <!-- TODO? Add strain to the mapping read groups? 
--> - <param name="references" type="data" format="fasta,fastq,mira" multiple="true" required="true" label="Backbone reference file(s)" + <param name="references" type="data" format="fasta,fastq,mira" multiple="true" optional="false" label="Backbone reference file(s)" help="Multiple files allowed, for example one FASTA file per chromosome or plasmid." /> <param name="strain_setup" type="select" label="Strain configuration (reference vs reads)"> <option value="default">Different strains - mapping reads onto a related reference ('StrainX' vs 'ReferenceStrain')</option> @@ -158,11 +153,11 @@ </when> <when value="none" /><!-- no further questions --> </conditional> - <param name="filenames" type="data" format="fastq,mira" multiple="true" required="true" label="Read file(s)" + <param name="filenames" type="data" format="fastq,mira" multiple="true" optional="false" label="Read file(s)" help="Multiple files allowed, for example paired reads can be given as two files (MIRA looks at read names to identify pairs)." /> </repeat> - <param name="maf_wanted" type="boolean" label="Output mapping in MIRA's own format?" checked="False" /> - <param name="bam_wanted" type="boolean" label="Convert mapping into BAM format?" checked="True" /> + <param name="maf_wanted" type="boolean" label="Output mapping in MIRA's own format?" checked="false" /> + <param name="bam_wanted" type="boolean" label="Convert mapping into BAM format?" checked="true" /> </inputs> <outputs> <data name="out_fasta" format="fasta" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping contigs (FASTA)" />
--- a/tools/mira4_0/mira4_validator.py Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/mira4_validator.py Thu Aug 10 11:09:10 2017 -0400 @@ -1,5 +1,6 @@ -#Called from the Galaxy Tool XML file -#import sys +# Called from the Galaxy Tool XML file +# import sys + def validate_input(trans, error_map, param_values, page_param_map): """Validates the min_size/max_size user input, before execution.""" @@ -13,41 +14,41 @@ min_size = str(segments["min_size"]).strip() max_size = str(segments["max_size"]).strip() - #sys.stderr.write("DEBUG min_size=%r, max_size=%r\n" % (min_size, max_size)) + # sys.stderr.write("DEBUG min_size=%r, max_size=%r\n" % (min_size, max_size)) - #Somehow Galaxy seems to turn an empty field into string "None"... - if min_size=="None": + # Somehow Galaxy seems to turn an empty field into string "None"... + if min_size == "None": min_size = "" - if max_size=="None": + if max_size == "None": max_size = "" - if min_size=="" and max_size=="": - #Both missing is good + if min_size == "" and max_size == "": + # Both missing is good pass - elif min_size=="": + elif min_size == "": err["min_size"] = "Minimum size required if maximum size given" - elif max_size=="": + elif max_size == "": err["max_size"] = "Maximum size required if minimum size given" - + if min_size: try: min_size_int = int(min_size) if min_size_int < 0: err["min_size"] = "Minumum size must not be negative (%i)" % min_size_int - min_size = None # Avoid doing comparison below + min_size = None # Avoid doing comparison below except ValueError: err["min_size"] = "Minimum size is not an integer (%s)" % min_size - min_size = None # Avoid doing comparison below + min_size = None # Avoid doing comparison below if max_size: try: max_size_int = int(max_size) - if max_size_int< 0: + if max_size_int < 0: err["max_size"] = "Maximum size must not be negative (%i)" % max_size_int - max_size = None # Avoid doing comparison below + max_size = None # Avoid doing comparison below except ValueError: 
err["max_size"] = "Maximum size is not an integer (%s)" % max_size - max_size = None # Avoid doing comparison below + max_size = None # Avoid doing comparison below if min_size and max_size and min_size_int > max_size_int: msg = "Minimum size must be less than maximum size (%i vs %i)" % (min_size_int, max_size_int) @@ -55,10 +56,10 @@ err["max_size"] = msg if err: - err_list.append({"segments":err}) + err_list.append({"segments": err}) else: err_list.append(dict()) if any(err_list): - #Return an error map only if any readgroup gave errors + # Return an error map only if any readgroup gave errors error_map["read_group"] = err_list
--- a/tools/mira4_0/tool_dependencies.xml Fri Oct 02 06:12:23 2015 -0400 +++ b/tools/mira4_0/tool_dependencies.xml Thu Aug 10 11:09:10 2017 -0400 @@ -1,9 +1,9 @@ <?xml version="1.0"?> <tool_dependency> <package name="samtools" version="0.1.19"> - <repository changeset_revision="96aab723499f" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c9bd782f5342" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <package name="MIRA" version="4.0.2"> - <repository changeset_revision="8564aa1dbbf5" name="package_mira_4_0_2" owner="peterjc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="49974733ab62" name="package_mira_4_0_2" owner="peterjc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>