diff assembly_stats_txt.py @ 1:7556309ffbaf draft default tip

"planemo upload for repository https://github.com/phac-nml/galaxy_tools commit fb4c29f720748f46ff501140f2cd306bab6614f9"
author nml
date Fri, 29 May 2020 13:51:50 -0400
parents ad2b274663f8
children
line wrap: on
line diff
--- a/assembly_stats_txt.py	Tue Nov 07 12:28:31 2017 -0500
+++ b/assembly_stats_txt.py	Fri May 29 13:51:50 2020 -0400
@@ -3,106 +3,143 @@
 
 # Version 1.01 - bugs kindly corrected by Jan van Haarst
 # Modified by Matthew Gopez October 13th, 2017
+# Rewritten by Matthew Gopez May 25th, 2020
 
-import logging
+import argparse
 import os
+import shutil
 import subprocess
-import sys
+from pathlib import Path
 
 
-log = logging.getLogger(__name__)
-
-assert sys.version_info[:2] >= (2, 4)
-
-
-def stop_err(msg):
-    sys.stderr.write('%s\n' % msg)
-    sys.exit()
+PERL_OUT_FILES = ['stats.txt', 'sorted_contigs.fa', 'histogram_bins.dat.png',
+                  'summed_contig_lengths.dat.png', 'histogram_bins.dat',
+                  'summed_contig_lengths.dat']  # order pairs with main()'s zip
 
 
-def __main__():
+def init_parser():
+    """Build and return the argparse parser for this wrapper's options."""
+    parser = argparse.ArgumentParser(description="Runs fasta_summary.pl.")
 
-    # Parse Command Line
+    parser.add_argument(
+        "-d",
+        "--working-dir",
+        dest="working_dir",
+        required=True)
 
-    working_dir = sys.argv[2]
-    type = sys.argv[3]
-    bucket = sys.argv[4]
-    input = sys.argv[5]
-    stats = sys.argv[6]
-    sortedcontigs = sys.argv[7]
-    histogrampng = sys.argv[8]
-    summedcontigspng = sys.argv[9]
-    histogramdata = sys.argv[10]
-    summedcontigdata = sys.argv[11]
-    try:  # for test - needs this done
-        os.makedirs(working_dir)
-    except Exception, e:
-        stop_err('Error running assembly_stats_txt.py ' + str(e))
+    parser.add_argument(
+        "-t",
+        "--type",
+        dest="file_type",
+        required=True)
 
-    cmdline = '%s/fasta_summary.pl -i %s -t %s %s -o %s > /dev/null' \
-        % (os.path.dirname(sys.argv[0]), input, type, bucket,
-           working_dir)
-    try:
-        proc = subprocess.Popen(args=cmdline, shell=True,
-                                stderr=subprocess.PIPE)
-        returncode = proc.wait()
+    parser.add_argument(
+        "-b",
+        "--bucket",
+        dest="bucket",
+        action='store_true')
 
-        # get stderr, allowing for case where it's very large
+    parser.add_argument(
+        "-i",
+        "--input",
+        dest="input",
+        required=True)
 
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += proc.stderr.read(buffsize)
-                if not stderr or len(stderr) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-        if returncode != 0:
-            raise Exception
-    except Exception, e:
-        stop_err('Error running assembly_stats.py ' + str(e))
+    parser.add_argument(
+        "-s",
+        "--stats",
+        dest="stats",
+        required=True)
 
-    stats_path = os.path.join(working_dir, 'stats.txt')
-    sorted_contigs_path = os.path.join(working_dir, 'sorted_contigs.fa')
-    histogram_png_path = os.path.join(working_dir,
-                                      'histogram_bins.dat.png')
-    summed_contigs_path = os.path.join(working_dir,
-                                       'summed_contig_lengths.dat.png')
-    histogram_data_path = os.path.join(working_dir, 'histogram_bins.dat')
-    summed_contigs_data_path = os.path.join(working_dir,
-                                            'summed_contig_lengths.dat')
+    parser.add_argument(
+        "-sc",
+        "--sorted-contigs",
+        dest="sorted_contigs",
+        required=True)
+
+    parser.add_argument(
+        "-hpng",
+        "--histogram-png",
+        dest="histogram_png",
+        required=True)
 
-    out = open(stats, 'w')
-    for line in open(stats_path):
-        out.write('%s' % line)
-    out.close()
-
-    out = open(sortedcontigs, 'w')
-    for line in open(sorted_contigs_path):
-        out.write('%s' % line)
-    out.close()
-
-    out = open(histogrampng, 'w')
-    for line in open(histogram_png_path):
-        out.write('%s' % line)
-    out.close()
+    parser.add_argument(
+        "-spng",
+        "--summed-contigs-png",
+        dest="summed_contigs_png",
+        required=True)
 
-    out = open(summedcontigspng, 'w')
-    for line in open(summed_contigs_path):
-        out.write('%s' % line)
-    out.close()
+    parser.add_argument(
+        "-hd",
+        "--histogram-data",
+        dest="histogram_data",
+        required=True)
 
-    out = open(histogramdata, 'w')
-    for line in open(histogram_data_path):
-        out.write('%s' % line)
-    out.close()
+    parser.add_argument(
+        "-scd",
+        "--summed-config-data",  # sic: renaming would change the CLI
+        dest="summed_contig_data",
+        required=True)
 
-    out = open(summedcontigdata, 'w')
-    for line in open(summed_contigs_data_path):
-        out.write('%s' % line)
-    out.close()
+    return parser
 
 
-if __name__ == '__main__':
-    __main__()
+def exec_fasta_summary(input_data, file_type, bucket, working_dir):
+    """Run the fasta_summary.pl beside this script, without a shell.
+
+    input_data and working_dir are passed as -i and -o, file_type as -t;
+    -b is added when bucket is truthy. The script's stdout is discarded.
+    Raises RuntimeError (return code + stderr) on a non-zero exit, or when
+    the script cannot be started at all.
+    """
+    script = Path(__file__).parent.absolute() / 'fasta_summary.pl'
+    # Argument list, no shell: paths with spaces/metacharacters stay intact.
+    command = [str(script), '-i', input_data, '-t', file_type]
+    command += (['-b'] if bucket else []) + ['-o', working_dir]
+    try:
+        subprocess.run(
+            command, check=True, universal_newlines=True,
+            stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
+    except subprocess.CalledProcessError as exc:
+        raise RuntimeError('Error running assembly_stats.py!\n'
+                           'Return Code: {}\nOutput: {}'.format(
+                            exc.returncode, exc.stderr))
+    except OSError as exc:
+        raise RuntimeError(
+            'Error running assembly_stats.py!\n{}'.format(exc))
+
+
+def main():
+    """Entry point: run fasta_summary.pl and hand its outputs to the caller.
+
+    Steps, in order:
+    - parse the command line (see init_parser);
+    - create the working directory, parents included, if it is missing;
+    - run fasta_summary.pl through exec_fasta_summary;
+    - move each file named in PERL_OUT_FILES out of the working directory
+      to the matching destination path given on the command line.
+
+    """
+    args = init_parser().parse_args()
+    work_dir = Path(args.working_dir)
+
+    # Listed in the same order as PERL_OUT_FILES so zip() pairs them up.
+    destinations = (args.stats, args.sorted_contigs, args.histogram_png,
+                    args.summed_contigs_png, args.histogram_data,
+                    args.summed_contig_data)
+
+    # Make sure the perl script has somewhere to write.
+    work_dir.mkdir(parents=True, exist_ok=True)
+
+    # Produce the output files in the working directory.
+    exec_fasta_summary(args.input, args.file_type, args.bucket,
+                       args.working_dir)
+
+    # Hand each output over under the name the caller asked for.
+    for produced, wanted in zip(PERL_OUT_FILES, destinations):
+        shutil.move(os.path.join(args.working_dir, produced),
+                    wanted)
+
+
+if __name__ == "__main__":
+    main()