diff STACKS_genotypes.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/STACKS_genotypes.py	Mon Aug 24 09:29:12 2015 +0000
@@ -0,0 +1,143 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import sys
+import re
+import os
+import tempfile
+import shutil
+import subprocess
+import glob
+import argparse
+from os.path import basename
+import zipfile
+import tarfile
+import gzip
+from galaxy.datatypes.checkers import *
+from stacks import *
+
+
+def _is_output_file(name):
+    """Tell whether the file called *name* is a genotypes result to export.
+
+    A name is kept when it starts with ``batch`` without ending in ``.tsv``,
+    or ends with an underscore, optional digits and ``.tsv``, or contains
+    ``genotypes``.
+    """
+    # `and` binds tighter than `or`; the parentheses make that grouping explicit.
+    return bool(
+        (re.search(r'^batch', name) and not re.search(r'\.tsv$', name))
+        or re.search(r'.*_[0-9]*\.tsv$', name)
+        or re.search(r'.*genotypes.*', name))
+
+
+def __main__():
+    """Run ``genotypes`` from the wrapper options and collect its outputs.
+
+    The working directories ``job_outputs`` and ``galaxy_outputs`` are created
+    in the current directory and the command runs inside ``job_outputs``.
+    After the run, ``batch_1.haplotypes_1.tsv`` is copied to ``--logfile``;
+    if that copy fails a message is written to stderr and the process exits
+    with status 1.  Depending on ``--compress_output`` the result files are
+    zipped into ``--total_output`` (``total``) or moved to ``galaxy_outputs``
+    (``default``).
+    """
+    # argument retrieval
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-P')
+    parser.add_argument('-b')
+    parser.add_argument('-c')
+    parser.add_argument('-t')
+    parser.add_argument('-o')
+    parser.add_argument('-e')
+    parser.add_argument('--active_advanced')
+    parser.add_argument('-r')
+    parser.add_argument('-m')
+    parser.add_argument('-B')
+    parser.add_argument('-W')
+    parser.add_argument('--active_autocorrect')
+    parser.add_argument('--min_hom_seqs')
+    parser.add_argument('--min_het_seqs')
+    parser.add_argument('--max_het_seqs')
+
+    # multifile management
+
+    parser.add_argument('--logfile')
+    parser.add_argument('--compress_output')
+
+    # additional outputs
+
+    parser.add_argument('--total_output')
+
+    options = parser.parse_args()
+
+    # create the working dirs and run from job_outputs
+    os.mkdir('job_outputs')
+    os.mkdir('galaxy_outputs')
+    os.chdir('job_outputs')
+
+    # STACKS_archive: -P goes to the stacks helper (presumably unpacks zipped inputs here; TODO confirm)
+    extract_compress_files(options.P, os.getcwd())
+
+    # create the genotypes command line
+    cmd_line = ["genotypes", "-b", options.b, "-P", os.getcwd()]
+
+    if options.e:
+        cmd_line.extend(["-e", options.e])
+    if options.c == 'true':
+        cmd_line.append("-c")
+    if options.t:
+        cmd_line.extend(["-t", options.t])
+    if options.o:
+        cmd_line.extend(["-o", options.o])
+
+    # if advanced is activated (a missing value is skipped, not sent as None)
+    if options.active_advanced == "true":
+        for flag, value in (("-r", options.r), ("-m", options.m)):
+            if value is not None:
+                cmd_line.extend([flag, value])
+        if options.B:
+            cmd_line.extend(["-B", options.B])
+        if options.W:
+            cmd_line.extend(["-W", options.W])
+
+    # if autocorrect is activated (same rule: missing values are skipped)
+    if options.active_autocorrect == "true":
+        for flag in ("min_hom_seqs", "min_het_seqs", "max_het_seqs"):
+            value = getattr(options, flag)
+            if value is not None:
+                cmd_line.extend(["--" + flag, value])
+
+    # command with dependencies installed
+    # (single-argument print() behaves the same on Python 2 and 3)
+    print("[CMD]:" + ' '.join(cmd_line))
+    subprocess.call(cmd_line)
+
+    # postprocesses: a failed copy is reported as a genotypes failure
+    try:
+        shutil.copy('batch_1.haplotypes_1.tsv', options.logfile)
+    except Exception:
+        sys.stderr.write('Error in genotypes execution; Please read the additional output (stdout)\n')
+        sys.exit(1)
+
+    # result files of the working dir, listed before any archive is created
+    results = [f for f in glob.glob('*') if _is_output_file(os.path.basename(f))]
+
+    # if compress output is total
+    if options.compress_output == 'total':
+        # close the archive before moving it so its central directory is written
+        with zipfile.ZipFile('total.zip.temp', 'w') as archive:
+            for path in results:
+                archive.write(path, os.path.basename(path))
+        shutil.move('total.zip.temp', options.total_output)
+
+    # if compress output is default
+    if options.compress_output == 'default':
+        for path in results:
+            shutil.move(path, '../galaxy_outputs')
+
+
+if __name__ == '__main__':  # run the wrapper only when executed as a script
+    __main__()
+
+