diff STACKS_population.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/STACKS_population.py	Mon Aug 24 09:29:12 2015 +0000
@@ -0,0 +1,243 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import sys
+import re
+import os
+import tempfile
+import shutil
+import subprocess
+import glob
+import argparse
+from os.path import basename
+import zipfile
+import tarfile
+import gzip
+from galaxy.datatypes.checkers import *
+from stacks import *
+
+
+def __main__():
+
+    # arguments recuperation
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-P')
+    parser.add_argument('-M')
+    parser.add_argument('-b')
+    parser.add_argument('--vcf', action='store_true')
+    parser.add_argument('--genepop', action='store_true')
+    parser.add_argument('--structure', action='store_true')
+    parser.add_argument('-e')
+    parser.add_argument('--genomic', action='store_true')
+    parser.add_argument('--fasta', action='store_true')
+    parser.add_argument('--phase', action='store_true')
+    parser.add_argument('--beagle', action='store_true')
+    parser.add_argument('--plink', action='store_true')
+    parser.add_argument('--phylip', action='store_true')
+    parser.add_argument('--phylip_var', action='store_true')
+    parser.add_argument('--write_single_snp', action='store_true')
+    parser.add_argument('-k', action='store_true')
+
+    # advanced options
+    parser.add_argument('--advanced_options_activate')
+    parser.add_argument('-B')
+    parser.add_argument('-W')
+    parser.add_argument('-r')
+    parser.add_argument('-p')
+    parser.add_argument('-m')
+    parser.add_argument('-a')
+    parser.add_argument('-f')
+    parser.add_argument('--p_value_cutoff')
+    parser.add_argument('--window_size')
+    parser.add_argument('--bootstrap')
+    parser.add_argument('--bootstrap_reps')
+
+    # multifile management
+    parser.add_argument('--logfile')
+
+    # outputs
+    parser.add_argument('--ss')
+    parser.add_argument('--s')
+
+    # optional outputs
+    parser.add_argument('--ov')
+    parser.add_argument('--op')
+    parser.add_argument('--ol')
+    parser.add_argument('--of')
+    parser.add_argument('--os')
+    parser.add_argument('--oe')
+    parser.add_argument('--om')
+    parser.add_argument('--og') 
+
+    parser.add_argument('--unphased_output')
+    parser.add_argument('--markers_output')
+    parser.add_argument('--phase_output')
+    parser.add_argument('--fst_output')
+
+    options = parser.parse_args()
+
+    # create the working dir
+    os.mkdir('job_outputs')
+    os.mkdir('galaxy_outputs')
+
+    os.chdir('job_outputs')
+
+    # STACKS_archive
+    # check if zipped files are into the tab
+    extract_compress_files(options.P, os.getcwd())
+
+    # create the populations command input line
+    cmd_line=['populations']
+    cmd_line.extend(['-b', options.b, '-P', os.getcwd(), '-M', options.M])
+
+    if options.e:
+        cmd_line.extend(['-e', options.e, options.genomic])
+
+    # output options
+    if options.vcf:
+        cmd_line.append('--vcf')
+    if options.genepop:
+        cmd_line.append('--genepop')
+    if options.structure:
+        cmd_line.append('--structure')
+    if options.fasta:
+        cmd_line.append('--fasta')
+    if options.phase:
+        cmd_line.append('--phase')
+    if options.beagle:
+        cmd_line.append('--beagle')
+    if options.plink:
+        cmd_line.append('--plink')
+    if options.phylip:
+        cmd_line.append('--phylip')
+    if options.phylip_var and options.phylip:
+        cmd_line.append('--phylip_var')
+    if options.write_single_snp and (options.genepop or options.structure):
+        cmd_line.append('--write_single_snp')
+
+    if options.k:
+        cmd_line.extend(['-k', '--window_size', options.window_size])
+   
+    if options.advanced_options_activate == 'true':
+        if options.B:
+            cmd_line.extend(['-B', options.B])
+        if options.W:
+            cmd_line.extend(['-W', options.W])
+
+        cmd_line.extend(['-r', options.r])
+        cmd_line.extend(['-p', options.p])
+        cmd_line.extend(['-m', options.m])
+        cmd_line.extend(['-a', options.a])
+
+    if options.f:
+        cmd_line.extend(['-f', options.f, '--p_value_cutoff', options.p_value_cutoff])
+    if options.bootstrap:
+        cmd_line.extend(['--bootstrap', options.bootstrap, '--bootstrap_reps', options.bootstrap_reps])
+
+    print "[CMD]:"+' '.join(cmd_line)
+    subprocess.call(cmd_line)
+
+    # postprocesses
+    try:
+        shutil.copy('batch_1.populations.log', options.logfile)
+    except:
+        sys.stderr.write('Error in population execution; Please read the additional output (stdout)\n')
+        sys.exit(1)
+
+    try:
+        shutil.move(glob.glob('*.sumstats_summary.tsv')[0], options.ss)
+    except:
+        print "No sumstats summary file"
+
+    try:
+        shutil.move(glob.glob('*.sumstats.tsv')[0], options.s)
+    except:
+        print "No sumstats file"
+
+    # move additionnal output files
+    if options.vcf:
+        try:
+            shutil.move(glob.glob('*.vcf')[0], options.ov)
+        except:
+            print "No VCF files"
+
+    if options.phylip:
+        try:
+            shutil.move(glob.glob('*.phylip')[0], options.op)
+            shutil.move(glob.glob('*.phylip.log')[0], options.ol)
+        except:
+            print "No phylip file"
+
+    if options.fasta:
+        try:
+            shutil.move(glob.glob('*.fa')[0], options.of)
+        except:
+            print "No fasta files"
+
+    if options.structure:
+        try:
+            shutil.move(glob.glob('*.structure.tsv')[0], options.os)
+        except:
+            print "No structure file"
+
+    if options.plink :
+        try:
+            shutil.move(glob.glob('*.ped')[0], options.oe)
+            shutil.move(glob.glob('*.map')[0], options.om)
+        except:
+            print "No ped and map file"
+
+    if options.genepop :
+        try:
+            shutil.move(glob.glob('*.genepop')[0], options.og)
+        except:
+            print "No genepop file"
+
+    # copy all files inside tmp_dir into workdir or into an archive....
+    list_files = glob.glob('*')
+
+    markerszip = zipfile.ZipFile('markers.zip.temp', 'w',
+                                 allowZip64=True)
+    phasezip = zipfile.ZipFile('phase.zip.temp', 'w', allowZip64=True)
+    unphasedzip = zipfile.ZipFile('unphased.zip.temp', 'w',
+                                  allowZip64=True)
+    fstzip = zipfile.ZipFile('fst.zip.temp', 'w', allowZip64=True)
+
+    for i in list_files:
+        # for each type of files
+        if re.search("\.markers$", i):
+            markerszip.write(i)
+        elif re.search("phase\.inp$", i):
+            phasezip.write(i)
+        elif re.search("unphased\.bgl$", i):
+            unphasedzip.write(i)
+        elif re.search('fst', i):
+            fstzip.write(i)
+        else:
+        # else return original files
+            if re.search('^batch', os.path.basename(i)) \
+                and not re.search("\.tsv$", os.path.basename(i)) \
+                or re.search(".*_[0-9]*\.tsv$", os.path.basename(i)):
+                shutil.move(i, '../galaxy_outputs')
+
+    # close zip files
+    markerszip.close()
+    phasezip.close()
+    unphasedzip.close()
+    fstzip.close()
+
+    # return archives
+    shutil.move('fst.zip.temp', options.fst_output)
+    if options.beagle:
+        shutil.move('markers.zip.temp', options.markers_output)
+        shutil.move('unphased.zip.temp', options.unphased_output)
+    if options.phase:
+        shutil.move('phase.zip.temp', options.phase_output)
+
+
+if __name__ == '__main__':
+    __main__()
+
+
+