diff STACKS_refmap.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/STACKS_refmap.py	Mon Aug 24 09:29:12 2015 +0000
@@ -0,0 +1,258 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import sys
+import re
+import os
+import tempfile
+import shutil
+import subprocess
+import glob
+import optparse
+from os.path import basename
+import zipfile
+import tarfile
+import gzip
+from galaxy.datatypes.checkers import *
+from stacks import *
+
+
+def _stage_inputs(config_file, files_only=True):
+    """Stage the datasets listed in a Galaxy config file under ``inputs/``.
+
+    The config file is parsed with ``galaxy_config_to_tabfiles_for_STACKS``
+    and the resulting table is passed, in this order, to
+    ``extract_compress_files_from_tabfiles``, ``check_sam_extension_and_add``
+    and ``create_symlinks_from_tabfiles`` (helpers made available by the star
+    imports at the top of this file), each one targeting ``inputs``.
+
+    :param config_file: path of the Galaxy config file to parse.
+    :param files_only: when true, keep only the entries that exist as regular
+        files in ``inputs/`` (skips directories created by a decompression).
+    :return: list of ``inputs/<key>`` paths, one per retained table key.
+    """
+
+    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_file)
+
+    # check if compressed files are in the table and update its content
+    extract_compress_files_from_tabfiles(tab_files, 'inputs')
+
+    # check file extensions (important to have .sam files)
+    check_sam_extension_and_add(tab_files, 'inputs')
+
+    # create symlinks inside the inputs directory
+    create_symlinks_from_tabfiles(tab_files, 'inputs')
+
+    staged = []
+    for key in tab_files:
+        path = 'inputs/' + key
+        if files_only and not os.path.isfile(path):
+            continue
+        staged.append(path)
+    return staged
+
+
+def __main__():
+    """Run ``ref_map.pl`` on the selected datasets and dispatch its outputs.
+
+    Creates the ``inputs``, ``job_outputs`` and ``galaxy_outputs`` directories
+    in the current working directory, builds the ``ref_map.pl`` command line
+    from the parsed options and runs it. The log is then moved and the
+    catalog files copied to the requested destinations; the files found in
+    ``job_outputs`` are finally zipped and/or moved to ``galaxy_outputs``
+    depending on ``--compress_output``.
+    """
+
+    # retrieve arguments
+
+    parser = optparse.OptionParser()
+    parser.add_option('-p')
+    parser.add_option('-r')
+    parser.add_option('-s')
+    parser.add_option('-O')
+    parser.add_option('-n')
+    parser.add_option('-m')
+    parser.add_option('--bound_low')
+    parser.add_option('--bound_high')
+    parser.add_option('--alpha')
+    parser.add_option('--logfile')
+    parser.add_option('--compress_output')
+    parser.add_option('--catalogsnps')
+    parser.add_option('--catalogalleles')
+    parser.add_option('--catalogtags')
+
+    # additional outputs
+
+    parser.add_option('--total_output')
+    parser.add_option('--tags_output')
+    parser.add_option('--snps_output')
+    parser.add_option('--alleles_output')
+    parser.add_option('--matches_output')
+    (options, _args) = parser.parse_args()
+
+    # create working directories
+
+    os.mkdir('inputs')
+    os.mkdir('job_outputs')
+    os.mkdir('galaxy_outputs')
+
+    cmd_line = ['ref_map.pl']
+
+    # if genetic map: parent files
+
+    if options.p:
+        for path in _stage_inputs(options.p):
+            cmd_line.extend(['-p', os.path.normpath(path)])
+
+    # if genetic map with progeny files
+
+    if options.r:
+        for path in _stage_inputs(options.r):
+            cmd_line.extend(['-r', path])
+
+    # if individual files are selected
+    # NOTE(review): unlike -p/-r, every table key is passed here without the
+    # regular-file check (behaviour kept as is) -- confirm this is intended
+
+    if options.s:
+        for path in _stage_inputs(options.s, files_only=False):
+            cmd_line.extend(['-s', path])
+
+    # create the options command line
+
+    cmd_line.extend([
+        '-S',
+        '-b', '1',
+        '-T', '4',
+        '-o', 'job_outputs',
+        ])
+
+    # -n and -m are only forwarded when given: a None entry would make both
+    # ' '.join(cmd_line) and subprocess.Popen fail with a TypeError
+
+    if options.n is not None:
+        cmd_line.extend(['-n', options.n])
+    if options.m is not None:
+        cmd_line.extend(['-m', options.m])
+
+    if options.O:
+        cmd_line.extend(['-O', options.O])
+    if options.bound_low:
+        cmd_line.extend(['--bound_low', options.bound_low])
+    if options.bound_high:
+        cmd_line.extend(['--bound_high', options.bound_high])
+    if options.alpha:
+        cmd_line.extend(['--alpha', options.alpha])
+
+    # execute job (print() with a single argument behaves like the print
+    # statement on Python 2 and stays valid syntax on Python 3)
+
+    print('[COMMAND LINE]' + ' '.join(cmd_line))
+
+    p = subprocess.Popen(cmd_line, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+
+    (stdoutput, stderror) = p.communicate()
+
+    print(stdoutput)
+    print(stderror)
+
+    # postprocesses
+
+    try:
+        shutil.move('job_outputs/ref_map.log', options.logfile)
+    except Exception:
+        # best effort: report the problem and keep collecting the outputs,
+        # but no longer swallow KeyboardInterrupt / SystemExit
+        sys.stderr.write('Error in ref_map execution; Please read the additional output (stdout)\n')
+
+    # go inside the outputs dir
+
+    os.chdir('job_outputs')
+
+    # copy the catalog files to their dedicated outputs; the suffixes are
+    # compared literally so that the dots cannot act as regex wildcards
+
+    catalog_outputs = (
+        ('catalog.snps.tsv', options.catalogsnps),
+        ('catalog.alleles.tsv', options.catalogalleles),
+        ('catalog.tags.tsv', options.catalogtags),
+        )
+
+    for i in glob.glob('*'):
+        for (suffix, destination) in catalog_outputs:
+            if i.endswith(suffix):
+                shutil.copy(i, destination)
+
+    # list the produced files before any temporary archive is created
+
+    list_files = glob.glob('*')
+
+    if options.compress_output == 'total':
+
+        # if compress output is total
+
+        mytotalzipfile = zipfile.ZipFile('total.zip.temp', 'w',
+                allowZip64=True)
+
+        # the archive must be closed before it is moved: the zip central
+        # directory is only written by close()
+
+        try:
+            for i in list_files:
+                mytotalzipfile.write(os.path.basename(i))
+        finally:
+            mytotalzipfile.close()
+
+        # return the unique archive
+
+        shutil.move('total.zip.temp', options.total_output)
+    elif options.compress_output == 'categories':
+
+        # if compress output is by categories: one archive per file type
+
+        categories = ('tags', 'snps', 'alleles', 'matches')
+        destinations = {
+            'tags': options.tags_output,
+            'snps': options.snps_output,
+            'alleles': options.alleles_output,
+            'matches': options.matches_output,
+            }
+        archives = {}
+
+        try:
+            for category in categories:
+                archives[category] = zipfile.ZipFile(
+                    category + '.zip.temp', 'w', allowZip64=True)
+
+            for i in list_files:
+                # files whose name contains 'batch' are never archived
+                for category in categories:
+                    if re.search(category + r'\.tsv$', i) \
+                            and not re.search('batch', i):
+                        archives[category].write(os.path.basename(i))
+                        os.remove(i)
+                        break
+                else:
+                    shutil.move(os.path.basename(i), '../galaxy_outputs')
+        finally:
+            # close every archive so that it is complete before the move
+            for archive in archives.values():
+                archive.close()
+
+        # return archives....
+
+        for category in categories:
+            shutil.move(category + '.zip.temp', destinations[category])
+    else:
+
+        # else no compression
+
+        for i in list_files:
+            shutil.move(os.path.basename(i), '../galaxy_outputs')
+
+
+if __name__ == '__main__':  # run the wrapper only when executed as a script
+    __main__()
+
+