comparison STACKS_genotypes.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 import os
7 import tempfile
8 import shutil
9 import subprocess
10 import glob
11 import argparse
12 from os.path import basename
13 import zipfile
14 import tarfile
15 import gzip
16 from galaxy.datatypes.checkers import *
17 from stacks import *
18
19
def _is_exported_result(path):
    """Return True when *path* names a result file that must be exported.

    Only the basename is inspected.  A file qualifies when:

    * its name starts with ``batch`` and does not end in ``.tsv``, or
    * its name ends with ``_``, optional digits, then ``.tsv``, or
    * its name contains ``genotypes``.
    """
    name = os.path.basename(path)
    if re.search(r'^batch', name) and not re.search(r'\.tsv$', name):
        return True
    if re.search(r'.*_[0-9]*\.tsv$', name):
        return True
    return re.search(r'.*genotypes.*', name) is not None


def __main__():
    """Run the ``genotypes`` program and collect its outputs.

    Steps, in order:

    1. Parse the wrapper's command-line options.
    2. Create ``job_outputs`` and ``galaxy_outputs`` in the current directory
       and change into ``job_outputs``.
    3. Unpack the input given with ``-P`` into the working directory via
       ``extract_compress_files``.
    4. Build the ``genotypes`` command line from the options and run it.
    5. Copy ``batch_1.haplotypes_1.tsv`` to ``--logfile``; if that fails, write
       an error to stderr and exit with status 1.
    6. Depending on ``--compress_output``: bundle the selected result files in
       a zip archive moved to ``--total_output`` (``total``), or move them into
       ``../galaxy_outputs`` (``default``).

    Raises:
        SystemExit: with status 1 when the expected haplotypes file cannot be
            copied to the log file location.
    """
    # argument retrieval

    parser = argparse.ArgumentParser()
    for flag in ('-P', '-b', '-c', '-t', '-o', '-e',
                 '--active_advanced', '-r', '-m', '-B', '-W',
                 '--active_autocorrect',
                 '--min_hom_seqs', '--min_het_seqs', '--max_het_seqs'):
        parser.add_argument(flag)

    # multifile management

    parser.add_argument('--logfile')
    parser.add_argument('--compress_output')

    # additional outputs

    parser.add_argument('--total_output')

    options = parser.parse_args()

    # create the working dir

    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    os.chdir('job_outputs')
    workdir = os.getcwd()

    # STACKS_archive
    # check if zipped files are in the tab

    extract_compress_files(options.P, workdir)

    # create the genotypes command line, starting with its input arguments

    cmd_line = ["genotypes", "-b", options.b, "-P", workdir]

    if options.e:
        cmd_line.extend(["-e", options.e])
    if options.c == 'true':
        cmd_line.append("-c")
    if options.t:
        cmd_line.extend(["-t", options.t])
    if options.o:
        cmd_line.extend(["-o", options.o])

    # if advanced is activated
    if options.active_advanced == "true":
        cmd_line.extend(["-r", options.r])
        cmd_line.extend(["-m", options.m])
        if options.B:
            cmd_line.extend(["-B", options.B])
        if options.W:
            cmd_line.extend(["-W", options.W])

    # if autocorrect is activated
    if options.active_autocorrect == "true":
        cmd_line.extend(["--min_hom_seqs", options.min_hom_seqs])
        cmd_line.extend(["--min_het_seqs", options.min_het_seqs])
        cmd_line.extend(["--max_het_seqs", options.max_het_seqs])

    # command with dependencies installed
    # Single-argument print() behaves the same on Python 2 and 3; flush so the
    # command line shows up before the child process writes to stdout.
    print("[CMD]:" + ' '.join(cmd_line))
    sys.stdout.flush()
    # The exit status is deliberately not checked here: success is judged by
    # the presence of the haplotypes file below.
    subprocess.call(cmd_line)

    # postprocesses
    # NOTE(review): the file name is hard-coded to batch 1 regardless of -b;
    # confirm this is intended.
    try:
        shutil.copy('batch_1.haplotypes_1.tsv', options.logfile)
    except EnvironmentError:
        # EnvironmentError covers IOError/OSError/shutil.Error without also
        # swallowing SystemExit or KeyboardInterrupt like a bare except would.
        sys.stderr.write('Error in genotypes execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    # list the result files before any archive is created in this directory

    exported = [path for path in glob.glob('*') if _is_exported_result(path)]

    # if compress output is total

    if options.compress_output == 'total':
        # The archive must be closed before it is moved: the zip central
        # directory is only written on close, so moving an open archive can
        # deliver a truncated, unreadable file.
        with zipfile.ZipFile('total.zip.temp', 'w') as archive:
            for path in exported:
                archive.write(path, os.path.basename(path))

        # return the unique archive

        shutil.move('total.zip.temp', options.total_output)

    # if compress output is default
    if options.compress_output == 'default':
        for path in exported:
            shutil.move(path, '../galaxy_outputs')
139
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
142
143