0
|
1 #!/usr/bin/python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 import sys
|
|
5 import re
|
|
6 import os
|
|
7 import tempfile
|
|
8 import shutil
|
|
9 import subprocess
|
|
10 import glob
|
|
11 import argparse
|
|
12 from os.path import basename
|
|
13 import zipfile
|
|
14 import tarfile
|
|
15 import gzip
|
|
16 from galaxy.datatypes.checkers import *
|
|
17 from stacks import *
|
|
18
|
|
19
|
|
# File names ending in ".tsv". Kept as a regex (rather than str.endswith) so
# the match is exactly the one the original filter performed.
_TSV_SUFFIX_RE = re.compile(r"\.tsv$")
# File names ending in "_<optional digits>.tsv", e.g. "batch_1.haplotypes_1.tsv".
_NUMBERED_TSV_RE = re.compile(r"_[0-9]*\.tsv$")


def _is_stacks_output(path):
    """Return True when *path* must be exported as a result of the run.

    A file is kept when its base name satisfies at least one of:

    * it starts with ``batch`` and does not end in ``.tsv``;
    * it ends in ``_<optional digits>.tsv``;
    * it contains ``genotypes``.

    The grouping ``(A and not B) or C or D`` is the one Python's operator
    precedence gave to the original un-parenthesised condition.
    """
    name = os.path.basename(path)
    if name.startswith('batch') and not _TSV_SUFFIX_RE.search(name):
        return True
    if _NUMBERED_TSV_RE.search(name):
        return True
    return 'genotypes' in name


def _build_cmd_line(options, stacks_dir):
    """Build the ``genotypes`` argument list from the parsed options.

    :param options: namespace returned by ``argparse`` (attributes ``b``,
        ``e``, ``c``, ``t``, ``o``, ``active_advanced``, ``r``, ``m``, ``B``,
        ``W``, ``active_autocorrect``, ``min_hom_seqs``, ``min_het_seqs``,
        ``max_het_seqs``).
    :param stacks_dir: directory passed to ``genotypes`` through ``-P``.
    :returns: list of strings suitable for ``subprocess.call``.
    """
    cmd_line = ["genotypes", "-b", options.b, "-P", stacks_dir]

    if options.e:
        cmd_line.extend(["-e", options.e])
    # Boolean switches arrive as the literal strings 'true' / 'false'.
    if options.c == 'true':
        cmd_line.append("-c")
    if options.t:
        cmd_line.extend(["-t", options.t])
    if options.o:
        cmd_line.extend(["-o", options.o])

    # Advanced options, only when the advanced section is activated.
    if options.active_advanced == "true":
        # Skip values that were not supplied: a None in the list would make
        # both ' '.join() and subprocess fail with a TypeError.
        for flag, value in (("-r", options.r), ("-m", options.m)):
            if value is not None:
                cmd_line.extend([flag, value])
        if options.B:
            cmd_line.extend(["-B", options.B])
        if options.W:
            cmd_line.extend(["-W", options.W])

    # Automated corrections, only when that section is activated.
    if options.active_autocorrect == "true":
        for name in ("min_hom_seqs", "min_het_seqs", "max_het_seqs"):
            value = getattr(options, name)
            if value is not None:
                cmd_line.extend(["--" + name, value])

    return cmd_line


def __main__():
    """Run the Stacks ``genotypes`` program and collect its outputs.

    Steps:

    1. Parse the command-line options.
    2. Create ``job_outputs`` and ``galaxy_outputs`` in the current
       directory and move into ``job_outputs``.
    3. Call ``extract_compress_files(options.P, <cwd>)`` (imported from the
       ``stacks`` helper module; presumably it unpacks the input archives
       into the working directory -- confirm against that module).
    4. Run ``genotypes`` and copy ``batch_1.haplotypes_1.tsv`` to
       ``--logfile``; exit with status 1 when that file cannot be copied.
    5. Export the result files, either zipped into ``--total_output``
       (``--compress_output total``) or moved to ``../galaxy_outputs``
       (``--compress_output default``).
    """
    # arguments retrieval
    parser = argparse.ArgumentParser()
    parser.add_argument('-P')
    parser.add_argument('-b')
    parser.add_argument('-c')
    parser.add_argument('-t')
    parser.add_argument('-o')
    parser.add_argument('-e')
    parser.add_argument('--active_advanced')
    parser.add_argument('-r')
    parser.add_argument('-m')
    parser.add_argument('-B')
    parser.add_argument('-W')
    parser.add_argument('--active_autocorrect')
    parser.add_argument('--min_hom_seqs')
    parser.add_argument('--min_het_seqs')
    parser.add_argument('--max_het_seqs')

    # multifile management
    parser.add_argument('--logfile')
    parser.add_argument('--compress_output')

    # additional outputs
    parser.add_argument('--total_output')

    options = parser.parse_args()

    # create the working directories and work from job_outputs
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
    os.chdir('job_outputs')

    # STACKS archive: hand the -P input over to the stacks helper
    extract_compress_files(options.P, os.getcwd())

    # build and run the genotypes command (dependencies must be on PATH)
    cmd_line = _build_cmd_line(options, os.getcwd())
    # Single-argument print() produces the same output on Python 2 and 3.
    print("[CMD]:" + ' '.join(cmd_line))
    subprocess.call(cmd_line)

    # postprocessing: the haplotypes table doubles as the success marker
    try:
        shutil.copy('batch_1.haplotypes_1.tsv', options.logfile)
    except EnvironmentError:
        # IOError / OSError / shutil.Error: the expected file is missing or
        # could not be copied.  Anything else is a programming error and is
        # allowed to propagate with its traceback.
        sys.stderr.write('Error in genotypes execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    # select the result files among everything present in the working dir
    outputs = [path for path in glob.glob('*') if _is_stacks_output(path)]

    # compress output "total": a single zip archive holding every result
    if options.compress_output == 'total':
        # The archive must be closed before it is moved, otherwise its
        # central directory may not have been written yet.
        with zipfile.ZipFile('total.zip.temp', 'w') as archive:
            for path in outputs:
                archive.write(path, os.path.basename(path))

        # return the unique archive
        shutil.move('total.zip.temp', options.total_output)

    # compress output "default": move each result next to the job directory
    if options.compress_output == 'default':
        for path in outputs:
            shutil.move(path, '../galaxy_outputs')
|
|
138
|
|
139
|
|
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()
|
|
142
|
|
143
|