0
|
1 #!/usr/bin/python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 import sys
|
|
5 import re
|
|
6 import os
|
|
7 import tempfile
|
|
8 import shutil
|
|
9 import subprocess
|
|
10 import glob
|
|
11 import argparse
|
|
12 from os.path import basename
|
|
13 import zipfile
|
|
14 import tarfile
|
|
15 import gzip
|
|
16 from galaxy.datatypes.checkers import *
|
|
17 from stacks import *
|
|
18
|
|
19
|
|
def _parse_arguments():
    """Declare every command-line option of the wrapper and parse sys.argv.

    Returns:
        The argparse namespace.  Options declared with ``store_true`` are
        booleans; every other option is a string, or None when not given.
    """
    parser = argparse.ArgumentParser()

    # main inputs
    for name in ('-P', '-M', '-b'):
        parser.add_argument(name)
    for name in ('--vcf', '--genepop', '--structure'):
        parser.add_argument(name, action='store_true')
    parser.add_argument('-e')
    for name in ('--genomic', '--fasta', '--phase', '--beagle', '--plink',
                 '--phylip', '--phylip_var', '--write_single_snp', '-k'):
        parser.add_argument(name, action='store_true')

    # advanced options
    for name in ('--advanced_options_activate', '-B', '-W', '-r', '-p', '-m',
                 '-a', '-f', '--p_value_cutoff', '--window_size',
                 '--bootstrap', '--bootstrap_reps'):
        parser.add_argument(name)

    # multifile management
    parser.add_argument('--logfile')

    # outputs
    for name in ('--ss', '--s'):
        parser.add_argument(name)

    # optional outputs
    for name in ('--ov', '--op', '--ol', '--of', '--os', '--oe', '--om',
                 '--og'):
        parser.add_argument(name)

    # archive outputs
    for name in ('--unphased_output', '--markers_output', '--phase_output',
                 '--fst_output'):
        parser.add_argument(name)

    return parser.parse_args()


def _append_valued(cmd_line, flag, value):
    """Append ``flag value`` to cmd_line, skipping options that were not set."""
    if value is not None:
        cmd_line.extend([flag, value])


def _build_populations_command(options, stacks_dir):
    """Translate the parsed options into the ``populations`` argument list.

    Args:
        options: namespace returned by _parse_arguments().
        stacks_dir: directory handed to ``populations`` through ``-P``.

    Returns:
        A list of strings suitable for subprocess.call().
    """
    cmd_line = ['populations',
                '-b', options.b, '-P', stacks_dir, '-M', options.M]

    if options.e:
        cmd_line.extend(['-e', options.e])
        # options.genomic is a boolean (store_true): pass the flag itself,
        # never the boolean value, otherwise the command list is not made
        # of strings any more and cannot be joined or executed.
        if options.genomic:
            cmd_line.append('--genomic')

    # output options
    for flag in ('vcf', 'genepop', 'structure', 'fasta', 'phase', 'beagle',
                 'plink', 'phylip'):
        if getattr(options, flag):
            cmd_line.append('--' + flag)
    if options.phylip_var and options.phylip:
        cmd_line.append('--phylip_var')
    if options.write_single_snp and (options.genepop or options.structure):
        cmd_line.append('--write_single_snp')

    if options.k:
        cmd_line.append('-k')
        _append_valued(cmd_line, '--window_size', options.window_size)

    # NOTE(review): the nesting below was reconstructed from the option
    # grouping of the parser ("advanced options") - confirm that -f and
    # --bootstrap really belong to the advanced section.
    if options.advanced_options_activate == 'true':
        if options.B:
            cmd_line.extend(['-B', options.B])
        if options.W:
            cmd_line.extend(['-W', options.W])

        for flag in ('r', 'p', 'm', 'a'):
            _append_valued(cmd_line, '-' + flag, getattr(options, flag))

        if options.f:
            cmd_line.extend(['-f', options.f])
            _append_valued(cmd_line, '--p_value_cutoff',
                           options.p_value_cutoff)
        if options.bootstrap:
            cmd_line.extend(['--bootstrap', options.bootstrap])
            _append_valued(cmd_line, '--bootstrap_reps',
                           options.bootstrap_reps)

    return cmd_line


def _move_first_match(pattern, destination, missing_message):
    """Move the first file matching a glob pattern to destination.

    This is a best-effort operation: when nothing matches, when no
    destination was given or when the move fails, missing_message is printed
    on stdout and the function returns False instead of raising.

    Returns:
        True when a file was moved, False otherwise.
    """
    matches = glob.glob(pattern)
    if not matches or not destination:
        print(missing_message)
        return False
    try:
        shutil.move(matches[0], destination)
    except EnvironmentError as error:
        # Reported on stdout on purpose: nothing is written to stderr for
        # an output that is merely missing.
        print("%s (%s)" % (missing_message, error))
        return False
    return True


def _archive_and_collect(file_names, collect_dir):
    """Dispatch the remaining result files into zip archives or collect_dir.

    Each name goes into the archive of the first rule it matches (markers,
    phase, unphased, fst, in that order).  Names matching no rule are moved
    to collect_dir when they start with ``batch`` and are not ``.tsv`` files,
    or when they look like ``<something>_<digits>.tsv``; any other name is
    left in place.  The four ``*.zip.temp`` archives are created in the
    current directory and are always closed, even if an error occurs.
    """
    rules = (
        (re.compile(r"\.markers$"), 'markers.zip.temp'),
        (re.compile(r"phase\.inp$"), 'phase.zip.temp'),
        (re.compile(r"unphased\.bgl$"), 'unphased.zip.temp'),
        (re.compile(r"fst"), 'fst.zip.temp'),
    )
    batch_prefix = re.compile(r"^batch")
    tsv_suffix = re.compile(r"\.tsv$")
    numbered_tsv = re.compile(r".*_[0-9]*\.tsv$")

    archives = []
    try:
        for pattern, zip_name in rules:
            archives.append(
                (pattern, zipfile.ZipFile(zip_name, 'w', allowZip64=True)))

        for name in file_names:
            for pattern, archive in archives:
                if pattern.search(name):
                    archive.write(name)
                    break
            else:
                # no archive rule matched: return the original file
                base = os.path.basename(name)
                if ((batch_prefix.search(base)
                     and not tsv_suffix.search(base))
                        or numbered_tsv.search(base)):
                    shutil.move(name, collect_dir)
    finally:
        for _, archive in archives:
            archive.close()


def __main__():
    """Run ``populations`` on a Stacks dataset and dispatch its outputs.

    The function parses the command line, creates the ``job_outputs`` and
    ``galaxy_outputs`` directories, extracts the input given by ``-P`` into
    ``job_outputs``, runs ``populations`` there, then moves the produced
    files to the destinations given on the command line.  The remaining
    files are zipped or moved to ``galaxy_outputs``.

    Exits with status 1 when the ``populations`` log file cannot be copied,
    which is how a failed run is detected.
    """
    options = _parse_arguments()

    # create the working dirs
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
    os.chdir('job_outputs')
    work_dir = os.getcwd()

    # STACKS archive: extract the input files into the working dir
    extract_compress_files(options.P, work_dir)

    # create and run the populations command line
    cmd_line = _build_populations_command(options, work_dir)
    print("[CMD]:" + ' '.join(cmd_line))
    # make sure the command is reported before the output of the child
    sys.stdout.flush()
    # The return code is not inspected: the presence of the log file below
    # is what decides whether the run is considered successful.
    subprocess.call(cmd_line)

    # postprocesses
    # NOTE(review): the log name is hard-coded for batch id 1 while the id
    # is configurable through -b - confirm the wrapper always uses "-b 1".
    try:
        shutil.copy('batch_1.populations.log', options.logfile)
    except (EnvironmentError, TypeError):
        sys.stderr.write('Error in population execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    _move_first_match('*.sumstats_summary.tsv', options.ss,
                      "No sumstats summary file")
    _move_first_match('*.sumstats.tsv', options.s, "No sumstats file")

    # move additional output files
    if options.vcf:
        _move_first_match('*.vcf', options.ov, "No VCF files")

    if options.phylip:
        # the log is only looked for once the phylip file has been moved
        if _move_first_match('*.phylip', options.op, "No phylip file"):
            _move_first_match('*.phylip.log', options.ol, "No phylip file")

    if options.fasta:
        _move_first_match('*.fa', options.of, "No fasta files")

    if options.structure:
        _move_first_match('*.structure.tsv', options.os, "No structure file")

    if options.plink:
        if _move_first_match('*.ped', options.oe, "No ped and map file"):
            _move_first_match('*.map', options.om, "No ped and map file")

    if options.genepop:
        _move_first_match('*.genepop', options.og, "No genepop file")

    # The listing is taken before the archives are created so that the
    # temporary zip files are never added to themselves.
    list_files = glob.glob('*')
    _archive_and_collect(list_files, '../galaxy_outputs')

    # return archives
    shutil.move('fst.zip.temp', options.fst_output)
    if options.beagle:
        shutil.move('markers.zip.temp', options.markers_output)
        shutil.move('unphased.zip.temp', options.unphased_output)
    if options.phase:
        shutil.move('phase.zip.temp', options.phase_output)
|
|
237
|
|
238
|
|
# Run the wrapper only when the file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
|
|
241
|
|
242
|
|
243
|