0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Wrapper script that runs the structure program, a model-based clustering method for inferring population structure from genotype data.
|
|
4
|
|
5 Created by Yvan LE BRAS
|
|
6 """
|
|
7 import optparse, os, sys, subprocess, tempfile, glob, shutil
|
|
8 import zipfile, tarfile, gzip
|
|
9 from os.path import basename
|
|
10
|
|
# Output-file stem used for the primary --K value.
_PRIMARY_K_STEM = "outfile"

# (option name, output-file stem) for the additional K values.  The stems --
# including the historical "eigthk" spelling -- are kept verbatim because they
# become part of the file names handed back to the caller.
_EXTRA_K_STEMS = (
    ("k2", "outfilesecondk"),
    ("k3", "outfilethirdk"),
    ("k4", "outfilefourthk"),
    ("k5", "outfilefifthk"),
    ("k6", "outfilesixthk"),
    ("k7", "outfileseventhk"),
    ("k8", "outfileeigthk"),
    ("k9", "outfileninthk"),
    ("k10", "outfiletenthk"),
)


def _run_structure(arguments, announce=False):
    """Run ``structure`` with *arguments* and return its exit status.

    The command is handed over as an argument list (no shell), so values that
    contain spaces or shell metacharacters reach the program unchanged.
    stderr is merged into stdout, which is what the former ``2>&1`` did.
    When *announce* is true the command is echoed after it has run.
    """
    command = ["structure"] + list(arguments)
    # Flush our own buffered output first so it stays ahead of the child's.
    sys.stdout.flush()
    try:
        status = subprocess.call(command, stderr=subprocess.STDOUT)
    except OSError as error:
        # With shell=True a missing binary was reported by the shell as exit
        # status 127 and the script carried on; keep that best-effort behaviour.
        print("[WARNING] : unable to start structure: %s" % error)
        status = 127
    else:
        if status != 0:
            print("[WARNING] : structure exited with status %s" % status)
    if announce:
        print("\n[INFO] : " + " ".join(command))
    return status


def _archive_outputs(paths, archive_path):
    """Write every file in *paths* into a new ZIP at *archive_path*.

    Entries are stored under their base name.  The archive is always closed
    before returning so that its central directory is on disk before the
    caller moves the file.
    """
    archive = zipfile.ZipFile(archive_path, 'w')
    try:
        for path in paths:
            archive.write(path, os.path.basename(path))
    finally:
        archive.close()


def __main__():
    """Parse the command line, run ``structure`` once per requested K and collect the results.

    Command-line options (all take a value):
      --input, --param, --extraparam   passed to structure as -i, -m and -e (required)
      --N, --L, --D                    forwarded as -N, -L and -D when given
      --K, --k2 ... --k10              K values; each one given produces its own output file
      --t                              when 'true', enables the primary run and adds two
                                       extra runs ("run2", "run3") for every K value given
      --workdir                        directory holding the temporary tree and, in
                                       'default' mode, the renamed result files
      --id                             identifier embedded in the 'default' mode file names
      --compress_output                'total' (one ZIP moved to --total_output) or
                                       'default' (each file moved into --workdir)
      --total_output                   destination of the ZIP in 'total' mode
      --logfile                        accepted but not used

    The temporary directory is removed even when a step fails.
    """
    # arguments recuperation
    parser = optparse.OptionParser()
    parser.add_option("--input")
    parser.add_option("--param")
    parser.add_option("--extraparam")
    # multifile management
    parser.add_option("--K")
    for k_option, _stem in _EXTRA_K_STEMS:
        parser.add_option("--" + k_option)
    for flag in ("t", "N", "L", "D"):
        parser.add_option("--" + flag)
    # output management
    for flag in ("logfile", "id", "workdir", "compress_output"):
        parser.add_option("--" + flag)
    # additional outputs
    parser.add_option("--total_output")
    (options, args) = parser.parse_args()

    # These three are always part of the command line; fail with a usage
    # message instead of a TypeError deep inside the command construction.
    for required in ("input", "param", "extraparam"):
        if getattr(options, required) is None:
            parser.error("--%s is required" % required)

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)

        print(tmp_dir)

        # arguments shared by every structure invocation
        base_args = ["-m", options.param, "-e", options.extraparam, "-i", options.input]
        for flag in ("N", "L", "D"):
            value = getattr(options, flag)
            if value:
                base_args += ["-" + flag, value]

        def k_run_args(k_value, stem, suffix):
            # One job per K value; results are all picked up from tmp_output_dir.
            return base_args + ["-K", k_value, "-o", os.path.join(tmp_output_dir, stem + suffix)]

        replicate = options.t == 'true'
        extra_runs = [(getattr(options, name), stem) for name, stem in _EXTRA_K_STEMS]
        extra_runs = [(k_value, stem) for k_value, stem in extra_runs if k_value]

        # NOTE(review): the primary --K run only happens when --t is 'true',
        # whereas the --k2..--k10 runs below are unconditional.  This mirrors
        # the previous behaviour exactly -- confirm the asymmetry is intended.
        if replicate:
            if options.K:
                _run_structure(k_run_args(options.K, _PRIMARY_K_STEM, "_f"), announce=True)
            else:
                _run_structure(base_args, announce=True)

        for k_value, stem in extra_runs:
            _run_structure(k_run_args(k_value, stem, "_f"))

        # second and third runs, K value by K value
        if replicate:
            all_runs = list(extra_runs)
            if options.K:
                all_runs.insert(0, (options.K, _PRIMARY_K_STEM))
            for k_value, stem in all_runs:
                _run_structure(k_run_args(k_value, stem, "_run2_f"))
                _run_structure(k_run_args(k_value, stem, "_run3_f"))

        # everything structure wrote into the output directory
        list_files = glob.glob(os.path.join(tmp_output_dir, '*'))

        if options.compress_output == 'total':
            # The archive is built next to the results but was globbed before
            # it existed, so it never contains itself.
            archive_path = os.path.join(tmp_output_dir, 'total.zip.temp')
            _archive_outputs(list_files, archive_path)
            # return the unique archive
            shutil.move(archive_path, options.total_output)
        elif options.compress_output == 'default':
            for path in list_files:
                visible_name = ("primary_" + options.id + "_"
                                + os.path.basename(path).replace("_", ".")
                                + "_visible_txt")
                shutil.move(path, os.path.join(options.workdir, visible_name))
    finally:
        # clean up temp files
        shutil.rmtree(tmp_dir)
|
|
388
|
|
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
|