comparison structure-923cc9e6aa30/Structure.py @ 0:2c0b270dae70 draft default tip

Uploaded
author ylebrascnrs
date Thu, 14 Sep 2017 08:33:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2c0b270dae70
1 #!/usr/bin/env python
2 """
3 Structure is a wrapper script that runs structure, a model-based clustering method for inferring population structure from genotype data
4
5 Created by Yvan LE BRAS
6 """
7 import optparse, os, sys, subprocess, tempfile, glob, shutil
8 import zipfile, tarfile, gzip
9 from os.path import basename
10
# Option name -> label embedded in the output file name for that K value.
# The "eigthk" spelling is kept on purpose: it is part of the produced file
# names and changing it would rename the tool's outputs.
_K_OPTIONS = (
    ("K", ""),
    ("k2", "secondk"),
    ("k3", "thirdk"),
    ("k4", "fourthk"),
    ("k5", "fifthk"),
    ("k6", "sixthk"),
    ("k7", "seventhk"),
    ("k8", "eigthk"),
    ("k9", "ninthk"),
    ("k10", "tenthk"),
)

# File-name suffixes of the two extra runs launched per K when --t is 'true'.
_EXTRA_RUN_SUFFIXES = ("_run2", "_run3")


def _run_structure(argv, announce=False):
    """Run one structure command and return its exit status.

    The command is started without a shell (argv is a list), so file paths
    containing spaces or shell metacharacters are passed through verbatim.
    The child's stderr is merged into stdout, as the former "2>&1" did.
    A failure to start or a non-zero exit status is reported on stdout and
    does not abort the remaining runs.
    """
    # Flush so our own messages stay ordered with the child's output.
    sys.stdout.flush()
    try:
        proc = subprocess.Popen(args=argv, stderr=subprocess.STDOUT)
    except OSError as exc:
        print("[WARNING] : could not start '%s': %s" % (argv[0], exc))
        return 127
    returncode = proc.wait()
    if announce:
        print("\n[INFO] : " + " ".join(argv))
    if returncode != 0:
        print("[WARNING] : '%s' exited with status %d"
              % (" ".join(argv), returncode))
    return returncode


def _with_k(base_argv, k_value, output_dir, label, run_suffix=""):
    """Return base_argv extended with -K and the matching -o output path."""
    out_name = "outfile" + label + run_suffix + "_f"
    return base_argv + ["-K", k_value, "-o", os.path.join(output_dir, out_name)]


def __main__():
    """Run structure once per requested K value and collect the outputs.

    Command-line options (all read from sys.argv through optparse):
      --input, --param, --extraparam   files passed to structure as -i, -m, -e
                                       (required)
      --N, --L, --D                    forwarded as -N, -L, -D when given
      --K, --k2 ... --k10              K values; one structure run per value
      --t                              when 'true', the --K value gets its
                                       first run and every K value gets two
                                       extra runs (suffixes _run2 and _run3)
      --workdir, --id                  destination directory / identifier
                                       used to name files in 'default' mode
      --compress_output                'total' or 'default' (see below)
      --total_output                   destination of the zip in 'total' mode
      --logfile                        accepted but not used here

    Outputs are written to a temporary directory created under --workdir.
    In 'total' mode they are zipped (entries named by base name) and the
    archive is moved to --total_output. In 'default' mode each file is moved
    to <workdir>/primary_<id>_<name with "_" replaced by ".">_visible_txt.
    The temporary directory is always removed, even on error.
    """
    # arguments recuperation
    parser = optparse.OptionParser()
    option_names = ["input", "param", "extraparam"]
    option_names += [name for name, _label in _K_OPTIONS]
    option_names += ["t", "N", "L", "D",
                     "logfile", "id", "workdir", "compress_output",
                     "total_output"]
    for name in option_names:
        parser.add_option("--" + name)
    (options, args) = parser.parse_args()

    # Fail early with a usage error instead of a TypeError further down.
    for required in ("input", "param", "extraparam"):
        if not getattr(options, required):
            parser.error("--%s is required" % required)
    if options.compress_output == 'total' and not options.total_output:
        parser.error("--total_output is required with --compress_output total")
    if options.compress_output == 'default' and not (options.workdir and options.id):
        parser.error("--workdir and --id are required with --compress_output default")

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)

        print(tmp_dir)

        # Arguments shared by every structure run.
        base_argv = ["structure",
                     "-m", options.param,
                     "-e", options.extraparam,
                     "-i", options.input]
        for flag in ("N", "L", "D"):
            value = getattr(options, flag)
            if value:
                base_argv += ["-" + flag, value]

        replicate = options.t == 'true'

        # NOTE(review): the run without -K and the first --K run only happen
        # when --t is 'true'; this mirrors the previous behaviour - confirm
        # that a lone --K without --t is really meant to run nothing.
        if replicate and not options.K:
            _run_structure(base_argv, announce=True)
        if replicate and options.K:
            _run_structure(_with_k(base_argv, options.K, tmp_output_dir, ""),
                           announce=True)

        # One run per additional K value, whatever --t says.
        # NOTE(review): assumes these runs are not nested under the
        # --t/--K check above - confirm against the tool definition.
        for name, label in _K_OPTIONS[1:]:
            k_value = getattr(options, name)
            if k_value:
                _run_structure(_with_k(base_argv, k_value, tmp_output_dir, label))

        # Two extra runs per K value when --t is 'true'.
        if replicate:
            for name, label in _K_OPTIONS:
                k_value = getattr(options, name)
                if not k_value:
                    continue
                for run_suffix in _EXTRA_RUN_SUFFIXES:
                    _run_structure(_with_k(base_argv, k_value, tmp_output_dir,
                                           label, run_suffix))

        # Everything structure wrote; listed before the archive is created so
        # the archive never contains itself.
        list_files = sorted(glob.glob(os.path.join(tmp_output_dir, '*')))

        # if compress output is total
        if options.compress_output == 'total':
            archive_path = os.path.join(tmp_output_dir, 'total.zip.temp')
            # The archive must be closed (central directory written) before
            # it is moved, otherwise the delivered zip can be truncated.
            with zipfile.ZipFile(archive_path, 'w') as archive:
                for path in list_files:
                    # arcname keeps entries flat without changing directory.
                    archive.write(path, os.path.basename(path))
            # return the unique archive
            shutil.move(archive_path, options.total_output)

        # if compress output is default
        elif options.compress_output == 'default':
            for path in list_files:
                visible_name = ("primary_" + options.id + "_"
                                + os.path.basename(path).replace("_", ".")
                                + "_visible_txt")
                shutil.move(path, os.path.join(options.workdir, visible_name))
    finally:
        # clean up temp files
        shutil.rmtree(tmp_dir)
388
# Script entry point: run the wrapper only when executed directly.
if __name__ == "__main__":
    __main__()