Mercurial > repos > ylebrascnrs > structure
comparison structure-923cc9e6aa30/Structure.py @ 0:2c0b270dae70 draft default tip
Uploaded
author | ylebrascnrs |
---|---|
date | Thu, 14 Sep 2017 08:33:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2c0b270dae70 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Structure is a wrapper script for "structure", a model-based clustering method for inferring population structure from genotype data | |
4 | |
5 Created by Yvan LE BRAS | |
6 """ | |
7 import optparse, os, sys, subprocess, tempfile, glob, shutil | |
8 import zipfile, tarfile, gzip | |
9 from os.path import basename | |
10 | |
# (option name, output file stem) for every K slot, in the order the jobs run.
# The stems (including the historical "eigthk" spelling) are runtime file
# names that end up in the tool outputs, so they are kept exactly as they were.
_K_SLOTS = (
    ("K", "outfile"),
    ("k2", "outfilesecondk"),
    ("k3", "outfilethirdk"),
    ("k4", "outfilefourthk"),
    ("k5", "outfilefifthk"),
    ("k6", "outfilesixthk"),
    ("k7", "outfileseventhk"),
    ("k8", "outfileeigthk"),
    ("k9", "outfileninthk"),
    ("k10", "outfiletenthk"),
)

# Output suffixes of the two extra replicate runs made when --t is 'true'.
_REPLICATE_SUFFIXES = ("_run2_f", "_run3_f")


def _run_structure(argv, announce=False):
    """Run one ``structure`` job and return its exit status.

    argv     -- full argument vector, program name first.
    announce -- when true, print the "[INFO]" line with the command after
                the job has finished.

    The child's stderr is merged into stdout (what the former ``2>&1`` did).
    A job that cannot be started or that exits with a non-zero status is
    reported on stdout and does not abort the remaining jobs, so the wrapper
    stays best-effort. Returns 127 when the program could not be started.
    """
    # Flush our own buffered output first so it is not interleaved after the
    # child's unbuffered writes.
    sys.stdout.flush()
    try:
        returncode = subprocess.call(argv, stderr=subprocess.STDOUT)
    except OSError as err:
        # No shell is involved any more, so a missing or non-executable
        # binary surfaces as an exception instead of exit status 127.
        print("[WARNING] : unable to start '" + argv[0] + "': " + str(err))
        returncode = 127
    else:
        if returncode != 0:
            print("[WARNING] : '" + " ".join(argv) + "' exited with status "
                  + str(returncode))
    if announce:
        print("\n[INFO] : " + " ".join(argv))
    return returncode


def __main__():
    """Command-line entry point: run ``structure`` once per requested K.

    Options are read from ``sys.argv``:

    --input / --param / --extraparam  files passed as -i / -m / -e (mandatory)
    --N / --L / --D                   forwarded as -N / -L / -D when given
    --K, --k2 ... --k10               K values; one job per value
    --t                               when 'true', the --K job is run and
                                      every K value gets two additional runs
                                      (``*_run2_f`` and ``*_run3_f``)
    --workdir                         directory that receives the temporary
                                      tree and, in 'default' mode, the results
    --id                              used to name the 'default' mode results
    --compress_output                 'total': zip every result into
                                      --total_output; 'default': move each
                                      result into --workdir
    --logfile                         accepted but not used here

    Exits through ``parser.error`` (status 2) when a mandatory option is
    missing. Failures of individual ``structure`` jobs are reported on stdout
    and do not stop the run.
    """
    # Argument retrieval.
    parser = optparse.OptionParser()
    for flag in ("--input", "--param", "--extraparam"):
        parser.add_option(flag)
    # Multi-file management.
    for flag in ("--K", "--k2", "--k3", "--k4", "--k5", "--k6", "--k7",
                 "--k8", "--k9", "--k10", "--t", "--N", "--L", "--D"):
        parser.add_option(flag)
    # Output management.
    for flag in ("--logfile", "--id", "--workdir", "--compress_output"):
        parser.add_option(flag)
    # Additional outputs.
    parser.add_option("--total_output")
    (options, args) = parser.parse_args()

    # Fail early with a readable message instead of a TypeError raised by
    # concatenating None further down (possibly after hours of computation).
    required = ["input", "param", "extraparam"]
    if options.compress_output == 'total':
        required.append("total_output")
    elif options.compress_output == 'default':
        required.extend(["workdir", "id"])
    missing = [name for name in required if getattr(options, name) is None]
    if missing:
        parser.error("missing required option(s): "
                     + ", ".join("--" + name for name in missing))

    # Create the working directory.
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)

        print(tmp_dir)

        # Arguments shared by every structure job. They are passed as a list
        # (no shell), so paths containing spaces or shell metacharacters are
        # handed to structure verbatim.
        base_argv = ["structure",
                     "-m", options.param,
                     "-e", options.extraparam,
                     "-i", options.input]
        for name in ("N", "L", "D"):
            value = getattr(options, name)
            if value:
                base_argv += ["-" + name, value]

        replicate = options.t == 'true'

        # With replicates requested but no K given, run once with the K
        # taken from the parameter files.
        if replicate and not options.K:
            _run_structure(base_argv, announce=True)

        # First run: one job per K value, results collected in tmp_output_dir.
        for name, stem in _K_SLOTS:
            k_value = getattr(options, name)
            if not k_value:
                continue
            # NOTE(review): the --K job has always been skipped unless --t is
            # 'true', while --k2..--k10 run unconditionally. Kept as is;
            # confirm against the tool definition that this is intended.
            if name == "K" and not replicate:
                continue
            out_path = os.path.join(tmp_output_dir, stem + "_f")
            _run_structure(base_argv + ["-K", k_value, "-o", out_path],
                           announce=(name == "K"))

        # Replicate runs 2 and 3 for every K value.
        if replicate:
            for name, stem in _K_SLOTS:
                k_value = getattr(options, name)
                if not k_value:
                    continue
                for suffix in _REPLICATE_SUFFIXES:
                    out_path = os.path.join(tmp_output_dir, stem + suffix)
                    _run_structure(base_argv + ["-K", k_value, "-o", out_path])

        # Collect the results before the archive is created so that the
        # archive never lists itself.
        list_files = sorted(glob.glob(os.path.join(tmp_output_dir, '*')))

        # 'total': return every result inside one zip archive.
        if options.compress_output == 'total':
            archive_path = os.path.join(tmp_output_dir, 'total.zip.temp')
            # The archive must be closed before it is moved: the zip central
            # directory is only written on close. Explicit archive names
            # replace the former os.chdir(), which broke relative paths.
            with zipfile.ZipFile(archive_path, 'w') as archive:
                for path in list_files:
                    archive.write(path, os.path.basename(path))
            shutil.move(archive_path, options.total_output)

        # 'default': move every result into workdir under a primary_* name.
        if options.compress_output == 'default':
            for path in list_files:
                target_name = ("primary_" + options.id + "_"
                               + os.path.basename(path).replace("_", ".")
                               + "_visible_txt")
                shutil.move(path, os.path.join(options.workdir, target_name))
    finally:
        # Clean up the temporary files, even when something above failed.
        shutil.rmtree(tmp_dir)
388 | |
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()