Mercurial > repos > ylebrascnrs > structure
diff structure-923cc9e6aa30/Structure.py @ 0:2c0b270dae70 draft default tip
Uploaded
author | ylebrascnrs |
---|---|
date | Thu, 14 Sep 2017 08:33:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/structure-923cc9e6aa30/Structure.py Thu Sep 14 08:33:05 2017 -0400 @@ -0,0 +1,389 @@ +#!/usr/bin/env python +""" +Structure is a script for model-based clustering method for inferring population structure using genotype data + +Created by Yvan LE BRAS +""" +import optparse, os, sys, subprocess, tempfile, glob, shutil +import zipfile, tarfile, gzip +from os.path import basename + +def __main__(): + + + # arguments recuperation + parser = optparse.OptionParser() + parser.add_option("--input") + parser.add_option("--param") + parser.add_option("--extraparam") + # multifile management + parser.add_option("--K") + parser.add_option("--k2") + parser.add_option("--k3") + parser.add_option("--k4") + parser.add_option("--k5") + parser.add_option("--k6") + parser.add_option("--k7") + parser.add_option("--k8") + parser.add_option("--k9") + parser.add_option("--k10") + parser.add_option("--t") + parser.add_option("--N") + parser.add_option("--L") + parser.add_option("--D") + # output management + parser.add_option("--logfile") + parser.add_option("--id") + parser.add_option("--workdir") + parser.add_option("--compress_output") + # additionnal outputs + parser.add_option("--total_output") + (options, args) = parser.parse_args() + + # create the working dir + tmp_dir = tempfile.mkdtemp(dir=options.workdir) + tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir) + + print tmp_dir + + # create the structure command input line + cmd_files=" -m "+options.param+" -e "+options.extraparam+" -i "+options.input+" " + + # create the structure command line. The idea is to run one job by k value then take all results from tmp_out_putdir + cmd_options="" + cmd_options0="" + cmd_options1="" + cmd_options2="" + cmd_options3="" + cmd_options4="" + cmd_options5="" + cmd_options6="" + cmd_options7="" + cmd_options8="" + cmd_options9="" + cmd_options10="" + cmd_options11="" + cmd_options12="" + cmd_options13="" + cmd_options14="" + cmd_options15="" + cmd_options16="" + cmd_options17="" + cmd_options18="" + cmd_options19="" + cmd_options20="" + cmd_options21="" + cmd_options22="" + cmd_options23="" + cmd_options24="" + cmd_options25="" + cmd_options26="" + cmd_options27="" + cmd_options28="" + cmd_options29="" + + if options.N: + cmd_options0+=" -N "+options.N + if options.L: + cmd_options0+=" -L "+options.L + if options.D: + cmd_options0+=" -D "+options.D + if options.K: + cmd_options+=" -K "+options.K+" -o "+tmp_output_dir+"/outfile_f" + if options.k2: + cmd_options1+=" -K "+options.k2+" -o "+tmp_output_dir+"/outfilesecondk_f" + if options.k3: + cmd_options2+=" -K "+options.k3+" -o "+tmp_output_dir+"/outfilethirdk_f" + if options.k4: + cmd_options3+=" -K "+options.k4+" -o "+tmp_output_dir+"/outfilefourthk_f" + if options.k5: + cmd_options4+=" -K "+options.k5+" -o "+tmp_output_dir+"/outfilefifthk_f" + if options.k6: + cmd_options5+=" -K "+options.k6+" -o "+tmp_output_dir+"/outfilesixthk_f" + if options.k7: + cmd_options6+=" -K "+options.k7+" -o "+tmp_output_dir+"/outfileseventhk_f" + if options.k8: + cmd_options7+=" -K "+options.k8+" -o "+tmp_output_dir+"/outfileeigthk_f" + if options.k9: + cmd_options8+=" -K "+options.k9+" -o "+tmp_output_dir+"/outfileninthk_f" + if options.k10: + cmd_options9+=" -K "+options.k10+" -o "+tmp_output_dir+"/outfiletenthk_f" + if options.K and options.t and options.t == 'true': + cmd_options10+=" -K "+options.K+" -o "+tmp_output_dir+"/outfile_run2_f" + if options.k2 and options.t and options.t == 'true': + cmd_options11+=" -K "+options.k2+" -o "+tmp_output_dir+"/outfilesecondk_run2_f" + if options.k3 and options.t and options.t == 'true': + cmd_options12+=" -K "+options.k3+" -o "+tmp_output_dir+"/outfilethirdk_run2_f" + if options.k4 and options.t and options.t == 'true': + cmd_options13+=" -K "+options.k4+" -o "+tmp_output_dir+"/outfilefourthk_run2_f" + if options.k5 and options.t and options.t == 'true': + cmd_options14+=" -K "+options.k5+" -o "+tmp_output_dir+"/outfilefifthk_run2_f" + if options.k6 and options.t and options.t == 'true': + cmd_options15+=" -K "+options.k6+" -o "+tmp_output_dir+"/outfilesixthk_run2_f" + if options.k7 and options.t and options.t == 'true': + cmd_options16+=" -K "+options.k7+" -o "+tmp_output_dir+"/outfileseventhk_run2_f" + if options.k8 and options.t and options.t == 'true': + cmd_options17+=" -K "+options.k8+" -o "+tmp_output_dir+"/outfileeigthk_run2_f" + if options.k9 and options.t and options.t == 'true': + cmd_options18+=" -K "+options.k9+" -o "+tmp_output_dir+"/outfileninthk_run2_f" + if options.k10 and options.t and options.t == 'true': + cmd_options19+=" -K "+options.k10+" -o "+tmp_output_dir+"/outfiletenthk_run2_f" + if options.K and options.t and options.t == 'true': + cmd_options20+=" -K "+options.K+" -o "+tmp_output_dir+"/outfile_run3_f" + if options.k2 and options.t and options.t == 'true': + cmd_options21+=" -K "+options.k2+" -o "+tmp_output_dir+"/outfilesecondk_run3_f" + if options.k3 and options.t and options.t == 'true': + cmd_options22+=" -K "+options.k3+" -o "+tmp_output_dir+"/outfilethirdk_run3_f" + if options.k4 and options.t and options.t == 'true': + cmd_options23+=" -K "+options.k4+" -o "+tmp_output_dir+"/outfilefourthk_run3_f" + if options.k5 and options.t and options.t == 'true': + cmd_options24+=" -K "+options.k5+" -o "+tmp_output_dir+"/outfilefifthk_run3_f" + if options.k6 and options.t and options.t == 'true': + cmd_options25+=" -K "+options.k6+" -o "+tmp_output_dir+"/outfilesixthk_run3_f" + if options.k7 and options.t and options.t == 'true': + cmd_options26+=" -K "+options.k7+" -o "+tmp_output_dir+"/outfileseventhk_run3_f" + if options.k8 and options.t and options.t == 'true': + cmd_options27+=" -K "+options.k8+" -o "+tmp_output_dir+"/outfileeigthk_run3_f" + if options.k9 and options.t and options.t == 'true': + cmd_options28+=" -K "+options.k9+" -o "+tmp_output_dir+"/outfileninthk_run3_f" + if options.k10 and options.t and options.t == 'true': + cmd_options29+=" -K "+options.k10+" -o "+tmp_output_dir+"/outfiletenthk_run3_f" + + + if options.t and options.t == 'true' and not options.K: + cmd='structure'+cmd_files+" "+cmd_options0+" 2>&1" + + # execute command line + proc = subprocess.Popen( args=cmd, shell=True ) + returncode = proc.wait() + + print "\n[INFO] : "+cmd + + if options.t and options.t == 'true' and options.K: + cmd1='structure'+cmd_files+" "+cmd_options0+" "+cmd_options+" 2>&1" + + # execute command line + proc = subprocess.Popen( args=cmd1, shell=True ) + returncode = proc.wait() + + print "\n[INFO] : "+cmd1 + + #os.system("mv outfile_f outfilefirstk_f") + + if options.k2: + cmd2='structure'+cmd_files+" "+cmd_options0+" "+cmd_options1+" 2>&1" + + proc = subprocess.Popen( args=cmd2, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilesecondk_f") + + if options.k3: + cmd3='structure'+cmd_files+" "+cmd_options0+" "+cmd_options2+" 2>&1" + + proc = subprocess.Popen( args=cmd3, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilethirdk_f") + + if options.k4: + cmd4='structure'+cmd_files+" "+cmd_options0+" "+cmd_options3+" 2>&1" + + proc = subprocess.Popen( args=cmd4, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilefourthk_f") + + if options.k5: + cmd5='structure'+cmd_files+" "+cmd_options0+" "+cmd_options4+" 2>&1" + + proc = subprocess.Popen( args=cmd5, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilefifthk_f") + + if options.k6: + cmd6='structure'+cmd_files+" "+cmd_options0+" "+cmd_options5+" 2>&1" + + proc = subprocess.Popen( args=cmd6, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilesixthk_f") + + if options.k7: + cmd7='structure'+cmd_files+" "+cmd_options0+" "+cmd_options6+" 2>&1" + + proc = subprocess.Popen( args=cmd7, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfileseventhk_f") + + if options.k8: + cmd8='structure'+cmd_files+" "+cmd_options0+" "+cmd_options7+" 2>&1" + + proc = subprocess.Popen( args=cmd8, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfileeigthk_f") + + if options.k9: + cmd9='structure'+cmd_files+" "+cmd_options0+" "+cmd_options8+" 2>&1" + + proc = subprocess.Popen( args=cmd9, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfileninthk_f") + + if options.k10: + cmd10='structure'+cmd_files+" "+cmd_options0+" "+cmd_options9+" 2>&1" + + proc = subprocess.Popen( args=cmd10, shell=True ) + returncode = proc.wait() + + if options.K and options.t and options.t == 'true': + cmd11='structure'+cmd_files+" "+cmd_options0+" "+cmd_options10+" 2>&1" + cmd21='structure'+cmd_files+" "+cmd_options0+" "+cmd_options20+" 2>&1" + + proc = subprocess.Popen( args=cmd11, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd21, shell=True ) + returncode = proc.wait() + + if options.k2 and options.t and options.t == 'true': + cmd12='structure'+cmd_files+" "+cmd_options0+" "+cmd_options11+" 2>&1" + cmd22='structure'+cmd_files+" "+cmd_options0+" "+cmd_options21+" 2>&1" + + proc = subprocess.Popen( args=cmd12, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd22, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilesecondk_f") + + if options.k3 and options.t and options.t == 'true': + cmd13='structure'+cmd_files+" "+cmd_options0+" "+cmd_options12+" 2>&1" + cmd23='structure'+cmd_files+" "+cmd_options0+" "+cmd_options22+" 2>&1" + + proc = subprocess.Popen( args=cmd13, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd23, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilethirdk_f") + + if options.k4 and options.t and options.t == 'true': + cmd14='structure'+cmd_files+" "+cmd_options0+" "+cmd_options13+" 2>&1" + cmd24='structure'+cmd_files+" "+cmd_options0+" "+cmd_options23+" 2>&1" + + proc = subprocess.Popen( args=cmd14, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd24, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilefourthk_f") + + if options.k5 and options.t and options.t == 'true': + cmd15='structure'+cmd_files+" "+cmd_options0+" "+cmd_options14+" 2>&1" + cmd25='structure'+cmd_files+" "+cmd_options0+" "+cmd_options24+" 2>&1" + + proc = subprocess.Popen( args=cmd15, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd25, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilefifthk_f") + + if options.k6 and options.t and options.t == 'true': + cmd16='structure'+cmd_files+" "+cmd_options0+" "+cmd_options15+" 2>&1" + cmd26='structure'+cmd_files+" "+cmd_options0+" "+cmd_options25+" 2>&1" + + proc = subprocess.Popen( args=cmd16, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd26, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfilesixthk_f") + + if options.k7 and options.t and options.t == 'true': + cmd17='structure'+cmd_files+" "+cmd_options0+" "+cmd_options16+" 2>&1" + cmd27='structure'+cmd_files+" "+cmd_options0+" "+cmd_options26+" 2>&1" + + proc = subprocess.Popen( args=cmd17, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd27, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfileseventhk_f") + + if options.k8 and options.t and options.t == 'true': + cmd18='structure'+cmd_files+" "+cmd_options0+" "+cmd_options17+" 2>&1" + cmd28='structure'+cmd_files+" "+cmd_options0+" "+cmd_options27+" 2>&1" + + proc = subprocess.Popen( args=cmd18, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd28, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfileeigthk_f") + + if options.k9 and options.t and options.t == 'true': + cmd19='structure'+cmd_files+" "+cmd_options0+" "+cmd_options18+" 2>&1" + cmd29='structure'+cmd_files+" "+cmd_options0+" "+cmd_options28+" 2>&1" + + proc = subprocess.Popen( args=cmd19, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd29, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfileninthk_f") + + if options.k10 and options.t and options.t == 'true': + cmd20='structure'+cmd_files+" "+cmd_options0+" "+cmd_options19+" 2>&1" + cmd30='structure'+cmd_files+" "+cmd_options0+" "+cmd_options29+" 2>&1" + + proc = subprocess.Popen( args=cmd20, shell=True ) + returncode = proc.wait() + proc = subprocess.Popen( args=cmd30, shell=True ) + returncode = proc.wait() + + #os.system("mv outfile_f outfiletenthk_f") + + # postprocesses + #if os.path.exists(tmp_output_dir+'/outfile_f'): + # os.system('mv '+tmp_output_dir+'/outfile_f '+options.logfile) + #else: + # sys.stderr.write('Error in structure execution; Please read the additional output (stdout)\n') + + + # copy all files inside tmp_dir into workdir + list_files = glob.glob(tmp_output_dir+'/*') + + + # if compress output is total + if options.compress_output == 'total': + mytotalzipfile=zipfile.ZipFile(tmp_output_dir+'/total.zip.temp', 'w') + os.chdir(tmp_output_dir) + + for i in list_files: + mytotalzipfile.write(os.path.basename(i)) + #command = "mv "+i+" "+options.workdir+ "/primary_" + options.id + "_" + os.path.basename(i).replace("_", ".") + "_visible_txt" + #proc = subprocess.Popen( args=command, shell=True ) + #returncode = proc.wait() + + # return the unique archive + os.system("mv "+tmp_output_dir+'/total.zip.temp'+" "+options.total_output) + + # if compress output is default + if options.compress_output == 'default': + + for i in list_files: + command = "mv "+i+" "+options.workdir+ "/primary_" + options.id + "_" + os.path.basename(i).replace("_", ".") + "_visible_txt" + proc = subprocess.Popen( args=command, shell=True ) + returncode = proc.wait() + + + + #clean up temp files + shutil.rmtree( tmp_dir ) + +if __name__=="__main__": __main__()