Mercurial > repos > ylebrascnrs > structure
view structure-923cc9e6aa30/Structure.py @ 0:2c0b270dae70 draft default tip
Uploaded
author | ylebrascnrs |
---|---|
date | Thu, 14 Sep 2017 08:33:05 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
Wrapper script around ``structure``, a model-based clustering method for
inferring population structure using genotype data.

The script runs the ``structure`` executable once per requested K value
(optionally three times per K), then hands the produced files back either as
one zip archive or as individually renamed files.

Created by Yvan LE BRAS
"""
import glob
import gzip
import optparse
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile
import zipfile
from os.path import basename

# Every command-line option accepted by the wrapper. All are plain string
# options without defaults, so an option that is not given parses to None.
_OPTION_NAMES = (
    # inputs
    "input", "param", "extraparam",
    # multi-K management
    "K", "k2", "k3", "k4", "k5", "k6", "k7", "k8", "k9", "k10",
    "t", "N", "L", "D",
    # output management
    "logfile", "id", "workdir", "compress_output",
    # additional outputs
    "total_output",
)

# (option name, output file stem) for each K value, in execution order.
# The stems are runtime file names and are kept exactly as they were,
# including the historical "eigth" spelling.
_K_OPTIONS = (
    ("K", "outfile"),
    ("k2", "outfilesecondk"),
    ("k3", "outfilethirdk"),
    ("k4", "outfilefourthk"),
    ("k5", "outfilefifthk"),
    ("k6", "outfilesixthk"),
    ("k7", "outfileseventhk"),
    ("k8", "outfileeigthk"),
    ("k9", "outfileninthk"),
    ("k10", "outfiletenthk"),
)


def _build_parser():
    """Return the option parser declaring every option in _OPTION_NAMES."""
    parser = optparse.OptionParser()
    for name in _OPTION_NAMES:
        parser.add_option("--" + name)
    return parser


def _build_commands(options, output_dir):
    """Return the ordered list of ``structure`` argument vectors to execute.

    options    -- parsed option values (attributes named as in _OPTION_NAMES)
    output_dir -- directory that receives the ``-o`` output files

    Order of the returned commands:
      1. when --t is 'true' and --K is absent: one run without -K/-o;
      2. the first run of every K value that is set;
      3. when --t is 'true': a second and a third run ("_run2"/"_run3")
         for every K value that is set.

    NOTE(review): the indentation of the original script was lost, so the
    branches are treated as independent top-level checks. Under that reading
    the first run for --K only happens when --t is 'true', while the first
    runs for --k2..--k10 happen regardless of --t. This is preserved as-is;
    confirm against the tool definition before changing it.
    """
    base = ["structure",
            "-m", options.param,
            "-e", options.extraparam,
            "-i", options.input]
    for flag in ("N", "L", "D"):
        value = getattr(options, flag)
        if value:
            base.extend(["-" + flag, value])

    replicate = options.t == "true"
    requested = [(name, getattr(options, name), stem)
                 for name, stem in _K_OPTIONS
                 if getattr(options, name)]

    def with_k(k_value, file_name):
        return base + ["-K", k_value, "-o", os.path.join(output_dir, file_name)]

    commands = []
    if replicate and not options.K:
        commands.append(list(base))
    for name, k_value, stem in requested:
        if name == "K" and not replicate:
            continue  # preserved quirk, see NOTE(review) in the docstring
        commands.append(with_k(k_value, stem + "_f"))
    if replicate:
        for name, k_value, stem in requested:
            commands.append(with_k(k_value, stem + "_run2_f"))
            commands.append(with_k(k_value, stem + "_run3_f"))
    return commands


def _run(command):
    """Run one command, merging its stderr into stdout; return its exit status.

    The command is passed as an argument list (no shell), so option values
    containing spaces or shell metacharacters are handed over verbatim.
    A failure is reported on stderr but does not stop the remaining runs.
    """
    sys.stdout.flush()  # keep our own messages ordered before the child's output
    try:
        returncode = subprocess.Popen(args=command,
                                      stderr=subprocess.STDOUT).wait()
    except OSError as err:
        # Typically: the executable could not be found or started.
        sys.stderr.write("[ERROR] : cannot execute %s: %s\n" % (command[0], err))
        returncode = 127
    print("\n[INFO] : " + " ".join(command))
    if returncode != 0:
        sys.stderr.write("[ERROR] : command exited with status %d: %s\n"
                         % (returncode, " ".join(command)))
    return returncode


def _primary_path(workdir, file_id, produced_path):
    """Return the destination path used for one output file in 'default' mode.

    Underscores in the file's base name are replaced by dots, and the result
    is wrapped as ``primary_<id>_<name>_visible_txt`` inside *workdir*.
    """
    visible_name = os.path.basename(produced_path).replace("_", ".")
    return os.path.join(workdir,
                        "primary_" + file_id + "_" + visible_name + "_visible_txt")


def _collect_outputs(options, output_dir):
    """Deliver the files found in *output_dir* according to --compress_output.

    'total'   -- zip every file (stored under its base name) and move the
                 archive to --total_output;
    'default' -- move every file into --workdir under its primary_* name.
    Any other value delivers nothing.
    """
    # Listed before the archive is created so the archive never contains itself.
    produced = glob.glob(os.path.join(output_dir, "*"))

    if options.compress_output == "total":
        archive_path = os.path.join(output_dir, "total.zip.temp")
        # The archive must be closed before it is moved: the zip central
        # directory is only written on close.
        with zipfile.ZipFile(archive_path, "w") as archive:
            for path in produced:
                archive.write(path, os.path.basename(path))
        shutil.move(archive_path, options.total_output)

    if options.compress_output == "default":
        for path in produced:
            shutil.move(path,
                        _primary_path(options.workdir, options.id, path))


def __main__(argv=None):
    """Parse the options, run ``structure`` and deliver its output files.

    argv -- optional list of command-line arguments; when None (the default)
            the arguments are taken from sys.argv.

    Exits through the parser (status 2) when a required option is missing.
    The temporary working directory is always removed, even on error.
    """
    parser = _build_parser()
    (options, args) = parser.parse_args(argv)

    # Fail early with a usage message instead of a TypeError further down.
    for required in ("input", "param", "extraparam"):
        if not getattr(options, required):
            parser.error("--%s is required" % required)
    if options.compress_output == "total" and not options.total_output:
        parser.error("--total_output is required with --compress_output total")
    if options.compress_output == "default" and (
            not options.workdir or options.id is None):
        parser.error("--workdir and --id are required with "
                     "--compress_output default")

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)
        print(tmp_dir)

        # one job per K value (and per replicate); results all land in
        # tmp_output_dir
        for command in _build_commands(options, tmp_output_dir):
            _run(command)

        _collect_outputs(options, tmp_output_dir)
    finally:
        # clean up temp files
        shutil.rmtree(tmp_dir)


if __name__ == "__main__":
    __main__()