diff structure-923cc9e6aa30/Structure.py @ 0:2c0b270dae70 draft default tip

Uploaded
author ylebrascnrs
date Thu, 14 Sep 2017 08:33:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structure-923cc9e6aa30/Structure.py	Thu Sep 14 08:33:05 2017 -0400
@@ -0,0 +1,389 @@
+#!/usr/bin/env python
+"""
+Wrapper script that runs Structure, a model-based clustering method for inferring population structure using genotype data
+
+Created by Yvan LE BRAS
+"""
+import optparse, os, sys, subprocess, tempfile, glob, shutil
+import zipfile, tarfile, gzip
+from os.path import basename
+
+def __main__():
+	
+
+	# arguments recuperation
+	parser = optparse.OptionParser()
+	parser.add_option("--input")
+	parser.add_option("--param")
+	parser.add_option("--extraparam")
+	# multifile management
+	parser.add_option("--K")
+	parser.add_option("--k2")
+	parser.add_option("--k3")
+	parser.add_option("--k4")
+	parser.add_option("--k5")
+	parser.add_option("--k6")
+	parser.add_option("--k7")
+	parser.add_option("--k8")
+	parser.add_option("--k9")
+	parser.add_option("--k10")
+	parser.add_option("--t")
+	parser.add_option("--N")
+	parser.add_option("--L")
+	parser.add_option("--D")
+	# output management
+        parser.add_option("--logfile")
+        parser.add_option("--id")
+        parser.add_option("--workdir")
+        parser.add_option("--compress_output")
+        # additionnal outputs
+        parser.add_option("--total_output")
+	(options, args) = parser.parse_args()
+
+        # create the working dir
+        tmp_dir = tempfile.mkdtemp(dir=options.workdir)
+        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)
+
+	print tmp_dir 
+	
+	# create the structure command input line
+	cmd_files=" -m "+options.param+" -e "+options.extraparam+" -i "+options.input+" "
+		
+	# create the structure command line. The idea is to run one job by k value then take all results from tmp_out_putdir
+	cmd_options=""
+	cmd_options0=""
+	cmd_options1=""
+	cmd_options2=""
+	cmd_options3=""
+	cmd_options4=""
+	cmd_options5=""
+	cmd_options6=""
+	cmd_options7=""
+	cmd_options8=""
+	cmd_options9=""
+	cmd_options10=""
+	cmd_options11=""
+	cmd_options12=""
+	cmd_options13=""
+	cmd_options14=""
+	cmd_options15=""
+	cmd_options16=""
+	cmd_options17=""
+	cmd_options18=""
+	cmd_options19=""
+	cmd_options20=""
+	cmd_options21=""
+	cmd_options22=""
+	cmd_options23=""
+	cmd_options24=""
+	cmd_options25=""
+	cmd_options26=""
+	cmd_options27=""
+	cmd_options28=""
+	cmd_options29=""
+
+	if options.N:
+		cmd_options0+=" -N "+options.N
+	if options.L:
+		cmd_options0+=" -L "+options.L
+	if options.D:
+		cmd_options0+=" -D "+options.D
+	if options.K:
+		cmd_options+=" -K "+options.K+" -o "+tmp_output_dir+"/outfile_f"
+	if options.k2:
+		cmd_options1+=" -K "+options.k2+" -o "+tmp_output_dir+"/outfilesecondk_f"
+	if options.k3:
+		cmd_options2+=" -K "+options.k3+" -o "+tmp_output_dir+"/outfilethirdk_f"
+	if options.k4:
+		cmd_options3+=" -K "+options.k4+" -o "+tmp_output_dir+"/outfilefourthk_f"
+	if options.k5:
+		cmd_options4+=" -K "+options.k5+" -o "+tmp_output_dir+"/outfilefifthk_f"
+	if options.k6:
+		cmd_options5+=" -K "+options.k6+" -o "+tmp_output_dir+"/outfilesixthk_f"
+	if options.k7:
+		cmd_options6+=" -K "+options.k7+" -o "+tmp_output_dir+"/outfileseventhk_f"
+	if options.k8:
+		cmd_options7+=" -K "+options.k8+" -o "+tmp_output_dir+"/outfileeigthk_f"
+	if options.k9:
+		cmd_options8+=" -K "+options.k9+" -o "+tmp_output_dir+"/outfileninthk_f"
+	if options.k10:
+		cmd_options9+=" -K "+options.k10+" -o "+tmp_output_dir+"/outfiletenthk_f"
+	if options.K and options.t and options.t == 'true':
+		cmd_options10+=" -K "+options.K+" -o "+tmp_output_dir+"/outfile_run2_f"
+	if options.k2 and options.t and options.t == 'true':
+		cmd_options11+=" -K "+options.k2+" -o "+tmp_output_dir+"/outfilesecondk_run2_f"
+	if options.k3 and options.t and options.t == 'true':
+		cmd_options12+=" -K "+options.k3+" -o "+tmp_output_dir+"/outfilethirdk_run2_f"
+	if options.k4 and options.t and options.t == 'true':
+		cmd_options13+=" -K "+options.k4+" -o "+tmp_output_dir+"/outfilefourthk_run2_f"
+	if options.k5 and options.t and options.t == 'true':
+		cmd_options14+=" -K "+options.k5+" -o "+tmp_output_dir+"/outfilefifthk_run2_f"
+	if options.k6 and options.t and options.t == 'true':
+		cmd_options15+=" -K "+options.k6+" -o "+tmp_output_dir+"/outfilesixthk_run2_f"
+	if options.k7 and options.t and options.t == 'true':
+		cmd_options16+=" -K "+options.k7+" -o "+tmp_output_dir+"/outfileseventhk_run2_f"
+	if options.k8 and options.t and options.t == 'true':
+		cmd_options17+=" -K "+options.k8+" -o "+tmp_output_dir+"/outfileeigthk_run2_f"
+	if options.k9 and options.t and options.t == 'true':
+		cmd_options18+=" -K "+options.k9+" -o "+tmp_output_dir+"/outfileninthk_run2_f"
+	if options.k10 and options.t and options.t == 'true':
+		cmd_options19+=" -K "+options.k10+" -o "+tmp_output_dir+"/outfiletenthk_run2_f"
+	if options.K and options.t and options.t == 'true':
+		cmd_options20+=" -K "+options.K+" -o "+tmp_output_dir+"/outfile_run3_f"
+	if options.k2 and options.t and options.t == 'true':
+		cmd_options21+=" -K "+options.k2+" -o "+tmp_output_dir+"/outfilesecondk_run3_f"
+	if options.k3 and options.t and options.t == 'true':
+		cmd_options22+=" -K "+options.k3+" -o "+tmp_output_dir+"/outfilethirdk_run3_f"
+	if options.k4 and options.t and options.t == 'true':
+		cmd_options23+=" -K "+options.k4+" -o "+tmp_output_dir+"/outfilefourthk_run3_f"
+	if options.k5 and options.t and options.t == 'true':
+		cmd_options24+=" -K "+options.k5+" -o "+tmp_output_dir+"/outfilefifthk_run3_f"
+	if options.k6 and options.t and options.t == 'true':
+		cmd_options25+=" -K "+options.k6+" -o "+tmp_output_dir+"/outfilesixthk_run3_f"
+	if options.k7 and options.t and options.t == 'true':
+		cmd_options26+=" -K "+options.k7+" -o "+tmp_output_dir+"/outfileseventhk_run3_f"
+	if options.k8 and options.t and options.t == 'true':
+		cmd_options27+=" -K "+options.k8+" -o "+tmp_output_dir+"/outfileeigthk_run3_f"
+	if options.k9 and options.t and options.t == 'true':
+		cmd_options28+=" -K "+options.k9+" -o "+tmp_output_dir+"/outfileninthk_run3_f"
+	if options.k10 and options.t and options.t == 'true':
+		cmd_options29+=" -K "+options.k10+" -o "+tmp_output_dir+"/outfiletenthk_run3_f"
+
+
+	if options.t and options.t == 'true' and not options.K:	
+		cmd='structure'+cmd_files+" "+cmd_options0+" 2>&1"
+
+      	 # execute command line  
+		proc = subprocess.Popen( args=cmd, shell=True )
+		returncode = proc.wait()
+
+		print "\n[INFO] : "+cmd
+
+	if options.t and options.t == 'true' and options.K:	
+		cmd1='structure'+cmd_files+" "+cmd_options0+" "+cmd_options+" 2>&1"
+
+      	 # execute command line  
+		proc = subprocess.Popen( args=cmd1, shell=True )
+		returncode = proc.wait()
+
+		print "\n[INFO] : "+cmd1
+
+	#os.system("mv outfile_f outfilefirstk_f")
+
+	if options.k2:
+		cmd2='structure'+cmd_files+" "+cmd_options0+" "+cmd_options1+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd2, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilesecondk_f")
+
+	if options.k3:
+		cmd3='structure'+cmd_files+" "+cmd_options0+" "+cmd_options2+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd3, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilethirdk_f")
+
+	if options.k4:
+		cmd4='structure'+cmd_files+" "+cmd_options0+" "+cmd_options3+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd4, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilefourthk_f")
+
+	if options.k5:
+		cmd5='structure'+cmd_files+" "+cmd_options0+" "+cmd_options4+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd5, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilefifthk_f")
+
+	if options.k6:
+		cmd6='structure'+cmd_files+" "+cmd_options0+" "+cmd_options5+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd6, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilesixthk_f")
+
+	if options.k7:
+		cmd7='structure'+cmd_files+" "+cmd_options0+" "+cmd_options6+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd7, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfileseventhk_f")
+
+	if options.k8:
+		cmd8='structure'+cmd_files+" "+cmd_options0+" "+cmd_options7+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd8, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfileeigthk_f")
+
+	if options.k9:
+		cmd9='structure'+cmd_files+" "+cmd_options0+" "+cmd_options8+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd9, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfileninthk_f")
+
+	if options.k10:
+		cmd10='structure'+cmd_files+" "+cmd_options0+" "+cmd_options9+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd10, shell=True )
+		returncode = proc.wait()
+
+	if options.K and options.t and options.t == 'true':
+		cmd11='structure'+cmd_files+" "+cmd_options0+" "+cmd_options10+" 2>&1"
+		cmd21='structure'+cmd_files+" "+cmd_options0+" "+cmd_options20+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd11, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd21, shell=True )
+		returncode = proc.wait()
+
+	if options.k2 and options.t and options.t == 'true':
+		cmd12='structure'+cmd_files+" "+cmd_options0+" "+cmd_options11+" 2>&1"
+		cmd22='structure'+cmd_files+" "+cmd_options0+" "+cmd_options21+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd12, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd22, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilesecondk_f")
+
+	if options.k3 and options.t and options.t == 'true':
+		cmd13='structure'+cmd_files+" "+cmd_options0+" "+cmd_options12+" 2>&1"
+		cmd23='structure'+cmd_files+" "+cmd_options0+" "+cmd_options22+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd13, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd23, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilethirdk_f")
+
+	if options.k4 and options.t and options.t == 'true':
+		cmd14='structure'+cmd_files+" "+cmd_options0+" "+cmd_options13+" 2>&1"
+		cmd24='structure'+cmd_files+" "+cmd_options0+" "+cmd_options23+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd14, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd24, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilefourthk_f")
+
+	if options.k5 and options.t and options.t == 'true':
+		cmd15='structure'+cmd_files+" "+cmd_options0+" "+cmd_options14+" 2>&1"
+		cmd25='structure'+cmd_files+" "+cmd_options0+" "+cmd_options24+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd15, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd25, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilefifthk_f")
+
+	if options.k6 and options.t and options.t == 'true':
+		cmd16='structure'+cmd_files+" "+cmd_options0+" "+cmd_options15+" 2>&1"
+		cmd26='structure'+cmd_files+" "+cmd_options0+" "+cmd_options25+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd16, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd26, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfilesixthk_f")
+
+	if options.k7 and options.t and options.t == 'true':
+		cmd17='structure'+cmd_files+" "+cmd_options0+" "+cmd_options16+" 2>&1"
+		cmd27='structure'+cmd_files+" "+cmd_options0+" "+cmd_options26+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd17, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd27, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfileseventhk_f")
+
+	if options.k8 and options.t and options.t == 'true':
+		cmd18='structure'+cmd_files+" "+cmd_options0+" "+cmd_options17+" 2>&1"
+		cmd28='structure'+cmd_files+" "+cmd_options0+" "+cmd_options27+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd18, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd28, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfileeigthk_f")
+
+	if options.k9 and options.t and options.t == 'true':
+		cmd19='structure'+cmd_files+" "+cmd_options0+" "+cmd_options18+" 2>&1"
+		cmd29='structure'+cmd_files+" "+cmd_options0+" "+cmd_options28+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd19, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd29, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfileninthk_f")
+
+	if options.k10 and options.t and options.t == 'true':
+		cmd20='structure'+cmd_files+" "+cmd_options0+" "+cmd_options19+" 2>&1"
+		cmd30='structure'+cmd_files+" "+cmd_options0+" "+cmd_options29+" 2>&1"
+
+		proc = subprocess.Popen( args=cmd20, shell=True )
+		returncode = proc.wait()
+		proc = subprocess.Popen( args=cmd30, shell=True )
+		returncode = proc.wait()
+
+		#os.system("mv outfile_f  outfiletenthk_f")
+
+	# postprocesses
+	#if os.path.exists(tmp_output_dir+'/outfile_f'):
+	#	os.system('mv '+tmp_output_dir+'/outfile_f '+options.logfile)
+	#else:
+	#	sys.stderr.write('Error in structure execution; Please read the additional output (stdout)\n')
+
+
+	# copy all files inside tmp_dir into workdir
+	list_files = glob.glob(tmp_output_dir+'/*')
+
+	
+	# if compress output is total
+	if options.compress_output == 'total':
+		mytotalzipfile=zipfile.ZipFile(tmp_output_dir+'/total.zip.temp', 'w')
+		os.chdir(tmp_output_dir)
+
+		for i in list_files:
+			mytotalzipfile.write(os.path.basename(i))
+			#command = "mv "+i+" "+options.workdir+ "/primary_" + options.id + "_" + os.path.basename(i).replace("_", ".") + "_visible_txt"
+			#proc = subprocess.Popen( args=command, shell=True )
+			#returncode = proc.wait()
+
+		# return the unique archive
+		os.system("mv "+tmp_output_dir+'/total.zip.temp'+" "+options.total_output)
+
+	# if compress output is default
+	if options.compress_output == 'default':
+
+		for i in list_files:
+			command = "mv "+i+" "+options.workdir+ "/primary_" + options.id + "_" + os.path.basename(i).replace("_", ".") + "_visible_txt"
+			proc = subprocess.Popen( args=command, shell=True )
+			returncode = proc.wait()	
+
+
+
+	#clean up temp files
+	shutil.rmtree( tmp_dir )	
+		
+if __name__=="__main__": __main__()