view structure-923cc9e6aa30/Structure.py @ 0:2c0b270dae70 draft default tip

Uploaded
author ylebrascnrs
date Thu, 14 Sep 2017 08:33:05 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python
"""
Wrapper script around STRUCTURE, a model-based clustering method for inferring population structure from genotype data.

Created by Yvan LE BRAS
"""
import optparse, os, sys, subprocess, tempfile, glob, shutil
import zipfile, tarfile, gzip
from os.path import basename

# Option name -> output-file stem for every K value the wrapper accepts, in
# execution order.  The stems end up in the names of the delivered result
# files, so they are kept exactly as they were (including the historical
# "eigth" spelling).
_STRUCTURE_K_OUTPUTS = (
    ("K", "outfile"),
    ("k2", "outfilesecondk"),
    ("k3", "outfilethirdk"),
    ("k4", "outfilefourthk"),
    ("k5", "outfilefifthk"),
    ("k6", "outfilesixthk"),
    ("k7", "outfileseventhk"),
    ("k8", "outfileeigthk"),
    ("k9", "outfileninthk"),
    ("k10", "outfiletenthk"),
)


def _structure_base_args(options):
    """Return the ``structure`` argument list shared by every run."""
    args = ["structure",
            "-m", options.param,
            "-e", options.extraparam,
            "-i", options.input]
    for flag, value in (("-N", options.N), ("-L", options.L), ("-D", options.D)):
        if value:
            args.extend([flag, value])
    return args


def _plan_runs(options, output_dir):
    """Return the per-run extra arguments (``-K``/``-o``), in execution order."""
    replicate = options.t == 'true'
    requested = [(getattr(options, dest), stem)
                 for dest, stem in _STRUCTURE_K_OUTPUTS
                 if getattr(options, dest)]

    def k_args(value, filename):
        return ["-K", value, "-o", os.path.join(output_dir, filename)]

    runs = []
    if replicate and not options.K:
        # Replicates requested without an explicit K: a single run that
        # passes neither -K nor -o.
        runs.append([])
    for value, stem in requested:
        # NOTE(review): the primary --K run has always been gated on
        # --t == 'true' while --k2..--k10 run unconditionally.  That looks
        # unintended but is preserved here; confirm against the tool XML.
        if stem == "outfile" and not replicate:
            continue
        runs.append(k_args(value, stem + "_f"))
    if replicate:
        # Second and third replicate of every requested K value.
        for value, stem in requested:
            runs.append(k_args(value, stem + "_run2_f"))
            runs.append(k_args(value, stem + "_run3_f"))
    return runs


def _run_structure(args):
    """Run one ``structure`` command, merging its stderr into stdout.

    Failures are reported on stdout and do not abort the remaining runs.
    Returns the exit status, or None when the program could not be started.
    """
    # Flush our own buffered output first so it is not interleaved after
    # what the child writes directly to the shared stdout descriptor.
    sys.stdout.flush()
    try:
        returncode = subprocess.call(args, stderr=subprocess.STDOUT)
    except OSError as err:
        print("[ERROR] : unable to start structure: %s" % err)
        return None
    print("\n[INFO] : " + " ".join(args))
    if returncode != 0:
        print("[WARNING] : structure exited with status %d" % returncode)
    return returncode


def __main__(argv=None):
    """Run ``structure`` once per requested K value and deliver the results.

    Args:
        argv: optional list of command-line arguments; when None the
            arguments are taken from ``sys.argv[1:]``.

    Options read: ``--input``, ``--param``, ``--extraparam`` (always
    required); ``--N``, ``--L``, ``--D`` (forwarded to every run when set);
    ``--K`` and ``--k2`` .. ``--k10`` (one run each); ``--t`` (when equal to
    ``'true'``, two additional replicate runs are made per K value);
    ``--workdir``, ``--id``, ``--compress_output``, ``--total_output``
    (output delivery).  ``--logfile`` is accepted but not used.

    Every run writes into a private temporary directory created under
    ``--workdir``.  Afterwards, with ``--compress_output total`` all result
    files are zipped and the archive is moved to ``--total_output``; with
    ``--compress_output default`` each file is moved to
    ``<workdir>/primary_<id>_<name with '_' replaced by '.'>_visible_txt``.
    The temporary directory is always removed, even on error.

    Exits through ``parser.error`` (SystemExit) when a required option is
    missing.
    """
    # arguments recuperation
    parser = optparse.OptionParser()
    option_flags = (
        "--input", "--param", "--extraparam",
        # multifile management
        "--K", "--k2", "--k3", "--k4", "--k5", "--k6", "--k7", "--k8",
        "--k9", "--k10", "--t", "--N", "--L", "--D",
        # output management
        "--logfile", "--id", "--workdir", "--compress_output",
        # additional outputs
        "--total_output",
    )
    for flag in option_flags:
        parser.add_option(flag)
    options, _ = parser.parse_args(argv)

    # Fail early with a readable message instead of a TypeError deep inside
    # string/path construction, and before any temporary directory exists.
    required = ["input", "param", "extraparam"]
    if options.compress_output == 'total':
        required.append("total_output")
    elif options.compress_output == 'default':
        required.extend(["workdir", "id"])
    missing = ["--" + name for name in required if not getattr(options, name)]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)
        print(tmp_dir)

        # One job per K value (and per replicate); every result lands in
        # tmp_output_dir.
        base_args = _structure_base_args(options)
        for extra_args in _plan_runs(options, tmp_output_dir):
            _run_structure(base_args + extra_args)

        # Collect the results before the archive itself is created so the
        # archive never tries to include itself.
        list_files = sorted(glob.glob(os.path.join(tmp_output_dir, '*')))

        # if compress output is total
        if options.compress_output == 'total':
            archive_path = os.path.join(tmp_output_dir, 'total.zip.temp')
            # The archive must be closed before it is moved, otherwise its
            # central directory may never reach the delivered file.
            with zipfile.ZipFile(archive_path, 'w') as archive:
                for path in list_files:
                    # Store under the bare file name; no chdir needed.
                    archive.write(path, os.path.basename(path))
            # return the unique archive
            shutil.move(archive_path, options.total_output)

        # if compress output is default
        if options.compress_output == 'default':
            for path in list_files:
                visible_name = ("primary_" + options.id + "_"
                                + os.path.basename(path).replace("_", ".")
                                + "_visible_txt")
                shutil.move(path, os.path.join(options.workdir, visible_name))
    finally:
        # clean up temp files
        shutil.rmtree(tmp_dir)
		
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()