diff seq2HLA.py @ 1:155b796033b6 draft default tip

Uploaded
author sebastian-boegel
date Fri, 21 Dec 2012 03:46:15 -0500
parents 913ea6991ee4
children
line wrap: on
line diff
--- a/seq2HLA.py	Thu Dec 20 10:37:01 2012 -0500
+++ b/seq2HLA.py	Fri Dec 21 03:46:15 2012 -0500
@@ -54,23 +54,23 @@
 readspergroup={}
 allelesPerLocus={}
 
-def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped):
+def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,num_threads):
 	log = open(logfile,"w")
 #	mapopt="-a -v"+str(mismatch)
 	mapopt="-a -v"+str(mismatch)
 	#call HLA typing for Class I
-	mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped)
+	mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped,num_threads)
 	#call HLA typing for Class II
-	mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped)
+	mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped,num_threads)
 
 #---------------Class I-------------------------------
-def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped):
+def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads):
 	#-------1st iteration-----------------------------------
 	log.write("----------HLA class I------------\n")
 	sam1=runName+"-iteration1.bam"
 	iteration=1
 	log.write("First iteration starts....\nMapping ......\n")
-	mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped)
+	mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads)
 	medians=[]
 	medians.extend([0,0,0])
 	medianflag=False
@@ -90,7 +90,7 @@
 	sam2=runName+"-iteration2.bam"
 	newReadFile1=runName+"-2nditeration_1.fq"
 	newReadFile2=runName+"-2nditeration_2.fq"
-	mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped)
+	mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads)
 	medianfile=runName+".digitalhaplotype1"
 	for i in range(2,5):
 		medians.append(linecache.getline(medianfile, i).split('\t', 3)[2])
@@ -107,13 +107,13 @@
 	expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped)
 
 #--------------Class II----------------------------------------
-def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped):
+def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads):
 	#-------1st iteration----------------------------------------
 	log.write("----------HLA class II------------\n")
 	sam1=runName+"-iteration1.bam"
 	iteration=1
 	log.write("ClassII: first iteration starts....\nMapping ......\n")
-	mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped)
+	mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads)
 	medians=[]
 	medians.extend([0,0,0])
 	medianflag=False
@@ -132,7 +132,7 @@
 	sam2=runName+"-iteration2.bam"
 	newReadFile1=runName+"-2nditeration_1.fq"
 	newReadFile2=runName+"-2nditeration_2.fq"
-	mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped)
+	mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads)
 	medianfile=runName+".digitalhaplotype1"
 	for i in range(2,5):
 		medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0)
@@ -149,18 +149,18 @@
 	expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped)
 
 #performs the bowtie mapping for the 2 iterations using the given parameters
-def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped):
+def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped,num_threads):
 	if iteration==1:
 		if gzipped == "True":
 			sam = sam.rsplit(".",1)[0]
-			cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) |  awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
+			cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) |  awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
 			mappingOutput = execBowtie(cmd1)
 			
 			cmd2 = "samtools index %s.bam" % sam
 			createBAMIndex(cmd2)
 		else:
 			sam = sam.rsplit(".",1)[0]
-			cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s |  awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
+			cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s |  awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
 			mappingOutput = execBowtie(cmd1)
 
 			cmd2 = "samtools index %s.bam" % sam
@@ -169,7 +169,7 @@
 	else:
 		
 		sam = sam.rsplit(".",1)[0]
-		cmd2 = "bowtie --threads 4 -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam)
+		cmd2 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam)
 		mappingOutput = execBowtie(cmd2)
 
 		cmd3 = "samtools index %s.bam" % sam
@@ -766,6 +766,7 @@
 			dest="output2",
 			help="Output file 2")
 	parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File")
+	parser.add_option("-t","--threads",type="int",dest="num_threads",help="Define amount of threads to use for Bowtie in this script")
 
 
 	(options, args) = parser.parse_args()
@@ -796,6 +797,6 @@
 	else:
 		mismatch=3
 	trim3=str(options.trim3)
-	main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped)
+	main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,options.num_threads)
 	sys.stderr = log_stderr
 	log_file.close()