comparison seq2HLA.py @ 1:155b796033b6 draft default tip

Uploaded
author sebastian-boegel
date Fri, 21 Dec 2012 03:46:15 -0500
parents 913ea6991ee4
children
comparison
equal deleted inserted replaced
0:913ea6991ee4 1:155b796033b6
52 #These variables need to be global, as they are filled and used by different modules 52 #These variables need to be global, as they are filled and used by different modules
53 readcount={} 53 readcount={}
54 readspergroup={} 54 readspergroup={}
55 allelesPerLocus={} 55 allelesPerLocus={}
56 56
57 def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped): 57 def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,num_threads):
58 log = open(logfile,"w") 58 log = open(logfile,"w")
59 # mapopt="-a -v"+str(mismatch) 59 # mapopt="-a -v"+str(mismatch)
60 mapopt="-a -v"+str(mismatch) 60 mapopt="-a -v"+str(mismatch)
61 #call HLA typing for Class I 61 #call HLA typing for Class I
62 mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped) 62 mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped,num_threads)
63 #call HLA typing for Class II 63 #call HLA typing for Class II
64 mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped) 64 mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped,num_threads)
65 65
66 #---------------Class I------------------------------- 66 #---------------Class I-------------------------------
67 def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped): 67 def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads):
68 #-------1st iteration----------------------------------- 68 #-------1st iteration-----------------------------------
69 log.write("----------HLA class I------------\n") 69 log.write("----------HLA class I------------\n")
70 sam1=runName+"-iteration1.bam" 70 sam1=runName+"-iteration1.bam"
71 iteration=1 71 iteration=1
72 log.write("First iteration starts....\nMapping ......\n") 72 log.write("First iteration starts....\nMapping ......\n")
73 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped) 73 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads)
74 medians=[] 74 medians=[]
75 medians.extend([0,0,0]) 75 medians.extend([0,0,0])
76 medianflag=False 76 medianflag=False
77 #Calculation of first digital haplototype ..... 77 #Calculation of first digital haplototype .....
78 output1=runName+".digitalhaplotype1" 78 output1=runName+".digitalhaplotype1"
88 medians=[] 88 medians=[]
89 iteration=2 89 iteration=2
90 sam2=runName+"-iteration2.bam" 90 sam2=runName+"-iteration2.bam"
91 newReadFile1=runName+"-2nditeration_1.fq" 91 newReadFile1=runName+"-2nditeration_1.fq"
92 newReadFile2=runName+"-2nditeration_2.fq" 92 newReadFile2=runName+"-2nditeration_2.fq"
93 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped) 93 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads)
94 medianfile=runName+".digitalhaplotype1" 94 medianfile=runName+".digitalhaplotype1"
95 for i in range(2,5): 95 for i in range(2,5):
96 medians.append(linecache.getline(medianfile, i).split('\t', 3)[2]) 96 medians.append(linecache.getline(medianfile, i).split('\t', 3)[2])
97 medianflag=True 97 medianflag=True
98 output2=runName+".digitalhaplotype2" 98 output2=runName+".digitalhaplotype2"
105 reportHLAgenotype(output1,output2,finaloutput) 105 reportHLAgenotype(output1,output2,finaloutput)
106 log.write("Calculation of locus-specific expression ...\n") 106 log.write("Calculation of locus-specific expression ...\n")
107 expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped) 107 expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped)
108 108
109 #--------------Class II---------------------------------------- 109 #--------------Class II----------------------------------------
110 def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped): 110 def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads):
111 #-------1st iteration---------------------------------------- 111 #-------1st iteration----------------------------------------
112 log.write("----------HLA class II------------\n") 112 log.write("----------HLA class II------------\n")
113 sam1=runName+"-iteration1.bam" 113 sam1=runName+"-iteration1.bam"
114 iteration=1 114 iteration=1
115 log.write("ClassII: first iteration starts....\nMapping ......\n") 115 log.write("ClassII: first iteration starts....\nMapping ......\n")
116 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped) 116 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads)
117 medians=[] 117 medians=[]
118 medians.extend([0,0,0]) 118 medians.extend([0,0,0])
119 medianflag=False 119 medianflag=False
120 output1=runName+".digitalhaplotype1" 120 output1=runName+".digitalhaplotype1"
121 log.write("ClassII: calculation of first digital haplototype .....\n") 121 log.write("ClassII: calculation of first digital haplototype .....\n")
130 medians=[] 130 medians=[]
131 iteration=2 131 iteration=2
132 sam2=runName+"-iteration2.bam" 132 sam2=runName+"-iteration2.bam"
133 newReadFile1=runName+"-2nditeration_1.fq" 133 newReadFile1=runName+"-2nditeration_1.fq"
134 newReadFile2=runName+"-2nditeration_2.fq" 134 newReadFile2=runName+"-2nditeration_2.fq"
135 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped) 135 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads)
136 medianfile=runName+".digitalhaplotype1" 136 medianfile=runName+".digitalhaplotype1"
137 for i in range(2,5): 137 for i in range(2,5):
138 medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0) 138 medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0)
139 medianflag=True 139 medianflag=True
140 output2=runName+".digitalhaplotype2" 140 output2=runName+".digitalhaplotype2"
147 reportHLAgenotype(output1,output2,finaloutput) 147 reportHLAgenotype(output1,output2,finaloutput)
148 log.write("Calculation of locus-specific expression ...\n") 148 log.write("Calculation of locus-specific expression ...\n")
149 expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped) 149 expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped)
150 150
151 #performs the bowtie mapping for the 2 iterations using the given parameters 151 #performs the bowtie mapping for the 2 iterations using the given parameters
152 def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped): 152 def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped,num_threads):
153 if iteration==1: 153 if iteration==1:
154 if gzipped == "True": 154 if gzipped == "True":
155 sam = sam.rsplit(".",1)[0] 155 sam = sam.rsplit(".",1)[0]
156 cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) 156 cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
157 mappingOutput = execBowtie(cmd1) 157 mappingOutput = execBowtie(cmd1)
158 158
159 cmd2 = "samtools index %s.bam" % sam 159 cmd2 = "samtools index %s.bam" % sam
160 createBAMIndex(cmd2) 160 createBAMIndex(cmd2)
161 else: 161 else:
162 sam = sam.rsplit(".",1)[0] 162 sam = sam.rsplit(".",1)[0]
163 cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) 163 cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
164 mappingOutput = execBowtie(cmd1) 164 mappingOutput = execBowtie(cmd1)
165 165
166 cmd2 = "samtools index %s.bam" % sam 166 cmd2 = "samtools index %s.bam" % sam
167 createBAMIndex(cmd2) 167 createBAMIndex(cmd2)
168 # if iteration==2: 168 # if iteration==2:
169 else: 169 else:
170 170
171 sam = sam.rsplit(".",1)[0] 171 sam = sam.rsplit(".",1)[0]
172 cmd2 = "bowtie --threads 4 -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam) 172 cmd2 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam)
173 mappingOutput = execBowtie(cmd2) 173 mappingOutput = execBowtie(cmd2)
174 174
175 cmd3 = "samtools index %s.bam" % sam 175 cmd3 = "samtools index %s.bam" % sam
176 createBAMIndex(cmd3) 176 createBAMIndex(cmd3)
177 177
764 parser.add_option("-p", "--output2", 764 parser.add_option("-p", "--output2",
765 action="store", 765 action="store",
766 dest="output2", 766 dest="output2",
767 help="Output file 2") 767 help="Output file 2")
768 parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File") 768 parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File")
769 parser.add_option("-t","--threads",type="int",dest="num_threads",help="Define amount of threads to use for Bowtie in this script")
769 770
770 771
771 (options, args) = parser.parse_args() 772 (options, args) = parser.parse_args()
772 if not options.readFile1: 773 if not options.readFile1:
773 parser.error('File name #1 pair not given.') 774 parser.error('File name #1 pair not given.')
794 elif int(options.length)>50 and int(options.length)<=100: 795 elif int(options.length)>50 and int(options.length)<=100:
795 mismatch=2 796 mismatch=2
796 else: 797 else:
797 mismatch=3 798 mismatch=3
798 trim3=str(options.trim3) 799 trim3=str(options.trim3)
799 main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped) 800 main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,options.num_threads)
800 sys.stderr = log_stderr 801 sys.stderr = log_stderr
801 log_file.close() 802 log_file.close()