Mercurial > repos > sebastian-boegel > seq2hla
comparison seq2HLA.py @ 1:155b796033b6 draft default tip
Uploaded
author | sebastian-boegel |
---|---|
date | Fri, 21 Dec 2012 03:46:15 -0500 |
parents | 913ea6991ee4 |
children |
comparison
equal
deleted
inserted
replaced
0:913ea6991ee4 | 1:155b796033b6 |
---|---|
52 #These variables need to be global, as they are filled and used by different modules | 52 #These variables need to be global, as they are filled and used by different modules |
53 readcount={} | 53 readcount={} |
54 readspergroup={} | 54 readspergroup={} |
55 allelesPerLocus={} | 55 allelesPerLocus={} |
56 | 56 |
57 def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped): | 57 def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,num_threads): |
58 log = open(logfile,"w") | 58 log = open(logfile,"w") |
59 # mapopt="-a -v"+str(mismatch) | 59 # mapopt="-a -v"+str(mismatch) |
60 mapopt="-a -v"+str(mismatch) | 60 mapopt="-a -v"+str(mismatch) |
61 #call HLA typing for Class I | 61 #call HLA typing for Class I |
62 mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped) | 62 mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped,num_threads) |
63 #call HLA typing for Class II | 63 #call HLA typing for Class II |
64 mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped) | 64 mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped,num_threads) |
65 | 65 |
66 #---------------Class I------------------------------- | 66 #---------------Class I------------------------------- |
67 def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped): | 67 def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads): |
68 #-------1st iteration----------------------------------- | 68 #-------1st iteration----------------------------------- |
69 log.write("----------HLA class I------------\n") | 69 log.write("----------HLA class I------------\n") |
70 sam1=runName+"-iteration1.bam" | 70 sam1=runName+"-iteration1.bam" |
71 iteration=1 | 71 iteration=1 |
72 log.write("First iteration starts....\nMapping ......\n") | 72 log.write("First iteration starts....\nMapping ......\n") |
73 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped) | 73 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads) |
74 medians=[] | 74 medians=[] |
75 medians.extend([0,0,0]) | 75 medians.extend([0,0,0]) |
76 medianflag=False | 76 medianflag=False |
77 #Calculation of first digital haplototype ..... | 77 #Calculation of first digital haplototype ..... |
78 output1=runName+".digitalhaplotype1" | 78 output1=runName+".digitalhaplotype1" |
88 medians=[] | 88 medians=[] |
89 iteration=2 | 89 iteration=2 |
90 sam2=runName+"-iteration2.bam" | 90 sam2=runName+"-iteration2.bam" |
91 newReadFile1=runName+"-2nditeration_1.fq" | 91 newReadFile1=runName+"-2nditeration_1.fq" |
92 newReadFile2=runName+"-2nditeration_2.fq" | 92 newReadFile2=runName+"-2nditeration_2.fq" |
93 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped) | 93 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads) |
94 medianfile=runName+".digitalhaplotype1" | 94 medianfile=runName+".digitalhaplotype1" |
95 for i in range(2,5): | 95 for i in range(2,5): |
96 medians.append(linecache.getline(medianfile, i).split('\t', 3)[2]) | 96 medians.append(linecache.getline(medianfile, i).split('\t', 3)[2]) |
97 medianflag=True | 97 medianflag=True |
98 output2=runName+".digitalhaplotype2" | 98 output2=runName+".digitalhaplotype2" |
105 reportHLAgenotype(output1,output2,finaloutput) | 105 reportHLAgenotype(output1,output2,finaloutput) |
106 log.write("Calculation of locus-specific expression ...\n") | 106 log.write("Calculation of locus-specific expression ...\n") |
107 expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped) | 107 expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped) |
108 | 108 |
109 #--------------Class II---------------------------------------- | 109 #--------------Class II---------------------------------------- |
110 def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped): | 110 def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads): |
111 #-------1st iteration---------------------------------------- | 111 #-------1st iteration---------------------------------------- |
112 log.write("----------HLA class II------------\n") | 112 log.write("----------HLA class II------------\n") |
113 sam1=runName+"-iteration1.bam" | 113 sam1=runName+"-iteration1.bam" |
114 iteration=1 | 114 iteration=1 |
115 log.write("ClassII: first iteration starts....\nMapping ......\n") | 115 log.write("ClassII: first iteration starts....\nMapping ......\n") |
116 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped) | 116 mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads) |
117 medians=[] | 117 medians=[] |
118 medians.extend([0,0,0]) | 118 medians.extend([0,0,0]) |
119 medianflag=False | 119 medianflag=False |
120 output1=runName+".digitalhaplotype1" | 120 output1=runName+".digitalhaplotype1" |
121 log.write("ClassII: calculation of first digital haplototype .....\n") | 121 log.write("ClassII: calculation of first digital haplototype .....\n") |
130 medians=[] | 130 medians=[] |
131 iteration=2 | 131 iteration=2 |
132 sam2=runName+"-iteration2.bam" | 132 sam2=runName+"-iteration2.bam" |
133 newReadFile1=runName+"-2nditeration_1.fq" | 133 newReadFile1=runName+"-2nditeration_1.fq" |
134 newReadFile2=runName+"-2nditeration_2.fq" | 134 newReadFile2=runName+"-2nditeration_2.fq" |
135 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped) | 135 mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads) |
136 medianfile=runName+".digitalhaplotype1" | 136 medianfile=runName+".digitalhaplotype1" |
137 for i in range(2,5): | 137 for i in range(2,5): |
138 medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0) | 138 medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0) |
139 medianflag=True | 139 medianflag=True |
140 output2=runName+".digitalhaplotype2" | 140 output2=runName+".digitalhaplotype2" |
147 reportHLAgenotype(output1,output2,finaloutput) | 147 reportHLAgenotype(output1,output2,finaloutput) |
148 log.write("Calculation of locus-specific expression ...\n") | 148 log.write("Calculation of locus-specific expression ...\n") |
149 expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped) | 149 expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped) |
150 | 150 |
151 #performs the bowtie mapping for the 2 iterations using the given parameters | 151 #performs the bowtie mapping for the 2 iterations using the given parameters |
152 def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped): | 152 def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped,num_threads): |
153 if iteration==1: | 153 if iteration==1: |
154 if gzipped == "True": | 154 if gzipped == "True": |
155 sam = sam.rsplit(".",1)[0] | 155 sam = sam.rsplit(".",1)[0] |
156 cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) | 156 cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) |
157 mappingOutput = execBowtie(cmd1) | 157 mappingOutput = execBowtie(cmd1) |
158 | 158 |
159 cmd2 = "samtools index %s.bam" % sam | 159 cmd2 = "samtools index %s.bam" % sam |
160 createBAMIndex(cmd2) | 160 createBAMIndex(cmd2) |
161 else: | 161 else: |
162 sam = sam.rsplit(".",1)[0] | 162 sam = sam.rsplit(".",1)[0] |
163 cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) | 163 cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) |
164 mappingOutput = execBowtie(cmd1) | 164 mappingOutput = execBowtie(cmd1) |
165 | 165 |
166 cmd2 = "samtools index %s.bam" % sam | 166 cmd2 = "samtools index %s.bam" % sam |
167 createBAMIndex(cmd2) | 167 createBAMIndex(cmd2) |
168 # if iteration==2: | 168 # if iteration==2: |
169 else: | 169 else: |
170 | 170 |
171 sam = sam.rsplit(".",1)[0] | 171 sam = sam.rsplit(".",1)[0] |
172 cmd2 = "bowtie --threads 4 -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam) | 172 cmd2 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam) |
173 mappingOutput = execBowtie(cmd2) | 173 mappingOutput = execBowtie(cmd2) |
174 | 174 |
175 cmd3 = "samtools index %s.bam" % sam | 175 cmd3 = "samtools index %s.bam" % sam |
176 createBAMIndex(cmd3) | 176 createBAMIndex(cmd3) |
177 | 177 |
764 parser.add_option("-p", "--output2", | 764 parser.add_option("-p", "--output2", |
765 action="store", | 765 action="store", |
766 dest="output2", | 766 dest="output2", |
767 help="Output file 2") | 767 help="Output file 2") |
768 parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File") | 768 parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File") |
769 parser.add_option("-t","--threads",type="int",dest="num_threads",help="Define amount of threads to use for Bowtie in this script") | |
769 | 770 |
770 | 771 |
771 (options, args) = parser.parse_args() | 772 (options, args) = parser.parse_args() |
772 if not options.readFile1: | 773 if not options.readFile1: |
773 parser.error('File name #1 pair not given.') | 774 parser.error('File name #1 pair not given.') |
794 elif int(options.length)>50 and int(options.length)<=100: | 795 elif int(options.length)>50 and int(options.length)<=100: |
795 mismatch=2 | 796 mismatch=2 |
796 else: | 797 else: |
797 mismatch=3 | 798 mismatch=3 |
798 trim3=str(options.trim3) | 799 trim3=str(options.trim3) |
799 main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped) | 800 main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,options.num_threads) |
800 sys.stderr = log_stderr | 801 sys.stderr = log_stderr |
801 log_file.close() | 802 log_file.close() |