Mercurial > repos > estrain > seqsero_v1
comparison SeqSero/libs/BWA_analysis_O_new_dependent.py @ 0:c577b57b7c74 draft
Uploaded
| author | estrain |
|---|---|
| date | Wed, 06 Dec 2017 15:59:29 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c577b57b7c74 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #tyr_of_O2_O9.fasta should be in the same directory, in it, O9 should be first then O2 | |
| 3 | |
| 4 import os | |
| 5 from Bio import SeqIO | |
| 6 import sys | |
| 7 from Initial_functions import Uniq | |
| 8 from Bio.Blast import NCBIXML | |
| 9 | |
| 10 def BWA_O_analysis(sra_name,additional_file,database,mapping_mode,file_mode): | |
| 11 if file_mode=="1":#interleaved | |
| 12 if sra_name[-3:]=="sra": | |
| 13 os.system("fastq-dump --split-files "+sra_name) | |
| 14 del_fastq=1 | |
| 15 for_fq=sra_name.replace(".sra","_1.fastq") | |
| 16 rev_fq=sra_name.replace(".sra","_2.fastq") | |
| 17 for_sai=sra_name.replace(".sra","_1.sai") | |
| 18 rev_sai=sra_name.replace(".sra","_2.sai") | |
| 19 sam=sra_name.replace(".sra",".sam") | |
| 20 bam=sra_name.replace(".sra",".bam") | |
| 21 else: | |
| 22 del_fastq=0 | |
| 23 core_id=sra_name.split(".fastq")[0] | |
| 24 try: | |
| 25 os.system("gunzip "+sra_name) | |
| 26 except: | |
| 27 pass | |
| 28 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) | |
| 29 os.system("perl "+dirpath+"/split_interleaved_fastq.pl --input "+core_id+".fastq --output "+core_id.replace(".","_")+".fastq")#######03152016 | |
| 30 ori_size=os.path.getsize(core_id+".fastq")#######03152016 | |
| 31 os.system("mv "+core_id.replace(".","_")+"-read1.fastq"+" "+core_id+"-read1.fastq")#######03152016 | |
| 32 os.system("mv "+core_id.replace(".","_")+"-read2.fastq"+" "+core_id+"-read2.fastq")#######03152016 | |
| 33 for_fq=core_id+"-read1.fastq"#######03152016 | |
| 34 rev_fq=core_id+"-read2.fastq"#######03152016 | |
| 35 if float(os.path.getsize(for_fq))/ori_size<=0.1 or float(os.path.getsize(rev_fq))/ori_size<=0.1:#09092015#12292015#######03152016 | |
| 36 os.system("echo haha")#09092015 | |
| 37 os.system("perl "+dirpath+"/splitPairedEndReads.pl "+core_id+".fastq")#09092015 | |
| 38 os.system("mv "+core_id+".fastq_1 "+for_fq)##09092015 | |
| 39 os.system("mv "+core_id+".fastq_2 "+rev_fq)##09092015 | |
| 40 else:#09092015 | |
| 41 os.system("echo hehe")#09092015 | |
| 42 for_sai=core_id+"_1.sai" | |
| 43 rev_sai=core_id+"_2.sai" | |
| 44 sam=core_id+".sam" | |
| 45 bam=core_id+".bam" | |
| 46 elif file_mode=="2":#seperated | |
| 47 forword_seq=sra_name | |
| 48 reverse_seq=additional_file | |
| 49 try: | |
| 50 os.system("gunzip "+forword_seq) | |
| 51 except: | |
| 52 pass | |
| 53 try: | |
| 54 os.system("gunzip "+reverse_seq) | |
| 55 except: | |
| 56 pass | |
| 57 for_core_id=forword_seq.split(".fastq")[0] | |
| 58 re_core_id=reverse_seq.split(".fastq")[0] | |
| 59 for_fq=for_core_id+".fastq" | |
| 60 rev_fq=re_core_id+".fastq" | |
| 61 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))#######03152016 | |
| 62 print "check fastq id and make them in accordance with each other...please wait..." | |
| 63 os.system("python "+dirpath+"/compare_and_change_two_fastq_id.py "+for_fq+" "+rev_fq)#######03152016 | |
| 64 for_sai=for_core_id+".sai" | |
| 65 rev_sai=re_core_id+".sai" | |
| 66 sam=for_core_id+".sam" | |
| 67 bam=sam.replace(".sam",".bam") | |
| 68 elif file_mode=="3":#single-end | |
| 69 if sra_name[-3:]=="sra": | |
| 70 os.system("fastq-dump --split-files "+sra_name)###01/28/2015 | |
| 71 del_fastq=1 | |
| 72 for_fq=sra_name.replace(".sra","_1.fastq") | |
| 73 for_sai=sra_name.replace(".sra","_1.sai") | |
| 74 sam=sra_name.replace(".sra",".sam") | |
| 75 bam=sra_name.replace(".sra",".bam") | |
| 76 else: | |
| 77 del_fastq=0 | |
| 78 core_id=sra_name.split(".fastq")[0] | |
| 79 try: | |
| 80 os.system("gunzip "+sra_name) | |
| 81 except: | |
| 82 pass | |
| 83 for_fq=core_id+".fastq" | |
| 84 for_sai=core_id+"_1.sai" | |
| 85 sam=core_id+".sam" | |
| 86 bam=core_id+".bam" | |
| 87 | |
| 88 os.system("bwa index "+database) | |
| 89 if file_mode!="3": | |
| 90 if mapping_mode=="sam": | |
| 91 os.system("bwa aln "+database+" "+for_fq+" > "+for_sai) | |
| 92 os.system("bwa aln "+database+" "+rev_fq+" > "+rev_sai) | |
| 93 os.system("bwa sampe "+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) | |
| 94 elif mapping_mode=="mem": | |
| 95 os.system("bwa mem "+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 | |
| 96 elif mapping_mode=="nanopore": ## | |
| 97 os.system("bwa mem -x ont2d "+database+" "+for_fq+" "+rev_fq+" > "+sam)## | |
| 98 else: | |
| 99 if mapping_mode=="mem": | |
| 100 os.system("bwa mem "+database+" "+for_fq+" > "+sam) #2014/12/23 | |
| 101 elif mapping_mode=="sam": | |
| 102 os.system("bwa aln "+database+" "+for_fq+" > "+for_sai) | |
| 103 os.system("bwa samse "+database+" "+for_sai+" "+for_fq+" > "+sam) | |
| 104 elif mapping_mode=="nanopore":## | |
| 105 os.system("bwa mem -x ont2d "+database+" "+for_fq+" > "+sam)## | |
| 106 os.system("samtools view -F 4 -Sbh "+sam+" > "+bam) | |
| 107 os.system("samtools view -h -o "+sam+" "+bam) | |
| 108 | |
| 109 file=open(sam,"r") | |
| 110 handle=file.readlines() | |
| 111 name_list=[] | |
| 112 for line in handle: | |
| 113 if len(line)>300: | |
| 114 name_list.append(line.split("\t")[2]) | |
| 115 a,b=Uniq(name_list) | |
| 116 c=dict(zip(a,b)) | |
| 117 final_O=sorted(c.iteritems(), key=lambda d:d[1], reverse = True) #order from frequency high to low, but tuple while not list | |
| 118 Sero_list_O=[] | |
| 119 print "Final_Otype_list:" | |
| 120 print final_O | |
| 121 num_1=0#new inserted | |
| 122 O9_wbav=0 | |
| 123 O310_wzx=0 | |
| 124 O946_wzy=0 | |
| 125 if len(final_O)>0: #new inserted | |
| 126 for x in final_O:#new inserted | |
| 127 num_1=num_1+x[1]#new inserted | |
| 128 if "O-9,46_wbaV" in x[0]: | |
| 129 O9_wbaV=x[1] | |
| 130 if "O-3,10_wzx" in x[0]: | |
| 131 O310_wzx=x[1] | |
| 132 if "O-9,46_wzy" in x[0]: | |
| 133 O946_wzy=x[1] | |
| 134 if "O-3,10_not_in_1,3,19" in x[0]: | |
| 135 O310_no_1319=x[1] | |
| 136 if "O-9,46,27_partial_wzy" in x[0]: | |
| 137 O94627=x[1] | |
| 138 O_list=[] | |
| 139 O_choice="" | |
| 140 | |
| 141 | |
| 142 print "$$$Genome:",sra_name | |
| 143 if len(final_O)==0: | |
| 144 print "$$$No Otype, due to no hit" | |
| 145 else: | |
| 146 if final_O[0][1]<8: | |
| 147 print "$$$No Otype, due to the hit reads number is small." | |
| 148 else: | |
| 149 for x in final_O: | |
| 150 if x[1]>5: | |
| 151 O_list.append(x[0]) | |
| 152 qq=1# | |
| 153 for x in final_O:# | |
| 154 if "sdf" in x[0] and x[1]>3:# | |
| 155 qq=0# | |
| 156 print "$$$",x[0],"got a hit, reads:",x[1]# | |
| 157 final_O.remove(x) | |
| 158 if qq!=0:# | |
| 159 print "$$$No sdf exists"# | |
| 160 | |
| 161 if "O-9,46_wbaV" in O_list and float(O9_wbaV)/float(num_1) > 0.1: | |
| 162 if "O-9,46_wzy" in O_list and float(O946_wzy)/float(num_1) > 0.1: | |
| 163 O_choice="O-9,46" | |
| 164 print "$$$Most possilble Otype: O-9,46" | |
| 165 elif "O-9,46,27_partial_wzy" in O_list and float(O94627)/float(num_1) > 0.1: | |
| 166 O_choice="O-9,46,27" | |
| 167 print "$$$Most possilble Otype: O-9,46,27" | |
| 168 else: | |
| 169 O_choice="O-9" | |
| 170 if file_mode=="3": | |
| 171 rev_fq="" | |
| 172 rev_sai="" | |
| 173 assembly(sra_name,O_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode) | |
| 174 else: | |
| 175 assembly(sra_name,O_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode) | |
| 176 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1: | |
| 177 if "O-3,10_not_in_1,3,19" in O_list and float(O310_no_1319)/float(num_1) > 0.1: | |
| 178 O_choice="O-3,10" | |
| 179 print "$$$Most possilble Otype: O-3,10" | |
| 180 else: | |
| 181 O_choice="O-1,3,19" | |
| 182 print "$$$Most possilble Otype: O-1,3,19" | |
| 183 else: | |
| 184 try: | |
| 185 O_choice=final_O[0][0].split("_")[0] | |
| 186 if O_choice=="O-1,3,19": | |
| 187 O_choice=final_O[1][0].split("_")[0] | |
| 188 print "$$$Most possilble Otype: ",O_choice | |
| 189 except: | |
| 190 print "$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)" | |
| 191 | |
| 192 | |
| 193 def assembly(sra_name,potential_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode): | |
| 194 database="ParaA_rfb.fasta" | |
| 195 os.system("bwa index database/"+database)###01/28/2015 | |
| 196 if rev_fq=="":#2015/09/09 | |
| 197 if mapping_mode=="mem": | |
| 198 os.system("bwa mem database/"+database+" "+for_fq+" > "+sam) #2014/12/23 | |
| 199 elif mapping_mode=="sam": | |
| 200 os.system("bwa aln database/"+database+" "+for_fq+" > "+for_sai) | |
| 201 os.system("bwa samse database/"+database+" "+for_sai+" "+for_fq+" > "+sam) | |
| 202 elif mapping_mode=="nanopore":## | |
| 203 os.system("bwa mem -x ont2d database/"+database+" "+for_fq+" > "+sam)## | |
| 204 else: | |
| 205 if mapping_mode=="mem": | |
| 206 os.system("bwa mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 | |
| 207 elif mapping_mode=="sam": | |
| 208 os.system("bwa aln database/"+database+" "+for_fq+" > "+for_sai) | |
| 209 os.system("bwa aln database/"+database+" "+rev_fq+" > "+rev_sai) | |
| 210 os.system("bwa sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) | |
| 211 elif mapping_mode=="nanopore": | |
| 212 os.system("bwa mem -x ont2d database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) | |
| 213 os.system("samtools view -F 4 -Sbh "+sam+" > "+bam) | |
| 214 os.system("samtools view -h -o "+sam+" "+bam) | |
| 215 os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt") | |
| 216 os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt") | |
| 217 file1=open(sam+"_title.txt","r") | |
| 218 file2=open(sam+"_seq.txt","r") | |
| 219 file1=file1.readlines() | |
| 220 file2=file2.readlines() | |
| 221 file=open(sam+".fasta","w") | |
| 222 for i in range(len(file1)): | |
| 223 title=">"+file1[i] | |
| 224 seq=file2[i] | |
| 225 if len(seq)>=50 and len(title)>6:#generally,can be adjusted | |
| 226 file.write(title) | |
| 227 file.write(seq) | |
| 228 file.close() | |
| 229 database2="tyr_of_O2_O9.fasta" | |
| 230 os.system('makeblastdb -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl') | |
| 231 os.system("blastn -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O29.xml -outfmt 5") | |
| 232 handle=open(sam+"_vs_O29.xml") | |
| 233 handle=NCBIXML.parse(handle) | |
| 234 handle=list(handle) | |
| 235 O9_bigger=0 | |
| 236 O2_bigger=0 | |
| 237 for x in handle: | |
| 238 O9_score=0 | |
| 239 O2_score=0 | |
| 240 try: | |
| 241 if 'O-9' in x.alignments[0].hit_def: | |
| 242 O9_score=x.alignments[0].hsps[0].bits | |
| 243 O2_score=x.alignments[1].hsps[0].bits | |
| 244 elif 'O-2' in x.alignments[0].hit_def: | |
| 245 O9_score=x.alignments[1].hsps[0].bits | |
| 246 O2_score=x.alignments[0].hsps[0].bits | |
| 247 if O9_score>O2_score: | |
| 248 O9_bigger+=1 | |
| 249 if O9_score<O2_score: | |
| 250 O2_bigger+=1 | |
| 251 except: | |
| 252 continue | |
| 253 print "$$$Genome:",sra_name | |
| 254 if O9_bigger>O2_bigger: | |
| 255 print "$$$Most possible Otype is O-9" | |
| 256 elif O9_bigger<O2_bigger: | |
| 257 print "$$$Most possible Otype is O-2" | |
| 258 else: | |
| 259 print "$$$No suitable one, because can't distinct it's O-9 or O-2, but ",potential_choice," has a more possibility." | |
| 260 print "O-9 number is:",O9_bigger | |
| 261 print "O-2 number is:",O2_bigger | |
| 262 | |
| 263 os.system("rm "+sam+"_title.txt")###01/28/2015 | |
| 264 os.system("rm "+sam+"_seq.txt")###01/28/2015 | |
| 265 os.system("rm "+sam+".fasta")###01/28/2015 | |
| 266 os.system("rm "+database2+"_db.*")###01/28/2015 | |
| 267 os.system("rm "+sam+"_vs_O29.xml")###01/28/2015 | |
| 268 | |
| 269 | |
| 270 target=sys.argv[1] #should be sra format | |
| 271 data_base=sys.argv[2] | |
| 272 mapping_mode=sys.argv[3] | |
| 273 if sys.argv[4] not in ("1","2","3"): | |
| 274 additional_file=sys.argv[4] | |
| 275 file_mode=sys.argv[5] | |
| 276 else: | |
| 277 additional_file="" | |
| 278 file_mode=sys.argv[4] | |
| 279 | |
| 280 BWA_O_analysis(target,additional_file,data_base,mapping_mode,file_mode) |
