comparison fasta_interlacer.py @ 4:d397f5a85464 draft

Uploaded
author petr-novak
date Wed, 18 Sep 2019 06:30:04 -0400
parents a4cd8608ef6b
children
comparison
equal deleted inserted replaced
3:e320ef2d105a 4:d397f5a85464
77 if charA == charB: 77 if charA == charB:
78 sys.stderr.write( 78 sys.stderr.write(
79 "last character of sequence id must be used for distinguishing pairs!") 79 "last character of sequence id must be used for distinguishing pairs!")
80 exit(1) 80 exit(1)
81 # check first thousand! 81 # check first thousand!
82 for i in range(1000): 82 for i in range(3):
83 seqA = readSingleSeq(fA) 83 seqA = readSingleSeq(fA)
84 seqB = readSingleSeq(fB) 84 seqB = readSingleSeq(fB)
85 if (not seqA) or (not seqB): 85 if (not seqA) or (not seqB):
86 # end of file: 86 # end of file:
87 if i == 0: 87 if i == 0:
103 buffB = {} 103 buffB = {}
104 buffA_names = [] 104 buffA_names = []
105 buffB_names = [] 105 buffB_names = []
106 106
107 while True: 107 while True:
108
109 seqA = readSingleSeq(fA) 108 seqA = readSingleSeq(fA)
110 seqB = readSingleSeq(fB) 109 seqB = readSingleSeq(fB)
111
112 if not seqA and not seqB: 110 if not seqA and not seqB:
113 break # end of file 111 break # end of file
114 112
115 ## validation and direct checking only if not end of files 113 ## validation and direct checking only if not end of files
116 if seqA and seqB: 114 if seqA and seqB:
136 writeSingleSeq(fPairs, seqtmp) 134 writeSingleSeq(fPairs, seqtmp)
137 # can I empty buffA ??? 135 # can I empty buffA ???
138 for i in buffA_names: 136 for i in buffA_names:
139 seqtmp = {"name": i + charA, "sequence": buffA[i]} 137 seqtmp = {"name": i + charA, "sequence": buffA[i]}
140 writeSingleSeq(single, seqtmp) 138 writeSingleSeq(single, seqtmp)
141 buffA = {} 139 buffA = {}
142 buffA_names = [] 140 buffA_names = []
143 141
144 j = 0 142 j = 0
145 for i in buffB_names: 143 for i in buffB_names:
146 seqtmp = {"name": i + charB, "sequence": buffB[i]} 144 seqtmp = {"name": i + charB, "sequence": buffB[i]}
147 del buffB[i] 145 del buffB[i]
164 writeSingleSeq(fPairs, seqB) 162 writeSingleSeq(fPairs, seqB)
165 # can I empty buffB ??? 163 # can I empty buffB ???
166 for i in buffB_names: 164 for i in buffB_names:
167 seqtmp = {"name": i + charB, "sequence": buffB[i]} 165 seqtmp = {"name": i + charB, "sequence": buffB[i]}
168 writeSingleSeq(single, seqtmp) 166 writeSingleSeq(single, seqtmp)
169 buffB = {} 167 buffB = {}
170 buffB_names = [] 168 buffB_names = []
171 169
172 j = 0 170 j = 0
173 for i in buffA_names: 171 for i in buffA_names:
174 seqtmp = {"name": i + charA, "sequence": buffA[i]} 172 seqtmp = {"name": i + charA, "sequence": buffA[i]}
175 del buffA[i] 173 del buffA[i]
181 writeSingleSeq(single, seqtmp) 179 writeSingleSeq(single, seqtmp)
182 180
183 else: 181 else:
184 buffB[seqB["name"][:-1]] = seqB['sequence'] 182 buffB[seqB["name"][:-1]] = seqB['sequence']
185 buffB_names.append(seqB["name"][:-1]) 183 buffB_names.append(seqB["name"][:-1])
186 fA.close() 184
187 fB.close() 185 fA.close()
188 fPairs.close() 186 fB.close()
189 # write rest of singles: 187 fPairs.close()
188
189 # write rest of singles:
190 for i in buffA: 190 for i in buffA:
191 seqtmp = {"name": i + charA, "sequence": buffA[i]} 191 seqtmp = {"name": i + charA, "sequence": buffA[i]}
192 writeSingleSeq(single, seqtmp) 192 writeSingleSeq(single, seqtmp)
193 for i in buffB: 193 for i in buffB:
194 seqtmp = {"name": i + charB, "sequence": buffB[i]} 194 seqtmp = {"name": i + charB, "sequence": buffB[i]}
195 writeSingleSeq(single, seqtmp) 195 writeSingleSeq(single, seqtmp)
196 single.close() 196 single.close()
197 197
198 198
199 if __name__ == "__main__": 199 if __name__ == "__main__":
200 main() 200 main()