findSpanin.py @ 3:fd70980a516b (draft)
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c

author: cpt
date:   Mon, 05 Jun 2023 02:42:01 +0000
child:  673d1776d3b9
##### findSpanin.pl --> findSpanin.py
######### Much of this code is deliberately sequential ("blocked"): one step
######### runs, a function is applied to its return value, then another
######### function, and so on.

import argparse
import re

from collections import Counter

# Helper functions shipped alongside this tool.
from spaninFuncs import (
    getDescriptions,
    grabLocs,
    spaninProximity,
    tuple_fasta,
    lineWrapper,
)

### Required inputs
#### INPUT : putative_isp.fa & putative_osp.fa (in that order)
#### PARAMETERS :

###############################################################################
def write_output(candidates):
    """Output-file helper; currently an unused placeholder."""
    pass


def reconfigure_dict(spanins):
    """
    Reorganize the spanin dictionary into a strand-keyed layout that is
    friendlier for the downstream checks.
    """

    new_spanin_dict = {}

    for each_spanin_type, data_dict in spanins.items():
        new_spanin_dict[each_spanin_type] = {
            "positive": {"coords": []},
            "negative": {"coords": []},
        }
        for outer_orf, inner_data in data_dict.items():
            for data_content in inner_data:
                # Prepend the i-spanin ORF name so each hit is self-describing;
                # after the insert, index 6 holds the strand.
                data_content.insert(0, outer_orf)
                if data_content[6] == "+":
                    new_spanin_dict[each_spanin_type]["positive"]["coords"].append(
                        data_content
                    )
                elif data_content[6] == "-":
                    new_spanin_dict[each_spanin_type]["negative"]["coords"].append(
                        data_content
                    )

    return new_spanin_dict

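# A minimal, hypothetical illustration of the reshaping above (toy data, not
# produced by this tool): each hit list gains its ORF name at index 0 and is
# filed under its strand.
#
#   spanins = {"EMBEDDED": {"ORF1": [[100, 190, "ORF2", 120, 180, "+"]]}}
#   reconfigure_dict(spanins)
#   # => {"EMBEDDED": {"positive": {"coords":
#   #         [["ORF1", 100, 190, "ORF2", 120, 180, "+"]]},
#   #      "negative": {"coords": []}}}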

def check_for_uniques(spanins):
    """
    Check for unique spanins within each spanin type.
    If several i-spanins share the same positive-strand end site, they are
    grouped as one: e.g. if ORF1, ORF2, and ORF3 all end at location 4231,
    they are not unique.
    """
    pair_dict = {
        "pairs": {
            "location_amount": [],
            "pair_number": {},
        }
    }
    for each_spanin_type, spanin_data in spanins.items():
        # Early declarations so spanin types with no results still get zeroes.
        pos_check = []  # end-site checks
        pos_uniques = []
        neg_check = []  # start-site checks
        neg_uniques = []
        pos_amt_unique = 0
        neg_amt_unique = 0
        amt_positive = 0
        amt_negative = 0
        spanin_data["uniques"] = 0
        spanin_data["amount"] = 0
        if spanin_data["positive"]["coords"]:
            for each_hit in spanin_data["positive"]["coords"]:
                pos_check.append(each_hit[2])
                pair_dict["pairs"]["location_amount"].append(each_hit[2])
            # Deduplicate the end sites (the original `count >= 1` filter was a no-op).
            pos_uniques = list(set(pos_check))
            amt_positive = len(spanin_data["positive"]["coords"])
            pos_amt_unique = len(pos_uniques)
        if amt_positive:
            spanin_data["positive"]["amt_positive"] = amt_positive
            spanin_data["positive"]["pos_amt_unique"] = pos_amt_unique
        else:
            spanin_data["positive"]["amt_positive"] = 0
            spanin_data["positive"]["pos_amt_unique"] = 0
        if spanin_data["negative"]["coords"]:
            for each_hit in spanin_data["negative"]["coords"]:
                neg_check.append(each_hit[1])
                pair_dict["pairs"]["location_amount"].append(each_hit[1])
            neg_uniques = list(set(neg_check))
            amt_negative = len(spanin_data["negative"]["coords"])
            neg_amt_unique = len(neg_uniques)
        if amt_negative:
            spanin_data["negative"]["amt_negative"] = amt_negative
            spanin_data["negative"]["neg_amt_unique"] = neg_amt_unique
        else:
            spanin_data["negative"]["amt_negative"] = 0
            spanin_data["negative"]["neg_amt_unique"] = 0
        spanin_data["uniques"] += (
            spanin_data["positive"]["pos_amt_unique"]
            + spanin_data["negative"]["neg_amt_unique"]
        )
        spanin_data["amount"] += (
            spanin_data["positive"]["amt_positive"]
            + spanin_data["negative"]["amt_negative"]
        )
    # Collapse the collected locations into {location: occurrence_count}, then
    # assign each distinct location a stable 1-based pair number.
    pair_dict["pairs"]["location_amount"] = dict(
        Counter(pair_dict["pairs"]["location_amount"])
    )
    for data in pair_dict.values():
        for v, loc in enumerate(data["location_amount"], start=1):
            data["pair_number"][loc] = v
    spanins["total_amount"] = (
        spanins["EMBEDDED"]["amount"]
        + spanins["SEPARATED"]["amount"]
        + spanins["OVERLAPPED"]["amount"]
    )
    spanins["total_unique"] = (
        spanins["EMBEDDED"]["uniques"]
        + spanins["SEPARATED"]["uniques"]
        + spanins["OVERLAPPED"]["uniques"]
    )
    return spanins, pair_dict
196 | |
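# A small, hypothetical walk-through of check_for_uniques (toy data): two
# positive-strand EMBEDDED hits sharing end site 4231 count as one unique.
#
#   spanins = reconfigure_dict({"EMBEDDED": {
#       "ORF1": [[3900, 4231, "ORF2", 4000, 4100, "+"]],
#       "ORF3": [[3950, 4231, "ORF4", 4010, 4110, "+"]],
#   }, "SEPARATED": {}, "OVERLAPPED": {}})
#   spanins, pair_dict = check_for_uniques(spanins)
#   # spanins["EMBEDDED"]["amount"]  -> 2
#   # spanins["EMBEDDED"]["uniques"] -> 1  (both end at 4231)
#   # pair_dict["pairs"]["pair_number"] -> {4231: 1}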

if __name__ == "__main__":

    # Common parameters for both the ISP and OSP portions of the script.

    parser = argparse.ArgumentParser(
        description="Trim the putative protein candidates and find potential i-spanin / o-spanin pairs"
    )

    parser.add_argument(
        "putative_isp_fasta_file",
        type=argparse.FileType("r"),
        help='Putative i-spanin FASTA file, output of "generate-putative-isp"',
    )  # the "input" argument

    parser.add_argument(
        "putative_osp_fasta_file",
        type=argparse.FileType("r"),
        help='Putative o-spanin FASTA file, output of "generate-putative-osp"',
    )

    parser.add_argument(
        "--max_isp_osp_distance",
        dest="max_isp_osp_distance",
        default=10,
        type=int,
        help="Maximum distance from the end of an i-spanin to the start of an o-spanin, measured in amino acids",
    )

    parser.add_argument(
        "--embedded_txt",
        dest="embedded_txt",
        type=argparse.FileType("w"),
        default="_findSpanin_embedded_results.txt",
        help="Results for potential embedded spanins",
    )
    parser.add_argument(
        "--overlap_txt",
        dest="overlap_txt",
        type=argparse.FileType("w"),
        default="_findSpanin_overlap_results.txt",
        help="Results for potential overlapping spanins",
    )
    parser.add_argument(
        "--separate_txt",
        dest="separate_txt",
        type=argparse.FileType("w"),
        default="_findSpanin_separated_results.txt",
        help="Results for potential separated spanins",
    )

    parser.add_argument(
        "--summary_txt",
        dest="summary_txt",
        type=argparse.FileType("w"),
        default="_findSpanin_summary.txt",
        help="Summary of potential spanin pairs",
    )
    parser.add_argument(
        "-v", action="version", version="0.3.0"
    )  # Is this manually updated?
    args = parser.parse_args()

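    # Example invocation (hypothetical file names; output paths fall back to
    # the defaults declared above):
    #
    #   python findSpanin.py putative_isp.fa putative_osp.fa \
    #       --max_isp_osp_distance 10
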
    # Containers for the three candidate architectures.
    SPANIN_TYPES = {
        "EMBEDDED": {},
        "OVERLAPPED": {},
        "SEPARATED": {},
    }

    # getDescriptions exhausts each handle, so re-open the file before pulling
    # the (header, sequence) tuples.
    isp = getDescriptions(args.putative_isp_fasta_file)
    args.putative_isp_fasta_file = open(args.putative_isp_fasta_file.name, "r")
    isp_full = tuple_fasta(args.putative_isp_fasta_file)

    osp = getDescriptions(args.putative_osp_fasta_file)
    args.putative_osp_fasta_file = open(args.putative_osp_fasta_file.name, "r")
    osp_full = tuple_fasta(args.putative_osp_fasta_file)

    #### location data
    location_data = {"isp": [], "osp": []}
    spanins = [isp, osp]
    for idx, each_spanin_type in enumerate(spanins):
        for description in each_spanin_type:
            locations = grabLocs(description)
            if idx == 0:  # i-spanin
                location_data["isp"].append(locations)
            elif idx == 1:  # o-spanin
                location_data["osp"].append(locations)

    #### Check for types of spanins
    # The CLI distance is given in amino acids; it is multiplied by 3 here,
    # apparently to convert it to nucleotides for the coordinate comparison.
    embedded, overlap, separate = spaninProximity(
        isp=location_data["isp"],
        osp=location_data["osp"],
        max_dist=args.max_isp_osp_distance * 3,
    )

    SPANIN_TYPES["EMBEDDED"] = embedded
    SPANIN_TYPES["OVERLAPPED"] = overlap
    SPANIN_TYPES["SEPARATED"] = separate

    spanins = reconfigure_dict(SPANIN_TYPES)
    spanins, pair_dict = check_for_uniques(spanins)

    with args.summary_txt as f:
        for each_spanin_type, spanin_data in spanins.items():
            # The totals live beside the per-type dicts; the membership test
            # skips them, and the try/except is a defensive fallback against
            # indexing a non-dict entry.
            try:
                if each_spanin_type not in ["total_amount", "total_unique"]:
                    f.write(
                        "=~~~~~= "
                        + str(each_spanin_type)
                        + " Spanin Candidate Statistics =~~~~~=\n"
                    )
                    f.write(
                        "Total Candidate Pairs = " + str(spanin_data["amount"]) + "\n"
                    )
                    f.write(
                        "Total Unique Pairs = " + str(spanin_data["uniques"]) + "\n"
                    )
                    # One line per i-spanin ORF with its candidate o-spanin count.
                    for k, v in SPANIN_TYPES[each_spanin_type].items():
                        f.write(
                            str(k)
                            + " ==> Amount of corresponding candidate o-spanin(s): "
                            + str(len(v))
                            + "\n"
                        )
            except TypeError:
                continue
        f.write("\n=~~~~~= Tally from ALL spanin types =~~~~~=\n")
        f.write("Total Candidates = " + str(spanins["total_amount"]) + "\n")
        f.write(
            "Total Unique Candidate Pairs = " + str(spanins["total_unique"]) + "\n"
        )

    # Harvest sequences for the embedded candidates. tuple_fasta yields
    # (header, sequence) tuples, and headers carry the periplasmic count
    # after the "~=" marker.
    args.putative_isp_fasta_file = open(args.putative_isp_fasta_file.name, "r")
    isp_full = tuple_fasta(args.putative_isp_fasta_file)

    args.putative_osp_fasta_file = open(args.putative_osp_fasta_file.name, "r")
    osp_full = tuple_fasta(args.putative_osp_fasta_file)

    isp_seqs = []
    osp_seqs = []
    for isp_tupe in isp_full:
        for pisp, posp in embedded.items():
            # The trailing \D keeps e.g. ORF1 from also matching ORF10.
            if re.search("(" + str(pisp) + r")\D", isp_tupe[0]):
                peri_count = isp_tupe[0].split("~=")[1]
                isp_seqs.append((pisp, isp_tupe[1], peri_count))

    for osp_tupe in osp_full:
        for pisp, posp in embedded.items():
            for data in posp:
                # data[3] is the o-spanin ORF name (see reconfigure_dict).
                if re.search("(" + str(data[3]) + r")\D", osp_tupe[0]):
                    peri_count = osp_tupe[0].split("~=")[1]
                    osp_seqs.append((data[3], osp_tupe[1], peri_count))

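    # The same header-matching harvest is repeated below for the overlap and
    # separated candidate sets. A hypothetical shared helper (a sketch, not
    # part of this tool) that could replace all three pairs of loops:
    #
    #   def harvest_seqs(candidates, isp_full, osp_full):
    #       isp_seqs, osp_seqs = [], []
    #       for header, seq in isp_full:
    #           for pisp in candidates:
    #               if re.search("(" + str(pisp) + r")\D", header):
    #                   isp_seqs.append((pisp, seq, header.split("~=")[1]))
    #       for header, seq in osp_full:
    #           for posp in candidates.values():
    #               for data in posp:
    #                   if re.search("(" + str(data[3]) + r")\D", header):
    #                       osp_seqs.append((data[3], seq, header.split("~=")[1]))
    #       return isp_seqs, osp_seqs
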
    with args.embedded_txt as f:
        f.write("================ embedded spanin candidates =================\n")
        f.write(
            "isp\tisp_start\tisp_end\tosp\tosp_start\tosp_end\tstrand\tpair_number\n"
        )
        if embedded:
            for pisp, posp in embedded.items():
                f.write(pisp + "\n")
                for each_posp in posp:
                    f.write(
                        "\t{}\t{}\t{}\t{}\t{}\t{}\t".format(
                            each_posp[1],
                            each_posp[2],
                            each_posp[3],
                            each_posp[4],
                            each_posp[5],
                            each_posp[6],
                        )
                    )
                    # Pair numbers are keyed by end site on "+" and start site on "-".
                    if each_posp[6] == "+":
                        if each_posp[2] in pair_dict["pairs"]["pair_number"]:
                            f.write(
                                str(pair_dict["pairs"]["pair_number"][each_posp[2]])
                                + "\n"
                            )
                    elif each_posp[6] == "-":
                        if each_posp[1] in pair_dict["pairs"]["pair_number"]:
                            f.write(
                                str(pair_dict["pairs"]["pair_number"][each_posp[1]])
                                + "\n"
                            )
        else:
            f.write("nothing found")

    with open(args.embedded_txt.name, "a") as f:
        f.write("\n================= embedded candidate sequences ================\n")
        f.write("======================= isp ==========================\n\n")
        for isp_data in isp_seqs:
            f.write(
                ">isp_orf::{}-peri_count~={}\n{}\n".format(
                    isp_data[0], isp_data[2], lineWrapper(isp_data[1])
                )
            )
        f.write("\n======================= osp ========================\n\n")
        for osp_data in osp_seqs:
            f.write(
                ">osp_orf::{}-peri_count~={}\n{}\n".format(
                    osp_data[0], osp_data[2], lineWrapper(osp_data[1])
                )
            )

    # Same harvest as above, now for the overlap candidates.
    args.putative_isp_fasta_file = open(args.putative_isp_fasta_file.name, "r")
    isp_full = tuple_fasta(args.putative_isp_fasta_file)

    args.putative_osp_fasta_file = open(args.putative_osp_fasta_file.name, "r")
    osp_full = tuple_fasta(args.putative_osp_fasta_file)

    isp_seqs = []
    osp_seqs = []
    for isp_tupe in isp_full:
        for pisp, posp in overlap.items():
            if re.search("(" + str(pisp) + r")\D", isp_tupe[0]):
                peri_count = isp_tupe[0].split("~=")[1]
                isp_seqs.append((pisp, isp_tupe[1], peri_count))

    for osp_tupe in osp_full:
        for pisp, posp in overlap.items():
            for data in posp:
                if re.search("(" + str(data[3]) + r")\D", osp_tupe[0]):
                    peri_count = osp_tupe[0].split("~=")[1]
                    osp_seqs.append((data[3], osp_tupe[1], peri_count))

    with args.overlap_txt as f:
        f.write("================ overlap spanin candidates =================\n")
        f.write(
            "isp\tisp_start\tisp_end\tosp\tosp_start\tosp_end\tstrand\tpair_number\n"
        )
        if overlap:
            for pisp, posp in overlap.items():
                f.write(pisp + "\n")
                for each_posp in posp:
                    f.write(
                        "\t{}\t{}\t{}\t{}\t{}\t{}\t".format(
                            each_posp[1],
                            each_posp[2],
                            each_posp[3],
                            each_posp[4],
                            each_posp[5],
                            each_posp[6],
                        )
                    )
                    if each_posp[6] == "+":
                        if each_posp[2] in pair_dict["pairs"]["pair_number"]:
                            f.write(
                                str(pair_dict["pairs"]["pair_number"][each_posp[2]])
                                + "\n"
                            )
                    elif each_posp[6] == "-":
                        if each_posp[1] in pair_dict["pairs"]["pair_number"]:
                            f.write(
                                str(pair_dict["pairs"]["pair_number"][each_posp[1]])
                                + "\n"
                            )
        else:
            f.write("nothing found")

    with open(args.overlap_txt.name, "a") as f:
        f.write("\n================= overlap candidate sequences ================\n")
        f.write("======================= isp ==========================\n\n")
        for isp_data in isp_seqs:
            f.write(
                ">isp_orf::{}-peri_count~={}\n{}\n".format(
                    isp_data[0], isp_data[2], lineWrapper(isp_data[1])
                )
            )
        f.write("\n======================= osp ========================\n\n")
        for osp_data in osp_seqs:
            f.write(
                ">osp_orf::{}-peri_count~={}\n{}\n".format(
                    osp_data[0], osp_data[2], lineWrapper(osp_data[1])
                )
            )

    # Same harvest once more, for the separated candidates.
    args.putative_isp_fasta_file = open(args.putative_isp_fasta_file.name, "r")
    isp_full = tuple_fasta(args.putative_isp_fasta_file)
    args.putative_osp_fasta_file = open(args.putative_osp_fasta_file.name, "r")
    osp_full = tuple_fasta(args.putative_osp_fasta_file)

    isp_seqs = []
    osp_seqs = []
    for isp_tupe in isp_full:
        for pisp, posp in separate.items():
            if re.search("(" + str(pisp) + r")\D", isp_tupe[0]):
                peri_count = isp_tupe[0].split("~=")[1]
                isp_seqs.append((pisp, isp_tupe[1], peri_count))

    for osp_tupe in osp_full:
        for pisp, posp in separate.items():
            for data in posp:
                if re.search("(" + str(data[3]) + r")\D", osp_tupe[0]):
                    peri_count = osp_tupe[0].split("~=")[1]
                    osp_seqs.append((data[3], osp_tupe[1], peri_count))

    with args.separate_txt as f:
        f.write("================ separated spanin candidates =================\n")
        f.write(
            "isp\tisp_start\tisp_end\tosp\tosp_start\tosp_end\tstrand\tpair_number\n"
        )
        if separate:
            for pisp, posp in separate.items():
                f.write(pisp + "\n")
                for each_posp in posp:
                    f.write(
                        "\t{}\t{}\t{}\t{}\t{}\t{}\t".format(
                            each_posp[1],
                            each_posp[2],
                            each_posp[3],
                            each_posp[4],
                            each_posp[5],
                            each_posp[6],
                        )
                    )
                    if each_posp[6] == "+":
                        if each_posp[2] in pair_dict["pairs"]["pair_number"]:
                            f.write(
                                str(pair_dict["pairs"]["pair_number"][each_posp[2]])
                                + "\n"
                            )
                    elif each_posp[6] == "-":
                        if each_posp[1] in pair_dict["pairs"]["pair_number"]:
                            f.write(
                                str(pair_dict["pairs"]["pair_number"][each_posp[1]])
                                + "\n"
                            )
        else:
            f.write("nothing found")

    with open(args.separate_txt.name, "a") as f:
        f.write("\n================= separated candidate sequences ================\n")
        f.write("======================= isp ==========================\n\n")
        for isp_data in isp_seqs:
            f.write(
                ">isp_orf::{}-peri_count~={}\n{}\n".format(
                    isp_data[0], isp_data[2], lineWrapper(isp_data[1])
                )
            )
        f.write("\n======================= osp ========================\n\n")
        for osp_data in osp_seqs:
            f.write(
                ">osp_orf::{}-peri_count~={}\n{}\n".format(
                    osp_data[0], osp_data[2], lineWrapper(osp_data[1])
                )
            )
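
    # The three report blocks above repeat the same strand-keyed lookup. A
    # hypothetical helper (a sketch under the same data layout, not part of
    # this tool) they could collapse into:
    #
    #   def pair_number_for(hit, pair_numbers):
    #       """Pair number keyed by end site (+) or start site (-)."""
    #       key = hit[2] if hit[6] == "+" else hit[1]
    #       return pair_numbers.get(key)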