Mercurial > repos > cpt > cpt_intersect_adj
annotate intersect_and_adjacent.py @ 5:00aab5199327 draft default tip
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
author | cpt |
---|---|
date | Mon, 12 Aug 2024 04:40:20 +0000 |
parents | 3e3b5ba626b9 |
children |
rev | line source |
---|---|
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
1 #!/usr/bin/env python |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
2 import logging |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
3 import argparse |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
4 from intervaltree import IntervalTree, Interval |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
5 from CPT_GFFParser import gffParse, gffWrite |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
6 from Bio.SeqRecord import SeqRecord |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
7 from Bio.Seq import Seq |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
8 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
9 logging.basicConfig(level=logging.INFO) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
10 log = logging.getLogger(__name__) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
11 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
12 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
13 def validFeat(rec): |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
14 for feat in rec.features: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
15 if feat.type != "remark" and feat.type != "annotation": |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
16 return True |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
17 return False |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
18 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
19 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
20 def treeFeatures(features, window): |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
21 for feat in features: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
22 # Interval(begin, end, data) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
23 yield Interval( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
24 int(feat.location.start) - int(window), |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
25 int(feat.location.end) + int(window), |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
26 feat.id, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
27 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
28 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
29 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
30 def treeFeatures_noRem(features, window): |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
31 for feat in features: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
32 if feat.type == "remark" or feat.type == "annotation": |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
33 continue |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
34 # Interval(begin, end, data) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
35 yield Interval( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
36 int(feat.location.start) - int(window), |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
37 int(feat.location.end) + int(window), |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
38 feat.id, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
39 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
40 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
41 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
42 def intersect(a, b, window, stranding): |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
43 rec_a = list(gffParse(a)) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
44 rec_b = list(gffParse(b)) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
45 rec_a_out = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
46 rec_b_out = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
47 maxLen = min(len(rec_a), len(rec_b)) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
48 iterate = 0 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
49 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
50 if maxLen > 0: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
51 while iterate < maxLen: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
52 rec_a_i = rec_a[iterate] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
53 rec_b_i = rec_b[iterate] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
54 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
55 if (not validFeat(rec_a_i)) or (not validFeat(rec_b_i)): |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
56 rec_a_out.append( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
57 SeqRecord( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
58 rec_a[iterate].seq, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
59 rec_a[iterate].id, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
60 rec_a[iterate].name, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
61 rec_a[iterate].description, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
62 rec_a[iterate].dbxrefs, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
63 [], |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
64 rec_a[iterate].annotations, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
65 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
66 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
67 rec_b_out.append( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
68 SeqRecord( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
69 rec_b[iterate].seq, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
70 rec_b[iterate].id, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
71 rec_b[iterate].name, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
72 rec_b[iterate].description, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
73 rec_b[iterate].dbxrefs, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
74 [], |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
75 rec_b[iterate].annotations, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
76 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
77 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
78 iterate += 1 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
79 continue |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
80 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
81 a_neg = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
82 a_pos = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
83 b_neg = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
84 b_pos = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
85 tree_a = [] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
86 tree_b = [] |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
87 |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
88 for feat in rec_a_i.features: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
89 if feat.type == "remark" or feat.type == "annotation": |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
90 continue |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
91 interval = Interval( |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
92 int(feat.location.start) - int(window), |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
93 int(feat.location.end) + int(window), |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
94 feat.id, |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
95 ) |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
96 if stranding: |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
97 if feat.strand > 0: |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
98 a_pos.append(interval) |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
99 else: |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
100 a_neg.append(interval) |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
101 else: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
102 tree_a.append(interval) |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
103 |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
104 for feat in rec_b_i.features: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
105 if feat.type == "remark" or feat.type == "annotation": |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
106 continue |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
107 interval = Interval( |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
108 int(feat.location.start) - int(window), |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
109 int(feat.location.end) + int(window), |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
110 feat.id, |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
111 ) |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
112 if stranding: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
113 if feat.strand > 0: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
114 b_pos.append(interval) |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
115 else: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
116 b_neg.append(interval) |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
117 else: |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
118 tree_b.append(interval) |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
119 |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
120 if stranding: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
121 tree_a_pos = IntervalTree(a_pos) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
122 tree_a_neg = IntervalTree(a_neg) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
123 tree_b_pos = IntervalTree(b_pos) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
124 tree_b_neg = IntervalTree(b_neg) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
125 else: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
126 tree_a = IntervalTree(tree_a) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
127 tree_b = IntervalTree(tree_b) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
128 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
129 # Used to map ids back to features later |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
130 rec_a_map = {f.id: f for f in rec_a_i.features} |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
131 rec_b_map = {f.id: f for f in rec_b_i.features} |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
132 |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
133 rec_a_hits_in_b = {} |
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
134 rec_b_hits_in_a = {} |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
135 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
136 for feature in rec_a_i.features: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
137 if feature.type == "remark" or feature.type == "annotation": |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
138 continue |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
139 |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
140 if not stranding: |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
141 hits = tree_b[ |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
142 int(feature.location.start) : int(feature.location.end) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
143 ] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
144 for hit in hits: |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
145 rec_a_hits_in_b[hit.data] = rec_b_map[hit.data] |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
146 else: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
147 if feature.strand > 0: |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
148 hits = tree_b_pos[ |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
149 int(feature.location.start) : int(feature.location.end) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
150 ] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
151 else: |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
152 hits = tree_b_neg[ |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
153 int(feature.location.start) : int(feature.location.end) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
154 ] |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
155 for hit in hits: |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
156 rec_a_hits_in_b[hit.data] = rec_b_map[hit.data] |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
157 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
158 for feature in rec_b_i.features: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
159 if feature.type == "remark" or feature.type == "annotation": |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
160 continue |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
161 |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
162 if not stranding: |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
163 hits = tree_a[ |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
164 int(feature.location.start) : int(feature.location.end) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
165 ] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
166 for hit in hits: |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
167 rec_b_hits_in_a[hit.data] = rec_a_map[hit.data] |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
168 else: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
169 if feature.strand > 0: |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
170 hits = tree_a_pos[ |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
171 int(feature.location.start) : int(feature.location.end) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
172 ] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
173 else: |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
174 hits = tree_a_neg[ |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
175 int(feature.location.start) : int(feature.location.end) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
176 ] |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
177 for hit in hits: |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
178 rec_b_hits_in_a[hit.data] = rec_a_map[hit.data] |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
179 |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
180 # Sort features by start position |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
181 rec_a_out.append( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
182 SeqRecord( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
183 rec_a[iterate].seq, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
184 rec_a[iterate].id, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
185 rec_a[iterate].name, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
186 rec_a[iterate].description, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
187 rec_a[iterate].dbxrefs, |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
188 sorted( |
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
189 rec_a_hits_in_b.values(), key=lambda feat: feat.location.start |
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
190 ), |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
191 rec_a[iterate].annotations, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
192 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
193 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
194 rec_b_out.append( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
195 SeqRecord( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
196 rec_b[iterate].seq, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
197 rec_b[iterate].id, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
198 rec_b[iterate].name, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
199 rec_b[iterate].description, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
200 rec_b[iterate].dbxrefs, |
5
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
201 sorted( |
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
202 rec_b_hits_in_a.values(), key=lambda feat: feat.location.start |
00aab5199327
planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
cpt
parents:
4
diff
changeset
|
203 ), |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
204 rec_b[iterate].annotations, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
205 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
206 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
207 iterate += 1 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
208 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
209 else: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
210 # If one input is empty, output two empty result files. |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
211 rec_a_out = [SeqRecord(Seq(""), "none")] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
212 rec_b_out = [SeqRecord(Seq(""), "none")] |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
213 return rec_a_out, rec_b_out |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
214 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
215 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
216 if __name__ == "__main__": |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
217 parser = argparse.ArgumentParser( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
218 description="rebase gff3 features against parent locations", epilog="" |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
219 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
220 parser.add_argument("a", type=argparse.FileType("r")) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
221 parser.add_argument("b", type=argparse.FileType("r")) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
222 parser.add_argument( |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
223 "window", |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
224 type=int, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
225 default=50, |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
226 help="Allows features this far away to still be considered 'adjacent'", |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
227 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
228 parser.add_argument( |
4
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
229 "-stranding", |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
230 action="store_true", |
3e3b5ba626b9
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
cpt
parents:
3
diff
changeset
|
231 help="Only allow adjacency for same-strand features", |
0
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
232 ) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
233 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
234 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
235 args = parser.parse_args() |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
236 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
237 b, a = intersect(args.a, args.b, args.window, args.stranding) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
238 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
239 with open(args.oa, "w") as handle: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
240 for rec in a: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
241 gffWrite([rec], handle) |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
242 |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
243 with open(args.ob, "w") as handle: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
244 for rec in b: |
923d2528480d
planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff
changeset
|
245 gffWrite([rec], handle) |