Mercurial > repos > cpt > cpt_intersect_adj
comparison intersect_and_adjacent.py @ 4:3e3b5ba626b9 draft
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
author | cpt |
---|---|
date | Mon, 12 Aug 2024 04:33:20 +0000 |
parents | 10000414e916 |
children | 00aab5199327 |
comparison
equal
deleted
inserted
replaced
3:10000414e916 | 4:3e3b5ba626b9 |
---|---|
82 a_pos = [] | 82 a_pos = [] |
83 b_neg = [] | 83 b_neg = [] |
84 b_pos = [] | 84 b_pos = [] |
85 tree_a = [] | 85 tree_a = [] |
86 tree_b = [] | 86 tree_b = [] |
87 if stranding == True: | 87 |
88 for feat in rec_a_i.features: | 88 for feat in rec_a_i.features: |
89 if feat.type == "remark" or feat.type == "annotation": | 89 if feat.type == "remark" or feat.type == "annotation": |
90 continue | 90 continue |
91 interval = Interval( | |
92 int(feat.location.start) - int(window), | |
93 int(feat.location.end) + int(window), | |
94 feat.id, | |
95 ) | |
96 if stranding: | |
91 if feat.strand > 0: | 97 if feat.strand > 0: |
92 a_pos.append( | 98 a_pos.append(interval) |
93 Interval( | 99 else: |
94 int(feat.location.start) - int(window), | 100 a_neg.append(interval) |
95 int(feat.location.end) + int(window), | 101 else: |
96 feat.id, | 102 tree_a.append(interval) |
97 ) | 103 |
98 ) | 104 for feat in rec_b_i.features: |
99 else: | 105 if feat.type == "remark" or feat.type == "annotation": |
100 a_neg.append( | 106 continue |
101 Interval( | 107 interval = Interval( |
102 int(feat.location.start) - int(window), | 108 int(feat.location.start) - int(window), |
103 int(feat.location.end) + int(window), | 109 int(feat.location.end) + int(window), |
104 feat.id, | 110 feat.id, |
105 ) | 111 ) |
106 ) | 112 if stranding: |
107 | |
108 for feat in rec_b_i.features: | |
109 if feat.type == "remark" or feat.type == "annotation": | |
110 continue | |
111 if feat.strand > 0: | 113 if feat.strand > 0: |
112 b_pos.append( | 114 b_pos.append(interval) |
113 Interval( | 115 else: |
114 int(feat.location.start) - int(window), | 116 b_neg.append(interval) |
115 int(feat.location.end) + int(window), | 117 else: |
116 feat.id, | 118 tree_b.append(interval) |
117 ) | 119 |
118 ) | |
119 else: | |
120 b_neg.append( | |
121 Interval( | |
122 int(feat.location.start) - int(window), | |
123 int(feat.location.end) + int(window), | |
124 feat.id, | |
125 ) | |
126 ) | |
127 | |
128 else: | |
129 for feat in rec_a_i.features: | |
130 if feat.type == "remark" or feat.type == "annotation": | |
131 continue | |
132 tree_a.append( | |
133 Interval( | |
134 int(feat.location.start) - int(window), | |
135 int(feat.location.end) + int(window), | |
136 feat.id, | |
137 ) | |
138 ) | |
139 for feat in rec_b_i.features: | |
140 if feat.type == "remark" or feat.type == "annotation": | |
141 continue | |
142 tree_b.append( | |
143 Interval( | |
144 int(feat.location.start) - int(window), | |
145 int(feat.location.end) + int(window), | |
146 feat.id, | |
147 ) | |
148 ) | |
149 if stranding: | 120 if stranding: |
150 # builds interval tree from Interval objects of form (start, end, id) for each feature | |
151 # tree_a = IntervalTree(list(treeFeatures_noRem(rec_a_i.features, window))) | |
152 # tree_b = IntervalTree(list(treeFeatures_noRem(rec_b_i.features, window))) | |
153 # else: | |
154 tree_a_pos = IntervalTree(a_pos) | 121 tree_a_pos = IntervalTree(a_pos) |
155 tree_a_neg = IntervalTree(a_neg) | 122 tree_a_neg = IntervalTree(a_neg) |
156 tree_b_pos = IntervalTree(b_pos) | 123 tree_b_pos = IntervalTree(b_pos) |
157 tree_b_neg = IntervalTree(b_neg) | 124 tree_b_neg = IntervalTree(b_neg) |
158 else: | 125 else: |
165 | 132 |
166 rec_a_hits_in_b = [] | 133 rec_a_hits_in_b = [] |
167 rec_b_hits_in_a = [] | 134 rec_b_hits_in_a = [] |
168 | 135 |
169 for feature in rec_a_i.features: | 136 for feature in rec_a_i.features: |
170 # Save each feature in rec_a that overlaps a feature in rec_b | |
171 # hits = tree_b.find_range((int(feature.location.start), int(feature.location.end))) | |
172 | |
173 if feature.type == "remark" or feature.type == "annotation": | 137 if feature.type == "remark" or feature.type == "annotation": |
174 continue | 138 continue |
175 | 139 |
176 if stranding == False: | 140 if not stranding: |
177 hits = tree_b[ | 141 hits = tree_b[ |
178 int(feature.location.start) : int(feature.location.end) | 142 int(feature.location.start) : int(feature.location.end) |
179 ] | 143 ] |
180 | |
181 # feature id is saved in interval result.data, use map to get full feature | |
182 for hit in hits: | 144 for hit in hits: |
183 rec_a_hits_in_b.append(rec_b_map[hit.data]) | 145 rec_a_hits_in_b.append(rec_b_map[hit.data]) |
184 | |
185 else: | 146 else: |
186 if feature.strand > 0: | 147 if feature.strand > 0: |
187 hits_pos = tree_b_pos[ | 148 hits = tree_b_pos[ |
188 int(feature.location.start) : int(feature.location.end) | 149 int(feature.location.start) : int(feature.location.end) |
189 ] | 150 ] |
190 for hit in hits_pos: | 151 else: |
191 rec_a_hits_in_b.append(rec_b_map[hit.data]) | 152 hits = tree_b_neg[ |
192 else: | 153 int(feature.location.start) : int(feature.location.end) |
193 hits_neg = tree_b_neg[ | 154 ] |
194 int(feature.location.start) : int(feature.location.end) | 155 for hit in hits: |
195 ] | 156 rec_a_hits_in_b.append(rec_b_map[hit.data]) |
196 for hit in hits_neg: | |
197 rec_a_hits_in_b.append(rec_b_map[hit.data]) | |
198 | 157 |
199 for feature in rec_b_i.features: | 158 for feature in rec_b_i.features: |
200 if feature.type == "remark" or feature.type == "annotation": | 159 if feature.type == "remark" or feature.type == "annotation": |
201 continue | 160 continue |
202 | 161 |
203 if stranding == False: | 162 if not stranding: |
204 hits = tree_a[ | 163 hits = tree_a[ |
205 int(feature.location.start) : int(feature.location.end) | 164 int(feature.location.start) : int(feature.location.end) |
206 ] | 165 ] |
207 | |
208 # feature id is saved in interval result.data, use map to get full feature | |
209 for hit in hits: | 166 for hit in hits: |
210 rec_b_hits_in_a.append(rec_a_map[hit.data]) | 167 rec_b_hits_in_a.append(rec_a_map[hit.data]) |
211 | |
212 else: | 168 else: |
213 if feature.strand > 0: | 169 if feature.strand > 0: |
214 hits_pos = tree_a_pos[ | 170 hits = tree_a_pos[ |
215 int(feature.location.start) : int(feature.location.end) | 171 int(feature.location.start) : int(feature.location.end) |
216 ] | 172 ] |
217 for hit in hits_pos: | 173 else: |
218 rec_b_hits_in_a.append(rec_a_map[hit.data]) | 174 hits = tree_a_neg[ |
219 else: | 175 int(feature.location.start) : int(feature.location.end) |
220 hits_neg = tree_a_neg[ | 176 ] |
221 int(feature.location.start) : int(feature.location.end) | 177 for hit in hits: |
222 ] | 178 rec_b_hits_in_a.append(rec_a_map[hit.data]) |
223 for hit in hits_neg: | |
224 rec_b_hits_in_a.append(rec_a_map[hit.data]) | |
225 | 179 |
226 # Remove duplicate features using sets | 180 # Remove duplicate features using sets |
227 rec_a_out.append( | 181 rec_a_out.append( |
228 SeqRecord( | 182 SeqRecord( |
229 rec_a[iterate].seq, | 183 rec_a[iterate].seq, |
266 type=int, | 220 type=int, |
267 default=50, | 221 default=50, |
268 help="Allows features this far away to still be considered 'adjacent'", | 222 help="Allows features this far away to still be considered 'adjacent'", |
269 ) | 223 ) |
270 parser.add_argument( | 224 parser.add_argument( |
271 "stranding", | 225 "-stranding", |
272 nargs="?", | 226 action="store_true", |
273 default="", | 227 help="Only allow adjacency for same-strand features", |
274 help="Only allow adjacency for same-strand features if set to '-stranding'", | |
275 ) | 228 ) |
276 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") | 229 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") |
277 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") | 230 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") |
278 args = parser.parse_args() | 231 args = parser.parse_args() |
279 | 232 |
280 stranding = args.stranding == "-stranding" | |
281 b, a = intersect(args.a, args.b, args.window, args.stranding) | 233 b, a = intersect(args.a, args.b, args.window, args.stranding) |
282 | 234 |
283 with open(args.oa, "w") as handle: | 235 with open(args.oa, "w") as handle: |
284 for rec in a: | 236 for rec in a: |
285 gffWrite([rec], handle) | 237 gffWrite([rec], handle) |