comparison intersect_and_adjacent.py @ 4:3e3b5ba626b9 draft

planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
author cpt
date Mon, 12 Aug 2024 04:33:20 +0000
parents 10000414e916
children 00aab5199327
comparison
equal deleted inserted replaced
3:10000414e916 4:3e3b5ba626b9
82 a_pos = [] 82 a_pos = []
83 b_neg = [] 83 b_neg = []
84 b_pos = [] 84 b_pos = []
85 tree_a = [] 85 tree_a = []
86 tree_b = [] 86 tree_b = []
87 if stranding == True: 87
88 for feat in rec_a_i.features: 88 for feat in rec_a_i.features:
89 if feat.type == "remark" or feat.type == "annotation": 89 if feat.type == "remark" or feat.type == "annotation":
90 continue 90 continue
91 interval = Interval(
92 int(feat.location.start) - int(window),
93 int(feat.location.end) + int(window),
94 feat.id,
95 )
96 if stranding:
91 if feat.strand > 0: 97 if feat.strand > 0:
92 a_pos.append( 98 a_pos.append(interval)
93 Interval( 99 else:
94 int(feat.location.start) - int(window), 100 a_neg.append(interval)
95 int(feat.location.end) + int(window), 101 else:
96 feat.id, 102 tree_a.append(interval)
97 ) 103
98 ) 104 for feat in rec_b_i.features:
99 else: 105 if feat.type == "remark" or feat.type == "annotation":
100 a_neg.append( 106 continue
101 Interval( 107 interval = Interval(
102 int(feat.location.start) - int(window), 108 int(feat.location.start) - int(window),
103 int(feat.location.end) + int(window), 109 int(feat.location.end) + int(window),
104 feat.id, 110 feat.id,
105 ) 111 )
106 ) 112 if stranding:
107
108 for feat in rec_b_i.features:
109 if feat.type == "remark" or feat.type == "annotation":
110 continue
111 if feat.strand > 0: 113 if feat.strand > 0:
112 b_pos.append( 114 b_pos.append(interval)
113 Interval( 115 else:
114 int(feat.location.start) - int(window), 116 b_neg.append(interval)
115 int(feat.location.end) + int(window), 117 else:
116 feat.id, 118 tree_b.append(interval)
117 ) 119
118 )
119 else:
120 b_neg.append(
121 Interval(
122 int(feat.location.start) - int(window),
123 int(feat.location.end) + int(window),
124 feat.id,
125 )
126 )
127
128 else:
129 for feat in rec_a_i.features:
130 if feat.type == "remark" or feat.type == "annotation":
131 continue
132 tree_a.append(
133 Interval(
134 int(feat.location.start) - int(window),
135 int(feat.location.end) + int(window),
136 feat.id,
137 )
138 )
139 for feat in rec_b_i.features:
140 if feat.type == "remark" or feat.type == "annotation":
141 continue
142 tree_b.append(
143 Interval(
144 int(feat.location.start) - int(window),
145 int(feat.location.end) + int(window),
146 feat.id,
147 )
148 )
149 if stranding: 120 if stranding:
150 # builds interval tree from Interval objects of form (start, end, id) for each feature
151 # tree_a = IntervalTree(list(treeFeatures_noRem(rec_a_i.features, window)))
152 # tree_b = IntervalTree(list(treeFeatures_noRem(rec_b_i.features, window)))
153 # else:
154 tree_a_pos = IntervalTree(a_pos) 121 tree_a_pos = IntervalTree(a_pos)
155 tree_a_neg = IntervalTree(a_neg) 122 tree_a_neg = IntervalTree(a_neg)
156 tree_b_pos = IntervalTree(b_pos) 123 tree_b_pos = IntervalTree(b_pos)
157 tree_b_neg = IntervalTree(b_neg) 124 tree_b_neg = IntervalTree(b_neg)
158 else: 125 else:
165 132
166 rec_a_hits_in_b = [] 133 rec_a_hits_in_b = []
167 rec_b_hits_in_a = [] 134 rec_b_hits_in_a = []
168 135
169 for feature in rec_a_i.features: 136 for feature in rec_a_i.features:
170 # Save each feature in rec_a that overlaps a feature in rec_b
171 # hits = tree_b.find_range((int(feature.location.start), int(feature.location.end)))
172
173 if feature.type == "remark" or feature.type == "annotation": 137 if feature.type == "remark" or feature.type == "annotation":
174 continue 138 continue
175 139
176 if stranding == False: 140 if not stranding:
177 hits = tree_b[ 141 hits = tree_b[
178 int(feature.location.start) : int(feature.location.end) 142 int(feature.location.start) : int(feature.location.end)
179 ] 143 ]
180
181 # feature id is saved in interval result.data, use map to get full feature
182 for hit in hits: 144 for hit in hits:
183 rec_a_hits_in_b.append(rec_b_map[hit.data]) 145 rec_a_hits_in_b.append(rec_b_map[hit.data])
184
185 else: 146 else:
186 if feature.strand > 0: 147 if feature.strand > 0:
187 hits_pos = tree_b_pos[ 148 hits = tree_b_pos[
188 int(feature.location.start) : int(feature.location.end) 149 int(feature.location.start) : int(feature.location.end)
189 ] 150 ]
190 for hit in hits_pos: 151 else:
191 rec_a_hits_in_b.append(rec_b_map[hit.data]) 152 hits = tree_b_neg[
192 else: 153 int(feature.location.start) : int(feature.location.end)
193 hits_neg = tree_b_neg[ 154 ]
194 int(feature.location.start) : int(feature.location.end) 155 for hit in hits:
195 ] 156 rec_a_hits_in_b.append(rec_b_map[hit.data])
196 for hit in hits_neg:
197 rec_a_hits_in_b.append(rec_b_map[hit.data])
198 157
199 for feature in rec_b_i.features: 158 for feature in rec_b_i.features:
200 if feature.type == "remark" or feature.type == "annotation": 159 if feature.type == "remark" or feature.type == "annotation":
201 continue 160 continue
202 161
203 if stranding == False: 162 if not stranding:
204 hits = tree_a[ 163 hits = tree_a[
205 int(feature.location.start) : int(feature.location.end) 164 int(feature.location.start) : int(feature.location.end)
206 ] 165 ]
207
208 # feature id is saved in interval result.data, use map to get full feature
209 for hit in hits: 166 for hit in hits:
210 rec_b_hits_in_a.append(rec_a_map[hit.data]) 167 rec_b_hits_in_a.append(rec_a_map[hit.data])
211
212 else: 168 else:
213 if feature.strand > 0: 169 if feature.strand > 0:
214 hits_pos = tree_a_pos[ 170 hits = tree_a_pos[
215 int(feature.location.start) : int(feature.location.end) 171 int(feature.location.start) : int(feature.location.end)
216 ] 172 ]
217 for hit in hits_pos: 173 else:
218 rec_b_hits_in_a.append(rec_a_map[hit.data]) 174 hits = tree_a_neg[
219 else: 175 int(feature.location.start) : int(feature.location.end)
220 hits_neg = tree_a_neg[ 176 ]
221 int(feature.location.start) : int(feature.location.end) 177 for hit in hits:
222 ] 178 rec_b_hits_in_a.append(rec_a_map[hit.data])
223 for hit in hits_neg:
224 rec_b_hits_in_a.append(rec_a_map[hit.data])
225 179
226 # Remove duplicate features using sets 180 # Remove duplicate features using sets
227 rec_a_out.append( 181 rec_a_out.append(
228 SeqRecord( 182 SeqRecord(
229 rec_a[iterate].seq, 183 rec_a[iterate].seq,
266 type=int, 220 type=int,
267 default=50, 221 default=50,
268 help="Allows features this far away to still be considered 'adjacent'", 222 help="Allows features this far away to still be considered 'adjacent'",
269 ) 223 )
270 parser.add_argument( 224 parser.add_argument(
271 "stranding", 225 "-stranding",
272 nargs="?", 226 action="store_true",
273 default="", 227 help="Only allow adjacency for same-strand features",
274 help="Only allow adjacency for same-strand features if set to '-stranding'",
275 ) 228 )
276 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") 229 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff")
277 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") 230 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff")
278 args = parser.parse_args() 231 args = parser.parse_args()
279 232
280 stranding = args.stranding == "-stranding"
281 b, a = intersect(args.a, args.b, args.window, args.stranding) 233 b, a = intersect(args.a, args.b, args.window, args.stranding)
282 234
283 with open(args.oa, "w") as handle: 235 with open(args.oa, "w") as handle:
284 for rec in a: 236 for rec in a:
285 gffWrite([rec], handle) 237 gffWrite([rec], handle)