comparison intersect_and_adjacent.py @ 5:00aab5199327 draft default tip

planemo upload commit 6b68423e1d9bbced95ecfb92e26329a7e8d7df47
author cpt
date Mon, 12 Aug 2024 04:40:20 +0000
parents 3e3b5ba626b9
children
comparison
equal deleted inserted replaced
4:3e3b5ba626b9 5:00aab5199327
128 128
129 # Used to map ids back to features later 129 # Used to map ids back to features later
130 rec_a_map = {f.id: f for f in rec_a_i.features} 130 rec_a_map = {f.id: f for f in rec_a_i.features}
131 rec_b_map = {f.id: f for f in rec_b_i.features} 131 rec_b_map = {f.id: f for f in rec_b_i.features}
132 132
133 rec_a_hits_in_b = [] 133 rec_a_hits_in_b = {}
134 rec_b_hits_in_a = [] 134 rec_b_hits_in_a = {}
135 135
136 for feature in rec_a_i.features: 136 for feature in rec_a_i.features:
137 if feature.type == "remark" or feature.type == "annotation": 137 if feature.type == "remark" or feature.type == "annotation":
138 continue 138 continue
139 139
140 if not stranding: 140 if not stranding:
141 hits = tree_b[ 141 hits = tree_b[
142 int(feature.location.start) : int(feature.location.end) 142 int(feature.location.start) : int(feature.location.end)
143 ] 143 ]
144 for hit in hits: 144 for hit in hits:
145 rec_a_hits_in_b.append(rec_b_map[hit.data]) 145 rec_a_hits_in_b[hit.data] = rec_b_map[hit.data]
146 else: 146 else:
147 if feature.strand > 0: 147 if feature.strand > 0:
148 hits = tree_b_pos[ 148 hits = tree_b_pos[
149 int(feature.location.start) : int(feature.location.end) 149 int(feature.location.start) : int(feature.location.end)
150 ] 150 ]
151 else: 151 else:
152 hits = tree_b_neg[ 152 hits = tree_b_neg[
153 int(feature.location.start) : int(feature.location.end) 153 int(feature.location.start) : int(feature.location.end)
154 ] 154 ]
155 for hit in hits: 155 for hit in hits:
156 rec_a_hits_in_b.append(rec_b_map[hit.data]) 156 rec_a_hits_in_b[hit.data] = rec_b_map[hit.data]
157 157
158 for feature in rec_b_i.features: 158 for feature in rec_b_i.features:
159 if feature.type == "remark" or feature.type == "annotation": 159 if feature.type == "remark" or feature.type == "annotation":
160 continue 160 continue
161 161
162 if not stranding: 162 if not stranding:
163 hits = tree_a[ 163 hits = tree_a[
164 int(feature.location.start) : int(feature.location.end) 164 int(feature.location.start) : int(feature.location.end)
165 ] 165 ]
166 for hit in hits: 166 for hit in hits:
167 rec_b_hits_in_a.append(rec_a_map[hit.data]) 167 rec_b_hits_in_a[hit.data] = rec_a_map[hit.data]
168 else: 168 else:
169 if feature.strand > 0: 169 if feature.strand > 0:
170 hits = tree_a_pos[ 170 hits = tree_a_pos[
171 int(feature.location.start) : int(feature.location.end) 171 int(feature.location.start) : int(feature.location.end)
172 ] 172 ]
173 else: 173 else:
174 hits = tree_a_neg[ 174 hits = tree_a_neg[
175 int(feature.location.start) : int(feature.location.end) 175 int(feature.location.start) : int(feature.location.end)
176 ] 176 ]
177 for hit in hits: 177 for hit in hits:
178 rec_b_hits_in_a.append(rec_a_map[hit.data]) 178 rec_b_hits_in_a[hit.data] = rec_a_map[hit.data]
179 179
180 # Remove duplicate features using sets 180 # Sort features by start position
181 rec_a_out.append( 181 rec_a_out.append(
182 SeqRecord( 182 SeqRecord(
183 rec_a[iterate].seq, 183 rec_a[iterate].seq,
184 rec_a[iterate].id, 184 rec_a[iterate].id,
185 rec_a[iterate].name, 185 rec_a[iterate].name,
186 rec_a[iterate].description, 186 rec_a[iterate].description,
187 rec_a[iterate].dbxrefs, 187 rec_a[iterate].dbxrefs,
188 sorted(set(rec_a_hits_in_b), key=lambda feat: feat.location.start), 188 sorted(
189 rec_a_hits_in_b.values(), key=lambda feat: feat.location.start
190 ),
189 rec_a[iterate].annotations, 191 rec_a[iterate].annotations,
190 ) 192 )
191 ) 193 )
192 rec_b_out.append( 194 rec_b_out.append(
193 SeqRecord( 195 SeqRecord(
194 rec_b[iterate].seq, 196 rec_b[iterate].seq,
195 rec_b[iterate].id, 197 rec_b[iterate].id,
196 rec_b[iterate].name, 198 rec_b[iterate].name,
197 rec_b[iterate].description, 199 rec_b[iterate].description,
198 rec_b[iterate].dbxrefs, 200 rec_b[iterate].dbxrefs,
199 sorted(set(rec_b_hits_in_a), key=lambda feat: feat.location.start), 201 sorted(
202 rec_b_hits_in_a.values(), key=lambda feat: feat.location.start
203 ),
200 rec_b[iterate].annotations, 204 rec_b[iterate].annotations,
201 ) 205 )
202 ) 206 )
203 iterate += 1 207 iterate += 1
204 208