Mercurial > repos > cpt > cpt_intersect_adj
comparison intersect_and_adjacent.py @ 4:3e3b5ba626b9 draft
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
| author | cpt |
|---|---|
| date | Mon, 12 Aug 2024 04:33:20 +0000 |
| parents | 10000414e916 |
| children | 00aab5199327 |
comparison
equal
deleted
inserted
replaced
| 3:10000414e916 | 4:3e3b5ba626b9 |
|---|---|
| 82 a_pos = [] | 82 a_pos = [] |
| 83 b_neg = [] | 83 b_neg = [] |
| 84 b_pos = [] | 84 b_pos = [] |
| 85 tree_a = [] | 85 tree_a = [] |
| 86 tree_b = [] | 86 tree_b = [] |
| 87 if stranding == True: | 87 |
| 88 for feat in rec_a_i.features: | 88 for feat in rec_a_i.features: |
| 89 if feat.type == "remark" or feat.type == "annotation": | 89 if feat.type == "remark" or feat.type == "annotation": |
| 90 continue | 90 continue |
| 91 interval = Interval( | |
| 92 int(feat.location.start) - int(window), | |
| 93 int(feat.location.end) + int(window), | |
| 94 feat.id, | |
| 95 ) | |
| 96 if stranding: | |
| 91 if feat.strand > 0: | 97 if feat.strand > 0: |
| 92 a_pos.append( | 98 a_pos.append(interval) |
| 93 Interval( | 99 else: |
| 94 int(feat.location.start) - int(window), | 100 a_neg.append(interval) |
| 95 int(feat.location.end) + int(window), | 101 else: |
| 96 feat.id, | 102 tree_a.append(interval) |
| 97 ) | 103 |
| 98 ) | 104 for feat in rec_b_i.features: |
| 99 else: | 105 if feat.type == "remark" or feat.type == "annotation": |
| 100 a_neg.append( | 106 continue |
| 101 Interval( | 107 interval = Interval( |
| 102 int(feat.location.start) - int(window), | 108 int(feat.location.start) - int(window), |
| 103 int(feat.location.end) + int(window), | 109 int(feat.location.end) + int(window), |
| 104 feat.id, | 110 feat.id, |
| 105 ) | 111 ) |
| 106 ) | 112 if stranding: |
| 107 | |
| 108 for feat in rec_b_i.features: | |
| 109 if feat.type == "remark" or feat.type == "annotation": | |
| 110 continue | |
| 111 if feat.strand > 0: | 113 if feat.strand > 0: |
| 112 b_pos.append( | 114 b_pos.append(interval) |
| 113 Interval( | 115 else: |
| 114 int(feat.location.start) - int(window), | 116 b_neg.append(interval) |
| 115 int(feat.location.end) + int(window), | 117 else: |
| 116 feat.id, | 118 tree_b.append(interval) |
| 117 ) | 119 |
| 118 ) | |
| 119 else: | |
| 120 b_neg.append( | |
| 121 Interval( | |
| 122 int(feat.location.start) - int(window), | |
| 123 int(feat.location.end) + int(window), | |
| 124 feat.id, | |
| 125 ) | |
| 126 ) | |
| 127 | |
| 128 else: | |
| 129 for feat in rec_a_i.features: | |
| 130 if feat.type == "remark" or feat.type == "annotation": | |
| 131 continue | |
| 132 tree_a.append( | |
| 133 Interval( | |
| 134 int(feat.location.start) - int(window), | |
| 135 int(feat.location.end) + int(window), | |
| 136 feat.id, | |
| 137 ) | |
| 138 ) | |
| 139 for feat in rec_b_i.features: | |
| 140 if feat.type == "remark" or feat.type == "annotation": | |
| 141 continue | |
| 142 tree_b.append( | |
| 143 Interval( | |
| 144 int(feat.location.start) - int(window), | |
| 145 int(feat.location.end) + int(window), | |
| 146 feat.id, | |
| 147 ) | |
| 148 ) | |
| 149 if stranding: | 120 if stranding: |
| 150 # builds interval tree from Interval objects of form (start, end, id) for each feature | |
| 151 # tree_a = IntervalTree(list(treeFeatures_noRem(rec_a_i.features, window))) | |
| 152 # tree_b = IntervalTree(list(treeFeatures_noRem(rec_b_i.features, window))) | |
| 153 # else: | |
| 154 tree_a_pos = IntervalTree(a_pos) | 121 tree_a_pos = IntervalTree(a_pos) |
| 155 tree_a_neg = IntervalTree(a_neg) | 122 tree_a_neg = IntervalTree(a_neg) |
| 156 tree_b_pos = IntervalTree(b_pos) | 123 tree_b_pos = IntervalTree(b_pos) |
| 157 tree_b_neg = IntervalTree(b_neg) | 124 tree_b_neg = IntervalTree(b_neg) |
| 158 else: | 125 else: |
| 165 | 132 |
| 166 rec_a_hits_in_b = [] | 133 rec_a_hits_in_b = [] |
| 167 rec_b_hits_in_a = [] | 134 rec_b_hits_in_a = [] |
| 168 | 135 |
| 169 for feature in rec_a_i.features: | 136 for feature in rec_a_i.features: |
| 170 # Save each feature in rec_a that overlaps a feature in rec_b | |
| 171 # hits = tree_b.find_range((int(feature.location.start), int(feature.location.end))) | |
| 172 | |
| 173 if feature.type == "remark" or feature.type == "annotation": | 137 if feature.type == "remark" or feature.type == "annotation": |
| 174 continue | 138 continue |
| 175 | 139 |
| 176 if stranding == False: | 140 if not stranding: |
| 177 hits = tree_b[ | 141 hits = tree_b[ |
| 178 int(feature.location.start) : int(feature.location.end) | 142 int(feature.location.start) : int(feature.location.end) |
| 179 ] | 143 ] |
| 180 | |
| 181 # feature id is saved in interval result.data, use map to get full feature | |
| 182 for hit in hits: | 144 for hit in hits: |
| 183 rec_a_hits_in_b.append(rec_b_map[hit.data]) | 145 rec_a_hits_in_b.append(rec_b_map[hit.data]) |
| 184 | |
| 185 else: | 146 else: |
| 186 if feature.strand > 0: | 147 if feature.strand > 0: |
| 187 hits_pos = tree_b_pos[ | 148 hits = tree_b_pos[ |
| 188 int(feature.location.start) : int(feature.location.end) | 149 int(feature.location.start) : int(feature.location.end) |
| 189 ] | 150 ] |
| 190 for hit in hits_pos: | 151 else: |
| 191 rec_a_hits_in_b.append(rec_b_map[hit.data]) | 152 hits = tree_b_neg[ |
| 192 else: | 153 int(feature.location.start) : int(feature.location.end) |
| 193 hits_neg = tree_b_neg[ | 154 ] |
| 194 int(feature.location.start) : int(feature.location.end) | 155 for hit in hits: |
| 195 ] | 156 rec_a_hits_in_b.append(rec_b_map[hit.data]) |
| 196 for hit in hits_neg: | |
| 197 rec_a_hits_in_b.append(rec_b_map[hit.data]) | |
| 198 | 157 |
| 199 for feature in rec_b_i.features: | 158 for feature in rec_b_i.features: |
| 200 if feature.type == "remark" or feature.type == "annotation": | 159 if feature.type == "remark" or feature.type == "annotation": |
| 201 continue | 160 continue |
| 202 | 161 |
| 203 if stranding == False: | 162 if not stranding: |
| 204 hits = tree_a[ | 163 hits = tree_a[ |
| 205 int(feature.location.start) : int(feature.location.end) | 164 int(feature.location.start) : int(feature.location.end) |
| 206 ] | 165 ] |
| 207 | |
| 208 # feature id is saved in interval result.data, use map to get full feature | |
| 209 for hit in hits: | 166 for hit in hits: |
| 210 rec_b_hits_in_a.append(rec_a_map[hit.data]) | 167 rec_b_hits_in_a.append(rec_a_map[hit.data]) |
| 211 | |
| 212 else: | 168 else: |
| 213 if feature.strand > 0: | 169 if feature.strand > 0: |
| 214 hits_pos = tree_a_pos[ | 170 hits = tree_a_pos[ |
| 215 int(feature.location.start) : int(feature.location.end) | 171 int(feature.location.start) : int(feature.location.end) |
| 216 ] | 172 ] |
| 217 for hit in hits_pos: | 173 else: |
| 218 rec_b_hits_in_a.append(rec_a_map[hit.data]) | 174 hits = tree_a_neg[ |
| 219 else: | 175 int(feature.location.start) : int(feature.location.end) |
| 220 hits_neg = tree_a_neg[ | 176 ] |
| 221 int(feature.location.start) : int(feature.location.end) | 177 for hit in hits: |
| 222 ] | 178 rec_b_hits_in_a.append(rec_a_map[hit.data]) |
| 223 for hit in hits_neg: | |
| 224 rec_b_hits_in_a.append(rec_a_map[hit.data]) | |
| 225 | 179 |
| 226 # Remove duplicate features using sets | 180 # Remove duplicate features using sets |
| 227 rec_a_out.append( | 181 rec_a_out.append( |
| 228 SeqRecord( | 182 SeqRecord( |
| 229 rec_a[iterate].seq, | 183 rec_a[iterate].seq, |
| 266 type=int, | 220 type=int, |
| 267 default=50, | 221 default=50, |
| 268 help="Allows features this far away to still be considered 'adjacent'", | 222 help="Allows features this far away to still be considered 'adjacent'", |
| 269 ) | 223 ) |
| 270 parser.add_argument( | 224 parser.add_argument( |
| 271 "stranding", | 225 "-stranding", |
| 272 nargs="?", | 226 action="store_true", |
| 273 default="", | 227 help="Only allow adjacency for same-strand features", |
| 274 help="Only allow adjacency for same-strand features if set to '-stranding'", | |
| 275 ) | 228 ) |
| 276 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") | 229 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") |
| 277 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") | 230 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") |
| 278 args = parser.parse_args() | 231 args = parser.parse_args() |
| 279 | 232 |
| 280 stranding = args.stranding == "-stranding" | |
| 281 b, a = intersect(args.a, args.b, args.window, args.stranding) | 233 b, a = intersect(args.a, args.b, args.window, args.stranding) |
| 282 | 234 |
| 283 with open(args.oa, "w") as handle: | 235 with open(args.oa, "w") as handle: |
| 284 for rec in a: | 236 for rec in a: |
| 285 gffWrite([rec], handle) | 237 gffWrite([rec], handle) |
