Mercurial > repos > cpt > cpt_intersect_adj
changeset 4:3e3b5ba626b9 draft
planemo upload commit c3db49ef44729e27fa7ca5ade2ea27d7074072ca
author | cpt |
---|---|
date | Mon, 12 Aug 2024 04:33:20 +0000 |
parents | 10000414e916 |
children | 00aab5199327 |
files | intersect_and_adjacent.py |
diffstat | 1 files changed, 43 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/intersect_and_adjacent.py Mon Aug 12 04:23:34 2024 +0000 +++ b/intersect_and_adjacent.py Mon Aug 12 04:33:20 2024 +0000 @@ -84,73 +84,40 @@ b_pos = [] tree_a = [] tree_b = [] - if stranding == True: - for feat in rec_a_i.features: - if feat.type == "remark" or feat.type == "annotation": - continue + + for feat in rec_a_i.features: + if feat.type == "remark" or feat.type == "annotation": + continue + interval = Interval( + int(feat.location.start) - int(window), + int(feat.location.end) + int(window), + feat.id, + ) + if stranding: if feat.strand > 0: - a_pos.append( - Interval( - int(feat.location.start) - int(window), - int(feat.location.end) + int(window), - feat.id, - ) - ) - else: - a_neg.append( - Interval( - int(feat.location.start) - int(window), - int(feat.location.end) + int(window), - feat.id, - ) - ) - - for feat in rec_b_i.features: - if feat.type == "remark" or feat.type == "annotation": - continue - if feat.strand > 0: - b_pos.append( - Interval( - int(feat.location.start) - int(window), - int(feat.location.end) + int(window), - feat.id, - ) - ) + a_pos.append(interval) else: - b_neg.append( - Interval( - int(feat.location.start) - int(window), - int(feat.location.end) + int(window), - feat.id, - ) - ) + a_neg.append(interval) + else: + tree_a.append(interval) - else: - for feat in rec_a_i.features: - if feat.type == "remark" or feat.type == "annotation": - continue - tree_a.append( - Interval( - int(feat.location.start) - int(window), - int(feat.location.end) + int(window), - feat.id, - ) - ) - for feat in rec_b_i.features: - if feat.type == "remark" or feat.type == "annotation": - continue - tree_b.append( - Interval( - int(feat.location.start) - int(window), - int(feat.location.end) + int(window), - feat.id, - ) - ) + for feat in rec_b_i.features: + if feat.type == "remark" or feat.type == "annotation": + continue + interval = Interval( + int(feat.location.start) - int(window), + int(feat.location.end) + int(window), + feat.id, + ) + if stranding: + if feat.strand > 0: + b_pos.append(interval) + else: + b_neg.append(interval) + else: + tree_b.append(interval) + if stranding: - # builds interval tree from Interval objects of form (start, end, id) for each feature - # tree_a = IntervalTree(list(treeFeatures_noRem(rec_a_i.features, window))) - # tree_b = IntervalTree(list(treeFeatures_noRem(rec_b_i.features, window))) - # else: tree_a_pos = IntervalTree(a_pos) tree_a_neg = IntervalTree(a_neg) tree_b_pos = IntervalTree(b_pos) @@ -167,61 +134,48 @@ rec_b_hits_in_a = [] for feature in rec_a_i.features: - # Save each feature in rec_a that overlaps a feature in rec_b - # hits = tree_b.find_range((int(feature.location.start), int(feature.location.end))) - if feature.type == "remark" or feature.type == "annotation": continue - if stranding == False: + if not stranding: hits = tree_b[ int(feature.location.start) : int(feature.location.end) ] - - # feature id is saved in interval result.data, use map to get full feature for hit in hits: rec_a_hits_in_b.append(rec_b_map[hit.data]) - else: if feature.strand > 0: - hits_pos = tree_b_pos[ + hits = tree_b_pos[ int(feature.location.start) : int(feature.location.end) ] - for hit in hits_pos: - rec_a_hits_in_b.append(rec_b_map[hit.data]) else: - hits_neg = tree_b_neg[ + hits = tree_b_neg[ int(feature.location.start) : int(feature.location.end) ] - for hit in hits_neg: - rec_a_hits_in_b.append(rec_b_map[hit.data]) + for hit in hits: + rec_a_hits_in_b.append(rec_b_map[hit.data]) for feature in rec_b_i.features: if feature.type == "remark" or feature.type == "annotation": continue - if stranding == False: + if not stranding: hits = tree_a[ int(feature.location.start) : int(feature.location.end) ] - - # feature id is saved in interval result.data, use map to get full feature for hit in hits: rec_b_hits_in_a.append(rec_a_map[hit.data]) - else: if feature.strand > 0: - hits_pos = tree_a_pos[ + hits = tree_a_pos[ int(feature.location.start) : int(feature.location.end) ] - for hit in hits_pos: - rec_b_hits_in_a.append(rec_a_map[hit.data]) else: - hits_neg = tree_a_neg[ + hits = tree_a_neg[ int(feature.location.start) : int(feature.location.end) ] - for hit in hits_neg: - rec_b_hits_in_a.append(rec_a_map[hit.data]) + for hit in hits: + rec_b_hits_in_a.append(rec_a_map[hit.data]) # Remove duplicate features using sets rec_a_out.append( @@ -268,16 +222,14 @@ help="Allows features this far away to still be considered 'adjacent'", ) parser.add_argument( - "stranding", - nargs="?", - default="", - help="Only allow adjacency for same-strand features if set to '-stranding'", + "-stranding", + action="store_true", + help="Only allow adjacency for same-strand features", ) parser.add_argument("--oa", type=str, default="a_hits_near_b.gff") parser.add_argument("--ob", type=str, default="b_hits_near_a.gff") args = parser.parse_args() - stranding = args.stranding == "-stranding" b, a = intersect(args.a, args.b, args.window, args.stranding) with open(args.oa, "w") as handle: