annotate intersect_and_adjacent.py @ 1:f7d19040fafa draft

planemo upload commit 79bba3858aa6c0b5cc271fe69c485830acd428e2
author cpt
date Sun, 11 Aug 2024 22:22:44 +0000
parents 923d2528480d
children 10000414e916
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
1 #!/usr/bin/env python
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
2 import logging
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
3 import argparse
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
4 from intervaltree import IntervalTree, Interval
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
5 from CPT_GFFParser import gffParse, gffWrite
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
6 from Bio.SeqRecord import SeqRecord
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
7 from Bio.Seq import Seq
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
8
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
9 logging.basicConfig(level=logging.INFO)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
10 log = logging.getLogger(__name__)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
11
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
12
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
13 def validFeat(rec):
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
14 for feat in rec.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
15 if feat.type != "remark" and feat.type != "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
16 return True
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
17 return False
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
18
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
19
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
20 def treeFeatures(features, window):
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
21 for feat in features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
22 # Interval(begin, end, data)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
23 yield Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
24 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
25 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
26 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
27 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
28
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
29
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
30 def treeFeatures_noRem(features, window):
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
31 for feat in features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
32 if feat.type == "remark" or feat.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
33 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
34 # Interval(begin, end, data)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
35 yield Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
36 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
37 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
38 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
39 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
40
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
41
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
42 def intersect(a, b, window, stranding):
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
43 rec_a = list(gffParse(a))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
44 rec_b = list(gffParse(b))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
45 rec_a_out = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
46 rec_b_out = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
47 maxLen = min(len(rec_a), len(rec_b))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
48 iterate = 0
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
49
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
50 if maxLen > 0:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
51 while iterate < maxLen:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
52 rec_a_i = rec_a[iterate]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
53 rec_b_i = rec_b[iterate]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
54
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
55 if (not validFeat(rec_a_i)) or (not validFeat(rec_b_i)):
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
56 rec_a_out.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
57 SeqRecord(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
58 rec_a[iterate].seq,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
59 rec_a[iterate].id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
60 rec_a[iterate].name,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
61 rec_a[iterate].description,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
62 rec_a[iterate].dbxrefs,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
63 [],
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
64 rec_a[iterate].annotations,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
65 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
66 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
67 rec_b_out.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
68 SeqRecord(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
69 rec_b[iterate].seq,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
70 rec_b[iterate].id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
71 rec_b[iterate].name,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
72 rec_b[iterate].description,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
73 rec_b[iterate].dbxrefs,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
74 [],
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
75 rec_b[iterate].annotations,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
76 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
77 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
78 iterate += 1
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
79 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
80
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
81 a_neg = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
82 a_pos = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
83 b_neg = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
84 b_pos = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
85 tree_a = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
86 tree_b = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
87 if stranding == True:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
88 for feat in rec_a_i.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
89 if feat.type == "remark" or feat.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
90 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
91 if feat.strand > 0:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
92 a_pos.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
93 Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
94 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
95 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
96 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
97 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
98 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
99 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
100 a_neg.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
101 Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
102 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
103 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
104 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
105 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
106 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
107
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
108 for feat in rec_b_i.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
109 if feat.type == "remark" or feat.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
110 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
111 if feat.strand > 0:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
112 b_pos.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
113 Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
114 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
115 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
116 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
117 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
118 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
119 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
120 b_neg.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
121 Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
122 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
123 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
124 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
125 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
126 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
127
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
128 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
129 for feat in rec_a_i.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
130 if feat.type == "remark" or feat.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
131 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
132 tree_a.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
133 Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
134 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
135 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
136 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
137 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
138 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
139 for feat in rec_b_i.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
140 if feat.type == "remark" or feat.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
141 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
142 tree_b.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
143 Interval(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
144 int(feat.location.start) - int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
145 int(feat.location.end) + int(window),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
146 feat.id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
147 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
148 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
149 if stranding:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
150 # builds interval tree from Interval objects of form (start, end, id) for each feature
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
151 # tree_a = IntervalTree(list(treeFeatures_noRem(rec_a_i.features, window)))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
152 # tree_b = IntervalTree(list(treeFeatures_noRem(rec_b_i.features, window)))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
153 # else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
154 tree_a_pos = IntervalTree(a_pos)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
155 tree_a_neg = IntervalTree(a_neg)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
156 tree_b_pos = IntervalTree(b_pos)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
157 tree_b_neg = IntervalTree(b_neg)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
158 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
159 tree_a = IntervalTree(tree_a)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
160 tree_b = IntervalTree(tree_b)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
161
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
162 # Used to map ids back to features later
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
163 rec_a_map = {f.id: f for f in rec_a_i.features}
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
164 rec_b_map = {f.id: f for f in rec_b_i.features}
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
165
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
166 rec_a_hits_in_b = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
167 rec_b_hits_in_a = []
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
168
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
169 for feature in rec_a_i.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
170 # Save each feature in rec_a that overlaps a feature in rec_b
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
171 # hits = tree_b.find_range((int(feature.location.start), int(feature.location.end)))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
172
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
173 if feature.type == "remark" or feature.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
174 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
175
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
176 if stranding == False:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
177 hits = tree_b[
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
178 int(feature.location.start) : int(feature.location.end)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
179 ]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
180
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
181 # feature id is saved in interval result.data, use map to get full feature
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
182 for hit in hits:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
183 rec_a_hits_in_b.append(rec_b_map[hit.data])
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
184
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
185 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
186 if feature.strand > 0:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
187 hits_pos = tree_b_pos[
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
188 int(feature.location.start) : int(feature.location.end)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
189 ]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
190 for hit in hits_pos:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
191 rec_a_hits_in_b.append(rec_b_map[hit.data])
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
192 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
193 hits_neg = tree_b_neg[
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
194 int(feature.location.start) : int(feature.location.end)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
195 ]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
196 for hit in hits_neg:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
197 rec_a_hits_in_b.append(rec_b_map[hit.data])
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
198
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
199 for feature in rec_b_i.features:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
200 if feature.type == "remark" or feature.type == "annotation":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
201 continue
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
202
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
203 if stranding == False:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
204 hits = tree_a[
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
205 int(feature.location.start) : int(feature.location.end)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
206 ]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
207
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
208 # feature id is saved in interval result.data, use map to get full feature
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
209 for hit in hits:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
210 rec_b_hits_in_a.append(rec_a_map[hit.data])
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
211
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
212 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
213 if feature.strand > 0:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
214 hits_pos = tree_a_pos[
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
215 int(feature.location.start) : int(feature.location.end)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
216 ]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
217 for hit in hits_pos:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
218 rec_b_hits_in_a.append(rec_a_map[hit.data])
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
219 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
220 hits_neg = tree_a_neg[
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
221 int(feature.location.start) : int(feature.location.end)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
222 ]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
223 for hit in hits_neg:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
224 rec_b_hits_in_a.append(rec_a_map[hit.data])
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
225
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
226 # Remove duplicate features using sets
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
227 rec_a_out.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
228 SeqRecord(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
229 rec_a[iterate].seq,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
230 rec_a[iterate].id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
231 rec_a[iterate].name,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
232 rec_a[iterate].description,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
233 rec_a[iterate].dbxrefs,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
234 sorted(set(rec_a_hits_in_b), key=lambda feat: feat.location.start),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
235 rec_a[iterate].annotations,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
236 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
237 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
238 rec_b_out.append(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
239 SeqRecord(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
240 rec_b[iterate].seq,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
241 rec_b[iterate].id,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
242 rec_b[iterate].name,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
243 rec_b[iterate].description,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
244 rec_b[iterate].dbxrefs,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
245 sorted(set(rec_b_hits_in_a), key=lambda feat: feat.location.start),
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
246 rec_b[iterate].annotations,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
247 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
248 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
249 iterate += 1
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
250
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
251 else:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
252 # If one input is empty, output two empty result files.
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
253 rec_a_out = [SeqRecord(Seq(""), "none")]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
254 rec_b_out = [SeqRecord(Seq(""), "none")]
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
255 return rec_a_out, rec_b_out
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
256
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
257
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
258 if __name__ == "__main__":
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
259 parser = argparse.ArgumentParser(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
260 description="rebase gff3 features against parent locations", epilog=""
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
261 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
262 parser.add_argument("a", type=argparse.FileType("r"))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
263 parser.add_argument("b", type=argparse.FileType("r"))
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
264 parser.add_argument(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
265 "window",
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
266 type=int,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
267 default=50,
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
268 help="Allows features this far away to still be considered 'adjacent'",
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
269 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
270 parser.add_argument(
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
271 "-stranding",
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
272 action="store_true",
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
273 help="Only allow adjacency for same-strand features",
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
274 )
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
275 parser.add_argument("--oa", type=str, default="a_hits_near_b.gff")
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
276 parser.add_argument("--ob", type=str, default="b_hits_near_a.gff")
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
277 args = parser.parse_args()
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
278
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
279 b, a = intersect(args.a, args.b, args.window, args.stranding)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
280
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
281 with open(args.oa, "w") as handle:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
282 for rec in a:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
283 gffWrite([rec], handle)
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
284
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
285 with open(args.ob, "w") as handle:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
286 for rec in b:
923d2528480d planemo upload commit 8500f316f8325380aa5d0000fbd13dd96012b1bb
cpt
parents:
diff changeset
287 gffWrite([rec], handle)