Mercurial > repos > cpt > cpt_gff_extract_seq
annotate gff3.py @ 4:34b80e483fb8 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:43:58 +0000 |
parents | |
children |
rev | line source |
---|---|
4
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1 import copy |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
2 import logging |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
3 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
4 log = logging.getLogger() |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
5 log.setLevel(logging.WARN) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
6 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
7 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
8 def feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
9 feature_list, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
10 test, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
11 test_kwargs, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
12 subfeatures=True, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
13 parent=None, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
14 invert=False, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
15 recurse=True, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
16 ): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
17 """Recursively search through features, testing each with a test function, yielding matches. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
18 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
19 GFF3 is a hierachical data structure, so we need to be able to recursively |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
20 search through features. E.g. if you're looking for a feature with |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
21 ID='bob.42', you can't just do a simple list comprehension with a test |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
22 case. You don't know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
23 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
24 :type feature_list: list |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
25 :param feature_list: an iterable of features |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
26 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
27 :type test: function reference |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
28 :param test: a closure with the method signature (feature, **kwargs) where |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
29 the kwargs are those passed in the next argument. This |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
30 function should return True or False, True if the feature is |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
31 to be yielded as part of the main feature_lambda function, or |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
32 False if it is to be ignored. This function CAN mutate the |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
33 features passed to it (think "apply"). |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
34 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
35 :type test_kwargs: dictionary |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
36 :param test_kwargs: kwargs to pass to your closure when it is called. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
37 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
38 :type subfeatures: boolean |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
39 :param subfeatures: when a feature is matched, should just that feature be |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
40 yielded to the caller, or should the entire sub_feature |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
41 tree for that feature be included? subfeatures=True is |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
42 useful in cases such as searching for a gene feature, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
43 and wanting to know what RBS/Shine_Dalgarno_sequences |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
44 are in the sub_feature tree (which can be accomplished |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
45 with two feature_lambda calls). subfeatures=False is |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
46 useful in cases when you want to process (and possibly |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
47 return) the entire feature tree, such as applying a |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
48 qualifier to every single feature. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
49 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
50 :type invert: boolean |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
51 :param invert: Negate/invert the result of the filter. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
52 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
53 :rtype: yielded list |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
54 :return: Yields a list of matching features. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
55 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
56 # Either the top level set of [features] or the subfeature attribute |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
57 for feature in feature_list: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
58 feature._parent = parent |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
59 if not parent: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
60 # Set to self so we cannot go above root. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
61 feature._parent = feature |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
62 test_result = test(feature, **test_kwargs) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
63 # if (not invert and test_result) or (invert and not test_result): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
64 if invert ^ test_result: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
65 if not subfeatures: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
66 feature_copy = copy.deepcopy(feature) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
67 feature_copy.sub_features = list() |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
68 yield feature_copy |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
69 else: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
70 yield feature |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
71 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
72 if recurse and hasattr(feature, "sub_features"): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
73 for x in feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
74 feature.sub_features, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
75 test, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
76 test_kwargs, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
77 subfeatures=subfeatures, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
78 parent=feature, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
79 invert=invert, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
80 recurse=recurse, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
81 ): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
82 yield x |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
83 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
84 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
85 def fetchParent(feature): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
86 if not hasattr(feature, "_parent") or feature._parent is None: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
87 return feature |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
88 else: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
89 return fetchParent(feature._parent) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
90 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
91 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
92 def feature_test_true(feature, **kwargs): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
93 return True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
94 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
95 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
96 def feature_test_type(feature, **kwargs): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
97 if "type" in kwargs: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
98 return str(feature.type).upper() == str(kwargs["type"]).upper() |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
99 elif "types" in kwargs: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
100 for x in kwargs["types"]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
101 if str(feature.type).upper() == str(x).upper(): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
102 return True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
103 return False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
104 raise Exception("Incorrect feature_test_type call, need type or types") |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
105 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
106 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
107 def feature_test_qual_value(feature, **kwargs): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
108 """Test qualifier values. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
109 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
110 For every feature, check that at least one value in |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
111 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
112 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
113 if isinstance(kwargs["qualifier"], list): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
114 for qualifier in kwargs["qualifier"]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
115 for attribute_value in feature.qualifiers.get(qualifier, []): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
116 if attribute_value in kwargs["attribute_list"]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
117 return True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
118 else: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
119 for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
120 if attribute_value in kwargs["attribute_list"]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
121 return True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
122 return False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
123 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
124 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
125 def feature_test_location(feature, **kwargs): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
126 if "strand" in kwargs: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
127 if feature.location.strand != kwargs["strand"]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
128 return False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
129 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
130 return feature.location.start <= kwargs["loc"] <= feature.location.end |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
131 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
132 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
133 def feature_test_quals(feature, **kwargs): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
134 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
135 Example:: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
136 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
137 a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']}) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
138 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
139 # Check if a contains a Note |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
140 feature_test_quals(a, {'Note': None}) # Returns True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
141 feature_test_quals(a, {'Product': None}) # Returns False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
142 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
143 # Check if a contains a note with specific value |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
144 feature_test_quals(a, {'Note': ['ome']}) # Returns True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
145 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
146 # Check if a contains a note with specific value |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
147 feature_test_quals(a, {'Note': ['other']}) # Returns False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
148 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
149 for key in kwargs: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
150 if key not in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
151 return False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
152 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
153 # Key is present, no value specified |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
154 if kwargs[key] is None: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
155 return True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
156 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
157 # Otherwise there is a key value we're looking for. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
158 # so we make a list of matches |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
159 matches = [] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
160 # And check all of the feature qualifier valuse |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
161 for value in feature.qualifiers[key]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
162 # For that kwargs[key] value |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
163 for x in kwargs[key]: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
164 matches.append(x in value) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
165 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
166 # If none matched, then we return false. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
167 if not any(matches): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
168 return False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
169 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
170 return True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
171 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
172 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
173 def feature_test_contains(feature, **kwargs): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
174 if "index" in kwargs: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
175 return feature.location.start < kwargs["index"] < feature.location.end |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
176 elif "range" in kwargs: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
177 return ( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
178 feature.location.start < kwargs["range"]["start"] < feature.location.end |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
179 and feature.location.start < kwargs["range"]["end"] < feature.location.end |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
180 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
181 else: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
182 raise RuntimeError("Must use index or range keyword") |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
183 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
184 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
185 def get_id(feature=None, parent_prefix=None): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
186 result = "" |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
187 if parent_prefix is not None: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
188 result += parent_prefix + "|" |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
189 if "locus_tag" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
190 result += feature.qualifiers["locus_tag"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
191 elif "gene" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
192 result += feature.qualifiers["gene"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
193 elif "Gene" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
194 result += feature.qualifiers["Gene"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
195 elif "product" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
196 result += feature.qualifiers["product"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
197 elif "Product" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
198 result += feature.qualifiers["Product"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
199 elif "Name" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
200 result += feature.qualifiers["Name"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
201 else: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
202 return feature.id |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
203 # Leaving in case bad things happen. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
204 # result += '%s_%s_%s_%s' % ( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
205 # feature.id, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
206 # feature.location.start, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
207 # feature.location.end, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
208 # feature.location.strand |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
209 # ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
210 return result |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
211 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
212 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
213 def get_gff3_id(gene): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
214 return gene.qualifiers.get("Name", [gene.id])[0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
215 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
216 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
217 def ensure_location_in_bounds(start=0, end=0, parent_length=0): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
218 # This prevents frameshift errors |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
219 while start < 0: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
220 start += 3 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
221 while end < 0: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
222 end += 3 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
223 while start > parent_length: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
224 start -= 3 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
225 while end > parent_length: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
226 end -= 3 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
227 return (start, end) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
228 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
229 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
230 def coding_genes(feature_list): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
231 for x in genes(feature_list): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
232 if ( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
233 len( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
234 list( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
235 feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
236 x.sub_features, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
237 feature_test_type, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
238 {"type": "CDS"}, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
239 subfeatures=False, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
240 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
241 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
242 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
243 > 0 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
244 ): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
245 yield x |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
246 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
247 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
248 def genes(feature_list, feature_type="gene", sort=False): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
249 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
250 Simple filter to extract gene features from the feature set. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
251 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
252 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
253 if not sort: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
254 for x in feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
255 feature_list, feature_test_type, {"type": feature_type}, subfeatures=True |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
256 ): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
257 yield x |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
258 else: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
259 data = list(genes(feature_list, feature_type=feature_type, sort=False)) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
260 data = sorted(data, key=lambda feature: feature.location.start) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
261 for x in data: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
262 yield x |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
263 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
264 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
265 def wa_unified_product_name(feature): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
266 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
267 Try and figure out a name. We gave conflicting instructions, so |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
268 this isn't as trivial as it should be. Sometimes it will be in |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
269 'product' or 'Product', othertimes in 'Name' |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
270 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
271 # Manually applied tags. |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
272 protein_product = feature.qualifiers.get( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
273 "product", feature.qualifiers.get("Product", [None]) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
274 )[0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
275 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
276 # If neither of those are available ... |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
277 if protein_product is None: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
278 # And there's a name... |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
279 if "Name" in feature.qualifiers: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
280 if not is_uuid(feature.qualifiers["Name"][0]): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
281 protein_product = feature.qualifiers["Name"][0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
282 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
283 return protein_product |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
284 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
285 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
286 def is_uuid(name): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
287 return name.count("-") == 4 and len(name) == 36 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
288 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
289 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
290 def get_rbs_from(gene): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
291 # Normal RBS annotation types |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
292 rbs_rbs = list( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
293 feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
294 gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
295 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
296 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
297 rbs_sds = list( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
298 feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
299 gene.sub_features, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
300 feature_test_type, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
301 {"type": "Shine_Dalgarno_sequence"}, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
302 subfeatures=False, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
303 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
304 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
305 # Fraking apollo |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
306 apollo_exons = list( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
307 feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
308 gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
309 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
310 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
311 apollo_exons = [x for x in apollo_exons if len(x) < 10] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
312 # These are more NCBI's style |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
313 regulatory_elements = list( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
314 feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
315 gene.sub_features, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
316 feature_test_type, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
317 {"type": "regulatory"}, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
318 subfeatures=False, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
319 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
320 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
321 rbs_regulatory = list( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
322 feature_lambda( |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
323 regulatory_elements, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
324 feature_test_quals, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
325 {"regulatory_class": ["ribosome_binding_site"]}, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
326 subfeatures=False, |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
327 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
328 ) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
329 # Here's hoping you find just one ;) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
330 return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
331 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
332 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
333 def nice_name(record): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
334 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
335 get the real name rather than NCBI IDs and so on. If fails, will return record.id |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
336 """ |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
337 name = record.id |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
338 likely_parental_contig = list(genes(record.features, feature_type="contig")) |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
339 if len(likely_parental_contig) == 1: |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
340 name = likely_parental_contig[0].qualifiers.get("organism", [name])[0] |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
341 return name |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
342 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
343 |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
344 def fsort(it): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
345 for i in sorted(it, key=lambda x: int(x.location.start)): |
34b80e483fb8
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
346 yield i |