Mercurial > repos > fubar > jbrowse2dev
comparison jbrowse2/gff3_rebase.py @ 6:88b9b105c09b draft
Uploaded
| author | fubar |
|---|---|
| date | Fri, 05 Jan 2024 01:58:02 +0000 |
| parents | cd5d63cd0eb5 |
| children |
comparison
equal
deleted
inserted
replaced
| 5:42ca8804cd93 | 6:88b9b105c09b |
|---|---|
| 61 feature_copy.sub_features = [] | 61 feature_copy.sub_features = [] |
| 62 yield feature_copy | 62 yield feature_copy |
| 63 else: | 63 else: |
| 64 yield feature | 64 yield feature |
| 65 | 65 |
| 66 if hasattr(feature, 'sub_features'): | 66 if hasattr(feature, "sub_features"): |
| 67 for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures): | 67 for x in feature_lambda( |
| 68 feature.sub_features, test, test_kwargs, subfeatures=subfeatures | |
| 69 ): | |
| 68 yield x | 70 yield x |
| 69 | 71 |
| 70 | 72 |
| 71 def feature_test_qual_value(feature, **kwargs): | 73 def feature_test_qual_value(feature, **kwargs): |
| 72 """Test qualifier values. | 74 """Test qualifier values. |
| 73 | 75 |
| 74 For every feature, check that at least one value in | 76 For every feature, check that at least one value in |
| 75 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] | 77 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] |
| 76 """ | 78 """ |
| 77 for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []): | 79 for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): |
| 78 if attribute_value in kwargs['attribute_list']: | 80 if attribute_value in kwargs["attribute_list"]: |
| 79 return True | 81 return True |
| 80 return False | 82 return False |
| 81 | 83 |
| 82 | 84 |
| 83 def __get_features(child, interpro=False): | 85 def __get_features(child, interpro=False): |
| 88 # Get the record id as parent_feature_id (since this is how it will be during remapping) | 90 # Get the record id as parent_feature_id (since this is how it will be during remapping) |
| 89 parent_feature_id = rec.id | 91 parent_feature_id = rec.id |
| 90 # If it's an interpro specific gff3 file | 92 # If it's an interpro specific gff3 file |
| 91 if interpro: | 93 if interpro: |
| 92 # Then we ignore polypeptide features as they're useless | 94 # Then we ignore polypeptide features as they're useless |
| 93 if feature.type == 'polypeptide': | 95 if feature.type == "polypeptide": |
| 94 continue | 96 continue |
| 95 # If there's an underscore, we strip up to that underscore? | 97 # If there's an underscore, we strip up to that underscore? |
| 96 # I do not know the rationale for this, removing. | 98 # I do not know the rationale for this, removing. |
| 97 # if '_' in parent_feature_id: | 99 # if '_' in parent_feature_id: |
| 98 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] | 100 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] |
| 99 | 101 |
| 100 try: | 102 try: |
| 101 child_features[parent_feature_id].append(feature) | 103 child_features[parent_feature_id].append(feature) |
| 102 except KeyError: | 104 except KeyError: |
| 103 child_features[parent_feature_id] = [feature] | 105 child_features[parent_feature_id] = [feature] |
| 132 if ne < 0: | 134 if ne < 0: |
| 133 ne %= 3 | 135 ne %= 3 |
| 134 | 136 |
| 135 feature.location = FeatureLocation(ns, ne, strand=st) | 137 feature.location = FeatureLocation(ns, ne, strand=st) |
| 136 | 138 |
| 137 if hasattr(feature, 'sub_features'): | 139 if hasattr(feature, "sub_features"): |
| 138 for subfeature in feature.sub_features: | 140 for subfeature in feature.sub_features: |
| 139 __update_feature_location(subfeature, parent, protein2dna) | 141 __update_feature_location(subfeature, parent, protein2dna) |
| 140 | 142 |
| 141 | 143 |
| 142 def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'): | 144 def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"): |
| 143 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID | 145 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID |
| 144 child_features = __get_features(child, interpro=interpro) | 146 child_features = __get_features(child, interpro=interpro) |
| 145 | 147 |
| 146 for rec in GFF.parse(parent): | 148 for rec in GFF.parse(parent): |
| 147 replacement_features = [] | 149 replacement_features = [] |
| 148 for feature in feature_lambda( | 150 for feature in feature_lambda( |
| 149 rec.features, | 151 rec.features, |
| 150 # Filter features in the parent genome by those that are | 152 # Filter features in the parent genome by those that are |
| 151 # "interesting", i.e. have results in child_features array. | 153 # "interesting", i.e. have results in child_features array. |
| 152 # Probably an unnecessary optimisation. | 154 # Probably an unnecessary optimisation. |
| 153 feature_test_qual_value, | 155 feature_test_qual_value, |
| 154 { | 156 { |
| 155 'qualifier': map_by, | 157 "qualifier": map_by, |
| 156 'attribute_list': child_features.keys(), | 158 "attribute_list": child_features.keys(), |
| 157 }, | 159 }, |
| 158 subfeatures=False): | 160 subfeatures=False, |
| 161 ): | |
| 159 | 162 |
| 160 # Features which will be re-mapped | 163 # Features which will be re-mapped |
| 161 to_remap = child_features[feature.id] | 164 to_remap = child_features[feature.id] |
| 162 # TODO: update starts | 165 # TODO: update starts |
| 163 fixed_features = [] | 166 fixed_features = [] |
| 164 for x in to_remap: | 167 for x in to_remap: |
| 165 # Then update the location of the actual feature | 168 # Then update the location of the actual feature |
| 166 __update_feature_location(x, feature, protein2dna) | 169 __update_feature_location(x, feature, protein2dna) |
| 167 | 170 |
| 168 if interpro: | 171 if interpro: |
| 169 for y in ('status', 'Target'): | 172 for y in ("status", "Target"): |
| 170 try: | 173 try: |
| 171 del x.qualifiers[y] | 174 del x.qualifiers[y] |
| 172 except Exception: | 175 except Exception: |
| 173 pass | 176 pass |
| 174 | 177 |
| 179 rec.features = replacement_features | 182 rec.features = replacement_features |
| 180 rec.annotations = {} | 183 rec.annotations = {} |
| 181 GFF.write([rec], sys.stdout) | 184 GFF.write([rec], sys.stdout) |
| 182 | 185 |
| 183 | 186 |
| 184 if __name__ == '__main__': | 187 if __name__ == "__main__": |
| 185 parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="") | 188 parser = argparse.ArgumentParser( |
| 186 parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations') | 189 description="rebase gff3 features against parent locations", epilog="" |
| 187 parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent') | 190 ) |
| 188 parser.add_argument('--interpro', action='store_true', | 191 parser.add_argument( |
| 189 help='Interpro specific modifications') | 192 "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations" |
| 190 parser.add_argument('--protein2dna', action='store_true', | 193 ) |
| 191 help='Map protein translated results to original DNA data') | 194 parser.add_argument( |
| 192 parser.add_argument('--map_by', help='Map by key', default='ID') | 195 "child", |
| 196 type=argparse.FileType("r"), | |
| 197 help="Child GFF3 annotations to rebase against parent", | |
| 198 ) | |
| 199 parser.add_argument( | |
| 200 "--interpro", action="store_true", help="Interpro specific modifications" | |
| 201 ) | |
| 202 parser.add_argument( | |
| 203 "--protein2dna", | |
| 204 action="store_true", | |
| 205 help="Map protein translated results to original DNA data", | |
| 206 ) | |
| 207 parser.add_argument("--map_by", help="Map by key", default="ID") | |
| 193 args = parser.parse_args() | 208 args = parser.parse_args() |
| 194 rebase(**vars(args)) | 209 rebase(**vars(args)) |
