Mercurial > repos > cpt > cpt_fix_aragorn
annotate fix-aragorn-gff3.py @ 5:8da37080e35f draft default tip
planemo upload commit f33bdf952d796c5d7a240b132af3c4cbd102decc
| author | cpt |
|---|---|
| date | Fri, 05 Jan 2024 05:51:12 +0000 |
| parents | 733cb0807083 |
| children |
| rev | line source |
|---|---|
|
3
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
2 import sys |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
3 import logging |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
4 import argparse |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
5 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
6 from gff3 import feature_lambda, feature_test_type |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
7 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
# Send INFO-and-above messages through the root logger's default handler and
# expose a module-scoped logger for this script.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
10 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
11 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
def fixed_feature(rec):
    """Yield a ``gene -> tRNA/tmRNA -> exon`` hierarchy for each RNA feature.

    Every feature of type ``tRNA`` or ``tmRNA`` selected from
    ``rec.features`` (sub-features included) is renamed, given a generated ID,
    handed a single child ``exon`` feature, and wrapped in a newly created
    parent ``gene`` feature. All three features share the original location.

    The selected features are modified in place: their ``Name``, ``ID`` and
    ``Parent`` qualifiers and their ``sub_features`` list are overwritten.

    :param rec: record object exposing a ``features`` collection.
    :returns: generator of the newly created ``gene`` features.
    :raises KeyError: if a feature has neither a ``Codon`` nor a ``product``
        qualifier from which a name can be derived.
    """
    rna_features = feature_lambda(
        rec.features,
        feature_test_type,
        {"types": ["tRNA", "tmRNA"]},
        subfeatures=True,
    )
    for idx, feature in enumerate(rna_features):
        # IDs are 1-based and zero-padded, e.g. "tRNA-001".
        fid = "%s-%03d" % (feature.type, 1 + idx)
        gene_id = fid + ".gene"

        # Prefer a "<type>-<codon>" name; fall back to the product qualifier
        # when no Codon qualifier is present.
        try:
            name = [feature.type + "-" + feature.qualifiers["Codon"][0]]
        except KeyError:
            name = [feature.qualifiers["product"][0]]

        # Only a missing or empty "source" qualifier should trigger the "."
        # placeholder. A bare ``except`` here would also swallow
        # KeyboardInterrupt/SystemExit and hide unrelated bugs.
        try:
            origSource = feature.qualifiers["source"][0]
        except (KeyError, IndexError):
            origSource = "."

        gene = gffSeqFeature(
            location=feature.location,
            type="gene",
            qualifiers={"ID": [gene_id], "source": [origSource], "Name": name},
        )
        feature.qualifiers["Name"] = name

        # Below the RNA feature sits a single exon spanning the same location.
        exon = gffSeqFeature(
            location=feature.location,
            type="exon",
            qualifiers={"source": [origSource], "ID": [fid + ".exon"], "Name": name},
        )
        feature.qualifiers["ID"] = [fid]
        exon.qualifiers["Parent"] = [fid]
        feature.qualifiers["Parent"] = [gene_id]

        # Final hierarchy: gene -> tRNA/tmRNA -> exon
        feature.sub_features = [exon]
        gene.sub_features = [feature]
        yield gene
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
50 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
51 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
def gff_filter(gff3):
    """Rewrite a GFF3 stream so its records hold gene-wrapped RNA features.

    Each record produced by ``gffParse(gff3)`` has its feature list replaced
    by the output of ``fixed_feature`` ordered by start coordinate, has its
    annotations cleared, and is then written to standard output. When the
    parser yields no record at all, only the GFF3 version header is printed.

    :param gff3: the GFF3 input handed straight to ``gffParse``.
    """
    saw_record = False
    for record in gffParse(gff3):
        saw_record = True
        wrapped_genes = list(fixed_feature(record))
        # list.sort is stable, so features sharing a start keep their order.
        wrapped_genes.sort(key=lambda gene: gene.location.start)
        record.features = wrapped_genes
        record.annotations = {}
        gffWrite([record], sys.stdout)

    if not saw_record:
        # Still emit the version header when the input held no records.
        print("##gff-version 3")
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
61 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
62 |
|
f0f0ab9db43f
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: read one GFF3 file (or "-" for stdin) and
    # write the fixed annotations to stdout.
    parser = argparse.ArgumentParser(
        # The script wraps tRNA/tmRNA features, not CDSs, in parent genes.
        description="add parent gene features to tRNA/tmRNA features"
    )
    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
    args = parser.parse_args()
    try:
        gff_filter(**vars(args))
    finally:
        # argparse.FileType opens the handle but never closes it for us.
        args.gff3.close()
