Mercurial > repos > vipints > fml_gff3togtf
annotate GFFtools-GX/GFFParser.py @ 3:ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
author | vipints |
---|---|
date | Wed, 11 Jun 2014 16:29:25 -0400 |
parents | |
children |
rev | line source |
---|---|
3
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
1 #!/usr/bin/env python |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
2 """ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
3 Extract genome annotation from a GFF (a tab delimited format for storing sequence features and annotations) file. |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
4 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
5 Requirements: |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
6 Numpy :- http://numpy.org/ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
7 Scipy :- http://scipy.org/ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
8 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
9 Copyright (C) |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
10 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
11 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
12 2012-2014 Memorial Sloan Kettering Cancer Center, New York City, USA. |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
13 """ |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
14 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
15 import re |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
16 import os |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
17 import sys |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
18 import urllib |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
19 import numpy as np |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
20 import scipy.io as sio |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
21 from collections import defaultdict |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
22 import helper as utils |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
23 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def attribute_tags(col9):
    """
    Split the key-value tags from the attribute column, it takes column number 9 from GTF/GFF file

    GFF3 style attributes (key=value;key=value) and GTF style attributes
    (key "value"; key "value";) are recognised from the first attribute. A
    column that matches neither pattern is stored as a single value under the
    key 'ID'. Comma separated values are split into a list, double quotes are
    removed and URL percent-escapes are decoded.

    @args col9: attribute column from GFF file
    @type col9: str
    @return: (is_gff, info); is_gff is True when the first attribute matches
        the GFF3 key=value pattern, info maps each attribute key to its list
        of values
    @rtype: tuple (bool, collections defaultdict)
    """
    # urllib.unquote (Python 2) lives in urllib.parse on Python 3
    unquote = getattr(urllib, "unquote", None)
    if unquote is None:
        from urllib.parse import unquote

    info = defaultdict(list)
    is_gff = False

    if not col9:
        return is_gff, info

    # trim the line ending semi-colon, ucsc may have some white-space
    col9 = col9.rstrip(';| ')
    if not col9:
        # nothing but separators: treat it like a missing attribute column
        return is_gff, info

    # attributes from 9th column: try the separators from the most to the
    # least specific and keep the first one that actually splits the column
    for separator in (" ; ", "; ", ";"):
        atbs = col9.split(separator)
        if len(atbs) > 1:
            break

    # check the GFF3 pattern which has key value pairs like: key=value
    gff3_pat = re.compile(r"\w+=")
    # sometime GTF have: gene_id uc002zkg.1;
    gtf_pat = re.compile(r"\s?\w+\s")

    if gff3_pat.match(atbs[0]):  # gff3 pattern
        is_gff = True
        # split on the first '=' only, the value itself may contain '='
        key_vals = [at.split('=', 1) for at in atbs]
    elif gtf_pat.match(atbs[0]):  # gtf pattern
        key_vals = [at.strip().split(" ", 1) for at in atbs]
    else:
        # to handle attribute column has only single value
        key_vals = [['ID', atbs[0]]]

    # get key, val items
    for item in key_vals:
        if len(item) != 2:
            # empty fragment or a key without value: nothing to store
            continue
        key, val = item
        # replace the double quotes from feature identifier
        val = val.replace('"', '')
        # replace the web formatting place holders to plain text format
        values = [unquote(v) for v in val.split(',') if v]
        if values:
            # keys are only created when they carry at least one value, so
            # callers can safely index the first element of any stored list
            info[key].extend(values)

    return is_gff, info
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
70 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def spec_features_keywd(gff_parts):
    """
    Specify the feature key word according to the GFF specifications

    Copies the first available transcript level identifier of the record to
    the 'Parent' key and the first available gene level identifier to the
    'GParent' key of gff_parts['info']. For the flat 'Transcript'/'CDS'
    attribute keys the value is used either as the record id (parent like
    feature types) or as 'Parent' (child feature types).

    @args gff_parts: parsed record holding the keys 'info' (attribute key to
        list of values), 'type' (feature type) and 'id' (feature identifier)
    @type gff_parts: dict
    @return: the same gff_parts object, updated in place
    @rtype: dict
    """
    info = gff_parts["info"]

    # the first transcript level identifier found becomes the parent
    for t_id in ("transcript_id", "transcriptId", "proteinId"):
        if t_id in info:
            info["Parent"] = info[t_id]
            break

    # the first gene level identifier found becomes the grand parent
    for g_id in ("gene_id", "geneid", "geneId", "name", "gene_name", "genename"):
        if g_id in info:
            info["GParent"] = info[g_id]
            break

    child_types = ("intron", "exon", "three_prime_UTR", "coding_exon",
                   "five_prime_UTR", "CDS", "stop_codon", "start_codon")

    ## TODO key words
    for flat_name in ("Transcript", "CDS"):
        if flat_name not in info:
            continue
        feature_type = gff_parts["type"]
        # parents
        if feature_type == flat_name or re.search(r'transcript', feature_type, re.IGNORECASE):
            if not gff_parts["id"]:
                gff_parts["id"] = info[flat_name][0]
        # children
        elif feature_type in child_types:
            info["Parent"] = info[flat_name]
        # only the first flat key present in the record is considered
        break

    return gff_parts
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
105 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def Parse(ga_file):
    """
    Parsing GFF/GTF file based on feature relationship, it takes the input file.

    Every record with an attribute column is classified as a child (it has a
    'Parent' attribute) or a parent (it only has an identifier); records with
    neither are currently ignored. For non GFF3 input the missing parent
    features are created with create_missing_feature_type before the gene
    models are assembled with format_gene_models.

    @args ga_file: input file name
    @type ga_file: str
    @return: gene models as returned by format_gene_models
    """
    child_map = defaultdict(list)
    parent_map = dict()
    # format flag of the most recent record that carried attributes; it stays
    # True when the file has no usable record, so nothing is synthesised from
    # empty maps (the flag was previously unbound in that case)
    is_gff3 = True

    ga_handle = utils.open_file(ga_file)
    try:
        for rec in ga_handle:
            rec = rec.strip('\n\r')

            # skip empty line fasta identifier and commented line
            if not rec or rec[0] in ('#', '>'):
                continue
            # skip the genome sequence
            if '\t' not in rec:
                continue

            parts = rec.split('\t')
            assert len(parts) >= 8, rec

            # process the attribute column (9th column); a line with only 8
            # columns has no attribute column and must not fall back to phase
            col9 = parts[-1] if len(parts) > 8 else ''
            rec_is_gff3, tags = attribute_tags(col9)
            if not tags:  # skip the line if no attribute column.
                continue
            is_gff3 = rec_is_gff3

            # extract fields
            if parts[1]:
                tags["source"] = parts[1]
            if parts[7]:
                tags["phase"] = parts[7]

            gff_info = dict()
            gff_info['info'] = dict(tags)
            gff_info['chr'] = parts[0]
            gff_info['score'] = parts[5]

            # NOTE: a record without start/end coordinates gets no 'location'
            # entry and fails with KeyError when it is stored below
            if parts[3] and parts[4]:
                gff_info['location'] = [int(parts[3]), int(parts[4])]
            gff_info['type'] = parts[2]
            # 'or' guards against an ID key that carries an empty value list
            gff_info['id'] = (tags.get('ID') or [''])[0]
            # unknown strand is stored as None
            gff_info['strand'] = None if parts[6] in ('?', '.') else parts[6]

            # key word according to the GFF spec.
            if not rec_is_gff3:
                gff_info = spec_features_keywd(gff_info)

            info = gff_info['info']
            # equals info['source'] whenever the source column is filled and
            # avoids a KeyError when it is empty
            source = parts[1]

            # link the feature relationships
            if 'Parent' in info:
                # child record: a feature naming itself as parent keeps no own id
                if gff_info['id'] in info['Parent']:
                    gff_info['id'] = ''
                for p in info['Parent']:
                    # create the data structure based on source and feature id
                    child_map[(gff_info['chr'], source, p)].append(
                        dict(type=gff_info['type'],
                             location=gff_info['location'],
                             strand=gff_info['strand'],
                             score=gff_info['score'],
                             ID=gff_info['id'],
                             gene_id=info.get('GParent', '')))
            elif gff_info['id']:
                # parent record
                parent_map[(gff_info['chr'], source, gff_info['id'])] = dict(
                    type=gff_info['type'],
                    location=gff_info['location'],
                    strand=gff_info['strand'],
                    score=gff_info['score'],
                    name=(tags.get('Name') or [''])[0])
            # TODO how to handle plain records (neither ID nor Parent)?
    finally:
        # release the handle even when a malformed record aborts the parsing
        ga_handle.close()

    # depends on file type create parent feature
    if not is_gff3:
        parent_map, child_map = create_missing_feature_type(parent_map, child_map)

    # connecting parent child relations
    # // essentially the parent child features are here from any type of GTF/GFF2/GFF3 file
    gene_mat = format_gene_models(parent_map, child_map)

    return gene_mat
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
206 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def format_gene_models(parent_nf_map, child_nf_map):
    """
    Generate GeneObject based on the parsed file contents

    Every parent feature whose type contains 'gene' becomes one record of
    the returned structured array; its transcripts and their sub-features
    (exon, CDS, UTRs, stop_codon) are looked up in child_nf_map.

    Note: a gene record without a 'location' gets one inferred from its
    transcripts, and that value is written back into parent_nf_map.

    @args parent_nf_map: parent features with source and chromosome information
    @type parent_nf_map: collections defaultdict
    @args child_nf_map: transcript and exon information are encoded
    @type child_nf_map: collections defaultdict
    @return: one record per gene feature, unused trailing records removed
    @rtype: numpy structured array (dtype from utils.init_gene())
    """
    g_cnt = 0
    gene = np.zeros((len(parent_nf_map),), dtype = utils.init_gene())

    # fields that are not derived from the annotation file yet // TODO fill this
    empty_list_fields = ('anno_id', 'confgenes_id', 'name2', 'chr_num',
                'paralogs', 'transcript_info', 'transcript_valid',
                'exons_confirmed', 'tis_conf', 'tis_info', 'cdsStop_conf',
                'cdsStop_info', 'tss_info', 'tss_conf', 'cleave_info',
                'cleave_conf', 'polya_info', 'polya_conf', 'is_valid',
                'transcript_complete', 'is_complete', 'splicegraph')

    for pkey, pdet in parent_nf_map.items():

        # considering only gene features
        if not re.search(r'gene', pdet.get('type', '')):
            continue

        transcripts = child_nf_map[pkey]

        # infer the gene start and stop from its transcripts if they are
        # not there in the parent record
        if not pdet.get('location', []):
            GNS, GNE = [], []
            # multiple number of transcripts
            for L1 in transcripts:
                GNS.append(L1.get('location', [])[0])
                GNE.append(L1.get('location', [])[1])
            GNS.sort()
            GNE.sort()
            pdet['location'] = [GNS[0], GNE[-1]]
        orient = pdet.get('strand', '')

        gene[g_cnt]['id'] = g_cnt +1
        gene[g_cnt]['chr'] = pkey[0]
        gene[g_cnt]['source'] = pkey[1]
        gene[g_cnt]['name'] = pkey[-1]
        gene[g_cnt]['start'] = pdet.get('location', [])[0]
        gene[g_cnt]['stop'] = pdet.get('location', [])[1]
        gene[g_cnt]['strand'] = orient
        gene[g_cnt]['score'] = pdet.get('score','')

        # a gene with more than one transcript is flagged as alternatively spliced
        is_alt = 1 if len(transcripts) > 1 else 0
        gene[g_cnt]['is_alt_spliced'] = gene[g_cnt]['is_alt'] = is_alt

        # complete sub-feature for all transcripts
        # (builtin `object` instead of the `np.object` alias, which was
        # removed from NumPy)
        dim = len(transcripts)
        TRS = np.zeros((dim,), dtype=object)
        TR_TYP = np.zeros((dim,), dtype=object)
        EXON = np.zeros((dim,), dtype=object)
        UTR5 = np.zeros((dim,), dtype=object)
        UTR3 = np.zeros((dim,), dtype=object)
        CDS = np.zeros((dim,), dtype=object)
        TISc = np.zeros((dim,), dtype=object)
        TSSc = np.zeros((dim,), dtype=object)
        CLV = np.zeros((dim,), dtype=object)
        CSTOP = np.zeros((dim,), dtype=object)
        TSTAT = np.zeros((dim,), dtype=object)

        # fetching corresponding transcripts
        for xq, Lv1 in enumerate(transcripts):

            TID = Lv1.get('ID', '')
            TRS[xq]= np.array([TID])

            TYPE = Lv1.get('type', '')
            TR_TYP[xq] = np.array(TYPE) if TYPE else np.array('')

            # from here on the strand of the transcript is used
            orient = Lv1.get('strand', '')

            # fetching different sub-features, grouped by feature type
            child_feat = defaultdict(list)
            for Lv2 in child_nf_map[(pkey[0], pkey[1], TID)]:
                E_TYP = Lv2.get('type', '')
                child_feat[E_TYP].append(Lv2.get('location'))

            # make general ascending order of coordinates
            if orient == '-':
                for etype, excod in child_feat.items():
                    if len(excod) > 1:
                        if excod[0][0] > excod[-1][0]:
                            excod.reverse()
                            child_feat[etype] = excod

            # make exon coordinate from cds and utr regions
            if not child_feat.get('exon'):
                if child_feat.get('CDS'):
                    exon_cod = utils.make_Exon_cod( orient,
                                NonetoemptyList(child_feat.get('five_prime_UTR')),
                                NonetoemptyList(child_feat.get('CDS')),
                                NonetoemptyList(child_feat.get('three_prime_UTR')))
                    child_feat['exon'] = exon_cod
                else:
                    # TODO only UTR's
                    # searching through keys to find a pattern describing exon feature
                    ex_key_pattern = [k for k in child_feat if k.endswith("exon")]
                    if ex_key_pattern:
                        child_feat['exon'] = child_feat[ex_key_pattern[0]]

            # stop_codon are separated from CDS, add the coordinates based on strand:
            # a directly adjacent stop codon extends the terminal CDS segment,
            # otherwise it is added as a segment of its own
            # NOTE(review): a stop_codon without any CDS feature fails here,
            # because the missing CDS list (None) is subscripted
            stop_codons = child_feat.get('stop_codon')
            if stop_codons:
                stop = stop_codons[0]
                cds = child_feat.get('CDS')
                if orient == '+':
                    if stop[0] - cds[-1][1] == 1:
                        cds[-1] = [cds[-1][0], stop[1]]
                    else:
                        cds.append(stop)
                elif orient == '-':
                    if cds[0][0] - stop[1] == 1:
                        cds[0] = [stop[0], cds[0][1]]
                    else:
                        cds.insert(0, stop)

            # transcript signal sites
            TIS, cdsStop, TSS, cleave = [], [], [], []
            cds_status, exon_status, utr_status = 0, 0, 0

            exons = child_feat.get('exon')
            if exons:
                # on '+' the transcript starts at the first exon start,
                # on any other strand at the last exon end
                if orient == '+':
                    TSS, cleave = [exons[0][0]], [exons[-1][1]]
                else:
                    TSS, cleave = [exons[-1][1]], [exons[0][0]]
                exon_status = 1

            if child_feat.get('CDS'):
                if orient == '+':
                    TIS = [child_feat.get('CDS')[0][0]]
                    cdsStop = [child_feat.get('CDS')[-1][1]-3]
                else:
                    TIS = [child_feat.get('CDS')[-1][1]]
                    cdsStop = [child_feat.get('CDS')[0][0]+3]
                cds_status = 1
                # cds phase calculation
                child_feat['CDS'] = utils.add_CDS_phase(orient, child_feat.get('CDS'))

            # sub-feature status
            if child_feat.get('three_prime_UTR') or child_feat.get('five_prime_UTR'):
                utr_status = 1

            # a transcript only counts as complete with exon, CDS and UTR present
            t_status = 1 if utr_status == cds_status == exon_status == 1 else 0

            # add sub-feature # make array for export to different out
            TSTAT[xq] = t_status
            EXON[xq] = np.array(child_feat.get('exon'), np.float64)
            UTR5[xq] = np.array(NonetoemptyList(child_feat.get('five_prime_UTR')))
            UTR3[xq] = np.array(NonetoemptyList(child_feat.get('three_prime_UTR')))
            CDS[xq] = np.array(NonetoemptyList(child_feat.get('CDS')))
            TISc[xq] = np.array(TIS)
            CSTOP[xq] = np.array(cdsStop)
            TSSc[xq] = np.array(TSS)
            CLV[xq] = np.array(cleave)

        # add sub-features to the parent gene feature
        gene[g_cnt]['transcript_status'] = TSTAT
        gene[g_cnt]['transcripts'] = TRS
        gene[g_cnt]['exons'] = EXON
        gene[g_cnt]['utr5_exons'] = UTR5
        gene[g_cnt]['cds_exons'] = CDS
        gene[g_cnt]['utr3_exons'] = UTR3
        gene[g_cnt]['transcript_type'] = TR_TYP
        gene[g_cnt]['tis'] = TISc
        gene[g_cnt]['cdsStop'] = CSTOP
        gene[g_cnt]['tss'] = TSSc
        gene[g_cnt]['cleave'] = CLV

        gene[g_cnt]['gene_info'] = dict( ID = pkey[-1],
                                Name = pdet.get('name'),
                                Source = pkey[1])
        # few empty fields // TODO fill this:
        for field in empty_list_fields:
            gene[g_cnt][field] = []
        gene[g_cnt]['alias'] = ''
        gene[g_cnt]['is_correctly_gff3_referenced'] = ''
        g_cnt += 1

    ## deleting empty gene records from the main array
    # Records are filled sequentially, so only the first g_cnt entries are
    # populated; everything behind them belongs to skipped non-gene parents.
    # (The former np.delete call used an index one past the end of the array,
    # which NumPy rejects with an IndexError.)
    if g_cnt < len(gene):
        gene = gene[:g_cnt].copy()

    return gene
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
415 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def NonetoemptyList(XS):
    """
    Replace a missing value (None) by an empty list

    Anything other than None is handed back unchanged.

    @args XS: value to normalise
    @type XS: list or None
    """
    if XS is None:
        return []
    return XS
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
424 |
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
def create_missing_feature_type(p_feat, c_feat):
    """
    GFF/GTF file defines only child features. This function tries to create
    the parent feature from the information provided in the attribute column.

    example:
    chr21 hg19_knownGene exon 9690071 9690100 0.000000 + . gene_id "uc002zkg.1"; transcript_id "uc002zkg.1";
    chr21 hg19_knownGene exon 9692178 9692207 0.000000 + . gene_id "uc021wgt.1"; transcript_id "uc021wgt.1";
    chr21 hg19_knownGene exon 9711935 9712038 0.000000 + . gene_id "uc011abu.2"; transcript_id "uc011abu.2";

    This function gets the parsed feature annotations.

    For every key of c_feat a 'gene' record is written into p_feat and a
    transcript record plus the re-keyed grand children are collected in a
    new child map. Keys of c_feat are indexed as fid[0], fid[1] and fid[-1];
    the last element is used as the transcript id. Keys whose child list is
    empty are skipped, because nothing can be inferred from them.

    @args p_feat: Parent feature map (modified in place)
    @type p_feat: collections defaultdict
    @args c_feat: Child feature map
    @type c_feat: collections defaultdict
    @return: the updated p_feat and the newly built child feature map
    """

    child_n_map = defaultdict(list)
    for fid, det in c_feat.items():
        # without grand children there is no span, strand or gene id to
        # derive; skip before touching p_feat so no bogus gene is recorded
        if not det:
            continue

        # get the details from grand child
        GID = STRD = SCR = None
        SPOS, EPOS = [], []
        TYP = set()
        for gchild in det:
            GID = gchild.get('gene_id', [''])[0]
            # a child lacking a [start, end] location raises IndexError here
            loc = gchild.get('location', [])
            SPOS.append(loc[0])
            EPOS.append(loc[1])
            # strand and score of the last grand child win
            STRD = gchild.get('strand', '')
            SCR = gchild.get('score', '')
            TYP.add(gchild.get('type', ''))

        # infer transcript type: a coding segment implies mRNA
        if 'CDS' in TYP or 'cds' in TYP:
            transcript_type = 'mRNA'
        else:
            transcript_type = 'transcript'

        # gene id and transcript id are same: prefix the transcript id so
        # both levels keep distinct keys in the child map
        transcript_id = fid[-1]
        if GID == transcript_id:
            transcript_id = 'Transcript:' + str(GID)

        gene_key = (fid[0], fid[1], GID)

        # level -1 feature type
        p_feat[gene_key] = dict(type='gene',
                                location=[],  ## infer location based on multiple transcripts
                                strand=STRD,
                                name=GID)
        # level -2 feature type: spans from the smallest start to the
        # largest end seen among the grand children
        child_n_map[gene_key].append(
            dict(type=transcript_type,
                 location=[min(SPOS), max(EPOS)],
                 strand=STRD,
                 score=SCR,
                 ID=transcript_id,
                 gene_id=''))
        # reorganizing the grand child
        transcript_key = (fid[0], fid[1], transcript_id)
        for gchild in det:
            child_n_map[transcript_key].append(
                dict(type=gchild.get('type', ''),
                     location=gchild.get('location'),
                     strand=gchild.get('strand'),
                     ID=gchild.get('ID'),
                     score=gchild.get('score'),
                     gene_id=''))
    return p_feat, child_n_map
ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff
changeset
|
491 |