annotate GFFtools-GX/GFFParser.py @ 3:ff2c2e6f4ab3

Uploaded version 2.0.0 of gfftools ready to import to local instance
author vipints
date Wed, 11 Jun 2014 16:29:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
1 #!/usr/bin/env python
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
2 """
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
3 Extract genome annotation from a GFF (a tab delimited format for storing sequence features and annotations) file.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
4
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
5 Requirements:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
6 Numpy :- http://numpy.org/
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
7 Scipy :- http://scipy.org/
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
8
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
9 Copyright (C)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
10
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
11 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
12 2012-2014 Memorial Sloan Kettering Cancer Center, New York City, USA.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
13 """
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
14
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
15 import re
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
16 import os
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
17 import sys
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
18 import urllib
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
19 import numpy as np
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
20 import scipy.io as sio
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
21 from collections import defaultdict
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
22 import helper as utils
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
23
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
try:  # Python 3
    from urllib.parse import unquote as _url_unquote
except ImportError:  # Python 2
    from urllib import unquote as _url_unquote

# GFF3 attributes are written as ``key=value``
_GFF3_ATTRIBUTE = re.compile(r"\w+=")
# GTF attributes are written as ``key value``, e.g.: gene_id uc002zkg.1;
_GTF_ATTRIBUTE = re.compile(r"\s?\w+\s")


def attribute_tags(col9):
    """
    Split the key-value tags from the attribute column, it takes column number 9 from GTF/GFF file

    The format is decided from the first attribute only: ``key=value`` is
    treated as GFF3, ``key value`` as GTF, and anything else is stored as a
    single value under the key 'ID'.

    @args col9: attribute column from GFF file
    @type col9: str

    @return: tuple (is_gff, info); is_gff is True when the column follows the
        GFF3 ``key=value`` pattern, info maps each key to the list of its
        comma separated, URL-unquoted values.
    @rtype: (bool, collections.defaultdict)
    """
    info = defaultdict(list)
    is_gff = False

    if not col9:
        return is_gff, info

    # trim the line ending semi-colon ucsc may have some white-space
    col9 = col9.rstrip(';| ')

    # attributes from 9th column; try the widest delimiter first and fall
    # back to the narrower ones only while the column stays unsplit
    atbs = col9.split(" ; ")
    if len(atbs) == 1:
        atbs = col9.split("; ")
        if len(atbs) == 1:
            atbs = col9.split(";")

    if _GFF3_ATTRIBUTE.match(atbs[0]):  # gff3 pattern
        is_gff = True
        # split on the first '=' only, the value itself may contain '='
        key_vals = [at.split('=', 1) for at in atbs]
    elif _GTF_ATTRIBUTE.match(atbs[0]):  # gtf pattern
        key_vals = [at.strip().split(" ", 1) for at in atbs]
    else:
        # to handle attribute column has only single value
        key_vals = [['ID', atbs[0]]]

    # get key, val items
    for item in key_vals:
        if len(item) != 2:
            # blank fragment (e.g. from ';;') or a key without a value:
            # there is nothing to record, and unpacking it would fail
            continue
        key, val = item
        # replace the double quotes from feature identifier
        val = val.replace('"', '')
        # replace the web formatting place holders to plain text format
        info[key].extend(_url_unquote(v) for v in val.split(',') if v)

    return is_gff, info
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
70
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
def spec_features_keywd(gff_parts):
    """
    Specify the feature key word according to the GFF specifications

    Copies the first transcript-level identifier found in the attributes to
    'Parent' and the first gene-level identifier to 'GParent'. When the
    attributes carry a 'Transcript' or 'CDS' key, that value is used either
    as the feature id (transcript-like features without an id) or as the
    'Parent' (exon/UTR/CDS/codon/intron features).

    @args gff_parts: parsed record with the keys 'info' (attribute dict),
        'type' and 'id'; it is updated in place
    @type gff_parts: dict

    @return: the same gff_parts object
    @rtype: dict
    """
    info = gff_parts["info"]

    # transcript level identifier becomes the parent of the feature
    for t_id in ("transcript_id", "transcriptId", "proteinId"):
        try:
            info["Parent"] = info[t_id]
        except KeyError:
            continue
        break

    # gene level identifier becomes the grand parent of the feature
    for g_id in ("gene_id", "geneid", "geneId", "name", "gene_name", "genename"):
        try:
            info["GParent"] = info[g_id]
        except KeyError:
            continue
        break

    child_types = ("intron", "exon", "three_prime_UTR", "coding_exon",
                   "five_prime_UTR", "CDS", "stop_codon", "start_codon")

    ## TODO key words
    for flat_name in ("Transcript", "CDS"):
        if flat_name not in info:
            continue
        feature_type = gff_parts['type']
        # parents
        if feature_type == flat_name or re.search(r'transcript', feature_type, re.IGNORECASE):
            if not gff_parts['id']:
                gff_parts['id'] = info[flat_name][0]
        # children
        elif feature_type in child_types:
            info["Parent"] = info[flat_name]
        # only the first flat-name key present in the attributes is considered
        break

    return gff_parts
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
105
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
def Parse(ga_file):
    """
    Parsing GFF/GTF file based on feature relationship, it takes the input file.

    Every tab delimited record is classified as 'child' (has a Parent),
    'parent' (has an id but no Parent) or plain 'record' (neither; currently
    ignored). Children and parents are collected in maps keyed by
    (chromosome, source, feature id) and handed to format_gene_models.

    Lines that are empty, start with '#' or '>', contain no tab, have an
    empty attribute column, or lack start/end coordinates are skipped.

    @args ga_file: input file name
    @type ga_file: str

    @return: the value returned by format_gene_models for the parsed features
    @raise AssertionError: when a tab delimited line has fewer than 8 columns
    """
    child_map = defaultdict(list)
    parent_map = dict()
    # GFF3 flag of the most recently parsed record; stays None when the file
    # holds no record at all, so the check after the loop never hits an
    # unbound name
    ftype = None

    ga_handle = utils.open_file(ga_file)
    try:
        for rec in ga_handle:
            rec = rec.strip('\n\r')

            # skip empty line fasta identifier and commented line
            if not rec or rec[0] in ('#', '>'):
                continue
            # skip the genome sequence
            if '\t' not in rec:
                continue

            parts = rec.split('\t')
            if len(parts) < 8:
                # raised explicitly so the check survives `python -O`
                raise AssertionError(rec)

            # process the attribute column (9th column)
            # NOTE(review): with exactly 8 columns parts[-1] is the phase
            # column, not an attribute column - confirm this is intended
            ftype, tags = attribute_tags(parts[-1])
            if not tags:  # skip the line if no attribute column.
                continue

            # extract fields
            if parts[1]:
                tags["source"] = parts[1]
            if parts[7]:
                tags["phase"] = parts[7]

            # a feature without coordinates cannot be placed in either map
            if not (parts[3] and parts[4]):
                continue

            gff_info = dict(
                info=dict(tags),
                chr=parts[0],
                score=parts[5],
                location=[int(parts[3]), int(parts[4])],
                type=parts[2],
                # `or ['']` also covers an empty value such as "ID="
                id=(tags.get('ID') or [''])[0],
                # unknown / irrelevant strand is stored as None
                strand=None if parts[6] in ('?', '.') else parts[6],
            )

            # key word according to the GFF spec.
            if not ftype:
                gff_info = spec_features_keywd(gff_info)

            info = gff_info['info']

            # link the feature relationships
            if 'Parent' in info:
                # a feature listed as its own parent loses its id
                if any(p == gff_info['id'] for p in info['Parent']):
                    gff_info['id'] = ''
                # child: create the data structure based on source and
                # feature id, one entry per parent
                for p in info['Parent']:
                    child_map[(gff_info['chr'], info['source'], p)].append(
                        dict(type=gff_info['type'],
                             location=gff_info['location'],
                             strand=gff_info['strand'],
                             score=gff_info['score'],
                             ID=gff_info['id'],
                             gene_id=info.get('GParent', '')))
            elif gff_info['id']:
                # parent
                parent_map[(gff_info['chr'], info['source'], gff_info['id'])] = dict(
                    type=gff_info['type'],
                    location=gff_info['location'],
                    strand=gff_info['strand'],
                    score=gff_info['score'],
                    name=(tags.get('Name') or [''])[0])
            # else: plain record without id and parent
            #TODO how to handle plain records?
    finally:
        # release the handle even when a malformed line aborts the parsing
        ga_handle.close()

    # depends on file type create parent feature
    # NOTE(review): the file type is taken from the last parsed record only
    if ftype is not None and not ftype:
        parent_map, child_map = create_missing_feature_type(parent_map, child_map)

    # connecting parent child relations
    # // essentially the parent child features are here from any type of GTF/GFF2/GFF3 file
    gene_mat = format_gene_models(parent_map, child_map)

    return gene_mat
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
206
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
207 def format_gene_models(parent_nf_map, child_nf_map):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
208 """
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
209 Genarate GeneObject based on the parsed file contents
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
210
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
211 @args parent_nf_map: parent features with source and chromosome information
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
212 @type parent_nf_map: collections defaultdict
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
213 @args child_nf_map: transctipt and exon information are encoded
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
214 @type child_nf_map: collections defaultdict
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
215 """
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
216 g_cnt = 0
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
217 gene = np.zeros((len(parent_nf_map),), dtype = utils.init_gene())
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
218
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
219 for pkey, pdet in parent_nf_map.items():
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
220
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
221 # considering only gene features
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
222 if not re.search(r'gene', pdet.get('type', '')):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
223 continue
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
224 # infer the gene start and stop if not there in the
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
225 if not pdet.get('location', []):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
226 GNS, GNE = [], []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
227 # multiple number of transcripts
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
228 for L1 in child_nf_map[pkey]:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
229 GNS.append(L1.get('location', [])[0])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
230 GNE.append(L1.get('location', [])[1])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
231 GNS.sort()
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
232 GNE.sort()
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
233 pdet['location'] = [GNS[0], GNE[-1]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
234 orient = pdet.get('strand', '')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
235
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
236 gene[g_cnt]['id'] = g_cnt +1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
237 gene[g_cnt]['chr'] = pkey[0]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
238 gene[g_cnt]['source'] = pkey[1]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
239 gene[g_cnt]['name'] = pkey[-1]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
240 gene[g_cnt]['start'] = pdet.get('location', [])[0]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
241 gene[g_cnt]['stop'] = pdet.get('location', [])[1]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
242 gene[g_cnt]['strand'] = orient
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
243 gene[g_cnt]['score'] = pdet.get('score','')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
244
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
245 # default value
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
246 gene[g_cnt]['is_alt_spliced'] = gene[g_cnt]['is_alt'] = 0
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
247 if len(child_nf_map[pkey]) > 1:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
248 gene[g_cnt]['is_alt_spliced'] = gene[g_cnt]['is_alt'] = 1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
249
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
250 # complete sub-feature for all transcripts
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
251 dim = len(child_nf_map[pkey])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
252 TRS = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
253 TR_TYP = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
254 EXON = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
255 UTR5 = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
256 UTR3 = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
257 CDS = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
258 TISc = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
259 TSSc = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
260 CLV = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
261 CSTOP = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
262 TSTAT = np.zeros((dim,), dtype=np.object)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
263
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
264 # fetching corresponding transcripts
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
265 for xq, Lv1 in enumerate(child_nf_map[pkey]):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
266
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
267 TID = Lv1.get('ID', '')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
268 TRS[xq]= np.array([TID])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
269
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
270 TYPE = Lv1.get('type', '')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
271 TR_TYP[xq] = np.array('')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
272 TR_TYP[xq] = np.array(TYPE) if TYPE else TR_TYP[xq]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
273
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
274 orient = Lv1.get('strand', '')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
275
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
276 # fetching different sub-features
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
277 child_feat = defaultdict(list)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
278 for Lv2 in child_nf_map[(pkey[0], pkey[1], TID)]:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
279 E_TYP = Lv2.get('type', '')
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
280 child_feat[E_TYP].append(Lv2.get('location'))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
281
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
282 # make general ascending order of coordinates
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
283 if orient == '-':
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
284 for etype, excod in child_feat.items():
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
285 if len(excod) > 1:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
286 if excod[0][0] > excod[-1][0]:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
287 excod.reverse()
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
288 child_feat[etype] = excod
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
289
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
290 # make exon coordinate from cds and utr regions
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
291 if not child_feat.get('exon'):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
292 if child_feat.get('CDS'):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
293 exon_cod = utils.make_Exon_cod( orient,
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
294 NonetoemptyList(child_feat.get('five_prime_UTR')),
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
295 NonetoemptyList(child_feat.get('CDS')),
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
296 NonetoemptyList(child_feat.get('three_prime_UTR')))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
297 child_feat['exon'] = exon_cod
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
298 else:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
299 # TODO only UTR's
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
300 # searching through keys to find a pattern describing exon feature
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
301 ex_key_pattern = [k for k in child_feat if k.endswith("exon")]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
302 if ex_key_pattern:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
303 child_feat['exon'] = child_feat[ex_key_pattern[0]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
304
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
305 # stop_codon are separated from CDS, add the coordinates based on strand
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
306 if child_feat.get('stop_codon'):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
307 if orient == '+':
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
308 if child_feat.get('stop_codon')[0][0] - child_feat.get('CDS')[-1][1] == 1:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
309 child_feat['CDS'][-1] = [child_feat.get('CDS')[-1][0], child_feat.get('stop_codon')[0][1]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
310 else:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
311 child_feat['CDS'].append(child_feat.get('stop_codon')[0])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
312 elif orient == '-':
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
313 if child_feat.get('CDS')[0][0] - child_feat.get('stop_codon')[0][1] == 1:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
314 child_feat['CDS'][0] = [child_feat.get('stop_codon')[0][0], child_feat.get('CDS')[0][1]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
315 else:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
316 child_feat['CDS'].insert(0, child_feat.get('stop_codon')[0])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
317
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
318 # transcript signal sites
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
319 TIS, cdsStop, TSS, cleave = [], [], [], []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
320 cds_status, exon_status, utr_status = 0, 0, 0
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
321
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
322 if child_feat.get('exon'):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
323 TSS = [child_feat.get('exon')[-1][1]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
324 TSS = [child_feat.get('exon')[0][0]] if orient == '+' else TSS
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
325 cleave = [child_feat.get('exon')[0][0]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
326 cleave = [child_feat.get('exon')[-1][1]] if orient == '+' else cleave
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
327 exon_status = 1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
328
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
329 if child_feat.get('CDS'):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
330 if orient == '+':
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
331 TIS = [child_feat.get('CDS')[0][0]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
332 cdsStop = [child_feat.get('CDS')[-1][1]-3]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
333 else:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
334 TIS = [child_feat.get('CDS')[-1][1]]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
335 cdsStop = [child_feat.get('CDS')[0][0]+3]
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
336 cds_status = 1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
337 # cds phase calculation
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
338 child_feat['CDS'] = utils.add_CDS_phase(orient, child_feat.get('CDS'))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
339
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
340 # sub-feature status
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
341 if child_feat.get('three_prime_UTR') or child_feat.get('five_prime_UTR'):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
342 utr_status =1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
343
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
344 if utr_status == cds_status == exon_status == 1:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
345 t_status = 1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
346 else:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
347 t_status = 0
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
348
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
349 # add sub-feature # make array for export to different out
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
350 TSTAT[xq] = t_status
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
351 EXON[xq] = np.array(child_feat.get('exon'), np.float64)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
352 UTR5[xq] = np.array(NonetoemptyList(child_feat.get('five_prime_UTR')))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
353 UTR3[xq] = np.array(NonetoemptyList(child_feat.get('three_prime_UTR')))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
354 CDS[xq] = np.array(NonetoemptyList(child_feat.get('CDS')))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
355 TISc[xq] = np.array(TIS)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
356 CSTOP[xq] = np.array(cdsStop)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
357 TSSc[xq] = np.array(TSS)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
358 CLV[xq] = np.array(cleave)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
359
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
360 # add sub-features to the parent gene feature
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
361 gene[g_cnt]['transcript_status'] = TSTAT
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
362 gene[g_cnt]['transcripts'] = TRS
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
363 gene[g_cnt]['exons'] = EXON
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
364 gene[g_cnt]['utr5_exons'] = UTR5
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
365 gene[g_cnt]['cds_exons'] = CDS
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
366 gene[g_cnt]['utr3_exons'] = UTR3
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
367 gene[g_cnt]['transcript_type'] = TR_TYP
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
368 gene[g_cnt]['tis'] = TISc
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
369 gene[g_cnt]['cdsStop'] = CSTOP
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
370 gene[g_cnt]['tss'] = TSSc
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
371 gene[g_cnt]['cleave'] = CLV
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
372
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
373 gene[g_cnt]['gene_info'] = dict( ID = pkey[-1],
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
374 Name = pdet.get('name'),
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
375 Source = pkey[1])
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
376 # few empty fields // TODO fill this:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
377 gene[g_cnt]['anno_id'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
378 gene[g_cnt]['confgenes_id'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
379 gene[g_cnt]['alias'] = ''
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
380 gene[g_cnt]['name2'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
381 gene[g_cnt]['chr_num'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
382 gene[g_cnt]['paralogs'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
383 gene[g_cnt]['transcript_info'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
384 gene[g_cnt]['transcript_valid'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
385 gene[g_cnt]['exons_confirmed'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
386 gene[g_cnt]['tis_conf'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
387 gene[g_cnt]['tis_info'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
388 gene[g_cnt]['cdsStop_conf'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
389 gene[g_cnt]['cdsStop_info'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
390 gene[g_cnt]['tss_info'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
391 gene[g_cnt]['tss_conf'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
392 gene[g_cnt]['cleave_info'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
393 gene[g_cnt]['cleave_conf'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
394 gene[g_cnt]['polya_info'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
395 gene[g_cnt]['polya_conf'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
396 gene[g_cnt]['is_valid'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
397 gene[g_cnt]['transcript_complete'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
398 gene[g_cnt]['is_complete'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
399 gene[g_cnt]['is_correctly_gff3_referenced'] = ''
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
400 gene[g_cnt]['splicegraph'] = []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
401 g_cnt += 1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
402
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
403 ## deleting empty gene records from the main array
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
404 XPFLG=0
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
405 for XP, ens in enumerate(gene):
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
406 if ens[0]==0:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
407 XPFLG=1
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
408 break
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
409
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
410 if XPFLG==1:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
411 XQC = range(XP, len(gene)+1)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
412 gene = np.delete(gene, XQC)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
413
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
414 return gene
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
415
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
def NonetoemptyList(XS):
    """
    Substitute an empty list for a missing (None) value

    Any argument other than None, including falsy ones such as '' or 0,
    is handed back untouched (the very same object, not a copy).

    @args XS: value to normalise, e.g. the result of a dict.get() lookup
    @type XS: list or None
    """
    # identity test on purpose: only None is replaced, not other falsy values
    if XS is not None:
        return XS
    return []
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
424
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
def create_missing_feature_type(p_feat, c_feat):
    """
    GFF/GTF file defines only child features. This function tries to create
    the parent feature from the information provided in the attribute column.

    example:
    chr21 hg19_knownGene exon 9690071 9690100 0.000000 + . gene_id "uc002zkg.1"; transcript_id "uc002zkg.1";
    chr21 hg19_knownGene exon 9692178 9692207 0.000000 + . gene_id "uc021wgt.1"; transcript_id "uc021wgt.1";
    chr21 hg19_knownGene exon 9711935 9712038 0.000000 + . gene_id "uc011abu.2"; transcript_id "uc011abu.2";

    This function gets the parsed feature annotations.

    For every entry of c_feat that has at least one child record:
      * a 'gene' record is stored in p_feat under (fid[0], fid[1], gene_id);
        its location is left empty and has to be inferred later from the
        transcripts attached to it,
      * a transcript record ('mRNA' when a CDS/cds child is present, otherwise
        'transcript') spanning the smallest start and the largest end of the
        children is appended to the returned map under the gene key,
      * the children themselves are re-attached under
        (fid[0], fid[1], transcript_id).

    Entries of c_feat without any child record are skipped, because there is
    nothing to infer a parent from.

    @args p_feat: Parent feature map, updated in place
    @type p_feat: collections defaultdict
    @args c_feat: Child feature map, keyed by tuples whose last element is
                  the transcript id; each value is a list of dicts
    @type c_feat: collections defaultdict
    @return: the updated p_feat and the newly built child feature map
    @rtype: tuple (p_feat, collections defaultdict of list)
    """

    child_n_map = defaultdict(list)
    for fid, det in c_feat.items():
        # get the details from grand child
        gene_id = strand = score = None
        starts, ends = [], []
        child_types = set()
        for gchild in det:
            gene_id = gchild.get('gene_id', [''])[0]
            # a child without coordinates is malformed input and still
            # raises IndexError here, exactly as before
            location = gchild.get('location', [])
            starts.append(location[0])
            ends.append(location[1])
            # strand and score end up being those of the last child
            strand = gchild.get('strand', '')
            score = gchild.get('score', '')
            child_types.add(gchild.get('type', ''))

        # no grand child: no span, no gene id -> nothing to create.
        # (this used to fail with an IndexError after a gene keyed by
        # None had already been written to p_feat)
        if not starts:
            continue

        # infer transcript type
        if 'CDS' in child_types or 'cds' in child_types:
            transcript_type = 'mRNA'
        else:
            transcript_type = 'transcript'

        # gene id and transcript id are same: rename the transcript so the
        # gene key and the transcript key do not collide in child_n_map
        transcript_id = fid[-1]
        if gene_id == transcript_id:
            transcript_id = 'Transcript:' + str(gene_id)

        gene_key = (fid[0], fid[1], gene_id)
        transcript_key = (fid[0], fid[1], transcript_id)

        # level -1 feature type
        p_feat[gene_key] = dict(
            type='gene',
            location=[],  # infer location based on multiple transcripts
            strand=strand,
            name=gene_id)

        # level -2 feature type
        child_n_map[gene_key].append(dict(
            type=transcript_type,
            location=[min(starts), max(ends)],
            strand=strand,
            score=score,
            ID=transcript_id,
            gene_id=''))

        # reorganizing the grand child
        for gchild in det:
            child_n_map[transcript_key].append(dict(
                type=gchild.get('type', ''),
                location=gchild.get('location'),
                strand=gchild.get('strand'),
                ID=gchild.get('ID'),
                score=gchild.get('score'),
                gene_id=''))
    return p_feat, child_n_map
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
491