Mercurial > repos > vipints > rdiff
annotate rDiff/tools/helper.py @ 2:233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author | vipints <vipin@cbio.mskcc.org> |
---|---|
date | Tue, 08 Oct 2013 07:15:44 -0400 |
parents | |
children |
rev | line source |
---|---|
2
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
2 """ |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
3 Common utility functions |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
4 """ |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
5 |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
6 import os |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
7 import re |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
8 import sys |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
9 import gzip |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
10 import bz2 |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
11 import numpy |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
12 |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
def init_gene_DE():
    """
    Initializing the gene structure for DE

    Returns a freshly built list of (field name, field type) pairs, one per
    gene attribute, in the order the fields are declared below. The layout
    matches what numpy accepts as a structured dtype description.

    'f8' entries are 8-byte floats and 'S<n>' entries are fixed-width byte
    strings of at most n characters. The fields typed as numpy.dtype
    ('exons', 'gene_info', 'transcripts') are presumably meant to hold
    arbitrary nested Python objects -- NOTE(review): confirm how the caller
    turns this list into an array.
    """
    return [
        ('id', 'f8'),
        ('chr', 'S15'),
        ('exons', numpy.dtype),
        ('gene_info', numpy.dtype),
        ('is_alt_spliced', 'f8'),
        ('name', 'S25'),
        ('source', 'S25'),
        ('start', 'f8'),
        ('stop', 'f8'),
        ('strand', 'S2'),
        ('transcripts', numpy.dtype),
    ]
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
30 |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
def _open_file(fname):
    """
    Open the file (supports .gz .bz2) and returns the handler

    The opener is picked from the file name extension only; the file content
    is not inspected. '.gz' and '.bz2' files are opened in binary mode ('rb'),
    so on Python 3 they yield bytes, whereas any other file is opened as text.

    @args fname: path of the file to open
    @return: the open file handle; closing it is the caller's responsibility

    Any exception raised while opening is passed to sys.exit(), which ends the
    program with the error as its exit message.
    """
    try:
        extension = os.path.splitext(fname)[1]
        if extension == ".gz":
            FH = gzip.open(fname, 'rb')
        elif extension == ".bz2":
            FH = bz2.BZ2File(fname, 'rb')
        else:
            # The 'U' flag only matters on Python 2. Python 3 text mode has
            # universal newlines by default, and from 3.11 on passing 'U'
            # raises ValueError, which would abort on every plain file.
            text_mode = 'rU' if sys.version_info[0] < 3 else 'r'
            FH = open(fname, text_mode)
    except Exception as error:
        sys.exit(error)
    return FH
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
45 |
233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
def make_Exon_cod(strand_p, five_p_utr, cds_cod, three_p_utr):
    """
    Create exon cordinates from UTR's and CDS region

    A UTR segment that touches the neighbouring CDS segment (gap of 0 or 1
    between the end of one and the start of the other) belongs to the same
    exon, so the two are fused into one [start, stop] pair. Every other
    segment is passed through unchanged.

    @args strand_p: '+' or '-'; any other value yields an empty list
    @args five_p_utr: list of [start, stop] pairs of the 5' UTR, may be empty
    @args cds_cod: list of [start, stop] pairs of the CDS, must not be empty
                   (an empty list raises IndexError)
    @args three_p_utr: list of [start, stop] pairs of the 3' UTR, may be empty
    @return: list of exon [start, stop] pairs

    The last entry of the list placed before the CDS and the first entry of
    the list placed after it are the ones compared with the CDS ends, so the
    lists are presumably sorted by ascending coordinate -- NOTE(review):
    confirm against the caller.

    The input lists are not modified. Segments that are not fused are the
    very same objects that were passed in, not copies.
    """
    if strand_p == '+':
        # Forward strand: 5' UTR comes before the CDS, 3' UTR after it.
        return _merge_flanking_utrs(five_p_utr, cds_cod, three_p_utr)
    if strand_p == '-':
        # Reverse strand: the 3' UTR is the one placed before the CDS.
        return _merge_flanking_utrs(three_p_utr, cds_cod, five_p_utr)
    return []


def _merge_flanking_utrs(leading_utr, cds_cod, trailing_utr):
    """
    Chain leading_utr, cds_cod and trailing_utr into one exon list, fusing the
    segments that touch at the two UTR/CDS boundaries.
    """
    first_cds = cds_cod[0]
    last_cds = cds_cod[-1]

    # A boundary is only tested when the UTR on that side exists. Otherwise
    # a stand-in coordinate of 0 would make a CDS starting at position 0 or 1
    # look adjacent to a UTR that is not there.
    fuse_lead = bool(leading_utr) and (
        first_cds[0] - leading_utr[-1][1] in (0, 1))
    fuse_trail = bool(trailing_utr) and (
        trailing_utr[0][0] - last_cds[1] in (0, 1))

    # The UTR segment that gets fused is dropped from its own list.
    exon_pos = list(leading_utr[:-1] if fuse_lead else leading_utr)

    if len(cds_cod) == 1:
        # A single CDS segment can be extended on either side or on both.
        if fuse_lead or fuse_trail:
            exon_start = leading_utr[-1][0] if fuse_lead else first_cds[0]
            exon_stop = trailing_utr[0][1] if fuse_trail else first_cds[1]
            exon_pos.append([exon_start, exon_stop])
        else:
            exon_pos.append(first_cds)
    else:
        if fuse_lead:
            exon_pos.append([leading_utr[-1][0], first_cds[1]])
        else:
            exon_pos.append(first_cds)
        # Inner CDS segments never touch a UTR.
        exon_pos.extend(cds_cod[1:-1])
        if fuse_trail:
            exon_pos.append([last_cds[0], trailing_utr[0][1]])
        else:
            exon_pos.append(last_cds)

    exon_pos.extend(trailing_utr[1:] if fuse_trail else trailing_utr)
    return exon_pos