Mercurial > repos > vipints > deseq_hts
comparison deseq-hts_2.0/tools/helper.py @ 10:2fe512c7bfdf draft
DESeq2 version 1.0.19 added to the repo
author | vipints <vipin@cbio.mskcc.org> |
---|---|
date | Tue, 08 Oct 2013 08:15:34 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:e27b4f7811c2 | 10:2fe512c7bfdf |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Common utility functions | |
4 """ | |
5 | |
6 import os | |
7 import re | |
8 import sys | |
9 import gzip | |
10 import bz2 | |
11 import numpy | |
12 | |
def init_gene_DE():
    """
    Initializing the gene structure for DE

    Returns a freshly built list of ``(field_name, field_type)`` pairs that
    describes one gene record.  Numeric fields use ``'f8'``, text fields use
    fixed-width byte strings (``'S15'``, ``'S25'``, ``'S2'``).
    """
    # NOTE(review): 'exons', 'gene_info' and 'transcripts' use the
    # ``numpy.dtype`` class itself as their type -- presumably a placeholder
    # for arbitrary (object) content; confirm against the code that consumes
    # this description before changing it.
    gene_det = [('id', 'f8'),
                ('chr', 'S15'),
                ('exons', numpy.dtype),
                ('gene_info', numpy.dtype),
                ('is_alt_spliced', 'f8'),
                ('name', 'S25'),
                ('source', 'S25'),
                ('start', 'f8'),
                ('stop', 'f8'),
                ('strand', 'S2'),
                ('transcripts', numpy.dtype)]

    return gene_det
30 | |
def _open_file(fname):
    """
    Open the file (supports .gz .bz2) and returns the handler

    The file type is chosen from the extension of ``fname`` only:
    ``.gz`` is opened with ``gzip.open`` and ``.bz2`` with ``bz2.BZ2File``,
    both in binary read mode; anything else is opened as a plain text file.

    Any exception raised while opening terminates the program through
    ``sys.exit`` with the exception as the exit message.

    NOTE(review): on Python 3 the compressed handles yield ``bytes`` while
    the plain-text handle yields ``str``; callers have to cope with both.
    """
    try:
        extension = os.path.splitext(fname)[1]
        if extension == ".gz":
            FH = gzip.open(fname, 'rb')
        elif extension == ".bz2":
            FH = bz2.BZ2File(fname, 'rb')
        else:
            # 'U' (universal newlines) is only needed on Python 2.  Python 3
            # text mode translates newlines by default and rejects the 'U'
            # flag outright from 3.11 on, which would otherwise end up in
            # the sys.exit() below for every plain file.
            mode = 'rU' if sys.version_info[0] < 3 else 'r'
            FH = open(fname, mode)
    except Exception as error:
        sys.exit(error)
    return FH
45 | |
def make_Exon_cod(strand_p, five_p_utr, cds_cod, three_p_utr):
    """
    Create exon cordinates from UTR's and CDS region

    A UTR region that overlaps by one base with, or lies directly next to,
    the neighbouring CDS region (start minus end equal to 0 or 1) belongs to
    the same exon, so the two are fused into a single ``[start, end]`` pair.
    All other regions are passed through unchanged.

    strand_p    -- '+' or '-'; any other value yields an empty list.
    five_p_utr  -- list of [start, end] pairs of the 5' UTR regions.
    cds_cod     -- list of [start, end] pairs of the CDS regions (non-empty).
    three_p_utr -- list of [start, end] pairs of the 3' UTR regions.

    The code compares the last region of the lower-coordinate UTR list with
    the first CDS and the last CDS with the first region of the
    higher-coordinate UTR list, i.e. it assumes every list is ordered by
    ascending coordinate -- TODO confirm with the caller.

    Returns a new list of exon coordinates; the argument lists are not
    modified.  Raises IndexError when ``cds_cod`` is empty on a known strand.
    """
    # On the forward strand the 5' UTR sits at lower coordinates than the
    # CDS and the 3' UTR at higher ones; on the reverse strand the roles are
    # swapped.  Apart from that the merge is identical for both strands.
    if strand_p == '+':
        return _fuse_utr_cds(five_p_utr, cds_cod, three_p_utr)
    if strand_p == '-':
        return _fuse_utr_cds(three_p_utr, cds_cod, five_p_utr)
    return []


def _regions_touch(left_end, right_start):
    """True when the right region starts on or right after the left one's end."""
    return right_start - left_end in (0, 1)


def _fuse_utr_cds(lower_utr, cds_cod, upper_utr):
    """Join lower UTRs, CDS and upper UTRs, fusing regions that touch at the two junctions."""
    exons = list(cds_cod)
    if not exons:
        raise IndexError("cds_cod must contain at least one CDS region")
    lower = list(lower_utr)
    upper = list(upper_utr)

    # A junction is only possible when a UTR region actually exists; using
    # a 0 placeholder for a missing UTR would wrongly fuse a CDS that starts
    # at coordinate 0 or 1 and move its start to 0.
    if lower and _regions_touch(lower[-1][1], exons[0][0]):
        exons[0] = [lower.pop()[0], exons[0][1]]
    # With a single CDS, exons[-1] is the region fused above, so a UTR
    # touching on both sides yields one exon spanning all three regions.
    if upper and _regions_touch(exons[-1][1], upper[0][0]):
        exons[-1] = [exons[-1][0], upper.pop(0)[1]]

    return lower + exons + upper