annotate Marea/marea_cluster.py @ 14:1a0c8c2780f2 draft

Uploaded
author bimib
date Wed, 13 Feb 2019 05:48:56 -0500
parents e96f3b85e5a0
children c71ac0bb12de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
1 from __future__ import division
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
2 import os
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
3 import sys
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
4 import pandas as pd
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
5 import collections
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
6 import pickle as pk
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
7 import argparse
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
8 from sklearn.cluster import KMeans
13
e96f3b85e5a0 Uploaded
bimib
parents: 12
diff changeset
9 import matplotlib
14
1a0c8c2780f2 Uploaded
bimib
parents: 13
diff changeset
10 # Force matplotlib to not use any Xwindows backend.
1a0c8c2780f2 Uploaded
bimib
parents: 13
diff changeset
11 matplotlib.use('Agg')
0
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
12 import matplotlib.pyplot as plt
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
13
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
14 ########################## argparse ###########################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
15
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def process_args(args):
    """Build the command-line parser for the clustering tool and parse argv.

    NOTE(review): the ``args`` parameter is accepted but never read; parsing
    is done by ``parse_args()`` without arguments, so argparse falls back to
    ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    cli = argparse.ArgumentParser(
        usage='%(prog)s [options]',
        description="process some value's genes to create class.")

    # (flags, keyword settings) in the order they are registered.
    option_table = (
        (('-rs', '--rules_selector'),
         dict(type=str,
              default='HMRcore',
              choices=['HMRcore', 'Recon', 'Custom'],
              help='chose which type of dataset you want use')),
        (('-cr', '--custom'),
         dict(type=str,
              help='your dataset if you want custom rules')),
        (('-ch', '--cond_hier'),
         dict(type=str,
              default='no',
              choices=['no', 'yes'],
              help='chose if you wanna hierical dendrogram')),
        (('-lk', '--k_min'),
         dict(type=int,
              help='min number of cluster')),
        (('-uk', '--k_max'),
         dict(type=int,
              help='max number of cluster')),
        (('-li', '--linkage'),
         dict(type=str,
              choices=['single', 'complete', 'average'],
              help='linkage hierarchical cluster')),
        (('-d', '--data'),
         dict(type=str,
              required=True,
              help='input dataset')),
        (('-n', '--none'),
         dict(type=str,
              default='true',
              choices=['true', 'false'],
              help='compute Nan values')),
        (('-td', '--tool_dir'),
         dict(type=str,
              required=True,
              help='your tool directory')),
        (('-na', '--name'),
         dict(type=str,
              help='name of dataset')),
        (('-de', '--dendro'),
         dict(help="Dendrogram out")),
        (('-ol', '--out_log'),
         dict(help="Output log")),
        (('-el', '--elbow'),
         dict(help="Out elbow")),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
67
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
68 ########################### warning ###########################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
69
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def warning(s):
    """Append the message ``s`` to the log file named by ``--out_log``.

    The command line is parsed again through ``process_args`` on each call to
    obtain the log path; the file is opened in append mode.
    """
    log_path = process_args(sys.argv).out_log
    with open(log_path, 'a') as handle:
        handle.write(s)
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
74
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
75 ############################ dataset input ####################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
76
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def read_dataset(data, name):
    """Load a tab-separated dataset, aborting cleanly on unusable input.

    Args:
        data: path or file-like object handed to ``pandas.read_csv``.
        name: dataset label, used only in the abort message.

    Returns:
        The parsed ``pandas.DataFrame``; the first row is used as header.

    Raises:
        SystemExit: if the input is empty, cannot be tokenized (ragged rows),
            or yields fewer than two columns.
    """
    def abort():
        # Built lazily so ``name`` is only touched on the failure path.
        sys.exit('Execution aborted: wrong format of ' + name + '\n')

    try:
        dataset = pd.read_csv(data, sep='\t', header=0)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # ParserError is handled like EmptyDataError, matching the handling
        # of custom GPR rule tables elsewhere in this module.
        abort()
    if len(dataset.columns) < 2:
        abort()
    return dataset
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
85
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
86 ############################ dataset name #####################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
87
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def name_dataset(name_data, count):
    """Return the display name of a dataset.

    When the name is the literal ``'Dataset'`` it is suffixed with ``'_'``
    and ``count``; any other name is returned as its string form.
    """
    label = str(name_data)
    if label != 'Dataset':
        return label
    return label + '_' + str(count)
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
93
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
94 ############################ load ids and rules ################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
95
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def load_id_rules(reactions):
    """Split a mapping into parallel lists of its keys and values.

    Args:
        reactions: mapping exposing ``items()``.

    Returns:
        Tuple ``(ids, rules)``: the keys and the values, in iteration order.
    """
    pairs = list(reactions.items())
    ids = [key for key, _ in pairs]
    rules = [value for _, value in pairs]
    return (ids, rules)
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
102
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
103 ############################ check_methods ####################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
104
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def gene_type(l, name):
    """Classify the gene identifier ``l`` by trying each checker in turn.

    The checkers are tried in the order HGNC, Ensembl, symbol, Entrez and the
    label of the first one that accepts ``l`` is returned.

    Args:
        l: identifier string to classify.
        name: dataset label, used only in the abort message.

    Returns:
        One of ``'hugo_id'``, ``'ensembl_gene_id'``, ``'symbol'``,
        ``'entrez_id'``.

    Raises:
        SystemExit: if no checker accepts the identifier.
    """
    detectors = (
        (check_hgnc, 'hugo_id'),
        (check_ensembl, 'ensembl_gene_id'),
        (check_symbol, 'symbol'),
        (check_entrez, 'entrez_id'),
    )
    for accepts, label in detectors:
        if accepts(l):
            return label
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID' +
             'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
118
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def check_hgnc(l):
    """Return True when ``l`` is ``HGNC:`` (any case) followed by digits only."""
    if len(l) <= 5:
        return False
    if not l.upper().startswith('HGNC:'):
        return False
    return l[5:].isdigit()
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
127
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def check_ensembl(l):
    """Return True when ``l`` has the shape tested for an Ensembl id.

    The string must be exactly 15 characters long, start with ``ENS`` (any
    case) and contain only digits from the fifth character onwards; the
    fourth character itself is not inspected.
    """
    if len(l) != 15:
        return False
    if not l.upper().startswith('ENS'):
        return False
    return l[4:].isdigit()
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
136
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def check_symbol(l):
    """Return True when ``l`` starts with a letter followed by alphanumerics.

    The remainder after the first character must be non-empty and entirely
    alphanumeric, so a single-character string is rejected.
    """
    if len(l) == 0:
        return False
    leading_ok = l[0].isalpha()
    rest_ok = l[1:].isalnum()
    return leading_ok and rest_ok
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
145
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def check_entrez(l):
    """Return True when ``l`` is a non-empty string made of digits only."""
    if len(l) == 0:
        return False
    return l.isdigit()
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
151
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def check_bool(b):
    """Translate the strings ``'true'`` / ``'false'`` into booleans.

    Any other value falls through and yields ``None``.
    """
    if b == 'false':
        return False
    if b == 'true':
        return True
    return None
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
157
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
158 ############################ make recon #######################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
159
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def check_and_doWord(l):
    """Tokenize a rule given as a list of characters and check its brackets.

    Spaces are dropped, ``(`` and ``)`` become single tokens, and every other
    run of characters becomes one word token. The input list is consumed from
    the front while it is scanned.

    Args:
        l: list of single characters; it is mutated (elements are popped).

    Returns:
        ``(tokens, genes)`` when the brackets balance, where ``genes`` holds
        every word that is not ``'or'`` / ``'and'``; ``False`` when a closing
        bracket precedes its opening one or the brackets do not balance.
    """
    delimiters = (' ', '(', ')')
    tokens = []
    genes = []
    depth = 0
    while l:
        if depth < 0:
            # A ')' was seen before its matching '('.
            return False
        head = l[0]
        if head == '(' or head == ')':
            depth += 1 if head == '(' else -1
            tokens.append(l.pop(0))
        elif head == ' ':
            l.pop(0)
        else:
            letters = []
            while l and l[0] not in delimiters:
                letters.append(l.pop(0))
            word = ''.join(letters)
            tokens.append(word)
            if word not in ('or', 'and'):
                genes.append(word)
    if depth != 0:
        return False
    return (tokens, genes)
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
194
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def brackets_to_list(l):
    """Turn a flat token list with brackets into nested lists.

    Each ``(`` starts a sub-list built by ``resolve_brackets``; every other
    token is copied through. The input list is consumed from the front.
    """
    nested = []
    while l:
        head = l.pop(0)
        if head == '(':
            nested.append(resolve_brackets(l))
        else:
            nested.append(head)
    return nested
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
205
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def resolve_brackets(l):
    """Collect tokens up to the matching ``)`` into a (possibly nested) list.

    Called with ``l`` positioned just after an opening bracket. Tokens are
    popped from the front of ``l``; the closing bracket is popped as well but
    not stored. Indexing an exhausted list raises ``IndexError``.
    """
    group = []
    while True:
        head = l[0]
        if head == ')':
            break
        l.pop(0)
        if head == '(':
            group.append(resolve_brackets(l))
        else:
            group.append(head)
    l.pop(0)  # discard the closing bracket
    return group
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
217
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def priorityAND(l):
    """Regroup a nested rule so that runs of ``and`` bind tighter than ``or``.

    Nested lists are processed recursively. A run of operands joined by
    ``and`` is wrapped in its own sub-list once an ``or`` has been seen at
    this level (or directly follows the run); if the level contains only
    ``and`` the run is kept flat.

    Args:
        l: list of tokens / nested lists; it is not mutated (slices are used).

    Returns:
        A new list with the regrouped rule.
    """
    def descend(item):
        # Recurse into bracketed groups, pass plain tokens through.
        return priorityAND(item) if isinstance(item, list) else item

    grouped = []
    only_and = True
    while l:
        if len(l) == 1:
            grouped.append(descend(l[0]))
            l = l[1:]
        elif l[0] == 'or':
            grouped.append(l[0])
            only_and = False
            l = l[1:]
        elif l[1] == 'or':
            grouped.extend([descend(l[0]), l[1]])
            only_and = False
            l = l[2:]
        elif l[1] == 'and':
            chain = [descend(l[0]), l[1], descend(l[2])]
            l = l[3:]
            while l:
                if l[0] == 'and':
                    chain.extend([l[0], descend(l[1])])
                    l = l[2:]
                elif l[0] == 'or':
                    only_and = False
                    break
            if only_and:
                # only ANDs seen in the list so far: keep the chain flat
                grouped.extend(chain)
            else:
                grouped.append(chain)
    return grouped
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
268
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def checkRule(l):
    """Validate the shape of a nested rule.

    A rule is accepted when it is a single element (validated recursively if
    that element is a list) or has more than two elements that pass
    ``checkRule2``. Empty and two-element rules are rejected.

    Returns:
        ``True`` if the rule is well formed, ``False`` otherwise.
    """
    if len(l) == 1:
        if isinstance(l[0], list) and checkRule(l[0]) is False:
            return False
        return True
    if len(l) > 2:
        return checkRule2(l) is not False
    return False


def checkRule2(l):
    """Validate a rule of the form ``operand (operator operand)*``.

    The list is walked in slices: ``operand operator operand`` consumes three
    elements, ``operator operand`` consumes two. Nested lists used as
    operands are validated through ``checkRule``.

    Returns:
        ``True`` if every slice matches, ``False`` otherwise. A trailing
        ``operand operator`` pair with no right-hand operand returns
        ``False`` (it previously raised ``IndexError``).
    """
    operators = ('and', 'or')
    while l:
        if len(l) == 1:
            return False
        if l[1] in operators:
            if len(l) < 3:
                # "x and" with nothing after the operator is malformed.
                return False
            if isinstance(l[0], list) and checkRule(l[0]) is False:
                return False
            if isinstance(l[2], list) and checkRule(l[2]) is False:
                return False
            l = l[3:]
        elif l[0] in operators:
            if isinstance(l[1], list) and checkRule(l[1]) is False:
                return False
            l = l[2:]
        else:
            return False
    return True
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
305
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def do_rules(rules):
    """Parse every rule string into a nested, AND-prioritised token list.

    Each rule is tokenized (``check_and_doWord``), nested by brackets
    (``brackets_to_list``), validated (``checkRule``) and regrouped
    (``priorityAND``). Empty or malformed rules yield ``[]``; malformed ones
    are also reported through ``warning``.

    Args:
        rules: sequence of rule strings.

    Returns:
        Tuple ``(split_rules, genes)``: one parsed rule per input rule, and
        the de-duplicated list of gene tokens from every rule that tokenized
        successfully (order not defined, it comes from a ``set``).
    """
    split_rules = []
    err_rules = []
    genes_seen = []
    for rule in rules:
        chars = list(rule)
        if not chars:
            split_rules.append([])
            continue
        tokenized = check_and_doWord(chars)
        if tokenized is False:
            # check_and_doWord signals unbalanced brackets with a bare False;
            # test for it before unpacking, otherwise the unpack itself
            # raises TypeError and the rule is never reported.
            split_rules.append([])
            err_rules.append(rule)
            continue
        tokens, genes = tokenized
        genes_seen.extend(genes)
        nested = brackets_to_list(tokens)
        if checkRule(nested):
            split_rules.append(priorityAND(nested))
        else:
            split_rules.append([])
            err_rules.append(rule)
    if err_rules:
        warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
    return (split_rules, list(set(genes_seen)))
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
330
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def make_recon(data):
    """Load reaction ids and GPR rules from ``data`` and parse the rules.

    ``data`` is first handed to ``cobra.io.read_sbml_model``. If that raises
    ``CobraSBMLError`` the same ``data`` is re-read as a tab-separated table
    whose first column is taken as reaction ids and second column as rules.

    Returns:
        Tuple ``(ids, split_rules, gene_in_rule)``: ``split_rules`` and the
        gene list come from ``do_rules``; ``gene_in_rule`` maps every gene to
        the string ``'ok'``.

    Raises:
        SystemExit: when the table fallback is empty, cannot be parsed, or
            has fewer than two columns.
    """
    # NOTE(review): indentation was lost in the listing this was taken from;
    # the nesting below is reconstructed - confirm against the repository.
    try:
        import cobra as cb
        import warnings
        # Silence warnings emitted while the SBML model is read.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            recon = cb.io.read_sbml_model(data)
        react = recon.reactions
        rules = [react[i].gene_reaction_rule for i in range(len(react))]
        ids = [react[i].id for i in range(len(react))]
    # NOTE(review): `cb` is only bound by the import inside the try; if that
    # import fails, this except expression cannot resolve `cb`.
    except cb.io.sbml3.CobraSBMLError:
        try:
            # Not SBML: treat the input as a TSV of (id, rule) rows, reading
            # every cell as a string and turning missing cells into ''.
            data = (pd.read_csv(data, sep = '\t', dtype = str)).fillna('')
            if len(data.columns) < 2:
                sys.exit('Execution aborted: wrong format of ' +
                         'custom GPR rules\n')
            if not len(data.columns) == 2:
                warning('WARNING: more than 2 columns in custom GPR rules.\n' +
                        'Extra columns have been disregarded\n')
            ids = list(data.iloc[:, 0])
            rules = list(data.iloc[:, 1])
        except pd.errors.EmptyDataError:
            sys.exit('Execution aborted: wrong format of custom GPR rules\n')
        except pd.errors.ParserError:
            sys.exit('Execution aborted: wrong format of custom GPR rules\n')
    split_rules, tmp_genes = do_rules(rules)
    # Turn the gene list into a dict so membership tests are direct lookups.
    gene_in_rule = {}
    for i in tmp_genes:
        gene_in_rule[i] = 'ok'
    return (ids, split_rules, gene_in_rule)
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
361
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
362 ############################ resolve_methods ##################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
363
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def replace_gene_value(l, d):
    """Replace every gene token of a nested rule with its value from ``d``.

    Nested lists are processed recursively; other tokens go through
    ``replace_gene``.

    Args:
        l: nested rule (tokens and lists); it is not mutated.
        d: mapping from gene token to value.

    Returns:
        Tuple ``(resolved, missing)``: the rule with values substituted, and
        the tokens for which ``replace_gene`` returned ``None``.
    """
    resolved = []
    missing = []
    for item in l:
        if isinstance(item, list):
            sub_resolved, sub_missing = replace_gene_value(item, d)
            resolved.append(sub_resolved)
            missing.extend(sub_missing)
            continue
        value = replace_gene(item, d)
        resolved.append(value)
        if value is None:
            missing.append(item)
    return (resolved, missing)
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
379
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def replace_gene(l, d):
    """Look up the value of a single rule token.

    Args:
        l: token; the operators ``'and'`` / ``'or'`` are returned unchanged.
        d: mapping from gene token to value.

    Returns:
        The operator itself, the ``int``/``float`` stored for the gene, or
        ``None`` when the gene is not in ``d`` (or is stored as ``None``).

    Raises:
        SystemExit: when the stored value is neither ``None`` nor a number.
    """
    if l == 'and' or l == 'or':
        return l
    value = d.get(l, None)
    if value is not None and not isinstance(value, (int, float)):
        # str() keeps the abort message working for non-string values
        # (a bare concatenation raised TypeError for e.g. lists).
        sys.exit('Execution aborted: ' + str(value) + ' value not valid\n')
    return value
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
388
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def compute(val1, op, val2, cn):
    """Combine two operand values under ``and`` / ``or``.

    With both values present, ``and`` gives their minimum and any other
    operator gives their sum. With a value missing (``None``):
    ``and`` returns the remaining value only when ``cn`` is ``True``
    (otherwise ``None``); any other operator returns the remaining value.
    If both are missing the result is ``None``.
    """
    have_first = val1 is not None
    have_second = val2 is not None

    if have_first and have_second:
        return min(val1, val2) if op == 'and' else val1 + val2

    # At least one operand is missing from here on.
    if op == 'and' and cn is not True:
        return None
    if have_first:
        return val1
    return val2
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
412
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def control(ris, l, cn):
    """Evaluate a parsed rule expression and return its score.

    ``l`` is a list of tokens. A single-token list evaluates to the token
    itself when it is a number or ``None``, or recursively when it is a
    nested list. Lists with more than two tokens are delegated to
    ``control_list`` together with the running result ``ris``.

    ``False`` is returned for any expression that cannot be evaluated
    (empty list, two-token list, or a lone token of an unsupported type).
    """
    size = len(l)
    if size > 2:
        return control_list(ris, l, cn)
    if size != 1:
        return False
    only = l[0]
    if isinstance(only, (float, int)) or only is None:
        return only
    if isinstance(only, list):
        # Unwrap redundant nesting, starting from a clean running result.
        return control(None, only, cn)
    return False
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
425
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def _operand_value(term, cn):
    """Evaluate a single operand of a rule expression.

    Returns an ``(ok, value)`` pair. Numbers and ``None`` evaluate to
    themselves; a nested list is evaluated through ``control`` and is
    rejected when that evaluation yields ``False``; any other token is
    rejected.
    """
    if term is None or isinstance(term, (float, int)):
        return True, term
    if isinstance(term, list):
        value = control(None, term, cn)
        return value is not False, value
    return False, None


def control_list(ris, l, cn):
    """Evaluate a flat sequence of operands and 'and'/'or' operators.

    The token list ``l`` is consumed left to right:
      * ``operand, operator, operand`` starts a (new) running result;
      * ``operator, operand`` combines the running result ``ris`` with the
        operand.
    Operands are numbers, ``None`` or nested lists; each pair of values is
    combined with ``compute`` using the flag ``cn``.

    Returns the final running result, or ``False`` when the expression is
    malformed. A trailing ``operand, operator`` pair with no right-hand
    operand is reported as malformed too (it previously raised IndexError).
    """
    operators = ('and', 'or')
    while l:
        if len(l) == 1:
            return False
        if l[0] in operators:
            # Continuation: fold the next operand into the running result.
            ok, right = _operand_value(l[1], cn)
            if not ok:
                return False
            ris = compute(ris, l[0], right, cn)
            l = l[2:]
            continue
        # A full triple is required here; guard against truncated input
        # before touching l[2].
        if len(l) < 3 or l[1] not in operators:
            return False
        left_ok, left = _operand_value(l[0], cn)
        if not left_ok:
            return False
        right_ok, right = _operand_value(l[2], cn)
        if not right_ok:
            return False
        ris = compute(left, l[1], right, cn)
        l = l[3:]
    return ris
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
475
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
476 ############################ gene #############################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
477
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def data_gene(gene, type_gene, name, gene_custom):
    """Validate a gene-expression table and convert it to nested dicts.

    Parameters:
        gene: DataFrame whose first column holds gene identifiers (strings)
            and whose remaining columns are labelled samples.
        type_gene: key used to select the gene table inside the bundled
            pickle when no custom table is given.
        name: dataset name, used only in error messages.
        gene_custom: mapping of gene id -> 'ok' for genes used by custom
            rules, or ``None`` to load the bundled table selected by the
            ``rules_selector`` command-line option.

    Returns:
        ``{column label: {gene id: value}}`` built from the table indexed
        by its first column.

    Exits the program when a duplicated gene identifier is one used by the
    rules, or when a column label is duplicated. The identifiers in
    ``gene`` are trimmed in place.
    """
    args = process_args(sys.argv)
    # Only identifiers padded with blanks are rewritten, as before.
    for row in range(len(gene)):
        gene_id = gene.iloc[row, 0]
        if gene_id.startswith(' ') or gene_id.endswith(' '):
            gene.iloc[row, 0] = gene_id.strip()
    gene_dup = [item for item, count in
                collections.Counter(gene[gene.columns[0]]).items()
                if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items()
               if count > 1]
    if gene_dup:
        if gene_custom is None:
            # The pickles ship with the tool; close the handle once loaded.
            if args.rules_selector == 'HMRcore':
                with open(args.tool_dir +
                          '/local/HMRcore_genes.p', 'rb') as handle:
                    gene_in_rule = pk.load(handle)
            elif args.rules_selector == 'Recon':
                with open(args.tool_dir +
                          '/local/Recon_genes.p', 'rb') as handle:
                    gene_in_rule = pk.load(handle)
            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom
        # Duplicates matter only for genes that the rules actually use.
        used_dup = [item for item in gene_dup
                    if gene_in_rule.get(item) == 'ok']
        if used_dup:
            sys.exit('Execution aborted because gene ID '
                     + str(used_dup) + ' in ' + name + ' is duplicated\n')
    if pat_dup:
        sys.exit('Execution aborted: duplicated label\n'
                 + str(pat_dup) + 'in ' + name + '\n')
    return (gene.set_index(gene.columns[0])).to_dict()
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
510
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
511 ############################ resolve ##########################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
512
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def resolve(genes, rules, ids, resolve_none, name):
    """Score every rule for every entry of ``genes``.

    For each ``key -> gene values`` pair in ``genes``, every non-empty rule
    has its gene names substituted through ``replace_gene_value`` and is
    then evaluated with ``control`` (``resolve_none`` is forwarded as the
    missing-value flag). Empty rules and rules that cannot be evaluated
    contribute ``None``.

    Returns a tuple ``(scores, not_found)`` where ``scores`` maps each key
    of ``genes`` to its list of per-rule scores and ``not_found`` lists,
    without duplicates, the genes reported missing during substitution.
    Exits the program when no rule produced a score for any entry.
    ``ids`` is accepted for interface compatibility and is not used.
    """
    scores_by_key = {}
    missing = []
    any_score = False
    for key, gene_values in genes.items():
        scores = []
        for pos in range(len(rules)):
            rule = rules[pos]
            score = None
            if rule:
                rule, unknown = replace_gene_value(rule, gene_values)
                if unknown:
                    missing.extend(unknown)
                outcome = control(None, rule, resolve_none)
                if not (outcome is False or outcome is None):
                    score = outcome
                    any_score = True
            scores.append(score)
        scores_by_key[key] = scores
    if not any_score:
        sys.exit('Execution aborted: no computable score' +
                 ' (due to missing gene values) for class '
                 + name + ', the class has been disregarded\n')
    return (scores_by_key, list(set(missing)))
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
539
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
540 ################################# clustering ##################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
541
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def f_cluster(resolve_rules):
    """Cluster the per-rule scores with K-means and write the results.

    ``resolve_rules`` maps each row label to its list of rule scores. The
    scores are turned into a table (one row per key), columns holding a
    missing score are discarded, and K-means is run for every k in the
    range given by the ``k_min``/``k_max`` command-line options.

    Outputs:
      * ``cluster_out/K=<k>_<name>.tsv`` for every k, with the columns
        ``Patient_ID`` and ``Class`` (classes are numbered from 1);
      * the inertia-versus-k plot saved as PDF to ``args.elbow``;
      * when ``args.cond_hier`` is ``'yes'``, a dendrogram built with the
        ``args.linkage`` method and saved as PDF to ``args.dendro``.
    """
    # Tolerate an output directory left over from a previous run.
    if not os.path.isdir('cluster_out'):
        os.makedirs('cluster_out')
    args = process_args(sys.argv)
    k_min = args.k_min
    k_max = args.k_max
    if k_min > k_max:
        warning('k range boundaries inverted.\n')
        k_min, k_max = k_max, k_min
    else:
        warning('k range correct.\n')
    cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index')
    # K-means cannot handle missing values: drop every rule (column) that
    # lacks a score for at least one row, not just for the first row.
    cluster_data = cluster_data.dropna(axis = 1, how = 'any')
    k_values = list(range(k_min, k_max + 1))
    distorsion = []
    for k in k_values:
        tmp_kmeans = KMeans(n_clusters = k,
                            n_init = 100,
                            max_iter = 300,
                            random_state = 0).fit(cluster_data)
        distorsion.append(tmp_kmeans.inertia_)
        # Report classes starting from 1 instead of 0.
        predict = [label + 1 for label in tmp_kmeans.predict(cluster_data)]
        classe = (pd.DataFrame(list(zip(cluster_data.index,
                                        predict)))).astype(str)
        dest = 'cluster_out/K=' + str(k) + '_' + args.name+'.tsv'
        classe.to_csv(dest, sep = '\t', index = False,
                      header = ['Patient_ID', 'Class'])
    plt.figure(0)
    plt.plot(k_values, distorsion, marker = 'o')
    plt.xlabel('Number of cluster')
    plt.ylabel('Distorsion')
    plt.savefig(args.elbow, dpi = 240, format = 'pdf')
    if args.cond_hier == 'yes':
        import scipy.cluster.hierarchy as hier
        lin = hier.linkage(cluster_data, args.linkage)
        # Single sized figure; avoids leaving an extra empty figure open.
        plt.figure(1, figsize=(10, 5))
        hier.dendrogram(lin, leaf_font_size = 2, labels = cluster_data.index)
        plt.savefig(args.dendro, dpi = 480, format = 'pdf')
    return None
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
585
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
586 ################################# main ########################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
587
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
def main():
    """Run the whole pipeline: load rules, score them, cluster the scores.

    The rule set is chosen by the ``rules_selector`` option: the bundled
    'HMRcore' or 'Recon' pickles under ``<tool_dir>/local``, or a custom
    rule file parsed with ``make_recon``. The dataset is read, its first
    column is coerced to strings, the rules are resolved against it and
    the resulting scores are handed to ``f_cluster``. Genes used by the
    rules but absent from the dataset are reported through ``warning``.
    """
    args = process_args(sys.argv)
    custom = args.rules_selector == 'Custom'
    if args.rules_selector == 'HMRcore':
        # Close the pickle handle as soon as the rules are loaded.
        with open(args.tool_dir + '/local/HMRcore_rules.p', 'rb') as handle:
            recon = pk.load(handle)
    elif args.rules_selector == 'Recon':
        with open(args.tool_dir + '/local/Recon_rules.p', 'rb') as handle:
            recon = pk.load(handle)
    elif custom:
        ids, rules, gene_in_rule = make_recon(args.custom)
    resolve_none = check_bool(args.none)
    dataset = read_dataset(args.data, args.name)
    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
    type_gene = gene_type(dataset.iloc[0, 0], args.name)
    if custom:
        genes = data_gene(dataset, type_gene, args.name, gene_in_rule)
    else:
        genes = data_gene(dataset, type_gene, args.name, None)
        ids, rules = load_id_rules(recon.get(type_gene))
    resolve_rules, err = resolve(genes, rules, ids, resolve_none, args.name)
    if err:
        warning('WARNING: gene\n' + str(err) + '\nnot found in class '
                + args.name + ', the expression level for this gene ' +
                'will be considered NaN\n')
    f_cluster(resolve_rules)
    warning('Execution succeeded')
    return None
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
613
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
614 ###############################################################################
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
615
23ac9cf12788 Uploaded
bimib
parents:
diff changeset
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()