Mercurial > repos > bimib > marea
comparison Marea/ras_generator.py @ 46:5d5d01ef1d68 draft
Uploaded
author | bimib |
---|---|
date | Wed, 22 Jan 2020 11:50:54 -0500 |
parents | |
children | 3af9d394367c |
comparison
equal
deleted
inserted
replaced
45:7aa966c488a4 | 46:5d5d01ef1d68 |
---|---|
1 from __future__ import division | |
2 import sys | |
3 import pandas as pd | |
4 import itertools as it | |
5 import scipy.stats as st | |
6 import collections | |
7 import lxml.etree as ET | |
8 import pickle as pk | |
9 import math | |
10 import os | |
11 import argparse | |
12 from svglib.svglib import svg2rlg | |
13 from reportlab.graphics import renderPDF | |
14 | |
15 ########################## argparse ########################################## | |
16 | |
17 def process_args(args): | |
18 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | |
19 description = 'process some value\'s'+ | |
20 ' genes to create a comparison\'s map.') | |
21 parser.add_argument('-rs', '--rules_selector', | |
22 type = str, | |
23 default = 'HMRcore', | |
24 choices = ['HMRcore', 'Recon', 'Custom'], | |
25 help = 'chose which type of dataset you want use') | |
26 parser.add_argument('-cr', '--custom', | |
27 type = str, | |
28 help='your dataset if you want custom rules') | |
29 parser.add_argument('-na', '--names', | |
30 type = str, | |
31 nargs = '+', | |
32 help = 'input names') | |
33 parser.add_argument('-n', '--none', | |
34 type = str, | |
35 default = 'true', | |
36 choices = ['true', 'false'], | |
37 help = 'compute Nan values') | |
38 parser.add_argument('-pv' ,'--pValue', | |
39 type = float, | |
40 default = 0.05, | |
41 help = 'P-Value threshold (default: %(default)s)') | |
42 parser.add_argument('-fc', '--fChange', | |
43 type = float, | |
44 default = 1.5, | |
45 help = 'Fold-Change threshold (default: %(default)s)') | |
46 parser.add_argument('-td', '--tool_dir', | |
47 type = str, | |
48 required = True, | |
49 help = 'your tool directory') | |
50 parser.add_argument('-op', '--option', | |
51 type = str, | |
52 choices = ['datasets', 'dataset_class', 'datasets_rasonly'], | |
53 help='dataset or dataset and class') | |
54 parser.add_argument('-ol', '--out_log', | |
55 help = "Output log") | |
56 parser.add_argument('-ids', '--input_datas', | |
57 type = str, | |
58 nargs = '+', | |
59 help = 'input datasets') | |
60 parser.add_argument('-id', '--input_data', | |
61 type = str, | |
62 help = 'input dataset') | |
63 parser.add_argument('-ic', '--input_class', | |
64 type = str, | |
65 help = 'sample group specification') | |
66 parser.add_argument('-cm', '--custom_map', | |
67 type = str, | |
68 help = 'custom map') | |
69 parser.add_argument('-yn', '--yes_no', | |
70 type = str, | |
71 choices = ['yes', 'no'], | |
72 help = 'if make or not custom map') | |
73 parser.add_argument('-gs', '--generate_svg', | |
74 type = str, | |
75 default = 'true', | |
76 choices = ['true', 'false'], | |
77 help = 'generate svg map') | |
78 parser.add_argument('-gp', '--generate_pdf', | |
79 type = str, | |
80 default = 'true', | |
81 choices = ['true', 'false'], | |
82 help = 'generate pdf map') | |
83 parser.add_argument('-gr', '--generate_ras', | |
84 type = str, | |
85 default = 'true', | |
86 choices = ['true', 'false'], | |
87 help = 'generate reaction activity score') | |
88 parser.add_argument('-sr', '--single_ras_file', | |
89 type = str, | |
90 help = 'file that will contain ras') | |
91 | |
92 args = parser.parse_args() | |
93 return args | |
94 | |
95 ########################### warning ########################################### | |
96 | |
97 def warning(s): | |
98 args = process_args(sys.argv) | |
99 with open(args.out_log, 'a') as log: | |
100 log.write(s) | |
101 | |
102 ############################ dataset input #################################### | |
103 | |
104 def read_dataset(data, name): | |
105 try: | |
106 dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') | |
107 except pd.errors.EmptyDataError: | |
108 sys.exit('Execution aborted: wrong format of ' + name + '\n') | |
109 if len(dataset.columns) < 2: | |
110 sys.exit('Execution aborted: wrong format of ' + name + '\n') | |
111 return dataset | |
112 | |
113 ############################ dataset name ##################################### | |
114 | |
115 def name_dataset(name_data, count): | |
116 if str(name_data) == 'Dataset': | |
117 return str(name_data) + '_' + str(count) | |
118 else: | |
119 return str(name_data) | |
120 | |
121 ############################ load id e rules ################################## | |
122 | |
123 def load_id_rules(reactions): | |
124 ids, rules = [], [] | |
125 for key, value in reactions.items(): | |
126 ids.append(key) | |
127 rules.append(value) | |
128 return (ids, rules) | |
129 | |
130 ############################ check_methods #################################### | |
131 | |
132 def gene_type(l, name): | |
133 if check_hgnc(l): | |
134 return 'hugo_id' | |
135 elif check_ensembl(l): | |
136 return 'ensembl_gene_id' | |
137 elif check_symbol(l): | |
138 return 'symbol' | |
139 elif check_entrez(l): | |
140 return 'entrez_id' | |
141 else: | |
142 sys.exit('Execution aborted:\n' + | |
143 'gene ID type in ' + name + ' not supported. Supported ID'+ | |
144 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n') | |
145 | |
146 def check_hgnc(l): | |
147 if len(l) > 5: | |
148 if (l.upper()).startswith('HGNC:'): | |
149 return l[5:].isdigit() | |
150 else: | |
151 return False | |
152 else: | |
153 return False | |
154 | |
155 def check_ensembl(l): | |
156 if len(l) == 15: | |
157 if (l.upper()).startswith('ENS'): | |
158 return l[4:].isdigit() | |
159 else: | |
160 return False | |
161 else: | |
162 return False | |
163 | |
164 def check_symbol(l): | |
165 if len(l) > 0: | |
166 if l[0].isalpha() and l[1:].isalnum(): | |
167 return True | |
168 else: | |
169 return False | |
170 else: | |
171 return False | |
172 | |
173 def check_entrez(l): | |
174 if len(l) > 0: | |
175 return l.isdigit() | |
176 else: | |
177 return False | |
178 | |
179 def check_bool(b): | |
180 if b == 'true': | |
181 return True | |
182 elif b == 'false': | |
183 return False | |
184 | |
185 ############################ resolve_methods ################################## | |
186 | |
187 def replace_gene_value(l, d): | |
188 tmp = [] | |
189 err = [] | |
190 while l: | |
191 if isinstance(l[0], list): | |
192 tmp_rules, tmp_err = replace_gene_value(l[0], d) | |
193 tmp.append(tmp_rules) | |
194 err.extend(tmp_err) | |
195 else: | |
196 value = replace_gene(l[0], d) | |
197 tmp.append(value) | |
198 if value == None: | |
199 err.append(l[0]) | |
200 l = l[1:] | |
201 return (tmp, err) | |
202 | |
203 | |
204 def replace_gene(l, d): | |
205 if l =='and' or l == 'or': | |
206 return l | |
207 else: | |
208 value = d.get(l, None) | |
209 if not(value == None or isinstance(value, (int, float))): | |
210 sys.exit('Execution aborted: ' + value + ' value not valid\n') | |
211 return value | |
212 | |
213 def computes(val1, op, val2, cn): | |
214 if val1 != None and val2 != None: | |
215 if op == 'and': | |
216 return min(val1, val2) | |
217 else: | |
218 return val1 + val2 | |
219 elif op == 'and': | |
220 if cn is True: | |
221 if val1 != None: | |
222 return val1 | |
223 elif val2 != None: | |
224 return val2 | |
225 else: | |
226 return None | |
227 else: | |
228 return None | |
229 else: | |
230 if val1 != None: | |
231 return val1 | |
232 elif val2 != None: | |
233 return val2 | |
234 else: | |
235 return None | |
236 | |
237 def control(ris, l, cn): | |
238 if len(l) == 1: | |
239 if isinstance(l[0], (float, int)) or l[0] == None: | |
240 return l[0] | |
241 elif isinstance(l[0], list): | |
242 return control(None, l[0], cn) | |
243 else: | |
244 return False | |
245 elif len(l) > 2: | |
246 return control_list(ris, l, cn) | |
247 else: | |
248 return False | |
249 | |
250 def control_list(ris, l, cn): | |
251 while l: | |
252 if len(l) == 1: | |
253 return False | |
254 elif (isinstance(l[0], (float, int)) or | |
255 l[0] == None) and l[1] in ['and', 'or']: | |
256 if isinstance(l[2], (float, int)) or l[2] == None: | |
257 ris = computes(l[0], l[1], l[2], cn) | |
258 elif isinstance(l[2], list): | |
259 tmp = control(None, l[2], cn) | |
260 if tmp is False: | |
261 return False | |
262 else: | |
263 ris = computes(l[0], l[1], tmp, cn) | |
264 else: | |
265 return False | |
266 l = l[3:] | |
267 elif l[0] in ['and', 'or']: | |
268 if isinstance(l[1], (float, int)) or l[1] == None: | |
269 ris = computes(ris, l[0], l[1], cn) | |
270 elif isinstance(l[1], list): | |
271 tmp = control(None,l[1], cn) | |
272 if tmp is False: | |
273 return False | |
274 else: | |
275 ris = computes(ris, l[0], tmp, cn) | |
276 else: | |
277 return False | |
278 l = l[2:] | |
279 elif isinstance(l[0], list) and l[1] in ['and', 'or']: | |
280 if isinstance(l[2], (float, int)) or l[2] == None: | |
281 tmp = control(None, l[0], cn) | |
282 if tmp is False: | |
283 return False | |
284 else: | |
285 ris = computes(tmp, l[1], l[2], cn) | |
286 elif isinstance(l[2], list): | |
287 tmp = control(None, l[0], cn) | |
288 tmp2 = control(None, l[2], cn) | |
289 if tmp is False or tmp2 is False: | |
290 return False | |
291 else: | |
292 ris = computes(tmp, l[1], tmp2, cn) | |
293 else: | |
294 return False | |
295 l = l[3:] | |
296 else: | |
297 return False | |
298 return ris | |
299 | |
300 ############################ map_methods ###################################### | |
301 | |
302 def fold_change(avg1, avg2): | |
303 if avg1 == 0 and avg2 == 0: | |
304 return 0 | |
305 elif avg1 == 0: | |
306 return '-INF' | |
307 elif avg2 == 0: | |
308 return 'INF' | |
309 else: | |
310 return math.log(avg1 / avg2, 2) | |
311 | |
312 def fix_style(l, col, width, dash): | |
313 tmp = l.split(';') | |
314 flag_col = False | |
315 flag_width = False | |
316 flag_dash = False | |
317 for i in range(len(tmp)): | |
318 if tmp[i].startswith('stroke:'): | |
319 tmp[i] = 'stroke:' + col | |
320 flag_col = True | |
321 if tmp[i].startswith('stroke-width:'): | |
322 tmp[i] = 'stroke-width:' + width | |
323 flag_width = True | |
324 if tmp[i].startswith('stroke-dasharray:'): | |
325 tmp[i] = 'stroke-dasharray:' + dash | |
326 flag_dash = True | |
327 if not flag_col: | |
328 tmp.append('stroke:' + col) | |
329 if not flag_width: | |
330 tmp.append('stroke-width:' + width) | |
331 if not flag_dash: | |
332 tmp.append('stroke-dasharray:' + dash) | |
333 return ';'.join(tmp) | |
334 | |
335 def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C): | |
336 maxT = 12 | |
337 minT = 2 | |
338 grey = '#BEBEBE' | |
339 blue = '#0000FF' | |
340 red = '#E41A1C' | |
341 for el in core_map.iter(): | |
342 el_id = str(el.get('id')) | |
343 if el_id.startswith('R_'): | |
344 tmp = d.get(el_id[2:]) | |
345 if tmp != None: | |
346 p_val = tmp[0] | |
347 f_c = tmp[1] | |
348 if p_val < threshold_P_V: | |
349 if not isinstance(f_c, str): | |
350 if abs(f_c) < math.log(threshold_F_C, 2): | |
351 col = grey | |
352 width = str(minT) | |
353 else: | |
354 if f_c < 0: | |
355 col = blue | |
356 elif f_c > 0: | |
357 col = red | |
358 width = str(max((abs(f_c) * maxT) / max_F_C, minT)) | |
359 else: | |
360 if f_c == '-INF': | |
361 col = blue | |
362 elif f_c == 'INF': | |
363 col = red | |
364 width = str(maxT) | |
365 dash = 'none' | |
366 else: | |
367 dash = '5,5' | |
368 col = grey | |
369 width = str(minT) | |
370 el.set('style', fix_style(el.get('style'), col, width, dash)) | |
371 return core_map | |
372 | |
373 ############################ make recon ####################################### | |
374 | |
375 def check_and_doWord(l): | |
376 tmp = [] | |
377 tmp_genes = [] | |
378 count = 0 | |
379 while l: | |
380 if count >= 0: | |
381 if l[0] == '(': | |
382 count += 1 | |
383 tmp.append(l[0]) | |
384 l.pop(0) | |
385 elif l[0] == ')': | |
386 count -= 1 | |
387 tmp.append(l[0]) | |
388 l.pop(0) | |
389 elif l[0] == ' ': | |
390 l.pop(0) | |
391 else: | |
392 word = [] | |
393 while l: | |
394 if l[0] in [' ', '(', ')']: | |
395 break | |
396 else: | |
397 word.append(l[0]) | |
398 l.pop(0) | |
399 word = ''.join(word) | |
400 tmp.append(word) | |
401 if not(word in ['or', 'and']): | |
402 tmp_genes.append(word) | |
403 else: | |
404 return False | |
405 if count == 0: | |
406 return (tmp, tmp_genes) | |
407 else: | |
408 return False | |
409 | |
410 def brackets_to_list(l): | |
411 tmp = [] | |
412 while l: | |
413 if l[0] == '(': | |
414 l.pop(0) | |
415 tmp.append(resolve_brackets(l)) | |
416 else: | |
417 tmp.append(l[0]) | |
418 l.pop(0) | |
419 return tmp | |
420 | |
421 def resolve_brackets(l): | |
422 tmp = [] | |
423 while l[0] != ')': | |
424 if l[0] == '(': | |
425 l.pop(0) | |
426 tmp.append(resolve_brackets(l)) | |
427 else: | |
428 tmp.append(l[0]) | |
429 l.pop(0) | |
430 l.pop(0) | |
431 return tmp | |
432 | |
433 def priorityAND(l): | |
434 tmp = [] | |
435 flag = True | |
436 while l: | |
437 if len(l) == 1: | |
438 if isinstance(l[0], list): | |
439 tmp.append(priorityAND(l[0])) | |
440 else: | |
441 tmp.append(l[0]) | |
442 l = l[1:] | |
443 elif l[0] == 'or': | |
444 tmp.append(l[0]) | |
445 flag = False | |
446 l = l[1:] | |
447 elif l[1] == 'or': | |
448 if isinstance(l[0], list): | |
449 tmp.append(priorityAND(l[0])) | |
450 else: | |
451 tmp.append(l[0]) | |
452 tmp.append(l[1]) | |
453 flag = False | |
454 l = l[2:] | |
455 elif l[1] == 'and': | |
456 tmpAnd = [] | |
457 if isinstance(l[0], list): | |
458 tmpAnd.append(priorityAND(l[0])) | |
459 else: | |
460 tmpAnd.append(l[0]) | |
461 tmpAnd.append(l[1]) | |
462 if isinstance(l[2], list): | |
463 tmpAnd.append(priorityAND(l[2])) | |
464 else: | |
465 tmpAnd.append(l[2]) | |
466 l = l[3:] | |
467 while l: | |
468 if l[0] == 'and': | |
469 tmpAnd.append(l[0]) | |
470 if isinstance(l[1], list): | |
471 tmpAnd.append(priorityAND(l[1])) | |
472 else: | |
473 tmpAnd.append(l[1]) | |
474 l = l[2:] | |
475 elif l[0] == 'or': | |
476 flag = False | |
477 break | |
478 if flag == True: #when there are only AND in list | |
479 tmp.extend(tmpAnd) | |
480 elif flag == False: | |
481 tmp.append(tmpAnd) | |
482 return tmp | |
483 | |
484 def checkRule(l): | |
485 if len(l) == 1: | |
486 if isinstance(l[0], list): | |
487 if checkRule(l[0]) is False: | |
488 return False | |
489 elif len(l) > 2: | |
490 if checkRule2(l) is False: | |
491 return False | |
492 else: | |
493 return False | |
494 return True | |
495 | |
496 def checkRule2(l): | |
497 while l: | |
498 if len(l) == 1: | |
499 return False | |
500 elif isinstance(l[0], list) and l[1] in ['and', 'or']: | |
501 if checkRule(l[0]) is False: | |
502 return False | |
503 if isinstance(l[2], list): | |
504 if checkRule(l[2]) is False: | |
505 return False | |
506 l = l[3:] | |
507 elif l[1] in ['and', 'or']: | |
508 if isinstance(l[2], list): | |
509 if checkRule(l[2]) is False: | |
510 return False | |
511 l = l[3:] | |
512 elif l[0] in ['and', 'or']: | |
513 if isinstance(l[1], list): | |
514 if checkRule(l[1]) is False: | |
515 return False | |
516 l = l[2:] | |
517 else: | |
518 return False | |
519 return True | |
520 | |
521 def do_rules(rules): | |
522 split_rules = [] | |
523 err_rules = [] | |
524 tmp_gene_in_rule = [] | |
525 for i in range(len(rules)): | |
526 tmp = list(rules[i]) | |
527 if tmp: | |
528 tmp, tmp_genes = check_and_doWord(tmp) | |
529 tmp_gene_in_rule.extend(tmp_genes) | |
530 if tmp is False: | |
531 split_rules.append([]) | |
532 err_rules.append(rules[i]) | |
533 else: | |
534 tmp = brackets_to_list(tmp) | |
535 if checkRule(tmp): | |
536 split_rules.append(priorityAND(tmp)) | |
537 else: | |
538 split_rules.append([]) | |
539 err_rules.append(rules[i]) | |
540 else: | |
541 split_rules.append([]) | |
542 if err_rules: | |
543 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') | |
544 return (split_rules, list(set(tmp_gene_in_rule))) | |
545 | |
546 def make_recon(data): | |
547 try: | |
548 import cobra as cb | |
549 import warnings | |
550 with warnings.catch_warnings(): | |
551 warnings.simplefilter('ignore') | |
552 recon = cb.io.read_sbml_model(data) | |
553 react = recon.reactions | |
554 rules = [react[i].gene_reaction_rule for i in range(len(react))] | |
555 ids = [react[i].id for i in range(len(react))] | |
556 except cb.io.sbml3.CobraSBMLError: | |
557 try: | |
558 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') | |
559 if len(data.columns) < 2: | |
560 sys.exit('Execution aborted: wrong format of '+ | |
561 'custom datarules\n') | |
562 if not len(data.columns) == 2: | |
563 warning('Warning: more than 2 columns in custom datarules.\n' + | |
564 'Extra columns have been disregarded\n') | |
565 ids = list(data.iloc[:, 0]) | |
566 rules = list(data.iloc[:, 1]) | |
567 except pd.errors.EmptyDataError: | |
568 sys.exit('Execution aborted: wrong format of custom datarules\n') | |
569 except pd.errors.ParserError: | |
570 sys.exit('Execution aborted: wrong format of custom datarules\n') | |
571 split_rules, tmp_genes = do_rules(rules) | |
572 gene_in_rule = {} | |
573 for i in tmp_genes: | |
574 gene_in_rule[i] = 'ok' | |
575 return (ids, split_rules, gene_in_rule) | |
576 | |
577 ############################ gene ############################################# | |
578 | |
579 def data_gene(gene, type_gene, name, gene_custom): | |
580 args = process_args(sys.argv) | |
581 for i in range(len(gene)): | |
582 tmp = gene.iloc[i, 0] | |
583 if tmp.startswith(' ') or tmp.endswith(' '): | |
584 gene.iloc[i, 0] = (tmp.lstrip()).rstrip() | |
585 gene_dup = [item for item, count in | |
586 collections.Counter(gene[gene.columns[0]]).items() if count > 1] | |
587 pat_dup = [item for item, count in | |
588 collections.Counter(list(gene.columns)).items() if count > 1] | |
589 | |
590 if gene_dup: | |
591 if gene_custom == None: | |
592 if args.rules_selector == 'HMRcore': | |
593 gene_in_rule = pk.load(open(args.tool_dir + | |
594 '/local/HMRcore_genes.p', 'rb')) | |
595 elif args.rules_selector == 'Recon': | |
596 gene_in_rule = pk.load(open(args.tool_dir + | |
597 '/local/Recon_genes.p', 'rb')) | |
598 gene_in_rule = gene_in_rule.get(type_gene) | |
599 else: | |
600 gene_in_rule = gene_custom | |
601 tmp = [] | |
602 for i in gene_dup: | |
603 if gene_in_rule.get(i) == 'ok': | |
604 tmp.append(i) | |
605 if tmp: | |
606 sys.exit('Execution aborted because gene ID ' | |
607 +str(tmp)+' in '+name+' is duplicated\n') | |
608 if pat_dup: | |
609 warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + | |
610 '\n') | |
611 | |
612 return (gene.set_index(gene.columns[0])).to_dict() | |
613 | |
614 ############################ resolve ########################################## | |
615 | |
616 def resolve(genes, rules, ids, resolve_none, name): | |
617 resolve_rules = {} | |
618 names_array = [] | |
619 not_found = [] | |
620 flag = False | |
621 for key, value in genes.items(): | |
622 tmp_resolve = [] | |
623 for i in range(len(rules)): | |
624 tmp = rules[i] | |
625 if tmp: | |
626 tmp, err = replace_gene_value(tmp, value) | |
627 if err: | |
628 not_found.extend(err) | |
629 ris = control(None, tmp, resolve_none) | |
630 if ris is False or ris == None: | |
631 tmp_resolve.append(None) | |
632 else: | |
633 tmp_resolve.append(ris) | |
634 flag = True | |
635 else: | |
636 tmp_resolve.append(None) | |
637 resolve_rules[key] = tmp_resolve | |
638 if flag is False: | |
639 warning('Warning: no computable score (due to missing gene values)' + | |
640 'for class ' + name + ', the class has been disregarded\n') | |
641 return (None, None) | |
642 return (resolve_rules, list(set(not_found))) | |
643 | |
644 ############################ split class ###################################### | |
645 | |
646 def split_class(classes, resolve_rules): | |
647 class_pat = {} | |
648 for i in range(len(classes)): | |
649 classe = classes.iloc[i, 1] | |
650 if not pd.isnull(classe): | |
651 l = [] | |
652 for j in range(i, len(classes)): | |
653 if classes.iloc[j, 1] == classe: | |
654 pat_id = classes.iloc[j, 0] | |
655 tmp = resolve_rules.get(pat_id, None) | |
656 if tmp != None: | |
657 l.append(tmp) | |
658 classes.iloc[j, 1] = None | |
659 if l: | |
660 class_pat[classe] = list(map(list, zip(*l))) | |
661 else: | |
662 warning('Warning: no sample found in class ' + classe + | |
663 ', the class has been disregarded\n') | |
664 return class_pat | |
665 | |
666 ############################ create_ras ####################################### | |
667 | |
668 def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): | |
669 | |
670 if resolve_rules == None: | |
671 warning("Couldn't generate RAS for current dataset: " + dataset_name) | |
672 | |
673 for geni in resolve_rules.values(): | |
674 for i, valori in enumerate(geni): | |
675 if valori == None: | |
676 geni[i] = 'None' | |
677 | |
678 output_ras = pd.DataFrame.from_dict(resolve_rules) | |
679 | |
680 output_ras.insert(0, 'Reactions', ids) | |
681 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) | |
682 | |
683 if (single_ras): | |
684 args = process_args(sys.argv) | |
685 text_file = open(args.single_ras_file, "w") | |
686 else: | |
687 text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") | |
688 | |
689 text_file.write(output_to_csv) | |
690 text_file.close() | |
691 | |
692 ############################ map ############################################## | |
693 | |
694 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): | |
695 args = process_args(sys.argv) | |
696 if (not class_pat) or (len(class_pat.keys()) < 2): | |
697 sys.exit('Execution aborted: classes provided for comparisons are ' + | |
698 'less than two\n') | |
699 for i, j in it.combinations(class_pat.keys(), 2): | |
700 tmp = {} | |
701 count = 0 | |
702 max_F_C = 0 | |
703 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | |
704 try: | |
705 stat_D, p_value = st.ks_2samp(l1, l2) | |
706 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
707 if not isinstance(avg, str): | |
708 if max_F_C < abs(avg): | |
709 max_F_C = abs(avg) | |
710 tmp[ids[count]] = [float(p_value), avg] | |
711 count += 1 | |
712 except (TypeError, ZeroDivisionError): | |
713 count += 1 | |
714 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | |
715 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
716 tmp_csv = tmp_csv.reset_index() | |
717 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
718 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
719 | |
720 if create_svg or create_pdf: | |
721 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | |
722 and args.yes_no == 'yes'): | |
723 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
724 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | |
725 with open(file_svg, 'wb') as new_map: | |
726 new_map.write(ET.tostring(core_map)) | |
727 | |
728 | |
729 if create_pdf: | |
730 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
731 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
732 | |
733 if not create_svg: | |
734 #Ho utilizzato il file svg per generare il pdf, | |
735 #ma l'utente non ne ha richiesto il ritorno, quindi | |
736 #lo elimino | |
737 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | |
738 | |
739 return None | |
740 | |
741 ############################ MAIN ############################################# | |
742 | |
743 def main(): | |
744 args = process_args(sys.argv) | |
745 | |
746 create_svg = check_bool(args.generate_svg) | |
747 create_pdf = check_bool(args.generate_pdf) | |
748 generate_ras = check_bool(args.generate_ras) | |
749 | |
750 os.makedirs('result') | |
751 | |
752 if generate_ras: | |
753 os.makedirs('ras') | |
754 | |
755 if args.rules_selector == 'HMRcore': | |
756 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | |
757 elif args.rules_selector == 'Recon': | |
758 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | |
759 elif args.rules_selector == 'Custom': | |
760 ids, rules, gene_in_rule = make_recon(args.custom) | |
761 | |
762 resolve_none = check_bool(args.none) | |
763 | |
764 class_pat = {} | |
765 | |
766 if args.option == 'datasets_rasonly': | |
767 name = "RAS Dataset" | |
768 dataset = read_dataset(args.input_datas[0],"dataset") | |
769 | |
770 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | |
771 | |
772 type_gene = gene_type(dataset.iloc[0, 0], name) | |
773 | |
774 if args.rules_selector != 'Custom': | |
775 genes = data_gene(dataset, type_gene, name, None) | |
776 ids, rules = load_id_rules(recon.get(type_gene)) | |
777 elif args.rules_selector == 'Custom': | |
778 genes = data_gene(dataset, type_gene, name, gene_in_rule) | |
779 | |
780 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | |
781 | |
782 create_ras(resolve_rules, name, True, rules, ids) | |
783 | |
784 if err != None and err: | |
785 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | |
786 + name + ', the expression level for this gene ' + | |
787 'will be considered NaN\n') | |
788 | |
789 print('execution succeded') | |
790 return None | |
791 | |
792 | |
793 elif args.option == 'datasets': | |
794 num = 1 | |
795 for i, j in zip(args.input_datas, args.names): | |
796 | |
797 name = name_dataset(j, num) | |
798 dataset = read_dataset(i, name) | |
799 | |
800 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | |
801 | |
802 type_gene = gene_type(dataset.iloc[0, 0], name) | |
803 | |
804 if args.rules_selector != 'Custom': | |
805 genes = data_gene(dataset, type_gene, name, None) | |
806 ids, rules = load_id_rules(recon.get(type_gene)) | |
807 elif args.rules_selector == 'Custom': | |
808 genes = data_gene(dataset, type_gene, name, gene_in_rule) | |
809 | |
810 | |
811 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | |
812 | |
813 if generate_ras: | |
814 create_ras(resolve_rules, name, False, rules, ids) | |
815 | |
816 if err != None and err: | |
817 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | |
818 + name + ', the expression level for this gene ' + | |
819 'will be considered NaN\n') | |
820 if resolve_rules != None: | |
821 class_pat[name] = list(map(list, zip(*resolve_rules.values()))) | |
822 num += 1 | |
823 elif args.option == 'dataset_class': | |
824 name = 'RNAseq' | |
825 dataset = read_dataset(args.input_data, name) | |
826 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | |
827 type_gene = gene_type(dataset.iloc[0, 0], name) | |
828 classes = read_dataset(args.input_class, 'class') | |
829 if not len(classes.columns) == 2: | |
830 warning('Warning: more than 2 columns in class file. Extra' + | |
831 'columns have been disregarded\n') | |
832 classes = classes.astype(str) | |
833 if args.rules_selector != 'Custom': | |
834 genes = data_gene(dataset, type_gene, name, None) | |
835 ids, rules = load_id_rules(recon.get(type_gene)) | |
836 elif args.rules_selector == 'Custom': | |
837 genes = data_gene(dataset, type_gene, name, gene_in_rule) | |
838 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | |
839 if err != None and err: | |
840 warning('Warning: gene\n'+str(err)+'\nnot found in class ' | |
841 + name + ', the expression level for this gene ' + | |
842 'will be considered NaN\n') | |
843 if resolve_rules != None: | |
844 class_pat = split_class(classes, resolve_rules) | |
845 if generate_ras: | |
846 create_ras(resolve_rules, name, False, rules, ids) | |
847 | |
848 | |
849 if args.rules_selector == 'Custom': | |
850 if args.yes_no == 'yes': | |
851 try: | |
852 core_map = ET.parse(args.custom_map) | |
853 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): | |
854 sys.exit('Execution aborted: custom map in wrong format') | |
855 elif args.yes_no == 'no': | |
856 core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') | |
857 else: | |
858 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') | |
859 | |
860 maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) | |
861 | |
862 print('Execution succeded') | |
863 | |
864 return None | |
865 | |
866 ############################################################################### | |
867 | |
868 if __name__ == "__main__": | |
869 main() |