Mercurial > repos > bimib > marea
comparison Marea/marea.py @ 0:23ac9cf12788 draft
Uploaded
author | bimib |
---|---|
date | Tue, 06 Nov 2018 03:16:21 -0500 |
parents | |
children | 7c76e8e319c2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:23ac9cf12788 |
---|---|
1 | |
2 from __future__ import division | |
3 import sys | |
4 import pandas as pd | |
5 import itertools as it | |
6 import scipy.stats as st | |
7 import collections | |
8 import lxml.etree as ET | |
9 import pickle as pk | |
10 import math | |
11 import os | |
12 import argparse | |
13 from svglib.svglib import svg2rlg | |
14 from reportlab.graphics import renderPDF | |
15 | |
16 ########################## argparse ########################################### | |
17 | |
def process_args(args):
    """Parse the command line of the tool.

    Parameters
    ----------
    args : list of str or None
        Full argument vector in ``sys.argv`` layout, i.e. the first item is
        the program name and is skipped. ``None`` makes argparse read
        ``sys.argv`` itself.

    Returns
    -------
    argparse.Namespace
        The parsed options.
    """
    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                     description = 'process some value\'s'+
                                     ' genes to create a comparison\'s map.')
    parser.add_argument('-rs', '--rules_selector',
                        type = str,
                        default = 'HMRcore',
                        choices = ['HMRcore', 'Recon', 'Custom'],
                        help = 'chose which type of dataset you want use')
    parser.add_argument('-cr', '--custom',
                        type = str,
                        help='your dataset if you want custom rules')
    parser.add_argument('-na', '--names',
                        type = str,
                        nargs = '+',
                        help = 'input names')
    parser.add_argument('-n', '--none',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'compute Nan values')
    parser.add_argument('-pv' ,'--pValue',
                        type = float,
                        default = 0.05,
                        help = 'P-Value threshold (default: %(default)s)')
    parser.add_argument('-fc', '--fChange',
                        type = float,
                        default = 1.5,
                        help = 'Fold-Change threshold (default: %(default)s)')
    parser.add_argument('-td', '--tool_dir',
                        type = str,
                        required = True,
                        help = 'your tool directory')
    parser.add_argument('-op', '--option',
                        type = str,
                        choices = ['datasets', 'dataset_class'],
                        help='dataset or dataset and class')
    parser.add_argument('-ol', '--out_log',
                        help = "Output log")
    parser.add_argument('-ids', '--input_datas',
                        type = str,
                        nargs = '+',
                        help = 'input datasets')
    parser.add_argument('-id', '--input_data',
                        type = str,
                        help = 'input dataset')
    parser.add_argument('-ic', '--input_class',
                        type = str,
                        help = 'sample group specification')
    parser.add_argument('-cm', '--custom_map',
                        type = str,
                        help = 'custom map')
    parser.add_argument('-yn', '--yes_no',
                        type = str,
                        choices = ['yes', 'no'],
                        help = 'if make or not custom map')
    # Honour the vector that was passed in instead of silently re-reading
    # sys.argv; every caller in this file passes sys.argv, so the result is
    # unchanged for them.
    argv = None if args is None else list(args)[1:]
    return parser.parse_args(argv)
76 | |
77 ########################### warning ########################################### | |
78 | |
def warning(s):
    """Append the message ``s`` to the log file given with ``--out_log``.

    The command line is re-parsed on every call to find the log path. When no
    log file was requested the message goes to standard error instead of
    failing on ``open(None)``.
    """
    args = process_args(sys.argv)
    if args.out_log is None:
        sys.stderr.write(s)
        return
    with open(args.out_log, 'a') as log:
        log.write(s)
83 | |
84 ############################ dataset input #################################### | |
85 | |
def read_dataset(data, name):
    """Read a tab-separated table whose first row is the header.

    Parameters
    ----------
    data : str or file-like
        Path or buffer handed to ``pandas.read_csv``.
    name : str
        Label used in the error message.

    Returns
    -------
    pandas.DataFrame
        The table, guaranteed to have at least two columns.

    Raises
    ------
    SystemExit
        If the input is empty, cannot be tokenised, or has fewer than two
        columns.
    """
    try:
        dataset = pd.read_csv(data, sep = '\t', header = 0)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # A ragged file is just as much a "wrong format" as an empty one.
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    if len(dataset.columns) < 2:
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    return dataset
94 | |
95 ############################ dataset name ##################################### | |
96 | |
def name_dataset(name_data, count):
    """Return the label of a dataset.

    The placeholder name ``'Dataset'`` gets ``'_<count>'`` appended; any other
    name is returned as its string form.
    """
    label = str(name_data)
    if label != 'Dataset':
        return label
    return label + '_' + str(count)
102 | |
103 ############################ load ids and rules ############################### | |
104 | |
def load_id_rules(reactions):
    """Split a ``{reaction id: rule}`` mapping into two parallel lists.

    Returns ``(ids, rules)`` in the iteration order of ``reactions``.
    """
    ids = list(reactions.keys())
    rules = list(reactions.values())
    return (ids, rules)
111 | |
112 ############################ check_methods #################################### | |
113 | |
def gene_type(l, name):
    """Classify the gene identifier ``l``.

    The checks run in a fixed order (HGNC id, Ensembl id, symbol, Entrez id)
    and the first match wins.

    Returns
    -------
    str
        One of ``'hugo_id'``, ``'ensembl_gene_id'``, ``'symbol'``,
        ``'entrez_id'``.

    Raises
    ------
    SystemExit
        If ``l`` matches none of the supported formats; ``name`` is quoted in
        the message.
    """
    if check_hgnc(l):
        return 'hugo_id'
    if check_ensembl(l):
        return 'ensembl_gene_id'
    if check_symbol(l):
        return 'symbol'
    if check_entrez(l):
        return 'entrez_id'
    # The original concatenation produced "Supported IDtypes are"; the
    # missing blank is restored here.
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID ' +
             'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
127 | |
def check_hgnc(l):
    """Return True when ``l`` is ``HGNC:`` (any case) followed by digits only."""
    return (len(l) > 5
            and l.upper().startswith('HGNC:')
            and l[5:].isdigit())
136 | |
def check_ensembl(l):
    """Return True for a 15-character id starting with ``ENS`` (any case)
    whose characters from position 4 onwards are all digits."""
    if len(l) != 15:
        return False
    if not l.upper().startswith('ENS'):
        return False
    return l[4:].isdigit()
145 | |
def check_symbol(l):
    """Return True when ``l`` starts with a letter and the remainder is a
    non-empty alphanumeric string."""
    if not len(l) > 0:
        return False
    first, rest = l[0], l[1:]
    return bool(first.isalpha() and rest.isalnum())
154 | |
def check_entrez(l):
    """Return True when ``l`` is a non-empty string of digits."""
    return len(l) > 0 and l.isdigit()
160 | |
def check_bool(b):
    """Translate the strings ``'true'`` / ``'false'`` into booleans.

    Any other value yields ``None``.
    """
    if b == 'true':
        return True
    if b == 'false':
        return False
    return None
166 | |
167 ############################ resolve_methods ################################## | |
168 | |
def replace_gene_value(l, d):
    """Substitute every gene name in a (possibly nested) rule with its value.

    Parameters
    ----------
    l : list
        Rule tokens; nested lists are processed recursively.
    d : dict
        Gene name -> expression value for one sample.

    Returns
    -------
    tuple
        ``(values, missing)`` where ``values`` mirrors the structure of ``l``
        with each token replaced by ``replace_gene(token, d)`` and ``missing``
        lists the tokens for which that lookup returned ``None``.
    """
    values = []
    missing = []
    # Plain iteration replaces the original "l = l[1:]" loop, which copied
    # the remaining list on every step.
    for token in l:
        if isinstance(token, list):
            sub_values, sub_missing = replace_gene_value(token, d)
            values.append(sub_values)
            missing.extend(sub_missing)
        else:
            value = replace_gene(token, d)
            values.append(value)
            if value is None:
                missing.append(token)
    return (values, missing)
184 | |
def replace_gene(l, d):
    """Look up the value of one rule token.

    The operators ``'and'`` / ``'or'`` are returned unchanged. Any other
    token is looked up in ``d``; a missing gene yields ``None``.

    Raises
    ------
    SystemExit
        If the stored value is neither ``None`` nor an int/float.
    """
    if l == 'and' or l == 'or':
        return l
    value = d.get(l, None)
    if not (value is None or isinstance(value, (int, float))):
        # str() keeps the abort message working for non-string values, which
        # previously failed with a TypeError during concatenation.
        sys.exit('Execution aborted: ' + str(value) + ' value not valid\n')
    return value
193 | |
def computes(val1, op, val2, cn):
    """Combine two operand values with ``op``.

    With both values present, ``'and'`` gives their minimum and any other
    operator their sum. With a value missing (``None``): ``'and'`` returns
    the remaining value only when ``cn is True`` (otherwise ``None``); any
    other operator returns whichever value is present. ``None`` is returned
    when both are missing.
    """
    have_first = val1 != None
    have_second = val2 != None
    if have_first and have_second:
        return min(val1, val2) if op == 'and' else val1 + val2
    if op == 'and' and cn is not True:
        return None
    if have_first:
        return val1
    if have_second:
        return val2
    return None
217 | |
def control(ris, l, cn):
    """Evaluate a rule whose genes were already replaced by values.

    A one-element rule evaluates to that element (number or ``None``) or,
    for a nested list, to the evaluation of that list. Rules longer than two
    elements are delegated to ``control_list``. Everything else is reported
    as malformed by returning ``False``.
    """
    size = len(l)
    if size > 2:
        return control_list(ris, l, cn)
    if size != 1:
        return False
    head = l[0]
    if isinstance(head, (float, int)) or head == None:
        return head
    if isinstance(head, list):
        return control(None, head, cn)
    return False
230 | |
def control_list(ris, l, cn):
    """Evaluate a flat ``operand operator operand ...`` sequence.

    ``l`` holds numbers, ``None`` (missing value), nested lists and the
    operators ``'and'`` / ``'or'``. Tokens are consumed from the front and
    combined with ``computes``; nested lists are evaluated through
    ``control``. ``ris`` is the running result and ``cn`` is forwarded to
    ``computes``.

    Returns the final value of ``ris``, or ``False`` as soon as the sequence
    does not match one of the accepted shapes.
    """
    while l:
        if len(l) == 1:
            # A lone trailing token cannot be combined with anything.
            return False
        elif (isinstance(l[0], (float, int)) or
              l[0] == None) and l[1] in ['and', 'or']:
            # Shape: value op <value | list>. Note that ris is replaced, not
            # combined with its previous content.
            # NOTE(review): l[2] is read without a length check, so a
            # two-element remainder would raise IndexError here.
            if isinstance(l[2], (float, int)) or l[2] == None:
                ris = computes(l[0], l[1], l[2], cn)
            elif isinstance(l[2], list):
                tmp = control(None, l[2], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(l[0], l[1], tmp, cn)
            else:
                return False
            l = l[3:]
        elif l[0] in ['and', 'or']:
            # Shape: op <value | list>, applied to the running result.
            if isinstance(l[1], (float, int)) or l[1] == None:
                ris = computes(ris, l[0], l[1], cn)
            elif isinstance(l[1], list):
                tmp = control(None,l[1], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(ris, l[0], tmp, cn)
            else:
                return False
            l = l[2:]
        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
            # Shape: list op <value | list>; as above, ris is replaced.
            if isinstance(l[2], (float, int)) or l[2] == None:
                tmp = control(None, l[0], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(tmp, l[1], l[2], cn)
            elif isinstance(l[2], list):
                tmp = control(None, l[0], cn)
                tmp2 = control(None, l[2], cn)
                if tmp is False or tmp2 is False:
                    return False
                else:
                    ris = computes(tmp, l[1], tmp2, cn)
            else:
                return False
            l = l[3:]
        else:
            return False
    return ris
280 | |
281 ############################ map_methods ###################################### | |
282 | |
def fold_change(avg1, avg2):
    """Return the base-2 logarithm of ``avg1 / avg2``.

    Zero averages are special-cased: both zero gives ``0``, only ``avg1``
    zero gives the string ``'-INF'`` and only ``avg2`` zero gives ``'INF'``.
    """
    first_is_zero = avg1 == 0
    second_is_zero = avg2 == 0
    if first_is_zero:
        return 0 if second_is_zero else '-INF'
    if second_is_zero:
        return 'INF'
    return math.log(avg1 / avg2, 2)
292 | |
def fix_style(l, col, width, dash):
    """Set stroke colour, width and dash pattern in an inline style string.

    Parameters
    ----------
    l : str or None
        ``;``-separated style declarations; ``None`` or ``''`` means the
        element has no style yet.
    col, width, dash : str
        New values for ``stroke``, ``stroke-width`` and ``stroke-dasharray``.

    Returns
    -------
    str
        The style with existing declarations overwritten in place and the
        missing ones appended, in the order stroke, width, dasharray.
    """
    # An absent style attribute used to crash on None.split(); an empty one
    # produced a leading ';'.
    declarations = l.split(';') if l else []
    overrides = [('stroke:', col),
                 ('stroke-width:', width),
                 ('stroke-dasharray:', dash)]
    seen = set()
    for i, declaration in enumerate(declarations):
        for prefix, value in overrides:
            # The three prefixes are mutually exclusive, so the first match
            # is the only match.
            if declaration.startswith(prefix):
                declarations[i] = prefix + value
                seen.add(prefix)
                break
    for prefix, value in overrides:
        if prefix not in seen:
            declarations.append(prefix + value)
    return ';'.join(declarations)
315 | |
def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C):
    """Restyle the reaction elements of a map according to test results.

    Parameters
    ----------
    d : dict
        Reaction id -> ``[p_value, fold_change]``; the fold change is a
        number or one of the strings ``'INF'`` / ``'-INF'``.
    core_map
        Parsed map; every element whose ``id`` attribute starts with ``R_``
        and whose remaining id is a key of ``d`` is restyled in place.
    threshold_P_V : float
        Results with a p-value at or above this are drawn grey and dashed.
    threshold_F_C : float
        Fold-change threshold; its base-2 logarithm is compared with the
        absolute log fold change.
    max_F_C : float
        Largest absolute fold change, used to scale the stroke width.

    Returns
    -------
    The same ``core_map`` object.
    """
    maxT = 12
    minT = 2
    grey = '#BEBEBE'
    blue = '#0000FF'
    red = '#E41A1C'
    # Loop invariant, so computed once.
    log_threshold = math.log(threshold_F_C, 2)
    for el in core_map.iter():
        el_id = str(el.get('id'))
        if not el_id.startswith('R_'):
            continue
        tmp = d.get(el_id[2:])
        if tmp is None:
            continue
        p_val = tmp[0]
        f_c = tmp[1]
        if p_val < threshold_P_V:
            dash = 'none'
            if isinstance(f_c, str):
                col = blue if f_c == '-INF' else red
                width = str(maxT)
            elif f_c == 0 or abs(f_c) < log_threshold:
                # A zero fold change has no direction; without this guard a
                # threshold <= 1 left the colour unset (or stale from the
                # previous element).
                col = grey
                width = str(minT)
            else:
                col = blue if f_c < 0 else red
                # Guard against a zero scale supplied by the caller.
                scaled = (abs(f_c) * maxT) / max_F_C if max_F_C else maxT
                width = str(max(scaled, minT))
        else:
            dash = '5,5'
            col = grey
            width = str(minT)
        el.set('style', fix_style(el.get('style'), col, width, dash))
    return core_map
353 | |
354 ############################ make recon ####################################### | |
355 | |
def check_and_doWord(l):
    """Tokenise a rule given as a list of single characters.

    Blanks separate tokens; ``(`` and ``)`` are tokens of their own. The
    characters that were read are removed from ``l``.

    Returns ``(tokens, genes)`` where ``genes`` holds every word other than
    ``'or'`` / ``'and'``, or ``False`` when the parentheses do not balance.
    """
    tokens = []
    genes = []
    depth = 0
    pos = 0
    total = len(l)
    while pos < total:
        if depth < 0:
            # A ')' came before its '(': stop, leaving the rest unread.
            del l[:pos]
            return False
        ch = l[pos]
        if ch == '(':
            depth += 1
            tokens.append(ch)
            pos += 1
        elif ch == ')':
            depth -= 1
            tokens.append(ch)
            pos += 1
        elif ch == ' ':
            pos += 1
        else:
            start = pos
            while pos < total and l[pos] not in (' ', '(', ')'):
                pos += 1
            word = ''.join(l[start:pos])
            tokens.append(word)
            if word not in ('or', 'and'):
                genes.append(word)
    # Mirror the consuming behaviour of popping each character as it is read.
    del l[:pos]
    if depth == 0:
        return (tokens, genes)
    return False
390 | |
def brackets_to_list(l):
    """Turn a token list with ``(`` / ``)`` markers into nested lists.

    Tokens are consumed from ``l``; each ``(`` starts a sub-list that is
    built by ``resolve_brackets``.
    """
    nested = []
    while l:
        token = l.pop(0)
        nested.append(resolve_brackets(l) if token == '(' else token)
    return nested
401 | |
def resolve_brackets(l):
    """Collect tokens up to the matching ``)`` into one list.

    Called right after an opening ``(`` was removed from ``l``. Nested
    parentheses recurse. The tokens read, including the closing ``)``, are
    removed from ``l``.
    """
    group = []
    while True:
        token = l.pop(0)
        if token == ')':
            return group
        group.append(resolve_brackets(l) if token == '(' else token)
413 | |
def priorityAND(l):
    """Regroup a nested token list so that ``and`` binds tighter than ``or``.

    Runs of operands joined by ``'and'`` are collected in ``tmpAnd``. Such a
    run is appended as its own sub-list when an ``'or'`` has been seen in
    this list (``flag`` is False), and spliced in flat otherwise. Nested
    lists are processed recursively.

    NOTE(review): a token sequence matching none of the branches leaves ``l``
    unchanged, so the loops would not terminate; presumably the input has
    been validated by ``checkRule`` beforehand - confirm against callers.
    """
    tmp = []
    flag = True  # stays True while no 'or' has been seen in this list
    while l:
        if len(l) == 1:
            # Last operand.
            if isinstance(l[0], list):
                tmp.append(priorityAND(l[0]))
            else:
                tmp.append(l[0])
            l = l[1:]
        elif l[0] == 'or':
            tmp.append(l[0])
            flag = False
            l = l[1:]
        elif l[1] == 'or':
            # operand 'or': copy both through.
            if isinstance(l[0], list):
                tmp.append(priorityAND(l[0]))
            else:
                tmp.append(l[0])
            tmp.append(l[1])
            flag = False
            l = l[2:]
        elif l[1] == 'and':
            # Start of an 'and' run: operand 'and' operand ...
            tmpAnd = []
            if isinstance(l[0], list):
                tmpAnd.append(priorityAND(l[0]))
            else:
                tmpAnd.append(l[0])
            tmpAnd.append(l[1])
            if isinstance(l[2], list):
                tmpAnd.append(priorityAND(l[2]))
            else:
                tmpAnd.append(l[2])
            l = l[3:]
            # Extend the run while further 'and' operand pairs follow.
            while l:
                if l[0] == 'and':
                    tmpAnd.append(l[0])
                    if isinstance(l[1], list):
                        tmpAnd.append(priorityAND(l[1]))
                    else:
                        tmpAnd.append(l[1])
                    l = l[2:]
                elif l[0] == 'or':
                    flag = False
                    break
            if flag == True: # only ANDs in the list
                tmp.extend(tmpAnd)
            elif flag == False:
                tmp.append(tmpAnd)
    return tmp
464 | |
def checkRule(l):
    """Check the shape of a nested rule.

    A single element is accepted (a nested list is checked recursively),
    three or more elements are handed to ``checkRule2``, and empty or
    two-element lists are rejected.
    """
    size = len(l)
    if size == 1:
        only = l[0]
        if isinstance(only, list) and checkRule(only) is False:
            return False
        return True
    if size > 2:
        return checkRule2(l) is not False
    return False
476 | |
def checkRule2(l):
    """Check a flat ``operand operator operand [operator operand ...]`` list.

    The list is consumed in ``operand operator operand`` triples or, when it
    starts with an operator, ``operator operand`` pairs. Nested list operands
    are validated with ``checkRule``.

    Returns True when the whole list was consumed, False as soon as a
    remainder does not fit either shape.
    """
    operators = ('and', 'or')
    while l:
        if len(l) == 1:
            return False
        if l[1] in operators:
            # operand operator operand: a dangling "operand operator" tail
            # used to raise IndexError on l[2].
            if len(l) < 3:
                return False
            if isinstance(l[0], list) and checkRule(l[0]) is False:
                return False
            if isinstance(l[2], list) and checkRule(l[2]) is False:
                return False
            l = l[3:]
        elif l[0] in operators:
            # operator operand, continuing a previous triple.
            if isinstance(l[1], list) and checkRule(l[1]) is False:
                return False
            l = l[2:]
        else:
            return False
    return True
501 | |
def do_rules(rules):
    """Parse textual rules into nested token lists.

    Parameters
    ----------
    rules : sequence of str
        One rule per reaction; an empty string means "no rule".

    Returns
    -------
    tuple
        ``(split_rules, genes)``. ``split_rules`` has one entry per input
        rule: the parsed rule, or ``[]`` for empty and malformed rules.
        ``genes`` lists the distinct gene names found (in no particular
        order). Malformed rules are reported once through ``warning``.
    """
    split_rules = []
    err_rules = []
    genes_seen = set()
    for rule in rules:
        chars = list(rule)
        if not chars:
            split_rules.append([])
            continue
        # check_and_doWord returns False for unbalanced parentheses; test
        # that before unpacking (unpacking False raised a TypeError).
        parsed = check_and_doWord(chars)
        if parsed is False:
            split_rules.append([])
            err_rules.append(rule)
            continue
        tokens, genes = parsed
        genes_seen.update(genes)
        nested = brackets_to_list(tokens)
        if checkRule(nested):
            split_rules.append(priorityAND(nested))
        else:
            split_rules.append([])
            err_rules.append(rule)
    if err_rules:
        warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
    return (split_rules, list(genes_seen))
526 | |
def make_recon(data):
    """Load custom reaction rules.

    ``data`` is first read as an SBML model with cobra. If cobra raises
    ``CobraSBMLError`` the file is read as a tab-separated table whose first
    column holds the reaction ids and whose second column holds the rules.

    Returns
    -------
    tuple
        ``(ids, split_rules, gene_in_rule)`` where ``split_rules`` comes from
        ``do_rules`` and ``gene_in_rule`` maps every gene found to ``'ok'``.

    Raises
    ------
    SystemExit
        If the tabular fallback is empty, unparsable or has fewer than two
        columns.
    """
    try:
        import cobra as cb
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            recon = cb.io.read_sbml_model(data)
        react = recon.reactions
        rules = [reaction.gene_reaction_rule for reaction in react]
        ids = [reaction.id for reaction in react]
    except cb.io.sbml3.CobraSBMLError:
        try:
            table = pd.read_csv(data, sep = '\t', dtype = str)
            data = table.fillna('')
            n_columns = len(data.columns)
            if n_columns < 2:
                sys.exit('Execution aborted: wrong format of '+
                         'custom datarules\n')
            if n_columns != 2:
                warning('Warning: more than 2 columns in custom datarules.\n' +
                        'Extra columns have been disregarded\n')
            ids = list(data.iloc[:, 0])
            rules = list(data.iloc[:, 1])
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            sys.exit('Execution aborted: wrong format of custom datarules\n')
    split_rules, tmp_genes = do_rules(rules)
    gene_in_rule = {gene: 'ok' for gene in tmp_genes}
    return (ids, split_rules, gene_in_rule)
557 | |
558 ############################ gene ############################################# | |
559 | |
def data_gene(gene, type_gene, name, gene_custom):
    """Validate a dataset and turn it into ``{sample: {gene: value}}``.

    Parameters
    ----------
    gene : pandas.DataFrame
        Dataset whose first column holds gene ids as strings; ids with
        leading/trailing blanks are stripped in place.
    type_gene : str
        Id type used to pick the gene table from the pickled data.
    name : str
        Dataset label used in messages.
    gene_custom : dict or None
        Genes used by custom rules; ``None`` means the pickled gene table of
        the selected built-in rule set is used.

    Raises
    ------
    SystemExit
        If a gene id that appears in the rules is duplicated in the dataset.
    """
    args = process_args(sys.argv)
    for i in range(len(gene)):
        tmp = gene.iloc[i, 0]
        if tmp.startswith(' ') or tmp.endswith(' '):
            gene.iloc[i, 0] = tmp.strip()
    gene_dup = [item for item, count in
                collections.Counter(gene[gene.columns[0]]).items()
                if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items() if count > 1]
    if gene_dup:
        if gene_custom is None:
            # NOTE(review): pickle files are trusted tool data; never point
            # --tool_dir at untrusted content.
            # The with-blocks close the files, which used to be left open.
            if args.rules_selector == 'HMRcore':
                with open(args.tool_dir +
                          '/local/HMRcore_genes.p', 'rb') as handle:
                    gene_in_rule = pk.load(handle)
            elif args.rules_selector == 'Recon':
                with open(args.tool_dir +
                          '/local/Recon_genes.p', 'rb') as handle:
                    gene_in_rule = pk.load(handle)
            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom
        # Only duplicates of genes that the rules actually use are fatal.
        tmp = [dup for dup in gene_dup if gene_in_rule.get(dup) == 'ok']
        if tmp:
            sys.exit('Execution aborted because gene ID '
                     +str(tmp)+' in '+name+' is duplicated\n')
    if pat_dup:
        # A blank was missing between the label list and "in".
        warning('Warning: duplicated label\n' + str(pat_dup) + ' in ' + name +
                '\n')
    return (gene.set_index(gene.columns[0])).to_dict()
592 | |
593 ############################ resolve ########################################## | |
594 | |
def resolve(genes, rules, ids, resolve_none, name):
    """Evaluate every rule for every sample.

    Parameters
    ----------
    genes : dict
        Sample -> ``{gene: value}``.
    rules : list
        Parsed rules; an empty rule yields ``None``.
    ids : list
        Reaction ids (not used here; kept for interface compatibility).
    resolve_none : bool
        Forwarded to ``control``.
    name : str
        Dataset label used in the warning.

    Returns
    -------
    tuple
        ``(resolve_rules, not_found)``: sample -> list of scores (``None``
        where a rule could not be evaluated) and the distinct genes missing
        from the data. ``(None, None)`` when no score could be computed at
        all, after logging a warning.
    """
    resolve_rules = {}
    # A set keeps one entry per missing gene instead of one per sample.
    not_found = set()
    flag = False
    for key, value in genes.items():
        tmp_resolve = []
        for rule in rules:
            if not rule:
                tmp_resolve.append(None)
                continue
            tmp, err = replace_gene_value(rule, value)
            not_found.update(err)
            ris = control(None, tmp, resolve_none)
            if ris is False or ris is None:
                tmp_resolve.append(None)
            else:
                tmp_resolve.append(ris)
                flag = True
        resolve_rules[key] = tmp_resolve
    if flag is False:
        # A blank was missing between "values)" and "for class".
        warning('Warning: no computable score (due to missing gene values) ' +
                'for class ' + name + ', the class has been disregarded\n')
        return (None, None)
    return (resolve_rules, list(not_found))
621 | |
622 ############################ split class ###################################### | |
623 | |
def split_class(classes, resolve_rules):
    """Group the per-sample scores by class.

    Parameters
    ----------
    classes : pandas.DataFrame
        First column: sample id, second column: class label. Rows with a
        null label are skipped. The frame is left unmodified.
    resolve_rules : dict
        Sample id -> list of scores.

    Returns
    -------
    dict
        Class label -> list with one entry per reaction, each holding the
        scores of the samples of that class. Classes without any known
        sample are logged through ``warning`` and left out.
    """
    # One pass, keeping classes in order of first appearance; the original
    # rescanned the table for every class and blanked the caller's column.
    grouped = collections.OrderedDict()
    for pat_id, classe in zip(classes.iloc[:, 0], classes.iloc[:, 1]):
        if pd.isnull(classe):
            continue
        samples = grouped.setdefault(classe, [])
        tmp = resolve_rules.get(pat_id, None)
        if tmp is not None:
            samples.append(tmp)
    class_pat = {}
    for classe, samples in grouped.items():
        if samples:
            # Transpose: samples x reactions -> reactions x samples.
            class_pat[classe] = list(map(list, zip(*samples)))
        else:
            warning('Warning: no sample found in class ' + classe +
                    ', the class has been disregarded\n')
    return class_pat
643 | |
644 ############################ map ############################################## | |
645 | |
def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C):
    """Compare every pair of classes and write the result files.

    For each pair a two-sample Kolmogorov-Smirnov test and a log2 fold
    change of the class averages are computed per reaction and written to
    ``table_out/<a>_vs_<b>.tsv``. When the HMRcore rules are selected, or
    custom rules with ``--yes_no yes``, a restyled copy of ``core_map`` is
    also written to ``map_svg/`` and rendered to ``map_pdf/``.

    Parameters
    ----------
    core_map
        Parsed SVG map used as the template for every comparison.
    class_pat : dict
        Class label -> per-reaction lists of scores.
    ids : list
        Reaction ids, parallel to the per-reaction lists.
    threshold_P_V, threshold_F_C : float
        Thresholds forwarded to ``fix_map``.

    Raises
    ------
    SystemExit
        If fewer than two classes are available.
    """
    # Local import: copy is not among the module-level imports of this file.
    import copy
    args = process_args(sys.argv)
    if (not class_pat) or (len(class_pat.keys()) < 2):
        sys.exit('Execution aborted: classes provided for comparisons are ' +
                 'less than two\n')
    draw_map = (args.rules_selector == 'HMRcore' or
                (args.rules_selector == 'Custom' and args.yes_no == 'yes'))
    header = ['ids', 'P_Value', 'Average']
    for i, j in it.combinations(class_pat.keys(), 2):
        tmp = {}
        max_F_C = 0
        for count, (l1, l2) in enumerate(zip(class_pat.get(i),
                                             class_pat.get(j))):
            try:
                stat_D, p_value = st.ks_2samp(l1, l2)
                avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
                if not isinstance(avg, str):
                    if max_F_C < abs(avg):
                        max_F_C = abs(avg)
                tmp[ids[count]] = [float(p_value), avg]
            except (TypeError, ZeroDivisionError):
                # Reactions with missing scores are left out of the result.
                continue
        tab = 'table_out/' + i + '_vs_' + j + '.tsv'
        # Explicit rows and columns keep the header valid even when no
        # reaction could be tested (an empty frame used to make to_csv fail).
        rows = [[key] + value for key, value in tmp.items()]
        tmp_csv = pd.DataFrame(rows, columns = header)
        tmp_csv.to_csv(tab, sep = '\t', index = False)
        if draw_map:
            # Style a fresh copy so that colours from an earlier comparison
            # cannot leak into this one.
            comparison_map = copy.deepcopy(core_map)
            fix_map(tmp, comparison_map, threshold_P_V, threshold_F_C,
                    max_F_C)
            file_svg = 'map_svg/' + i + '_vs_' + j + '.svg'
            with open(file_svg, 'wb') as new_map:
                new_map.write(ET.tostring(comparison_map, encoding='UTF-8',
                                          method='xml'))
            file_pdf = 'map_pdf/' + i + '_vs_' + j + '.pdf'
            renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
    return None
681 | |
682 ############################ MAIN ############################################# | |
683 | |
def _load_pickle(path):
    """Unpickle the file at ``path`` and close it afterwards."""
    # NOTE(review): pickle is only safe on trusted files shipped with the tool.
    with open(path, 'rb') as handle:
        return pk.load(handle)


def _load_dataset(path, name):
    """Read a dataset, force string gene ids and detect the id type."""
    dataset = read_dataset(path, name)
    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
    type_gene = gene_type(dataset.iloc[0, 0], name)
    return dataset, type_gene


def _score_dataset(dataset, type_gene, name, selector, recon, custom,
                   resolve_none):
    """Resolve the rules on one dataset; return ``(resolve_rules, ids)``."""
    if selector != 'Custom':
        genes = data_gene(dataset, type_gene, name, None)
        ids, rules = load_id_rules(recon.get(type_gene))
    else:
        ids, rules, gene_in_rule = custom
        genes = data_gene(dataset, type_gene, name, gene_in_rule)
    resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
    if err:
        warning('Warning: gene\n' + str(err) + '\nnot found in class '
                + name + ', the expression level for this gene ' +
                'will be considered NaN\n')
    return resolve_rules, ids


def main():
    """Run the tool: load rules and data, score reactions, write outputs.

    Output directories are created in the current working directory. The
    run ends with ``'Execution succeeded'`` in the log, or with
    ``SystemExit`` raised by one of the validation steps.
    """
    args = process_args(sys.argv)
    selector = args.rules_selector
    os.makedirs('table_out')
    recon = None
    custom = None
    ids = None
    if selector == 'HMRcore':
        os.makedirs('map_svg')
        os.makedirs('map_pdf')
        recon = _load_pickle(args.tool_dir + '/local/HMRcore_rules.p')
    elif selector == 'Recon':
        recon = _load_pickle(args.tool_dir + '/local/Recon_rules.p')
    elif selector == 'Custom':
        custom = make_recon(args.custom)
        ids = custom[0]
    resolve_none = check_bool(args.none)
    class_pat = {}
    if args.option == 'datasets':
        for num, (path, label) in enumerate(zip(args.input_datas,
                                                args.names), 1):
            name = name_dataset(label, num)
            dataset, type_gene = _load_dataset(path, name)
            resolve_rules, ids = _score_dataset(dataset, type_gene, name,
                                                selector, recon, custom,
                                                resolve_none)
            if resolve_rules is not None:
                # Transpose: samples x reactions -> reactions x samples.
                class_pat[name] = list(map(list,
                                           zip(*resolve_rules.values())))
    elif args.option == 'dataset_class':
        name = 'RNAseq'
        dataset, type_gene = _load_dataset(args.input_data, name)
        classes = read_dataset(args.input_class, 'class')
        if not len(classes.columns) == 2:
            # A blank was missing between "Extra" and "columns".
            warning('Warning: more than 2 columns in class file. Extra ' +
                    'columns have been disregarded\n')
        classes = classes.astype(str)
        resolve_rules, ids = _score_dataset(dataset, type_gene, name,
                                            selector, recon, custom,
                                            resolve_none)
        if resolve_rules is not None:
            class_pat = split_class(classes, resolve_rules)
    if selector == 'Custom' and args.yes_no == 'yes':
        os.makedirs('map_svg')
        os.makedirs('map_pdf')
        try:
            core_map = ET.parse(args.custom_map)
        except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
            sys.exit('Execution aborted: custom map in wrong format')
    else:
        # Also covers Custom without --yes_no, which used to leave core_map
        # undefined.
        core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
    maps(core_map, class_pat, ids, args.pValue, args.fChange)
    warning('Execution succeeded')
    return None
756 | |
757 ############################################################################### | |
758 | |
# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()