| 46 | 1 from __future__ import division | 
|  | 2 import sys | 
|  | 3 import pandas as pd | 
|  | 4 import itertools as it | 
|  | 5 import scipy.stats as st | 
|  | 6 import collections | 
|  | 7 import lxml.etree as ET | 
|  | 8 import pickle as pk | 
|  | 9 import math | 
|  | 10 import os | 
|  | 11 import argparse | 
|  | 12 from svglib.svglib import svg2rlg | 
|  | 13 from reportlab.graphics import renderPDF | 
|  | 14 | 
|  | 15 ########################## argparse ########################################## | 
|  | 16 | 
|  | 17 def process_args(args): | 
|  | 18     parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 
|  | 19                                      description = 'process some value\'s'+ | 
|  | 20                                      ' genes to create a comparison\'s map.') | 
|  | 21     parser.add_argument('-rs', '--rules_selector', | 
|  | 22                         type = str, | 
|  | 23                         default = 'HMRcore', | 
|  | 24                         choices = ['HMRcore', 'Recon', 'Custom'], | 
|  | 25                         help = 'chose which type of dataset you want use') | 
|  | 26     parser.add_argument('-cr', '--custom', | 
|  | 27                         type = str, | 
|  | 28                         help='your dataset if you want custom rules') | 
|  | 29     parser.add_argument('-na', '--names', | 
|  | 30                         type = str, | 
|  | 31                         nargs = '+', | 
|  | 32                         help = 'input names') | 
|  | 33     parser.add_argument('-n', '--none', | 
|  | 34                         type = str, | 
|  | 35                         default = 'true', | 
|  | 36                         choices = ['true', 'false'], | 
|  | 37                         help = 'compute Nan values') | 
|  | 38     parser.add_argument('-pv' ,'--pValue', | 
|  | 39                         type = float, | 
|  | 40                         default = 0.05, | 
|  | 41                         help = 'P-Value threshold (default: %(default)s)') | 
|  | 42     parser.add_argument('-fc', '--fChange', | 
|  | 43                         type = float, | 
|  | 44                         default = 1.5, | 
|  | 45                         help = 'Fold-Change threshold (default: %(default)s)') | 
|  | 46     parser.add_argument('-td', '--tool_dir', | 
|  | 47                         type = str, | 
|  | 48                         required = True, | 
|  | 49                         help = 'your tool directory') | 
|  | 50     parser.add_argument('-op', '--option', | 
|  | 51                         type = str, | 
|  | 52                         choices = ['datasets', 'dataset_class', 'datasets_rasonly'], | 
|  | 53                         help='dataset or dataset and class') | 
|  | 54     parser.add_argument('-ol', '--out_log', | 
|  | 55                         help = "Output log") | 
|  | 56     parser.add_argument('-ids', '--input_datas', | 
|  | 57                         type = str, | 
|  | 58                         nargs = '+', | 
|  | 59                         help = 'input datasets') | 
|  | 60     parser.add_argument('-id', '--input_data', | 
|  | 61                         type = str, | 
|  | 62                         help = 'input dataset') | 
|  | 63     parser.add_argument('-ic', '--input_class', | 
|  | 64                         type = str, | 
|  | 65                         help = 'sample group specification') | 
|  | 66     parser.add_argument('-cm', '--custom_map', | 
|  | 67                         type = str, | 
|  | 68                         help = 'custom map') | 
|  | 69     parser.add_argument('-yn', '--yes_no', | 
|  | 70                         type = str, | 
|  | 71                         choices = ['yes', 'no'], | 
|  | 72                         help = 'if make or not custom map') | 
|  | 73     parser.add_argument('-gs', '--generate_svg', | 
|  | 74                         type = str, | 
|  | 75                         default = 'true', | 
|  | 76                         choices = ['true', 'false'], | 
|  | 77                         help = 'generate svg map') | 
|  | 78     parser.add_argument('-gp', '--generate_pdf', | 
|  | 79                         type = str, | 
|  | 80                         default = 'true', | 
|  | 81                         choices = ['true', 'false'], | 
|  | 82                         help = 'generate pdf map') | 
|  | 83     parser.add_argument('-gr', '--generate_ras', | 
|  | 84                         type = str, | 
|  | 85                         default = 'true', | 
|  | 86                         choices = ['true', 'false'], | 
|  | 87                         help = 'generate reaction activity score') | 
|  | 88     parser.add_argument('-sr', '--single_ras_file', | 
|  | 89                          type = str, | 
|  | 90                          help = 'file that will contain ras') | 
|  | 91 | 
|  | 92     args = parser.parse_args() | 
|  | 93     return args | 
|  | 94 | 
|  | 95 ########################### warning ########################################### | 
|  | 96 | 
|  | 97 def warning(s): | 
|  | 98     args = process_args(sys.argv) | 
|  | 99     with open(args.out_log, 'a') as log: | 
|  | 100             log.write(s) | 
|  | 101 | 
|  | 102 ############################ dataset input #################################### | 
|  | 103 | 
|  | 104 def read_dataset(data, name): | 
|  | 105     try: | 
|  | 106         dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') | 
|  | 107     except pd.errors.EmptyDataError: | 
|  | 108         sys.exit('Execution aborted: wrong format of ' + name + '\n') | 
|  | 109     if len(dataset.columns) < 2: | 
|  | 110         sys.exit('Execution aborted: wrong format of ' + name + '\n') | 
|  | 111     return dataset | 
|  | 112 | 
|  | 113 ############################ dataset name ##################################### | 
|  | 114 | 
|  | 115 def name_dataset(name_data, count): | 
|  | 116     if str(name_data) == 'Dataset': | 
|  | 117         return str(name_data) + '_' + str(count) | 
|  | 118     else: | 
|  | 119         return str(name_data) | 
|  | 120 | 
|  | 121 ############################ load id e rules ################################## | 
|  | 122 | 
|  | 123 def load_id_rules(reactions): | 
|  | 124     ids, rules = [], [] | 
|  | 125     for key, value in reactions.items(): | 
|  | 126             ids.append(key) | 
|  | 127             rules.append(value) | 
|  | 128     return (ids, rules) | 
|  | 129 | 
|  | 130 ############################ check_methods #################################### | 
|  | 131 | 
|  | 132 def gene_type(l, name): | 
|  | 133     if check_hgnc(l): | 
|  | 134         return 'hugo_id' | 
|  | 135     elif check_ensembl(l): | 
|  | 136         return 'ensembl_gene_id' | 
|  | 137     elif check_symbol(l): | 
|  | 138         return 'symbol' | 
|  | 139     elif check_entrez(l): | 
|  | 140         return 'entrez_id' | 
|  | 141     else: | 
|  | 142         sys.exit('Execution aborted:\n' + | 
|  | 143                  'gene ID type in ' + name + ' not supported. Supported ID'+ | 
|  | 144                  'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n') | 
|  | 145 | 
|  | 146 def check_hgnc(l): | 
|  | 147     if len(l) > 5: | 
|  | 148         if (l.upper()).startswith('HGNC:'): | 
|  | 149             return l[5:].isdigit() | 
|  | 150         else: | 
|  | 151             return False | 
|  | 152     else: | 
|  | 153         return False | 
|  | 154 | 
|  | 155 def check_ensembl(l): | 
|  | 156     if len(l) == 15: | 
|  | 157         if (l.upper()).startswith('ENS'): | 
|  | 158             return l[4:].isdigit() | 
|  | 159         else: | 
|  | 160             return False | 
|  | 161     else: | 
|  | 162         return False | 
|  | 163 | 
|  | 164 def check_symbol(l): | 
|  | 165     if len(l) > 0: | 
|  | 166         if l[0].isalpha() and l[1:].isalnum(): | 
|  | 167             return True | 
|  | 168         else: | 
|  | 169             return False | 
|  | 170     else: | 
|  | 171         return False | 
|  | 172 | 
|  | 173 def check_entrez(l): | 
|  | 174     if len(l) > 0: | 
|  | 175         return l.isdigit() | 
|  | 176     else: | 
|  | 177         return False | 
|  | 178 | 
|  | 179 def check_bool(b): | 
|  | 180     if b == 'true': | 
|  | 181         return True | 
|  | 182     elif b == 'false': | 
|  | 183         return False | 
|  | 184 | 
|  | 185 ############################ resolve_methods ################################## | 
|  | 186 | 
|  | 187 def replace_gene_value(l, d): | 
|  | 188     tmp = [] | 
|  | 189     err = [] | 
|  | 190     while l: | 
|  | 191         if isinstance(l[0], list): | 
|  | 192             tmp_rules, tmp_err = replace_gene_value(l[0], d) | 
|  | 193             tmp.append(tmp_rules) | 
|  | 194             err.extend(tmp_err) | 
|  | 195         else: | 
|  | 196             value = replace_gene(l[0], d) | 
|  | 197             tmp.append(value) | 
|  | 198             if value == None: | 
|  | 199                 err.append(l[0]) | 
|  | 200         l = l[1:] | 
|  | 201     return (tmp, err) | 
|  | 202 | 
|  | 203 | 
|  | 204 def replace_gene(l, d): | 
|  | 205     if l =='and' or l == 'or': | 
|  | 206         return l | 
|  | 207     else: | 
|  | 208         value = d.get(l, None) | 
|  | 209         if not(value == None or isinstance(value, (int, float))): | 
|  | 210             sys.exit('Execution aborted: ' + value + ' value not valid\n') | 
|  | 211         return value | 
|  | 212 | 
|  | 213 def computes(val1, op, val2, cn): | 
|  | 214     if val1 != None and val2 != None: | 
|  | 215         if op == 'and': | 
|  | 216             return min(val1, val2) | 
|  | 217         else: | 
|  | 218             return val1 + val2 | 
|  | 219     elif op == 'and': | 
|  | 220         if cn is True: | 
|  | 221             if val1 != None: | 
|  | 222                 return val1 | 
|  | 223             elif val2 != None: | 
|  | 224                 return val2 | 
|  | 225             else: | 
|  | 226                 return None | 
|  | 227         else: | 
|  | 228             return None | 
|  | 229     else: | 
|  | 230         if val1 != None: | 
|  | 231             return val1 | 
|  | 232         elif val2 != None: | 
|  | 233             return val2 | 
|  | 234         else: | 
|  | 235             return None | 
|  | 236 | 
|  | 237 def control(ris, l, cn): | 
|  | 238     if len(l) == 1: | 
|  | 239         if isinstance(l[0], (float, int)) or l[0] == None: | 
|  | 240             return l[0] | 
|  | 241         elif isinstance(l[0], list): | 
|  | 242             return control(None, l[0], cn) | 
|  | 243         else: | 
|  | 244             return False | 
|  | 245     elif len(l) > 2: | 
|  | 246         return control_list(ris, l, cn) | 
|  | 247     else: | 
|  | 248         return False | 
|  | 249 | 
|  | 250 def control_list(ris, l, cn): | 
|  | 251     while l: | 
|  | 252         if len(l) == 1: | 
|  | 253             return False | 
|  | 254         elif (isinstance(l[0], (float, int)) or | 
|  | 255               l[0] == None) and l[1] in ['and', 'or']: | 
|  | 256             if isinstance(l[2], (float, int)) or l[2] == None: | 
|  | 257                 ris = computes(l[0], l[1], l[2], cn) | 
|  | 258             elif isinstance(l[2], list): | 
|  | 259                 tmp = control(None, l[2], cn) | 
|  | 260                 if tmp is False: | 
|  | 261                     return False | 
|  | 262                 else: | 
|  | 263                     ris = computes(l[0], l[1], tmp, cn) | 
|  | 264             else: | 
|  | 265                 return False | 
|  | 266             l = l[3:] | 
|  | 267         elif l[0] in ['and', 'or']: | 
|  | 268             if isinstance(l[1], (float, int)) or l[1] == None: | 
|  | 269                 ris = computes(ris, l[0], l[1], cn) | 
|  | 270             elif isinstance(l[1], list): | 
|  | 271                 tmp = control(None,l[1], cn) | 
|  | 272                 if tmp is False: | 
|  | 273                     return False | 
|  | 274                 else: | 
|  | 275                     ris = computes(ris, l[0], tmp, cn) | 
|  | 276             else: | 
|  | 277                 return False | 
|  | 278             l = l[2:] | 
|  | 279         elif isinstance(l[0], list) and l[1] in ['and', 'or']: | 
|  | 280             if isinstance(l[2], (float, int)) or l[2] == None: | 
|  | 281                 tmp = control(None, l[0], cn) | 
|  | 282                 if tmp is False: | 
|  | 283                     return False | 
|  | 284                 else: | 
|  | 285                     ris = computes(tmp, l[1], l[2], cn) | 
|  | 286             elif isinstance(l[2], list): | 
|  | 287                 tmp = control(None, l[0], cn) | 
|  | 288                 tmp2 = control(None, l[2], cn) | 
|  | 289                 if tmp is False or tmp2 is False: | 
|  | 290                     return False | 
|  | 291                 else: | 
|  | 292                     ris = computes(tmp, l[1], tmp2, cn) | 
|  | 293             else: | 
|  | 294                 return False | 
|  | 295             l = l[3:] | 
|  | 296         else: | 
|  | 297             return False | 
|  | 298     return ris | 
|  | 299 | 
|  | 300 ############################ map_methods ###################################### | 
|  | 301 | 
|  | 302 def fold_change(avg1, avg2): | 
|  | 303     if avg1 == 0 and avg2 == 0: | 
|  | 304         return 0 | 
|  | 305     elif avg1 == 0: | 
|  | 306         return '-INF' | 
|  | 307     elif avg2 == 0: | 
|  | 308         return 'INF' | 
|  | 309     else: | 
|  | 310         return math.log(avg1 / avg2, 2) | 
|  | 311 | 
|  | 312 def fix_style(l, col, width, dash): | 
|  | 313     tmp = l.split(';') | 
|  | 314     flag_col = False | 
|  | 315     flag_width = False | 
|  | 316     flag_dash = False | 
|  | 317     for i in range(len(tmp)): | 
|  | 318         if tmp[i].startswith('stroke:'): | 
|  | 319             tmp[i] = 'stroke:' + col | 
|  | 320             flag_col = True | 
|  | 321         if tmp[i].startswith('stroke-width:'): | 
|  | 322             tmp[i] = 'stroke-width:' + width | 
|  | 323             flag_width = True | 
|  | 324         if tmp[i].startswith('stroke-dasharray:'): | 
|  | 325             tmp[i] = 'stroke-dasharray:' + dash | 
|  | 326             flag_dash = True | 
|  | 327     if not flag_col: | 
|  | 328         tmp.append('stroke:' + col) | 
|  | 329     if not flag_width: | 
|  | 330         tmp.append('stroke-width:' + width) | 
|  | 331     if not flag_dash: | 
|  | 332         tmp.append('stroke-dasharray:' + dash) | 
|  | 333     return ';'.join(tmp) | 
|  | 334 | 
|  | 335 def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C): | 
|  | 336     maxT = 12 | 
|  | 337     minT = 2 | 
|  | 338     grey = '#BEBEBE' | 
|  | 339     blue = '#0000FF' | 
|  | 340     red = '#E41A1C' | 
|  | 341     for el in core_map.iter(): | 
|  | 342         el_id = str(el.get('id')) | 
|  | 343         if el_id.startswith('R_'): | 
|  | 344             tmp = d.get(el_id[2:]) | 
|  | 345             if tmp != None: | 
|  | 346                 p_val = tmp[0] | 
|  | 347                 f_c = tmp[1] | 
|  | 348                 if p_val < threshold_P_V: | 
|  | 349                     if not isinstance(f_c, str): | 
|  | 350                         if abs(f_c) < math.log(threshold_F_C, 2): | 
|  | 351                             col = grey | 
|  | 352                             width = str(minT) | 
|  | 353                         else: | 
|  | 354                             if f_c < 0: | 
|  | 355                                 col = blue | 
|  | 356                             elif f_c > 0: | 
|  | 357                                 col = red | 
|  | 358                             width = str(max((abs(f_c) * maxT) / max_F_C, minT)) | 
|  | 359                     else: | 
|  | 360                         if f_c == '-INF': | 
|  | 361                             col = blue | 
|  | 362                         elif f_c == 'INF': | 
|  | 363                             col = red | 
|  | 364                         width = str(maxT) | 
|  | 365                     dash = 'none' | 
|  | 366                 else: | 
|  | 367                     dash = '5,5' | 
|  | 368                     col = grey | 
|  | 369                     width = str(minT) | 
|  | 370                 el.set('style', fix_style(el.get('style'), col, width, dash)) | 
|  | 371     return core_map | 
|  | 372 | 
|  | 373 ############################ make recon ####################################### | 
|  | 374 | 
|  | 375 def check_and_doWord(l): | 
|  | 376     tmp = [] | 
|  | 377     tmp_genes = [] | 
|  | 378     count = 0 | 
|  | 379     while l: | 
|  | 380         if count >= 0: | 
|  | 381             if l[0] == '(': | 
|  | 382                 count += 1 | 
|  | 383                 tmp.append(l[0]) | 
|  | 384                 l.pop(0) | 
|  | 385             elif l[0] == ')': | 
|  | 386                 count -= 1 | 
|  | 387                 tmp.append(l[0]) | 
|  | 388                 l.pop(0) | 
|  | 389             elif l[0] == ' ': | 
|  | 390                 l.pop(0) | 
|  | 391             else: | 
|  | 392                 word = [] | 
|  | 393                 while l: | 
|  | 394                     if l[0] in [' ', '(', ')']: | 
|  | 395                         break | 
|  | 396                     else: | 
|  | 397                         word.append(l[0]) | 
|  | 398                         l.pop(0) | 
|  | 399                 word = ''.join(word) | 
|  | 400                 tmp.append(word) | 
|  | 401                 if not(word in ['or', 'and']): | 
|  | 402                     tmp_genes.append(word) | 
|  | 403         else: | 
|  | 404             return False | 
|  | 405     if count == 0: | 
|  | 406         return (tmp, tmp_genes) | 
|  | 407     else: | 
|  | 408         return False | 
|  | 409 | 
|  | 410 def brackets_to_list(l): | 
|  | 411     tmp = [] | 
|  | 412     while l: | 
|  | 413         if l[0] == '(': | 
|  | 414             l.pop(0) | 
|  | 415             tmp.append(resolve_brackets(l)) | 
|  | 416         else: | 
|  | 417             tmp.append(l[0]) | 
|  | 418             l.pop(0) | 
|  | 419     return tmp | 
|  | 420 | 
|  | 421 def resolve_brackets(l): | 
|  | 422     tmp = [] | 
|  | 423     while l[0] != ')': | 
|  | 424         if l[0] == '(': | 
|  | 425             l.pop(0) | 
|  | 426             tmp.append(resolve_brackets(l)) | 
|  | 427         else: | 
|  | 428             tmp.append(l[0]) | 
|  | 429             l.pop(0) | 
|  | 430     l.pop(0) | 
|  | 431     return tmp | 
|  | 432 | 
|  | 433 def priorityAND(l): | 
|  | 434     tmp = [] | 
|  | 435     flag = True | 
|  | 436     while l: | 
|  | 437         if len(l) == 1: | 
|  | 438             if isinstance(l[0], list): | 
|  | 439                 tmp.append(priorityAND(l[0])) | 
|  | 440             else: | 
|  | 441                 tmp.append(l[0]) | 
|  | 442             l = l[1:] | 
|  | 443         elif l[0] == 'or': | 
|  | 444             tmp.append(l[0]) | 
|  | 445             flag = False | 
|  | 446             l = l[1:] | 
|  | 447         elif l[1] == 'or': | 
|  | 448             if isinstance(l[0], list): | 
|  | 449                 tmp.append(priorityAND(l[0])) | 
|  | 450             else: | 
|  | 451                 tmp.append(l[0]) | 
|  | 452             tmp.append(l[1]) | 
|  | 453             flag = False | 
|  | 454             l = l[2:] | 
|  | 455         elif l[1] == 'and': | 
|  | 456             tmpAnd = [] | 
|  | 457             if isinstance(l[0], list): | 
|  | 458                 tmpAnd.append(priorityAND(l[0])) | 
|  | 459             else: | 
|  | 460                 tmpAnd.append(l[0]) | 
|  | 461             tmpAnd.append(l[1]) | 
|  | 462             if isinstance(l[2], list): | 
|  | 463                 tmpAnd.append(priorityAND(l[2])) | 
|  | 464             else: | 
|  | 465                 tmpAnd.append(l[2]) | 
|  | 466             l = l[3:] | 
|  | 467             while l: | 
|  | 468                 if l[0] == 'and': | 
|  | 469                     tmpAnd.append(l[0]) | 
|  | 470                     if isinstance(l[1], list): | 
|  | 471                         tmpAnd.append(priorityAND(l[1])) | 
|  | 472                     else: | 
|  | 473                         tmpAnd.append(l[1]) | 
|  | 474                     l = l[2:] | 
|  | 475                 elif l[0] == 'or': | 
|  | 476                     flag = False | 
|  | 477                     break | 
|  | 478             if flag == True: #when there are only AND in list | 
|  | 479                 tmp.extend(tmpAnd) | 
|  | 480             elif flag == False: | 
|  | 481                 tmp.append(tmpAnd) | 
|  | 482     return tmp | 
|  | 483 | 
|  | 484 def checkRule(l): | 
|  | 485     if len(l) == 1: | 
|  | 486         if isinstance(l[0], list): | 
|  | 487             if checkRule(l[0]) is False: | 
|  | 488                 return False | 
|  | 489     elif len(l) > 2: | 
|  | 490         if checkRule2(l) is False: | 
|  | 491             return False | 
|  | 492     else: | 
|  | 493         return False | 
|  | 494     return True | 
|  | 495 | 
|  | 496 def checkRule2(l): | 
|  | 497     while l: | 
|  | 498         if len(l) == 1: | 
|  | 499             return False | 
|  | 500         elif isinstance(l[0], list) and l[1] in ['and', 'or']: | 
|  | 501             if checkRule(l[0]) is False: | 
|  | 502                 return False | 
|  | 503             if isinstance(l[2], list): | 
|  | 504                 if checkRule(l[2]) is False: | 
|  | 505                     return False | 
|  | 506             l = l[3:] | 
|  | 507         elif l[1] in ['and', 'or']: | 
|  | 508             if isinstance(l[2], list): | 
|  | 509                 if checkRule(l[2]) is False: | 
|  | 510                     return False | 
|  | 511             l = l[3:] | 
|  | 512         elif l[0] in ['and', 'or']: | 
|  | 513             if isinstance(l[1], list): | 
|  | 514                 if checkRule(l[1]) is False: | 
|  | 515                     return False | 
|  | 516             l = l[2:] | 
|  | 517         else: | 
|  | 518             return False | 
|  | 519     return True | 
|  | 520 | 
|  | 521 def do_rules(rules): | 
|  | 522     split_rules = [] | 
|  | 523     err_rules = [] | 
|  | 524     tmp_gene_in_rule = [] | 
|  | 525     for i in range(len(rules)): | 
|  | 526         tmp = list(rules[i]) | 
|  | 527         if tmp: | 
|  | 528             tmp, tmp_genes = check_and_doWord(tmp) | 
|  | 529             tmp_gene_in_rule.extend(tmp_genes) | 
|  | 530             if tmp is False: | 
|  | 531                 split_rules.append([]) | 
|  | 532                 err_rules.append(rules[i]) | 
|  | 533             else: | 
|  | 534                 tmp = brackets_to_list(tmp) | 
|  | 535                 if checkRule(tmp): | 
|  | 536                     split_rules.append(priorityAND(tmp)) | 
|  | 537                 else: | 
|  | 538                     split_rules.append([]) | 
|  | 539                     err_rules.append(rules[i]) | 
|  | 540         else: | 
|  | 541             split_rules.append([]) | 
|  | 542     if err_rules: | 
|  | 543         warning('Warning: wrong format rule in ' + str(err_rules) + '\n') | 
|  | 544     return (split_rules, list(set(tmp_gene_in_rule))) | 
|  | 545 | 
|  | 546 def make_recon(data): | 
|  | 547     try: | 
|  | 548         import cobra as cb | 
|  | 549         import warnings | 
|  | 550         with warnings.catch_warnings(): | 
|  | 551             warnings.simplefilter('ignore') | 
|  | 552             recon = cb.io.read_sbml_model(data) | 
|  | 553         react = recon.reactions | 
|  | 554         rules = [react[i].gene_reaction_rule for i in range(len(react))] | 
|  | 555         ids = [react[i].id for i in range(len(react))] | 
|  | 556     except cb.io.sbml3.CobraSBMLError: | 
|  | 557         try: | 
|  | 558             data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') | 
|  | 559             if len(data.columns) < 2: | 
|  | 560                 sys.exit('Execution aborted: wrong format of '+ | 
|  | 561                          'custom datarules\n') | 
|  | 562             if not len(data.columns) == 2: | 
|  | 563                 warning('Warning: more than 2 columns in custom datarules.\n' + | 
|  | 564                         'Extra columns have been disregarded\n') | 
|  | 565             ids = list(data.iloc[:, 0]) | 
|  | 566             rules = list(data.iloc[:, 1]) | 
|  | 567         except pd.errors.EmptyDataError: | 
|  | 568             sys.exit('Execution aborted: wrong format of custom datarules\n') | 
|  | 569         except pd.errors.ParserError: | 
|  | 570             sys.exit('Execution aborted: wrong format of custom datarules\n') | 
|  | 571     split_rules, tmp_genes = do_rules(rules) | 
|  | 572     gene_in_rule = {} | 
|  | 573     for i in tmp_genes: | 
|  | 574         gene_in_rule[i] = 'ok' | 
|  | 575     return (ids, split_rules, gene_in_rule) | 
|  | 576 | 
|  | 577 ############################ gene ############################################# | 
|  | 578 | 
|  | 579 def data_gene(gene, type_gene, name, gene_custom): | 
|  | 580     args = process_args(sys.argv) | 
|  | 581     for i in range(len(gene)): | 
|  | 582         tmp = gene.iloc[i, 0] | 
|  | 583         if tmp.startswith(' ') or tmp.endswith(' '): | 
|  | 584             gene.iloc[i, 0] = (tmp.lstrip()).rstrip() | 
|  | 585     gene_dup = [item for item, count in | 
|  | 586                collections.Counter(gene[gene.columns[0]]).items() if count > 1] | 
|  | 587     pat_dup = [item for item, count in | 
|  | 588                collections.Counter(list(gene.columns)).items() if count > 1] | 
|  | 589 | 
|  | 590     if gene_dup: | 
|  | 591         if gene_custom == None: | 
|  | 592             if args.rules_selector == 'HMRcore': | 
|  | 593                 gene_in_rule = pk.load(open(args.tool_dir + | 
|  | 594                                             '/local/HMRcore_genes.p', 'rb')) | 
|  | 595             elif args.rules_selector == 'Recon': | 
|  | 596                 gene_in_rule = pk.load(open(args.tool_dir + | 
|  | 597                                             '/local/Recon_genes.p', 'rb')) | 
|  | 598             gene_in_rule = gene_in_rule.get(type_gene) | 
|  | 599         else: | 
|  | 600             gene_in_rule = gene_custom | 
|  | 601         tmp = [] | 
|  | 602         for i in gene_dup: | 
|  | 603             if gene_in_rule.get(i) == 'ok': | 
|  | 604                 tmp.append(i) | 
|  | 605         if tmp: | 
|  | 606             sys.exit('Execution aborted because gene ID ' | 
|  | 607                      +str(tmp)+' in '+name+' is duplicated\n') | 
|  | 608     if pat_dup: | 
|  | 609         warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + | 
|  | 610                 '\n') | 
|  | 611 | 
|  | 612     return (gene.set_index(gene.columns[0])).to_dict() | 
|  | 613 | 
|  | 614 ############################ resolve ########################################## | 
|  | 615 | 
|  | 616 def resolve(genes, rules, ids, resolve_none, name): | 
|  | 617     resolve_rules = {} | 
|  | 618     names_array = [] | 
|  | 619     not_found = [] | 
|  | 620     flag = False | 
|  | 621     for key, value in genes.items(): | 
|  | 622         tmp_resolve = [] | 
|  | 623         for i in range(len(rules)): | 
|  | 624             tmp = rules[i] | 
|  | 625             if tmp: | 
|  | 626                 tmp, err = replace_gene_value(tmp, value) | 
|  | 627                 if err: | 
|  | 628                     not_found.extend(err) | 
|  | 629                 ris = control(None, tmp, resolve_none) | 
|  | 630                 if ris is False or ris == None: | 
|  | 631                     tmp_resolve.append(None) | 
|  | 632                 else: | 
|  | 633                     tmp_resolve.append(ris) | 
|  | 634                     flag = True | 
|  | 635             else: | 
|  | 636                 tmp_resolve.append(None) | 
|  | 637         resolve_rules[key] = tmp_resolve | 
|  | 638     if flag is False: | 
|  | 639         warning('Warning: no computable score (due to missing gene values)' + | 
|  | 640                 'for class ' + name + ', the class has been disregarded\n') | 
|  | 641         return (None, None) | 
|  | 642     return (resolve_rules, list(set(not_found))) | 
|  | 643 | 
|  | 644 ############################ split class ###################################### | 
|  | 645 | 
|  | 646 def split_class(classes, resolve_rules): | 
|  | 647     class_pat = {} | 
|  | 648     for i in range(len(classes)): | 
|  | 649         classe = classes.iloc[i, 1] | 
|  | 650         if not pd.isnull(classe): | 
|  | 651             l = [] | 
|  | 652             for j in range(i, len(classes)): | 
|  | 653                 if classes.iloc[j, 1] == classe: | 
|  | 654                     pat_id = classes.iloc[j, 0] | 
|  | 655                     tmp = resolve_rules.get(pat_id, None) | 
|  | 656                     if tmp != None: | 
|  | 657                         l.append(tmp) | 
|  | 658                     classes.iloc[j, 1] = None | 
|  | 659             if l: | 
|  | 660                 class_pat[classe] = list(map(list, zip(*l))) | 
|  | 661             else: | 
|  | 662                 warning('Warning: no sample found in class ' + classe + | 
|  | 663                         ', the class has been disregarded\n') | 
|  | 664     return class_pat | 
|  | 665 | 
|  | 666 ############################ create_ras ####################################### | 
|  | 667 | 
|  | 668 def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): | 
|  | 669 | 
|  | 670     if resolve_rules == None: | 
|  | 671         warning("Couldn't generate RAS for current dataset: " + dataset_name) | 
|  | 672 | 
|  | 673     for geni in resolve_rules.values(): | 
|  | 674         for i, valori in enumerate(geni): | 
|  | 675             if valori == None: | 
|  | 676                 geni[i] = 'None' | 
|  | 677 | 
|  | 678     output_ras = pd.DataFrame.from_dict(resolve_rules) | 
|  | 679 | 
|  | 680     output_ras.insert(0, 'Reactions', ids) | 
|  | 681     output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) | 
|  | 682 | 
|  | 683     if (single_ras): | 
|  | 684         args = process_args(sys.argv) | 
|  | 685         text_file = open(args.single_ras_file, "w") | 
|  | 686     else: | 
|  | 687         text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") | 
|  | 688 | 
|  | 689     text_file.write(output_to_csv) | 
|  | 690     text_file.close() | 
|  | 691 | 
|  | 692 ############################ map ############################################## | 
|  | 693 | 
|  | 694 def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): | 
|  | 695     args = process_args(sys.argv) | 
|  | 696     if (not class_pat) or (len(class_pat.keys()) < 2): | 
|  | 697         sys.exit('Execution aborted: classes provided for comparisons are ' + | 
|  | 698                  'less than two\n') | 
|  | 699     for i, j in it.combinations(class_pat.keys(), 2): | 
|  | 700         tmp = {} | 
|  | 701         count = 0 | 
|  | 702         max_F_C = 0 | 
|  | 703         for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | 
|  | 704             try: | 
|  | 705                stat_D, p_value = st.ks_2samp(l1, l2) | 
|  | 706                avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | 
|  | 707                if not isinstance(avg, str): | 
|  | 708                    if max_F_C < abs(avg): | 
|  | 709                        max_F_C = abs(avg) | 
|  | 710                tmp[ids[count]] = [float(p_value), avg] | 
|  | 711                count += 1 | 
|  | 712             except (TypeError, ZeroDivisionError): | 
|  | 713                count += 1 | 
|  | 714         tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | 
|  | 715         tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | 
|  | 716         tmp_csv = tmp_csv.reset_index() | 
|  | 717         header = ['ids', 'P_Value', 'Log2(fold change)'] | 
|  | 718         tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | 
|  | 719 | 
|  | 720         if create_svg or create_pdf: | 
|  | 721             if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | 
|  | 722                                                     and args.yes_no == 'yes'): | 
|  | 723                 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | 
|  | 724                 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | 
|  | 725                 with open(file_svg, 'wb') as new_map: | 
|  | 726                     new_map.write(ET.tostring(core_map)) | 
|  | 727 | 
|  | 728 | 
|  | 729                 if create_pdf: | 
|  | 730                     file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | 
|  | 731                     renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | 
|  | 732 | 
|  | 733                 if not create_svg: | 
|  | 734                     #Ho utilizzato il file svg per generare il pdf, | 
|  | 735                     #ma l'utente non ne ha richiesto il ritorno, quindi | 
|  | 736                     #lo elimino | 
|  | 737                     os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | 
|  | 738 | 
|  | 739     return None | 
|  | 740 | 
|  | 741 ############################ MAIN ############################################# | 
|  | 742 | 
|  | 743 def main(): | 
|  | 744     args = process_args(sys.argv) | 
|  | 745 | 
|  | 746     create_svg = check_bool(args.generate_svg) | 
|  | 747     create_pdf = check_bool(args.generate_pdf) | 
|  | 748     generate_ras = check_bool(args.generate_ras) | 
|  | 749 | 
|  | 750     os.makedirs('result') | 
|  | 751 | 
|  | 752     if generate_ras: | 
|  | 753         os.makedirs('ras') | 
|  | 754 | 
|  | 755     if args.rules_selector == 'HMRcore': | 
|  | 756         recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) | 
|  | 757     elif args.rules_selector == 'Recon': | 
|  | 758         recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) | 
|  | 759     elif args.rules_selector == 'Custom': | 
|  | 760         ids, rules, gene_in_rule = make_recon(args.custom) | 
|  | 761 | 
|  | 762     resolve_none = check_bool(args.none) | 
|  | 763 | 
|  | 764     class_pat = {} | 
|  | 765 | 
|  | 766     if args.option == 'datasets_rasonly': | 
|  | 767         name = "RAS Dataset" | 
|  | 768         dataset = read_dataset(args.input_datas[0],"dataset") | 
|  | 769 | 
|  | 770         dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 
|  | 771 | 
|  | 772         type_gene = gene_type(dataset.iloc[0, 0], name) | 
|  | 773 | 
|  | 774         if args.rules_selector != 'Custom': | 
|  | 775             genes = data_gene(dataset, type_gene, name, None) | 
|  | 776             ids, rules = load_id_rules(recon.get(type_gene)) | 
|  | 777         elif args.rules_selector == 'Custom': | 
|  | 778             genes = data_gene(dataset, type_gene, name, gene_in_rule) | 
|  | 779 | 
|  | 780         resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 
|  | 781 | 
|  | 782         create_ras(resolve_rules, name, True, rules, ids) | 
|  | 783 | 
|  | 784         if err != None and err: | 
|  | 785             warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 
|  | 786                 + name + ', the expression level for this gene ' + | 
|  | 787                 'will be considered NaN\n') | 
|  | 788 | 
|  | 789         print('execution succeded') | 
|  | 790         return None | 
|  | 791 | 
|  | 792 | 
|  | 793     elif args.option == 'datasets': | 
|  | 794         num = 1 | 
|  | 795         for i, j in zip(args.input_datas, args.names): | 
|  | 796 | 
|  | 797             name = name_dataset(j, num) | 
|  | 798             dataset = read_dataset(i, name) | 
|  | 799 | 
|  | 800             dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 
|  | 801 | 
|  | 802             type_gene = gene_type(dataset.iloc[0, 0], name) | 
|  | 803 | 
|  | 804             if args.rules_selector != 'Custom': | 
|  | 805                 genes = data_gene(dataset, type_gene, name, None) | 
|  | 806                 ids, rules = load_id_rules(recon.get(type_gene)) | 
|  | 807             elif args.rules_selector == 'Custom': | 
|  | 808                 genes = data_gene(dataset, type_gene, name, gene_in_rule) | 
|  | 809 | 
|  | 810 | 
|  | 811             resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 
|  | 812 | 
|  | 813             if generate_ras: | 
|  | 814                 create_ras(resolve_rules, name, False, rules, ids) | 
|  | 815 | 
|  | 816             if err != None and err: | 
|  | 817                 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 
|  | 818                     + name + ', the expression level for this gene ' + | 
|  | 819                     'will be considered NaN\n') | 
|  | 820             if resolve_rules != None: | 
|  | 821                 class_pat[name] = list(map(list, zip(*resolve_rules.values()))) | 
|  | 822             num += 1 | 
|  | 823     elif args.option == 'dataset_class': | 
|  | 824         name = 'RNAseq' | 
|  | 825         dataset = read_dataset(args.input_data, name) | 
|  | 826         dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 
|  | 827         type_gene = gene_type(dataset.iloc[0, 0], name) | 
|  | 828         classes = read_dataset(args.input_class, 'class') | 
|  | 829         if not len(classes.columns) == 2: | 
|  | 830             warning('Warning: more than 2 columns in class file. Extra' + | 
|  | 831                     'columns have been disregarded\n') | 
|  | 832         classes = classes.astype(str) | 
|  | 833         if args.rules_selector != 'Custom': | 
|  | 834             genes = data_gene(dataset, type_gene, name, None) | 
|  | 835             ids, rules = load_id_rules(recon.get(type_gene)) | 
|  | 836         elif args.rules_selector == 'Custom': | 
|  | 837             genes = data_gene(dataset, type_gene, name, gene_in_rule) | 
|  | 838         resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 
|  | 839         if err != None and err: | 
|  | 840             warning('Warning: gene\n'+str(err)+'\nnot found in class ' | 
|  | 841                     + name + ', the expression level for this gene ' + | 
|  | 842                     'will be considered NaN\n') | 
|  | 843         if resolve_rules != None: | 
|  | 844             class_pat = split_class(classes, resolve_rules) | 
|  | 845             if generate_ras: | 
|  | 846                 create_ras(resolve_rules, name, False, rules, ids) | 
|  | 847 | 
|  | 848 | 
|  | 849     if args.rules_selector == 'Custom': | 
|  | 850         if args.yes_no == 'yes': | 
|  | 851             try: | 
|  | 852                 core_map = ET.parse(args.custom_map) | 
|  | 853             except (ET.XMLSyntaxError, ET.XMLSchemaParseError): | 
|  | 854                 sys.exit('Execution aborted: custom map in wrong format') | 
|  | 855         elif args.yes_no == 'no': | 
|  | 856             core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') | 
|  | 857     else: | 
|  | 858         core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') | 
|  | 859 | 
|  | 860     maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) | 
|  | 861 | 
|  | 862     print('Execution succeded') | 
|  | 863 | 
|  | 864     return None | 
|  | 865 | 
|  | 866 ############################################################################### | 
|  | 867 | 
|  | 868 if __name__ == "__main__": | 
|  | 869     main() |