| 
46
 | 
     1 from __future__ import division
 | 
| 
 | 
     2 import sys
 | 
| 
 | 
     3 import pandas as pd
 | 
| 
 | 
     4 import collections
 | 
| 
 | 
     5 import pickle as pk
 | 
| 
 | 
     6 import math
 | 
| 
 | 
     7 import argparse
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 ########################## argparse ##########################################
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 def process_args(args):
 | 
| 
 | 
    12     parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
 | 
| 
 | 
    13                                      description = 'process some value\'s'+
 | 
| 
 | 
    14                                      ' genes to create a comparison\'s map.')
 | 
| 
 | 
    15     parser.add_argument('-rs', '--rules_selector', 
 | 
| 
 | 
    16                         type = str,
 | 
| 
 | 
    17                         default = 'HMRcore',
 | 
| 
 | 
    18                         choices = ['HMRcore', 'Recon', 'Custom'], 
 | 
| 
 | 
    19                         help = 'chose which type of dataset you want use')
 | 
| 
 | 
    20     parser.add_argument('-cr', '--custom',
 | 
| 
 | 
    21                         type = str,
 | 
| 
 | 
    22                         help='your dataset if you want custom rules')
 | 
| 
 | 
    23     parser.add_argument('-n', '--none',
 | 
| 
 | 
    24                         type = str,
 | 
| 
 | 
    25                         default = 'true',
 | 
| 
 | 
    26                         choices = ['true', 'false'], 
 | 
| 
 | 
    27                         help = 'compute Nan values')
 | 
| 
 | 
    28     parser.add_argument('-td', '--tool_dir',
 | 
| 
 | 
    29                         type = str,
 | 
| 
 | 
    30                         required = True,
 | 
| 
 | 
    31                         help = 'your tool directory')
 | 
| 
 | 
    32     parser.add_argument('-ol', '--out_log', 
 | 
| 
 | 
    33                         help = "Output log")    
 | 
| 
47
 | 
    34     parser.add_argument('-id', '--input',
 | 
| 
46
 | 
    35                         type = str,
 | 
| 
 | 
    36                         help = 'input dataset')
 | 
| 
47
 | 
    37     parser.add_argument('-ra', '--ras_output',
 | 
| 
46
 | 
    38                         type = str,
 | 
| 
47
 | 
    39                         required = True,
 | 
| 
 | 
    40                         help = 'ras output')
 | 
| 
 | 
    41     
 | 
| 
46
 | 
    42     args = parser.parse_args()
 | 
| 
 | 
    43     return args
 | 
| 
 | 
    44 
 | 
| 
 | 
    45 ########################### warning ###########################################
 | 
| 
 | 
    46 
 | 
| 
 | 
    47 def warning(s):
 | 
| 
 | 
    48     args = process_args(sys.argv)
 | 
| 
 | 
    49     with open(args.out_log, 'a') as log:
 | 
| 
 | 
    50             log.write(s)
 | 
| 
 | 
    51             
 | 
| 
 | 
    52 ############################ dataset input ####################################
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 def read_dataset(data, name):
 | 
| 
 | 
    55     try:
 | 
| 
 | 
    56         dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
 | 
| 
 | 
    57     except pd.errors.EmptyDataError:
 | 
| 
 | 
    58         sys.exit('Execution aborted: wrong format of ' + name + '\n')
 | 
| 
 | 
    59     if len(dataset.columns) < 2:
 | 
| 
 | 
    60         sys.exit('Execution aborted: wrong format of ' + name + '\n')
 | 
| 
 | 
    61     return dataset
 | 
| 
 | 
    62 
 | 
| 
 | 
    63 ############################ dataset name #####################################
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 def name_dataset(name_data, count):
 | 
| 
 | 
    66     if str(name_data) == 'Dataset':
 | 
| 
 | 
    67         return str(name_data) + '_' + str(count)
 | 
| 
 | 
    68     else:
 | 
| 
 | 
    69         return str(name_data)
 | 
| 
 | 
    70     
 | 
| 
 | 
############################ load ids and rules ###############################
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 def load_id_rules(reactions):
 | 
| 
 | 
    74     ids, rules = [], []
 | 
| 
 | 
    75     for key, value in reactions.items():
 | 
| 
 | 
    76             ids.append(key)
 | 
| 
 | 
    77             rules.append(value)
 | 
| 
 | 
    78     return (ids, rules)
 | 
| 
 | 
    79 
 | 
| 
 | 
    80 ############################ check_methods ####################################
 | 
| 
 | 
    81 
 | 
| 
 | 
    82 def gene_type(l, name):
 | 
| 
 | 
    83     if check_hgnc(l):
 | 
| 
 | 
    84         return 'hugo_id'
 | 
| 
 | 
    85     elif check_ensembl(l):
 | 
| 
 | 
    86         return 'ensembl_gene_id'
 | 
| 
 | 
    87     elif check_symbol(l):
 | 
| 
 | 
    88         return 'symbol'
 | 
| 
 | 
    89     elif check_entrez(l):
 | 
| 
 | 
    90         return 'entrez_id'
 | 
| 
 | 
    91     else:
 | 
| 
 | 
    92         sys.exit('Execution aborted:\n' +
 | 
| 
 | 
    93                  'gene ID type in ' + name + ' not supported. Supported ID'+
 | 
| 
 | 
    94                  'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
 | 
| 
 | 
    95 
 | 
| 
 | 
    96 def check_hgnc(l):
 | 
| 
 | 
    97     if len(l) > 5:
 | 
| 
 | 
    98         if (l.upper()).startswith('HGNC:'):
 | 
| 
 | 
    99             return l[5:].isdigit()
 | 
| 
 | 
   100         else:
 | 
| 
 | 
   101             return False
 | 
| 
 | 
   102     else:
 | 
| 
 | 
   103         return False
 | 
| 
 | 
   104 
 | 
| 
 | 
   105 def check_ensembl(l): 
 | 
| 
 | 
   106     if len(l) == 15:
 | 
| 
 | 
   107         if (l.upper()).startswith('ENS'):
 | 
| 
 | 
   108             return l[4:].isdigit()
 | 
| 
 | 
   109         else:  
 | 
| 
 | 
   110             return False 
 | 
| 
 | 
   111     else: 
 | 
| 
 | 
   112         return False 
 | 
| 
 | 
   113 
 | 
| 
 | 
   114 def check_symbol(l):
 | 
| 
 | 
   115     if len(l) > 0:
 | 
| 
 | 
   116         if l[0].isalpha() and l[1:].isalnum():
 | 
| 
 | 
   117             return True
 | 
| 
 | 
   118         else:
 | 
| 
 | 
   119             return False
 | 
| 
 | 
   120     else:
 | 
| 
 | 
   121         return False
 | 
| 
 | 
   122 
 | 
| 
 | 
   123 def check_entrez(l): 
 | 
| 
 | 
   124     if len(l) > 0:
 | 
| 
 | 
   125         return l.isdigit()
 | 
| 
 | 
   126     else: 
 | 
| 
 | 
   127         return False
 | 
| 
 | 
   128 
 | 
| 
 | 
   129 def check_bool(b):
 | 
| 
 | 
   130     if b == 'true':
 | 
| 
 | 
   131         return True
 | 
| 
 | 
   132     elif b == 'false':
 | 
| 
 | 
   133         return False
 | 
| 
 | 
   134     
 | 
| 
 | 
   135 ############################ resolve_methods ##################################
 | 
| 
 | 
   136 
 | 
| 
 | 
   137 def replace_gene_value(l, d):
 | 
| 
 | 
   138     tmp = []
 | 
| 
 | 
   139     err = []
 | 
| 
 | 
   140     while l:
 | 
| 
 | 
   141         if isinstance(l[0], list):
 | 
| 
 | 
   142             tmp_rules, tmp_err = replace_gene_value(l[0], d)
 | 
| 
 | 
   143             tmp.append(tmp_rules)
 | 
| 
 | 
   144             err.extend(tmp_err)
 | 
| 
 | 
   145         else:
 | 
| 
 | 
   146             value = replace_gene(l[0], d)
 | 
| 
 | 
   147             tmp.append(value)
 | 
| 
 | 
   148             if value == None:
 | 
| 
 | 
   149                 err.append(l[0])
 | 
| 
 | 
   150         l = l[1:]
 | 
| 
 | 
   151     return (tmp, err)
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 def replace_gene(l, d):
 | 
| 
 | 
   155     if l =='and' or l == 'or':
 | 
| 
 | 
   156         return l
 | 
| 
 | 
   157     else:
 | 
| 
 | 
   158         value = d.get(l, None)
 | 
| 
 | 
   159         if not(value == None or isinstance(value, (int, float))):
 | 
| 
 | 
   160             sys.exit('Execution aborted: ' + value + ' value not valid\n')
 | 
| 
 | 
   161         return value
 | 
| 
 | 
   162 
 | 
| 
 | 
   163 def computes(val1, op, val2, cn):
 | 
| 
 | 
   164     if val1 != None and val2 != None:
 | 
| 
 | 
   165         if op == 'and':
 | 
| 
 | 
   166             return min(val1, val2)
 | 
| 
 | 
   167         else:
 | 
| 
 | 
   168             return val1 + val2
 | 
| 
 | 
   169     elif op == 'and':
 | 
| 
 | 
   170         if cn is True:
 | 
| 
 | 
   171             if val1 != None:
 | 
| 
 | 
   172                 return val1
 | 
| 
 | 
   173             elif val2 != None:
 | 
| 
 | 
   174                 return val2
 | 
| 
 | 
   175             else:
 | 
| 
 | 
   176                 return None
 | 
| 
 | 
   177         else:
 | 
| 
 | 
   178             return None
 | 
| 
 | 
   179     else:
 | 
| 
 | 
   180         if val1 != None:
 | 
| 
 | 
   181             return val1
 | 
| 
 | 
   182         elif val2 != None:
 | 
| 
 | 
   183             return val2
 | 
| 
 | 
   184         else:
 | 
| 
 | 
   185             return None
 | 
| 
 | 
   186 
 | 
| 
 | 
   187 def control(ris, l, cn):
 | 
| 
 | 
   188     if len(l) == 1:
 | 
| 
 | 
   189         if isinstance(l[0], (float, int)) or l[0] == None:
 | 
| 
 | 
   190             return l[0]
 | 
| 
 | 
   191         elif isinstance(l[0], list):
 | 
| 
 | 
   192             return control(None, l[0], cn)
 | 
| 
 | 
   193         else:
 | 
| 
 | 
   194             return False
 | 
| 
 | 
   195     elif len(l) > 2:
 | 
| 
 | 
   196         return control_list(ris, l, cn)
 | 
| 
 | 
   197     else:
 | 
| 
 | 
   198         return False
 | 
| 
 | 
   199 
 | 
| 
 | 
   200 def control_list(ris, l, cn):
 | 
| 
 | 
   201     while l:
 | 
| 
 | 
   202         if len(l) == 1:
 | 
| 
 | 
   203             return False
 | 
| 
 | 
   204         elif (isinstance(l[0], (float, int)) or
 | 
| 
 | 
   205               l[0] == None) and l[1] in ['and', 'or']:
 | 
| 
 | 
   206             if isinstance(l[2], (float, int)) or l[2] == None:
 | 
| 
 | 
   207                 ris = computes(l[0], l[1], l[2], cn)            
 | 
| 
 | 
   208             elif isinstance(l[2], list):
 | 
| 
 | 
   209                 tmp = control(None, l[2], cn)
 | 
| 
 | 
   210                 if tmp is False:
 | 
| 
 | 
   211                     return False
 | 
| 
 | 
   212                 else:
 | 
| 
 | 
   213                     ris = computes(l[0], l[1], tmp, cn)
 | 
| 
 | 
   214             else:
 | 
| 
 | 
   215                 return False
 | 
| 
 | 
   216             l = l[3:]
 | 
| 
 | 
   217         elif l[0] in ['and', 'or']:
 | 
| 
 | 
   218             if isinstance(l[1], (float, int)) or l[1] == None:
 | 
| 
 | 
   219                 ris = computes(ris, l[0], l[1], cn)
 | 
| 
 | 
   220             elif isinstance(l[1], list):
 | 
| 
 | 
   221                 tmp = control(None,l[1], cn)
 | 
| 
 | 
   222                 if tmp is False:
 | 
| 
 | 
   223                     return False
 | 
| 
 | 
   224                 else:
 | 
| 
 | 
   225                     ris = computes(ris, l[0], tmp, cn)
 | 
| 
 | 
   226             else:
 | 
| 
 | 
   227                 return False
 | 
| 
 | 
   228             l = l[2:]
 | 
| 
 | 
   229         elif isinstance(l[0], list) and l[1] in ['and', 'or']:
 | 
| 
 | 
   230             if isinstance(l[2], (float, int)) or l[2] == None:
 | 
| 
 | 
   231                 tmp = control(None, l[0], cn)
 | 
| 
 | 
   232                 if tmp is False:
 | 
| 
 | 
   233                     return False
 | 
| 
 | 
   234                 else:
 | 
| 
 | 
   235                     ris = computes(tmp, l[1], l[2], cn)
 | 
| 
 | 
   236             elif isinstance(l[2], list):
 | 
| 
 | 
   237                 tmp = control(None, l[0], cn)
 | 
| 
 | 
   238                 tmp2 = control(None, l[2], cn)
 | 
| 
 | 
   239                 if tmp is False or tmp2 is False:
 | 
| 
 | 
   240                     return False
 | 
| 
 | 
   241                 else:
 | 
| 
 | 
   242                     ris = computes(tmp, l[1], tmp2, cn)
 | 
| 
 | 
   243             else:
 | 
| 
 | 
   244                 return False
 | 
| 
 | 
   245             l = l[3:]
 | 
| 
 | 
   246         else:
 | 
| 
 | 
   247             return False
 | 
| 
 | 
   248     return ris
 | 
| 
 | 
   249 
 | 
| 
 | 
   250 ############################ make recon #######################################
 | 
| 
 | 
   251 
 | 
| 
 | 
   252 def check_and_doWord(l):
 | 
| 
 | 
   253     tmp = []
 | 
| 
 | 
   254     tmp_genes = []
 | 
| 
 | 
   255     count = 0
 | 
| 
 | 
   256     while l:
 | 
| 
 | 
   257         if count >= 0:
 | 
| 
 | 
   258             if l[0] == '(':
 | 
| 
 | 
   259                 count += 1
 | 
| 
 | 
   260                 tmp.append(l[0])
 | 
| 
 | 
   261                 l.pop(0)
 | 
| 
 | 
   262             elif l[0] == ')':
 | 
| 
 | 
   263                 count -= 1
 | 
| 
 | 
   264                 tmp.append(l[0])
 | 
| 
 | 
   265                 l.pop(0)
 | 
| 
 | 
   266             elif l[0] == ' ':
 | 
| 
 | 
   267                 l.pop(0)
 | 
| 
 | 
   268             else:
 | 
| 
 | 
   269                 word = []
 | 
| 
 | 
   270                 while l:
 | 
| 
 | 
   271                     if l[0] in [' ', '(', ')']:
 | 
| 
 | 
   272                         break
 | 
| 
 | 
   273                     else:
 | 
| 
 | 
   274                         word.append(l[0])
 | 
| 
 | 
   275                         l.pop(0)
 | 
| 
 | 
   276                 word = ''.join(word)
 | 
| 
 | 
   277                 tmp.append(word)
 | 
| 
 | 
   278                 if not(word in ['or', 'and']):
 | 
| 
 | 
   279                     tmp_genes.append(word)
 | 
| 
 | 
   280         else:
 | 
| 
 | 
   281             return False
 | 
| 
 | 
   282     if count == 0:
 | 
| 
 | 
   283         return (tmp, tmp_genes)
 | 
| 
 | 
   284     else:
 | 
| 
 | 
   285         return False
 | 
| 
 | 
   286 
 | 
| 
 | 
   287 def brackets_to_list(l):
 | 
| 
 | 
   288     tmp = []
 | 
| 
 | 
   289     while l:
 | 
| 
 | 
   290         if l[0] == '(':
 | 
| 
 | 
   291             l.pop(0)
 | 
| 
 | 
   292             tmp.append(resolve_brackets(l))
 | 
| 
 | 
   293         else:
 | 
| 
 | 
   294             tmp.append(l[0])
 | 
| 
 | 
   295             l.pop(0)
 | 
| 
 | 
   296     return tmp
 | 
| 
 | 
   297 
 | 
| 
 | 
   298 def resolve_brackets(l):
 | 
| 
 | 
   299     tmp = []
 | 
| 
 | 
   300     while l[0] != ')':
 | 
| 
 | 
   301         if l[0] == '(':
 | 
| 
 | 
   302             l.pop(0)
 | 
| 
 | 
   303             tmp.append(resolve_brackets(l))
 | 
| 
 | 
   304         else:
 | 
| 
 | 
   305             tmp.append(l[0])
 | 
| 
 | 
   306             l.pop(0)
 | 
| 
 | 
   307     l.pop(0)
 | 
| 
 | 
   308     return tmp
 | 
| 
 | 
   309 
 | 
| 
 | 
   310 def priorityAND(l):
 | 
| 
 | 
   311     tmp = []
 | 
| 
 | 
   312     flag = True
 | 
| 
 | 
   313     while l:
 | 
| 
 | 
   314         if len(l) == 1:
 | 
| 
 | 
   315             if isinstance(l[0], list):
 | 
| 
 | 
   316                 tmp.append(priorityAND(l[0]))
 | 
| 
 | 
   317             else:
 | 
| 
 | 
   318                 tmp.append(l[0])
 | 
| 
 | 
   319             l = l[1:]
 | 
| 
 | 
   320         elif l[0] == 'or':
 | 
| 
 | 
   321             tmp.append(l[0])
 | 
| 
 | 
   322             flag = False
 | 
| 
 | 
   323             l = l[1:]
 | 
| 
 | 
   324         elif l[1] == 'or':
 | 
| 
 | 
   325             if isinstance(l[0], list): 
 | 
| 
 | 
   326                 tmp.append(priorityAND(l[0]))
 | 
| 
 | 
   327             else:
 | 
| 
 | 
   328                 tmp.append(l[0])
 | 
| 
 | 
   329             tmp.append(l[1])
 | 
| 
 | 
   330             flag = False
 | 
| 
 | 
   331             l = l[2:]
 | 
| 
 | 
   332         elif l[1] == 'and':
 | 
| 
 | 
   333             tmpAnd = []
 | 
| 
 | 
   334             if isinstance(l[0], list): 
 | 
| 
 | 
   335                 tmpAnd.append(priorityAND(l[0]))
 | 
| 
 | 
   336             else:
 | 
| 
 | 
   337                 tmpAnd.append(l[0])
 | 
| 
 | 
   338             tmpAnd.append(l[1])
 | 
| 
 | 
   339             if isinstance(l[2], list): 
 | 
| 
 | 
   340                 tmpAnd.append(priorityAND(l[2]))
 | 
| 
 | 
   341             else:
 | 
| 
 | 
   342                 tmpAnd.append(l[2])
 | 
| 
 | 
   343             l = l[3:]
 | 
| 
 | 
   344             while l:
 | 
| 
 | 
   345                 if l[0] == 'and':
 | 
| 
 | 
   346                     tmpAnd.append(l[0])
 | 
| 
 | 
   347                     if isinstance(l[1], list): 
 | 
| 
 | 
   348                         tmpAnd.append(priorityAND(l[1]))
 | 
| 
 | 
   349                     else:
 | 
| 
 | 
   350                         tmpAnd.append(l[1])
 | 
| 
 | 
   351                     l = l[2:]
 | 
| 
 | 
   352                 elif l[0] == 'or':
 | 
| 
 | 
   353                     flag = False
 | 
| 
 | 
   354                     break
 | 
| 
 | 
   355             if flag == True: #when there are only AND in list
 | 
| 
 | 
   356                 tmp.extend(tmpAnd)
 | 
| 
 | 
   357             elif flag == False:
 | 
| 
 | 
   358                 tmp.append(tmpAnd)
 | 
| 
 | 
   359     return tmp
 | 
| 
 | 
   360 
 | 
| 
 | 
   361 def checkRule(l):
 | 
| 
 | 
   362     if len(l) == 1:
 | 
| 
 | 
   363         if isinstance(l[0], list):
 | 
| 
 | 
   364             if checkRule(l[0]) is False:
 | 
| 
 | 
   365                 return False
 | 
| 
 | 
   366     elif len(l) > 2:
 | 
| 
 | 
   367         if checkRule2(l) is False:
 | 
| 
 | 
   368             return False
 | 
| 
 | 
   369     else:
 | 
| 
 | 
   370         return False
 | 
| 
 | 
   371     return True
 | 
| 
 | 
   372 
 | 
| 
 | 
   373 def checkRule2(l):
 | 
| 
 | 
   374     while l:
 | 
| 
 | 
   375         if len(l) == 1:
 | 
| 
 | 
   376             return False
 | 
| 
 | 
   377         elif isinstance(l[0], list) and l[1] in ['and', 'or']:
 | 
| 
 | 
   378             if checkRule(l[0]) is False:
 | 
| 
 | 
   379                 return False
 | 
| 
 | 
   380             if isinstance(l[2], list):
 | 
| 
 | 
   381                 if checkRule(l[2]) is False:
 | 
| 
 | 
   382                     return False
 | 
| 
 | 
   383             l = l[3:]
 | 
| 
 | 
   384         elif l[1] in ['and', 'or']:
 | 
| 
 | 
   385             if isinstance(l[2], list):
 | 
| 
 | 
   386                 if checkRule(l[2]) is False:
 | 
| 
 | 
   387                     return False
 | 
| 
 | 
   388             l = l[3:]
 | 
| 
 | 
   389         elif l[0] in ['and', 'or']:
 | 
| 
 | 
   390             if isinstance(l[1], list):
 | 
| 
 | 
   391                 if checkRule(l[1]) is False:
 | 
| 
 | 
   392                     return False
 | 
| 
 | 
   393             l = l[2:]
 | 
| 
 | 
   394         else:
 | 
| 
 | 
   395             return False
 | 
| 
 | 
   396     return True
 | 
| 
 | 
   397 
 | 
| 
 | 
   398 def do_rules(rules):
 | 
| 
 | 
   399     split_rules = []
 | 
| 
 | 
   400     err_rules = []
 | 
| 
 | 
   401     tmp_gene_in_rule = []
 | 
| 
 | 
   402     for i in range(len(rules)):
 | 
| 
 | 
   403         tmp = list(rules[i])
 | 
| 
 | 
   404         if tmp:
 | 
| 
 | 
   405             tmp, tmp_genes = check_and_doWord(tmp)
 | 
| 
 | 
   406             tmp_gene_in_rule.extend(tmp_genes)
 | 
| 
 | 
   407             if tmp is False:
 | 
| 
 | 
   408                 split_rules.append([])
 | 
| 
 | 
   409                 err_rules.append(rules[i])
 | 
| 
 | 
   410             else:
 | 
| 
 | 
   411                 tmp = brackets_to_list(tmp)
 | 
| 
 | 
   412                 if checkRule(tmp):
 | 
| 
 | 
   413                     split_rules.append(priorityAND(tmp))
 | 
| 
 | 
   414                 else:
 | 
| 
 | 
   415                     split_rules.append([])
 | 
| 
 | 
   416                     err_rules.append(rules[i])
 | 
| 
 | 
   417         else:
 | 
| 
 | 
   418             split_rules.append([])
 | 
| 
 | 
   419     if err_rules:
 | 
| 
 | 
   420         warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
 | 
| 
 | 
   421     return (split_rules, list(set(tmp_gene_in_rule)))
 | 
| 
 | 
   422 
 | 
| 
 | 
   423 def make_recon(data):
 | 
| 
 | 
   424     try:
 | 
| 
 | 
   425         import cobra as cb
 | 
| 
 | 
   426         import warnings
 | 
| 
 | 
   427         with warnings.catch_warnings():
 | 
| 
 | 
   428             warnings.simplefilter('ignore')
 | 
| 
 | 
   429             recon = cb.io.read_sbml_model(data)
 | 
| 
 | 
   430         react = recon.reactions
 | 
| 
 | 
   431         rules = [react[i].gene_reaction_rule for i in range(len(react))]
 | 
| 
 | 
   432         ids = [react[i].id for i in range(len(react))]
 | 
| 
 | 
   433     except cb.io.sbml3.CobraSBMLError:
 | 
| 
 | 
   434         try:
 | 
| 
 | 
   435             data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
 | 
| 
 | 
   436             if len(data.columns) < 2:
 | 
| 
 | 
   437                 sys.exit('Execution aborted: wrong format of '+
 | 
| 
 | 
   438                          'custom datarules\n')
 | 
| 
 | 
   439             if not len(data.columns) == 2:
 | 
| 
 | 
   440                 warning('Warning: more than 2 columns in custom datarules.\n' +
 | 
| 
 | 
   441                         'Extra columns have been disregarded\n')
 | 
| 
 | 
   442             ids = list(data.iloc[:, 0])
 | 
| 
 | 
   443             rules = list(data.iloc[:, 1])
 | 
| 
 | 
   444         except pd.errors.EmptyDataError:
 | 
| 
 | 
   445             sys.exit('Execution aborted: wrong format of custom datarules\n')
 | 
| 
 | 
   446         except pd.errors.ParserError:
 | 
| 
 | 
   447             sys.exit('Execution aborted: wrong format of custom datarules\n')            
 | 
| 
 | 
   448     split_rules, tmp_genes = do_rules(rules)
 | 
| 
 | 
   449     gene_in_rule = {}
 | 
| 
 | 
   450     for i in tmp_genes:
 | 
| 
 | 
   451         gene_in_rule[i] = 'ok'
 | 
| 
 | 
   452     return (ids, split_rules, gene_in_rule)
 | 
| 
 | 
   453 
 | 
| 
 | 
   454 ############################ gene #############################################
 | 
| 
 | 
   455 
 | 
| 
 | 
   456 def data_gene(gene, type_gene, name, gene_custom):
 | 
| 
 | 
   457     args = process_args(sys.argv)    
 | 
| 
 | 
   458     for i in range(len(gene)):
 | 
| 
 | 
   459         tmp = gene.iloc[i, 0]
 | 
| 
 | 
   460         if tmp.startswith(' ') or tmp.endswith(' '):
 | 
| 
 | 
   461             gene.iloc[i, 0] = (tmp.lstrip()).rstrip()
 | 
| 
 | 
   462     gene_dup = [item for item, count in 
 | 
| 
 | 
   463                collections.Counter(gene[gene.columns[0]]).items() if count > 1]
 | 
| 
 | 
   464     pat_dup = [item for item, count in 
 | 
| 
 | 
   465                collections.Counter(list(gene.columns)).items() if count > 1]
 | 
| 
 | 
   466 
 | 
| 
 | 
   467     if gene_dup:
 | 
| 
 | 
   468         if gene_custom == None:
 | 
| 
 | 
   469             if args.rules_selector == 'HMRcore':
 | 
| 
 | 
   470                 gene_in_rule = pk.load(open(args.tool_dir +
 | 
| 
 | 
   471                                             '/local/HMRcore_genes.p', 'rb'))
 | 
| 
 | 
   472             elif args.rules_selector == 'Recon':
 | 
| 
 | 
   473                 gene_in_rule = pk.load(open(args.tool_dir +
 | 
| 
 | 
   474                                             '/local/Recon_genes.p', 'rb'))
 | 
| 
 | 
   475             gene_in_rule = gene_in_rule.get(type_gene)
 | 
| 
 | 
   476         else:
 | 
| 
 | 
   477             gene_in_rule = gene_custom
 | 
| 
 | 
   478         tmp = []
 | 
| 
 | 
   479         for i in gene_dup:
 | 
| 
 | 
   480             if gene_in_rule.get(i) == 'ok':
 | 
| 
 | 
   481                 tmp.append(i)
 | 
| 
 | 
   482         if tmp:
 | 
| 
 | 
   483             sys.exit('Execution aborted because gene ID '
 | 
| 
 | 
   484                      +str(tmp)+' in '+name+' is duplicated\n')
 | 
| 
 | 
   485     if pat_dup:
 | 
| 
 | 
   486         warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + 
 | 
| 
 | 
   487                 '\n')
 | 
| 
 | 
   488         
 | 
| 
 | 
   489     return (gene.set_index(gene.columns[0])).to_dict()
 | 
| 
 | 
   490 
 | 
| 
 | 
   491 ############################ resolve ##########################################
 | 
| 
 | 
   492 
 | 
| 
 | 
   493 def resolve(genes, rules, ids, resolve_none, name):
 | 
| 
 | 
   494     resolve_rules = {}
 | 
| 
 | 
   495     not_found = []
 | 
| 
 | 
   496     flag = False
 | 
| 
 | 
   497     for key, value in genes.items():
 | 
| 
 | 
   498         tmp_resolve = []
 | 
| 
 | 
   499         for i in range(len(rules)):
 | 
| 
 | 
   500             tmp = rules[i]
 | 
| 
 | 
   501             if tmp:
 | 
| 
 | 
   502                 tmp, err = replace_gene_value(tmp, value)
 | 
| 
 | 
   503                 if err:
 | 
| 
 | 
   504                     not_found.extend(err)
 | 
| 
 | 
   505                 ris = control(None, tmp, resolve_none)
 | 
| 
 | 
   506                 if ris is False or ris == None:
 | 
| 
 | 
   507                     tmp_resolve.append(None)
 | 
| 
 | 
   508                 else:
 | 
| 
 | 
   509                     tmp_resolve.append(ris)
 | 
| 
 | 
   510                     flag = True
 | 
| 
 | 
   511             else:
 | 
| 
 | 
   512                 tmp_resolve.append(None)    
 | 
| 
 | 
   513         resolve_rules[key] = tmp_resolve
 | 
| 
 | 
   514     if flag is False:
 | 
| 
 | 
   515         warning('Warning: no computable score (due to missing gene values)' +
 | 
| 
 | 
   516                 'for class ' + name + ', the class has been disregarded\n')
 | 
| 
 | 
   517         return (None, None)
 | 
| 
 | 
   518     return (resolve_rules, list(set(not_found)))
 | 
| 
 | 
   519 
 | 
| 
 | 
   520 ############################ split class ######################################
 | 
| 
 | 
   521 
 | 
| 
 | 
   522 def split_class(classes, resolve_rules):
 | 
| 
 | 
   523     class_pat = {}
 | 
| 
 | 
   524     for i in range(len(classes)):
 | 
| 
 | 
   525         classe = classes.iloc[i, 1]
 | 
| 
 | 
   526         if not pd.isnull(classe):
 | 
| 
 | 
   527             l = []
 | 
| 
 | 
   528             for j in range(i, len(classes)):
 | 
| 
 | 
   529                 if classes.iloc[j, 1] == classe:
 | 
| 
 | 
   530                     pat_id = classes.iloc[j, 0]
 | 
| 
 | 
   531                     if tmp != None:
 | 
| 
 | 
   532                         l.append(tmp)
 | 
| 
 | 
   533                     classes.iloc[j, 1] = None
 | 
| 
 | 
   534             if l:
 | 
| 
 | 
   535                 class_pat[classe] = list(map(list, zip(*l)))
 | 
| 
 | 
   536             else:
 | 
| 
 | 
   537                 warning('Warning: no sample found in class ' + classe +
 | 
| 
 | 
   538                         ', the class has been disregarded\n')
 | 
| 
 | 
   539     return class_pat
 | 
| 
 | 
   540 
 | 
| 
 | 
   541 ############################ create_ras #######################################
 | 
| 
 | 
   542 
 | 
| 
47
 | 
   543 def create_ras (resolve_rules, dataset_name, rules, ids, file):
 | 
| 
46
 | 
   544 
 | 
| 
 | 
   545     if resolve_rules == None:
 | 
| 
 | 
   546         warning("Couldn't generate RAS for current dataset: " + dataset_name)
 | 
| 
 | 
   547 
 | 
| 
 | 
   548     for geni in resolve_rules.values():
 | 
| 
 | 
   549         for i, valori in enumerate(geni):
 | 
| 
 | 
   550             if valori == None:
 | 
| 
 | 
   551                 geni[i] = 'None'
 | 
| 
 | 
   552                 
 | 
| 
 | 
   553     output_ras = pd.DataFrame.from_dict(resolve_rules)
 | 
| 
 | 
   554     
 | 
| 
 | 
   555     output_ras.insert(0, 'Reactions', ids)
 | 
| 
 | 
   556     output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
 | 
| 
 | 
   557     
 | 
| 
47
 | 
   558     text_file = open(file, "w")
 | 
| 
46
 | 
   559     
 | 
| 
 | 
   560     text_file.write(output_to_csv)
 | 
| 
 | 
   561     text_file.close()
 | 
| 
 | 
   562 
 | 
| 
 | 
   563 ############################ MAIN #############################################
 | 
| 
 | 
   564 
 | 
| 
 | 
   565 def main():
 | 
| 
 | 
   566     args = process_args(sys.argv)
 | 
| 
 | 
   567 
 | 
| 
 | 
   568     if args.rules_selector == 'HMRcore':        
 | 
| 
 | 
   569         recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
 | 
| 
 | 
   570     elif args.rules_selector == 'Recon':
 | 
| 
 | 
   571         recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
 | 
| 
 | 
   572     elif args.rules_selector == 'Custom':
 | 
| 
 | 
   573         ids, rules, gene_in_rule = make_recon(args.custom)
 | 
| 
 | 
   574         
 | 
| 
 | 
   575     resolve_none = check_bool(args.none)
 | 
| 
 | 
   576     
 | 
| 
 | 
   577     
 | 
| 
47
 | 
   578     name = "RAS Dataset"
 | 
| 
 | 
   579     dataset = read_dataset(args.input, "dataset")
 | 
| 
46
 | 
   580 
 | 
| 
47
 | 
   581     dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
 | 
| 
46
 | 
   582 
 | 
| 
47
 | 
   583     type_gene = gene_type(dataset.iloc[0, 0], name) 
 | 
| 
 | 
   584         
 | 
| 
 | 
   585     if args.rules_selector != 'Custom':
 | 
| 
 | 
   586         genes = data_gene(dataset, type_gene, name, None)
 | 
| 
 | 
   587         ids, rules = load_id_rules(recon.get(type_gene))
 | 
| 
 | 
   588     elif args.rules_selector == 'Custom':
 | 
| 
 | 
   589         genes = data_gene(dataset, type_gene, name, gene_in_rule)
 | 
| 
46
 | 
   590     
 | 
| 
47
 | 
   591     resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
 | 
| 
 | 
   592 
 | 
| 
 | 
   593     create_ras(resolve_rules, name, rules, ids, args.ras_output)
 | 
| 
 | 
   594       
 | 
| 
 | 
   595     if err != None and err:
 | 
| 
 | 
   596         warning('Warning: gene\n' + str(err) + '\nnot found in class '
 | 
| 
 | 
   597             + name + ', the expression level for this gene ' +
 | 
| 
 | 
   598             'will be considered NaN\n')
 | 
| 
 | 
   599 
 | 
| 
 | 
   600     
 | 
| 
46
 | 
   601     print('Execution succeded')
 | 
| 
 | 
   602 
 | 
| 
 | 
   603     return None
 | 
| 
 | 
   604 
 | 
| 
 | 
   605 ###############################################################################
 | 
| 
 | 
   606 
 | 
| 
 | 
   607 if __name__ == "__main__":
 | 
| 
 | 
   608     main()
 |