| 
0
 | 
     1 
 | 
| 
 | 
     2 from __future__ import division
 | 
| 
 | 
     3 import os
 | 
| 
 | 
     4 import sys
 | 
| 
 | 
     5 import pandas as pd
 | 
| 
 | 
     6 import collections
 | 
| 
 | 
     7 import pickle as pk
 | 
| 
 | 
     8 import argparse
 | 
| 
 | 
     9 from sklearn.cluster import KMeans
 | 
| 
 | 
    10 import matplotlib.pyplot as plt
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 ########################## argparse ###########################################
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 def process_args(args):
 | 
| 
 | 
    15     parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
 | 
| 
 | 
    16                                      description = 'process some value\'s' +
 | 
| 
 | 
    17                                      ' genes to create class.')
 | 
| 
 | 
    18     parser.add_argument('-rs', '--rules_selector', 
 | 
| 
 | 
    19                         type = str,
 | 
| 
 | 
    20                         default = 'HMRcore',
 | 
| 
 | 
    21                         choices = ['HMRcore', 'Recon', 'Custom'], 
 | 
| 
 | 
    22                         help = 'chose which type of dataset you want use')
 | 
| 
 | 
    23     parser.add_argument('-cr', '--custom',
 | 
| 
 | 
    24                         type = str,
 | 
| 
 | 
    25                         help='your dataset if you want custom rules')
 | 
| 
 | 
    26     parser.add_argument('-ch', '--cond_hier', 
 | 
| 
 | 
    27                         type = str,
 | 
| 
 | 
    28                         default = 'no',
 | 
| 
 | 
    29                         choices = ['no', 'yes'], 
 | 
| 
 | 
    30                         help = 'chose if you wanna hierical dendrogram')
 | 
| 
 | 
    31     parser.add_argument('-lk', '--k_min', 
 | 
| 
 | 
    32                         type = int,
 | 
| 
 | 
    33                         help = 'min number of cluster')
 | 
| 
 | 
    34     parser.add_argument('-uk', '--k_max', 
 | 
| 
 | 
    35                         type = int,
 | 
| 
 | 
    36                         help = 'max number of cluster')
 | 
| 
 | 
    37     parser.add_argument('-li', '--linkage', 
 | 
| 
 | 
    38                         type = str, 
 | 
| 
 | 
    39                         choices = ['single', 'complete', 'average'], 
 | 
| 
 | 
    40                         help='linkage hierarchical cluster')
 | 
| 
 | 
    41     parser.add_argument('-d', '--data',
 | 
| 
 | 
    42                         type = str,
 | 
| 
 | 
    43                         required = True,
 | 
| 
 | 
    44                         help = 'input dataset')
 | 
| 
 | 
    45     parser.add_argument('-n', '--none',
 | 
| 
 | 
    46                         type = str,
 | 
| 
 | 
    47                         default = 'true',
 | 
| 
 | 
    48                         choices = ['true', 'false'], 
 | 
| 
 | 
    49                         help = 'compute Nan values')
 | 
| 
 | 
    50     parser.add_argument('-td', '--tool_dir',
 | 
| 
 | 
    51                         type = str,
 | 
| 
 | 
    52                         required = True,
 | 
| 
 | 
    53                         help = 'your tool directory')
 | 
| 
 | 
    54     parser.add_argument('-na', '--name',
 | 
| 
 | 
    55                         type = str,
 | 
| 
 | 
    56                         help = 'name of dataset')
 | 
| 
 | 
    57     parser.add_argument('-de', '--dendro', 
 | 
| 
 | 
    58                         help = "Dendrogram out")
 | 
| 
 | 
    59     parser.add_argument('-ol', '--out_log', 
 | 
| 
 | 
    60                         help = "Output log")
 | 
| 
 | 
    61     parser.add_argument('-el', '--elbow', 
 | 
| 
 | 
    62                         help = "Out elbow")
 | 
| 
 | 
    63     args = parser.parse_args()
 | 
| 
 | 
    64     return args
 | 
| 
 | 
    65 
 | 
| 
 | 
    66 ########################### warning ###########################################
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 def warning(s):
 | 
| 
 | 
    69     args = process_args(sys.argv)
 | 
| 
 | 
    70     with open(args.out_log, 'a') as log:
 | 
| 
 | 
    71             log.write(s)
 | 
| 
 | 
    72             
 | 
| 
 | 
    73 ############################ dataset input ####################################
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 def read_dataset(data, name):
 | 
| 
 | 
    76     try:
 | 
| 
 | 
    77         dataset = pd.read_csv(data, sep = '\t', header = 0)
 | 
| 
 | 
    78     except pd.errors.EmptyDataError:
 | 
| 
 | 
    79         sys.exit('Execution aborted: wrong format of '+name+'\n')
 | 
| 
 | 
    80     if len(dataset.columns) < 2:
 | 
| 
 | 
    81         sys.exit('Execution aborted: wrong format of '+name+'\n')
 | 
| 
 | 
    82     return dataset
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 ############################ dataset name #####################################
 | 
| 
 | 
    85 
 | 
| 
 | 
    86 def name_dataset(name_data, count):
 | 
| 
 | 
    87     if str(name_data) == 'Dataset':
 | 
| 
 | 
    88         return str(name_data) + '_' + str(count)
 | 
| 
 | 
    89     else:
 | 
| 
 | 
    90         return str(name_data)
 | 
| 
 | 
    91     
 | 
| 
 | 
    92 ############################ load id e rules ##################################
 | 
| 
 | 
    93 
 | 
| 
 | 
    94 def load_id_rules(reactions):
 | 
| 
 | 
    95     ids, rules = [], []
 | 
| 
 | 
    96     for key, value in reactions.items():
 | 
| 
 | 
    97             ids.append(key)
 | 
| 
 | 
    98             rules.append(value)
 | 
| 
 | 
    99     return (ids, rules)
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 ############################ check_methods ####################################
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 def gene_type(l, name):
 | 
| 
 | 
   104     if check_hgnc(l):
 | 
| 
 | 
   105         return 'hugo_id'
 | 
| 
 | 
   106     elif check_ensembl(l):
 | 
| 
 | 
   107         return 'ensembl_gene_id'
 | 
| 
 | 
   108     elif check_symbol(l):
 | 
| 
 | 
   109         return 'symbol'
 | 
| 
 | 
   110     elif check_entrez(l):
 | 
| 
 | 
   111         return 'entrez_id'
 | 
| 
 | 
   112     else:
 | 
| 
 | 
   113         sys.exit('Execution aborted:\n' +
 | 
| 
 | 
   114                  'gene ID type in ' + name + ' not supported. Supported ID' +
 | 
| 
 | 
   115                  'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 def check_hgnc(l):
 | 
| 
 | 
   118     if len(l) > 5:
 | 
| 
 | 
   119         if (l.upper()).startswith('HGNC:'):
 | 
| 
 | 
   120             return l[5:].isdigit()
 | 
| 
 | 
   121         else:
 | 
| 
 | 
   122             return False
 | 
| 
 | 
   123     else:
 | 
| 
 | 
   124         return False
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 def check_ensembl(l): 
 | 
| 
 | 
   127     if len(l) == 15:
 | 
| 
 | 
   128         if (l.upper()).startswith('ENS'):
 | 
| 
 | 
   129             return l[4:].isdigit()
 | 
| 
 | 
   130         else:  
 | 
| 
 | 
   131             return False 
 | 
| 
 | 
   132     else: 
 | 
| 
 | 
   133         return False 
 | 
| 
 | 
   134 
 | 
| 
 | 
   135 def check_symbol(l):
 | 
| 
 | 
   136     if len(l) > 0:
 | 
| 
 | 
   137         if l[0].isalpha() and l[1:].isalnum():
 | 
| 
 | 
   138             return True
 | 
| 
 | 
   139         else:
 | 
| 
 | 
   140             return False
 | 
| 
 | 
   141     else:
 | 
| 
 | 
   142         return False
 | 
| 
 | 
   143 
 | 
| 
 | 
   144 def check_entrez(l): 
 | 
| 
 | 
   145     if len(l) > 0:
 | 
| 
 | 
   146         return l.isdigit()
 | 
| 
 | 
   147     else: 
 | 
| 
 | 
   148         return False
 | 
| 
 | 
   149 
 | 
| 
 | 
   150 def check_bool(b):
 | 
| 
 | 
   151     if b == 'true':
 | 
| 
 | 
   152         return True
 | 
| 
 | 
   153     elif b == 'false':
 | 
| 
 | 
   154         return False
 | 
| 
 | 
   155     
 | 
| 
 | 
   156 ############################ make recon #######################################
 | 
| 
 | 
   157 
 | 
| 
 | 
   158 def check_and_doWord(l):
 | 
| 
 | 
   159     tmp = []
 | 
| 
 | 
   160     tmp_genes = []
 | 
| 
 | 
   161     count = 0
 | 
| 
 | 
   162     while l:
 | 
| 
 | 
   163         if count >= 0:
 | 
| 
 | 
   164             if l[0] == '(':
 | 
| 
 | 
   165                 count += 1
 | 
| 
 | 
   166                 tmp.append(l[0])
 | 
| 
 | 
   167                 l.pop(0)
 | 
| 
 | 
   168             elif l[0] == ')':
 | 
| 
 | 
   169                 count -= 1
 | 
| 
 | 
   170                 tmp.append(l[0])
 | 
| 
 | 
   171                 l.pop(0)
 | 
| 
 | 
   172             elif l[0] == ' ':
 | 
| 
 | 
   173                 l.pop(0)
 | 
| 
 | 
   174             else:
 | 
| 
 | 
   175                 word = []
 | 
| 
 | 
   176                 while l:
 | 
| 
 | 
   177                     if l[0] in [' ', '(', ')']:
 | 
| 
 | 
   178                         break
 | 
| 
 | 
   179                     else:
 | 
| 
 | 
   180                         word.append(l[0])
 | 
| 
 | 
   181                         l.pop(0)
 | 
| 
 | 
   182                 word = ''.join(word)
 | 
| 
 | 
   183                 tmp.append(word)
 | 
| 
 | 
   184                 if not(word in ['or', 'and']):
 | 
| 
 | 
   185                     tmp_genes.append(word)
 | 
| 
 | 
   186         else:
 | 
| 
 | 
   187             return False
 | 
| 
 | 
   188     if count == 0:
 | 
| 
 | 
   189         return (tmp, tmp_genes)
 | 
| 
 | 
   190     else:
 | 
| 
 | 
   191         return False
 | 
| 
 | 
   192 
 | 
| 
 | 
   193 def brackets_to_list(l):
 | 
| 
 | 
   194     tmp = []
 | 
| 
 | 
   195     while l:
 | 
| 
 | 
   196         if l[0] == '(':
 | 
| 
 | 
   197             l.pop(0)
 | 
| 
 | 
   198             tmp.append(resolve_brackets(l))
 | 
| 
 | 
   199         else:
 | 
| 
 | 
   200             tmp.append(l[0])
 | 
| 
 | 
   201             l.pop(0)
 | 
| 
 | 
   202     return tmp
 | 
| 
 | 
   203 
 | 
| 
 | 
   204 def resolve_brackets(l):
 | 
| 
 | 
   205     tmp = []
 | 
| 
 | 
   206     while l[0] != ')':
 | 
| 
 | 
   207         if l[0] == '(':
 | 
| 
 | 
   208             l.pop(0)
 | 
| 
 | 
   209             tmp.append(resolve_brackets(l))
 | 
| 
 | 
   210         else:
 | 
| 
 | 
   211             tmp.append(l[0])
 | 
| 
 | 
   212             l.pop(0)
 | 
| 
 | 
   213     l.pop(0)
 | 
| 
 | 
   214     return tmp
 | 
| 
 | 
   215 
 | 
| 
 | 
   216 def priorityAND(l):
 | 
| 
 | 
   217     tmp = []
 | 
| 
 | 
   218     flag = True
 | 
| 
 | 
   219     while l:
 | 
| 
 | 
   220         if len(l) == 1:
 | 
| 
 | 
   221             if isinstance(l[0], list):
 | 
| 
 | 
   222                 tmp.append(priorityAND(l[0]))
 | 
| 
 | 
   223             else:
 | 
| 
 | 
   224                 tmp.append(l[0])
 | 
| 
 | 
   225             l = l[1:]
 | 
| 
 | 
   226         elif l[0] == 'or':
 | 
| 
 | 
   227             tmp.append(l[0])
 | 
| 
 | 
   228             flag = False
 | 
| 
 | 
   229             l = l[1:]
 | 
| 
 | 
   230         elif l[1] == 'or':
 | 
| 
 | 
   231             if isinstance(l[0], list): 
 | 
| 
 | 
   232                 tmp.append(priorityAND(l[0]))
 | 
| 
 | 
   233             else:
 | 
| 
 | 
   234                 tmp.append(l[0])
 | 
| 
 | 
   235             tmp.append(l[1])
 | 
| 
 | 
   236             flag = False
 | 
| 
 | 
   237             l = l[2:]
 | 
| 
 | 
   238         elif l[1] == 'and':
 | 
| 
 | 
   239             tmpAnd = []
 | 
| 
 | 
   240             if isinstance(l[0], list): 
 | 
| 
 | 
   241                 tmpAnd.append(priorityAND(l[0]))
 | 
| 
 | 
   242             else:
 | 
| 
 | 
   243                 tmpAnd.append(l[0])
 | 
| 
 | 
   244             tmpAnd.append(l[1])
 | 
| 
 | 
   245             if isinstance(l[2], list): 
 | 
| 
 | 
   246                 tmpAnd.append(priorityAND(l[2]))
 | 
| 
 | 
   247             else:
 | 
| 
 | 
   248                 tmpAnd.append(l[2])
 | 
| 
 | 
   249             l = l[3:]
 | 
| 
 | 
   250             while l:
 | 
| 
 | 
   251                 if l[0] == 'and':
 | 
| 
 | 
   252                     tmpAnd.append(l[0])
 | 
| 
 | 
   253                     if isinstance(l[1], list): 
 | 
| 
 | 
   254                         tmpAnd.append(priorityAND(l[1]))
 | 
| 
 | 
   255                     else:
 | 
| 
 | 
   256                         tmpAnd.append(l[1])
 | 
| 
 | 
   257                     l = l[2:]
 | 
| 
 | 
   258                 elif l[0] == 'or':
 | 
| 
 | 
   259                     flag = False
 | 
| 
 | 
   260                     break
 | 
| 
 | 
   261             if flag == True: #se ci sono solo AND nella lista
 | 
| 
 | 
   262                 tmp.extend(tmpAnd)
 | 
| 
 | 
   263             elif flag == False:
 | 
| 
 | 
   264                 tmp.append(tmpAnd)
 | 
| 
 | 
   265     return tmp
 | 
| 
 | 
   266 
 | 
| 
 | 
   267 def checkRule(l):
 | 
| 
 | 
   268     if len(l) == 1:
 | 
| 
 | 
   269         if isinstance(l[0], list):
 | 
| 
 | 
   270             if checkRule(l[0]) is False:
 | 
| 
 | 
   271                 return False
 | 
| 
 | 
   272     elif len(l) > 2:
 | 
| 
 | 
   273         if checkRule2(l) is False:
 | 
| 
 | 
   274             return False
 | 
| 
 | 
   275     else:
 | 
| 
 | 
   276         return False
 | 
| 
 | 
   277     return True
 | 
| 
 | 
   278 
 | 
| 
 | 
   279 def checkRule2(l):
 | 
| 
 | 
   280     while l:
 | 
| 
 | 
   281         if len(l) == 1:
 | 
| 
 | 
   282             return False
 | 
| 
 | 
   283         elif isinstance(l[0], list) and l[1] in ['and', 'or']:
 | 
| 
 | 
   284             if checkRule(l[0]) is False:
 | 
| 
 | 
   285                 return False
 | 
| 
 | 
   286             if isinstance(l[2], list):
 | 
| 
 | 
   287                 if checkRule(l[2]) is False:
 | 
| 
 | 
   288                     return False
 | 
| 
 | 
   289             l = l[3:]
 | 
| 
 | 
   290         elif l[1] in ['and', 'or']:
 | 
| 
 | 
   291             if isinstance(l[2], list):
 | 
| 
 | 
   292                 if checkRule(l[2]) is False:
 | 
| 
 | 
   293                     return False
 | 
| 
 | 
   294             l = l[3:]
 | 
| 
 | 
   295         elif l[0] in ['and', 'or']:
 | 
| 
 | 
   296             if isinstance(l[1], list):
 | 
| 
 | 
   297                 if checkRule(l[1]) is False:
 | 
| 
 | 
   298                     return False
 | 
| 
 | 
   299             l = l[2:]
 | 
| 
 | 
   300         else:
 | 
| 
 | 
   301             return False
 | 
| 
 | 
   302     return True
 | 
| 
 | 
   303 
 | 
| 
 | 
   304 def do_rules(rules):
 | 
| 
 | 
   305     split_rules = []
 | 
| 
 | 
   306     err_rules = []
 | 
| 
 | 
   307     tmp_gene_in_rule = []
 | 
| 
 | 
   308     for i in range(len(rules)):
 | 
| 
 | 
   309         tmp = list(rules[i])
 | 
| 
 | 
   310         if tmp:
 | 
| 
 | 
   311             tmp, tmp_genes = check_and_doWord(tmp)
 | 
| 
 | 
   312             tmp_gene_in_rule.extend(tmp_genes)
 | 
| 
 | 
   313             if tmp is False:
 | 
| 
 | 
   314                 split_rules.append([])
 | 
| 
 | 
   315                 err_rules.append(rules[i])
 | 
| 
 | 
   316             else:
 | 
| 
 | 
   317                 tmp = brackets_to_list(tmp)
 | 
| 
 | 
   318                 if checkRule(tmp):
 | 
| 
 | 
   319                     split_rules.append(priorityAND(tmp))
 | 
| 
 | 
   320                 else:
 | 
| 
 | 
   321                     split_rules.append([])
 | 
| 
 | 
   322                     err_rules.append(rules[i])
 | 
| 
 | 
   323         else:
 | 
| 
 | 
   324             split_rules.append([])
 | 
| 
 | 
   325     if err_rules:
 | 
| 
 | 
   326         warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
 | 
| 
 | 
   327     return (split_rules, list(set(tmp_gene_in_rule)))
 | 
| 
 | 
   328 
 | 
| 
 | 
   329 def make_recon(data):
 | 
| 
 | 
   330     try:
 | 
| 
 | 
   331         import cobra as cb
 | 
| 
 | 
   332         import warnings
 | 
| 
 | 
   333         with warnings.catch_warnings():
 | 
| 
 | 
   334             warnings.simplefilter('ignore')
 | 
| 
 | 
   335             recon = cb.io.read_sbml_model(data)
 | 
| 
 | 
   336         react = recon.reactions
 | 
| 
 | 
   337         rules = [react[i].gene_reaction_rule for i in range(len(react))]
 | 
| 
 | 
   338         ids = [react[i].id for i in range(len(react))]
 | 
| 
 | 
   339     except cb.io.sbml3.CobraSBMLError:
 | 
| 
 | 
   340         try:
 | 
| 
 | 
   341             data = (pd.read_csv(data, sep = '\t', dtype = str)).fillna('')
 | 
| 
 | 
   342             if len(data.columns) < 2:
 | 
| 
 | 
   343                 sys.exit('Execution aborted: wrong format of ' +
 | 
| 
 | 
   344                          'custom GPR rules\n')
 | 
| 
 | 
   345             if not len(data.columns) == 2:
 | 
| 
 | 
   346                 warning('WARNING: more than 2 columns in custom GPR rules.\n' +
 | 
| 
 | 
   347                         'Extra columns have been disregarded\n')
 | 
| 
 | 
   348             ids = list(data.iloc[:, 0])
 | 
| 
 | 
   349             rules = list(data.iloc[:, 1])
 | 
| 
 | 
   350         except pd.errors.EmptyDataError:
 | 
| 
 | 
   351             sys.exit('Execution aborted: wrong format of custom GPR rules\n')
 | 
| 
 | 
   352         except pd.errors.ParserError:
 | 
| 
 | 
   353             sys.exit('Execution aborted: wrong format of custom GPR rules\n')  
 | 
| 
 | 
   354     split_rules, tmp_genes = do_rules(rules)
 | 
| 
 | 
   355     gene_in_rule = {}
 | 
| 
 | 
   356     for i in tmp_genes:
 | 
| 
 | 
   357         gene_in_rule[i] = 'ok'
 | 
| 
 | 
   358     return (ids, split_rules, gene_in_rule)
 | 
| 
 | 
   359 
 | 
| 
 | 
   360 ############################ resolve_methods ##################################
 | 
| 
 | 
   361 
 | 
| 
 | 
   362 def replace_gene_value(l, d):
 | 
| 
 | 
   363     tmp = []
 | 
| 
 | 
   364     err = []
 | 
| 
 | 
   365     while l:
 | 
| 
 | 
   366         if isinstance(l[0], list):
 | 
| 
 | 
   367             tmp_rules, tmp_err = replace_gene_value(l[0], d)
 | 
| 
 | 
   368             tmp.append(tmp_rules)
 | 
| 
 | 
   369             err.extend(tmp_err)
 | 
| 
 | 
   370         else:
 | 
| 
 | 
   371             value = replace_gene(l[0],d)
 | 
| 
 | 
   372             tmp.append(value)
 | 
| 
 | 
   373             if value == None:
 | 
| 
 | 
   374                 err.append(l[0])
 | 
| 
 | 
   375         l = l[1:]
 | 
| 
 | 
   376     return (tmp, err)
 | 
| 
 | 
   377 
 | 
| 
 | 
   378 def replace_gene(l, d):
 | 
| 
 | 
   379     if l =='and' or l == 'or':
 | 
| 
 | 
   380         return l
 | 
| 
 | 
   381     else:
 | 
| 
 | 
   382         value = d.get(l, None)
 | 
| 
 | 
   383         if not(value == None or isinstance(value, (int, float))):
 | 
| 
 | 
   384             sys.exit('Execution aborted: ' + value + ' value not valid\n')
 | 
| 
 | 
   385         return value
 | 
| 
 | 
   386 
 | 
| 
 | 
   387 def compute(val1, op, val2, cn):
 | 
| 
 | 
   388     if val1 != None and val2 != None:
 | 
| 
 | 
   389         if op == 'and':
 | 
| 
 | 
   390             return min(val1, val2)
 | 
| 
 | 
   391         else:
 | 
| 
 | 
   392             return val1 + val2
 | 
| 
 | 
   393     elif op == 'and':
 | 
| 
 | 
   394         if cn is True:
 | 
| 
 | 
   395             if val1 != None:
 | 
| 
 | 
   396                 return val1
 | 
| 
 | 
   397             elif val2 != None:
 | 
| 
 | 
   398                 return val2
 | 
| 
 | 
   399             else:
 | 
| 
 | 
   400                 return None
 | 
| 
 | 
   401         else:
 | 
| 
 | 
   402             return None
 | 
| 
 | 
   403     else:
 | 
| 
 | 
   404         if val1 != None:
 | 
| 
 | 
   405             return val1
 | 
| 
 | 
   406         elif val2 != None:
 | 
| 
 | 
   407             return val2
 | 
| 
 | 
   408         else:
 | 
| 
 | 
   409             return None
 | 
| 
 | 
   410 
 | 
| 
 | 
   411 def control(ris, l, cn):
 | 
| 
 | 
   412     if len(l) == 1:
 | 
| 
 | 
   413         if isinstance(l[0], (float, int)) or l[0] == None:
 | 
| 
 | 
   414             return l[0]
 | 
| 
 | 
   415         elif isinstance(l[0], list):
 | 
| 
 | 
   416             return control(None, l[0], cn)
 | 
| 
 | 
   417         else:
 | 
| 
 | 
   418             return False
 | 
| 
 | 
   419     elif len(l) > 2:
 | 
| 
 | 
   420         return control_list(ris, l, cn)
 | 
| 
 | 
   421     else:
 | 
| 
 | 
   422         return False
 | 
| 
 | 
   423 
 | 
| 
 | 
   424 def control_list(ris, l, cn):
 | 
| 
 | 
   425     while l:
 | 
| 
 | 
   426         if len(l) == 1:
 | 
| 
 | 
   427             return False
 | 
| 
 | 
   428         elif (isinstance(l[0], (float, int)) or
 | 
| 
 | 
   429               l[0] == None) and l[1] in ['and', 'or']:
 | 
| 
 | 
   430             if isinstance(l[2], (float, int)) or l[2] == None:
 | 
| 
 | 
   431                 ris = compute(l[0], l[1], l[2], cn)            
 | 
| 
 | 
   432             elif isinstance(l[2], list):
 | 
| 
 | 
   433                 tmp = control(None, l[2], cn)
 | 
| 
 | 
   434                 if tmp is False:
 | 
| 
 | 
   435                     return False
 | 
| 
 | 
   436                 else:
 | 
| 
 | 
   437                     ris = compute(l[0], l[1], tmp, cn)
 | 
| 
 | 
   438             else:
 | 
| 
 | 
   439                 return False
 | 
| 
 | 
   440             l = l[3:]
 | 
| 
 | 
   441         elif l[0] in ['and', 'or']:
 | 
| 
 | 
   442             if isinstance(l[1], (float, int)) or l[1] == None:
 | 
| 
 | 
   443                 ris = compute(ris, l[0], l[1], cn)
 | 
| 
 | 
   444             elif isinstance(l[1], list):
 | 
| 
 | 
   445                 tmp = control(None,l[1], cn)
 | 
| 
 | 
   446                 if tmp is False:
 | 
| 
 | 
   447                     return False
 | 
| 
 | 
   448                 else:
 | 
| 
 | 
   449                     ris = compute(ris, l[0], tmp, cn)
 | 
| 
 | 
   450             else:
 | 
| 
 | 
   451                 return False
 | 
| 
 | 
   452             l = l[2:]
 | 
| 
 | 
   453         elif isinstance(l[0], list) and l[1] in ['and', 'or']:
 | 
| 
 | 
   454             if isinstance(l[2], (float, int)) or l[2] == None:
 | 
| 
 | 
   455                 tmp = control(None, l[0], cn)
 | 
| 
 | 
   456                 if tmp is False:
 | 
| 
 | 
   457                     return False
 | 
| 
 | 
   458                 else:
 | 
| 
 | 
   459                     ris = compute(tmp, l[1], l[2], cn)
 | 
| 
 | 
   460             elif isinstance(l[2], list):
 | 
| 
 | 
   461                 tmp = control(None, l[0], cn)
 | 
| 
 | 
   462                 tmp2 = control(None, l[2], cn)
 | 
| 
 | 
   463                 if tmp is False or tmp2 is False:
 | 
| 
 | 
   464                     return False
 | 
| 
 | 
   465                 else:
 | 
| 
 | 
   466                     ris = compute(tmp, l[1], tmp2, cn)
 | 
| 
 | 
   467             else:
 | 
| 
 | 
   468                 return False
 | 
| 
 | 
   469             l = l[3:]
 | 
| 
 | 
   470         else:
 | 
| 
 | 
   471             return False
 | 
| 
 | 
   472     return ris
 | 
| 
 | 
   473 
 | 
| 
 | 
   474 ############################ gene #############################################
 | 
| 
 | 
   475 
 | 
| 
 | 
   476 def data_gene(gene, type_gene, name, gene_custom):
 | 
| 
 | 
   477     args = process_args(sys.argv)
 | 
| 
 | 
   478     for i in range(len(gene)):
 | 
| 
 | 
   479         tmp = gene.iloc[i, 0]
 | 
| 
 | 
   480         if tmp.startswith(' ') or tmp.endswith(' '):
 | 
| 
 | 
   481             gene.iloc[i, 0] = (tmp.lstrip()).rstrip()
 | 
| 
 | 
   482     gene_dup = [item for item, count in 
 | 
| 
 | 
   483                collections.Counter(gene[gene.columns[0]]).items() if count > 1]
 | 
| 
 | 
   484     pat_dup = [item for item, count in 
 | 
| 
 | 
   485                collections.Counter(list(gene.columns)).items() if count > 1]
 | 
| 
 | 
   486     if gene_dup:
 | 
| 
 | 
   487         if gene_custom == None:
 | 
| 
 | 
   488             if args.rules_selector == 'HMRcore':
 | 
| 
 | 
   489                 gene_in_rule = pk.load(open(args.tool_dir +
 | 
| 
 | 
   490                                             '/local/HMRcore_genes.p', 'rb'))
 | 
| 
 | 
   491             elif args.rules_selector == 'Recon':
 | 
| 
 | 
   492                 gene_in_rule = pk.load(open(args.tool_dir +
 | 
| 
 | 
   493                                             '/local/Recon_genes.p', 'rb'))
 | 
| 
 | 
   494             gene_in_rule = gene_in_rule.get(type_gene)
 | 
| 
 | 
   495         else:
 | 
| 
 | 
   496             gene_in_rule = gene_custom
 | 
| 
 | 
   497         tmp = []
 | 
| 
 | 
   498         for i in gene_dup:
 | 
| 
 | 
   499             if gene_in_rule.get(i) == 'ok':
 | 
| 
 | 
   500                 tmp.append(i)
 | 
| 
 | 
   501         if tmp:
 | 
| 
 | 
   502             sys.exit('Execution aborted because gene ID '
 | 
| 
 | 
   503                      + str(tmp) + ' in ' + name + ' is duplicated\n')
 | 
| 
 | 
   504     if pat_dup:
 | 
| 
 | 
   505         sys.exit('Execution aborted: duplicated label\n'
 | 
| 
 | 
   506                  + str(pat_dup) + 'in ' + name + '\n')
 | 
| 
 | 
   507     return (gene.set_index(gene.columns[0])).to_dict()
 | 
| 
 | 
   508 
 | 
| 
 | 
   509 ############################ resolve ##########################################
 | 
| 
 | 
   510 
 | 
| 
 | 
   511 def resolve(genes, rules, ids, resolve_none, name):
 | 
| 
 | 
   512     resolve_rules = {}
 | 
| 
 | 
   513     not_found = []
 | 
| 
 | 
   514     flag = False
 | 
| 
 | 
   515     for key, value in genes.items():
 | 
| 
 | 
   516         tmp_resolve = []
 | 
| 
 | 
   517         for i in range(len(rules)):
 | 
| 
 | 
   518             tmp = rules[i]
 | 
| 
 | 
   519             if tmp:
 | 
| 
 | 
   520                 tmp, err = replace_gene_value(tmp, value)
 | 
| 
 | 
   521                 if err:
 | 
| 
 | 
   522                     not_found.extend(err)
 | 
| 
 | 
   523                 ris = control(None, tmp, resolve_none)
 | 
| 
 | 
   524                 if ris is False or ris == None:
 | 
| 
 | 
   525                     tmp_resolve.append(None)
 | 
| 
 | 
   526                 else:
 | 
| 
 | 
   527                     tmp_resolve.append(ris)
 | 
| 
 | 
   528                     flag = True
 | 
| 
 | 
   529             else:
 | 
| 
 | 
   530                 tmp_resolve.append(None)        
 | 
| 
 | 
   531         resolve_rules[key] = tmp_resolve
 | 
| 
 | 
   532     if flag is False:
 | 
| 
 | 
   533         sys.exit('Execution aborted: no computable score' +
 | 
| 
 | 
   534                  ' (due to missing gene values) for class '
 | 
| 
 | 
   535                  + name + ', the class has been disregarded\n')
 | 
| 
 | 
   536     return (resolve_rules, list(set(not_found)))
 | 
| 
 | 
   537 
 | 
| 
 | 
   538 ################################# clustering ##################################
 | 
| 
 | 
   539 
 | 
| 
 | 
   540 def f_cluster(resolve_rules):
 | 
| 
 | 
   541     os.makedirs('cluster_out')
 | 
| 
 | 
   542     args = process_args(sys.argv)
 | 
| 
 | 
   543     cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index')
 | 
| 
 | 
   544     for i in cluster_data.columns:
 | 
| 
 | 
   545         tmp = cluster_data[i][0]
 | 
| 
 | 
   546         if tmp == None:
 | 
| 
 | 
   547             cluster_data = cluster_data.drop(columns=[i])
 | 
| 
 | 
   548     distorsion = []
 | 
| 
 | 
   549     for i in range(args.k_min, args.k_max+1):
 | 
| 
 | 
   550         tmp_kmeans = KMeans(n_clusters = i,
 | 
| 
 | 
   551                             n_init = 100, 
 | 
| 
 | 
   552                             max_iter = 300,
 | 
| 
 | 
   553                             random_state = 0).fit(cluster_data)
 | 
| 
 | 
   554         distorsion.append(tmp_kmeans.inertia_)
 | 
| 
 | 
   555         predict = tmp_kmeans.predict(cluster_data)
 | 
| 
 | 
   556         predict = [x+1 for x in predict]
 | 
| 
 | 
   557         classe = (pd.DataFrame(zip(cluster_data.index, predict))).astype(str)
 | 
| 
 | 
   558         dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv'
 | 
| 
 | 
   559         classe.to_csv(dest, sep = '\t', index = False,
 | 
| 
 | 
   560                       header = ['Patient_ID', 'Class'])
 | 
| 
 | 
   561     plt.figure(0)
 | 
| 
 | 
   562     plt.plot(range(args.k_min, args.k_max+1), distorsion, marker = 'o')
 | 
| 
 | 
   563     plt.xlabel('Number of cluster')
 | 
| 
 | 
   564     plt.ylabel('Distorsion')
 | 
| 
 | 
   565     plt.savefig(args.elbow, dpi = 240, format = 'pdf')
 | 
| 
 | 
   566     if args.cond_hier == 'yes':
 | 
| 
 | 
   567         import scipy.cluster.hierarchy as hier
 | 
| 
 | 
   568         lin = hier.linkage(cluster_data, args.linkage)
 | 
| 
 | 
   569         plt.figure(1)
 | 
| 
 | 
   570         plt.figure(figsize=(10, 5))
 | 
| 
 | 
   571         hier.dendrogram(lin, leaf_font_size = 2, labels = cluster_data.index)
 | 
| 
 | 
   572         plt.savefig(args.dendro, dpi = 480, format = 'pdf')
 | 
| 
 | 
   573     return None
 | 
| 
 | 
   574 
 | 
| 
 | 
   575 ################################# main ########################################
 | 
| 
 | 
   576 
 | 
| 
 | 
   577 def main():
 | 
| 
 | 
   578     args = process_args(sys.argv)
 | 
| 
 | 
   579     if args.k_min > args.k_max:
 | 
| 
 | 
   580         sys.exit('Execution aborted: max cluster > min cluster')
 | 
| 
 | 
   581     if args.rules_selector == 'HMRcore':
 | 
| 
 | 
   582         recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
 | 
| 
 | 
   583     elif args.rules_selector == 'Recon':
 | 
| 
 | 
   584         recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
 | 
| 
 | 
   585     elif args.rules_selector == 'Custom':
 | 
| 
 | 
   586         ids, rules, gene_in_rule = make_recon(args.custom)
 | 
| 
 | 
   587     resolve_none = check_bool(args.none)
 | 
| 
 | 
   588     dataset = read_dataset(args.data, args.name)
 | 
| 
 | 
   589     dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
 | 
| 
 | 
   590     type_gene = gene_type(dataset.iloc[0, 0], args.name)
 | 
| 
 | 
   591     if args.rules_selector != 'Custom':
 | 
| 
 | 
   592         genes = data_gene(dataset, type_gene, args.name, None)
 | 
| 
 | 
   593         ids, rules = load_id_rules(recon.get(type_gene))
 | 
| 
 | 
   594     elif args.rules_selector == 'Custom':
 | 
| 
 | 
   595         genes = data_gene(dataset, type_gene, args.name, gene_in_rule)
 | 
| 
 | 
   596     resolve_rules, err = resolve(genes, rules, ids, resolve_none, args.name)
 | 
| 
 | 
   597     if err:
 | 
| 
 | 
   598         warning('WARNING: gene\n' + str(err) + '\nnot found in class '  
 | 
| 
 | 
   599                 + args.name + ', the expression level for this gene ' +
 | 
| 
 | 
   600                 'will be considered NaN\n')
 | 
| 
 | 
   601     f_cluster(resolve_rules)
 | 
| 
 | 
   602     warning('Execution succeeded')
 | 
| 
 | 
   603     return None
 | 
| 
 | 
   604 
 | 
| 
 | 
   605 ###############################################################################
 | 
| 
 | 
   606 
 | 
| 
 | 
   607 if __name__ == "__main__":
 | 
| 
 | 
   608     main() |