view Marea/ras_generator.py @ 46:5d5d01ef1d68 draft

Uploaded
author bimib
date Wed, 22 Jan 2020 11:50:54 -0500
parents
children 3af9d394367c
line wrap: on
line source

from __future__ import division
import sys
import pandas as pd
import itertools as it
import scipy.stats as st
import collections
import lxml.etree as ET
import pickle as pk
import math
import os
import argparse
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPDF

########################## argparse ##########################################

def process_args(args):
    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                     description = 'process some value\'s'+
                                     ' genes to create a comparison\'s map.')
    parser.add_argument('-rs', '--rules_selector', 
                        type = str,
                        default = 'HMRcore',
                        choices = ['HMRcore', 'Recon', 'Custom'], 
                        help = 'chose which type of dataset you want use')
    parser.add_argument('-cr', '--custom',
                        type = str,
                        help='your dataset if you want custom rules')
    parser.add_argument('-na', '--names', 
                        type = str,
                        nargs = '+', 
                        help = 'input names')
    parser.add_argument('-n', '--none',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'], 
                        help = 'compute Nan values')
    parser.add_argument('-pv' ,'--pValue', 
                        type = float, 
                        default = 0.05, 
                        help = 'P-Value threshold (default: %(default)s)')
    parser.add_argument('-fc', '--fChange', 
                        type = float, 
                        default = 1.5, 
                        help = 'Fold-Change threshold (default: %(default)s)')
    parser.add_argument('-td', '--tool_dir',
                        type = str,
                        required = True,
                        help = 'your tool directory')
    parser.add_argument('-op', '--option', 
                        type = str, 
                        choices = ['datasets', 'dataset_class', 'datasets_rasonly'],
                        help='dataset or dataset and class')
    parser.add_argument('-ol', '--out_log', 
                        help = "Output log")    
    parser.add_argument('-ids', '--input_datas', 
                        type = str,
                        nargs = '+', 
                        help = 'input datasets')
    parser.add_argument('-id', '--input_data',
                        type = str,
                        help = 'input dataset')
    parser.add_argument('-ic', '--input_class', 
                        type = str, 
                        help = 'sample group specification')
    parser.add_argument('-cm', '--custom_map', 
                        type = str, 
                        help = 'custom map')
    parser.add_argument('-yn', '--yes_no', 
                        type = str,
                        choices = ['yes', 'no'],
                        help = 'if make or not custom map')
    parser.add_argument('-gs', '--generate_svg',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'], 
                        help = 'generate svg map')
    parser.add_argument('-gp', '--generate_pdf',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'], 
                        help = 'generate pdf map')
    parser.add_argument('-gr', '--generate_ras',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'generate reaction activity score')
    parser.add_argument('-sr', '--single_ras_file',  
                         type = str,              
                         help = 'file that will contain ras')
    					
    args = parser.parse_args()
    return args

########################### warning ###########################################

def warning(s):
    args = process_args(sys.argv)
    with open(args.out_log, 'a') as log:
            log.write(s)
            
############################ dataset input ####################################

def read_dataset(data, name):
    try:
        dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
    except pd.errors.EmptyDataError:
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    if len(dataset.columns) < 2:
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    return dataset

############################ dataset name #####################################

def name_dataset(name_data, count):
    if str(name_data) == 'Dataset':
        return str(name_data) + '_' + str(count)
    else:
        return str(name_data)
    
############################ load id e rules ##################################

def load_id_rules(reactions):
    ids, rules = [], []
    for key, value in reactions.items():
            ids.append(key)
            rules.append(value)
    return (ids, rules)

############################ check_methods ####################################

def gene_type(l, name):
    if check_hgnc(l):
        return 'hugo_id'
    elif check_ensembl(l):
        return 'ensembl_gene_id'
    elif check_symbol(l):
        return 'symbol'
    elif check_entrez(l):
        return 'entrez_id'
    else:
        sys.exit('Execution aborted:\n' +
                 'gene ID type in ' + name + ' not supported. Supported ID'+
                 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')

def check_hgnc(l):
    if len(l) > 5:
        if (l.upper()).startswith('HGNC:'):
            return l[5:].isdigit()
        else:
            return False
    else:
        return False

def check_ensembl(l): 
    if len(l) == 15:
        if (l.upper()).startswith('ENS'):
            return l[4:].isdigit()
        else:  
            return False 
    else: 
        return False 

def check_symbol(l):
    if len(l) > 0:
        if l[0].isalpha() and l[1:].isalnum():
            return True
        else:
            return False
    else:
        return False

def check_entrez(l): 
    if len(l) > 0:
        return l.isdigit()
    else: 
        return False

def check_bool(b):
    if b == 'true':
        return True
    elif b == 'false':
        return False
    
############################ resolve_methods ##################################

def replace_gene_value(l, d):
    tmp = []
    err = []
    while l:
        if isinstance(l[0], list):
            tmp_rules, tmp_err = replace_gene_value(l[0], d)
            tmp.append(tmp_rules)
            err.extend(tmp_err)
        else:
            value = replace_gene(l[0], d)
            tmp.append(value)
            if value == None:
                err.append(l[0])
        l = l[1:]
    return (tmp, err)


def replace_gene(l, d):
    if l =='and' or l == 'or':
        return l
    else:
        value = d.get(l, None)
        if not(value == None or isinstance(value, (int, float))):
            sys.exit('Execution aborted: ' + value + ' value not valid\n')
        return value

def computes(val1, op, val2, cn):
    if val1 != None and val2 != None:
        if op == 'and':
            return min(val1, val2)
        else:
            return val1 + val2
    elif op == 'and':
        if cn is True:
            if val1 != None:
                return val1
            elif val2 != None:
                return val2
            else:
                return None
        else:
            return None
    else:
        if val1 != None:
            return val1
        elif val2 != None:
            return val2
        else:
            return None

def control(ris, l, cn):
    if len(l) == 1:
        if isinstance(l[0], (float, int)) or l[0] == None:
            return l[0]
        elif isinstance(l[0], list):
            return control(None, l[0], cn)
        else:
            return False
    elif len(l) > 2:
        return control_list(ris, l, cn)
    else:
        return False

def control_list(ris, l, cn):
    while l:
        if len(l) == 1:
            return False
        elif (isinstance(l[0], (float, int)) or
              l[0] == None) and l[1] in ['and', 'or']:
            if isinstance(l[2], (float, int)) or l[2] == None:
                ris = computes(l[0], l[1], l[2], cn)            
            elif isinstance(l[2], list):
                tmp = control(None, l[2], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(l[0], l[1], tmp, cn)
            else:
                return False
            l = l[3:]
        elif l[0] in ['and', 'or']:
            if isinstance(l[1], (float, int)) or l[1] == None:
                ris = computes(ris, l[0], l[1], cn)
            elif isinstance(l[1], list):
                tmp = control(None,l[1], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(ris, l[0], tmp, cn)
            else:
                return False
            l = l[2:]
        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
            if isinstance(l[2], (float, int)) or l[2] == None:
                tmp = control(None, l[0], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(tmp, l[1], l[2], cn)
            elif isinstance(l[2], list):
                tmp = control(None, l[0], cn)
                tmp2 = control(None, l[2], cn)
                if tmp is False or tmp2 is False:
                    return False
                else:
                    ris = computes(tmp, l[1], tmp2, cn)
            else:
                return False
            l = l[3:]
        else:
            return False
    return ris

############################ map_methods ######################################

def fold_change(avg1, avg2):
    if avg1 == 0 and avg2 == 0:
        return 0
    elif avg1 == 0:
        return '-INF'
    elif avg2 == 0:
        return 'INF'
    else:
        return math.log(avg1 / avg2, 2)
    
def fix_style(l, col, width, dash):
    tmp = l.split(';')
    flag_col = False
    flag_width = False
    flag_dash = False
    for i in range(len(tmp)):
        if tmp[i].startswith('stroke:'):
            tmp[i] = 'stroke:' + col
            flag_col = True
        if tmp[i].startswith('stroke-width:'):
            tmp[i] = 'stroke-width:' + width
            flag_width = True
        if tmp[i].startswith('stroke-dasharray:'):
            tmp[i] = 'stroke-dasharray:' + dash
            flag_dash = True
    if not flag_col:
        tmp.append('stroke:' + col)
    if not flag_width:
        tmp.append('stroke-width:' + width)
    if not flag_dash:
        tmp.append('stroke-dasharray:' + dash)
    return ';'.join(tmp)

def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C):
    maxT = 12
    minT = 2
    grey = '#BEBEBE'
    blue = '#0000FF'
    red = '#E41A1C'
    for el in core_map.iter():
        el_id = str(el.get('id'))
        if el_id.startswith('R_'):
            tmp = d.get(el_id[2:])
            if tmp != None:
                p_val = tmp[0]
                f_c = tmp[1]
                if p_val < threshold_P_V:
                    if not isinstance(f_c, str):
                        if abs(f_c) < math.log(threshold_F_C, 2):
                            col = grey
                            width = str(minT)
                        else:
                            if f_c < 0:
                                col = blue
                            elif f_c > 0:
                                col = red
                            width = str(max((abs(f_c) * maxT) / max_F_C, minT))
                    else:
                        if f_c == '-INF':
                            col = blue
                        elif f_c == 'INF':
                            col = red
                        width = str(maxT)
                    dash = 'none'
                else:
                    dash = '5,5'
                    col = grey
                    width = str(minT)
                el.set('style', fix_style(el.get('style'), col, width, dash))
    return core_map

############################ make recon #######################################

def check_and_doWord(l):
    tmp = []
    tmp_genes = []
    count = 0
    while l:
        if count >= 0:
            if l[0] == '(':
                count += 1
                tmp.append(l[0])
                l.pop(0)
            elif l[0] == ')':
                count -= 1
                tmp.append(l[0])
                l.pop(0)
            elif l[0] == ' ':
                l.pop(0)
            else:
                word = []
                while l:
                    if l[0] in [' ', '(', ')']:
                        break
                    else:
                        word.append(l[0])
                        l.pop(0)
                word = ''.join(word)
                tmp.append(word)
                if not(word in ['or', 'and']):
                    tmp_genes.append(word)
        else:
            return False
    if count == 0:
        return (tmp, tmp_genes)
    else:
        return False

def brackets_to_list(l):
    tmp = []
    while l:
        if l[0] == '(':
            l.pop(0)
            tmp.append(resolve_brackets(l))
        else:
            tmp.append(l[0])
            l.pop(0)
    return tmp

def resolve_brackets(l):
    tmp = []
    while l[0] != ')':
        if l[0] == '(':
            l.pop(0)
            tmp.append(resolve_brackets(l))
        else:
            tmp.append(l[0])
            l.pop(0)
    l.pop(0)
    return tmp

def priorityAND(l):
    tmp = []
    flag = True
    while l:
        if len(l) == 1:
            if isinstance(l[0], list):
                tmp.append(priorityAND(l[0]))
            else:
                tmp.append(l[0])
            l = l[1:]
        elif l[0] == 'or':
            tmp.append(l[0])
            flag = False
            l = l[1:]
        elif l[1] == 'or':
            if isinstance(l[0], list): 
                tmp.append(priorityAND(l[0]))
            else:
                tmp.append(l[0])
            tmp.append(l[1])
            flag = False
            l = l[2:]
        elif l[1] == 'and':
            tmpAnd = []
            if isinstance(l[0], list): 
                tmpAnd.append(priorityAND(l[0]))
            else:
                tmpAnd.append(l[0])
            tmpAnd.append(l[1])
            if isinstance(l[2], list): 
                tmpAnd.append(priorityAND(l[2]))
            else:
                tmpAnd.append(l[2])
            l = l[3:]
            while l:
                if l[0] == 'and':
                    tmpAnd.append(l[0])
                    if isinstance(l[1], list): 
                        tmpAnd.append(priorityAND(l[1]))
                    else:
                        tmpAnd.append(l[1])
                    l = l[2:]
                elif l[0] == 'or':
                    flag = False
                    break
            if flag == True: #when there are only AND in list
                tmp.extend(tmpAnd)
            elif flag == False:
                tmp.append(tmpAnd)
    return tmp

def checkRule(l):
    if len(l) == 1:
        if isinstance(l[0], list):
            if checkRule(l[0]) is False:
                return False
    elif len(l) > 2:
        if checkRule2(l) is False:
            return False
    else:
        return False
    return True

def checkRule2(l):
    while l:
        if len(l) == 1:
            return False
        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
            if checkRule(l[0]) is False:
                return False
            if isinstance(l[2], list):
                if checkRule(l[2]) is False:
                    return False
            l = l[3:]
        elif l[1] in ['and', 'or']:
            if isinstance(l[2], list):
                if checkRule(l[2]) is False:
                    return False
            l = l[3:]
        elif l[0] in ['and', 'or']:
            if isinstance(l[1], list):
                if checkRule(l[1]) is False:
                    return False
            l = l[2:]
        else:
            return False
    return True

def do_rules(rules):
    split_rules = []
    err_rules = []
    tmp_gene_in_rule = []
    for i in range(len(rules)):
        tmp = list(rules[i])
        if tmp:
            tmp, tmp_genes = check_and_doWord(tmp)
            tmp_gene_in_rule.extend(tmp_genes)
            if tmp is False:
                split_rules.append([])
                err_rules.append(rules[i])
            else:
                tmp = brackets_to_list(tmp)
                if checkRule(tmp):
                    split_rules.append(priorityAND(tmp))
                else:
                    split_rules.append([])
                    err_rules.append(rules[i])
        else:
            split_rules.append([])
    if err_rules:
        warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
    return (split_rules, list(set(tmp_gene_in_rule)))

def make_recon(data):
    try:
        import cobra as cb
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            recon = cb.io.read_sbml_model(data)
        react = recon.reactions
        rules = [react[i].gene_reaction_rule for i in range(len(react))]
        ids = [react[i].id for i in range(len(react))]
    except cb.io.sbml3.CobraSBMLError:
        try:
            data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
            if len(data.columns) < 2:
                sys.exit('Execution aborted: wrong format of '+
                         'custom datarules\n')
            if not len(data.columns) == 2:
                warning('Warning: more than 2 columns in custom datarules.\n' +
                        'Extra columns have been disregarded\n')
            ids = list(data.iloc[:, 0])
            rules = list(data.iloc[:, 1])
        except pd.errors.EmptyDataError:
            sys.exit('Execution aborted: wrong format of custom datarules\n')
        except pd.errors.ParserError:
            sys.exit('Execution aborted: wrong format of custom datarules\n')            
    split_rules, tmp_genes = do_rules(rules)
    gene_in_rule = {}
    for i in tmp_genes:
        gene_in_rule[i] = 'ok'
    return (ids, split_rules, gene_in_rule)

############################ gene #############################################

def data_gene(gene, type_gene, name, gene_custom):
    args = process_args(sys.argv)    
    for i in range(len(gene)):
        tmp = gene.iloc[i, 0]
        if tmp.startswith(' ') or tmp.endswith(' '):
            gene.iloc[i, 0] = (tmp.lstrip()).rstrip()
    gene_dup = [item for item, count in 
               collections.Counter(gene[gene.columns[0]]).items() if count > 1]
    pat_dup = [item for item, count in 
               collections.Counter(list(gene.columns)).items() if count > 1]

    if gene_dup:
        if gene_custom == None:
            if args.rules_selector == 'HMRcore':
                gene_in_rule = pk.load(open(args.tool_dir +
                                            '/local/HMRcore_genes.p', 'rb'))
            elif args.rules_selector == 'Recon':
                gene_in_rule = pk.load(open(args.tool_dir +
                                            '/local/Recon_genes.p', 'rb'))
            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom
        tmp = []
        for i in gene_dup:
            if gene_in_rule.get(i) == 'ok':
                tmp.append(i)
        if tmp:
            sys.exit('Execution aborted because gene ID '
                     +str(tmp)+' in '+name+' is duplicated\n')
    if pat_dup:
        warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + 
                '\n')
        
    return (gene.set_index(gene.columns[0])).to_dict()

############################ resolve ##########################################

def resolve(genes, rules, ids, resolve_none, name):
    resolve_rules = {}
    names_array = []
    not_found = []
    flag = False
    for key, value in genes.items():
        tmp_resolve = []
        for i in range(len(rules)):
            tmp = rules[i]
            if tmp:
                tmp, err = replace_gene_value(tmp, value)
                if err:
                    not_found.extend(err)
                ris = control(None, tmp, resolve_none)
                if ris is False or ris == None:
                    tmp_resolve.append(None)
                else:
                    tmp_resolve.append(ris)
                    flag = True
            else:
                tmp_resolve.append(None)    
        resolve_rules[key] = tmp_resolve
    if flag is False:
        warning('Warning: no computable score (due to missing gene values)' +
                'for class ' + name + ', the class has been disregarded\n')
        return (None, None)
    return (resolve_rules, list(set(not_found)))

############################ split class ######################################

def split_class(classes, resolve_rules):
    class_pat = {}
    for i in range(len(classes)):
        classe = classes.iloc[i, 1]
        if not pd.isnull(classe):
            l = []
            for j in range(i, len(classes)):
                if classes.iloc[j, 1] == classe:
                    pat_id = classes.iloc[j, 0]
                    tmp = resolve_rules.get(pat_id, None)
                    if tmp != None:
                        l.append(tmp)
                    classes.iloc[j, 1] = None
            if l:
                class_pat[classe] = list(map(list, zip(*l)))
            else:
                warning('Warning: no sample found in class ' + classe +
                        ', the class has been disregarded\n')
    return class_pat

############################ create_ras #######################################

def create_ras (resolve_rules, dataset_name, single_ras, rules, ids):

    if resolve_rules == None:
        warning("Couldn't generate RAS for current dataset: " + dataset_name)

    for geni in resolve_rules.values():
        for i, valori in enumerate(geni):
            if valori == None:
                geni[i] = 'None'
                
    output_ras = pd.DataFrame.from_dict(resolve_rules)
    
    output_ras.insert(0, 'Reactions', ids)
    output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
    
    if (single_ras):
        args = process_args(sys.argv)
        text_file = open(args.single_ras_file, "w")
    else:
        text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w")
    
    text_file.write(output_to_csv)
    text_file.close()

############################ map ##############################################

def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):
    args = process_args(sys.argv)
    if (not class_pat) or (len(class_pat.keys()) < 2):
        sys.exit('Execution aborted: classes provided for comparisons are ' +
                 'less than two\n')
    for i, j in it.combinations(class_pat.keys(), 2):
        tmp = {}
        count = 0
        max_F_C = 0
        for l1, l2 in zip(class_pat.get(i), class_pat.get(j)):
            try:
               stat_D, p_value = st.ks_2samp(l1, l2)
               avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
               if not isinstance(avg, str):
                   if max_F_C < abs(avg):
                       max_F_C = abs(avg)
               tmp[ids[count]] = [float(p_value), avg]
               count += 1
            except (TypeError, ZeroDivisionError):
               count += 1
        tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
        tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
        tmp_csv = tmp_csv.reset_index()
        header = ['ids', 'P_Value', 'Log2(fold change)']
        tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
        
        if create_svg or create_pdf:
            if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
                                                    and args.yes_no == 'yes'):
                fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
                file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
                with open(file_svg, 'wb') as new_map:
                    new_map.write(ET.tostring(core_map))
                    
                
                if create_pdf:
                    file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'
                    renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)
                
                if not create_svg:
                    #Ho utilizzato il file svg per generare il pdf, 
                    #ma l'utente non ne ha richiesto il ritorno, quindi
                    #lo elimino
                    os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')
                    
    return None

############################ MAIN #############################################

def main():
    args = process_args(sys.argv)
    
    create_svg = check_bool(args.generate_svg)
    create_pdf = check_bool(args.generate_pdf)
    generate_ras = check_bool(args.generate_ras)

    os.makedirs('result')

    if generate_ras:
        os.makedirs('ras')
    
    if args.rules_selector == 'HMRcore':        
        recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
    elif args.rules_selector == 'Recon':
        recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
    elif args.rules_selector == 'Custom':
        ids, rules, gene_in_rule = make_recon(args.custom)
        
    resolve_none = check_bool(args.none)
    
    class_pat = {}
    
    if args.option == 'datasets_rasonly':
        name = "RAS Dataset"
        dataset = read_dataset(args.input_datas[0],"dataset")

        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)

        type_gene = gene_type(dataset.iloc[0, 0], name) 
            
        if args.rules_selector != 'Custom':
            genes = data_gene(dataset, type_gene, name, None)
            ids, rules = load_id_rules(recon.get(type_gene))
        elif args.rules_selector == 'Custom':
            genes = data_gene(dataset, type_gene, name, gene_in_rule)
        
        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)

        create_ras(resolve_rules, name, True, rules, ids)
          
        if err != None and err:
            warning('Warning: gene\n' + str(err) + '\nnot found in class '
                + name + ', the expression level for this gene ' +
                'will be considered NaN\n')
        
        print('execution succeded')
        return None
    
    
    elif args.option == 'datasets':
        num = 1
        for i, j in zip(args.input_datas, args.names):

            name = name_dataset(j, num)
            dataset = read_dataset(i, name)

            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)

            type_gene = gene_type(dataset.iloc[0, 0], name) 
            
            if args.rules_selector != 'Custom':
                genes = data_gene(dataset, type_gene, name, None)
                ids, rules = load_id_rules(recon.get(type_gene))
            elif args.rules_selector == 'Custom':
                genes = data_gene(dataset, type_gene, name, gene_in_rule)
                
                
            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)

            if generate_ras:
                create_ras(resolve_rules, name, False, rules, ids)
            
            if err != None and err:
                warning('Warning: gene\n' + str(err) + '\nnot found in class '
                    + name + ', the expression level for this gene ' +
                    'will be considered NaN\n')
            if resolve_rules != None:
                class_pat[name] = list(map(list, zip(*resolve_rules.values())))
            num += 1
    elif args.option == 'dataset_class':
        name = 'RNAseq'
        dataset = read_dataset(args.input_data, name)
        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
        type_gene = gene_type(dataset.iloc[0, 0], name)
        classes = read_dataset(args.input_class, 'class')
        if not len(classes.columns) == 2:
            warning('Warning: more than 2 columns in class file. Extra' +
                    'columns have been disregarded\n')
        classes = classes.astype(str)
        if args.rules_selector != 'Custom':
            genes = data_gene(dataset, type_gene, name, None)
            ids, rules = load_id_rules(recon.get(type_gene))
        elif args.rules_selector == 'Custom':
            genes = data_gene(dataset, type_gene, name, gene_in_rule)
        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
        if err != None and err:
            warning('Warning: gene\n'+str(err)+'\nnot found in class '
                    + name + ', the expression level for this gene ' +
                    'will be considered NaN\n')
        if resolve_rules != None:
            class_pat = split_class(classes, resolve_rules)
            if generate_ras:
                create_ras(resolve_rules, name, False, rules, ids)
    
    	
    if args.rules_selector == 'Custom':
        if args.yes_no == 'yes':
            try:
                core_map = ET.parse(args.custom_map)
            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
                sys.exit('Execution aborted: custom map in wrong format')
        elif args.yes_no == 'no':
            core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
    else:       
        core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
        
    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)
        
    print('Execution succeded')

    return None

###############################################################################

if __name__ == "__main__":
    main()