Mercurial > repos > bimib > marea
changeset 47:3af9d394367c draft
Uploaded
author | bimib |
---|---|
date | Wed, 19 Feb 2020 10:44:52 -0500 |
parents | 5d5d01ef1d68 |
children | e4235b5231e4 |
files | Marea/marea.py Marea/marea.xml Marea/ras_generator.py Marea/ras_generator.xml |
diffstat | 4 files changed, 375 insertions(+), 735 deletions(-) [+] |
line wrap: on
line diff
--- a/Marea/marea.py Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/marea.py Wed Feb 19 10:44:52 2020 -0500 @@ -26,10 +26,6 @@ parser.add_argument('-cr', '--custom', type = str, help='your dataset if you want custom rules') - parser.add_argument('-na', '--names', - type = str, - nargs = '+', - help = 'input names') parser.add_argument('-n', '--none', type = str, default = 'true', @@ -37,7 +33,7 @@ help = 'compute Nan values') parser.add_argument('-pv' ,'--pValue', type = float, - default = 0.05, + default = 0.1, help = 'P-Value threshold (default: %(default)s)') parser.add_argument('-fc', '--fChange', type = float, @@ -49,14 +45,10 @@ help = 'your tool directory') parser.add_argument('-op', '--option', type = str, - choices = ['datasets', 'dataset_class', 'datasets_rasonly'], + choices = ['datasets', 'dataset_class'], help='dataset or dataset and class') parser.add_argument('-ol', '--out_log', help = "Output log") - parser.add_argument('-ids', '--input_datas', - type = str, - nargs = '+', - help = 'input datasets') parser.add_argument('-id', '--input_data', type = str, help = 'input dataset') @@ -80,15 +72,21 @@ default = 'true', choices = ['true', 'false'], help = 'generate pdf map') - parser.add_argument('-gr', '--generate_ras', + parser.add_argument('-on', '--control', + type = str) + parser.add_argument('-co', '--comparison', + type = str, + default = '1vs1', + choices = ['manyvsmany', 'onevsrest', 'onevsmany']) + parser.add_argument('-ids', '--input_datas', type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate reaction activity score') - parser.add_argument('-sr', '--single_ras_file', - type = str, - help = 'file that will contain ras') - + nargs = '+', + help = 'input datasets') + parser.add_argument('-na', '--names', + type = str, + nargs = '+', + help = 'input names') + args = parser.parse_args() return args @@ -615,7 +613,6 @@ def resolve(genes, rules, ids, resolve_none, name): resolve_rules = {} - names_array = [] not_found = [] flag = False for key, value in genes.items(): @@ -663,79 +660,157 @@ ', the class has been disregarded\n') return class_pat -############################ create_ras ####################################### - -def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): - - if resolve_rules == None: - warning("Couldn't generate RAS for current dataset: " + dataset_name) - - for geni in resolve_rules.values(): - for i, valori in enumerate(geni): - if valori == None: - geni[i] = 'None' - - output_ras = pd.DataFrame.from_dict(resolve_rules) - - output_ras.insert(0, 'Reactions', ids) - output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) - - if (single_ras): - args = process_args(sys.argv) - text_file = open(args.single_ras_file, "w") - else: - text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") - - text_file.write(output_to_csv) - text_file.close() - ############################ map ############################################## -def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): +def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf, comparison, control): args = process_args(sys.argv) if (not class_pat) or (len(class_pat.keys()) < 2): sys.exit('Execution aborted: classes provided for comparisons are ' + 'less than two\n') - for i, j in it.combinations(class_pat.keys(), 2): - tmp = {} - count = 0 - max_F_C = 0 - for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): - try: - stat_D, p_value = st.ks_2samp(l1, l2) - avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) - if not isinstance(avg, str): - if max_F_C < abs(avg): - max_F_C = abs(avg) - tmp[ids[count]] = [float(p_value), avg] - count += 1 - except (TypeError, ZeroDivisionError): - count += 1 - tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' - tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") - tmp_csv = tmp_csv.reset_index() - header = ['ids', 'P_Value', 'Log2(fold change)'] - tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if comparison == "manyvsmany": + for i, j in it.combinations(class_pat.keys(), 2): + + tmp = {} + count = 0 + max_F_C = 0 + for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): + try: + stat_D, p_value = st.ks_2samp(l1, l2) + #sum(l1) da errore secondo me perchè ha null + avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) + if not isinstance(avg, str): + if max_F_C < abs(avg): + max_F_C = abs(avg) + tmp[ids[count]] = [float(p_value), avg] + count += 1 + except (TypeError, ZeroDivisionError): + count += 1 + tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' + tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") + tmp_csv = tmp_csv.reset_index() + header = ['ids', 'P_Value', 'Log2(fold change)'] + tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' + renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + #Ho utilizzato il file svg per generare il pdf, + #ma l'utente non ne ha richiesto il ritorno, quindi + #lo elimino + + os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') + elif comparison == "onevsrest": + for single_cluster in class_pat.keys(): + t = [] + for k in class_pat.keys(): + if k != single_cluster: + t.append(class_pat.get(k)) + rest = [] + for i in t: + rest = rest + i + + tmp = {} + count = 0 + max_F_C = 0 + + for l1, l2 in zip(rest, class_pat.get(single_cluster)): + try: + stat_D, p_value = st.ks_2samp(l1, l2) + avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) + if not isinstance(avg, str): + if max_F_C < abs(avg): + max_F_C = abs(avg) + tmp[ids[count]] = [float(p_value), avg] + count += 1 + except (TypeError, ZeroDivisionError): + count += 1 + tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv' + tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") + tmp_csv = tmp_csv.reset_index() + header = ['ids', 'P_Value', 'Log2(fold change)'] + tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf' + renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg') + + elif comparison == "onevsmany": + for i, j in it.combinations(class_pat.keys(), 2): + + if i != control and j != control: + print(str(control) + " " + str(i) + " " + str(j)) + #Se è un confronto fra elementi diversi dal controllo, skippo + continue + + print('vado') + tmp = {} + count = 0 + max_F_C = 0 + for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): + try: + stat_D, p_value = st.ks_2samp(l1, l2) + #sum(l1) da errore secondo me perchè ha null + avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) + if not isinstance(avg, str): + if max_F_C < abs(avg): + max_F_C = abs(avg) + tmp[ids[count]] = [float(p_value), avg] + count += 1 + except (TypeError, ZeroDivisionError): + count += 1 + tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' + tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") + tmp_csv = tmp_csv.reset_index() + header = ['ids', 'P_Value', 'Log2(fold change)'] + tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) + + if create_svg or create_pdf: + if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' + and args.yes_no == 'yes'): + fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) + file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' + with open(file_svg, 'wb') as new_map: + new_map.write(ET.tostring(core_map)) + + + if create_pdf: + file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' + renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) + + if not create_svg: + #Ho utilizzato il file svg per generare il pdf, + #ma l'utente non ne ha richiesto il ritorno, quindi + #lo elimino + + os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') - if create_svg or create_pdf: - if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' - and args.yes_no == 'yes'): - fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) - file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' - with open(file_svg, 'wb') as new_map: - new_map.write(ET.tostring(core_map)) - - - if create_pdf: - file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' - renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) - - if not create_svg: - #Ho utilizzato il file svg per generare il pdf, - #ma l'utente non ne ha richiesto il ritorno, quindi - #lo elimino - os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') - + + + return None ############################ MAIN ############################################# @@ -745,12 +820,9 @@ create_svg = check_bool(args.generate_svg) create_pdf = check_bool(args.generate_pdf) - generate_ras = check_bool(args.generate_ras) - os.makedirs('result') - - if generate_ras: - os.makedirs('ras') + if os.path.isdir('result') == False: + os.makedirs('result') if args.rules_selector == 'HMRcore': recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) @@ -758,93 +830,60 @@ recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) elif args.rules_selector == 'Custom': ids, rules, gene_in_rule = make_recon(args.custom) - - resolve_none = check_bool(args.none) - + class_pat = {} - if args.option == 'datasets_rasonly': - name = "RAS Dataset" - dataset = read_dataset(args.input_datas[0],"dataset") - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) - - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) - - create_ras(resolve_rules, name, True, rules, ids) - - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - - print('execution succeded') - return None - - - elif args.option == 'datasets': + if args.option == 'datasets': num = 1 for i, j in zip(args.input_datas, args.names): - name = name_dataset(j, num) - dataset = read_dataset(i, name) - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) + resolve_rules = read_dataset(i, name) + + resolve_rules.iloc[:, 0] = (resolve_rules.iloc[:, 0]).astype(str) - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + ids = pd.Series.tolist(resolve_rules.iloc[:, 0]) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) + resolve_rules = resolve_rules.drop(resolve_rules.columns[[0]], axis=1) + resolve_rules = resolve_rules.replace({'None': None}) + resolve_rules = resolve_rules.to_dict('list') - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') + #Converto i valori da str a float + to_float = lambda x: float(x) if (x != None) else None + + resolve_rules_float = {} + + for k in resolve_rules: + resolve_rules_float[k] = list(map(to_float, resolve_rules[k])); resolve_rules_float + if resolve_rules != None: - class_pat[name] = list(map(list, zip(*resolve_rules.values()))) + class_pat[name] = list(map(list, zip(*resolve_rules_float.values()))) + num += 1 - elif args.option == 'dataset_class': - name = 'RNAseq' - dataset = read_dataset(args.input_data, name) - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - type_gene = gene_type(dataset.iloc[0, 0], name) + + if args.option == 'dataset_class': + name = 'RAS' + resolve_rules = read_dataset(args.input_data, name) + resolve_rules.iloc[:, 0] = (resolve_rules.iloc[:, 0]).astype(str) + + ids = pd.Series.tolist(resolve_rules.iloc[:, 0]) + + resolve_rules = resolve_rules.drop(resolve_rules.columns[[0]], axis=1) + resolve_rules = resolve_rules.replace({'None': None}) + resolve_rules = resolve_rules.to_dict('list') + + #Converto i valori da str a float + to_float = lambda x: float(x) if (x != None) else None + + resolve_rules_float = {} + + for k in resolve_rules: + resolve_rules_float[k] = list(map(to_float, resolve_rules[k])); resolve_rules_float + classes = read_dataset(args.input_class, 'class') - if not len(classes.columns) == 2: - warning('Warning: more than 2 columns in class file. Extra' + - 'columns have been disregarded\n') classes = classes.astype(str) - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) - if err != None and err: - warning('Warning: gene\n'+str(err)+'\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - if resolve_rules != None: - class_pat = split_class(classes, resolve_rules) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) - + + if resolve_rules_float != None: + class_pat = split_class(classes, resolve_rules_float) if args.rules_selector == 'Custom': if args.yes_no == 'yes': @@ -857,11 +896,12 @@ else: core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') - maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) + maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control) print('Execution succeded') + + return None - return None ###############################################################################
--- a/Marea/marea.xml Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/marea.xml Wed Feb 19 10:44:52 2020 -0500 @@ -1,28 +1,19 @@ -<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.6"> - <description></description> - <macros> - <import>marea_macros.xml</import> - </macros> - <requirements> - <requirement type="package" version="0.23.0">pandas</requirement> - <requirement type="package" version="1.1.0">scipy</requirement> - <requirement type="package" version="0.10.1">cobra</requirement> - <requirement type="package" version="4.2.1">lxml</requirement> - <requirement type="package" version="0.8.1">svglib</requirement> - <requirement type="package" version="3.4.0">reportlab</requirement> - </requirements> - <command detect_errors="exit_code"> - <![CDATA[ +<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.7"> + <macros> + <import>marea_macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="0.23.0">pandas</requirement> + <requirement type="package" version="1.1.0">scipy</requirement> + <requirement type="package" version="0.10.1">cobra</requirement> + <requirement type="package" version="4.2.1">lxml</requirement> + <requirement type="package" version="0.8.1">svglib</requirement> + <requirement type="package" version="3.4.0">reportlab</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ python $__tool_directory__/marea.py - --rules_selector $cond_rule.rules_selector - #if $cond_rule.rules_selector == 'Custom': - --custom ${cond_rule.Custom_rules} - --yes_no ${cond_rule.cond_map.yes_no} - #if $cond_rule.cond_map.yes_no == 'yes': - --custom_map $cond_rule.cond_map.Custom_map - #end if - #end if - + --tool_dir $__tool_directory__ --option $cond.type_selector --out_log $log @@ -36,153 +27,130 @@ #for $data in $cond.input_Datasets: ${data.input_name} #end for + --comparison ${cond.comparis.comparison} #if $cond.advanced.choice == 'true': - --none ${cond.advanced.None} - --pValue ${cond.advanced.pValue} - --fChange ${cond.advanced.fChange} - --generate_svg ${cond.advanced.generateSvg} - --generate_pdf ${cond.advanced.generatePdf} - --generate_ras ${cond.advanced.generateRas} - #else - --none true - --pValue 0.05 - --fChange 1.5 - --generate_svg false - --generate_pdf true - --generate_ras false - #end if + --pValue ${cond.advanced.pValue} + --fChange ${cond.advanced.fChange} + --generate_svg ${cond.advanced.generateSvg} + --generate_pdf ${cond.advanced.generatePdf} + #else + --pValue 0.05 + --fChange 1.5 + --generate_svg false + --generate_pdf true + --generate_ras false + #end if #elif $cond.type_selector == 'dataset_class': --input_data ${input_data} --input_class ${input_class} + --comparison ${cond.comparis.comparison} + #if $cond.comparis.comparison == 'onevsmany' + --control ${cond.comparis.controlgroup} + #end if #if $cond.advanced.choice == 'true': - --none ${cond.advanced.None} - --pValue ${cond.advanced.pValue} - --fChange ${cond.advanced.fChange} - --generate_svg ${cond.advanced.generateSvg} - --generate_pdf ${cond.advanced.generatePdf} - --generate_ras ${cond.advanced.generateRas} - #else - --none true - --pValue 0.05 - --fChange 1.5 - --generate_svg false - --generate_pdf true - --generate_ras false - #end if - #end if - #if $cond.type_selector == 'datasets_rasonly': - --input_datas ${input_Datasets} - --single_ras_file $ras_single - --none ${cond.None} + --pValue ${cond.advanced.pValue} + --fChange ${cond.advanced.fChange} + --generate_svg ${cond.advanced.generateSvg} + --generate_pdf ${cond.advanced.generatePdf} + #else + --pValue 0.05 + --fChange 1.5 + --generate_svg false + --generate_pdf true + #end if #end if ]]> - </command> - - <inputs> - <conditional name="cond_rule"> - <expand macro="options"/> - <when value="HMRcore"> - </when> - <when value="Recon"> - </when> - <when value="Custom"> - <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" /> - <conditional name="cond_map"> - <param name="yes_no" type="select" label="Custom map? (optional)"> - <option value="no" selected="true">no</option> - <option value="yes">yes</option> - </param> - <when value="yes"> - <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/> - </when> - <when value="no"> - </when> - </conditional> - </when> - </conditional> - <conditional name="cond"> - <param name="type_selector" argument="--option" type="select" label="Input format:"> - <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N</option> - <option value="dataset_class">RNAseq of all samples + sample group specification</option> - <option value="datasets_rasonly" selected="true">RNAseq dataset</option> - </param> - <when value="datasets"> - <repeat name="input_Datasets" title="RNAseq" min="2"> - <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" /> - <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" /> - </repeat> - <conditional name="advanced"> + </command> + <inputs> + <conditional name="cond"> + <param name="type_selector" argument="--option" type="select" label="Input format:"> + <option value="datasets" selected="true">RAS of group 1 + RAS of group 2 + ... + RAS of group N</option> + <option value="dataset_class">RAS of all samples + sample group specification</option> + </param> + <when value="datasets"> + <repeat name="input_Datasets" title="RAS dataset" min="2"> + <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" /> + <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" /> + </repeat> + <conditional name="comparis"> + <param name="comparison" argument="--comparison" type="select" label="Groups comparison:"> + <option value="manyvsmany" selected="true">One vs One</option> + <option value="onevsrest">One vs All</option> + <option value="onevsmany">One vs Control</option> + </param> + <when value="onevsmany"> + <param name="controlgroup" argument="--controlgroup" type="text" label="Control group label:" value="0" help="Name of group label to be compared to others"/> + </when> + </conditional> + <conditional name="advanced"> <param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom parameters for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps."> - <option value="true" selected="true">No</option> - <option value="false">Yes</option> + <option value="true" selected="true">No</option> + <option value="false">Yes</option> </param> - <when value="false"> - </when> + <when value="false"></when> <when value="true"> - <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> - <param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" /> - <param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" /> - <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" /> - <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" /> - <param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" /> + <conditional name="cond_map"> + <param name="choice" type="boolean" checked="false" label="Use custom map?" help="Use this option only if you have generated RAS using a custom set of rules"> + <option value="false" selected="true">No</option> + <option value="true">Yes</option> + </param> + <when value="true"> + <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/> + </when> + </conditional> + <param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" /> + <param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" /> + <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" /> + <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" /> </when> - </conditional> - </when> - <when value="datasets_rasonly"> - <param name="input_Datasets" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" /> - <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" /> - <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> - </when> - <when value="dataset_class"> - <param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" /> + </conditional> + </when> + <when value="dataset_class"> + <param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RAS of all samples" /> <param name="input_class" argument="--input_class" type="data" format="tabular, csv, tsv" label="Sample group specification" /> - <conditional name="advanced"> - <param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom parameters for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps."> - <option value="true" selected="true">No</option> - <option value="false">Yes</option> + <conditional name="comparis"> + <param name="comparison" argument="--comparison" type="select" label="Groups comparison:"> + <option value="manyvsmany" selected="true">One vs One</option> + <option value="onevsrest">One vs All</option> + <option value="onevsmany">One vs Control</option> </param> - <when value="false"> - </when> - <when value="true"> - <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> - <param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" /> - <param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" /> - <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" /> - <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" /> - <param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" /> + <when value="onevsmany"> + <param name="controlgroup" argument="--controlgroup" type="text" label="Control group label:" value="0" help="Name of group label to be compared to others"/> </when> - </conditional> - </when> - </conditional> - - - - - </inputs> - - <outputs> - <data format="txt" name="log" label="MaREA - Log" /> - <data format="tabular" name="ras_single" label="MaREA - RAS - ${cond.input_name}"> - <filter>cond['type_selector'] == "datasets_rasonly"</filter> - </data> - <collection name="results" type="list" label="MaREA - Results"> - <filter>cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class"</filter> - <discover_datasets pattern="__name_and_ext__" directory="result"/> - </collection> - <collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular"> - <filter>cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']</filter> - <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/> + </conditional> + <conditional name="advanced"> + <param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom parameters for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps."> + <option value="true" selected="true">No</option> + <option value="false">Yes</option> + </param> + <when value="false"></when> + <when value="true"> + <conditional name="cond_map"> + <param name="choice" type="boolean" checked="false" label="Use custom map?" help="Use this option only if you have generated RAS using a custom set of rules"> + <option value="false" selected="true">No</option> + <option value="true">Yes</option> + </param> + <when value="true"> + <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/> + </when> + </conditional> + <param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" /> + <param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" /> + <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" /> + <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" /> + </when> + </conditional> + </when> + </conditional> +</inputs> +<outputs> + <data format="txt" name="log" label="MaREA - Log" /> + <collection name="results" type="list" label="MaREA - Results"> + <discover_datasets pattern="__name_and_ext__" directory="result"/> </collection> - - </outputs> - <tests> - <test> - <param name="pValue" value="0.56"/> - <output name="log" file="log.txt"/> - </test> - </tests> - <help> -<![CDATA[ +</outputs> +<help> + <![CDATA[ What it does ------------- @@ -268,7 +236,6 @@ .. _MaREA cluster analysis: http://link del tool di cluster.org ]]> - </help> - <expand macro="citations" /> -</tool> +</help> +<expand macro="citations" />undefined</tool>
--- a/Marea/ras_generator.py Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/ras_generator.py Wed Feb 19 10:44:52 2020 -0500 @@ -1,16 +1,10 @@ from __future__ import division import sys import pandas as pd -import itertools as it -import scipy.stats as st import collections -import lxml.etree as ET import pickle as pk import math -import os import argparse -from svglib.svglib import svg2rlg -from reportlab.graphics import renderPDF ########################## argparse ########################################## @@ -26,69 +20,25 @@ parser.add_argument('-cr', '--custom', type = str, help='your dataset if you want custom rules') - parser.add_argument('-na', '--names', - type = str, - nargs = '+', - help = 'input names') parser.add_argument('-n', '--none', type = str, default = 'true', choices = ['true', 'false'], help = 'compute Nan values') - parser.add_argument('-pv' ,'--pValue', - type = float, - default = 0.05, - help = 'P-Value threshold (default: %(default)s)') - parser.add_argument('-fc', '--fChange', - type = float, - default = 1.5, - help = 'Fold-Change threshold (default: %(default)s)') parser.add_argument('-td', '--tool_dir', type = str, required = True, help = 'your tool directory') - parser.add_argument('-op', '--option', - type = str, - choices = ['datasets', 'dataset_class', 'datasets_rasonly'], - help='dataset or dataset and class') parser.add_argument('-ol', '--out_log', help = "Output log") - parser.add_argument('-ids', '--input_datas', - type = str, - nargs = '+', - help = 'input datasets') - parser.add_argument('-id', '--input_data', + parser.add_argument('-id', '--input', type = str, help = 'input dataset') - parser.add_argument('-ic', '--input_class', - type = str, - help = 'sample group specification') - parser.add_argument('-cm', '--custom_map', - type = str, - help = 'custom map') - parser.add_argument('-yn', '--yes_no', - type = str, - choices = ['yes', 'no'], - help = 'if make or not custom map') - parser.add_argument('-gs', '--generate_svg', + parser.add_argument('-ra', '--ras_output', type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate svg map') - parser.add_argument('-gp', '--generate_pdf', - type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate pdf map') - parser.add_argument('-gr', '--generate_ras', - type = str, - default = 'true', - choices = ['true', 'false'], - help = 'generate reaction activity score') - parser.add_argument('-sr', '--single_ras_file', - type = str, - help = 'file that will contain ras') - + required = True, + help = 'ras output') + args = parser.parse_args() return args @@ -297,79 +247,6 @@ return False return ris -############################ map_methods ###################################### - -def fold_change(avg1, avg2): - if avg1 == 0 and avg2 == 0: - return 0 - elif avg1 == 0: - return '-INF' - elif avg2 == 0: - return 'INF' - else: - return math.log(avg1 / avg2, 2) - -def fix_style(l, col, width, dash): - tmp = l.split(';') - flag_col = False - flag_width = False - flag_dash = False - for i in range(len(tmp)): - if tmp[i].startswith('stroke:'): - tmp[i] = 'stroke:' + col - flag_col = True - if tmp[i].startswith('stroke-width:'): - tmp[i] = 'stroke-width:' + width - flag_width = True - if tmp[i].startswith('stroke-dasharray:'): - tmp[i] = 'stroke-dasharray:' + dash - flag_dash = True - if not flag_col: - tmp.append('stroke:' + col) - if not flag_width: - tmp.append('stroke-width:' + width) - if not flag_dash: - tmp.append('stroke-dasharray:' + dash) - return ';'.join(tmp) - -def fix_map(d, core_map, threshold_P_V, threshold_F_C, max_F_C): - maxT = 12 - minT = 2 - grey = '#BEBEBE' - blue = '#0000FF' - red = '#E41A1C' - for el in core_map.iter(): - el_id = str(el.get('id')) - if el_id.startswith('R_'): - tmp = d.get(el_id[2:]) - if tmp != None: - p_val = tmp[0] - f_c = tmp[1] - if p_val < threshold_P_V: - if not isinstance(f_c, str): - if abs(f_c) < math.log(threshold_F_C, 2): - col = grey - width = str(minT) - else: - if f_c < 0: - col = blue - elif f_c > 0: - col = red - width = str(max((abs(f_c) * maxT) / max_F_C, minT)) - else: - if f_c == '-INF': - col = blue - elif f_c == 'INF': - col = red - width = str(maxT) - dash = 'none' - else: - dash = '5,5' - col = grey - width = str(minT) - el.set('style', fix_style(el.get('style'), col, width, dash)) - return core_map - ############################ make recon ####################################### def check_and_doWord(l): @@ -615,7 +492,6 @@ def resolve(genes, rules, ids, resolve_none, name): resolve_rules = {} - names_array = [] not_found = [] flag = False for key, value in genes.items(): @@ -652,7 +528,6 @@ for j in range(i, len(classes)): if classes.iloc[j, 1] == classe: pat_id = classes.iloc[j, 0] - tmp = resolve_rules.get(pat_id, None) if tmp != None: l.append(tmp) classes.iloc[j, 1] = None @@ -665,7 +540,7 @@ ############################ create_ras ####################################### -def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): +def create_ras (resolve_rules, dataset_name, rules, ids, file): if resolve_rules == None: warning("Couldn't generate RAS for current dataset: " + dataset_name) @@ -680,78 +555,16 @@ output_ras.insert(0, 'Reactions', ids) output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) - if (single_ras): - args = process_args(sys.argv) - text_file = open(args.single_ras_file, "w") - else: - text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w") + text_file = open(file, "w") text_file.write(output_to_csv) text_file.close() -############################ map ############################################## - -def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf): - args = process_args(sys.argv) - if (not class_pat) or (len(class_pat.keys()) < 2): - sys.exit('Execution aborted: classes provided for comparisons are ' + - 'less than two\n') - for i, j in it.combinations(class_pat.keys(), 2): - tmp = {} - count = 0 - max_F_C = 0 - for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): - try: - stat_D, p_value = st.ks_2samp(l1, l2) - avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) - if not isinstance(avg, str): - if max_F_C < abs(avg): - max_F_C = abs(avg) - tmp[ids[count]] = [float(p_value), avg] - count += 1 - except (TypeError, ZeroDivisionError): - count += 1 - tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' - tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") - tmp_csv = tmp_csv.reset_index() - header = ['ids', 'P_Value', 'Log2(fold change)'] - tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) - - if create_svg or create_pdf: - if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' - and args.yes_no == 'yes'): - fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) - file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' - with open(file_svg, 'wb') as new_map: - new_map.write(ET.tostring(core_map)) - - - if create_pdf: - file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' - renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) - - if not create_svg: - #Ho utilizzato il file svg per generare il pdf, - #ma l'utente non ne ha richiesto il ritorno, quindi - #lo elimino - os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') - - return None - ############################ MAIN ############################################# def main(): args = process_args(sys.argv) - - create_svg = check_bool(args.generate_svg) - create_pdf = check_bool(args.generate_pdf) - generate_ras = check_bool(args.generate_ras) - os.makedirs('result') - - if generate_ras: - os.makedirs('ras') - if args.rules_selector == 'HMRcore': recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) elif args.rules_selector == 'Recon': @@ -761,104 +574,30 @@ resolve_none = check_bool(args.none) - class_pat = {} - if args.option == 'datasets_rasonly': - name = "RAS Dataset" - dataset = read_dataset(args.input_datas[0],"dataset") - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) - - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + name = "RAS Dataset" + dataset = read_dataset(args.input, "dataset") - create_ras(resolve_rules, name, True, rules, ids) - - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - - print('execution succeded') - return None - - - elif args.option == 'datasets': - num = 1 - for i, j in zip(args.input_datas, args.names): - - name = name_dataset(j, num) - dataset = read_dataset(i, name) - - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - type_gene = gene_type(dataset.iloc[0, 0], name) - - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - - - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) - - if err != None and err: - warning('Warning: gene\n' + str(err) + '\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - if resolve_rules != None: - class_pat[name] = list(map(list, zip(*resolve_rules.values()))) - num += 1 - elif args.option == 'dataset_class': - name = 'RNAseq' - dataset = read_dataset(args.input_data, name) - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - type_gene = gene_type(dataset.iloc[0, 0], name) - classes = read_dataset(args.input_class, 'class') - if not len(classes.columns) == 2: - warning('Warning: more than 2 columns in class file. Extra' + - 'columns have been disregarded\n') - classes = classes.astype(str) - if args.rules_selector != 'Custom': - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(recon.get(type_gene)) - elif args.rules_selector == 'Custom': - genes = data_gene(dataset, type_gene, name, gene_in_rule) - resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) - if err != None and err: - warning('Warning: gene\n'+str(err)+'\nnot found in class ' - + name + ', the expression level for this gene ' + - 'will be considered NaN\n') - if resolve_rules != None: - class_pat = split_class(classes, resolve_rules) - if generate_ras: - create_ras(resolve_rules, name, False, rules, ids) + type_gene = gene_type(dataset.iloc[0, 0], name) + + if args.rules_selector != 'Custom': + genes = data_gene(dataset, type_gene, name, None) + ids, rules = load_id_rules(recon.get(type_gene)) + elif args.rules_selector == 'Custom': + genes = data_gene(dataset, type_gene, name, gene_in_rule) - - if args.rules_selector == 'Custom': - if args.yes_no == 'yes': - try: - core_map = ET.parse(args.custom_map) - except (ET.XMLSyntaxError, ET.XMLSchemaParseError): - sys.exit('Execution aborted: custom map in wrong format') - elif args.yes_no == 'no': - core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg') - else: - core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') - - maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf) - + resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) + + create_ras(resolve_rules, name, rules, ids, args.ras_output) + + if err != None and err: + warning('Warning: gene\n' + str(err) + '\nnot found in class ' + + name + ', the expression level for this gene ' + + 'will be considered NaN\n') + + print('Execution succeded') return None
--- a/Marea/ras_generator.xml Wed Jan 22 11:50:54 2020 -0500 +++ b/Marea/ras_generator.xml Wed Feb 19 10:44:52 2020 -0500 @@ -1,5 +1,5 @@ -<tool id="MaREA RAS Generator" name="RAS Generator" version="1.0.0"> - <description></description> +<tool id="MaREA RAS Generator" name="Expression2RAS" version="1.0.2"> + <description>- Reaction Activity Scores computation</description> <macros> <import>marea_macros.xml</import> </macros> @@ -13,18 +13,15 @@ </requirements> <command detect_errors="exit_code"> <![CDATA[ - python $__tool_directory__/marea.py - --rules_selector $cond_rule.rules_selector - --tool_dir $__tool_directory__ - --option $cond.type_selector - --out_log $log - --input_datas ${input_Datasets} - --single_ras_file $ras_single - --none ${cond.None} - #end if + python $__tool_directory__/ras_generator.py + --rules_selector $cond_rule.rules_selector + --input $input + --none $none + --tool_dir $__tool_directory__ + --out_log $log + --ras_output $ras_output ]]> </command> - <inputs> <conditional name="cond_rule"> <expand macro="options"/> @@ -47,124 +44,21 @@ </conditional> </when> </conditional> - <conditional name="cond"> - - <param name="input_Datasets" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" /> - <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" /> - <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> - - - </when> - </conditional> + <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Gene Expression dataset:" /> + <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" /> + <param name="none" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> </inputs> <outputs> - <data format="txt" name="log" label="MaREA - Log" /> - <data format="tabular" name="ras_single" label="MaREA - RAS - ${cond.input_name}"> - <filter>cond['type_selector'] == "datasets_rasonly"</filter> - </data> - <collection name="results" type="list" label="MaREA - Results"> - <filter>cond['type_selector'] == "datasets" or cond['type_selector'] == "dataset_class"</filter> - <discover_datasets pattern="__name_and_ext__" directory="result"/> - </collection> - <collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular"> - <filter>cond['type_selector'] != "datasets_rasonly" and cond['advanced']['choice'] and cond['advanced']['generateRas']</filter> - <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/> - </collection> - + <data format="txt" name="log" label="Expression2RAS - $name - Log" /> + <data format="tabular" name="ras_output" label="$name RAS"/> </outputs> - <tests> - <test> - <param name="pValue" value="0.56"/> - <output name="log" file="log.txt"/> - </test> - </tests> + <help> <![CDATA[ What it does ------------- - -This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724. - -Accepted files are: - - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*"); - - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to. - -Optional files: - - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: - - * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; - * .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2). - - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example. - -The tool generates: - 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes; - 2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes; - 3) a log file (.txt). - -RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID. - -Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2). - -To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes. - -Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA". - - -Example input -------------- - -**"Custom Rules"** option: - -Custom Rules Dastaset: - -@CUSTOM_RULES_EXEMPLE@ - -**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option: - -RNA-seq Dataset 1: - -@DATASET_EXEMPLE1@ - -RNA-seq Dataset 2: - -@DATASET_EXEMPLE2@ - -**"RNAseq of all samples + sample group specification"** option: - -RNA-seq Dataset: - -@DATASET_EXEMPLE1@ - -Class-file: - -+------------+------------+ -| Patient_ID | class | -+============+============+ -| TCGAAA3529 | MSI | -+------------+------------+ -| TCGAA62671 | MSS | -+------------+------------+ -| TCGAA62672 | MSI | -+------------+------------+ - -| - -.. class:: infomark - -**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_. - -.. class:: infomark - -**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_. - -@REFERENCE@ - -.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724 -.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj -.. _MaREA cluster analysis: http://link del tool di cluster.org - ]]> </help> <expand macro="citations" />