Repository 'marea'
hg clone https://toolshed.g2.bx.psu.edu/repos/bimib/marea

Changeset 30:e88efefbd015 (2019-10-15)
Previous changeset 29:9fcb0e8d6d47 (2019-10-14) Next changeset 31:944e15aa970a (2019-10-15)
Commit message:
fix changes
added:
Desktop/Marea/marea.py
Desktop/Marea/marea.xml
Desktop/Marea/marea_cluster.py
Desktop/Marea/marea_cluster.xml
Desktop/Marea/marea_macros.xml
diff -r 9fcb0e8d6d47 -r e88efefbd015 Desktop/Marea/marea.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Desktop/Marea/marea.py Tue Oct 15 12:21:16 2019 -0400
b'@@ -0,0 +1,861 @@\n+from __future__ import division\n+import sys\n+import pandas as pd\n+import itertools as it\n+import scipy.stats as st\n+import collections\n+import lxml.etree as ET\n+import shutil\n+import pickle as pk\n+import math\n+import os\n+import argparse\n+from svglib.svglib import svg2rlg\n+from reportlab.graphics import renderPDF\n+\n+########################## argparse ##########################################\n+\n+def process_args(args):\n+    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n+                                     description = \'process some value\\\'s\'+\n+                                     \' genes to create a comparison\\\'s map.\')\n+    parser.add_argument(\'-rs\', \'--rules_selector\', \n+                        type = str,\n+                        default = \'HMRcore\',\n+                        choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n+                        help = \'chose which type of dataset you want use\')\n+    parser.add_argument(\'-cr\', \'--custom\',\n+                        type = str,\n+                        help=\'your dataset if you want custom rules\')\n+    parser.add_argument(\'-na\', \'--names\', \n+                        type = str,\n+                        nargs = \'+\', \n+                        help = \'input names\')\n+    parser.add_argument(\'-n\', \'--none\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'], \n+                        help = \'compute Nan values\')\n+    parser.add_argument(\'-pv\' ,\'--pValue\', \n+                        type = float, \n+                        default = 0.05, \n+                        help = \'P-Value threshold (default: %(default)s)\')\n+    parser.add_argument(\'-fc\', \'--fChange\', \n+                        type = float, \n+                        default = 1.5, \n+                        help = \'Fold-Change threshold (default: %(default)s)\')\n+    parser.add_argument(\'-td\', \'--tool_dir\',\n+                        type = str,\n+                        required = True,\n+                        help = \'your tool directory\')\n+    parser.add_argument(\'-op\', \'--option\', \n+                        type = str, \n+                        choices = [\'datasets\', \'dataset_class\', \'datasets_rasonly\'],\n+                        help=\'dataset or dataset and class\')\n+    parser.add_argument(\'-ol\', \'--out_log\', \n+                        help = "Output log")    \n+    parser.add_argument(\'-ids\', \'--input_datas\', \n+                        type = str,\n+                        nargs = \'+\', \n+                        help = \'input datasets\')\n+    parser.add_argument(\'-id\', \'--input_data\',\n+                        type = str,\n+                        help = \'input dataset\')\n+    parser.add_argument(\'-ic\', \'--input_class\', \n+                        type = str, \n+                        help = \'sample group specification\')\n+    parser.add_argument(\'-cm\', \'--custom_map\', \n+                        type = str, \n+                        help = \'custom map\')\n+    parser.add_argument(\'-yn\', \'--yes_no\', \n+                        type = str,\n+                        choices = [\'yes\', \'no\'],\n+                        help = \'if make or not custom map\')\n+    parser.add_argument(\'-gs\', \'--generate_svg\',\n+                        type = str,\n+                        default = \'true\',\n+                    
    choices = [\'true\', \'false\'], \n+                        help = \'generate svg map\')\n+    parser.add_argument(\'-gp\', \'--generate_pdf\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'], \n+                        help = \'generate pdf map\')\n+    parser.add_argument(\'-gr\', \'--generate_ras\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'],\n+                        help = \'generate reaction activity score\')\n+    parser.add_argument(\'-sr\', \'--single_ras_file\',  \n+                  '..b'taset.iloc[0, 0], name) \n+            \n+        if args.rules_selector != \'Custom\':\n+            genes = data_gene(dataset, type_gene, name, None)\n+            ids, rules = load_id_rules(recon.get(type_gene))\n+        elif args.rules_selector == \'Custom\':\n+            genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+                \n+        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+\n+        create_ras(resolve_rules, name, True)\n+          \n+        if err != None and err:\n+            warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n+                + name + \', the expression level for this gene \' +\n+                \'will be considered NaN\\n\')\n+        \n+        print(\'execution succeded\')\n+        return None\n+    \n+    \n+    elif args.option == \'datasets\':\n+        num = 1\n+        for i, j in zip(args.input_datas, args.names):\n+\n+            name = name_dataset(j, num)\n+            dataset = read_dataset(i, name)\n+\n+            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+\n+            type_gene = gene_type(dataset.iloc[0, 0], name) \n+            \n+            if args.rules_selector != \'Custom\':\n+                genes = data_gene(dataset, type_gene, name, None)\n+                ids, rules = load_id_rules(recon.get(type_gene))\n+            elif args.rules_selector == \'Custom\':\n+                genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+                \n+            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+\n+            if generate_ras:\n+                create_ras(resolve_rules, name, False)\n+            \n+            if err != None and err:\n+                warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n+                    + name + \', the expression level for this gene \' +\n+                    \'will be considered NaN\\n\')\n+            if resolve_rules != None:\n+                class_pat[name] = list(map(list, zip(*resolve_rules.values())))\n+            num += 1\n+    elif args.option == \'dataset_class\':\n+        name = \'RNAseq\'\n+        dataset = read_dataset(args.input_data, name)\n+        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+        type_gene = gene_type(dataset.iloc[0, 0], name)\n+        classes = read_dataset(args.input_class, \'class\')\n+        if not len(classes.columns) == 2:\n+            warning(\'Warning: more than 2 columns in class file. 
Extra\' +\n+                    \'columns have been disregarded\\n\')\n+        classes = classes.astype(str)\n+        if args.rules_selector != \'Custom\':\n+            genes = data_gene(dataset, type_gene, name, None)\n+            ids, rules = load_id_rules(recon.get(type_gene))\n+        elif args.rules_selector == \'Custom\':\n+            genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+        if err != None and err:\n+            warning(\'Warning: gene\\n\'+str(err)+\'\\nnot found in class \'\n+                    + name + \', the expression level for this gene \' +\n+                    \'will be considered NaN\\n\')\n+        if resolve_rules != None:\n+            class_pat = split_class(classes, resolve_rules)\n+    \n+    \t\n+    if args.rules_selector == \'Custom\':\n+        if args.yes_no == \'yes\':\n+            try:\n+                core_map = ET.parse(args.custom_map)\n+            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n+                sys.exit(\'Execution aborted: custom map in wrong format\')\n+        elif args.yes_no == \'no\':\n+            core_map = ET.parse(args.tool_dir + \'/local/HMRcoreMap.svg\')\n+    else:       \n+        core_map = ET.parse(args.tool_dir+\'/local/HMRcoreMap.svg\')\n+        \n+    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)\n+        \n+    print(\'Execution succeded\')\n+\n+    return None\n+\n+###############################################################################\n+\n+if __name__ == "__main__":\n+    main()\n'
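The hunk above is truncated by the changeset viewer, so the bodies of resolve() and create_ras() are not visible here. For orientation only, a minimal conceptual sketch of how a GPR rule can be turned into a reaction activity score (RAS), assuming the sum-for-OR / min-for-AND convention described in the MaREA paper; the function name and the flat rule encoding are hypothetical and are not the ones used in marea.py:

    def eval_gpr(rule, expression):
        # rule: a flat boolean expression such as "155060 or 10357" (nesting not handled here);
        # expression: dict mapping gene id -> expression value (missing genes are treated as NaN).
        tokens = rule.split()
        operands = [expression.get(t) for t in tokens[0::2]]
        operators = tokens[1::2]
        values = [v for v in operands if v is not None]
        if not values:
            return None                 # no measured gene: RAS undefined (NaN)
        if 'and' in operators:
            return min(values)          # AND -> minimum expression
        return sum(values)              # OR -> sum of expressions

    # e.g. with the entrez ids used in the example rules later in this changeset:
    print(eval_gpr('155060 or 10357', {'155060': 2.0, '10357': 1.5}))   # 3.5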
diff -r 9fcb0e8d6d47 -r e88efefbd015 Desktop/Marea/marea.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Desktop/Marea/marea.xml Tue Oct 15 12:21:16 2019 -0400
b'@@ -0,0 +1,274 @@\n+<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">\n+    <description></description>\n+    <macros>\n+        <import>marea_macros.xml</import>\n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="0.23.0">pandas</requirement>\n+        <requirement type="package" version="1.1.0">scipy</requirement>\n+        <requirement type="package" version="0.10.1">cobra</requirement>\n+        <requirement type="package" version="4.2.1">lxml</requirement>\n+        <requirement type="package" version="0.8.1">svglib</requirement>\n+        <requirement type="package" version="3.4.0">reportlab</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code">\n+        <![CDATA[\n+      \tpython $__tool_directory__/marea.py\n+        --rules_selector $cond_rule.rules_selector\n+        #if $cond_rule.rules_selector == \'Custom\':\n+            --custom ${cond_rule.Custom_rules}\n+            --yes_no ${cond_rule.cond_map.yes_no}\n+            #if $cond_rule.cond_map.yes_no == \'yes\':\n+                --custom_map $cond_rule.cond_map.Custom_map\n+            #end if\n+        #end if\n+\t\n+      \t--tool_dir $__tool_directory__\n+      \t--option $cond.type_selector\n+        --out_log $log\t\t\n+\t\n+        #if $cond.type_selector == \'datasets\':\n+            --input_datas\n+            #for $data in $cond.input_Datasets:\n+                ${data.input}\n+            #end for\n+            --names\n+            #for $data in $cond.input_Datasets:\n+                ${data.input_name}\n+            #end for\n+            #if $cond.advanced.choice == \'true\':\n+      \t    --none ${cond.advanced.None}\n+      \t    --pValue ${cond.advanced.pValue}\n+      \t    --fChange ${cond.advanced.fChange}\n+\t    \t--generate_svg ${cond.advanced.generateSvg}\n+\t    \t--generate_pdf ${cond.advanced.generatePdf}\n+\t    --generate_ras ${cond.advanced.generateRas}\n+\t#else \n+\t    --none true\n+\t    --pValue 0.05\n+\t    --fChange 1.5\n+\t    --generate_svg false\n+\t    --generate_pdf true\n+\t    --generate_ras false\n+\t#end if\n+        #elif $cond.type_selector == \'dataset_class\':\n+            --input_data ${input_data}\n+            --input_class ${input_class}\n+            #if $cond.advanced.choice == \'true\':\n+      \t    --none ${cond.advanced.None}\n+      \t    --pValue ${cond.advanced.pValue}\n+      \t    --fChange ${cond.advanced.fChange}\n+\t    --generate_svg ${cond.advanced.generateSvg}\n+\t    --generate_pdf ${cond.advanced.generatePdf}\n+\t    --generate_ras ${cond.advanced.generateRas}\n+\t#else \n+\t    --none true\n+\t    --pValue 0.05\n+\t    --fChange 1.5\n+\t    --generate_svg false\n+\t    --generate_pdf true\n+\t    --generate_ras false\n+\t#end if\n+        #end if\n+        #if $cond.type_selector == \'datasets_rasonly\':\n+            --input_datas ${input_Datasets}\n+            --single_ras_file $ras_single\n+            --none ${cond.advanced.None}\n+        #end if\n+        ]]>\n+    </command>\n+\n+    <inputs>\n+        <conditional name="cond_rule">\n+            <expand macro="options"/>\n+            <when value="HMRcore">\n+            </when>\n+            <when value="Recon">\n+            </when>\n+            <when value="Custom">\n+                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\n+                <conditional name="cond_map">\n+                    <param name="yes_no" type="select" label="Custom map? 
(optional)">\n+                        <option value="no" selected="true">no</option>\n+                        <option value="yes">yes</option>\n+                    </param>\n+                    <when value="yes">\n+                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\n+                    </when>\n+                    <when value="no">\n+                    </when>\n+                </conditional>\n+            </when>\n+        </conditional>\n+        <conditional name="cond">\n+            <param name="type_selector" '..b'ctor\'] == "datasets_rasonly"</filter>\n+        </data>\n+        <collection name="results" type="list" label="MaREA - Results">\n+        <filter>cond[\'type_selector\'] == "datasets" or cond[\'type_selector\'] == "dataset_class"</filter>\n+            <discover_datasets pattern="__name_and_ext__" directory="result"/>\n+        </collection>\n+\t<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">\n+\t    <filter>cond[\'type_selector\'] != "datasets_rasonly" and cond[\'advanced\'][\'choice\'] and cond[\'advanced\'][\'generateRas\']</filter>\n+    \t    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n+\t</collection>\n+\t\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="pValue" value="0.56"/>\n+            <output name="log" file="log.txt"/>\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+\n+What it does\n+-------------\n+\n+This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.\n+\n+Accepted files are: \n+    - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");\n+    - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n+\n+Optional files:\n+    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:\n+\n+\t* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;\n+\t* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).\n+    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.\n+\n+The tool generates:\n+    1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;\n+    2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;\n+    3) a log file (.txt).\n+\n+RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.\n+\n+Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).\n+\n+To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.\n+\n+Output files will be named as classA_vs_classB. 
Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".\n+\n+\n+Example input\n+-------------\n+\n+**"Custom Rules"** option:\n+\n+Custom Rules Dastaset:\n+\n+@CUSTOM_RULES_EXEMPLE@\n+\n+**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:\n+\n+RNA-seq Dataset 1:\t\t\t\t\t\t\n+\n+@DATASET_EXEMPLE1@\n+\n+RNA-seq Dataset 2:\n+\n+@DATASET_EXEMPLE2@\n+\n+**"RNAseq of all samples + sample group specification"** option:\n+\n+RNA-seq Dataset:\n+\n+@DATASET_EXEMPLE1@\n+\n+Class-file:\n+\n++------------+------------+   \n+| Patient_ID |    class   |   \n++============+============+   \n+| TCGAAA3529 |     MSI    |   \n++------------+------------+    \n+| TCGAA62671 |     MSS    |    \n++------------+------------+    \n+| TCGAA62672 |     MSI    |   \n++------------+------------+\n+\n+|\n+\n+.. class:: infomark\n+\n+**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\n+\n+.. class:: infomark\n+\n+**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\n+\n+@REFERENCE@\n+\n+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\n+.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\n+.. _MaREA cluster analysis: http://link del tool di cluster.org\n+\n+]]>\n+    </help>\n+    <expand macro="citations" />\n+</tool>\n+\t\n'
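The help text above describes the two accepted input layouts. A small sketch, with made-up file names, of how such inputs can be read and split by class with pandas, mirroring the "RNAseq of all samples + sample group specification" option (only the first two columns of the class file are used, extra columns are disregarded as the tool warns):

    import pandas as pd

    # tab-separated expression table: gene ids in column 0, one column per sample
    expr = pd.read_csv('rnaseq_all_samples.tsv', sep='\t', header=0)
    # class file: sample id in column 0, class/condition label in column 1
    classes = pd.read_csv('sample_classes.tsv', sep='\t', header=0).iloc[:, :2].astype(str)

    # group the sample columns by class label, e.g. {'MSI': <DataFrame>, 'MSS': <DataFrame>}
    by_class = {}
    for label, rows in classes.groupby(classes.columns[1]):
        samples = [s for s in rows.iloc[:, 0] if s in expr.columns]
        by_class[label] = expr[[expr.columns[0]] + samples]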
diff -r 9fcb0e8d6d47 -r e88efefbd015 Desktop/Marea/marea_cluster.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Desktop/Marea/marea_cluster.py Tue Oct 15 12:21:16 2019 -0400
b'@@ -0,0 +1,401 @@\n+# -*- coding: utf-8 -*-\n+"""\n+Created on Mon Jun 3 19:51:00 2019\n+@author: Narger\n+"""\n+\n+import sys\n+import argparse\n+import os\n+from sklearn.datasets import make_blobs\n+from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering\n+from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster\n+import matplotlib\n+matplotlib.use(\'agg\')\n+import matplotlib.pyplot as plt\n+import scipy.cluster.hierarchy as shc   \n+import matplotlib.cm as cm\n+import numpy as np\n+import pandas as pd\n+\n+################################# process args ###############################\n+\n+def process_args(args):\n+    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n+                                     description = \'process some value\\\'s\' +\n+                                     \' genes to create class.\')\n+\n+    parser.add_argument(\'-ol\', \'--out_log\', \n+                        help = "Output log")\n+    \n+    parser.add_argument(\'-in\', \'--input\',\n+                        type = str,\n+                        help = \'input dataset\')\n+    \n+    parser.add_argument(\'-cy\', \'--cluster_type\',\n+                        type = str,\n+                        choices = [\'kmeans\', \'meanshift\', \'dbscan\', \'hierarchy\'],\n+                        default = \'kmeans\',\n+                        help = \'choose clustering algorythm\')\n+    \n+    parser.add_argument(\'-k1\', \'--k_min\', \n+                        type = int,\n+                        default = 2,\n+                        help = \'choose minimun cluster number to be generated\')\n+    \n+    parser.add_argument(\'-k2\', \'--k_max\', \n+                        type = int,\n+                        default = 7,\n+                        help = \'choose maximum cluster number to be generated\')\n+    \n+    parser.add_argument(\'-el\', \'--elbow\', \n+                        type = str,\n+                        default = \'false\',\n+                        choices = [\'true\', \'false\'],\n+                        help = \'choose if you want to generate an elbow plot for kmeans\')\n+    \n+    parser.add_argument(\'-si\', \'--silhouette\', \n+                        type = str,\n+                        default = \'false\',\n+                        choices = [\'true\', \'false\'],\n+                        help = \'choose if you want silhouette plots\')\n+    \n+    parser.add_argument(\'-db\', \'--davies\', \n+                        type = str,\n+                        default = \'false\',\n+                        choices = [\'true\', \'false\'],\n+                        help = \'choose if you want davies bouldin scores\')\n+    \n+    parser.add_argument(\'-td\', \'--tool_dir\',\n+                        type = str,\n+                        required = True,\n+                        help = \'your tool directory\')\n+                        \n+    parser.add_argument(\'-ms\', \'--min_samples\',\n+                        type = int,\n+                        help = \'min samples for dbscan (optional)\')\n+                        \n+    parser.add_argument(\'-ep\', \'--eps\',\n+                        type = int,\n+                        help = \'eps for dbscan (optional)\')\n+                        \n+    parser.add_argument(\'-bc\', \'--best_cluster\',\n+                        type = str,\n+                        help = \'output of best cluster tsv\')\n+    \t\t\t\t\n+    \n+    \n+    args = parser.parse_args()\n+    
return args\n+\n+########################### warning ###########################################\n+\n+def warning(s):\n+    args = process_args(sys.argv)\n+    with open(args.out_log, \'a\') as log:\n+        log.write(s + "\\n\\n")\n+    print(s)\n+\n+########################## read dataset ######################################\n+    \n+def read_dataset(dataset):\n+    try:\n+        dataset = pd.read_csv(dataset, sep = \'\\t\', header = 0)\n+    except pd.errors.EmptyDataError:\n+        sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n+    if len(dataset.columns) < 2:\n+        sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n+    return datase'..b'   # Label the silhouette plots with their cluster numbers at the middle\n+        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n+        \n+        # Compute the new y_lower for next plot\n+        y_lower = y_upper + 10  # 10 for the 0 samples\n+    \n+        ax1.set_title("The silhouette plot for the various clusters.")\n+        ax1.set_xlabel("The silhouette coefficient values")\n+        ax1.set_ylabel("Cluster label")\n+        \n+        # The vertical line for average silhouette score of all the values\n+        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")\n+    \n+        ax1.set_yticks([])  # Clear the yaxis labels / ticks\n+        ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n+        \n+        \n+        plt.suptitle(("Silhouette analysis for clustering on sample data "\n+                          "with n_clusters = " + str(n_clusters) + "\\nAverage silhouette_score = " + str(silhouette_avg)), fontsize=12, fontweight=\'bold\')\n+            \n+            \n+        plt.savefig(path, bbox_inches=\'tight\')\n+            \n+######################## dbscan ##############################################\n+    \n+def dbscan(dataset, eps, min_samples):\n+    if not os.path.exists(\'clustering\'):\n+        os.makedirs(\'clustering\')\n+        \n+    if eps is not None:\n+    \tclusterer = DBSCAN(eps = eps, min_samples = min_samples)\n+    else:\n+    \tclusterer = DBSCAN()\n+    \n+    clustering = clusterer.fit(dataset)\n+    \n+    core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)\n+    core_samples_mask[clustering.core_sample_indices_] = True\n+    labels = clustering.labels_\n+\n+    # Number of clusters in labels, ignoring noise if present.\n+    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\n+    \n+    \n+    ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL\n+    \n+    \n+    write_to_csv(dataset, labels, \'clustering/dbscan_results.tsv\')\n+    \n+########################## hierachical #######################################\n+    \n+def hierachical_agglomerative(dataset, k_min, k_max):\n+\n+    if not os.path.exists(\'clustering\'):\n+        os.makedirs(\'clustering\')\n+    \n+    plt.figure(figsize=(10, 7))  \n+    plt.title("Customer Dendograms")  \n+    shc.dendrogram(shc.linkage(dataset, method=\'ward\'))  \n+    fig = plt.gcf()\n+    fig.savefig(\'clustering/dendogram.png\', dpi=200)\n+    \n+    range_n_clusters = [i for i in range(k_min, k_max+1)]\n+\n+    for n_clusters in range_n_clusters:\n+        \n+        cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity=\'euclidean\', linkage=\'ward\')  \n+        cluster.fit_predict(dataset)  \n+        cluster_labels = cluster.labels_\n+        \n+        silhouette_avg = silhouette_score(dataset, cluster_labels)\n+        write_to_csv(dataset, cluster_labels, 
\'clustering/hierarchical_with_\' + str(n_clusters) + \'_clusters.tsv\')\n+        #warning("For n_clusters =", n_clusters,\n+              #"The average silhouette_score is :", silhouette_avg)\n+        \n+        \n+       \n+\n+    \n+############################# main ###########################################\n+\n+\n+def main():\n+    if not os.path.exists(\'clustering\'):\n+        os.makedirs(\'clustering\')\n+\n+    args = process_args(sys.argv)\n+    \n+    #Data read\n+    \n+    X = read_dataset(args.input)\n+    X = pd.DataFrame.to_dict(X, orient=\'list\')\n+    X = rewrite_input(X)\n+    X = pd.DataFrame.from_dict(X, orient = \'index\')\n+    \n+    for i in X.columns:\n+        tmp = X[i][0]\n+        if tmp == None:\n+            X = X.drop(columns=[i])\n+    \n+    \n+    if args.cluster_type == \'kmeans\':\n+        kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)\n+    \n+    if args.cluster_type == \'dbscan\':\n+        dbscan(X, args.eps, args.min_samples)\n+        \n+    if args.cluster_type == \'hierarchy\':\n+        hierachical_agglomerative(X, args.k_min, args.k_max)\n+        \n+##############################################################################\n+\n+if __name__ == "__main__":\n+    main()\n'
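marea_cluster.py scans k from --k_min to --k_max and writes a "best cluster" table. As a rough sketch of that kind of model selection (not the script itself; the file name and k range are placeholders), k-means can be fitted for each k and the k with the highest average silhouette score kept:

    import pandas as pd
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    # genes x samples table, transposed so that samples are the rows being clustered
    X = pd.read_csv('expression_matrix.tsv', sep='\t', header=0, index_col=0).T

    best = (None, -1.0, None)                 # (k, score, labels)
    for k in range(2, 8):                     # k_min = 2, k_max = 7, as in the script defaults
        labels = KMeans(n_clusters=k, random_state=0).fit_predict(X)
        score = silhouette_score(X, labels)
        if score > best[1]:
            best = (k, score, labels)

    k, score, labels = best
    pd.DataFrame({'sample': X.index, 'cluster': labels}).to_csv('best_cluster.tsv', sep='\t', index=False)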
diff -r 9fcb0e8d6d47 -r e88efefbd015 Desktop/Marea/marea_cluster.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Desktop/Marea/marea_cluster.xml Tue Oct 15 12:21:16 2019 -0400
@@ -0,0 +1,95 @@
+<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6">
+    <description></description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.25.1">pandas</requirement>
+        <requirement type="package" version="1.1.0">scipy</requirement>
+        <requirement type="package" version="0.10.1">cobra</requirement>
+        <requirement type="package" version="0.21.3">scikit-learn</requirement>
+        <requirement type="package" version="2.2.2">matplotlib</requirement>
+        <requirement type="package" version="1.17">numpy</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+        python $__tool_directory__/marea_cluster.py
+        --input $input
+        --tool_dir $__tool_directory__
+        --out_log $log
+        --best_cluster $best_cluster
+        --cluster_type ${data.clust_type}
+        #if $data.clust_type == 'kmeans':
+            --k_min ${data.k_min}
+            --k_max ${data.k_max}
+            --elbow ${data.elbow}
+            --silhouette ${data.silhouette}
+        #end if
+        #if $data.clust_type == 'dbscan':
+            #if $data.dbscan_advanced.advanced == 'true'
+                --eps ${data.dbscan_advanced.eps}
+                --min_samples ${data.dbscan_advanced.min_samples}
+            #end if
+        #end if
+        #if $data.clust_type == 'hierarchy':
+            --k_min ${data.k_min}
+            --k_max ${data.k_max}
+        #end if
+        ]]>
+    </command>
+    <inputs>
+        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
+        
+        <conditional name="data">
+            <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
+                <option value="kmeans" selected="true">KMeans</option>
+                <option value="dbscan">DBSCAN</option>
+                <option value="hierarchy">Agglomerative Hierarchical</option>
+            </param>
+            <when value="kmeans">
+                <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
+                <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
+                <param name="elbow" argument="--elbow" type="boolean" checked="true" label="Draw the elbow plot from k-min to k-max"/>
+                <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
+            </when>
+            <when value="dbscan">
+                <conditional name="dbscan_advanced">
+                    <param name="advanced" type="boolean" checked="false" label="Use custom parameters for DBSCAN? (if not, default values will be used)" />
+                    <when value="false"></when>
+                    <when value="true">
+                        <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
+                        <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="hierarchy">
+                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
+                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="log" label="${tool.name} - Log" />
+        <data format="tabular" name="best_cluster" label="${tool.name} - Best cluster" />
+        <collection name="results" type="list" label="${tool.name} - Plots and results">
+            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
+        </collection>
+    </outputs>
+    <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool groups the samples of a tabular dataset using k-means, DBSCAN or agglomerative hierarchical clustering and returns the cluster assignments, together with the requested evaluation outputs (elbow and silhouette plots, best-cluster table).
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
+
+
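For the DBSCAN branch above, the wrapper either forwards --eps and --min_samples or lets the script fall back to scikit-learn's defaults. A minimal sketch with placeholder data (0.5 and 5 are just the wrapper's default widget values):

    import numpy as np
    from sklearn.cluster import DBSCAN

    X = np.random.rand(100, 5)                                   # placeholder data (samples x features)
    labels = DBSCAN(eps=0.5, min_samples=5).fit(X).labels_
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)   # label -1 marks noise points
    print(n_clusters, 'clusters found')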
diff -r 9fcb0e8d6d47 -r e88efefbd015 Desktop/Marea/marea_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Desktop/Marea/marea_macros.xml Tue Oct 15 12:21:16 2019 -0400
@@ -0,0 +1,92 @@
+<macros>
+
+    <xml name="options">
+        <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:">
+            <option value="HMRcore" selected="true">HMRcore rules</option>
+            <option value="Recon">Recon 2.2 rules</option>
+            <option value="Custom">Custom rules</option>
+        </param>
+    </xml>
+
+   <token name="@CUSTOM_RULES_EXEMPLE@">
+
++--------------------+-------------------------------+
+|         id         |     rule (with entrez-id)     |
++====================+===============================+
+|        SHMT1       |        155060 or 10357        |
++--------------------+-------------------------------+
+|        NIT2        |      155060 or 100134869      |
++--------------------+-------------------------------+
+| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
++--------------------+-------------------------------+
+
+|
+
+    </token>
+
+    <token name="@DATASET_EXEMPLE1@">
+
++------------+------------+------------+------------+   
+|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   
++============+============+============+============+   
+| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   
++------------+------------+------------+------------+    
+| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    
++------------+------------+------------+------------+    
+| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   
++------------+------------+------------+------------+    
+| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   
++------------+------------+------------+------------+    
+| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   
++------------+------------+------------+------------+ 
+   
+|
+
+    </token>
+
+    <token name="@DATASET_EXEMPLE2@">
+
++-------------+------------+------------+------------+
+| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
++=============+============+============+============+
+|    A1BG     |  0.523167  |  0.371355  |  0.925661  |
++-------------+------------+------------+------------+
+|    A1CF     |  0.568765  |  0.765567  |  0.456789  |
++-------------+------------+------------+------------+
+|     A2M     |  0.876545  |  0.768933  |  0.987654  |
++-------------+------------+------------+------------+
+|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |
++-------------+------------+------------+------------+
+|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |
++-------------+------------+------------+------------+
+
+|
+
+    </token>
+
+    <token name="@REFERENCE@">
+
+This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_.
+
+.. _BIMIB: http://sito di bio.org
+.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English
+.. _University of Milan - Bicocca: https://www.unimib.it/
+
+    </token>
+
+    <xml name="citations">
+        <citations> <!-- example citation -->
+            <citation type="bibtex">
+@online{lh32017,
+  author = {Alex Graudenzi and Davide Maspero and Claudio Isella and Marzia Di Filippo and Giancarlo Mauri and Enzo Medico and Marco Antoniotti and Chiara Damiani},
+  year = {2018},
+  title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq data},
+  publisher = {bioRxiv},
+  journal = {bioRxiv},
+  url = {https://www.biorxiv.org/content/early/2018/01/16/248724},
+}
+            </citation>
+        </citations>
+    </xml>
+
+</macros>
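The @CUSTOM_RULES_EXEMPLE@ table above corresponds to the custom-rules file accepted by marea.xml (reaction ID in column 1, GPR rule in column 2). A small sketch, with a made-up file name, of loading such a table into the reaction-to-rule mapping the earlier RAS sketch consumes:

    import pandas as pd

    # hypothetical tab-separated file following the @CUSTOM_RULES_EXEMPLE@ layout
    rules = pd.read_csv('custom_rules.tsv', sep='\t', header=0)
    gpr_by_reaction = dict(zip(rules.iloc[:, 0].astype(str), rules.iloc[:, 1].astype(str)))
    print(gpr_by_reaction.get('SHMT1'))   # e.g. '155060 or 10357'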