Repository 'marea'
hg clone https://toolshed.g2.bx.psu.edu/repos/bimib/marea

Changeset 31:944e15aa970a (2019-10-15)
Previous changeset 30:e88efefbd015 (2019-10-15) Next changeset 32:b795e3e163e0 (2019-10-16)
Commit message:
Uploaded
modified:
Marea/marea.py
Marea/marea.xml
Marea/marea_cluster.py
Marea/marea_cluster.xml
removed:
Desktop/Marea/marea.py
Desktop/Marea/marea.xml
Desktop/Marea/marea_cluster.py
Desktop/Marea/marea_cluster.xml
Desktop/Marea/marea_macros.xml
b
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea.py
--- a/Desktop/Marea/marea.py Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,861 +0,0 @@\n-from __future__ import division\n-import sys\n-import pandas as pd\n-import itertools as it\n-import scipy.stats as st\n-import collections\n-import lxml.etree as ET\n-import shutil\n-import pickle as pk\n-import math\n-import os\n-import argparse\n-from svglib.svglib import svg2rlg\n-from reportlab.graphics import renderPDF\n-\n-########################## argparse ##########################################\n-\n-def process_args(args):\n-    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n-                                     description = \'process some value\\\'s\'+\n-                                     \' genes to create a comparison\\\'s map.\')\n-    parser.add_argument(\'-rs\', \'--rules_selector\', \n-                        type = str,\n-                        default = \'HMRcore\',\n-                        choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n-                        help = \'chose which type of dataset you want use\')\n-    parser.add_argument(\'-cr\', \'--custom\',\n-                        type = str,\n-                        help=\'your dataset if you want custom rules\')\n-    parser.add_argument(\'-na\', \'--names\', \n-                        type = str,\n-                        nargs = \'+\', \n-                        help = \'input names\')\n-    parser.add_argument(\'-n\', \'--none\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'], \n-                        help = \'compute Nan values\')\n-    parser.add_argument(\'-pv\' ,\'--pValue\', \n-                        type = float, \n-                        default = 0.05, \n-                        help = \'P-Value threshold (default: %(default)s)\')\n-    parser.add_argument(\'-fc\', \'--fChange\', \n-                        type = float, \n-                        default = 1.5, \n-                        help = \'Fold-Change threshold (default: %(default)s)\')\n-    parser.add_argument(\'-td\', \'--tool_dir\',\n-                        type = str,\n-                        required = True,\n-                        help = \'your tool directory\')\n-    parser.add_argument(\'-op\', \'--option\', \n-                        type = str, \n-                        choices = [\'datasets\', \'dataset_class\', \'datasets_rasonly\'],\n-                        help=\'dataset or dataset and class\')\n-    parser.add_argument(\'-ol\', \'--out_log\', \n-                        help = "Output log")    \n-    parser.add_argument(\'-ids\', \'--input_datas\', \n-                        type = str,\n-                        nargs = \'+\', \n-                        help = \'input datasets\')\n-    parser.add_argument(\'-id\', \'--input_data\',\n-                        type = str,\n-                        help = \'input dataset\')\n-    parser.add_argument(\'-ic\', \'--input_class\', \n-                        type = str, \n-                        help = \'sample group specification\')\n-    parser.add_argument(\'-cm\', \'--custom_map\', \n-                        type = str, \n-                        help = \'custom map\')\n-    parser.add_argument(\'-yn\', \'--yes_no\', \n-                        type = str,\n-                        choices = [\'yes\', \'no\'],\n-                        help = \'if make or not custom map\')\n-    parser.add_argument(\'-gs\', \'--generate_svg\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'], \n-                        help = \'generate svg map\')\n-    parser.add_argument(\'-gp\', \'--generate_pdf\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'], \n-                        help = \'generate pdf map\')\n-    parser.add_argument(\'-gr\', \'--generate_ras\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'generate reaction activity score\')\n-    parser.add_argument(\'-sr\', \'--single_ras_file\',  \n-                  '..b'taset.iloc[0, 0], name) \n-            \n-        if args.rules_selector != \'Custom\':\n-            genes = data_gene(dataset, type_gene, name, None)\n-            ids, rules = load_id_rules(recon.get(type_gene))\n-        elif args.rules_selector == \'Custom\':\n-            genes = data_gene(dataset, type_gene, name, gene_in_rule)\n-                \n-        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-\n-        create_ras(resolve_rules, name, True)\n-          \n-        if err != None and err:\n-            warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n-                + name + \', the expression level for this gene \' +\n-                \'will be considered NaN\\n\')\n-        \n-        print(\'execution succeded\')\n-        return None\n-    \n-    \n-    elif args.option == \'datasets\':\n-        num = 1\n-        for i, j in zip(args.input_datas, args.names):\n-\n-            name = name_dataset(j, num)\n-            dataset = read_dataset(i, name)\n-\n-            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-\n-            type_gene = gene_type(dataset.iloc[0, 0], name) \n-            \n-            if args.rules_selector != \'Custom\':\n-                genes = data_gene(dataset, type_gene, name, None)\n-                ids, rules = load_id_rules(recon.get(type_gene))\n-            elif args.rules_selector == \'Custom\':\n-                genes = data_gene(dataset, type_gene, name, gene_in_rule)\n-                \n-            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-\n-            if generate_ras:\n-                create_ras(resolve_rules, name, False)\n-            \n-            if err != None and err:\n-                warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n-                    + name + \', the expression level for this gene \' +\n-                    \'will be considered NaN\\n\')\n-            if resolve_rules != None:\n-                class_pat[name] = list(map(list, zip(*resolve_rules.values())))\n-            num += 1\n-    elif args.option == \'dataset_class\':\n-        name = \'RNAseq\'\n-        dataset = read_dataset(args.input_data, name)\n-        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-        type_gene = gene_type(dataset.iloc[0, 0], name)\n-        classes = read_dataset(args.input_class, \'class\')\n-        if not len(classes.columns) == 2:\n-            warning(\'Warning: more than 2 columns in class file. Extra\' +\n-                    \'columns have been disregarded\\n\')\n-        classes = classes.astype(str)\n-        if args.rules_selector != \'Custom\':\n-            genes = data_gene(dataset, type_gene, name, None)\n-            ids, rules = load_id_rules(recon.get(type_gene))\n-        elif args.rules_selector == \'Custom\':\n-            genes = data_gene(dataset, type_gene, name, gene_in_rule)\n-        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-        if err != None and err:\n-            warning(\'Warning: gene\\n\'+str(err)+\'\\nnot found in class \'\n-                    + name + \', the expression level for this gene \' +\n-                    \'will be considered NaN\\n\')\n-        if resolve_rules != None:\n-            class_pat = split_class(classes, resolve_rules)\n-    \n-    \t\n-    if args.rules_selector == \'Custom\':\n-        if args.yes_no == \'yes\':\n-            try:\n-                core_map = ET.parse(args.custom_map)\n-            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n-                sys.exit(\'Execution aborted: custom map in wrong format\')\n-        elif args.yes_no == \'no\':\n-            core_map = ET.parse(args.tool_dir + \'/local/HMRcoreMap.svg\')\n-    else:       \n-        core_map = ET.parse(args.tool_dir+\'/local/HMRcoreMap.svg\')\n-        \n-    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)\n-        \n-    print(\'Execution succeded\')\n-\n-    return None\n-\n-###############################################################################\n-\n-if __name__ == "__main__":\n-    main()\n'
b
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea.xml
--- a/Desktop/Marea/marea.xml Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,274 +0,0 @@\n-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">\n-    <description></description>\n-    <macros>\n-        <import>marea_macros.xml</import>\n-    </macros>\n-    <requirements>\n-        <requirement type="package" version="0.23.0">pandas</requirement>\n-        <requirement type="package" version="1.1.0">scipy</requirement>\n-        <requirement type="package" version="0.10.1">cobra</requirement>\n-        <requirement type="package" version="4.2.1">lxml</requirement>\n-        <requirement type="package" version="0.8.1">svglib</requirement>\n-        <requirement type="package" version="3.4.0">reportlab</requirement>\n-    </requirements>\n-    <command detect_errors="exit_code">\n-        <![CDATA[\n-      \tpython $__tool_directory__/marea.py\n-        --rules_selector $cond_rule.rules_selector\n-        #if $cond_rule.rules_selector == \'Custom\':\n-            --custom ${cond_rule.Custom_rules}\n-            --yes_no ${cond_rule.cond_map.yes_no}\n-            #if $cond_rule.cond_map.yes_no == \'yes\':\n-                --custom_map $cond_rule.cond_map.Custom_map\n-            #end if\n-        #end if\n-\t\n-      \t--tool_dir $__tool_directory__\n-      \t--option $cond.type_selector\n-        --out_log $log\t\t\n-\t\n-        #if $cond.type_selector == \'datasets\':\n-            --input_datas\n-            #for $data in $cond.input_Datasets:\n-                ${data.input}\n-            #end for\n-            --names\n-            #for $data in $cond.input_Datasets:\n-                ${data.input_name}\n-            #end for\n-            #if $cond.advanced.choice == \'true\':\n-      \t    --none ${cond.advanced.None}\n-      \t    --pValue ${cond.advanced.pValue}\n-      \t    --fChange ${cond.advanced.fChange}\n-\t    \t--generate_svg ${cond.advanced.generateSvg}\n-\t    \t--generate_pdf ${cond.advanced.generatePdf}\n-\t    --generate_ras ${cond.advanced.generateRas}\n-\t#else \n-\t    --none true\n-\t    --pValue 0.05\n-\t    --fChange 1.5\n-\t    --generate_svg false\n-\t    --generate_pdf true\n-\t    --generate_ras false\n-\t#end if\n-        #elif $cond.type_selector == \'dataset_class\':\n-            --input_data ${input_data}\n-            --input_class ${input_class}\n-            #if $cond.advanced.choice == \'true\':\n-      \t    --none ${cond.advanced.None}\n-      \t    --pValue ${cond.advanced.pValue}\n-      \t    --fChange ${cond.advanced.fChange}\n-\t    --generate_svg ${cond.advanced.generateSvg}\n-\t    --generate_pdf ${cond.advanced.generatePdf}\n-\t    --generate_ras ${cond.advanced.generateRas}\n-\t#else \n-\t    --none true\n-\t    --pValue 0.05\n-\t    --fChange 1.5\n-\t    --generate_svg false\n-\t    --generate_pdf true\n-\t    --generate_ras false\n-\t#end if\n-        #end if\n-        #if $cond.type_selector == \'datasets_rasonly\':\n-            --input_datas ${input_Datasets}\n-            --single_ras_file $ras_single\n-            --none ${cond.advanced.None}\n-        #end if\n-        ]]>\n-    </command>\n-\n-    <inputs>\n-        <conditional name="cond_rule">\n-            <expand macro="options"/>\n-            <when value="HMRcore">\n-            </when>\n-            <when value="Recon">\n-            </when>\n-            <when value="Custom">\n-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\n-                <conditional name="cond_map">\n-                    <param name="yes_no" type="select" label="Custom map? (optional)">\n-                        <option value="no" selected="true">no</option>\n-                        <option value="yes">yes</option>\n-                    </param>\n-                    <when value="yes">\n-                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\n-                    </when>\n-                    <when value="no">\n-                    </when>\n-                </conditional>\n-            </when>\n-        </conditional>\n-        <conditional name="cond">\n-            <param name="type_selector" '..b'ctor\'] == "datasets_rasonly"</filter>\n-        </data>\n-        <collection name="results" type="list" label="MaREA - Results">\n-        <filter>cond[\'type_selector\'] == "datasets" or cond[\'type_selector\'] == "dataset_class"</filter>\n-            <discover_datasets pattern="__name_and_ext__" directory="result"/>\n-        </collection>\n-\t<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">\n-\t    <filter>cond[\'type_selector\'] != "datasets_rasonly" and cond[\'advanced\'][\'choice\'] and cond[\'advanced\'][\'generateRas\']</filter>\n-    \t    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n-\t</collection>\n-\t\n-    </outputs>\n-    <tests>\n-        <test>\n-            <param name="pValue" value="0.56"/>\n-            <output name="log" file="log.txt"/>\n-        </test>\n-    </tests>\n-    <help>\n-<![CDATA[\n-\n-What it does\n--------------\n-\n-This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.\n-\n-Accepted files are: \n-    - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");\n-    - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n-\n-Optional files:\n-    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:\n-\n-\t* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;\n-\t* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).\n-    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.\n-\n-The tool generates:\n-    1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;\n-    2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;\n-    3) a log file (.txt).\n-\n-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.\n-\n-Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).\n-\n-To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.\n-\n-Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".\n-\n-\n-Example input\n--------------\n-\n-**"Custom Rules"** option:\n-\n-Custom Rules Dastaset:\n-\n-@CUSTOM_RULES_EXEMPLE@\n-\n-**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:\n-\n-RNA-seq Dataset 1:\t\t\t\t\t\t\n-\n-@DATASET_EXEMPLE1@\n-\n-RNA-seq Dataset 2:\n-\n-@DATASET_EXEMPLE2@\n-\n-**"RNAseq of all samples + sample group specification"** option:\n-\n-RNA-seq Dataset:\n-\n-@DATASET_EXEMPLE1@\n-\n-Class-file:\n-\n-+------------+------------+   \n-| Patient_ID |    class   |   \n-+============+============+   \n-| TCGAAA3529 |     MSI    |   \n-+------------+------------+    \n-| TCGAA62671 |     MSS    |    \n-+------------+------------+    \n-| TCGAA62672 |     MSI    |   \n-+------------+------------+\n-\n-|\n-\n-.. class:: infomark\n-\n-**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\n-\n-.. class:: infomark\n-\n-**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\n-\n-@REFERENCE@\n-\n-.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\n-.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\n-.. _MaREA cluster analysis: http://link del tool di cluster.org\n-\n-]]>\n-    </help>\n-    <expand macro="citations" />\n-</tool>\n-\t\n'
b
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea_cluster.py
--- a/Desktop/Marea/marea_cluster.py Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,401 +0,0 @@\n-# -*- coding: utf-8 -*-\n-"""\n-Created on Mon Jun 3 19:51:00 2019\n-@author: Narger\n-"""\n-\n-import sys\n-import argparse\n-import os\n-from sklearn.datasets import make_blobs\n-from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering\n-from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster\n-import matplotlib\n-matplotlib.use(\'agg\')\n-import matplotlib.pyplot as plt\n-import scipy.cluster.hierarchy as shc   \n-import matplotlib.cm as cm\n-import numpy as np\n-import pandas as pd\n-\n-################################# process args ###############################\n-\n-def process_args(args):\n-    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n-                                     description = \'process some value\\\'s\' +\n-                                     \' genes to create class.\')\n-\n-    parser.add_argument(\'-ol\', \'--out_log\', \n-                        help = "Output log")\n-    \n-    parser.add_argument(\'-in\', \'--input\',\n-                        type = str,\n-                        help = \'input dataset\')\n-    \n-    parser.add_argument(\'-cy\', \'--cluster_type\',\n-                        type = str,\n-                        choices = [\'kmeans\', \'meanshift\', \'dbscan\', \'hierarchy\'],\n-                        default = \'kmeans\',\n-                        help = \'choose clustering algorythm\')\n-    \n-    parser.add_argument(\'-k1\', \'--k_min\', \n-                        type = int,\n-                        default = 2,\n-                        help = \'choose minimun cluster number to be generated\')\n-    \n-    parser.add_argument(\'-k2\', \'--k_max\', \n-                        type = int,\n-                        default = 7,\n-                        help = \'choose maximum cluster number to be generated\')\n-    \n-    parser.add_argument(\'-el\', \'--elbow\', \n-                        type = str,\n-                        default = \'false\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'choose if you want to generate an elbow plot for kmeans\')\n-    \n-    parser.add_argument(\'-si\', \'--silhouette\', \n-                        type = str,\n-                        default = \'false\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'choose if you want silhouette plots\')\n-    \n-    parser.add_argument(\'-db\', \'--davies\', \n-                        type = str,\n-                        default = \'false\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'choose if you want davies bouldin scores\')\n-    \n-    parser.add_argument(\'-td\', \'--tool_dir\',\n-                        type = str,\n-                        required = True,\n-                        help = \'your tool directory\')\n-                        \n-    parser.add_argument(\'-ms\', \'--min_samples\',\n-                        type = int,\n-                        help = \'min samples for dbscan (optional)\')\n-                        \n-    parser.add_argument(\'-ep\', \'--eps\',\n-                        type = int,\n-                        help = \'eps for dbscan (optional)\')\n-                        \n-    parser.add_argument(\'-bc\', \'--best_cluster\',\n-                        type = str,\n-                        help = \'output of best cluster tsv\')\n-    \t\t\t\t\n-    \n-    \n-    args = parser.parse_args()\n-    return args\n-\n-########################### warning ###########################################\n-\n-def warning(s):\n-    args = process_args(sys.argv)\n-    with open(args.out_log, \'a\') as log:\n-        log.write(s + "\\n\\n")\n-    print(s)\n-\n-########################## read dataset ######################################\n-    \n-def read_dataset(dataset):\n-    try:\n-        dataset = pd.read_csv(dataset, sep = \'\\t\', header = 0)\n-    except pd.errors.EmptyDataError:\n-        sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n-    if len(dataset.columns) < 2:\n-        sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n-    return datase'..b'   # Label the silhouette plots with their cluster numbers at the middle\n-        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n-        \n-        # Compute the new y_lower for next plot\n-        y_lower = y_upper + 10  # 10 for the 0 samples\n-    \n-        ax1.set_title("The silhouette plot for the various clusters.")\n-        ax1.set_xlabel("The silhouette coefficient values")\n-        ax1.set_ylabel("Cluster label")\n-        \n-        # The vertical line for average silhouette score of all the values\n-        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")\n-    \n-        ax1.set_yticks([])  # Clear the yaxis labels / ticks\n-        ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n-        \n-        \n-        plt.suptitle(("Silhouette analysis for clustering on sample data "\n-                          "with n_clusters = " + str(n_clusters) + "\\nAverage silhouette_score = " + str(silhouette_avg)), fontsize=12, fontweight=\'bold\')\n-            \n-            \n-        plt.savefig(path, bbox_inches=\'tight\')\n-            \n-######################## dbscan ##############################################\n-    \n-def dbscan(dataset, eps, min_samples):\n-    if not os.path.exists(\'clustering\'):\n-        os.makedirs(\'clustering\')\n-        \n-    if eps is not None:\n-    \tclusterer = DBSCAN(eps = eps, min_samples = min_samples)\n-    else:\n-    \tclusterer = DBSCAN()\n-    \n-    clustering = clusterer.fit(dataset)\n-    \n-    core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)\n-    core_samples_mask[clustering.core_sample_indices_] = True\n-    labels = clustering.labels_\n-\n-    # Number of clusters in labels, ignoring noise if present.\n-    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\n-    \n-    \n-    ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL\n-    \n-    \n-    write_to_csv(dataset, labels, \'clustering/dbscan_results.tsv\')\n-    \n-########################## hierachical #######################################\n-    \n-def hierachical_agglomerative(dataset, k_min, k_max):\n-\n-    if not os.path.exists(\'clustering\'):\n-        os.makedirs(\'clustering\')\n-    \n-    plt.figure(figsize=(10, 7))  \n-    plt.title("Customer Dendograms")  \n-    shc.dendrogram(shc.linkage(dataset, method=\'ward\'))  \n-    fig = plt.gcf()\n-    fig.savefig(\'clustering/dendogram.png\', dpi=200)\n-    \n-    range_n_clusters = [i for i in range(k_min, k_max+1)]\n-\n-    for n_clusters in range_n_clusters:\n-        \n-        cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity=\'euclidean\', linkage=\'ward\')  \n-        cluster.fit_predict(dataset)  \n-        cluster_labels = cluster.labels_\n-        \n-        silhouette_avg = silhouette_score(dataset, cluster_labels)\n-        write_to_csv(dataset, cluster_labels, \'clustering/hierarchical_with_\' + str(n_clusters) + \'_clusters.tsv\')\n-        #warning("For n_clusters =", n_clusters,\n-              #"The average silhouette_score is :", silhouette_avg)\n-        \n-        \n-       \n-\n-    \n-############################# main ###########################################\n-\n-\n-def main():\n-    if not os.path.exists(\'clustering\'):\n-        os.makedirs(\'clustering\')\n-\n-    args = process_args(sys.argv)\n-    \n-    #Data read\n-    \n-    X = read_dataset(args.input)\n-    X = pd.DataFrame.to_dict(X, orient=\'list\')\n-    X = rewrite_input(X)\n-    X = pd.DataFrame.from_dict(X, orient = \'index\')\n-    \n-    for i in X.columns:\n-        tmp = X[i][0]\n-        if tmp == None:\n-            X = X.drop(columns=[i])\n-    \n-    \n-    if args.cluster_type == \'kmeans\':\n-        kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)\n-    \n-    if args.cluster_type == \'dbscan\':\n-        dbscan(X, args.eps, args.min_samples)\n-        \n-    if args.cluster_type == \'hierarchy\':\n-        hierachical_agglomerative(X, args.k_min, args.k_max)\n-        \n-##############################################################################\n-\n-if __name__ == "__main__":\n-    main()\n'
b
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea_cluster.xml
--- a/Desktop/Marea/marea_cluster.xml Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,95 +0,0 @@
-<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6">
-    <description></description>
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="0.25.1">pandas</requirement>
-        <requirement type="package" version="1.1.0">scipy</requirement>
-        <requirement type="package" version="0.10.1">cobra</requirement>
-        <requirement type="package" version="0.21.3">scikit-learn</requirement>
-        <requirement type="package" version="2.2.2">matplotlib</requirement>
- <requirement type="package" version="1.17">numpy</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
-       python $__tool_directory__/marea_cluster.py
-        --input $input
-       --tool_dir $__tool_directory__
-        --out_log $log
-        --best_cluster $best_cluster
-        --cluster_type ${data.clust_type}
-        #if $data.clust_type == 'kmeans':
-         --k_min ${data.k_min}
-         --k_max ${data.k_max}
-         --elbow ${data.elbow}
-         --silhouette ${data.silhouette}
-        #end if
-        #if $data.clust_type == 'dbscan':
-         #if $data.dbscan_advanced.advanced == 'true'
-         --eps ${data.dbscan_advanced.eps}
-         --min_samples ${data.dbscan_advanced.min_samples}
-         #end if
-        #end if
-        #if $data.clust_type == 'hierarchy':
-         --k_min ${data.k_min}
-         --k_max ${data.k_max}
-       #end if
-        ]]>
-    </command>
-    <inputs>
-        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
-        
-        <conditional name="data">
- <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
-                 <option value="kmeans" selected="true">KMeans</option>
-                 <option value="dbscan">DBSCAN</option>
-                 <option value="hierarchy">Agglomerative Hierarchical</option>
-         </param>
-         <when value="kmeans">
-         <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
-         <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
-         <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
-         <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
-         </when>
-         <when value="dbscan">
-         <conditional name="dbscan_advanced">
-         <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
-         <option value="true">Yes</option>
-         <option value="false">No</option>
-         </param>
-         <when value="false"></when>
-         <when value="true">
-         <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
-         <param name="min_samples" argument="min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
-        
-         </when>
-         </conditional>   
-         </when>
-         <when value="hierarchy">
-         <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
-         <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
-         </when>
- </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="log" label="${tool.name} - Log" />
-        <data format="tabular" name="best_cluster" label="${tool.name} - Best cluster" />
-        <collection name="results" type="list" label="${tool.name} - Plots and results">
-            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
-        </collection>
-    </outputs>
-    <help>
-<![CDATA[
-
-What it does
--------------
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
-
-
b
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea_macros.xml
--- a/Desktop/Marea/marea_macros.xml Tue Oct 15 12:21:16 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,92 +0,0 @@
-<macros>
-
-    <xml name="options">
-        <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:">
-            <option value="HMRcore" selected="true">HMRcore rules</option>
-            <option value="Recon">Recon 2.2 rules</option>
-            <option value="Custom">Custom rules</option>
-        </param>
-    </xml>
-
-   <token name="@CUSTOM_RULES_EXEMPLE@">
-
-+--------------------+-------------------------------+
-|         id         |     rule (with entrez-id)     |
-+====================+===============================+
-|        SHMT1       |        155060 or 10357        |
-+--------------------+-------------------------------+
-|        NIT2        |      155060 or 100134869      |
-+--------------------+-------------------------------+
-| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
-+--------------------+-------------------------------+
-
-|
-
-    </token>
-
-    <token name="@DATASET_EXEMPLE1@">
-
-+------------+------------+------------+------------+   
-|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   
-+============+============+============+============+   
-| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   
-+------------+------------+------------+------------+    
-| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    
-+------------+------------+------------+------------+    
-| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   
-+------------+------------+------------+------------+    
-| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   
-+------------+------------+------------+------------+    
-| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   
-+------------+------------+------------+------------+ 
-   
-|
-
-    </token>
-
-    <token name="@DATASET_EXEMPLE2@">
-
-+-------------+------------+------------+------------+
-| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
-+=============+============+============+============+
-|    A1BG     |  0.523167  |  0.371355  |  0.925661  |
-+-------------+------------+------------+------------+
-|    A1CF     |  0.568765  |  0.765567  |  0.456789  |
-+-------------+------------+------------+------------+
-|     A2M     |  0.876545  |  0.768933  |  0.987654  |
-+-------------+------------+------------+------------+
-|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |
-+-------------+------------+------------+------------+
-|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |
-+-------------+------------+------------+------------+
-
-|
-
-    </token>
-
-    <token name="@REFERENCE@">
-
-This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_.
-
-.. _BIMIB: http://sito di bio.org
-.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English
-.. _University of Milan - Bicocca: https://www.unimib.it/
-
-    </token>
-
-    <xml name="citations">
-        <citations> <!--esempio di citazione-->
-            <citation type="bibtex">
-@online{lh32017,
-  author = {Alex Graudenzi, Davide Maspero, Cluadio Isella, Marzia Di Filippo, Giancarlo Mauri, Enzo Medico, Marco Antoniotti, Chiara Damiani},
-  year = {2018},
-  title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq},
-  publisher = {bioRxiv},
-  journal = {bioRxiv},
-  url = {https://www.biorxiv.org/content/early/2018/01/16/248724},
-}
-            </citation>
-        </citations>
-    </xml>
-
-</macros>
b
diff -r e88efefbd015 -r 944e15aa970a Marea/marea.py
--- a/Marea/marea.py Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea.py Tue Oct 15 12:22:43 2019 -0400
[
@@ -709,7 +709,7 @@
         tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv'
         tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")
         tmp_csv = tmp_csv.reset_index()
-        header = ['ids', 'P_Value', 'Average']
+        header = ['ids', 'P_Value', 'Log2(fold change)']
         tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
         
         if create_svg or create_pdf:
b
diff -r e88efefbd015 -r 944e15aa970a Marea/marea.xml
--- a/Marea/marea.xml Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea.xml Tue Oct 15 12:22:43 2019 -0400
[
b'@@ -1,4 +1,4 @@\n-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.2">\n+<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">\n     <description></description>\n     <macros>\n         <import>marea_macros.xml</import>\n@@ -22,21 +22,7 @@\n                 --custom_map $cond_rule.cond_map.Custom_map\n             #end if\n         #end if\n-\t#if $advanced.choice == \'true\':\n-      \t    --none ${advanced.None}\n-      \t    --pValue ${advanced.pValue}\n-      \t    --fChange ${advanced.fChange}\n-\t    --generate_svg ${advanced.generateSvg}\n-\t    --generate_pdf ${advanced.generatePdf}\n-\t    --generate_ras ${advanced.generateRas}\n-\t#else \n-\t    --none true\n-\t    --pValue 0.05\n-\t    --fChange 1.5\n-\t    --generate_svg false\n-\t    --generate_pdf true\n-\t    --generate_ras false\n-\t#end if\n+\t\n       \t--tool_dir $__tool_directory__\n       \t--option $cond.type_selector\n         --out_log $log\t\t\n@@ -50,13 +36,44 @@\n             #for $data in $cond.input_Datasets:\n                 ${data.input_name}\n             #end for\n+            #if $cond.advanced.choice == \'true\':\n+      \t    --none ${cond.advanced.None}\n+      \t    --pValue ${cond.advanced.pValue}\n+      \t    --fChange ${cond.advanced.fChange}\n+\t    \t--generate_svg ${cond.advanced.generateSvg}\n+\t    \t--generate_pdf ${cond.advanced.generatePdf}\n+\t    --generate_ras ${cond.advanced.generateRas}\n+\t#else \n+\t    --none true\n+\t    --pValue 0.05\n+\t    --fChange 1.5\n+\t    --generate_svg false\n+\t    --generate_pdf true\n+\t    --generate_ras false\n+\t#end if\n         #elif $cond.type_selector == \'dataset_class\':\n             --input_data ${input_data}\n             --input_class ${input_class}\n+            #if $cond.advanced.choice == \'true\':\n+      \t    --none ${cond.advanced.None}\n+      \t    --pValue ${cond.advanced.pValue}\n+      \t    --fChange ${cond.advanced.fChange}\n+\t    --generate_svg ${cond.advanced.generateSvg}\n+\t    --generate_pdf ${cond.advanced.generatePdf}\n+\t    --generate_ras ${cond.advanced.generateRas}\n+\t#else \n+\t    --none true\n+\t    --pValue 0.05\n+\t    --fChange 1.5\n+\t    --generate_svg false\n+\t    --generate_pdf true\n+\t    --generate_ras false\n+\t#end if\n         #end if\n         #if $cond.type_selector == \'datasets_rasonly\':\n             --input_datas ${input_Datasets}\n             --single_ras_file $ras_single\n+            --none ${cond.advanced.None}\n         #end if\n         ]]>\n     </command>\n@@ -94,48 +111,66 @@\n                     <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\n                     <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Default: Dataset" />\n                 </repeat>\n+                <conditional name="advanced">\n+\t\t\t\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n+\t\t    \t\t\t<option value="true" selected="true">No</option>\n+\t\t    \t\t\t<option value="false">Yes</option>\n+\t\t\t\t\t</param>\n+\t\t\t\t\t<when value="false">\n+\t\t\t\t\t</when>\n+\t\t\t\t\t<when value="true">\n+\t\t    \t\t\t<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n+\t\t    \t\t\t<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />\n+\t\t    \t\t\t<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />\n+\t\t    \t\t\t<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n+\t\t    \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the pro'..b'one" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n+\t\t    \t\t\t<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />\n+\t\t    \t\t\t<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />\n+\t\t    \t\t\t<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n+\t\t    \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\t\n+\t\t    \t\t\t<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n+\t\t\t\t\t</when>\n+    \t</conditional>\n             </when>\n         </conditional>\n        \n-       \n-       <!--TODO: NASCONDERE ADVANCED SE RAS ONLY-->\n+      \n        \n-\t<conditional name="advanced">\n-\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n-\t\t    <option value="true" selected="true">No</option>\n-\t\t    <option value="false">Yes</option>\n-\t\t</param>\n-\t\t<when value="false">\n-\t\t</when>\n-\t\t<when value="true">\n-\t\t    <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n-\t\t    <param name="pValue" argument="--pValue" type="float" size="20" value="0.05" max="1" min="0" label="P-value threshold:" help="min value 0" />\n-\t\t    <param name="fChange" argument="--fChange" type="float" size="20" value="1.5" min="1" label="Fold-Change threshold:" help="min value 1" />\n-\t\t    <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n-\t\t    <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\t\n-\t\t    <param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n-\t\t</when>\n-    \t</conditional>\n+\t\n     </inputs>\n \n     <outputs>\n-        <data format="txt" name="log" label="${tool.name} - Log" />\n-        <data format="tabular" name="ras_single" label="${tool.name} - RAS">\n+        <data format="txt" name="log" label="MaREA - Log" />\n+        <data format="tabular" name="ras_single" label="MaREA - RAS - ${cond.input_name}">\n         \t<filter>cond[\'type_selector\'] == "datasets_rasonly"</filter>\n         </data>\n-        <collection name="results" type="list" label="${tool.name} - Results">\n+        <collection name="results" type="list" label="MaREA - Results">\n         <filter>cond[\'type_selector\'] == "datasets" or cond[\'type_selector\'] == "dataset_class"</filter>\n             <discover_datasets pattern="__name_and_ext__" directory="result"/>\n         </collection>\n-\t<collection name="ras" type="list" label="${tool.name} - RAS list" format_source="tabular">\n-\t    <filter>advanced[\'choice\'] and advanced[\'generateRas\']</filter>\n+\t<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">\n+\t    <filter>cond[\'type_selector\'] != "datasets_rasonly" and cond[\'advanced\'][\'choice\'] and cond[\'advanced\'][\'generateRas\']</filter>\n     \t    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n \t</collection>\n \t\n'
b
diff -r e88efefbd015 -r 944e15aa970a Marea/marea_cluster.py
--- a/Marea/marea_cluster.py Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea_cluster.py Tue Oct 15 12:22:43 2019 -0400
[
@@ -176,6 +176,10 @@
     scores = []
     all_labels = []
     
+    clusterer = KMeans(n_clusters=1, random_state=10)
+    distortions.append(clusterer.fit(dataset).inertia_)
+    
+    
     for n_clusters in range_n_clusters:
         clusterer = KMeans(n_clusters=n_clusters, random_state=10)
         cluster_labels = clusterer.fit_predict(dataset)
@@ -227,8 +231,10 @@
     
 def elbow_plot (distortions, k_min, k_max):
     plt.figure(0)
-    plt.plot(range(k_min, k_max+1), distortions, marker = 'o')
-    plt.xlabel('Number of cluster')
+    x = list(range(k_min, k_max + 1))
+    x.insert(0, 1)
+    plt.plot(x, distortions, marker = 'o')
+    plt.xlabel('Number of clusters (k)')
     plt.ylabel('Distortion')
     s = 'clustering/elbow_plot.png'
     fig = plt.gcf()
b
diff -r e88efefbd015 -r 944e15aa970a Marea/marea_cluster.xml
--- a/Marea/marea_cluster.xml Tue Oct 15 12:21:16 2019 -0400
+++ b/Marea/marea_cluster.xml Tue Oct 15 12:22:43 2019 -0400
b
@@ -1,4 +1,4 @@
-<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.5">
+<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6">
     <description></description>
     <macros>
         <import>marea_macros.xml</import>
@@ -38,7 +38,7 @@
         ]]>
     </command>
     <inputs>
-        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
+        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
         
         <conditional name="data">
  <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
@@ -47,8 +47,8 @@
                  <option value="hierarchy">Agglomerative Hierarchical</option>
          </param>
          <when value="kmeans">
-         <param name="k_min" argument="--k_min" type="integer" min="1" max="99" value="3" label="Min number of clusters (k) to be tested" />
-         <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+         <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
+         <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
          <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
          <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
          </when>