Repository 'marea'
hg clone https://toolshed.g2.bx.psu.edu/repos/bimib/marea

Changeset 16:c71ac0bb12de (2019-10-01)
Previous changeset 15:d0e7f14b773f (2019-10-01) Next changeset 17:640f303d0cec (2019-10-01)
Commit message:
Uploaded
modified:
Marea/marea.py
Marea/marea.xml
Marea/marea_cluster.py
Marea/marea_cluster.xml
added:
Marea/local/desktop.ini
removed:
marea-1.0.1/local/HMRcoreMap.svg
marea-1.0.1/local/HMRcore_genes.p
marea-1.0.1/local/HMRcore_rules.p
marea-1.0.1/local/Recon_genes.p
marea-1.0.1/local/Recon_rules.p
marea-1.0.1/local/desktop.ini
marea-1.0.1/marea.py
marea-1.0.1/marea.xml
marea-1.0.1/marea_cluster.py
marea-1.0.1/marea_cluster.xml
marea-1.0.1/marea_macros.xml
b
diff -r d0e7f14b773f -r c71ac0bb12de Marea/local/desktop.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/local/desktop.ini Tue Oct 01 06:05:13 2019 -0400
[
@@ -0,0 +1,6 @@
+[.ShellClassInfo]
+IconResource=C:\WINDOWS\System32\SHELL32.dll,4
+[ViewState]
+Mode=
+Vid=
+FolderType=Generic
b
diff -r d0e7f14b773f -r c71ac0bb12de Marea/marea.py
--- a/Marea/marea.py Tue Oct 01 06:03:12 2019 -0400
+++ b/Marea/marea.py Tue Oct 01 06:05:13 2019 -0400
[
b'@@ -1,4 +1,3 @@\n-\n from __future__ import division\n import sys\n import pandas as pd\n@@ -6,6 +5,7 @@\n import scipy.stats as st\n import collections\n import lxml.etree as ET\n+import shutil\n import pickle as pk\n import math\n import os\n@@ -13,7 +13,7 @@\n from svglib.svglib import svg2rlg\n from reportlab.graphics import renderPDF\n \n-########################## argparse ###########################################\n+########################## argparse ##########################################\n \n def process_args(args):\n     parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n@@ -71,6 +71,21 @@\n                         type = str,\n                         choices = [\'yes\', \'no\'],\n                         help = \'if make or not custom map\')\n+    parser.add_argument(\'-gs\', \'--generate_svg\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'], \n+                        help = \'generate svg map\')\n+    parser.add_argument(\'-gp\', \'--generate_pdf\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'], \n+                        help = \'generate pdf map\')\n+    parser.add_argument(\'-gr\', \'--generate_ras\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'],\n+                        help = \'generate reaction activity score\')\n     args = parser.parse_args()\n     return args\n \n@@ -85,7 +100,7 @@\n \n def read_dataset(data, name):\n     try:\n-        dataset = pd.read_csv(data, sep = \'\\t\', header = 0)\n+        dataset = pd.read_csv(data, sep = \'\\t\', header = 0, engine=\'python\')\n     except pd.errors.EmptyDataError:\n         sys.exit(\'Execution aborted: wrong format of \' + name + \'\\n\')\n     if len(dataset.columns) < 2:\n@@ -536,7 
+551,7 @@\n         ids = [react[i].id for i in range(len(react))]\n     except cb.io.sbml3.CobraSBMLError:\n         try:\n-            data = (pd.read_csv(data, sep = \'\\t\', dtype = str)).fillna(\'\')\n+            data = (pd.read_csv(data, sep = \'\\t\', dtype = str, engine=\'python\')).fillna(\'\')\n             if len(data.columns) < 2:\n                 sys.exit(\'Execution aborted: wrong format of \'+\n                          \'custom datarules\\n\')\n@@ -641,9 +656,28 @@\n                         \', the class has been disregarded\\n\')\n     return class_pat\n \n+############################ create_ras #######################################\n+\n+def create_ras (resolve_rules, dataset_name):\n+\n+    if resolve_rules == None:\n+        warning("Couldn\'t generate RAS for current dataset: " + dataset_name)\n+\n+    for geni in resolve_rules.values():\n+        for i, valori in enumerate(geni):\n+            if valori == None:\n+                geni[i] = \'None\'\n+                \n+    output_ras = pd.DataFrame.from_dict(resolve_rules)\n+    output_to_csv = pd.DataFrame.to_csv(output_ras, sep = \'\\t\', index = False)\n+                \n+    text_file = open("ras/Reaction_Activity_Score_Of_" + dataset_name + ".tsv", "w")\n+    text_file.write(output_to_csv)\n+    text_file.close()\n+\n ############################ map ##############################################\n \n-def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C):\n+def maps(core_map, class_pat, ids, threshold_P_V, threshold_F_C, create_svg, create_pdf):\n     args = process_args(sys.argv)\n     if (not class_pat) or (len(class_pat.keys()) < 2):\n         sys.exit(\'Execution aborted: classes provided for comparisons are \' +\n@@ -663,52 +697,81 @@\n                count += 1\n             except (TypeError, ZeroDivisionError):\n                count += 1\n-        tab = \'table_out/\' + i + \'_vs_\' + j + \'.tsv\'\n+        tab = \'result/\' + i + \'_vs_\' + j + \' (Tabular 
Result).tsv\'\n         tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index")\n         tmp_csv = tmp_csv.reset_index()\n         header = [\'ids\', \'P_Value\', \'Average\']\n         tmp_csv.to_csv(tab, sep = \'\\t\','..b"            fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)\n+                file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'\n+                with open(file_svg, 'wb') as new_map:\n+                    new_map.write(ET.tostring(core_map))\n+                    \n+                \n+                if create_pdf:\n+                    file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf'\n+                    renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)\n+                \n+                if not create_svg:\n+                    #Ho utilizzato il file svg per generare il pdf, \n+                    #ma l'utente non ne ha richiesto il ritorno, quindi\n+                    #lo elimino\n+                    os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg')\n+                    \n     return None\n \n ############################ MAIN #############################################\n \n def main():\n     args = process_args(sys.argv)\n-    os.makedirs('table_out')\n-    if args.rules_selector == 'HMRcore':\n-        os.makedirs('map_svg')\n-        os.makedirs('map_pdf')\n+    \n+    create_svg = check_bool(args.generate_svg)\n+    create_pdf = check_bool(args.generate_pdf)\n+    generate_ras = check_bool(args.generate_ras)\n+    \n+    os.makedirs('result')\n+\n+    if generate_ras:\n+        os.makedirs('ras')\n+    \n+    if args.rules_selector == 'HMRcore':        \n         recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))\n     elif args.rules_selector == 'Recon':\n         recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))\n     elif args.rules_selector == 'Custom':\n         ids, rules, gene_in_rule = make_recon(args.custom)\n+        \n     resolve_none = 
check_bool(args.none)\n+    \n     class_pat = {}\n+    \n     if args.option == 'datasets':\n         num = 1\n-        #if len(args.names) != len(set(args.names)):\n-        #    sys.exit('Execution aborted: datasets name duplicated')\n         for i, j in zip(args.input_datas, args.names):\n+\n             name = name_dataset(j, num)\n             dataset = read_dataset(i, name)\n+\n             dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-            type_gene = gene_type(dataset.iloc[0, 0], name)\n+\n+            type_gene = gene_type(dataset.iloc[0, 0], name) \n+            \n             if args.rules_selector != 'Custom':\n                 genes = data_gene(dataset, type_gene, name, None)\n                 ids, rules = load_id_rules(recon.get(type_gene))\n             elif args.rules_selector == 'Custom':\n                 genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+                \n             resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+\n+            if generate_ras:\n+                create_ras(resolve_rules, name)\n+                \n+            \n             if err != None and err:\n                 warning('Warning: gene\\n' + str(err) + '\\nnot found in class '\n                     + name + ', the expression level for this gene ' +\n@@ -738,10 +801,9 @@\n                     'will be considered NaN\\n')\n         if resolve_rules != None:\n             class_pat = split_class(classes, resolve_rules)\n+            \n     if args.rules_selector == 'Custom':\n         if args.yes_no == 'yes':\n-            os.makedirs('map_svg')\n-            os.makedirs('map_pdf')\n             try:\n                 core_map = ET.parse(args.custom_map)\n             except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n@@ -750,8 +812,11 @@\n             core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')\n     else:       \n         core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')\n-    
maps(core_map, class_pat, ids, args.pValue, args.fChange)\n-    warning('Execution succeeded')\n+        \n+    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)\n+        \n+    print('Execution succeded')\n+\n     return None\n \n ###############################################################################\n"
b
diff -r d0e7f14b773f -r c71ac0bb12de Marea/marea.xml
--- a/Marea/marea.xml Tue Oct 01 06:03:12 2019 -0400
+++ b/Marea/marea.xml Tue Oct 01 06:05:13 2019 -0400
[
b'@@ -1,200 +1,223 @@\n-<tool id="MaREA" name="Metabolic Enrichment Analysis" version="1.0.0">\r\n-    <description>for Galaxy</description>\r\n-    <macros>\r\n-        <import>marea_macros.xml</import>\r\n-    </macros>\r\n-    <requirements>\r\n-        <requirement type="package" version="0.23.0">pandas</requirement>\r\n-        <requirement type="package" version="1.1.0">scipy</requirement>\r\n-        <requirement type="package" version="0.10.1">cobra</requirement>\r\n-        <requirement type="package" version="4.2.1">lxml</requirement>\r\n-        <requirement type="package" version="0.8.1">svglib</requirement>\r\n-        <requirement type="package" version="3.4.0">reportlab</requirement>\r\n-    </requirements>\r\n-    <command detect_errors="exit_code">\r\n-        <![CDATA[\r\n-      \tpython $__tool_directory__/marea.py\r\n-        --rules_selector $cond_rule.rules_selector\r\n-        #if $cond_rule.rules_selector == \'Custom\':\r\n-            --custom ${cond_rule.Custom_rules}\r\n-            --yes_no ${cond_rule.cond_map.yes_no}\r\n-            #if $cond_rule.cond_map.yes_no == \'yes\':\r\n-                --custom_map $cond_rule.cond_map.Custom_map\r\n-            #end if\r\n-        #end if\r\n-      \t--none $None\r\n-      \t--pValue $pValue\r\n-      \t--fChange $fChange\r\n-      \t--tool_dir $__tool_directory__\r\n-      \t--option $cond.type_selector\r\n-        --out_log $log\r\n-        #if $cond.type_selector == \'datasets\':\r\n-            --input_datas\r\n-            #for $data in $cond.input_Datasets:\r\n-                ${data.input}\r\n-            #end for\r\n-            --names\r\n-            #for $data in $cond.input_Datasets:\r\n-                ${data.input_name}\r\n-            #end for\r\n-        #elif $cond.type_selector == \'dataset_class\':\r\n-            --input_data ${input_data}\r\n-            --input_class ${input_class}\r\n-        #end if\r\n-        ]]>\r\n-    </command>\r\n-\r\n-    <inputs>\r\n-        
<conditional name="cond_rule">\r\n-            <expand macro="options"/>\r\n-            <when value="HMRcore">\r\n-            </when>\r\n-            <when value="Recon">\r\n-            </when>\r\n-            <when value="Custom">\r\n-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\r\n-                <conditional name="cond_map">\r\n-                    <param name="yes_no" type="select" label="Custom map? (optional)">\r\n-                        <option value="no" selected="true">no</option>\r\n-                        <option value="yes">yes</option>\r\n-                    </param>\r\n-                    <when value="yes">\r\n-                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\r\n-                    </when>\r\n-                    <when value="no">\r\n-                    </when>\r\n-                </conditional>\r\n-            </when>\r\n-        </conditional>\r\n-        <conditional name="cond">\r\n-            <param name="type_selector" argument="--option" type="select" label="Input format:">\r\n-                <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N</option>\r\n-                <option value="dataset_class">RNAseq of all samples + sample group specification</option>\r\n-            </param>\r\n-            <when value="datasets">\r\n-                <repeat name="input_Datasets" title="RNAseq" min="2">\r\n-                    <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\r\n-                    <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Defalut: Dataset" />\r\n-                </repeat>\r\n-            </when>\r\n-            <when value="dataset_class">\r\n-                <param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />\r\n-                <param name="input_class" argument="--input_class" type="data" format="tabular, csv, tsv" lab'..b'ity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n+\t\t</when>\n+    \t</conditional>\n+    </inputs>\n+\n+    <outputs>\n+        <data format="txt" name="log" label="${tool.name} - Log" />\n+        <collection name="results" type="list" label="${tool.name} - Results">\n+            <discover_datasets pattern="__name_and_ext__" directory="result"/>\n+        </collection>\n+\t<collection name="ras" type="list" label="${tool.name} - RAS" format_source="tabular">\n+\t    <filter>advanced[\'choice\'] and advanced[\'generateRas\']</filter>\n+    \t    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n+\t</collection>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="pValue" value="0.56"/>\n+            <output name="log" file="log.txt"/>\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+\n+What it does\n+-------------\n+\n+This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of 
RNAseq data" bioRxiv (2018): 248724.\n+\n+Accepted files are: \n+    - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");\n+    - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n+\n+Optional files:\n+    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:\n+\n+\t* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;\n+\t* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).\n+    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.\n+\n+The tool generates:\n+    1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;\n+    2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;\n+    3) a log file (.txt).\n+\n+RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.\n+\n+Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).\n+\n+To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.\n+\n+Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".\n+\n+\n+Example input\n+-------------\n+\n+**"Custom Rules"** option:\n+\n+Custom Rules Dastaset:\n+\n+@CUSTOM_RULES_EXEMPLE@\n+\n+**"RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N"** option:\n+\n+RNA-seq Dataset 1:\t\t\t\t\t\t\n+\n+@DATASET_EXEMPLE1@\n+\n+RNA-seq Dataset 2:\n+\n+@DATASET_EXEMPLE2@\n+\n+**"RNAseq of all samples + sample group specification"** option:\n+\n+RNA-seq Dataset:\n+\n+@DATASET_EXEMPLE1@\n+\n+Class-file:\n+\n++------------+------------+   \n+| Patient_ID |    class   |   \n++============+============+   \n+| TCGAAA3529 |     MSI    |   \n++------------+------------+    \n+| TCGAA62671 |     MSS    |    \n++------------+------------+    \n+| TCGAA62672 |     MSI    |   \n++------------+------------+\n+\n+|\n+\n+.. class:: infomark\n+\n+**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\n+\n+.. class:: infomark\n+\n+**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\n+\n+@REFERENCE@\n+\n+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\n+.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\n+.. _MaREA cluster analysis: http://link del tool di cluster.org\n+\n+]]>\n+    </help>\n+    <expand macro="citations" />\n+</tool>\n+\t\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de Marea/marea_cluster.py
--- a/Marea/marea_cluster.py Tue Oct 01 06:03:12 2019 -0400
+++ b/Marea/marea_cluster.py Tue Oct 01 06:05:13 2019 -0400
[
b'@@ -1,67 +1,84 @@\n-from __future__ import division\n-import os\n+# -*- coding: utf-8 -*-\n+"""\n+Created on Mon Jun 3 19:51:00 2019\n+\n+@author: Narger\n+"""\n+\n import sys\n-import pandas as pd\n-import collections\n-import pickle as pk\n import argparse\n-from sklearn.cluster import KMeans\n-import matplotlib\n-# Force matplotlib to not use any Xwindows backend.\n-matplotlib.use(\'Agg\')\n+import os\n+from sklearn.datasets import make_blobs\n+from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering\n+from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster\n import matplotlib.pyplot as plt\n+import scipy.cluster.hierarchy as shc   \n+import matplotlib.cm as cm\n+import numpy as np\n+import pandas as pd\n \n-########################## argparse ###########################################\n+################################# process args ###############################\n \n def process_args(args):\n     parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n                                      description = \'process some value\\\'s\' +\n                                      \' genes to create class.\')\n-    parser.add_argument(\'-rs\', \'--rules_selector\', \n+\n+    parser.add_argument(\'-ol\', \'--out_log\', \n+                        help = "Output log")\n+    \n+    parser.add_argument(\'-in\', \'--input\',\n                         type = str,\n-                        default = \'HMRcore\',\n-                        choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n-                        help = \'chose which type of dataset you want use\')\n-    parser.add_argument(\'-cr\', \'--custom\',\n+                        help = \'input dataset\')\n+    \n+    parser.add_argument(\'-cy\', \'--cluster_type\',\n                         type = str,\n-                        help=\'your dataset if you want custom rules\')\n-    parser.add_argument(\'-ch\', \'--cond_hier\', \n-                        type 
= str,\n-                        default = \'no\',\n-                        choices = [\'no\', \'yes\'], \n-                        help = \'chose if you wanna hierical dendrogram\')\n-    parser.add_argument(\'-lk\', \'--k_min\', \n+                        choices = [\'kmeans\', \'meanshift\', \'dbscan\', \'hierarchy\'],\n+                        default = \'kmeans\',\n+                        help = \'choose clustering algorythm\')\n+    \n+    parser.add_argument(\'-k1\', \'--k_min\', \n+                        type = int,\n+                        default = 2,\n+                        help = \'choose minimun cluster number to be generated\')\n+    \n+    parser.add_argument(\'-k2\', \'--k_max\', \n                         type = int,\n-                        help = \'min number of cluster\')\n-    parser.add_argument(\'-uk\', \'--k_max\', \n-                        type = int,\n-                        help = \'max number of cluster\')\n-    parser.add_argument(\'-li\', \'--linkage\', \n-                        type = str, \n-                        choices = [\'single\', \'complete\', \'average\'], \n-                        help=\'linkage hierarchical cluster\')\n-    parser.add_argument(\'-d\', \'--data\',\n+                        default = 7,\n+                        help = \'choose maximum cluster number to be generated\')\n+    \n+    parser.add_argument(\'-el\', \'--elbow\', \n+                        type = str,\n+                        default = \'false\',\n+                        choices = [\'true\', \'false\'],\n+                        help = \'choose if you want to generate an elbow plot for kmeans\')\n+    \n+    parser.add_argument(\'-si\', \'--silhouette\', \n                         type = str,\n-                        required = True,\n-                        help = \'input dataset\')\n-    parser.add_argument(\'-n\', \'--none\',\n+                        default = \'false\',\n+                        choices = [\'true\', 
\'false\'],\n+                        help = \'choose if you want silhouette plots\')\n+    \n+    parser.add_argument(\'-db\', \'--davies\', \n                         type = str,\n-                        default = \'true\',\n-                        choi'..b'ries inverted.\\n\')\n-         tmp = k_min\n-         k_min = k_max\n-         k_max = tmp\n-    else: \n-        warning(\'k range correct.\\n\')\n-    cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = \'index\')\n-    for i in cluster_data.columns:\n-        tmp = cluster_data[i][0]\n-        if tmp == None:\n-            cluster_data = cluster_data.drop(columns=[i])\n-    distorsion = []\n-    for i in range(k_min, k_max+1):\n-        tmp_kmeans = KMeans(n_clusters = i,\n-                            n_init = 100, \n-                            max_iter = 300,\n-                            random_state = 0).fit(cluster_data)\n-        distorsion.append(tmp_kmeans.inertia_)\n-        predict = tmp_kmeans.predict(cluster_data)\n-        predict = [x+1 for x in predict]\n-        classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str)\n-        dest = \'cluster_out/K=\' + str(i) + \'_\' + args.name+\'.tsv\'\n-        classe.to_csv(dest, sep = \'\\t\', index = False,\n-                      header = [\'Patient_ID\', \'Class\'])\n-    plt.figure(0)\n-    plt.plot(range(k_min, k_max+1), distorsion, marker = \'o\')\n-    plt.xlabel(\'Number of cluster\')\n-    plt.ylabel(\'Distorsion\')\n-    plt.savefig(args.elbow, dpi = 240, format = \'pdf\')\n-    if args.cond_hier == \'yes\':\n-        import scipy.cluster.hierarchy as hier\n-        lin = hier.linkage(cluster_data, args.linkage)\n-        plt.figure(1)\n-        plt.figure(figsize=(10, 5))\n-        hier.dendrogram(lin, leaf_font_size = 2, labels = cluster_data.index)\n-        plt.savefig(args.dendro, dpi = 480, format = \'pdf\')\n-    return None\n+    \n+############################# main 
###########################################\n \n-################################# main ########################################\n \n def main():\n+    if not os.path.exists(\'clustering\'):\n+        os.makedirs(\'clustering\')\n+\n     args = process_args(sys.argv)\n-    if args.rules_selector == \'HMRcore\':\n-        recon = pk.load(open(args.tool_dir + \'/local/HMRcore_rules.p\', \'rb\'))\n-    elif args.rules_selector == \'Recon\':\n-        recon = pk.load(open(args.tool_dir + \'/local/Recon_rules.p\', \'rb\'))\n-    elif args.rules_selector == \'Custom\':\n-        ids, rules, gene_in_rule = make_recon(args.custom)\n-    resolve_none = check_bool(args.none)\n-    dataset = read_dataset(args.data, args.name)\n-    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-    type_gene = gene_type(dataset.iloc[0, 0], args.name)\n-    if args.rules_selector != \'Custom\':\n-        genes = data_gene(dataset, type_gene, args.name, None)\n-        ids, rules = load_id_rules(recon.get(type_gene))\n-    elif args.rules_selector == \'Custom\':\n-        genes = data_gene(dataset, type_gene, args.name, gene_in_rule)\n-    resolve_rules, err = resolve(genes, rules, ids, resolve_none, args.name)\n-    if err:\n-        warning(\'WARNING: gene\\n\' + str(err) + \'\\nnot found in class \'  \n-                + args.name + \', the expression level for this gene \' +\n-                \'will be considered NaN\\n\')\n-    f_cluster(resolve_rules)\n-    warning(\'Execution succeeded\')\n-    return None\n-\n-###############################################################################\n+    \n+    #Data read\n+    \n+    X = read_dataset(args.input)\n+    X = pd.DataFrame.to_dict(X, orient=\'list\')\n+    X = rewrite_input(X)\n+    X = pd.DataFrame.from_dict(X, orient = \'index\')\n+    \n+    for i in X.columns:\n+        tmp = X[i][0]\n+        if tmp == None:\n+            X = X.drop(columns=[i])\n+                \n+    X = pd.DataFrame.to_numpy(X)\n+    \n+    \n+    
if args.cluster_type == \'kmeans\':\n+        kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies)\n+    \n+    if args.cluster_type == \'dbscan\':\n+        dbscan(X, args.eps, args.min_samples)\n+        \n+    if args.cluster_type == \'hierarchy\':\n+        hierachical_agglomerative(X, args.k_min, args.k_max)\n+        \n+##############################################################################\n \n if __name__ == "__main__":\n     main()\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de Marea/marea_cluster.xml
--- a/Marea/marea_cluster.xml Tue Oct 01 06:03:12 2019 -0400
+++ b/Marea/marea_cluster.xml Tue Oct 01 06:05:13 2019 -0400
[
b'@@ -1,148 +1,92 @@\n-<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.0">\r\n-    <description>of Reaction Activity Scores</description>\r\n-    <macros>\r\n-        <import>marea_macros.xml</import>\r\n-    </macros>\r\n-    <requirements>\r\n-        <requirement type="package" version="0.23.0">pandas</requirement>\r\n-        <requirement type="package" version="1.1.0">scipy</requirement>\r\n-        <requirement type="package" version="0.10.1">cobra</requirement>\r\n-        <requirement type="package" version="0.19.1">scikit-learn</requirement>\r\n-        <requirement type="package" version="2.2.2">matplotlib</requirement>\r\n-    </requirements>\r\n-    <command detect_errors="exit_code">\r\n-        <![CDATA[\r\n-      \tpython $__tool_directory__/marea_cluster.py\r\n-        --rules_selector $cond_rule.rules_selector\r\n-        #if $cond_rule.rules_selector == \'Custom\':\r\n-            --custom ${cond_rule.Custom_rules}\r\n-        #end if\r\n-        --cond_hier $cond_hier.hier\r\n-        #if $cond_hier.hier == \'yes\':\r\n-            --linkage ${cond_hier.linkage}\r\n-            --dendro $dendrogram\r\n-        #end if\r\n-        --k_max $k_max\r\n-        --k_min $k_min\r\n-        --data $input\r\n-        --name $name\r\n-      \t--none $None\r\n-      \t--tool_dir $__tool_directory__\r\n-        --out_log $log\r\n-        --elbow $elbow\r\n-        ]]>\r\n-    </command>\r\n-    <inputs>\r\n-        <conditional name="cond_rule">\r\n-            <expand macro="options"/>\r\n-            <when value="Custom">\r\n-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\r\n-            </when>\r\n-            <when value="HMRcore">\r\n-            </when>\r\n-            <when value="Recon">\r\n-            </when>\r\n-        </conditional>\r\n-        <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />\r\n-      
  <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" />\r\n-        <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/>\r\n-        <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/>\r\n-        <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" />\r\n-\t<conditional name="cond_hier">\r\n-            <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):">\r\n-                <option value="no" selected="true">no</option>\r\n-                <option value="yes">yes</option>\r\n-            </param>\r\n-            <when value="yes">\r\n-                <param name="linkage" argument="--linkage" type="select" label="Linkage type:">\r\n-                    <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>\r\n-                    <option value="complete">Complete: maximum distance between all observations of two sets</option>\r\n-                    <option value="average">Average: average distance between all observations of two sets</option>\r\n-                </param>\r\n-            </when>\r\n-            <when value="no">\r\n-            </when>\r\n-        </conditional>\r\n-    </inputs>\r\n-\r\n-    <outputs>\r\n-        <data format="txt" name="log" label="Log" />\r\n-        <data format="pdf" name="dendrogram" label="$name dendrogram">\r\n-            <filter>cond_hier[\'hier\'] == \'yes\'</filter>\r\n-        </data>\r\n-        <data format="pdf" name="elbow" label="$name elbow evaluation method" />\r\n-        <collection name="cluster_out" type="list" 
label="Clusters $k_min - $k_max">\r\n-            <discover_datasets pattern="__name_and_ext__" directory="cluster_out" />\r\n-        </collection>\r\n-    </outputs>\r\n-    <tes'..b' <requirement type="package" version="1.1.0">scipy</requirement>\n+        <requirement type="package" version="0.10.1">cobra</requirement>\n+        <requirement type="package" version="0.21.3">scikit-learn</requirement>\n+        <requirement type="package" version="2.2.2">matplotlib</requirement>\n+\t<requirement type="package" version="1.17">numpy</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code">\n+        <![CDATA[\n+      \tpython $__tool_directory__/marea_cluster.py\n+        --input $input\n+      \t--tool_dir $__tool_directory__\n+        --out_log $log\n+        #if $data.clust_type == \'kmeans\':\n+        \t--k_min ${data.k_min}\n+        \t--k_max ${data.k_max}\n+        \t--elbow ${data.elbow}\n+        \t--silhouette ${data.silhouette}\n+        #end if\n+        #if $data.clust_type == \'dbscan\':\n+        \t#if $data.dbscan_advanced.advanced == \'true\'\n+        \t\t--eps ${data.dbscan_advanced.eps}\n+        \t\t--min_samples ${data.dbscan_advanced.min_samples}\n+        \t#end if\n+        #end if\n+        #if $data.clust_type == \'hierarchy\':\n+        \t--k_min ${data.k_min}\n+        \t--k_max ${data.k_max}\n+      \t#end if\n+        ]]>\n+    </command>\n+    <inputs>\n+        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />\n+        \n+        <conditional name="data">\n+\t\t\t<param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">\n+                \t<option value="kmeans" selected="true">KMeans</option>\n+                \t<option value="dbscan">DBSCAN</option>\n+                \t<option value="hierarchy">Agglomerative Hierarchical</option>\n+        \t</param>\n+        \t<when value="kmeans">\n+        \t\t<param 
name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />\n+        \t\t<param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />\n+        \t\t<param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>\n+        \t\t<param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>\n+        \t</when>\n+        \t<when value="dbscan">\n+        \t\t<conditional name="dbscan_advanced">\n+        \t\t\t<param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">\n+        \t\t\t\t<option value="true">Yes</option>\n+        \t\t\t\t<option value="false">No</option>\n+        \t\t\t</param>\n+        \t\t\t<when value="false"></when>\n+        \t\t\t<when value="true">\n+        \t\t\t\t<param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />\n+        \t\t\t\t<param name="min_samples" argument="min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>\n+        \t\t\t\n+        \t\t\t</when>\n+        \t\t</conditional>   \t\n+        \t</when>\n+        \t<when value="hierarchy">\n+        \t\t<param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />\n+        \t\t<param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />\n+        \t</when>\n+\t\t</conditional>\n+    </inputs>\n+\n+    <outputs>\n+        <data format="txt" name="log" 
label="${tool.name} - Log" />\n+        <collection name="results" type="list" label="${tool.name} - Results">\n+            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>\n+        </collection>\n+    </outputs>\n+    <help>\n+<![CDATA[\n+\n+What it does\n+-------------\n+\n+\n+]]>\n+    </help>\n+    <expand macro="citations" />\n+</tool>\n+\t\n+\t\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/local/HMRcoreMap.svg
--- a/marea-1.0.1/local/HMRcoreMap.svg Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,7702 +0,0 @@\n-<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n-<!-- Generator: Adobe Illustrator 22.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->\n-\n-<svg\n-   xmlns:dc="http://purl.org/dc/elements/1.1/"\n-   xmlns:cc="http://creativecommons.org/ns#"\n-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n-   xmlns:svg="http://www.w3.org/2000/svg"\n-   xmlns="http://www.w3.org/2000/svg"\n-   xmlns:xlink="http://www.w3.org/1999/xlink"\n-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"\n-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"\n-   version="1.1"\n-   x="0px"\n-   y="0px"\n-   viewBox="0 0 1904.8016 1511.2752"\n-   xml:space="preserve"\n-   id="svg2"\n-   inkscape:version="0.91 r13725"\n-   sodipodi:docname="HMRcoreMap.svg"\n-   width="1904.8015"\n-   height="1511.2753"><metadata\n-     id="metadata2021"><rdf:RDF><cc:Work\n-         rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type\n-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title /></cc:Work></rdf:RDF></metadata><defs\n-     id="defs2019"><sodipodi:namedview\n-       showguides="true"\n-       showgrid="true"\n-       pagecolor="#ffffff"\n-       inkscape:zoom="1.4702451"\n-       inkscape:window-y="-8"\n-       inkscape:window-x="-8"\n-       inkscape:window-width="1920"\n-       inkscape:window-maximized="1"\n-       inkscape:window-height="1017"\n-       inkscape:snap-page="false"\n-       inkscape:snap-grids="true"\n-       inkscape:pageshadow="2"\n-       inkscape:pageopacity="0.0"\n-       inkscape:document-units="px"\n-       inkscape:cy="338.10986"\n-       inkscape:cx="1343.7768"\n-       inkscape:current-layer="layer1"\n-       id="base"\n-       fit-margin-top="0"\n-       fit-margin-right="0"\n-       fit-margin-left="0"\n-       fit-margin-bottom="0"\n-       borderopacity="1.0"\n-       bordercolor="#666666"><inkscape:grid\n-         type="xygrid"\n-         originy="72.926308"\n-       
  originx="-97.409688"\n-         id="grid3434"\n-         dotted="true" /></sodipodi:namedview></defs><sodipodi:namedview\n-     pagecolor="#ffffff"\n-     bordercolor="#666666"\n-     borderopacity="1"\n-     objecttolerance="10"\n-     gridtolerance="10"\n-     guidetolerance="10"\n-     inkscape:pageopacity="0"\n-     inkscape:pageshadow="2"\n-     inkscape:window-width="1920"\n-     inkscape:window-height="1017"\n-     id="namedview2017"\n-     showgrid="false"\n-     inkscape:zoom="0.44727204"\n-     inkscape:cx="497.63252"\n-     inkscape:cy="796.80241"\n-     inkscape:window-x="-8"\n-     inkscape:window-y="-8"\n-     inkscape:window-maximized="1"\n-     inkscape:current-layer="svg2"\n-     fit-margin-top="0"\n-     fit-margin-left="0"\n-     fit-margin-right="0"\n-     fit-margin-bottom="0" /><style\n-     type="text/css"\n-     id="style4">\n-\t.st0{display:none;}\n-\t.st1{display:inline;}\n-\t.st2{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;}\n-\t.st3{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;stroke-dasharray:11.9422,11.9422;}\n-\t.st4{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;stroke-dasharray:12.1815,12.1815;}\n-\t.st5{font-family:\'Helvetica\';}\n-\t.st6{font-size:30px;}\n-\t.st7{font-size:39.262px;}\n-\t.st8{fill:none;stroke:#0000FF;stroke-width:30;}\n-\t.st9{fill:none;stroke:#E41A1C;stroke-width:30;}\n-\t.st10{fill:none;stroke:#BEBEBE;stroke-width:30;}\n-\t.st11{stroke:#000000;stroke-width:30;}\n-\t.st12{fill:none;stroke:#BEBEBE;stroke-width:30;stroke-dasharray:30,30;stroke-dashoffset:6;}\n-\t.st13{fill:none;stroke:#000000;stroke-width:1.8444;}\n-\t.st14{fill:none;stroke:#000000;stroke-width:2.1821;}\n-\t.st15{font-family:\'Calibri-Bold\';}\n-\t.st16{font-size:16px;}\n-\t.st17{font-family:\'Calibri\';}\n-\t.st18{font-size:10px;}\n-\t.st19{fill:none;stroke:#000000;stroke-width:1.8856;}\n-\t.st20{fill:none;stroke:#000000;stroke-width:1.9459;}\n-\t.st21{fill:none;stroke:#000000;stroke-width:2.2892
;}\n-\t.st22{fill:none;stroke:#000000;stroke-width:2.5;}\n-\t.st23{fill:none;stroke:#000000;stroke-width:1.9412;}\n-\t.st24{fill:none;str'..b'31.89,1231.8186 2.2,-7.3 2.2,7.3 -2.2,-1.8 -2.2,1.8 z"\n-     class="st14"\n-     inkscape:label="Glutamine_DM_COOP b"\n-     inkscape:connector-curvature="0"\n-     id="B_Glutamine_DM_COOP" /><path\n-     style="fill:none;stroke:#000000;stroke-width:2.18210006"\n-     d="m 1233.89,1279.4186 0,-48"\n-     class="st14"\n-     inkscape:label="Glutamine_DM_COOP"\n-     inkscape:connector-curvature="0"\n-     id="R_Glutamine_DM_COOP" /><flowRoot\n-     xml:space="preserve"\n-     id="flowRoot5366"\n-     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n-     transform="translate(-20.6,18.418554)"><flowRegion\n-       id="flowRegion5368"><rect\n-         id="rect5370"\n-         width="1165.1471"\n-         height="77.465683"\n-         x="306.70087"\n-         y="-39.523308" /></flowRegion><flowPara\n-       id="flowPara5372" /></flowRoot><flowRoot\n-     xml:space="preserve"\n-     id="TitoloConfronto"\n-     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n-     inkscape:label="TitoloConfronto"\n-     transform="translate(-18.364224,56.426743)"><flowRegion\n-       id="flowRegion5376"><rect\n-         id="rect5378"\n-         width="1869.6877"\n-         height="68.569115"\n-         x="301.95807"\n-         y="-69.56102" /></flowRegion><flowPara\n-       id="TitleText"\n-       
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif">TITOLO: TITOLOTITOLO </flowPara></flowRoot><flowRoot\n-     xml:space="preserve"\n-     id="flowRoot5382"\n-     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n-     transform="translate(-16.64767,38.180207)"><flowRegion\n-       id="flowRegion5384"><rect\n-         id="rect5386"\n-         width="275.00043"\n-         height="149.79698"\n-         x="1681.3033"\n-         y="204.59315" /></flowRegion><flowPara\n-       id="flowPara5390"\n-       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:\'sans-serif Bold\'">Fold Change</flowPara></flowRoot><flowRoot\n-     xml:space="preserve"\n-     id="FC_min"\n-     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n-     transform="translate(-8.622366,131.05768)"\n-     inkscape:label="FC_min"><flowRegion\n-       id="flowRegion5384-2"><rect\n-         id="rect5386-9"\n-         width="275.00043"\n-         height="149.79698"\n-         x="1681.3033"\n-         y="204.59315" /></flowRegion><flowPara\n-       id="Val_FC_min">min: </flowPara></flowRoot><flowRoot\n-     xml:space="preserve"\n-     id="FC_max"\n-     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n-     
transform="translate(-17.492772,95.648076)"\n-     inkscape:label="FC_max"><flowRegion\n-       id="flowRegion5384-2-2"><rect\n-         id="rect5386-9-9"\n-         width="275.00043"\n-         height="149.79698"\n-         x="1681.3033"\n-         y="204.59315" /></flowRegion><flowPara\n-       id="Val_FC_max">max:</flowPara></flowRoot></svg>\n\\ No newline at end of file\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/local/HMRcore_genes.p
b
Binary file marea-1.0.1/local/HMRcore_genes.p has changed
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/local/HMRcore_rules.p
b
Binary file marea-1.0.1/local/HMRcore_rules.p has changed
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/local/Recon_genes.p
b
Binary file marea-1.0.1/local/Recon_genes.p has changed
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/local/Recon_rules.p
b
Binary file marea-1.0.1/local/Recon_rules.p has changed
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/local/desktop.ini
--- a/marea-1.0.1/local/desktop.ini Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,6 +0,0 @@
-[.ShellClassInfo]
-IconResource=C:\WINDOWS\System32\SHELL32.dll,4
-[ViewState]
-Mode=
-Vid=
-FolderType=Generic
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/marea.py
--- a/marea-1.0.1/marea.py Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,825 +0,0 @@\n-from __future__ import division\n-import sys\n-import pandas as pd\n-import itertools as it\n-import scipy.stats as st\n-import collections\n-import lxml.etree as ET\n-import shutil\n-import pickle as pk\n-import math\n-import os\n-import argparse\n-from svglib.svglib import svg2rlg\n-from reportlab.graphics import renderPDF\n-\n-########################## argparse ##########################################\n-\n-def process_args(args):\n-    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n-                                     description = \'process some value\\\'s\'+\n-                                     \' genes to create a comparison\\\'s map.\')\n-    parser.add_argument(\'-rs\', \'--rules_selector\', \n-                        type = str,\n-                        default = \'HMRcore\',\n-                        choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n-                        help = \'chose which type of dataset you want use\')\n-    parser.add_argument(\'-cr\', \'--custom\',\n-                        type = str,\n-                        help=\'your dataset if you want custom rules\')\n-    parser.add_argument(\'-na\', \'--names\', \n-                        type = str,\n-                        nargs = \'+\', \n-                        help = \'input names\')\n-    parser.add_argument(\'-n\', \'--none\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'], \n-                        help = \'compute Nan values\')\n-    parser.add_argument(\'-pv\' ,\'--pValue\', \n-                        type = float, \n-                        default = 0.05, \n-                        help = \'P-Value threshold (default: %(default)s)\')\n-    parser.add_argument(\'-fc\', \'--fChange\', \n-                        type = float, \n-                        default = 1.5, \n-                        help = \'Fold-Change threshold (default: 
%(default)s)\')\n-    parser.add_argument(\'-td\', \'--tool_dir\',\n-                        type = str,\n-                        required = True,\n-                        help = \'your tool directory\')\n-    parser.add_argument(\'-op\', \'--option\', \n-                        type = str, \n-                        choices = [\'datasets\', \'dataset_class\'],\n-                        help=\'dataset or dataset and class\')\n-    parser.add_argument(\'-ol\', \'--out_log\', \n-                        help = "Output log")    \n-    parser.add_argument(\'-ids\', \'--input_datas\', \n-                        type = str,\n-                        nargs = \'+\', \n-                        help = \'input datasets\')\n-    parser.add_argument(\'-id\', \'--input_data\',\n-                        type = str,\n-                        help = \'input dataset\')\n-    parser.add_argument(\'-ic\', \'--input_class\', \n-                        type = str, \n-                        help = \'sample group specification\')\n-    parser.add_argument(\'-cm\', \'--custom_map\', \n-                        type = str, \n-                        help = \'custom map\')\n-    parser.add_argument(\'-yn\', \'--yes_no\', \n-                        type = str,\n-                        choices = [\'yes\', \'no\'],\n-                        help = \'if make or not custom map\')\n-    parser.add_argument(\'-gs\', \'--generate_svg\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'], \n-                        help = \'generate svg map\')\n-    parser.add_argument(\'-gp\', \'--generate_pdf\',\n-                        type = str,\n-                        default = \'true\',\n-                        choices = [\'true\', \'false\'], \n-                        help = \'generate pdf map\')\n-    parser.add_argument(\'-gr\', \'--generate_ras\',\n-                        type = str,\n-                        
default = \'true\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'generate reaction activity score\')\n-    args = parser.parse_args()\n-    return args\n-\n-########################### warning ######'..b' #############################################\n-\n-def main():\n-    args = process_args(sys.argv)\n-    \n-    create_svg = check_bool(args.generate_svg)\n-    create_pdf = check_bool(args.generate_pdf)\n-    generate_ras = check_bool(args.generate_ras)\n-    \n-    os.makedirs(\'result\')\n-\n-    if generate_ras:\n-        os.makedirs(\'ras\')\n-    \n-    if args.rules_selector == \'HMRcore\':        \n-        recon = pk.load(open(args.tool_dir + \'/local/HMRcore_rules.p\', \'rb\'))\n-    elif args.rules_selector == \'Recon\':\n-        recon = pk.load(open(args.tool_dir + \'/local/Recon_rules.p\', \'rb\'))\n-    elif args.rules_selector == \'Custom\':\n-        ids, rules, gene_in_rule = make_recon(args.custom)\n-        \n-    resolve_none = check_bool(args.none)\n-    \n-    class_pat = {}\n-    \n-    if args.option == \'datasets\':\n-        num = 1\n-        for i, j in zip(args.input_datas, args.names):\n-\n-            name = name_dataset(j, num)\n-            dataset = read_dataset(i, name)\n-\n-            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-\n-            type_gene = gene_type(dataset.iloc[0, 0], name) \n-            \n-            if args.rules_selector != \'Custom\':\n-                genes = data_gene(dataset, type_gene, name, None)\n-                ids, rules = load_id_rules(recon.get(type_gene))\n-            elif args.rules_selector == \'Custom\':\n-                genes = data_gene(dataset, type_gene, name, gene_in_rule)\n-                \n-            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-\n-            if generate_ras:\n-                create_ras(resolve_rules, name)\n-                \n-            \n-            if err != None and 
err:\n-                warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n-                    + name + \', the expression level for this gene \' +\n-                    \'will be considered NaN\\n\')\n-            if resolve_rules != None:\n-                class_pat[name] = list(map(list, zip(*resolve_rules.values())))\n-            num += 1\n-    elif args.option == \'dataset_class\':\n-        name = \'RNAseq\'\n-        dataset = read_dataset(args.input_data, name)\n-        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-        type_gene = gene_type(dataset.iloc[0, 0], name)\n-        classes = read_dataset(args.input_class, \'class\')\n-        if not len(classes.columns) == 2:\n-            warning(\'Warning: more than 2 columns in class file. Extra\' +\n-                    \'columns have been disregarded\\n\')\n-        classes = classes.astype(str)\n-        if args.rules_selector != \'Custom\':\n-            genes = data_gene(dataset, type_gene, name, None)\n-            ids, rules = load_id_rules(recon.get(type_gene))\n-        elif args.rules_selector == \'Custom\':\n-            genes = data_gene(dataset, type_gene, name, gene_in_rule)\n-        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-        if err != None and err:\n-            warning(\'Warning: gene\\n\'+str(err)+\'\\nnot found in class \'\n-                    + name + \', the expression level for this gene \' +\n-                    \'will be considered NaN\\n\')\n-        if resolve_rules != None:\n-            class_pat = split_class(classes, resolve_rules)\n-            \n-    if args.rules_selector == \'Custom\':\n-        if args.yes_no == \'yes\':\n-            try:\n-                core_map = ET.parse(args.custom_map)\n-            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n-                sys.exit(\'Execution aborted: custom map in wrong format\')\n-        elif args.yes_no == \'no\':\n-            core_map = 
ET.parse(args.tool_dir + \'/local/HMRcoreMap.svg\')\n-    else:       \n-        core_map = ET.parse(args.tool_dir+\'/local/HMRcoreMap.svg\')\n-        \n-    maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)\n-        \n-    print(\'Execution succeded\')\n-\n-    return None\n-\n-###############################################################################\n-\n-if __name__ == "__main__":\n-    main()\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/marea.xml
--- a/marea-1.0.1/marea.xml Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,223 +0,0 @@\n-<tool id="MaREA" name="Metabolic Enrichment Analysis" version="1.0.1">\n-    <description>for Galaxy - 1.0.1</description>\n-    <macros>\n-        <import>marea_macros.xml</import>\n-    </macros>\n-    <requirements>\n-        <requirement type="package" version="0.23.0">pandas</requirement>\n-        <requirement type="package" version="1.1.0">scipy</requirement>\n-        <requirement type="package" version="0.10.1">cobra</requirement>\n-        <requirement type="package" version="4.2.1">lxml</requirement>\n-        <requirement type="package" version="0.8.1">svglib</requirement>\n-        <requirement type="package" version="3.4.0">reportlab</requirement>\n-    </requirements>\n-    <command detect_errors="exit_code">\n-        <![CDATA[\n-      \tpython $__tool_directory__/marea.py\n-        --rules_selector $cond_rule.rules_selector\n-        #if $cond_rule.rules_selector == \'Custom\':\n-            --custom ${cond_rule.Custom_rules}\n-            --yes_no ${cond_rule.cond_map.yes_no}\n-            #if $cond_rule.cond_map.yes_no == \'yes\':\n-                --custom_map $cond_rule.cond_map.Custom_map\n-            #end if\n-        #end if\n-\t#if $advanced.choice == \'true\':\n-      \t    --none ${advanced.None}\n-      \t    --pValue ${advanced.pValue}\n-      \t    --fChange ${advanced.fChange}\n-\t    --generate_svg ${advanced.generateSvg}\n-\t    --generate_pdf ${advanced.generatePdf}\n-\t    --generate_ras ${advanced.generateRas}\n-\t#else \n-\t    --none true\n-\t    --pValue 0.05\n-\t    --fChange 1.5\n-\t    --generate_svg false\n-\t    --generate_pdf true\n-\t    --generate_ras false\n-\t#end if\n-      \t--tool_dir $__tool_directory__\n-      \t--option $cond.type_selector\n-        --out_log $log\t\t\n-\t\n-        #if $cond.type_selector == \'datasets\':\n-            --input_datas\n-            #for $data in $cond.input_Datasets:\n-                ${data.input}\n-            #end for\n-            --names\n-            
#for $data in $cond.input_Datasets:\n-                ${data.input_name}\n-            #end for\n-        #elif $cond.type_selector == \'dataset_class\':\n-            --input_data ${input_data}\n-            --input_class ${input_class}\n-        #end if\n-        ]]>\n-    </command>\n-\n-    <inputs>\n-        <conditional name="cond_rule">\n-            <expand macro="options"/>\n-            <when value="HMRcore">\n-            </when>\n-            <when value="Recon">\n-            </when>\n-            <when value="Custom">\n-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\n-                <conditional name="cond_map">\n-                    <param name="yes_no" type="select" label="Custom map? (optional)">\n-                        <option value="no" selected="true">no</option>\n-                        <option value="yes">yes</option>\n-                    </param>\n-                    <when value="yes">\n-                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\n-                    </when>\n-                    <when value="no">\n-                    </when>\n-                </conditional>\n-            </when>\n-        </conditional>\n-        <conditional name="cond">\n-            <param name="type_selector" argument="--option" type="select" label="Input format:">\n-                <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N</option>\n-                <option value="dataset_class">RNAseq of all samples + sample group specification</option>\n-            </param>\n-            <when value="datasets">\n-                <repeat name="input_Datasets" title="RNAseq" min="2">\n-                    <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\n-                    <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Default: Dataset" />\n-                </repeat>\n-         '..b'ity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n-\t\t</when>\n-    \t</conditional>\n-    </inputs>\n-\n-    <outputs>\n-        <data format="txt" name="log" label="${tool.name} - Log" />\n-        <collection name="results" type="list" label="${tool.name} - Results">\n-            <discover_datasets pattern="__name_and_ext__" directory="result"/>\n-        </collection>\n-\t<collection name="ras" type="list" label="${tool.name} - RAS" format_source="tabular">\n-\t    <filter>advanced[\'choice\'] and advanced[\'generateRas\']</filter>\n-    \t    <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n-\t</collection>\n-    </outputs>\n-    <tests>\n-        <test>\n-            <param name="pValue" value="0.56"/>\n-            <output name="log" file="log.txt"/>\n-        </test>\n-    </tests>\n-    <help>\n-<![CDATA[\n-\n-What it does\n--------------\n-\n-This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.\n-\n-Accepted files are: \n-    - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. 
"*classA*" and "*classB*");\n-    - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n-\n-Optional files:\n-    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:\n-\n-\t* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;\n-\t* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).\n-    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.\n-\n-The tool generates:\n-    1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;\n-    2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;\n-    3) a log file (.txt).\n-\n-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.\n-\n-Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).\n-\n-To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.\n-\n-Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".\n-\n-\n-Example input\n--------------\n-\n-**"Custom Rules"** option:\n-\n-Custom Rules Dastaset:\n-\n-@CUSTOM_RULES_EXEMPLE@\n-\n-**"RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N"** option:\n-\n-RNA-seq Dataset 1:\t\t\t\t\t\t\n-\n-@DATASET_EXEMPLE1@\n-\n-RNA-seq Dataset 2:\n-\n-@DATASET_EXEMPLE2@\n-\n-**"RNAseq of all samples + sample group specification"** option:\n-\n-RNA-seq Dataset:\n-\n-@DATASET_EXEMPLE1@\n-\n-Class-file:\n-\n-+------------+------------+   \n-| Patient_ID |    class   |   \n-+============+============+   \n-| TCGAAA3529 |     MSI    |   \n-+------------+------------+    \n-| TCGAA62671 |     MSS    |    \n-+------------+------------+    \n-| TCGAA62672 |     MSI    |   \n-+------------+------------+\n-\n-|\n-\n-.. class:: infomark\n-\n-**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\n-\n-.. class:: infomark\n-\n-**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\n-\n-@REFERENCE@\n-\n-.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\n-.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\n-.. _MaREA cluster analysis: http://link del tool di cluster.org\n-\n-]]>\n-    </help>\n-    <expand macro="citations" />\n-</tool>\n-\t\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/marea_cluster.py
--- a/marea-1.0.1/marea_cluster.py Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,417 +0,0 @@\n-# -*- coding: utf-8 -*-\n-"""\n-Created on Mon Jun 3 19:51:00 2019\n-\n-@author: Narger\n-"""\n-\n-import sys\n-import argparse\n-import os\n-from sklearn.datasets import make_blobs\n-from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering\n-from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster\n-import matplotlib.pyplot as plt\n-import scipy.cluster.hierarchy as shc   \n-import matplotlib.cm as cm\n-import numpy as np\n-import pandas as pd\n-\n-################################# process args ###############################\n-\n-def process_args(args):\n-    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n-                                     description = \'process some value\\\'s\' +\n-                                     \' genes to create class.\')\n-\n-    parser.add_argument(\'-ol\', \'--out_log\', \n-                        help = "Output log")\n-    \n-    parser.add_argument(\'-in\', \'--input\',\n-                        type = str,\n-                        help = \'input dataset\')\n-    \n-    parser.add_argument(\'-cy\', \'--cluster_type\',\n-                        type = str,\n-                        choices = [\'kmeans\', \'meanshift\', \'dbscan\', \'hierarchy\'],\n-                        default = \'kmeans\',\n-                        help = \'choose clustering algorythm\')\n-    \n-    parser.add_argument(\'-k1\', \'--k_min\', \n-                        type = int,\n-                        default = 2,\n-                        help = \'choose minimun cluster number to be generated\')\n-    \n-    parser.add_argument(\'-k2\', \'--k_max\', \n-                        type = int,\n-                        default = 7,\n-                        help = \'choose maximum cluster number to be generated\')\n-    \n-    parser.add_argument(\'-el\', \'--elbow\', \n-                        type = str,\n-                        default = \'false\',\n-               
         choices = [\'true\', \'false\'],\n-                        help = \'choose if you want to generate an elbow plot for kmeans\')\n-    \n-    parser.add_argument(\'-si\', \'--silhouette\', \n-                        type = str,\n-                        default = \'false\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'choose if you want silhouette plots\')\n-    \n-    parser.add_argument(\'-db\', \'--davies\', \n-                        type = str,\n-                        default = \'false\',\n-                        choices = [\'true\', \'false\'],\n-                        help = \'choose if you want davies bouldin scores\')\n-    \n-    parser.add_argument(\'-td\', \'--tool_dir\',\n-                        type = str,\n-                        required = True,\n-                        help = \'your tool directory\')\n-                        \n-    parser.add_argument(\'-ms\', \'--min_samples\',\n-                        type = int,\n-                        help = \'min samples for dbscan (optional)\')\n-                        \n-    parser.add_argument(\'-ep\', \'--eps\',\n-                        type = int,\n-                        help = \'eps for dbscan (optional)\')\n-    \n-    \n-    args = parser.parse_args()\n-    return args\n-\n-########################### warning ###########################################\n-\n-def warning(s):\n-    args = process_args(sys.argv)\n-    with open(args.out_log, \'a\') as log:\n-        log.write(s + "\\n\\n")\n-    print(s)\n-\n-########################## read dataset ######################################\n-    \n-def read_dataset(dataset):\n-    try:\n-        dataset = pd.read_csv(dataset, sep = \'\\t\', header = 0)\n-    except pd.errors.EmptyDataError:\n-        sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n-    if len(dataset.columns) < 2:\n-        sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n-    return 
dataset\n-\n-############################ rewrite_input ###################################\n-    \n-def rewrite_input(dataset):\n-    #Riscrivo il dataset come dizionario di liste, \n-    #non come dizionario di dizionari\n-    \n-    for'..b'   warning("For n_clusters =" + str(n_clusters_) + \n-              "The average silhouette_score is :" + str(silhouette_avg))\n-    \n-    ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL\n-\n-    # Black removed and is used for noise instead.\n-    unique_labels = set(labels)\n-    colors = [plt.cm.Spectral(each)\n-          for each in np.linspace(0, 1, len(unique_labels))]\n-    for k, col in zip(unique_labels, colors):\n-        if k == -1:\n-            # Black used for noise.\n-            col = [0, 0, 0, 1]\n-\n-        class_member_mask = (labels == k)\n-    \n-        xy = dataset[class_member_mask & core_samples_mask]\n-        plt.plot(xy[:, 0], xy[:, 1], \'o\', markerfacecolor=tuple(col),\n-                 markeredgecolor=\'k\', markersize=14)\n-    \n-        xy = dataset[class_member_mask & ~core_samples_mask]\n-        plt.plot(xy[:, 0], xy[:, 1], \'o\', markerfacecolor=tuple(col),\n-                 markeredgecolor=\'k\', markersize=6)\n-\n-    plt.title(\'Estimated number of clusters: %d\' % n_clusters_)\n-    s = \'clustering/dbscan_output/dbscan_plot.png\'\n-    fig = plt.gcf()\n-    fig.set_size_inches(18.5, 10.5, forward = True)\n-    fig.savefig(s, dpi=100)\n-    \n-    \n-    write_to_csv(dataset, labels, \'clustering/dbscan_output/dbscan_results.tsv\')\n-    \n-########################## hierachical #######################################\n-    \n-def hierachical_agglomerative(dataset, k_min, k_max):\n-\n-    if not os.path.exists(\'clustering/agglomerative_output\'):\n-        os.makedirs(\'clustering/agglomerative_output\')\n-    \n-    plt.figure(figsize=(10, 7))  \n-    plt.title("Customer Dendograms")  \n-    shc.dendrogram(shc.linkage(dataset, method=\'ward\'))  \n-    fig = plt.gcf()\n-    
fig.savefig(\'clustering/agglomerative_output/dendogram.png\', dpi=200)\n-    \n-    range_n_clusters = [i for i in range(k_min, k_max+1)]\n-\n-    for n_clusters in range_n_clusters:\n-        \n-        cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity=\'euclidean\', linkage=\'ward\')  \n-        cluster.fit_predict(dataset)  \n-        cluster_labels = cluster.labels_\n-        \n-        silhouette_avg = silhouette_score(dataset, cluster_labels)\n-        warning("For n_clusters =", n_clusters,\n-              "The average silhouette_score is :", silhouette_avg)\n-        \n-        plt.clf()\n-        plt.figure(figsize=(10, 7))  \n-        plt.title("Agglomerative Hierarchical Clustering\\nwith " + str(n_clusters) + " clusters and " + str(silhouette_avg) + " silhouette score")\n-        plt.scatter(dataset[:,0], dataset[:,1], c = cluster_labels, cmap=\'rainbow\') \n-        s = \'clustering/agglomerative_output/hierachical_\' + str(n_clusters) + \'_clusters.png\'\n-        fig = plt.gcf()\n-        fig.set_size_inches(10, 7, forward = True)\n-        fig.savefig(s, dpi=200)\n-        \n-        write_to_csv(dataset, cluster_labels, \'clustering/agglomerative_output/agglomerative_hierarchical_with_\' + str(n_clusters) + \'_clusters.tsv\')\n-        \n-       \n-\n-    \n-############################# main ###########################################\n-\n-\n-def main():\n-    if not os.path.exists(\'clustering\'):\n-        os.makedirs(\'clustering\')\n-\n-    args = process_args(sys.argv)\n-    \n-    #Data read\n-    \n-    X = read_dataset(args.input)\n-    X = pd.DataFrame.to_dict(X, orient=\'list\')\n-    X = rewrite_input(X)\n-    X = pd.DataFrame.from_dict(X, orient = \'index\')\n-    \n-    for i in X.columns:\n-        tmp = X[i][0]\n-        if tmp == None:\n-            X = X.drop(columns=[i])\n-                \n-    X = pd.DataFrame.to_numpy(X)\n-    \n-    \n-    if args.cluster_type == \'kmeans\':\n-        kmeans(args.k_min, 
args.k_max, X, args.elbow, args.silhouette, args.davies)\n-    \n-    if args.cluster_type == \'dbscan\':\n-        dbscan(X, args.eps, args.min_samples)\n-        \n-    if args.cluster_type == \'hierarchy\':\n-        hierachical_agglomerative(X, args.k_min, args.k_max)\n-        \n-##############################################################################\n-\n-if __name__ == "__main__":\n-    main()\n'
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/marea_cluster.xml
--- a/marea-1.0.1/marea_cluster.xml Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,92 +0,0 @@
-<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.1">
-    <description>of Reaction Activity Scores - 1.0.1</description>
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="0.23.0">pandas</requirement>
-        <requirement type="package" version="1.1.0">scipy</requirement>
-        <requirement type="package" version="0.10.1">cobra</requirement>
-        <requirement type="package" version="0.21.3">scikit-learn</requirement>
-        <requirement type="package" version="2.2.2">matplotlib</requirement>
- <requirement type="package" version="1.17">numpy</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
-       python $__tool_directory__/marea_cluster.py
-        --input $input
-       --tool_dir $__tool_directory__
-        --out_log $log
-        #if $data.clust_type == 'kmeans':
-         --k_min ${data.k_min}
-         --k_max ${data.k_max}
-         --elbow ${data.elbow}
-         --silhouette ${data.silhouette}
-        #end if
-        #if $data.clust_type == 'dbscan':
-         #if $data.dbscan_advanced.advanced == 'true'
-         --eps ${data.dbscan_advanced.eps}
-         --min_samples ${data.dbscan_advanced.min_samples}
-         #end if
-        #end if
-        #if $data.clust_type == 'hierarchy':
-         --k_min ${data.k_min}
-         --k_max ${data.k_max}
-       #end if
-        ]]>
-    </command>
-    <inputs>
-        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
-        
-        <conditional name="data">
- <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
-                 <option value="kmeans" selected="true">KMeans</option>
-                 <option value="dbscan">DBSCAN</option>
-                 <option value="hierarchy">Agglomerative Hierarchical</option>
-         </param>
-         <when value="kmeans">
-         <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
-         <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
-         <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/>
-         <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/>
-         </when>
-         <when value="dbscan">
-         <conditional name="dbscan_advanced">
-         <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not, optimal values will be used)">
-         <option value="true">Yes</option>
-         <option value="false">No</option>
-         </param>
-         <when value="false"></when>
-         <when value="true">
-         <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
-         <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
-        
-         </when>
-         </conditional>   
-         </when>
-         <when value="hierarchy">
-         <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
-         <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
-         </when>
- </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="log" label="${tool.name} - Log" />
-        <collection name="results" type="list" label="${tool.name} - Results">
-            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
-        </collection>
-    </outputs>
-    <help>
-<![CDATA[
-
-What it does
--------------
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
-
-
b
diff -r d0e7f14b773f -r c71ac0bb12de marea-1.0.1/marea_macros.xml
--- a/marea-1.0.1/marea_macros.xml Tue Oct 01 06:03:12 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,92 +0,0 @@
-<macros>
-
-    <xml name="options">
-        <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:">
-            <option value="HMRcore" selected="true">HMRcore rules</option>
-            <option value="Recon">Recon 2.2 rules</option>
-            <option value="Custom">Custom rules</option>
-        </param>
-    </xml>
-
-   <token name="@CUSTOM_RULES_EXEMPLE@">
-
-+--------------------+-------------------------------+
-|         id         |     rule (with entrez-id)     |
-+====================+===============================+
-|        SHMT1       |        155060 or 10357        |
-+--------------------+-------------------------------+
-|        NIT2        |      155060 or 100134869      |
-+--------------------+-------------------------------+
-| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
-+--------------------+-------------------------------+
-
-|
-
-    </token>
-
-    <token name="@DATASET_EXEMPLE1@">
-
-+------------+------------+------------+------------+   
-|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   
-+============+============+============+============+   
-| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   
-+------------+------------+------------+------------+    
-| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    
-+------------+------------+------------+------------+    
-| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   
-+------------+------------+------------+------------+    
-| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   
-+------------+------------+------------+------------+    
-| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   
-+------------+------------+------------+------------+ 
-   
-|
-
-    </token>
-
-    <token name="@DATASET_EXEMPLE2@">
-
-+-------------+------------+------------+------------+
-| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
-+=============+============+============+============+
-|    A1BG     |  0.523167  |  0.371355  |  0.925661  |
-+-------------+------------+------------+------------+
-|    A1CF     |  0.568765  |  0.765567  |  0.456789  |
-+-------------+------------+------------+------------+
-|     A2M     |  0.876545  |  0.768933  |  0.987654  |
-+-------------+------------+------------+------------+
-|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |
-+-------------+------------+------------+------------+
-|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |
-+-------------+------------+------------+------------+
-
-|
-
-    </token>
-
-    <token name="@REFERENCE@">
-
-This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_.
-
-.. _BIMIB: https://bimib.disco.unimib.it
-.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English
-.. _University of Milan - Bicocca: https://www.unimib.it/
-
-    </token>
-
-    <xml name="citations">
-        <citations> <!-- example citation -->
-            <citation type="bibtex">
-@online{lh32017,
-  author = {Alex Graudenzi and Davide Maspero and Claudio Isella and Marzia Di Filippo and Giancarlo Mauri and Enzo Medico and Marco Antoniotti and Chiara Damiani},
-  year = {2018},
-  title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq},
-  publisher = {bioRxiv},
-  journal = {bioRxiv},
-  url = {https://www.biorxiv.org/content/early/2018/01/16/248724},
-}
-            </citation>
-        </citations>
-    </xml>
-
-</macros>