Repository 'marea'
hg clone https://toolshed.g2.bx.psu.edu/repos/bimib/marea

Changeset 0:23ac9cf12788 (2018-11-06)
Next changeset 1:9e63d5f02d62 (2018-11-07)
Commit message:
Uploaded
added:
Marea/local/HMRcoreMap.svg
Marea/local/HMRcore_genes.p
Marea/local/HMRcore_rules.p
Marea/local/Recon_genes.p
Marea/local/Recon_rules.p
Marea/marea.py
Marea/marea.xml
Marea/marea_cluster.py
Marea/marea_cluster.xml
b
diff -r 000000000000 -r 23ac9cf12788 Marea/local/HMRcoreMap.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/local/HMRcoreMap.svg Tue Nov 06 03:16:21 2018 -0500
b
b'@@ -0,0 +1,7702 @@\n+<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n+<!-- Generator: Adobe Illustrator 22.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->\n+\n+<svg\n+   xmlns:dc="http://purl.org/dc/elements/1.1/"\n+   xmlns:cc="http://creativecommons.org/ns#"\n+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n+   xmlns:svg="http://www.w3.org/2000/svg"\n+   xmlns="http://www.w3.org/2000/svg"\n+   xmlns:xlink="http://www.w3.org/1999/xlink"\n+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"\n+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"\n+   version="1.1"\n+   x="0px"\n+   y="0px"\n+   viewBox="0 0 1904.8016 1511.2752"\n+   xml:space="preserve"\n+   id="svg2"\n+   inkscape:version="0.91 r13725"\n+   sodipodi:docname="HMRcoreMap.svg"\n+   width="1904.8015"\n+   height="1511.2753"><metadata\n+     id="metadata2021"><rdf:RDF><cc:Work\n+         rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type\n+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title /></cc:Work></rdf:RDF></metadata><defs\n+     id="defs2019"><sodipodi:namedview\n+       showguides="true"\n+       showgrid="true"\n+       pagecolor="#ffffff"\n+       inkscape:zoom="1.4702451"\n+       inkscape:window-y="-8"\n+       inkscape:window-x="-8"\n+       inkscape:window-width="1920"\n+       inkscape:window-maximized="1"\n+       inkscape:window-height="1017"\n+       inkscape:snap-page="false"\n+       inkscape:snap-grids="true"\n+       inkscape:pageshadow="2"\n+       inkscape:pageopacity="0.0"\n+       inkscape:document-units="px"\n+       inkscape:cy="338.10986"\n+       inkscape:cx="1343.7768"\n+       inkscape:current-layer="layer1"\n+       id="base"\n+       fit-margin-top="0"\n+       fit-margin-right="0"\n+       fit-margin-left="0"\n+       fit-margin-bottom="0"\n+       borderopacity="1.0"\n+       bordercolor="#666666"><inkscape:grid\n+         type="xygrid"\n+         originy="72.926308"\n+       
  originx="-97.409688"\n+         id="grid3434"\n+         dotted="true" /></sodipodi:namedview></defs><sodipodi:namedview\n+     pagecolor="#ffffff"\n+     bordercolor="#666666"\n+     borderopacity="1"\n+     objecttolerance="10"\n+     gridtolerance="10"\n+     guidetolerance="10"\n+     inkscape:pageopacity="0"\n+     inkscape:pageshadow="2"\n+     inkscape:window-width="1920"\n+     inkscape:window-height="1017"\n+     id="namedview2017"\n+     showgrid="false"\n+     inkscape:zoom="0.44727204"\n+     inkscape:cx="497.63252"\n+     inkscape:cy="796.80241"\n+     inkscape:window-x="-8"\n+     inkscape:window-y="-8"\n+     inkscape:window-maximized="1"\n+     inkscape:current-layer="svg2"\n+     fit-margin-top="0"\n+     fit-margin-left="0"\n+     fit-margin-right="0"\n+     fit-margin-bottom="0" /><style\n+     type="text/css"\n+     id="style4">\n+\t.st0{display:none;}\n+\t.st1{display:inline;}\n+\t.st2{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;}\n+\t.st3{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;stroke-dasharray:11.9422,11.9422;}\n+\t.st4{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;stroke-dasharray:12.1815,12.1815;}\n+\t.st5{font-family:\'Helvetica\';}\n+\t.st6{font-size:30px;}\n+\t.st7{font-size:39.262px;}\n+\t.st8{fill:none;stroke:#0000FF;stroke-width:30;}\n+\t.st9{fill:none;stroke:#E41A1C;stroke-width:30;}\n+\t.st10{fill:none;stroke:#BEBEBE;stroke-width:30;}\n+\t.st11{stroke:#000000;stroke-width:30;}\n+\t.st12{fill:none;stroke:#BEBEBE;stroke-width:30;stroke-dasharray:30,30;stroke-dashoffset:6;}\n+\t.st13{fill:none;stroke:#000000;stroke-width:1.8444;}\n+\t.st14{fill:none;stroke:#000000;stroke-width:2.1821;}\n+\t.st15{font-family:\'Calibri-Bold\';}\n+\t.st16{font-size:16px;}\n+\t.st17{font-family:\'Calibri\';}\n+\t.st18{font-size:10px;}\n+\t.st19{fill:none;stroke:#000000;stroke-width:1.8856;}\n+\t.st20{fill:none;stroke:#000000;stroke-width:1.9459;}\n+\t.st21{fill:none;stroke:#000000;stroke-width:2.2892
;}\n+\t.st22{fill:none;stroke:#000000;stroke-width:2.5;}\n+\t.st23{fill:none;stroke:#000000;stroke-width:1.9412;}\n+\t.st24{fill:none;str'..b'31.89,1231.8186 2.2,-7.3 2.2,7.3 -2.2,-1.8 -2.2,1.8 z"\n+     class="st14"\n+     inkscape:label="Glutamine_DM_COOP b"\n+     inkscape:connector-curvature="0"\n+     id="B_Glutamine_DM_COOP" /><path\n+     style="fill:none;stroke:#000000;stroke-width:2.18210006"\n+     d="m 1233.89,1279.4186 0,-48"\n+     class="st14"\n+     inkscape:label="Glutamine_DM_COOP"\n+     inkscape:connector-curvature="0"\n+     id="R_Glutamine_DM_COOP" /><flowRoot\n+     xml:space="preserve"\n+     id="flowRoot5366"\n+     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     transform="translate(-20.6,18.418554)"><flowRegion\n+       id="flowRegion5368"><rect\n+         id="rect5370"\n+         width="1165.1471"\n+         height="77.465683"\n+         x="306.70087"\n+         y="-39.523308" /></flowRegion><flowPara\n+       id="flowPara5372" /></flowRoot><flowRoot\n+     xml:space="preserve"\n+     id="TitoloConfronto"\n+     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     inkscape:label="TitoloConfronto"\n+     transform="translate(-18.364224,56.426743)"><flowRegion\n+       id="flowRegion5376"><rect\n+         id="rect5378"\n+         width="1869.6877"\n+         height="68.569115"\n+         x="301.95807"\n+         y="-69.56102" /></flowRegion><flowPara\n+       id="TitleText"\n+       
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif">TITOLO: TITOLOTITOLO </flowPara></flowRoot><flowRoot\n+     xml:space="preserve"\n+     id="flowRoot5382"\n+     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     transform="translate(-16.64767,38.180207)"><flowRegion\n+       id="flowRegion5384"><rect\n+         id="rect5386"\n+         width="275.00043"\n+         height="149.79698"\n+         x="1681.3033"\n+         y="204.59315" /></flowRegion><flowPara\n+       id="flowPara5390"\n+       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:\'sans-serif Bold\'">Fold Change</flowPara></flowRoot><flowRoot\n+     xml:space="preserve"\n+     id="FC_min"\n+     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     transform="translate(-8.622366,131.05768)"\n+     inkscape:label="FC_min"><flowRegion\n+       id="flowRegion5384-2"><rect\n+         id="rect5386-9"\n+         width="275.00043"\n+         height="149.79698"\n+         x="1681.3033"\n+         y="204.59315" /></flowRegion><flowPara\n+       id="Val_FC_min">min: </flowPara></flowRoot><flowRoot\n+     xml:space="preserve"\n+     id="FC_max"\n+     style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     
transform="translate(-17.492772,95.648076)"\n+     inkscape:label="FC_max"><flowRegion\n+       id="flowRegion5384-2-2"><rect\n+         id="rect5386-9-9"\n+         width="275.00043"\n+         height="149.79698"\n+         x="1681.3033"\n+         y="204.59315" /></flowRegion><flowPara\n+       id="Val_FC_max">max:</flowPara></flowRoot></svg>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 23ac9cf12788 Marea/local/HMRcore_genes.p
b
Binary file Marea/local/HMRcore_genes.p has changed
b
diff -r 000000000000 -r 23ac9cf12788 Marea/local/HMRcore_rules.p
b
Binary file Marea/local/HMRcore_rules.p has changed
b
diff -r 000000000000 -r 23ac9cf12788 Marea/local/Recon_genes.p
b
Binary file Marea/local/Recon_genes.p has changed
b
diff -r 000000000000 -r 23ac9cf12788 Marea/local/Recon_rules.p
b
Binary file Marea/local/Recon_rules.p has changed
b
diff -r 000000000000 -r 23ac9cf12788 Marea/marea.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/marea.py Tue Nov 06 03:16:21 2018 -0500
[
b'@@ -0,0 +1,760 @@\n+\n+from __future__ import division\n+import sys\n+import pandas as pd\n+import itertools as it\n+import scipy.stats as st\n+import collections\n+import lxml.etree as ET\n+import pickle as pk\n+import math\n+import os\n+import argparse\n+from svglib.svglib import svg2rlg\n+from reportlab.graphics import renderPDF\n+\n+########################## argparse ###########################################\n+\n+def process_args(args):\n+    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n+                                     description = \'process some value\\\'s\'+\n+                                     \' genes to create a comparison\\\'s map.\')\n+    parser.add_argument(\'-rs\', \'--rules_selector\', \n+                        type = str,\n+                        default = \'HMRcore\',\n+                        choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n+                        help = \'chose which type of dataset you want use\')\n+    parser.add_argument(\'-cr\', \'--custom\',\n+                        type = str,\n+                        help=\'your dataset if you want custom rules\')\n+    parser.add_argument(\'-na\', \'--names\', \n+                        type = str,\n+                        nargs = \'+\', \n+                        help = \'input names\')\n+    parser.add_argument(\'-n\', \'--none\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'], \n+                        help = \'compute Nan values\')\n+    parser.add_argument(\'-pv\' ,\'--pValue\', \n+                        type = float, \n+                        default = 0.05, \n+                        help = \'P-Value threshold (default: %(default)s)\')\n+    parser.add_argument(\'-fc\', \'--fChange\', \n+                        type = float, \n+                        default = 1.5, \n+                        help = \'Fold-Change threshold (default: 
%(default)s)\')\n+    parser.add_argument(\'-td\', \'--tool_dir\',\n+                        type = str,\n+                        required = True,\n+                        help = \'your tool directory\')\n+    parser.add_argument(\'-op\', \'--option\', \n+                        type = str, \n+                        choices = [\'datasets\', \'dataset_class\'],\n+                        help=\'dataset or dataset and class\')\n+    parser.add_argument(\'-ol\', \'--out_log\', \n+                        help = "Output log")    \n+    parser.add_argument(\'-ids\', \'--input_datas\', \n+                        type = str,\n+                        nargs = \'+\', \n+                        help = \'input datasets\')\n+    parser.add_argument(\'-id\', \'--input_data\',\n+                        type = str,\n+                        help = \'input dataset\')\n+    parser.add_argument(\'-ic\', \'--input_class\', \n+                        type = str, \n+                        help = \'sample group specification\')\n+    parser.add_argument(\'-cm\', \'--custom_map\', \n+                        type = str, \n+                        help = \'custom map\')\n+    parser.add_argument(\'-yn\', \'--yes_no\', \n+                        type = str,\n+                        choices = [\'yes\', \'no\'],\n+                        help = \'if make or not custom map\')\n+    args = parser.parse_args()\n+    return args\n+\n+########################### warning ###########################################\n+\n+def warning(s):\n+    args = process_args(sys.argv)\n+    with open(args.out_log, \'a\') as log:\n+            log.write(s)\n+            \n+############################ dataset input ####################################\n+\n+def read_dataset(data, name):\n+    try:\n+        dataset = pd.read_csv(data, sep = \'\\t\', header = 0)\n+    except pd.errors.EmptyDataError:\n+        sys.exit(\'Execution aborted: wrong format of \' + name + \'\\n\')\n+    if len(dataset.columns) < 2:\n+ 
       sys.exit(\'Execution aborted: wrong format of \' + name + \'\\n\')\n+    return dataset\n+\n+############################ dataset name #####################################\n+\n+def name_dataset(name_data, count):\n+    if str(name_data) '..b'method=\'xml\'))\n+            file_pdf = \'map_pdf/\' + i + \'_vs_\' + j + \'.pdf\'\n+            renderPDF.drawToFile(svg2rlg(file_svg), file_pdf)\n+    return None\n+\n+############################ MAIN #############################################\n+\n+def main():\n+    args = process_args(sys.argv)\n+    os.makedirs(\'table_out\')\n+    if args.rules_selector == \'HMRcore\':\n+        os.makedirs(\'map_svg\')\n+        os.makedirs(\'map_pdf\')\n+        recon = pk.load(open(args.tool_dir + \'/local/HMRcore_rules.p\', \'rb\'))\n+    elif args.rules_selector == \'Recon\':\n+        recon = pk.load(open(args.tool_dir + \'/local/Recon_rules.p\', \'rb\'))\n+    elif args.rules_selector == \'Custom\':\n+        ids, rules, gene_in_rule = make_recon(args.custom)\n+    resolve_none = check_bool(args.none)\n+    class_pat = {}\n+    if args.option == \'datasets\':\n+        num = 1\n+        #if len(args.names) != len(set(args.names)):\n+        #    sys.exit(\'Execution aborted: datasets name duplicated\')\n+        for i, j in zip(args.input_datas, args.names):\n+            name = name_dataset(j, num)\n+            dataset = read_dataset(i, name)\n+            dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+            type_gene = gene_type(dataset.iloc[0, 0], name)\n+            if args.rules_selector != \'Custom\':\n+                genes = data_gene(dataset, type_gene, name, None)\n+                ids, rules = load_id_rules(recon.get(type_gene))\n+            elif args.rules_selector == \'Custom\':\n+                genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+            resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+            if err != None and err:\n+                
warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n+                    + name + \', the expression level for this gene \' +\n+                    \'will be considered NaN\\n\')\n+            if resolve_rules != None:\n+                class_pat[name] = list(map(list, zip(*resolve_rules.values())))\n+            num += 1\n+    elif args.option == \'dataset_class\':\n+        name = \'RNAseq\'\n+        dataset = read_dataset(args.input_data, name)\n+        dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+        type_gene = gene_type(dataset.iloc[0, 0], name)\n+        classes = read_dataset(args.input_class, \'class\')\n+        if not len(classes.columns) == 2:\n+            warning(\'Warning: more than 2 columns in class file. Extra\' +\n+                    \'columns have been disregarded\\n\')\n+        classes = classes.astype(str)\n+        if args.rules_selector != \'Custom\':\n+            genes = data_gene(dataset, type_gene, name, None)\n+            ids, rules = load_id_rules(recon.get(type_gene))\n+        elif args.rules_selector == \'Custom\':\n+            genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+        resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+        if err != None and err:\n+            warning(\'Warning: gene\\n\'+str(err)+\'\\nnot found in class \'\n+                    + name + \', the expression level for this gene \' +\n+                    \'will be considered NaN\\n\')\n+        if resolve_rules != None:\n+            class_pat = split_class(classes, resolve_rules)\n+    if args.rules_selector == \'Custom\':\n+        if args.yes_no == \'yes\':\n+            os.makedirs(\'map_svg\')\n+            os.makedirs(\'map_pdf\')\n+            try:\n+                core_map = ET.parse(args.custom_map)\n+            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n+                sys.exit(\'Execution aborted: custom map in wrong format\')\n+        elif args.yes_no == 
\'no\':\n+            core_map = ET.parse(args.tool_dir + \'/local/HMRcoreMap.svg\')\n+    else:       \n+        core_map = ET.parse(args.tool_dir+\'/local/HMRcoreMap.svg\')\n+    maps(core_map, class_pat, ids, args.pValue, args.fChange)\n+    warning(\'Execution succeeded\')\n+    return None\n+\n+###############################################################################\n+\n+if __name__ == "__main__":\n+    main()\n'
b
diff -r 000000000000 -r 23ac9cf12788 Marea/marea.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/marea.xml Tue Nov 06 03:16:21 2018 -0500
[
b'@@ -0,0 +1,240 @@\n+<tool id="MaREA" name="Metabolic Enrichment Analysis">\r\n+    <description>for Galaxy</description>\r\n+    <requirements>\r\n+        <requirement type="package">pandas</requirement>\r\n+        <requirement type="package">scipy</requirement>\r\n+        <requirement type="package">lxml</requirement>\r\n+        <requirement type="package">svglib</requirement>\r\n+        <requirement type="package">reportlab</requirement>\r\n+        <requirement type="package">cobrapy</requirement>\r\n+        <requirement type="package">python-libsbml</requirement>\r\n+    </requirements>\r\n+    <command>\r\n+        <![CDATA[\r\n+      \tpython $__tool_directory__/marea.py\r\n+        --rules_selector $cond_rule.rules_selector\r\n+        #if $cond_rule.rules_selector == \'Custom\':\r\n+            --custom ${cond_rule.Custom_rules}\r\n+            --yes_no ${cond_rule.cond_map.yes_no}\r\n+            #if $cond_rule.cond_map.yes_no == \'yes\':\r\n+                --custom_map $cond_rule.cond_map.Custom_map\r\n+            #end if\r\n+        #end if\r\n+      \t--none $None\r\n+      \t--pValue $pValue\r\n+      \t--fChange $fChange\r\n+      \t--tool_dir $__tool_directory__\r\n+      \t--option $cond.type_selector\r\n+        --out_log $log\r\n+        #if $cond.type_selector == \'datasets\':\r\n+            --input_datas\r\n+            #for $data in $cond.input_Datasets:\r\n+                ${data.input}\r\n+            #end for\r\n+            --names\r\n+            #for $data in $cond.input_Datasets:\r\n+                ${data.input_name}\r\n+            #end for\r\n+        #elif $cond.type_selector == \'dataset_class\':\r\n+            --input_data ${input_data}\r\n+            --input_class ${input_class}\r\n+        #end if\r\n+        ]]>\r\n+    </command>\r\n+    <inputs>\r\n+        <conditional name="cond_rule">\r\n+            <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:">\r\n+                <option 
value="HMRcore" selected="true">HMRcore rules</option>\r\n+                <option value="Recon">Recon 2.2 rules</option>\r\n+                <option value="Custom">Custom rules</option>\r\n+            </param>\r\n+            <when value="Custom">\r\n+                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules"/>\r\n+                <conditional name="cond_map">\r\n+                    <param name="yes_no" type="select" label="Custom map? (optional)">\r\n+                        <option value="no" selected="true">no</option>\r\n+                        <option value="yes">yes</option>\r\n+                    </param>\r\n+                    <when value="yes">\r\n+                        <param name="Custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\r\n+                    </when>\r\n+                </conditional>\r\n+            </when>\r\n+        </conditional>\r\n+        <conditional name="cond">\r\n+            <param name="type_selector" type="select" label="Input format:">\r\n+                <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + \xe2\x80\xa6 + RNAseq of group N</option>\r\n+                <option value="dataset_class">RNAseq of all samples + sample group specification</option>\r\n+            </param>\r\n+            <when value="datasets">\r\n+                <repeat name="input_Datasets" title="RNAseq" type="data" min="2">\r\n+                    <param name="input" type="data" format="tabular, csv, tsv" label="add dataset"/>\t\r\n+                    <param name="input_name" type="text" label="Dataset\'s name:" value="Dataset" help="Defalut: Dataset"/>\r\n+\t        </repeat>\r\n+            </when>\r\n+            <when value="dataset_class">\r\n+                <param name="input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples"/>\r\n+                <param name="input_class" type="data" format="tabular, csv, tsv" 
label="Sample group specification"/>\r\n+            </when>\r\n+        </conditional>\r\n+        <param name="None" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A '..b'A_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label \xe2\x80\x9cclassA\xe2\x80\x9d.\r\n+\r\n+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\r\n+\r\n+\r\n+Example input\r\n+-------------\r\n+\r\n+**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N" exemple input"** option:\r\n+\r\n+Dataset 1:\t\t\t\t\t\t\r\n+\r\n++------------+------------+------------+------------+   \r\n+|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   \r\n++============+============+============+============+   \r\n+| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   \r\n++------------+------------+------------+------------+    \r\n+| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    \r\n++------------+------------+------------+------------+    \r\n+| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   \r\n++------------+------------+------------+------------+    \r\n+| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   \r\n++------------+------------+------------+------------+    \r\n+| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   \r\n++------------+------------+------------+------------+ \r\n+   \r\n+|\r\n+\r\n+Dataset 2:\r\n+\r\n++-------------+------------+------------+------------+\r\n+| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |\r\n++=============+============+============+============+\r\n+|    A1BG     |  0.523167  |  0.371355  |  0.925661  |\r\n++-------------+------------+------------+------------+\r\n+|    A1CF     |  0.568765  |  0.765567  |  0.456789  |\r\n++-------------+------------+------------+------------+\r\n+|     A2M     |  0.876545  |  0.768933  |  0.987654  
|\r\n++-------------+------------+------------+------------+\r\n+|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |\r\n++-------------+------------+------------+------------+\r\n+|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |\r\n++-------------+------------+------------+------------+\r\n+\r\n+|\r\n+\r\n+**"RNAseq of all samples + sample group specification"** option:\r\n+\r\n+Dataset:\r\n+\r\n++------------+------------+------------+------------+   \r\n+|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   \r\n++============+============+============+============+   \r\n+| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   \r\n++------------+------------+------------+------------+    \r\n+| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    \r\n++------------+------------+------------+------------+    \r\n+| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   \r\n++------------+------------+------------+------------+    \r\n+| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   \r\n++------------+------------+------------+------------+    \r\n+| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   \r\n++------------+------------+------------+------------+ \r\n+\r\n+|\r\n+\r\n+Class-file:\r\n+\r\n++------------+------------+   \r\n+| Patient_ID |    class   |   \r\n++============+============+   \r\n+| TCGAAA3529 |     MSI    |   \r\n++------------+------------+    \r\n+| TCGAA62671 |     MSS    |    \r\n++------------+------------+    \r\n+| TCGAA62672 |     MSI    |   \r\n++------------+------------+\r\n+\r\n+|\r\n+\r\n+\r\n+\r\n+.. class:: warningmark\r\n+\r\n+This tool expects input datasets consisting of tab-delimited columns.\r\n+\r\n+\r\n+.. class:: infomark\r\n+\r\n+TIP: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\r\n+\r\n+.. 
class:: infomark\r\n+\r\n+TIP: If your dataset is not split into classes, use `Cluster for MaREA`_.\r\n+\r\n+This tool is developed by the `nome del gruppo di bioinformatica`_ at the `dipartimento di informatica disco`_.\r\n+\r\n+\r\n+.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\r\n+.. _Cluster for MaREA: http://link del tool di cluster.org/\r\n+.. _nome del gruppo di bioinformatica: http://sito di bio.org\r\n+.. _dipartimento di informatica disco : http://www.disco.unimib.it/go/Home/English\r\n+\r\n+]]>\r\n+    </help>\r\n+</tool>\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n+\t\r\n'
b
diff -r 000000000000 -r 23ac9cf12788 Marea/marea_cluster.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/marea_cluster.py Tue Nov 06 03:16:21 2018 -0500
[
b'@@ -0,0 +1,608 @@\n+\n+from __future__ import division\n+import os\n+import sys\n+import pandas as pd\n+import collections\n+import pickle as pk\n+import argparse\n+from sklearn.cluster import KMeans\n+import matplotlib.pyplot as plt\n+\n+########################## argparse ###########################################\n+\n+def process_args(args):\n+    parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n+                                     description = \'process some value\\\'s\' +\n+                                     \' genes to create class.\')\n+    parser.add_argument(\'-rs\', \'--rules_selector\', \n+                        type = str,\n+                        default = \'HMRcore\',\n+                        choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n+                        help = \'chose which type of dataset you want use\')\n+    parser.add_argument(\'-cr\', \'--custom\',\n+                        type = str,\n+                        help=\'your dataset if you want custom rules\')\n+    parser.add_argument(\'-ch\', \'--cond_hier\', \n+                        type = str,\n+                        default = \'no\',\n+                        choices = [\'no\', \'yes\'], \n+                        help = \'chose if you wanna hierical dendrogram\')\n+    parser.add_argument(\'-lk\', \'--k_min\', \n+                        type = int,\n+                        help = \'min number of cluster\')\n+    parser.add_argument(\'-uk\', \'--k_max\', \n+                        type = int,\n+                        help = \'max number of cluster\')\n+    parser.add_argument(\'-li\', \'--linkage\', \n+                        type = str, \n+                        choices = [\'single\', \'complete\', \'average\'], \n+                        help=\'linkage hierarchical cluster\')\n+    parser.add_argument(\'-d\', \'--data\',\n+                        type = str,\n+                        required = True,\n+                        help = \'input 
dataset\')\n+    parser.add_argument(\'-n\', \'--none\',\n+                        type = str,\n+                        default = \'true\',\n+                        choices = [\'true\', \'false\'], \n+                        help = \'compute Nan values\')\n+    parser.add_argument(\'-td\', \'--tool_dir\',\n+                        type = str,\n+                        required = True,\n+                        help = \'your tool directory\')\n+    parser.add_argument(\'-na\', \'--name\',\n+                        type = str,\n+                        help = \'name of dataset\')\n+    parser.add_argument(\'-de\', \'--dendro\', \n+                        help = "Dendrogram out")\n+    parser.add_argument(\'-ol\', \'--out_log\', \n+                        help = "Output log")\n+    parser.add_argument(\'-el\', \'--elbow\', \n+                        help = "Out elbow")\n+    args = parser.parse_args()\n+    return args\n+\n+########################### warning ###########################################\n+\n+def warning(s):\n+    args = process_args(sys.argv)\n+    with open(args.out_log, \'a\') as log:\n+            log.write(s)\n+            \n+############################ dataset input ####################################\n+\n+def read_dataset(data, name):\n+    try:\n+        dataset = pd.read_csv(data, sep = \'\\t\', header = 0)\n+    except pd.errors.EmptyDataError:\n+        sys.exit(\'Execution aborted: wrong format of \'+name+\'\\n\')\n+    if len(dataset.columns) < 2:\n+        sys.exit(\'Execution aborted: wrong format of \'+name+\'\\n\')\n+    return dataset\n+\n+############################ dataset name #####################################\n+\n+def name_dataset(name_data, count):\n+    if str(name_data) == \'Dataset\':\n+        return str(name_data) + \'_\' + str(count)\n+    else:\n+        return str(name_data)\n+    \n+############################ load id e rules ##################################\n+\n+def load_id_rules(reactions):\n+    ids, rules 
= [], []\n+    for key, value in reactions.items():\n+            ids.append(key)\n+            rules.append(value)\n+    return (ids, rules)\n+\n+############################ check_methods ####################################\n+\n+def'..b'          if tmp:\n+                tmp, err = replace_gene_value(tmp, value)\n+                if err:\n+                    not_found.extend(err)\n+                ris = control(None, tmp, resolve_none)\n+                if ris is False or ris == None:\n+                    tmp_resolve.append(None)\n+                else:\n+                    tmp_resolve.append(ris)\n+                    flag = True\n+            else:\n+                tmp_resolve.append(None)        \n+        resolve_rules[key] = tmp_resolve\n+    if flag is False:\n+        sys.exit(\'Execution aborted: no computable score\' +\n+                 \' (due to missing gene values) for class \'\n+                 + name + \', the class has been disregarded\\n\')\n+    return (resolve_rules, list(set(not_found)))\n+\n+################################# clustering ##################################\n+\n+def f_cluster(resolve_rules):\n+    os.makedirs(\'cluster_out\')\n+    args = process_args(sys.argv)\n+    cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = \'index\')\n+    for i in cluster_data.columns:\n+        tmp = cluster_data[i][0]\n+        if tmp == None:\n+            cluster_data = cluster_data.drop(columns=[i])\n+    distorsion = []\n+    for i in range(args.k_min, args.k_max+1):\n+        tmp_kmeans = KMeans(n_clusters = i,\n+                            n_init = 100, \n+                            max_iter = 300,\n+                            random_state = 0).fit(cluster_data)\n+        distorsion.append(tmp_kmeans.inertia_)\n+        predict = tmp_kmeans.predict(cluster_data)\n+        predict = [x+1 for x in predict]\n+        classe = (pd.DataFrame(zip(cluster_data.index, predict))).astype(str)\n+        dest = \'cluster_out/K=\' + 
str(i) + \'_\' + args.name+\'.tsv\'\n+        classe.to_csv(dest, sep = \'\\t\', index = False,\n+                      header = [\'Patient_ID\', \'Class\'])\n+    plt.figure(0)\n+    plt.plot(range(args.k_min, args.k_max+1), distorsion, marker = \'o\')\n+    plt.xlabel(\'Number of cluster\')\n+    plt.ylabel(\'Distorsion\')\n+    plt.savefig(args.elbow, dpi = 240, format = \'pdf\')\n+    if args.cond_hier == \'yes\':\n+        import scipy.cluster.hierarchy as hier\n+        lin = hier.linkage(cluster_data, args.linkage)\n+        plt.figure(1)\n+        plt.figure(figsize=(10, 5))\n+        hier.dendrogram(lin, leaf_font_size = 2, labels = cluster_data.index)\n+        plt.savefig(args.dendro, dpi = 480, format = \'pdf\')\n+    return None\n+\n+################################# main ########################################\n+\n+def main():\n+    args = process_args(sys.argv)\n+    if args.k_min > args.k_max:\n+        sys.exit(\'Execution aborted: max cluster > min cluster\')\n+    if args.rules_selector == \'HMRcore\':\n+        recon = pk.load(open(args.tool_dir + \'/local/HMRcore_rules.p\', \'rb\'))\n+    elif args.rules_selector == \'Recon\':\n+        recon = pk.load(open(args.tool_dir + \'/local/Recon_rules.p\', \'rb\'))\n+    elif args.rules_selector == \'Custom\':\n+        ids, rules, gene_in_rule = make_recon(args.custom)\n+    resolve_none = check_bool(args.none)\n+    dataset = read_dataset(args.data, args.name)\n+    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+    type_gene = gene_type(dataset.iloc[0, 0], args.name)\n+    if args.rules_selector != \'Custom\':\n+        genes = data_gene(dataset, type_gene, args.name, None)\n+        ids, rules = load_id_rules(recon.get(type_gene))\n+    elif args.rules_selector == \'Custom\':\n+        genes = data_gene(dataset, type_gene, args.name, gene_in_rule)\n+    resolve_rules, err = resolve(genes, rules, ids, resolve_none, args.name)\n+    if err:\n+        warning(\'WARNING: gene\\n\' + str(err) 
+ \'\\nnot found in class \'  \n+                + args.name + \', the expression level for this gene \' +\n+                \'will be considered NaN\\n\')\n+    f_cluster(resolve_rules)\n+    warning(\'Execution succeeded\')\n+    return None\n+\n+###############################################################################\n+\n+if __name__ == "__main__":\n+    main()\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 23ac9cf12788 Marea/marea_cluster.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/marea_cluster.xml Tue Nov 06 03:16:21 2018 -0500
[
@@ -0,0 +1,105 @@
+<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.0">
+    <!-- Galaxy wrapper around marea_cluster.py. The id keeps its published
+         spelling so that existing references to the tool stay valid. -->
+    <description>of Reaction Activity Scores</description>
+    <requirements>
+        <requirement type="package">pandas</requirement>
+        <requirement type="package">scikit-learn</requirement>
+        <requirement type="package">scipy</requirement>
+        <requirement type="package">matplotlib</requirement>
+        <requirement type="package">cobrapy</requirement>
+        <requirement type="package">python-libsbml</requirement>
+    </requirements>
+    <command>
+        <![CDATA[
+        ## Substituted values are single-quoted so a path or name prefix containing
+        ## spaces is still passed to the script as a single argument.
+        python '$__tool_directory__/marea_cluster.py'
+        --rules_selector '$cond_rule.rules_selector'
+        #if $cond_rule.rules_selector == 'Custom':
+            --custom '${cond_rule.Custom_rules}'
+        #end if
+        --cond_hier '$cond_hier.hier'
+        #if $cond_hier.hier == 'yes':
+            --linkage '${cond_hier.linkage}'
+            --dendro '$dendrogram'
+        #end if
+        --k_max $k_max
+        --k_min $k_min
+        --data '$input'
+        --name '$name'
+        --none $None
+        --tool_dir '$__tool_directory__'
+        --out_log '$log'
+        --elbow '$elbow'
+        ]]>
+    </command>
+    <inputs>
+        <conditional name="cond_rule">
+            <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:">
+                <option value="HMRcore" selected="true">HMRcore rules</option>
+                <option value="Recon">Recon 2.2 rules</option>
+                <option value="Custom">Custom rules</option>
+            </param>
+            <when value="HMRcore"/>
+            <when value="Recon"/>
+            <when value="Custom">
+                <param name="Custom_rules" type="data" format="tabular,csv,tsv,xml" label="Custom rules"/>
+            </when>
+        </conditional>
+        <param name="input" type="data" format="tabular,csv,tsv" label="RNAseq of all samples"/>
+        <param name="name" type="text" label="Output name prefix" value="dataset"/>
+        <param name="k_min" type="integer" size="20" value="3" min="2" max="30" label="min number of clusters (k) to be tested (k-means)"/>
+        <param name="k_max" type="integer" size="20" value="3" min="2" max="30" label="max number of clusters (k) to be tested (k-means)"/>
+        <param name="None" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="if NO is selected (A and NaN) is solved as (NaN)"/>
+        <conditional name="cond_hier">
+            <param name="hier" type="select" label="Produce dendrogram (hierarchical clustering):">
+                <option value="no" selected="true">no</option>
+                <option value="yes">yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="linkage" type="select" label="Linkage type:">
+                    <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
+                    <option value="complete">Complete: maximum distance between all observations of two sets</option>
+                    <option value="average">Average: average distance between all observations of two sets</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="txt" name="log" label="Log"/>
+        <data format="pdf" name="dendrogram" label="$name dendrogram">
+            <filter>cond_hier['hier'] == 'yes'</filter>
+        </data>
+        <data format="pdf" name="elbow" label="$name elbow evaluation method"/>
+        <!-- Collection members are discovered from the files found in cluster_out/ after the job ends. -->
+        <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
+            <discover_datasets pattern="__name_and_ext__" directory="cluster_out"/>
+        </collection>
+    </outputs>
+    <help>
+
+.. class:: warningmark
+
+This tool expects input datasets consisting of tab-delimited columns.
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation > Convert delimiters to TAB*
+
+    </help>
+</tool>
+
+
+
+
+
+
+
+
+
+
+
+
+