Previous changeset 14:1a0c8c2780f2 (2019-02-13) Next changeset 16:c71ac0bb12de (2019-10-01) |
Commit message:
Upload 1.0.1 |
added:
marea-1.0.1/local/HMRcoreMap.svg marea-1.0.1/local/HMRcore_genes.p marea-1.0.1/local/HMRcore_rules.p marea-1.0.1/local/Recon_genes.p marea-1.0.1/local/Recon_rules.p marea-1.0.1/local/desktop.ini marea-1.0.1/marea.py marea-1.0.1/marea.xml marea-1.0.1/marea_cluster.py marea-1.0.1/marea_cluster.xml marea-1.0.1/marea_macros.xml |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/local/HMRcoreMap.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/local/HMRcoreMap.svg Tue Oct 01 06:03:12 2019 -0400 |
b |
b'@@ -0,0 +1,7702 @@\n+<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n+<!-- Generator: Adobe Illustrator 22.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->\n+\n+<svg\n+ xmlns:dc="http://purl.org/dc/elements/1.1/"\n+ xmlns:cc="http://creativecommons.org/ns#"\n+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n+ xmlns:svg="http://www.w3.org/2000/svg"\n+ xmlns="http://www.w3.org/2000/svg"\n+ xmlns:xlink="http://www.w3.org/1999/xlink"\n+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"\n+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"\n+ version="1.1"\n+ x="0px"\n+ y="0px"\n+ viewBox="0 0 1904.8016 1511.2752"\n+ xml:space="preserve"\n+ id="svg2"\n+ inkscape:version="0.91 r13725"\n+ sodipodi:docname="HMRcoreMap.svg"\n+ width="1904.8015"\n+ height="1511.2753"><metadata\n+ id="metadata2021"><rdf:RDF><cc:Work\n+ rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type\n+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title /></cc:Work></rdf:RDF></metadata><defs\n+ id="defs2019"><sodipodi:namedview\n+ showguides="true"\n+ showgrid="true"\n+ pagecolor="#ffffff"\n+ inkscape:zoom="1.4702451"\n+ inkscape:window-y="-8"\n+ inkscape:window-x="-8"\n+ inkscape:window-width="1920"\n+ inkscape:window-maximized="1"\n+ inkscape:window-height="1017"\n+ inkscape:snap-page="false"\n+ inkscape:snap-grids="true"\n+ inkscape:pageshadow="2"\n+ inkscape:pageopacity="0.0"\n+ inkscape:document-units="px"\n+ inkscape:cy="338.10986"\n+ inkscape:cx="1343.7768"\n+ inkscape:current-layer="layer1"\n+ id="base"\n+ fit-margin-top="0"\n+ fit-margin-right="0"\n+ fit-margin-left="0"\n+ fit-margin-bottom="0"\n+ borderopacity="1.0"\n+ bordercolor="#666666"><inkscape:grid\n+ type="xygrid"\n+ originy="72.926308"\n+ originx="-97.409688"\n+ id="grid3434"\n+ dotted="true" /></sodipodi:namedview></defs><sodipodi:namedview\n+ pagecolor="#ffffff"\n+ bordercolor="#666666"\n+ borderopacity="1"\n+ objecttolerance="10"\n+ gridtolerance="10"\n+ 
guidetolerance="10"\n+ inkscape:pageopacity="0"\n+ inkscape:pageshadow="2"\n+ inkscape:window-width="1920"\n+ inkscape:window-height="1017"\n+ id="namedview2017"\n+ showgrid="false"\n+ inkscape:zoom="0.44727204"\n+ inkscape:cx="497.63252"\n+ inkscape:cy="796.80241"\n+ inkscape:window-x="-8"\n+ inkscape:window-y="-8"\n+ inkscape:window-maximized="1"\n+ inkscape:current-layer="svg2"\n+ fit-margin-top="0"\n+ fit-margin-left="0"\n+ fit-margin-right="0"\n+ fit-margin-bottom="0" /><style\n+ type="text/css"\n+ id="style4">\n+\t.st0{display:none;}\n+\t.st1{display:inline;}\n+\t.st2{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;}\n+\t.st3{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;stroke-dasharray:11.9422,11.9422;}\n+\t.st4{fill:none;stroke:#5AB6E7;stroke-width:7;stroke-linejoin:round;stroke-dasharray:12.1815,12.1815;}\n+\t.st5{font-family:\'Helvetica\';}\n+\t.st6{font-size:30px;}\n+\t.st7{font-size:39.262px;}\n+\t.st8{fill:none;stroke:#0000FF;stroke-width:30;}\n+\t.st9{fill:none;stroke:#E41A1C;stroke-width:30;}\n+\t.st10{fill:none;stroke:#BEBEBE;stroke-width:30;}\n+\t.st11{stroke:#000000;stroke-width:30;}\n+\t.st12{fill:none;stroke:#BEBEBE;stroke-width:30;stroke-dasharray:30,30;stroke-dashoffset:6;}\n+\t.st13{fill:none;stroke:#000000;stroke-width:1.8444;}\n+\t.st14{fill:none;stroke:#000000;stroke-width:2.1821;}\n+\t.st15{font-family:\'Calibri-Bold\';}\n+\t.st16{font-size:16px;}\n+\t.st17{font-family:\'Calibri\';}\n+\t.st18{font-size:10px;}\n+\t.st19{fill:none;stroke:#000000;stroke-width:1.8856;}\n+\t.st20{fill:none;stroke:#000000;stroke-width:1.9459;}\n+\t.st21{fill:none;stroke:#000000;stroke-width:2.2892;}\n+\t.st22{fill:none;stroke:#000000;stroke-width:2.5;}\n+\t.st23{fill:none;stroke:#000000;stroke-width:1.9412;}\n+\t.st24{fill:none;str'..b'31.89,1231.8186 2.2,-7.3 2.2,7.3 -2.2,-1.8 -2.2,1.8 z"\n+ class="st14"\n+ inkscape:label="Glutamine_DM_COOP b"\n+ inkscape:connector-curvature="0"\n+ id="B_Glutamine_DM_COOP" /><path\n+ 
style="fill:none;stroke:#000000;stroke-width:2.18210006"\n+ d="m 1233.89,1279.4186 0,-48"\n+ class="st14"\n+ inkscape:label="Glutamine_DM_COOP"\n+ inkscape:connector-curvature="0"\n+ id="R_Glutamine_DM_COOP" /><flowRoot\n+ xml:space="preserve"\n+ id="flowRoot5366"\n+ style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+ transform="translate(-20.6,18.418554)"><flowRegion\n+ id="flowRegion5368"><rect\n+ id="rect5370"\n+ width="1165.1471"\n+ height="77.465683"\n+ x="306.70087"\n+ y="-39.523308" /></flowRegion><flowPara\n+ id="flowPara5372" /></flowRoot><flowRoot\n+ xml:space="preserve"\n+ id="TitoloConfronto"\n+ style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+ inkscape:label="TitoloConfronto"\n+ transform="translate(-18.364224,56.426743)"><flowRegion\n+ id="flowRegion5376"><rect\n+ id="rect5378"\n+ width="1869.6877"\n+ height="68.569115"\n+ x="301.95807"\n+ y="-69.56102" /></flowRegion><flowPara\n+ id="TitleText"\n+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:45px;font-family:sans-serif;-inkscape-font-specification:sans-serif">TITOLO: TITOLOTITOLO </flowPara></flowRoot><flowRoot\n+ xml:space="preserve"\n+ id="flowRoot5382"\n+ style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+ transform="translate(-16.64767,38.180207)"><flowRegion\n+ id="flowRegion5384"><rect\n+ id="rect5386"\n+ width="275.00043"\n+ height="149.79698"\n+ 
x="1681.3033"\n+ y="204.59315" /></flowRegion><flowPara\n+ id="flowPara5390"\n+ style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:\'sans-serif Bold\'">Fold Change</flowPara></flowRoot><flowRoot\n+ xml:space="preserve"\n+ id="FC_min"\n+ style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+ transform="translate(-8.622366,131.05768)"\n+ inkscape:label="FC_min"><flowRegion\n+ id="flowRegion5384-2"><rect\n+ id="rect5386-9"\n+ width="275.00043"\n+ height="149.79698"\n+ x="1681.3033"\n+ y="204.59315" /></flowRegion><flowPara\n+ id="Val_FC_min">min: </flowPara></flowRoot><flowRoot\n+ xml:space="preserve"\n+ id="FC_max"\n+ style="font-style:normal;font-weight:normal;font-size:35px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+ transform="translate(-17.492772,95.648076)"\n+ inkscape:label="FC_max"><flowRegion\n+ id="flowRegion5384-2-2"><rect\n+ id="rect5386-9-9"\n+ width="275.00043"\n+ height="149.79698"\n+ x="1681.3033"\n+ y="204.59315" /></flowRegion><flowPara\n+ id="Val_FC_max">max:</flowPara></flowRoot></svg>\n\\ No newline at end of file\n' |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/local/HMRcore_genes.p |
b |
Binary file marea-1.0.1/local/HMRcore_genes.p has changed |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/local/HMRcore_rules.p |
b |
Binary file marea-1.0.1/local/HMRcore_rules.p has changed |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/local/Recon_genes.p |
b |
Binary file marea-1.0.1/local/Recon_genes.p has changed |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/local/Recon_rules.p |
b |
Binary file marea-1.0.1/local/Recon_rules.p has changed |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/local/desktop.ini --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/local/desktop.ini Tue Oct 01 06:03:12 2019 -0400 |
[ |
@@ -0,0 +1,6 @@ +[.ShellClassInfo] +IconResource=C:\WINDOWS\System32\SHELL32.dll,4 +[ViewState] +Mode= +Vid= +FolderType=Generic |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/marea.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/marea.py Tue Oct 01 06:03:12 2019 -0400 |
[ |
b'@@ -0,0 +1,825 @@\n+from __future__ import division\n+import sys\n+import pandas as pd\n+import itertools as it\n+import scipy.stats as st\n+import collections\n+import lxml.etree as ET\n+import shutil\n+import pickle as pk\n+import math\n+import os\n+import argparse\n+from svglib.svglib import svg2rlg\n+from reportlab.graphics import renderPDF\n+\n+########################## argparse ##########################################\n+\n+def process_args(args):\n+ parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n+ description = \'process some value\\\'s\'+\n+ \' genes to create a comparison\\\'s map.\')\n+ parser.add_argument(\'-rs\', \'--rules_selector\', \n+ type = str,\n+ default = \'HMRcore\',\n+ choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n+ help = \'chose which type of dataset you want use\')\n+ parser.add_argument(\'-cr\', \'--custom\',\n+ type = str,\n+ help=\'your dataset if you want custom rules\')\n+ parser.add_argument(\'-na\', \'--names\', \n+ type = str,\n+ nargs = \'+\', \n+ help = \'input names\')\n+ parser.add_argument(\'-n\', \'--none\',\n+ type = str,\n+ default = \'true\',\n+ choices = [\'true\', \'false\'], \n+ help = \'compute Nan values\')\n+ parser.add_argument(\'-pv\' ,\'--pValue\', \n+ type = float, \n+ default = 0.05, \n+ help = \'P-Value threshold (default: %(default)s)\')\n+ parser.add_argument(\'-fc\', \'--fChange\', \n+ type = float, \n+ default = 1.5, \n+ help = \'Fold-Change threshold (default: %(default)s)\')\n+ parser.add_argument(\'-td\', \'--tool_dir\',\n+ type = str,\n+ required = True,\n+ help = \'your tool directory\')\n+ parser.add_argument(\'-op\', \'--option\', \n+ type = str, \n+ choices = [\'datasets\', \'dataset_class\'],\n+ help=\'dataset or dataset and class\')\n+ parser.add_argument(\'-ol\', \'--out_log\', \n+ help = "Output log") \n+ parser.add_argument(\'-ids\', \'--input_datas\', \n+ type = str,\n+ nargs = \'+\', \n+ help = \'input datasets\')\n+ parser.add_argument(\'-id\', \'--input_data\',\n+ 
type = str,\n+ help = \'input dataset\')\n+ parser.add_argument(\'-ic\', \'--input_class\', \n+ type = str, \n+ help = \'sample group specification\')\n+ parser.add_argument(\'-cm\', \'--custom_map\', \n+ type = str, \n+ help = \'custom map\')\n+ parser.add_argument(\'-yn\', \'--yes_no\', \n+ type = str,\n+ choices = [\'yes\', \'no\'],\n+ help = \'if make or not custom map\')\n+ parser.add_argument(\'-gs\', \'--generate_svg\',\n+ type = str,\n+ default = \'true\',\n+ choices = [\'true\', \'false\'], \n+ help = \'generate svg map\')\n+ parser.add_argument(\'-gp\', \'--generate_pdf\',\n+ type = str,\n+ default = \'true\',\n+ choices = [\'true\', \'false\'], \n+ help = \'generate pdf map\')\n+ parser.add_argument(\'-gr\', \'--generate_ras\',\n+ type = str,\n+ default = \'true\',\n+ choices = [\'true\', \'false\'],\n+ help = \'generate reaction activity score\')\n+ args = parser.parse_args()\n+ return args\n+\n+########################### warning ######'..b' #############################################\n+\n+def main():\n+ args = process_args(sys.argv)\n+ \n+ create_svg = check_bool(args.generate_svg)\n+ create_pdf = check_bool(args.generate_pdf)\n+ generate_ras = check_bool(args.generate_ras)\n+ \n+ os.makedirs(\'result\')\n+\n+ if generate_ras:\n+ os.makedirs(\'ras\')\n+ \n+ if args.rules_selector == \'HMRcore\': \n+ recon = pk.load(open(args.tool_dir + \'/local/HMRcore_rules.p\', \'rb\'))\n+ elif args.rules_selector == \'Recon\':\n+ recon = pk.load(open(args.tool_dir + \'/local/Recon_rules.p\', \'rb\'))\n+ elif args.rules_selector == \'Custom\':\n+ ids, rules, gene_in_rule = make_recon(args.custom)\n+ \n+ resolve_none = check_bool(args.none)\n+ \n+ class_pat = {}\n+ \n+ if args.option == \'datasets\':\n+ num = 1\n+ for i, j in zip(args.input_datas, args.names):\n+\n+ name = name_dataset(j, num)\n+ dataset = read_dataset(i, name)\n+\n+ dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+\n+ type_gene = gene_type(dataset.iloc[0, 0], name) \n+ \n+ if 
args.rules_selector != \'Custom\':\n+ genes = data_gene(dataset, type_gene, name, None)\n+ ids, rules = load_id_rules(recon.get(type_gene))\n+ elif args.rules_selector == \'Custom\':\n+ genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+ \n+ resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+\n+ if generate_ras:\n+ create_ras(resolve_rules, name)\n+ \n+ \n+ if err != None and err:\n+ warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n+ + name + \', the expression level for this gene \' +\n+ \'will be considered NaN\\n\')\n+ if resolve_rules != None:\n+ class_pat[name] = list(map(list, zip(*resolve_rules.values())))\n+ num += 1\n+ elif args.option == \'dataset_class\':\n+ name = \'RNAseq\'\n+ dataset = read_dataset(args.input_data, name)\n+ dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n+ type_gene = gene_type(dataset.iloc[0, 0], name)\n+ classes = read_dataset(args.input_class, \'class\')\n+ if not len(classes.columns) == 2:\n+ warning(\'Warning: more than 2 columns in class file. 
Extra\' +\n+ \'columns have been disregarded\\n\')\n+ classes = classes.astype(str)\n+ if args.rules_selector != \'Custom\':\n+ genes = data_gene(dataset, type_gene, name, None)\n+ ids, rules = load_id_rules(recon.get(type_gene))\n+ elif args.rules_selector == \'Custom\':\n+ genes = data_gene(dataset, type_gene, name, gene_in_rule)\n+ resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n+ if err != None and err:\n+ warning(\'Warning: gene\\n\'+str(err)+\'\\nnot found in class \'\n+ + name + \', the expression level for this gene \' +\n+ \'will be considered NaN\\n\')\n+ if resolve_rules != None:\n+ class_pat = split_class(classes, resolve_rules)\n+ \n+ if args.rules_selector == \'Custom\':\n+ if args.yes_no == \'yes\':\n+ try:\n+ core_map = ET.parse(args.custom_map)\n+ except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n+ sys.exit(\'Execution aborted: custom map in wrong format\')\n+ elif args.yes_no == \'no\':\n+ core_map = ET.parse(args.tool_dir + \'/local/HMRcoreMap.svg\')\n+ else: \n+ core_map = ET.parse(args.tool_dir+\'/local/HMRcoreMap.svg\')\n+ \n+ maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)\n+ \n+ print(\'Execution succeded\')\n+\n+ return None\n+\n+###############################################################################\n+\n+if __name__ == "__main__":\n+ main()\n' |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/marea.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/marea.xml Tue Oct 01 06:03:12 2019 -0400 |
[ |
b'@@ -0,0 +1,223 @@\n+<tool id="MaREA" name="Metabolic Enrichment Analysis" version="1.0.1">\n+ <description>for Galaxy - 1.0.1</description>\n+ <macros>\n+ <import>marea_macros.xml</import>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="0.23.0">pandas</requirement>\n+ <requirement type="package" version="1.1.0">scipy</requirement>\n+ <requirement type="package" version="0.10.1">cobra</requirement>\n+ <requirement type="package" version="4.2.1">lxml</requirement>\n+ <requirement type="package" version="0.8.1">svglib</requirement>\n+ <requirement type="package" version="3.4.0">reportlab</requirement>\n+ </requirements>\n+ <command detect_errors="exit_code">\n+ <![CDATA[\n+ \tpython $__tool_directory__/marea.py\n+ --rules_selector $cond_rule.rules_selector\n+ #if $cond_rule.rules_selector == \'Custom\':\n+ --custom ${cond_rule.Custom_rules}\n+ --yes_no ${cond_rule.cond_map.yes_no}\n+ #if $cond_rule.cond_map.yes_no == \'yes\':\n+ --custom_map $cond_rule.cond_map.Custom_map\n+ #end if\n+ #end if\n+\t#if $advanced.choice == \'true\':\n+ \t --none ${advanced.None}\n+ \t --pValue ${advanced.pValue}\n+ \t --fChange ${advanced.fChange}\n+\t --generate_svg ${advanced.generateSvg}\n+\t --generate_pdf ${advanced.generatePdf}\n+\t --generate_ras ${advanced.generateRas}\n+\t#else \n+\t --none true\n+\t --pValue 0.05\n+\t --fChange 1.5\n+\t --generate_svg false\n+\t --generate_pdf true\n+\t --generate_ras false\n+\t#end if\n+ \t--tool_dir $__tool_directory__\n+ \t--option $cond.type_selector\n+ --out_log $log\t\t\n+\t\n+ #if $cond.type_selector == \'datasets\':\n+ --input_datas\n+ #for $data in $cond.input_Datasets:\n+ ${data.input}\n+ #end for\n+ --names\n+ #for $data in $cond.input_Datasets:\n+ ${data.input_name}\n+ #end for\n+ #elif $cond.type_selector == \'dataset_class\':\n+ --input_data ${input_data}\n+ --input_class ${input_class}\n+ #end if\n+ ]]>\n+ </command>\n+\n+ <inputs>\n+ <conditional name="cond_rule">\n+ <expand macro="options"/>\n+ <when 
value="HMRcore">\n+ </when>\n+ <when value="Recon">\n+ </when>\n+ <when value="Custom">\n+ <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\n+ <conditional name="cond_map">\n+ <param name="yes_no" type="select" label="Custom map? (optional)">\n+ <option value="no" selected="true">no</option>\n+ <option value="yes">yes</option>\n+ </param>\n+ <when value="yes">\n+ <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\n+ </when>\n+ <when value="no">\n+ </when>\n+ </conditional>\n+ </when>\n+ </conditional>\n+ <conditional name="cond">\n+ <param name="type_selector" argument="--option" type="select" label="Input format:">\n+ <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N</option>\n+ <option value="dataset_class">RNAseq of all samples + sample group specification</option>\n+ </param>\n+ <when value="datasets">\n+ <repeat name="input_Datasets" title="RNAseq" min="2">\n+ <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\n+ <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Default: Dataset" />\n+ </repeat>\n+ '..b'ity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n+\t\t</when>\n+ \t</conditional>\n+ </inputs>\n+\n+ <outputs>\n+ <data format="txt" name="log" label="${tool.name} - Log" />\n+ <collection name="results" type="list" label="${tool.name} - Results">\n+ <discover_datasets pattern="__name_and_ext__" directory="result"/>\n+ </collection>\n+\t<collection name="ras" type="list" label="${tool.name} - RAS" format_source="tabular">\n+\t <filter>advanced[\'choice\'] and advanced[\'generateRas\']</filter>\n+ \t <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n+\t</collection>\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="pValue" 
value="0.56"/>\n+ <output name="log" file="log.txt"/>\n+ </test>\n+ </tests>\n+ <help>\n+<![CDATA[\n+\n+What it does\n+-------------\n+\n+This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.\n+\n+Accepted files are: \n+ - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");\n+ - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n+\n+Optional files:\n+ - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:\n+\n+\t* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;\n+\t* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).\n+ - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.\n+\n+The tool generates:\n+ 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;\n+ 2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;\n+ 3) a log file (.txt).\n+\n+RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.\n+\n+Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).\n+\n+To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.\n+\n+Output files will be named as classA_vs_classB. 
Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".\n+\n+\n+Example input\n+-------------\n+\n+**"Custom Rules"** option:\n+\n+Custom Rules Dastaset:\n+\n+@CUSTOM_RULES_EXEMPLE@\n+\n+**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:\n+\n+RNA-seq Dataset 1:\t\t\t\t\t\t\n+\n+@DATASET_EXEMPLE1@\n+\n+RNA-seq Dataset 2:\n+\n+@DATASET_EXEMPLE2@\n+\n+**"RNAseq of all samples + sample group specification"** option:\n+\n+RNA-seq Dataset:\n+\n+@DATASET_EXEMPLE1@\n+\n+Class-file:\n+\n++------------+------------+ \n+| Patient_ID | class | \n++============+============+ \n+| TCGAAA3529 | MSI | \n++------------+------------+ \n+| TCGAA62671 | MSS | \n++------------+------------+ \n+| TCGAA62672 | MSI | \n++------------+------------+\n+\n+|\n+\n+.. class:: infomark\n+\n+**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\n+\n+.. class:: infomark\n+\n+**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\n+\n+@REFERENCE@\n+\n+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\n+.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\n+.. _MaREA cluster analysis: http://link del tool di cluster.org\n+\n+]]>\n+ </help>\n+ <expand macro="citations" />\n+</tool>\n+\t\n' |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/marea_cluster.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/marea_cluster.py Tue Oct 01 06:03:12 2019 -0400 |
[ |
b'@@ -0,0 +1,417 @@\n+# -*- coding: utf-8 -*-\n+"""\n+Created on Mon Jun 3 19:51:00 2019\n+\n+@author: Narger\n+"""\n+\n+import sys\n+import argparse\n+import os\n+from sklearn.datasets import make_blobs\n+from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering\n+from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster\n+import matplotlib.pyplot as plt\n+import scipy.cluster.hierarchy as shc \n+import matplotlib.cm as cm\n+import numpy as np\n+import pandas as pd\n+\n+################################# process args ###############################\n+\n+def process_args(args):\n+ parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n+ description = \'process some value\\\'s\' +\n+ \' genes to create class.\')\n+\n+ parser.add_argument(\'-ol\', \'--out_log\', \n+ help = "Output log")\n+ \n+ parser.add_argument(\'-in\', \'--input\',\n+ type = str,\n+ help = \'input dataset\')\n+ \n+ parser.add_argument(\'-cy\', \'--cluster_type\',\n+ type = str,\n+ choices = [\'kmeans\', \'meanshift\', \'dbscan\', \'hierarchy\'],\n+ default = \'kmeans\',\n+ help = \'choose clustering algorythm\')\n+ \n+ parser.add_argument(\'-k1\', \'--k_min\', \n+ type = int,\n+ default = 2,\n+ help = \'choose minimun cluster number to be generated\')\n+ \n+ parser.add_argument(\'-k2\', \'--k_max\', \n+ type = int,\n+ default = 7,\n+ help = \'choose maximum cluster number to be generated\')\n+ \n+ parser.add_argument(\'-el\', \'--elbow\', \n+ type = str,\n+ default = \'false\',\n+ choices = [\'true\', \'false\'],\n+ help = \'choose if you want to generate an elbow plot for kmeans\')\n+ \n+ parser.add_argument(\'-si\', \'--silhouette\', \n+ type = str,\n+ default = \'false\',\n+ choices = [\'true\', \'false\'],\n+ help = \'choose if you want silhouette plots\')\n+ \n+ parser.add_argument(\'-db\', \'--davies\', \n+ type = str,\n+ default = \'false\',\n+ choices = [\'true\', \'false\'],\n+ help = \'choose if you want davies bouldin 
scores\')\n+ \n+ parser.add_argument(\'-td\', \'--tool_dir\',\n+ type = str,\n+ required = True,\n+ help = \'your tool directory\')\n+ \n+ parser.add_argument(\'-ms\', \'--min_samples\',\n+ type = int,\n+ help = \'min samples for dbscan (optional)\')\n+ \n+ parser.add_argument(\'-ep\', \'--eps\',\n+ type = int,\n+ help = \'eps for dbscan (optional)\')\n+ \n+ \n+ args = parser.parse_args()\n+ return args\n+\n+########################### warning ###########################################\n+\n+def warning(s):\n+ args = process_args(sys.argv)\n+ with open(args.out_log, \'a\') as log:\n+ log.write(s + "\\n\\n")\n+ print(s)\n+\n+########################## read dataset ######################################\n+ \n+def read_dataset(dataset):\n+ try:\n+ dataset = pd.read_csv(dataset, sep = \'\\t\', header = 0)\n+ except pd.errors.EmptyDataError:\n+ sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n+ if len(dataset.columns) < 2:\n+ sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n+ return dataset\n+\n+############################ rewrite_input ###################################\n+ \n+def rewrite_input(dataset):\n+ #Riscrivo il dataset come dizionario di liste, \n+ #non come dizionario di dizionari\n+ \n+ for'..b' warning("For n_clusters =" + str(n_clusters_) + \n+ "The average silhouette_score is :" + str(silhouette_avg))\n+ \n+ ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL\n+\n+ # Black removed and is used for noise instead.\n+ unique_labels = set(labels)\n+ colors = [plt.cm.Spectral(each)\n+ for each in np.linspace(0, 1, len(unique_labels))]\n+ for k, col in zip(unique_labels, colors):\n+ if k == -1:\n+ # Black used for noise.\n+ col = [0, 0, 0, 1]\n+\n+ class_member_mask = (labels == k)\n+ \n+ xy = dataset[class_member_mask & core_samples_mask]\n+ plt.plot(xy[:, 0], xy[:, 1], \'o\', markerfacecolor=tuple(col),\n+ markeredgecolor=\'k\', markersize=14)\n+ \n+ xy = dataset[class_member_mask & ~core_samples_mask]\n+ plt.plot(xy[:, 0], xy[:, 1], 
\'o\', markerfacecolor=tuple(col),\n+ markeredgecolor=\'k\', markersize=6)\n+\n+ plt.title(\'Estimated number of clusters: %d\' % n_clusters_)\n+ s = \'clustering/dbscan_output/dbscan_plot.png\'\n+ fig = plt.gcf()\n+ fig.set_size_inches(18.5, 10.5, forward = True)\n+ fig.savefig(s, dpi=100)\n+ \n+ \n+ write_to_csv(dataset, labels, \'clustering/dbscan_output/dbscan_results.tsv\')\n+ \n+########################## hierachical #######################################\n+ \n+def hierachical_agglomerative(dataset, k_min, k_max):\n+\n+ if not os.path.exists(\'clustering/agglomerative_output\'):\n+ os.makedirs(\'clustering/agglomerative_output\')\n+ \n+ plt.figure(figsize=(10, 7)) \n+ plt.title("Customer Dendograms") \n+ shc.dendrogram(shc.linkage(dataset, method=\'ward\')) \n+ fig = plt.gcf()\n+ fig.savefig(\'clustering/agglomerative_output/dendogram.png\', dpi=200)\n+ \n+ range_n_clusters = [i for i in range(k_min, k_max+1)]\n+\n+ for n_clusters in range_n_clusters:\n+ \n+ cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity=\'euclidean\', linkage=\'ward\') \n+ cluster.fit_predict(dataset) \n+ cluster_labels = cluster.labels_\n+ \n+ silhouette_avg = silhouette_score(dataset, cluster_labels)\n+ warning("For n_clusters =", n_clusters,\n+ "The average silhouette_score is :", silhouette_avg)\n+ \n+ plt.clf()\n+ plt.figure(figsize=(10, 7)) \n+ plt.title("Agglomerative Hierarchical Clustering\\nwith " + str(n_clusters) + " clusters and " + str(silhouette_avg) + " silhouette score")\n+ plt.scatter(dataset[:,0], dataset[:,1], c = cluster_labels, cmap=\'rainbow\') \n+ s = \'clustering/agglomerative_output/hierachical_\' + str(n_clusters) + \'_clusters.png\'\n+ fig = plt.gcf()\n+ fig.set_size_inches(10, 7, forward = True)\n+ fig.savefig(s, dpi=200)\n+ \n+ write_to_csv(dataset, cluster_labels, \'clustering/agglomerative_output/agglomerative_hierarchical_with_\' + str(n_clusters) + \'_clusters.tsv\')\n+ \n+ \n+\n+ \n+############################# main 
###########################################\n+\n+\n+def main():\n+ if not os.path.exists(\'clustering\'):\n+ os.makedirs(\'clustering\')\n+\n+ args = process_args(sys.argv)\n+ \n+ #Data read\n+ \n+ X = read_dataset(args.input)\n+ X = pd.DataFrame.to_dict(X, orient=\'list\')\n+ X = rewrite_input(X)\n+ X = pd.DataFrame.from_dict(X, orient = \'index\')\n+ \n+ for i in X.columns:\n+ tmp = X[i][0]\n+ if tmp == None:\n+ X = X.drop(columns=[i])\n+ \n+ X = pd.DataFrame.to_numpy(X)\n+ \n+ \n+ if args.cluster_type == \'kmeans\':\n+ kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies)\n+ \n+ if args.cluster_type == \'dbscan\':\n+ dbscan(X, args.eps, args.min_samples)\n+ \n+ if args.cluster_type == \'hierarchy\':\n+ hierachical_agglomerative(X, args.k_min, args.k_max)\n+ \n+##############################################################################\n+\n+if __name__ == "__main__":\n+ main()\n' |
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/marea_cluster.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/marea_cluster.xml Tue Oct 01 06:03:12 2019 -0400 |
[ |
<!--
    Galaxy tool wrapper for marea_cluster.py.

    Runs one clustering algorithm (KMeans, DBSCAN or Agglomerative
    Hierarchical) on a tabular dataset and collects every file the script
    writes below the "clustering" working directory.

    NOTE(review): the tool id contains a typo ("cluester"). It is kept on
    purpose: the id is the tool's public identity in Galaxy and renaming it
    would detach existing histories and workflows.
-->
<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.1">
    <description>of Reaction Activity Scores - 1.0.1</description>
    <macros>
        <import>marea_macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="0.23.0">pandas</requirement>
        <requirement type="package" version="1.1.0">scipy</requirement>
        <requirement type="package" version="0.10.1">cobra</requirement>
        <requirement type="package" version="0.21.3">scikit-learn</requirement>
        <requirement type="package" version="2.2.2">matplotlib</requirement>
        <requirement type="package" version="1.17">numpy</requirement>
    </requirements>
    <command detect_errors="exit_code">
        <![CDATA[
        ## Paths are single-quoted so that directories containing spaces survive the shell.
        python '$__tool_directory__/marea_cluster.py'
        --input '$input'
        --tool_dir '$__tool_directory__'
        --out_log '$log'
        ## The script dispatches on this value; without it the user's choice is ignored.
        --cluster_type '$data.clust_type'
        #if $data.clust_type == 'kmeans':
            --k_min ${data.k_min}
            --k_max ${data.k_max}
            --elbow ${data.elbow}
            --silhouette ${data.silhouette}
        #end if
        #if $data.clust_type == 'dbscan':
            #if str($data.dbscan_advanced.advanced) == 'true':
                --eps ${data.dbscan_advanced.eps}
                --min_samples ${data.dbscan_advanced.min_samples}
            #end if
        #end if
        #if $data.clust_type == 'hierarchy':
            --k_min ${data.k_min}
            --k_max ${data.k_max}
        #end if
        ]]>
    </command>
    <inputs>
        <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="RNAseq of all samples" />

        <conditional name="data">
            <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
                <option value="kmeans" selected="true">KMeans</option>
                <option value="dbscan">DBSCAN</option>
                <option value="hierarchy">Agglomerative Hierarchical</option>
            </param>
            <when value="kmeans">
                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
                <!-- Boolean defaults are declared with "checked"; the script receives the literal strings true/false. -->
                <param name="elbow" argument="--elbow" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Draw the elbow plot from k-min to k-max" />
                <param name="silhouette" argument="--silhouette" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Draw the Silhouette plot from k-min to k-max" />
            </when>
            <when value="dbscan">
                <conditional name="dbscan_advanced">
                    <!-- A boolean test parameter takes no option children; the when-branches match truevalue/falsevalue. -->
                    <param name="advanced" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)" />
                    <when value="false" />
                    <when value="true">
                        <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
                        <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)" />
                    </when>
                </conditional>
            </when>
            <when value="hierarchy">
                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data format="txt" name="log" label="${tool.name} - Log" />
        <collection name="results" type="list" label="${tool.name} - Results">
            <!-- The script writes into per-algorithm subdirectories of "clustering", so the search must recurse. -->
            <discover_datasets pattern="__name_and_ext__" directory="clustering" recurse="true" />
        </collection>
    </outputs>
    <help>
<![CDATA[

What it does
-------------

Runs the selected clustering algorithm (KMeans, DBSCAN or Agglomerative Hierarchical) on the input dataset.

Outputs
-------------

- a log file;
- a collection with every file (plots and tabular results) written by the chosen algorithm.

]]>
    </help>
    <expand macro="citations" />
</tool>
b |
diff -r 1a0c8c2780f2 -r d0e7f14b773f marea-1.0.1/marea_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/marea-1.0.1/marea_macros.xml Tue Oct 01 06:03:12 2019 -0400 |
b |
<!--
    Shared Galaxy macros for the MaREA tools: a rule-set selector, reStructuredText
    example tables used in the help sections, a reference paragraph and the citation.

    Token names (including the "EXEMPLE" spelling) are referenced from the tool
    files and are therefore left unchanged. Comments are kept outside the token
    elements so that they can never become part of the substituted text.
-->
<macros>

    <!-- Selector for the Gene-Protein-Reaction rule set. -->
    <xml name="options">
        <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:">
            <option value="HMRcore" selected="true">HMRcore rules</option>
            <option value="Recon">Recon 2.2 rules</option>
            <option value="Custom">Custom rules</option>
        </param>
    </xml>

    <!-- Example of a custom rules file (reStructuredText grid table). -->
    <token name="@CUSTOM_RULES_EXEMPLE@">

+--------------------+-------------------------------+
| id                 | rule (with entrez-id)         |
+====================+===============================+
| SHMT1              | 155060 or 10357               |
+--------------------+-------------------------------+
| NIT2               | 155060 or 100134869           |
+--------------------+-------------------------------+
| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
+--------------------+-------------------------------+

|

    </token>

    <!-- Example dataset keyed by HGNC identifier. -->
    <token name="@DATASET_EXEMPLE1@">

+------------+------------+------------+------------+
| Hugo_ID    | TCGAA62670 | TCGAA62671 | TCGAA62672 |
+============+============+============+============+
| HGNC:24086 | 0.523167   | 0.371355   | 0.925661   |
+------------+------------+------------+------------+
| HGNC:24086 | 0.568765   | 0.765567   | 0.456789   |
+------------+------------+------------+------------+
| HGNC:9876  | 0.876545   | 0.768933   | 0.987654   |
+------------+------------+------------+------------+
| HGNC:9     | 0.456788   | 0.876543   | 0.876542   |
+------------+------------+------------+------------+
| HGNC:23    | 0.876543   | 0.786543   | 0.897654   |
+------------+------------+------------+------------+

|

    </token>

    <!-- Example dataset keyed by gene symbol. -->
    <token name="@DATASET_EXEMPLE2@">

+-------------+------------+------------+------------+
| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
+=============+============+============+============+
| A1BG        | 0.523167   | 0.371355   | 0.925661   |
+-------------+------------+------------+------------+
| A1CF        | 0.568765   | 0.765567   | 0.456789   |
+-------------+------------+------------+------------+
| A2M         | 0.876545   | 0.768933   | 0.987654   |
+-------------+------------+------------+------------+
| A4GALT      | 0.456788   | 0.876543   | 0.876542   |
+-------------+------------+------------+------------+
| M664Y65     | 0.876543   | 0.786543   | 0.897654   |
+-------------+------------+------------+------------+

|

    </token>

    <!-- NOTE(review): the BIMIB link target below looks like a placeholder and is not a valid URL; replace it with the group's real address. -->
    <token name="@REFERENCE@">

This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_.

.. _BIMIB: http://sito di bio.org
.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English
.. _University of Milan - Bicocca: https://www.unimib.it/

    </token>

    <xml name="citations">
        <citations> <!-- example citation; BibTeX separates author names with "and" -->
            <citation type="bibtex">
@online{lh32017,
  author = {Alex Graudenzi and Davide Maspero and Claudio Isella and Marzia Di Filippo and Giancarlo Mauri and Enzo Medico and Marco Antoniotti and Chiara Damiani},
  year = {2018},
  title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq},
  publisher = {bioRxiv},
  journal = {bioRxiv},
  url = {https://www.biorxiv.org/content/early/2018/01/16/248724},
}
            </citation>
        </citations>
    </xml>

</macros>