Repository 'marea'
hg clone https://toolshed.g2.bx.psu.edu/repos/bimib/marea

Changeset 1:9e63d5f02d62 (2018-11-07)
Previous changeset 0:23ac9cf12788 (2018-11-06) Next changeset 2:3b3d0e5d0802 (2018-11-07)
Commit message:
Uploaded
modified:
Marea/marea.xml
Marea/marea_cluster.xml
added:
Marea/marea_macros.xml
b
diff -r 23ac9cf12788 -r 9e63d5f02d62 Marea/marea.xml
--- a/Marea/marea.xml Tue Nov 06 03:16:21 2018 -0500
+++ b/Marea/marea.xml Wed Nov 07 07:07:46 2018 -0500
[
b'@@ -1,15 +1,15 @@\n <tool id="MaREA" name="Metabolic Enrichment Analysis">\r\n     <description>for Galaxy</description>\r\n+    <macros>\r\n+        <import>marea_macros.xml</import>\r\n+    </macros>\r\n+    <expand macro="requirements" />\r\n     <requirements>\r\n-        <requirement type="package">pandas</requirement>\r\n-        <requirement type="package">scipy</requirement>\r\n         <requirement type="package">lxml</requirement>\r\n         <requirement type="package">svglib</requirement>\r\n         <requirement type="package">reportlab</requirement>\r\n-        <requirement type="package">cobrapy</requirement>\r\n-        <requirement type="package">python-libsbml</requirement>\r\n     </requirements>\r\n-    <command>\r\n+    <command detect_errors="exit_code">\r\n         <![CDATA[\r\n       \tpython $__tool_directory__/marea.py\r\n         --rules_selector $cond_rule.rules_selector\r\n@@ -41,60 +41,67 @@\n         #end if\r\n         ]]>\r\n     </command>\r\n+\r\n     <inputs>\r\n         <conditional name="cond_rule">\r\n-            <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:">\r\n-                <option value="HMRcore" selected="true">HMRcore rules</option>\r\n-                <option value="Recon">Recon 2.2 rules</option>\r\n-                <option value="Custom">Custom rules</option>\r\n-            </param>\r\n+            <expand macro="options" />\r\n+            <when value="HMRcore">\r\n+            </when>\r\n+            <when value="Recon">\r\n+            </when>\r\n             <when value="Custom">\r\n-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules"/>\r\n+                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\r\n                 <conditional name="cond_map">\r\n                     <param name="yes_no" type="select" label="Custom map? 
(optional)">\r\n                         <option value="no" selected="true">no</option>\r\n                         <option value="yes">yes</option>\r\n                     </param>\r\n                     <when value="yes">\r\n-                        <param name="Custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\r\n+                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\r\n+                    </when>\r\n+                    <when value="no">\r\n                     </when>\r\n                 </conditional>\r\n             </when>\r\n         </conditional>\r\n         <conditional name="cond">\r\n-            <param name="type_selector" type="select" label="Input format:">\r\n-                <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + \xe2\x80\xa6 + RNAseq of group N</option>\r\n+            <param name="type_selector" argument="--option" type="select" label="Input format:">\r\n+                <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N</option>\r\n                 <option value="dataset_class">RNAseq of all samples + sample group specification</option>\r\n             </param>\r\n             <when value="datasets">\r\n                 <repeat name="input_Datasets" title="RNAseq" type="data" min="2">\r\n-                    <param name="input" type="data" format="tabular, csv, tsv" label="add dataset"/>\t\r\n-                    <param name="input_name" type="text" label="Dataset\'s name:" value="Dataset" help="Defalut: Dataset"/>\r\n+                    <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\r\n+                    <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Defalut: Dataset" />\r\n \t        </repeat>\r\n             </when>\r\n             <when value="dataset_class">\r\n-                <param name="input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples"/>\r\n-                <param name="input_class" type="data" format'..b'ion:\r\n+**"Custom Rules"** option:\r\n+\r\n+Custom Rules Dastaset:\r\n \r\n-Dataset 1:\t\t\t\t\t\t\r\n+@CUSTOM_RULES_EXEMPLE@\r\n+\r\n+**"RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N"** option:\r\n \r\n-+------------+------------+------------+------------+   \r\n-|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   \r\n-+============+============+============+============+   \r\n-| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   \r\n-+------------+------------+------------+------------+ \r\n-   \r\n-|\r\n+RNA-seq Dataset 1:\t\t\t\t\t\t\r\n+\r\n+@DATASET_EXEMPLE1@\r\n \r\n-Dataset 2:\r\n+RNA-seq Dataset 2:\r\n \r\n-+-------------+------------+------------+------------+\r\n-| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |\r\n-+=============+============+============+============+\r\n-|    A1BG     |  0.523167  |  0.371355  |  0.925661  |\r\n-+-------------+------------+------------+------------+\r\n-|    A1CF     |  0.568765  |  0.765567  |  0.456789  |\r\n-+-------------+------------+------------+------------+\r\n-|     A2M     |  0.876545  |  0.768933  |  0.987654  |\r\n-+-------------+------------+------------+------------+\r\n-|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |\r\n-+-------------+------------+------------+------------+\r\n-|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |\r\n-+-------------+------------+------------+------------+\r\n-\r\n-|\r\n+@DATASET_EXEMPLE2@\r\n \r\n **"RNAseq of all samples + sample group specification"** option:\r\n \r\n-Dataset:\r\n+RNA-seq Dataset:\r\n \r\n-+------------+------------+------------+------------+   \r\n-|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   
\r\n-+============+============+============+============+   \r\n-| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   \r\n-+------------+------------+------------+------------+    \r\n-| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   \r\n-+------------+------------+------------+------------+ \r\n-\r\n-|\r\n+@DATASET_EXEMPLE1@\r\n \r\n Class-file:\r\n \r\n@@ -199,42 +174,22 @@\n \r\n |\r\n \r\n-\r\n-\r\n-.. class:: warningmark\r\n-\r\n-This tool expects input datasets consisting of tab-delimited columns.\r\n-\r\n-\r\n .. class:: infomark\r\n \r\n-TIP: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\r\n+**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\r\n \r\n .. class:: infomark\r\n \r\n-TIP: If your dataset is not split into classes, use `Cluster for MaREA`_.\r\n+**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\r\n \r\n-This tool is developed by the `nome del gruppo di bioinformatica`_ at the `dipartimento di informatica disco`_.\r\n-\r\n+@REFERENCE@\r\n \r\n+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\r\n .. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\r\n-.. _Cluster for MaREA: http://link del tool di cluster.org/\r\n-.. _nome del gruppo di bioinformatica: http://sito di bio.org\r\n-.. _dipartimento di informatica disco : http://www.disco.unimib.it/go/Home/English\r\n+.. 
_MaREA cluster analysis: http://link del tool di cluster.org\r\n \r\n ]]>\r\n     </help>\r\n+    <expand macro="citations" />\r\n </tool>\r\n \t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n-\t\r\n'
b
diff -r 23ac9cf12788 -r 9e63d5f02d62 Marea/marea_cluster.xml
--- a/Marea/marea_cluster.xml Tue Nov 06 03:16:21 2018 -0500
+++ b/Marea/marea_cluster.xml Wed Nov 07 07:07:46 2018 -0500
[
@@ -1,12 +1,12 @@
 <tool id="MaREA_cluester" name="MaREA cluster analysis">
     <description>of Reaction Activity Scores</description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <requirements>
-        <requirement type="package">pandas</requirement>
         <requirement type="package">scikit-learn</requirement>
-        <requirement type="package">scipy</requirement>
         <requirement type="package">matplotlib</requirement>
-        <requirement type="package">cobrapy</requirement>
-        <requirement type="package">python-libsbml</requirement>
     </requirements>
     <command>
         <![CDATA[
@@ -32,27 +32,23 @@
     </command>
     <inputs>
         <conditional name="cond_rule">
-            <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:">
-                <option value="HMRcore" selected="true">HMRcore rules</option>
-                <option value="Recon">Recon 2.2 rules</option>
-                <option value="Custom">Custom rules</option>
-            </param>
+            <expand macro="options" />
             <when value="Custom">
-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules"/>
+                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />
             </when>
         </conditional>
-        <param name="input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples"/>
-        <param name="name" type="text" label="Output name prefix" value = "dataset"/>
-        <param name="k_min" type="integer" size="20" value="3" min="2" max="30" label="min number of clusters (k) to be tested (k-means)"/>
-        <param name="k_max" type="integer" size="20" value="3" min="2" max="30" label="max number of clusters (k) to be tested (k-means)"/>
-        <param name="None" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="if NO is selected (A and NaN) is solved as (NaN)"/>
+        <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
+        <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" />
+        <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/>
+        <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/>
+        <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" />
  <conditional name="cond_hier">
-            <param name="hier" type="select" label="Produce dendrogram (hierarchical clustering):">
+            <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):">
                 <option value="no" selected="true">no</option>
                 <option value="yes">yes</option>
             </param>
             <when value="yes">
-                <param name="linkage" type="select" label="Linkage type:">
+                <param name="linkage" argument="--linkage" type="select" label="Linkage type:">
                     <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
                     <option value="complete">Complete: maximum distance between all observations of two sets</option>
                     <option value="average">Average: average distance between all observations of two sets</option>
@@ -60,38 +56,80 @@
             </when>
         </conditional>
     </inputs>
+
     <outputs>
-        <data format="txt" name="log" label="Log"/>
+        <data format="txt" name="log" label="Log" />
         <data format="pdf" name="dendrogram" label="$name dendrogram">
             <filter>cond_hier['hier'] == 'yes'</filter>
         </data>
-        <data format="pdf" name="elbow" label="$name elbow evaluation method"/>
+        <data format="pdf" name="elbow" label="$name elbow evaluation method" />
         <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
-            <discover_datasets pattern="__name_and_ext__" directory="cluster_out"/>
+            <discover_datasets pattern="__name_and_ext__" directory="cluster_out" />
         </collection>
     </outputs>
+
     <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool performs cluster analysis of RNA-seq dataset(s) based on Graudenzi et al. "`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
+
+Accepted files are:
+    1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*");
+    2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*").
+
+Optional files:
+    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
+
+        * (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
+        * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2).
+    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.
 
-.. class:: warningmark
+The tool generates:
+    1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to;
+    2) Log: a log file (.txt);
+    3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method;
+    4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierarchical clustering).
+
+RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
+
+
+Example input
+-------------
 
-This tool expects input datasets consisting of tab-delimited columns.
+**RNA-seq dataset**:
+
+@DATASET_EXEMPLE1@
+
+**Custom Rules Dataset**:
+
+@CUSTOM_RULES_EXEMPLE@
+
+**Custom Map**:
+
+*see the generated HMRcore .svg map for example*
+
+
 
 .. class:: infomark
 
-**TIP:** If your data is not TAB delimited, use *Text Manipulation > Convert delimiters to TAB*
+**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
+
+.. class:: warningmark
+
+If the dendrogram is too populated, individual paths and labels may not be clearly readable.
+
+@REFERENCE@
 
+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
+.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj
+
+
+]]>
     </help>
+    <expand macro="citations" />
 </tool>
 
 
-
-
-
-
-
-
-
-
-
-
-
b
diff -r 23ac9cf12788 -r 9e63d5f02d62 Marea/marea_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Marea/marea_macros.xml Wed Nov 07 07:07:46 2018 -0500
b
@@ -0,0 +1,101 @@
+<macros>
+
+    <xml name="requirements"> <!-- package requirements common to the MaREA tools; pulled in with <expand macro="requirements" /> -->
+        <requirements>
+            <requirement type="package">pandas</requirement>
+            <requirement type="package">scipy</requirement>
+            <requirement type="package">cobrapy</requirement>
+            <requirement type="package">python-libsbml</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="options"> <!-- Gene-Protein-Reaction rule-set selector; expanded inside each tool's "cond_rule" conditional -->
+        <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:">
+            <option value="HMRcore" selected="true">HMRcore rules</option>
+            <option value="Recon">Recon 2.2 rules</option>
+            <option value="Custom">Custom rules</option>
+        </param>
+    </xml>
+
+   <token name="@CUSTOM_RULES_EXEMPLE@">
+
++--------------------+-------------------------------+
+|         id         |     rule (with entrez-id)     |
++====================+===============================+
+|        SHMT1       |        155060 or 10357        |
++--------------------+-------------------------------+
+|        NIT2        |      155060 or 100134869      |
++--------------------+-------------------------------+
+| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
++--------------------+-------------------------------+
+
+|
+
+    </token>
+
+    <token name="@DATASET_EXEMPLE1@">
+
++------------+------------+------------+------------+   
+|  Hugo_ID   | TCGAA62670 | TCGAA62671 | TCGAA62672 |   
++============+============+============+============+   
+| HGNC:24086 |  0.523167  |  0.371355  |  0.925661  |   
++------------+------------+------------+------------+    
+| HGNC:24086 |  0.568765  |  0.765567  |  0.456789  |    
++------------+------------+------------+------------+    
+| HGNC:9876  |  0.876545  |  0.768933  |  0.987654  |   
++------------+------------+------------+------------+    
+| HGNC:9     |  0.456788  |  0.876543  |  0.876542  |   
++------------+------------+------------+------------+    
+| HGNC:23    |  0.876543  |  0.786543  |  0.897654  |   
++------------+------------+------------+------------+ 
+   
+|
+
+    </token>
+
+    <token name="@DATASET_EXEMPLE2@">
+
++-------------+------------+------------+------------+
+| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
++=============+============+============+============+
+|    A1BG     |  0.523167  |  0.371355  |  0.925661  |
++-------------+------------+------------+------------+
+|    A1CF     |  0.568765  |  0.765567  |  0.456789  |
++-------------+------------+------------+------------+
+|     A2M     |  0.876545  |  0.768933  |  0.987654  |
++-------------+------------+------------+------------+
+|    A4GALT   |  0.456788  |  0.876543  |  0.876542  |
++-------------+------------+------------+------------+
+|   M664Y65   |  0.876543  |  0.786543  |  0.897654  |
++-------------+------------+------------+------------+
+
+|
+
+    </token>
+
+    <token name="@REFERENCE@">
+
+This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_. Development team: Irene Sala, Luca Rosato, Davide Maspero, Chiara Damiani.
+
+.. _BIMIB: http://sito di bio.org
+.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English
+.. _University of Milan - Bicocca: https://www.unimib.it/
+
+    </token>
+
+    <xml name="citations">
+        <citations> <!-- citation for the MaREA bioRxiv preprint; BibTeX author names must be joined with " and " -->
+            <citation type="bibtex">
+@online{lh32017,
+  author = {Alex Graudenzi and Davide Maspero and Claudio Isella and Marzia Di Filippo and Giancarlo Mauri and Enzo Medico and Marco Antoniotti and Chiara Damiani},
+  year = {2018},
+  title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq data},
+  publisher = {bioRxiv},
+  journal = {bioRxiv},
+  url = {https://www.biorxiv.org/content/early/2018/01/16/248724},
+}
+            </citation>
+        </citations>
+    </xml>
+
+</macros>