Mercurial > repos > bimib > marea
changeset 1:9e63d5f02d62 draft
Uploaded
author | bimib |
---|---|
date | Wed, 07 Nov 2018 07:07:46 -0500 |
parents | 23ac9cf12788 |
children | 3b3d0e5d0802 |
files | Marea/marea.xml Marea/marea_cluster.xml Marea/marea_macros.xml |
diffstat | 3 files changed, 242 insertions(+), 148 deletions(-) [+] |
line wrap: on
line diff
--- a/Marea/marea.xml Tue Nov 06 03:16:21 2018 -0500 +++ b/Marea/marea.xml Wed Nov 07 07:07:46 2018 -0500 @@ -1,15 +1,15 @@ <tool id="MaREA" name="Metabolic Enrichment Analysis"> <description>for Galaxy</description> + <macros> + <import>marea_macros.xml</import> + </macros> + <expand macro="requirements" /> <requirements> - <requirement type="package">pandas</requirement> - <requirement type="package">scipy</requirement> <requirement type="package">lxml</requirement> <requirement type="package">svglib</requirement> <requirement type="package">reportlab</requirement> - <requirement type="package">cobrapy</requirement> - <requirement type="package">python-libsbml</requirement> </requirements> - <command> + <command detect_errors="exit_code"> <![CDATA[ python $__tool_directory__/marea.py --rules_selector $cond_rule.rules_selector @@ -41,60 +41,67 @@ #end if ]]> </command> + <inputs> <conditional name="cond_rule"> - <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:"> - <option value="HMRcore" selected="true">HMRcore rules</option> - <option value="Recon">Recon 2.2 rules</option> - <option value="Custom">Custom rules</option> - </param> + <expand macro="options" /> + <when value="HMRcore"> + </when> + <when value="Recon"> + </when> <when value="Custom"> - <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules"/> + <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" /> <conditional name="cond_map"> <param name="yes_no" type="select" label="Custom map? 
(optional)"> <option value="no" selected="true">no</option> <option value="yes">yes</option> </param> <when value="yes"> - <param name="Custom_map" type="data" format="xml, svg" label="custom-map.svg"/> + <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/> + </when> + <when value="no"> </when> </conditional> </when> </conditional> <conditional name="cond"> - <param name="type_selector" type="select" label="Input format:"> - <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + … + RNAseq of group N</option> + <param name="type_selector" argument="--option" type="select" label="Input format:"> + <option value="datasets" selected="true">RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N</option> <option value="dataset_class">RNAseq of all samples + sample group specification</option> </param> <when value="datasets"> <repeat name="input_Datasets" title="RNAseq" type="data" min="2"> - <param name="input" type="data" format="tabular, csv, tsv" label="add dataset"/> - <param name="input_name" type="text" label="Dataset's name:" value="Dataset" help="Defalut: Dataset"/> + <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" /> + <param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Defalut: Dataset" /> </repeat> </when> <when value="dataset_class"> - <param name="input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples"/> - <param name="input_class" type="data" format="tabular, csv, tsv" label="Sample group specification"/> + <param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" /> + <param name="input_class" argument="--input_class" type="data" format="tabular, csv, tsv" label="Sample group specification" /> </when> </conditional> - <param name="None" type="boolean" truevalue="true" 
falsevalue="false" checked="true" label="(A and NaN) solved as (A)?"/> - <param name="pValue" type="float" size="20" value="0.05" max="1" min="0" label="P-value threshold" help="min value 0"/> - <param name="fChange" type="float" size="20" value="1.5" min="1" label="Fold-Cahnge threshold" help="min value 1"/> + <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> + <param name="pValue" argument="--pValue" type="float" size="20" value="0.05" max="1" min="0" label="P-value threshold" help="min value 0" /> + <param name="fChange" argument="--fChange" type="float" size="20" value="1.5" min="1" label="Fold-Change threshold" help="min value 1" /> </inputs> + <outputs> - <data format="txt" name="log" label="Log"/> - <collection name="map_svg" type="list" label="file svg"> + <data format="txt" name="log" label="Log" /> + <collection name="map_svg" type="list" label="Graphical results (.svg)"> <filter>(cond_rule['rules_selector'] == 'HMRcore') or ((cond_rule['rules_selector'] == 'Custom') and (cond_rule['cond_map']['yes_no'] == 'yes'))</filter> - <discover_datasets pattern="__name_and_ext__" directory="map_svg"/> + <discover_datasets pattern="__name_and_ext__" directory="map_svg" /> </collection> - <collection name="map_pdf" type="list" label="file pdf"> + <collection name="map_pdf" type="list" label="Graphical results (.pdf)"> <filter>(cond_rule['rules_selector'] == 'HMRcore') or ((cond_rule['rules_selector'] == 'Custom') and (cond_rule['cond_map']['yes_no'] == 'yes'))</filter> - <discover_datasets pattern="__name_and_ext__" directory="map_pdf"/> + <discover_datasets pattern="__name_and_ext__" directory="map_pdf" /> </collection> - <collection name="table_out" type="list" label="file table"> - <discover_datasets pattern="__name_and_ext__" directory="table_out"/> + <collection name="table_out" type="list" label="Tabular results"> + <discover_datasets pattern="__name_and_ext__" 
directory="table_out" /> </collection> </outputs> + + + <help> <![CDATA[ @@ -103,87 +110,55 @@ This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724. -The tool can be used to generate: - 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes - 2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes - 3) a log file (.txt) +Accepted files are: + - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*"); + - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to. + +Optional files: + - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: -Accepted files are: - 1) or two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. “classA” and “classB”). - 2) or one RNA dataset and one class-file specifying the class/condition each sample belongs to. + * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; + * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2). + - custom svg map. Graphical elements must have the same IDs as the reactions. See HmrCore svg map for an example. +The tool generates: + 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes; + 2) a metabolic map file (downloadable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes; + 3) a log file (.txt). 
-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, …) of each gene (row) for a given sample (column). Header: sample ID. +RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID. Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2). To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes. -Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label “classA”. - -.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724 +Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in the class having label "classA". Example input ------------- -**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N" exemple input"** option: +**"Custom Rules"** option: + +Custom Rules Dataset: -Dataset 1: +@CUSTOM_RULES_EXEMPLE@ + +**"RNAseq of group 1 + RNAseq of group 2 + ... 
+ RNAseq of group N"** option: -+------------+------------+------------+------------+ -| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+============+============+============+============+ -| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | -+------------+------------+------------+------------+ -| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | -+------------+------------+------------+------------+ -| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | -+------------+------------+------------+------------+ -| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | -+------------+------------+------------+------------+ -| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | -+------------+------------+------------+------------+ - -| +RNA-seq Dataset 1: + +@DATASET_EXEMPLE1@ -Dataset 2: +RNA-seq Dataset 2: -+-------------+------------+------------+------------+ -| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+=============+============+============+============+ -| A1BG | 0.523167 | 0.371355 | 0.925661 | -+-------------+------------+------------+------------+ -| A1CF | 0.568765 | 0.765567 | 0.456789 | -+-------------+------------+------------+------------+ -| A2M | 0.876545 | 0.768933 | 0.987654 | -+-------------+------------+------------+------------+ -| A4GALT | 0.456788 | 0.876543 | 0.876542 | -+-------------+------------+------------+------------+ -| M664Y65 | 0.876543 | 0.786543 | 0.897654 | -+-------------+------------+------------+------------+ - -| +@DATASET_EXEMPLE2@ **"RNAseq of all samples + sample group specification"** option: -Dataset: +RNA-seq Dataset: -+------------+------------+------------+------------+ -| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+============+============+============+============+ -| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | -+------------+------------+------------+------------+ -| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | -+------------+------------+------------+------------+ -| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | 
-+------------+------------+------------+------------+ -| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | -+------------+------------+------------+------------+ -| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | -+------------+------------+------------+------------+ - -| +@DATASET_EXEMPLE1@ Class-file: @@ -199,42 +174,22 @@ | - - -.. class:: warningmark - -This tool expects input datasets consisting of tab-delimited columns. - - .. class:: infomark -TIP: If your data is not TAB delimited, use `Convert delimiters to TAB`_. +**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_. .. class:: infomark -TIP: If your dataset is not split into classes, use `Cluster for MaREA`_. +**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_. -This tool is developed by the `nome del gruppo di bioinformatica`_ at the `dipartimento di informatica disco`_. - +@REFERENCE@ +.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724 .. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj -.. _Cluster for MaREA: http://link del tool di cluster.org/ -.. _nome del gruppo di bioinformatica: http://sito di bio.org -.. _dipartimento di informatica disco : http://www.disco.unimib.it/go/Home/English +.. _MaREA cluster analysis: http://link del tool di cluster.org ]]> </help> + <expand macro="citations" /> </tool> - - - - - - - - - - - -
--- a/Marea/marea_cluster.xml Tue Nov 06 03:16:21 2018 -0500 +++ b/Marea/marea_cluster.xml Wed Nov 07 07:07:46 2018 -0500 @@ -1,12 +1,12 @@ <tool id="MaREA_cluester" name="MaREA cluster analysis"> <description>of Reaction Activity Scores</description> + <macros> + <import>marea_macros.xml</import> + </macros> + <expand macro="requirements" /> <requirements> - <requirement type="package">pandas</requirement> <requirement type="package">scikit-learn</requirement> - <requirement type="package">scipy</requirement> <requirement type="package">matplotlib</requirement> - <requirement type="package">cobrapy</requirement> - <requirement type="package">python-libsbml</requirement> </requirements> <command> <![CDATA[ @@ -32,27 +32,23 @@ </command> <inputs> <conditional name="cond_rule"> - <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:"> - <option value="HMRcore" selected="true">HMRcore rules</option> - <option value="Recon">Recon 2.2 rules</option> - <option value="Custom">Custom rules</option> - </param> + <expand macro="options" /> <when value="Custom"> - <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules"/> + <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" /> </when> </conditional> - <param name="input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples"/> - <param name="name" type="text" label="Output name prefix" value = "dataset"/> - <param name="k_min" type="integer" size="20" value="3" min="2" max="30" label="min number of clusters (k) to be tested (k-means)"/> - <param name="k_max" type="integer" size="20" value="3" min="2" max="30" label="max number of clusters (k) to be tested (k-means)"/> - <param name="None" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" 
help="if NO is selected (A and NaN) is solved as (NaN)"/> + <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" /> + <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" /> + <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/> + <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/> + <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" /> <conditional name="cond_hier"> - <param name="hier" type="select" label="Produce dendrogram (hierarchical clustering):"> + <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):"> <option value="no" selected="true">no</option> <option value="yes">yes</option> </param> <when value="yes"> - <param name="linkage" type="select" label="Linkage type:"> + <param name="linkage" argument="--linkage" type="select" label="Linkage type:"> <option value="single" selected="true">Single: minimum distance between all observations of two sets</option> <option value="complete">Complete: maximum distance between all observations of two sets</option> <option value="average">Average: average distance between all observations of two sets</option> @@ -60,38 +56,80 @@ </when> </conditional> </inputs> + <outputs> - <data format="txt" name="log" label="Log"/> + <data format="txt" name="log" label="Log" /> <data format="pdf" name="dendrogram" label="$name dendrogram"> <filter>cond_hier['hier'] == 'yes'</filter> </data> - <data format="pdf" name="elbow" label="$name elbow evaluation method"/> + <data format="pdf" name="elbow" label="$name elbow evaluation method" /> <collection 
name="cluster_out" type="list" label="Clusters $k_min - $k_max"> - <discover_datasets pattern="__name_and_ext__" directory="cluster_out"/> + <discover_datasets pattern="__name_and_ext__" directory="cluster_out" /> </collection> </outputs> + <help> +<![CDATA[ + +What it does +------------- + +This tool performs cluster analysis of RNA-seq dataset(s) based on Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724. + +Accepted files are: + 1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*"); + 2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*"). + +Optional files: + - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: + + * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; + * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2). + - custom svg map. Graphical elements must have the same IDs as the reactions. See HmrCore svg map for an example. -.. class:: warningmark +The tool generates: + 1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to; + 2) Log: a log file (.txt); + 3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method; + 4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierarchical clustering). + +RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID. 
+ + +Example input +------------- -This tool expects input datasets consisting of tab-delimited columns. + +**RNA-seq dataset**: + +@DATASET_EXEMPLE1@ + +**Custom Rules Dataset**: + +@CUSTOM_RULES_EXEMPLE@ + +**Custom Map**: + +*see the generated HMRcore .svg map for example* + + .. class:: infomark -**TIP:** If your data is not TAB delimited, use *Text Manipulation > Convert delimiters to TAB* +**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_. + +.. class:: warningmark + +If the dendrogram is too densely populated, individual paths and labels may not be clearly readable. + +@REFERENCE@ +.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724 +.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj + + +]]> </help> + <expand macro="citations" /> </tool> - - - - - - - - - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Marea/marea_macros.xml Wed Nov 07 07:07:46 2018 -0500 @@ -0,0 +1,101 @@ +<macros> + + <xml name="requirements"> + <requirements> + <requirement type="package">pandas</requirement> + <requirement type="package">scipy</requirement> + <requirement type="package">cobrapy</requirement> + <requirement type="package">python-libsbml</requirement> + </requirements> + </xml> + + <xml name="options"> + <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:"> + <option value="HMRcore" selected="true">HMRcore rules</option> + <option value="Recon">Recon 2.2 rules</option> + <option value="Custom">Custom rules</option> + </param> + </xml> + + <token name="@CUSTOM_RULES_EXEMPLE@"> + ++--------------------+-------------------------------+ +| id | rule (with entrez-id) | ++====================+===============================+ +| SHMT1 | 155060 or 10357 | ++--------------------+-------------------------------+ +| NIT2 | 155060 or 100134869 | ++--------------------+-------------------------------+ +| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 | ++--------------------+-------------------------------+ + +| + + </token> + + <token name="@DATASET_EXEMPLE1@"> + ++------------+------------+------------+------------+ +| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | ++============+============+============+============+ +| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | ++------------+------------+------------+------------+ +| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | ++------------+------------+------------+------------+ +| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | ++------------+------------+------------+------------+ +| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | ++------------+------------+------------+------------+ +| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | ++------------+------------+------------+------------+ + +| + + </token> + + <token name="@DATASET_EXEMPLE2@"> + 
++-------------+------------+------------+------------+ +| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 | ++=============+============+============+============+ +| A1BG | 0.523167 | 0.371355 | 0.925661 | ++-------------+------------+------------+------------+ +| A1CF | 0.568765 | 0.765567 | 0.456789 | ++-------------+------------+------------+------------+ +| A2M | 0.876545 | 0.768933 | 0.987654 | ++-------------+------------+------------+------------+ +| A4GALT | 0.456788 | 0.876543 | 0.876542 | ++-------------+------------+------------+------------+ +| M664Y65 | 0.876543 | 0.786543 | 0.897654 | ++-------------+------------+------------+------------+ + +| + + </token> + + <token name="@REFERENCE@"> + +This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_. Development team: Irene Sala, Luca Rosato, Davide Maspero, Chiara Damiani. + +.. _BIMIB: http://sito di bio.org +.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English +.. _University of Milan - Bicocca: https://www.unimib.it/ + + </token> + + <xml name="citations"> + <citations> <!-- example citation --> + <citation type="bibtex"> +@online{lh32017, + author = {Alex Graudenzi and Davide Maspero and Claudio Isella and Marzia Di Filippo and Giancarlo Mauri and Enzo Medico and Marco Antoniotti and Chiara Damiani}, + year = {2018}, + title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq data}, + publisher = {bioRxiv}, + journal = {bioRxiv}, + url = {https://www.biorxiv.org/content/early/2018/01/16/248724}, +} + </citation> + </citations> + </xml> + +</macros>