view kegg_maps_visualization.xml @ 0:9845dc9c7323 draft

planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
author proteore
date Tue, 18 Dec 2018 10:02:54 -0500
parents
children 6f389729a30b
line wrap: on
line source

<tool id="kegg_maps_visualization" name="KEGG maps visualization" version="2018.12.18">
    <description>of (differentially expressed) genes/proteins</description>
    <!-- Packages requested for the job environment; both are pinned to version 1.18.0. -->
    <requirements>
        <requirement type="package" version="1.18.0">bioconductor-pathview</requirement>
        <requirement type="package" version="1.18.0">bioconductor-keggrest</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Runs the R script stored next to this wrapper. Every substituted value is
        ## single-quoted so that the shell performs no expansion on it. The sanitizers
        ## declared on the free-text inputs take the single quote out of the valid
        ## characters, so a pasted value cannot close the quoting.
        ## NOTE(review): text inputs without an explicit sanitizer rely on Galaxy's
        ## default text sanitization for the same guarantee - confirm.
        Rscript '$__tool_directory__/kegg_maps_visualization.R'

        ## Identifiers to map: pasted text, or one column of a dataset.
        #if $input.ids == "text"
            --id_list='$input.txt'
        #else
            --input='$input.file'
            --id_column='$input.ncol'
            --header='$input.header'
        #end if

        ## Pathways: "pids" carries either the selected names or the pasted IDs;
        ## otherwise the IDs are read from a column of a dataset.
        #if $species.pathways.pathways_id != "pathways_file"
            --pathways_id='$species.pathways.pids'
        #else
            --pathways_input='$species.pathways.file'
            --header2='$species.pathways.header2'
            --pathway_col='$species.pathways.ncol2'
        #end if

        --id_type='$id_type'
        --native_kegg='$native'

        ## Quantitative columns are only offered when the IDs come from a dataset.
        #if $input.ids == "file" and $input.foldchange.fc == "true"
            --fold_change_data='$input.foldchange.fc'
            --fold_change_col='$input.foldchange.fc_col'
        #else
            --fold_change_data='false'
        #end if

        --species='${species.ref_file}'

        ## Last field of the kegg_pathways_list_index row whose first field equals the
        ## selected species. filter() is wrapped in list() because on Python 3 it
        ## returns an iterator, which cannot be indexed.
        --pathways_list='$__tool_directory__/${ list( filter( lambda x: str( x[0] ) == str( $species.ref_file ), $__app__.tool_data_tables['kegg_pathways_list_index'].get_fields() ) )[0][-1] }'
        --output='$text_output'

    ]]></command>
    <inputs>
        <!--section name="pathways_section" title="Pathways" expanded="True"--> 
            <conditional name="species">
                <!-- Species selector. The chosen value (hsa, mmu or rno) picks the matching <when>
                     branch of this conditional, is handed to the R script as the species option,
                     and is compared with the first field of the kegg_pathways_list_index data
                     table in the command template. -->
                <param name="ref_file" type="select" label="Species" >
                    <option value="hsa">Human (Homo sapiens)</option>
                    <option value="mmu">Mouse (Mus musculus)</option>
                    <option value="rno">Rat (Rattus norvegicus)</option>
                </param>
                <when value="hsa">
                    <!-- Human: pathways are picked by name, pasted as IDs, or read from a dataset. -->
                    <conditional name="pathways">
                        <param name="pathways_id"
                               type="select"
                               label="Enter your pathway(s)"
                               help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                            <option value="pathways_names">KEGG pathway name(s)</option>
                            <option value="pathways_ids">KEGG pathway IDs</option>
                            <option value="pathways_file">KEGG pathway IDs from file</option>
                        </param>

                        <!-- Multi-select populated from the hsa_pathways data table, sorted on column 1. -->
                        <when value="pathways_names">
                            <param name="pids"
                                   type="select"
                                   multiple="true"
                                   label="Select pathway(s) (by clicking in the box)"
                                   help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                                <options from_data_table="hsa_pathways">
                                    <filter type="sort_by" column="1"/>
                                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                                </options>
                            </param>
                        </when>

                        <!-- Free-text IDs. The single quote is removed from the valid characters.
                             NOTE(review): space belongs to string.printable, so confirm whether the
                             space mapping below is ever applied. -->
                        <when value="pathways_ids">
                            <param name="pids"
                                   type="text"
                                   label="Copy/paste your pathway IDs"
                                   help='IDs must be separated by tab, space or carriage return into the form field, for example: "hsa00010 hsa05412"'>
                                <sanitizer invalid_char=''>
                                    <valid initial="string.printable">
                                        <remove value="&apos;"/>
                                    </valid>
                                    <mapping>
                                        <add source="&#x20;" target=""/>
                                    </mapping>
                                </sanitizer>
                            </param>
                        </when>

                        <!-- IDs taken from one column of a dataset. -->
                        <when value="pathways_file">
                            <param name="file"
                                   type="data"
                                   format="txt,tabular"
                                   label="Select your file"
                                   help="Pathway id format : 'path:hsa00010' or 'hsa00010' or '00010'" />
                            <param name="header2"
                                   type="boolean"
                                   checked="true"
                                   truevalue="true"
                                   falsevalue="false"
                                   label="Does file contain header?" />
                            <param name="ncol2"
                                   type="text"
                                   value="c1"
                                   label="Column of pathways IDs"
                                   help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                        </when>
                    </conditional>
                </when>
                <when value="mmu">
                    <!-- Mouse: pathways are picked by name, pasted as IDs, or read from a dataset. -->
                    <conditional name="pathways">
                        <param name="pathways_id"
                               type="select"
                               label="Enter your pathway(s)"
                               help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                            <option value="pathways_names">KEGG pathway name(s)</option>
                            <option value="pathways_ids">KEGG pathway IDs</option>
                            <option value="pathways_file">KEGG pathway IDs from file</option>
                        </param>

                        <!-- Multi-select populated from the mmu_pathways data table, sorted on column 1. -->
                        <when value="pathways_names">
                            <param name="pids"
                                   type="select"
                                   multiple="true"
                                   label="Select pathway(s)"
                                   help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                                <options from_data_table="mmu_pathways">
                                    <filter type="sort_by" column="1"/>
                                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                                </options>
                            </param>
                        </when>

                        <!-- Free-text IDs. The single quote is removed from the valid characters.
                             NOTE(review): space belongs to string.printable, so confirm whether the
                             space mapping below is ever applied. -->
                        <when value="pathways_ids">
                            <param name="pids"
                                   type="text"
                                   label="Copy/paste your pathway IDs"
                                   help='IDs must be separated by tab, space or carriage return into the form field, for example: "mmu00053 mmu00340"'>
                                <sanitizer invalid_char=''>
                                    <valid initial="string.printable">
                                        <remove value="&apos;"/>
                                    </valid>
                                    <mapping>
                                        <add source="&#x20;" target=""/>
                                    </mapping>
                                </sanitizer>
                            </param>
                        </when>

                        <!-- IDs taken from one column of a dataset. -->
                        <when value="pathways_file">
                            <param name="file"
                                   type="data"
                                   format="txt,tabular"
                                   label="Enter your file"
                                   help="Pathway id format : 'path:mmu00053' or 'mmu00053' or '00053'" />
                            <param name="header2"
                                   type="boolean"
                                   checked="true"
                                   truevalue="true"
                                   falsevalue="false"
                                   label="Does file contain header?" />
                            <param name="ncol2"
                                   type="text"
                                   value="c1"
                                   label="Column of IDs"
                                   help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                        </when>
                    </conditional>
                </when>
                <when value="rno">
                    <!-- Rat: pathways are picked by name, pasted as IDs, or read from a dataset.
                         The help examples use rno identifiers so that they match this species. -->
                    <conditional name="pathways">
                        <param name="pathways_id" type="select" label="Enter your pathway(s) name/id" help="Enter KEGG pathway name(s) or KEGG pathway id(s)">
                            <option value="pathways_names">KEGG pathway name(s)</option>
                            <option value="pathways_ids">KEGG pathway IDs</option>
                            <option value="pathways_file">KEGG pathway IDs from file</option>
                        </param>

                        <!-- Multi-select populated from the rno_pathways data table, sorted on column 1. -->
                        <when value="pathways_names">
                            <param name="pids" type="select" label="Select pathway(s)" multiple="true" help='You can select one or several pathway(s), you can write the beginning of your pathways to search using autocomplete'>
                                <options from_data_table="rno_pathways">
                                    <filter type="sort_by" column="1"/>
                                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                                </options>
                            </param>
                        </when>

                        <!-- Free-text IDs. The single quote is removed from the valid characters. -->
                        <when value="pathways_ids">
                            <param name="pids" type="text" label="Copy/paste your pathway IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: "rno00010 rno05412"'>
                                <sanitizer invalid_char=''>
                                    <valid initial="string.printable">
                                        <remove value="&apos;"/>
                                    </valid>
                                    <mapping>
                                        <add source="&#x20;" target=""/>
                                    </mapping>
                                </sanitizer>
                            </param>
                        </when>

                        <!-- IDs taken from one column of a dataset. -->
                        <when value="pathways_file">
                            <param name="file" type="data" format="txt,tabular" label="Select your file" help="Pathway id format : 'path:rno00010' or 'rno00010' or '00010'" />
                            <param name="header2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
                            <param name="ncol2" type="text" value="c1" label="Column of IDs" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                        </when>
                    </conditional>
                </when>
            </conditional>
        <!--/section-->
        <!--section name="genes_section" title="Genes/prot" expanded="True"--> 
            <!-- Kind of identifier supplied by the user; the selected value is forwarded to the
                 R script as its id_type option. Entrez Gene ID is preselected. -->
            <param name="id_type" type="select" label="Select ID type for genes to map to the pathway(s):">
                <option value="geneID" selected="true">Entrez Gene ID</option>
                <option value="keggid">KEGG Genes ID</option>
                <option value="uniprotID">UniProt Accession number</option>
            </param>
            <conditional name="input" >
                <!-- Source of the identifiers to map: pasted text or one column of a dataset.
                     The dataset option is preselected. -->
                <param name="ids" type="select" label="Enter your IDs" help="Copy/paste or ID from file (e.g. table)" >
                    <option value="text">Copy/paste your IDs</option>
                    <option value="file" selected="true">Input file containing your IDs</option>
                </param>
                <when value="text" >
                    <!-- The single quote is excluded from the valid characters and mapped to __sq__. -->
                    <param name="txt" type="text" label="Copy/paste your IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258' >
                        <sanitizer invalid_char=''>
                            <valid initial="string.printable">
                                <remove value="&apos;"/>
                            </valid>
                            <mapping initial="none">
                                <add source="&apos;" target="__sq__"/>
                            </mapping>
                        </sanitizer>
                    </param>
                </when>
                <when value="file" >
                    <param name="file" type="data" format="txt,tabular" label="Select your file" help="" />
                    <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
                    <param name="ncol" type="text" value="c1" label="Column of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
                    <!-- Optional quantitative columns; only offered when the IDs come from a dataset. -->
                    <conditional name="foldchange" >
                        <param name="fc" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Map quantitative data on the pathway?"/>
                        <when value="true">
                            <param name="fc_col" type="text" label="Column(s) number of quantitative data (separated by ','. 3 columns max)" help="For example : c1,c3,c4"/>
                        </when>
                        <when value="false"/>
                    </conditional>
                </when>
            </conditional>
        <!--/section-->
        <!-- Rendering choice. The value is forwarded to the R script as its native_kegg option
             and is also tested by the output filters to decide between the png and pdf collections. -->
        <param name="native" type="select" label="Graphical format">
            <option value="true">KEGG map (.png)</option>
            <option value="false">Graphviz layout engine (.pdf)</option> 
        </param>
    </inputs>
    <outputs>
        <!-- Summary table; its path is handed to the R script as the output option. -->
        <data name="text_output" format="tsv" label="KEGG maps visualization text output"/>

        <!-- Four list collections, one per (graphical format, ID source) pair. The filters are
             mutually exclusive, so a given run keeps exactly one of them. Files are collected
             by extension, and the name in front of the extension becomes the element identifier. -->

        <!-- IDs from a dataset: the collection label carries the dataset name. -->
        <collection name="graphviz_output_from_file" type="list" label="KEGG maps visualization from ${input.file.name}">
            <filter>native=="false" and input["ids"] == "file"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.pdf" ext="pdf" />
        </collection>
        <collection name="kegg_graph_output_from_file" type="list" label="KEGG maps visualization from ${input.file.name}">
            <filter>native=="true" and input["ids"] == "file"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.png" ext="png"/>
        </collection>

        <!-- Pasted IDs: there is no dataset name to show, so the label is fixed. -->
        <collection name="graphviz_output_from_list" type="list" label="KEGG maps visualization">
            <filter>native=="false" and input["ids"] == "text"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.pdf" ext="pdf" />
        </collection>
        <collection name="kegg_graph_output_from_list" type="list" label="KEGG maps visualization">
            <filter>native=="true" and input["ids"] == "text"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+\..*)\.png" ext="png" />
        </collection>
    </outputs>
    <tests>
        <!-- No active test: the only test below is commented out.
             NOTE(review): it needs updating before it can be enabled. It refers to an output
             named "kegg_from_file", a top-level "pathways" conditional and a "species" param,
             whereas this file declares "kegg_graph_output_from_file", nests "pathways" inside
             the "species" conditional and names the species select "ref_file". -->
        <!--test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c1"/>
            </conditional>
            <conditional name="pathways">
                <param name="pathways_id" value="pathways_ids"/>
                <param name="pids" value="04514,05167,00010"/>
            </conditional>
            <param name="id_type" value="uniprotID"/>
            <param name="species" value="hsa"/>
            <param name="native" value="true"/>            
            <output name="kegg_from_file" file="hsa04514.pathview.png" compare="sim_size"/>
            <output name="kegg_from_file" file="hsa05167.pathview.png" compare="sim_size"/>
            <output name="kegg_from_file" file="hsa00010.pathview.png" compare="sim_size"/>
        </test-->
    </tests>
    <help><![CDATA[

**Description**

This tool, based on the Pathview R package, performs pathway-based data integration and visualization.

It maps and renders a wide variety of biological data on relevant KEGG pathway graphs. All users need to do is supply their data and specify the target pathway. Pathview automatically downloads the pathway graph data, parses the data file, maps the user data to the pathway, and renders the pathway graph with the mapped data.

**Input**

1. "Enter your pathway(s)": the target KEGG pathways can be specified by:

    - choosing from the KEGG pathways name list 
    - giving a list of KEGG pathway IDs (e.g. hsa00010) (copy/paste mode)
    - selecting a list from a dataset (column) - for instance, output from the "KEGG pathways identification and coverage" tool can be used (1st column, c1)
2. "Select ID type for genes to map to the pathway(s)": the identifiers to map can be Entrez gene IDs, KEGG gene IDs or UniProt accession numbers. IDs to be mapped can be provided either by copy/paste or as a file (tabular, tsv, txt) with a column containing the IDs.
3. "Map quantitative data on the pathway?": Yes/No (default is No). Fold-change values (up to three columns) from a dataset (the same file as the identifiers to map) can be represented graphically on the final figure, using a range of colors that reflects the fold-change values.

Below is an example of an input file with identifiers (uniprot_AC) and fold_change values.

.. csv-table:: Simulated data
   :header: "Uniprot_AC","Protein.name","Number_of_peptides","fc_values 1","fc_values 2","fc_values 3"

   "P15924","Desmoplakin","69","0.172302292051025","-0.757435966487116","0.0411240398990759"
   "P02538","Keratin, type II cytoskeletal 6A","53","-0.988842456122076","0.654626325100182","-0.219153396366064"
   "P02768","Serum albumin","44","-0.983493243315454","0.113752002761474","-0.645886132600729"
   "P08779","Keratin, type I cytoskeletal 16","29","0.552302597284443","-0.329045605110646","2.10616106806788"

|

.. class:: warningmark 

If there is more than one ID per line in the specified column, the line will be split, resulting in one unique ID per line. For example, this table:

.. csv-table:: Simulated data
   :header: "Ensembl_Gene","UniProt.AC","UniProt.ID"

    "ENSG00000136881","Q14032","BAAT_HUMAN"
    "ENSG00000170099","P08185;G3V350;G3V4V7","CBG_HUMAN;G3V350_HUMAN;G3V4V7_HUMAN"
    "ENSG00000244731","P0C0L4","CO4A_HUMAN"

|

will be converted as follows:

.. csv-table:: Simulated data
   :header: "Ensembl_Gene","UniProt.AC","UniProt.ID"

    "ENSG00000136881","Q14032","BAAT_HUMAN"
    "ENSG00000170099","P08185","CBG_HUMAN;G3V350_HUMAN;G3V4V7_HUMAN"
    "ENSG00000170099","G3V350","CBG_HUMAN;G3V350_HUMAN;G3V4V7_HUMAN"
    "ENSG00000170099","G3V4V7","CBG_HUMAN;G3V350_HUMAN;G3V4V7_HUMAN"
    "ENSG00000244731","P0C0L4","CO4A_HUMAN"

|

-----

**Output:**

- a **collection dataset** named 'KEGG maps visualization from <dataset>', one file (png or pdf) for each given pathway.

- a **summary text file** (.tsv) of the mapping(s) with the following columns
    - **KEGG pathway ID**: KEGG pathway(s) used to map given genes/proteins ids
    - **pathway name**: name(s) of KEGG pathway(s) used for mapping
    - **nb of Uniprot_AC used** (only when UniProt accession numbers are given): number of UniProt accession numbers that will be converted to Entrez gene IDs
    - **nb of Entrez gene ID used**: number of Entrez gene IDs used for mapping
    - **nb of Entrez gene ID mapped**: number of Entrez gene IDs mapped on a given pathway
    - **nb of Entrez gene ID in the pathway**: total number of Entrez gene IDs in a given pathway
    - **ratio of Entrez gene ID mapped**: number of Entrez gene IDs mapped / total number of Entrez gene IDs
    - **Entrez gene ID mapped**: list of mapped Entrez gene IDs
    - **uniprot_AC mapped** (only when UniProt accession numbers are given): list of UniProt accession numbers corresponding to the mapped Entrez gene IDs

-----

.. class:: infomark

**Data source (release date)**

The lists of KEGG pathway names come from http://rest.kegg.jp/list/pathway/

User manual / Documentation: http://www.bioconductor.org/packages/release/bioc/html/pathview.html

-----

.. class:: infomark

**Authors**

Luo, Weijun, Brouwer, Cory (2013). “Pathview: an R/Bioconductor package for pathway-based data integration and visualization.” Bioinformatics, 29(14), 1830-1831. doi: 10.1093/bioinformatics/btt285.

-----

.. class:: infomark

**Galaxy integration**

David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/nar/gkx372</citation>
        <!-- BibTeX entry pointing at the Bioconductor page of the pathview package.
             The entry key replaces the former "renameTODO" placeholder. -->
        <citation type="bibtex">
@misc{pathview2013,
  author = {Weijun Luo},
  year = {2013},
  title = {pathview},
  url = {https://bioconductor.org/packages/release/bioc/html/pathview.html},
}</citation>
    </citations>
</tool>