diff compute_kegg_pathways.xml @ 0:42d0805353b6 draft

planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
author proteore
date Wed, 19 Sep 2018 05:38:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/compute_kegg_pathways.xml	Wed Sep 19 05:38:52 2018 -0400
@@ -0,0 +1,153 @@
+<tool id="compute_kegg_pathways" name="KEGG pathways coverage" version="2018.09.18">
+    <requirements>
+        <requirement type="package" version="1.18.0">bioconductor-keggrest</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Every value is single-quoted so the shell cannot expand anything typed by the user.
+        Rscript '$__tool_directory__/compute_kegg_pathways.R'
+        #if $input.ids == "text"
+            ## The txt sanitizer maps the single quote to __sq__, so a pasted value cannot close the quoted string.
+            --id_list='$input.txt'
+        #else
+            --input='$input.file'
+            --id_column='$input.ncol'
+            --header='$input.header'
+        #end if
+
+        --id_type='$ref_ids.id_type'
+        --output='$output1'
+        --nb_pathways='$nb_pathways'
+        --ref='$__tool_directory__/$ref_ids.ref_file'
+    ]]></command>
+    <inputs> <!-- Form parameters; their values are substituted into the Rscript command line of this tool. -->
+        <conditional name="input" > <!-- Identifier source: the "ids" selector chooses between a pasted list and a dataset. -->
+            <param name="ids" type="select" label="Provide your identifiers (Uniprot or Entrez gene)" help="Copy/paste or ID list from a file (e.g. table)" >
+                <option value="text">Copy/paste your identifiers</option>
+                <option value="file" selected="true">Input file containing your identifiers</option>
+            </param>
+            <when value="text" > <!-- Pasted identifiers; handed to the script as the id_list argument. -->
+                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258' >
+                    <sanitizer invalid_char=''> <!-- Every printable character is accepted except the single quote, which is mapped to __sq__. -->
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="__sq__"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+            </when>
+            <when value="file" > <!-- Dataset input; file, header and ncol become the input, header and id_column arguments. -->
+                <param name="file" type="data" format="txt,tabular" label="Select a file that contains your list of IDs" help="" />
+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
+                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
+            </when>
+        </conditional>
+        <conditional name="ref_ids"> <!-- Identifier type; each type offers its own species list, and the chosen ref_file value is appended to the tool directory to form the ref argument. -->
+            <param name="id_type" type="select" label="select your identifiers type :">
+                <option value="uniprot">Uniprot Accession number</option>
+                <option value="geneID">Entrez gene ID</option>
+            </param>
+            <when value="uniprot">
+                <param name="ref_file" type="select" label="Select species" >
+                    <options from_data_table="uniprot_kegg_list"/> <!-- NOTE(review): the static option entries below have no value attribute and follow a self-closed options element; confirm how Galaxy combines them with the data table. -->
+                        <option>Human (Homo sapiens)</option>
+                        <option>Mouse (Mus musculus)</option>
+                </param>
+            </when>
+            <when value="geneID">
+                <param name="ref_file" type="select" label="Select species" >
+                    <options from_data_table="entrez_kegg_list"/> <!-- NOTE(review): same pattern as the uniprot case; the static option entries below have no value attribute. -->
+                        <option>Human (Homo sapiens)</option>
+                        <option>Mouse (Mus musculus)</option>
+                </param>
+            </when>
+        </conditional>
+        <param type="integer" name="nb_pathways" label="Set the number of pathways to be displayed in the output" value="10" help="pathways are sorted by percent of mapping gene by pathway in descending order"/> <!-- Handed to the script as the nb_pathways argument. -->
+    </inputs>
+    <outputs> <!-- Single result dataset; its path is handed to the R script as the output argument. -->
+        <data name="output1" format="tsv" />
+    </outputs>
+    <help><![CDATA[
+
+This tool computes, for each KEGG pathway, the number of proteins from your list that map to the pathway divided by the total number of proteins in that pathway.
+
+It allows you to identify the signaling pathways that are best covered by your proteomics dataset.
+
+By default, the 10 pathways with the highest coverage are displayed, sorted in descending order.
+
+At the moment two species are supported: Human (Homo sapiens) and Mouse (Mus musculus)
+
+**Input:**
+
+Input can be either a list of Uniprot accession numbers or Entrez gene IDs (copy/paste mode), or a file containing multiple columns, at least one of which holds Uniprot accession numbers or Entrez gene IDs. If your input file contains other types of IDs, please use the ID_Converter tool.
+
+**Output:**
+
+The output is a tabular file (.tsv) with the following columns:
+
+* **Pathway_ID:** KEGG pathway identifier (e.g. hsa04970)
+* **Description:** name of the pathway as in KEGG
+* **Ratio IDs mapped/total IDs (%):** percentage of pathway coverage
+* **nb genes mapped in the pathway:** number of genes/proteins of your list mapped on the KEGG pathway
+* **nb total genes present in the pathway:** total number of genes/proteins present in the KEGG pathway
+
+-----
+
+.. class:: infomark
+
+**Database:**
+
+Pathways and associated Uniprot Accession Number or Gene IDs are collected from KEGGREST package 
+
+User manual / Documentation: KEGGREST Bioconductor package https://bioconductor.org/packages/3.1/bioc/html/KEGGREST.html
+
+
+-----
+
+.. class:: infomark
+
+**Galaxy integration**
+
+David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+
+-----
+
+compute_KEGG_pathways R script
+
+| Arguments:
+| --**help**                  Print this text
+| --**input**                 tab file
+| --**id_list**               id list ',' separated
+| --**id_type**               type of input ids (uniprot_AC or geneID)
+| --**id_column**             number of the column containing ids of interest
+| --**org**                   organism : Hs , Mm, ...
+| --**nb_pathways**           number of pathways to return
+| --**header**                boolean
+| --**output**                output path
+| --**ref**                   ref file (l.hsa.gene.RData, l.hsa.up.RData)
+|
+| Example:
+| Rscript compute_kegg_pathways.R --id_list='P31946,P62258' --id_type='uniprot' --id_column 'c1' --header TRUE
+|
+
+    ]]></help>
+    <citations> <!-- BibTeX entry for KEGGREST. NOTE(review): the note field cites version 1.18.1 while the requirement pins 1.18.0; confirm which is intended. -->
+        <citation type="bibtex">
+@misc{githubKEGGREST,
+  title = {KEGGREST: Client-side REST access to KEGG},
+  author = {Dan Tenenbaum},
+  year = {2018},
+  note = {R package version 1.18.1},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/Bioconductor/KEGGREST},
+}</citation>
+    </citations>
+</tool>