Mercurial > repos > proteore > proteore_kegg_pathways_coverage
diff compute_kegg_pathways.xml @ 0:42d0805353b6 draft
planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
author | proteore |
---|---|
date | Wed, 19 Sep 2018 05:38:52 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper for compute_kegg_pathways.R.

    Takes a list of Uniprot accession numbers or Entrez gene IDs (pasted, or
    read from one column of a tabular file), and writes a table with the
    coverage of each KEGG pathway, limited to the requested number of pathways.
-->
<tool id="compute_kegg_pathways" name="KEGG pathways coverage" version="2018.09.18">
    <requirements>
        <requirement type="package" version="1.18.0">bioconductor-keggrest</requirement>
    </requirements>
    <!--
        Every interpolated value is wrapped in single quotes. The "txt"
        sanitizer below lets the double quote, dollar sign and backtick
        through, so double-quoting would let a pasted value break out of the
        argument; the only character that can end a single-quoted shell word
        (the single quote itself) is rewritten to __sq__ by that sanitizer.
        The script path is quoted as well so that an installation directory
        containing spaces still works.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/compute_kegg_pathways.R'

        #if $input.ids == "text"
            --id_list='$input.txt'
        #else
            --input='$input.file'
            --id_column='$input.ncol'
            --header='$input.header'
        #end if

        --id_type='$ref_ids.id_type'
        --output='$output1'
        --nb_pathways='$nb_pathways'
        --ref='$__tool_directory__/$ref_ids.ref_file'
    ]]></command>
    <inputs>
        <!-- Source of the identifiers: pasted text or a column of a dataset. -->
        <conditional name="input">
            <param name="ids" type="select" label="Provide your identifiers (Uniprot or Entrez gene)" help="Copy/paste or ID list from a file (e.g. table)">
                <option value="text">Copy/paste your identifiers</option>
                <option value="file" selected="true">Input file containing your identifiers</option>
            </param>
            <when value="text">
                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258'>
                    <!--
                        All printable characters are kept except the single
                        quote, which is mapped to __sq__. The command block
                        relies on this mapping for its single-quoted arguments.
                    -->
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="'"/>
                        </valid>
                        <mapping initial="none">
                            <add source="'" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file" type="data" format="txt,tabular" label="Select a file that contains your list of IDs" help=""/>
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?"/>
                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'/>
            </when>
        </conditional>
        <!--
            Identifier type and species. The selected ref_file value is
            appended to the tool directory to build the ref argument.
            NOTE(review): each select below mixes a dynamic <options> element
            with static <option> entries that carry no value attribute;
            confirm which of the two actually supplies the reference file name.
        -->
        <conditional name="ref_ids">
            <param name="id_type" type="select" label="select your identifiers type :">
                <option value="uniprot">Uniprot Accession number</option>
                <option value="geneID">Entrez gene ID</option>
            </param>
            <when value="uniprot">
                <param name="ref_file" type="select" label="Select species">
                    <options from_data_table="uniprot_kegg_list"/>
                    <option>Human (Homo sapiens)</option>
                    <option>Mouse (Mus musculus)</option>
                </param>
            </when>
            <when value="geneID">
                <param name="ref_file" type="select" label="Select species">
                    <options from_data_table="entrez_kegg_list"/>
                    <option>Human (Homo sapiens)</option>
                    <option>Mouse (Mus musculus)</option>
                </param>
            </when>
        </conditional>
        <!-- At least one pathway must be requested; zero or negative counts are rejected by the form. -->
        <param type="integer" name="nb_pathways" min="1" label="Set the number of pathways to be displayed in the output" value="10" help="pathways are sorted by percent of mapping gene by pathway in descending order"/>
    </inputs>
    <outputs>
        <data name="output1" format="tsv"/>
    </outputs>
    <help><![CDATA[

This tool computes the number of proteins from your list divided by the total number of proteins in each KEGG pathway.

It allows you to identify the signaling pathways that are the most covered by your proteomics dataset.

By default the number of pathways to be displayed is set to 10 and sorted in descending order.

At the moment two species are supported: Human (Homo sapiens) and Mouse (Mus musculus)

**Input:**

Input can be either a list of Uniprot accession numbers or Entrez gene IDs (copy/paste mode) or a file containing multiple columns but with at least one column of Uniprot accession numbers or Entrez gene IDs. If your input file contains other types of IDs, please use the ID_Converter tool.

**Output:**

The output is a tabular file (.tsv) with the following columns:

* **Pathway_ID:** KEGG pathway identifier (e.g. hsa04970)
* **Description:** name of the pathway as in KEGG
* **Ratio IDs mapped/total IDs (%):** percentage of pathway coverage
* **nb genes mapped in the pathway:** number of genes/proteins of your list mapped on the KEGG pathway
* **nb total genes present in the pathway:** total number of genes/proteins present in the KEGG pathway

-----

.. class:: infomark

**Database:**

Pathways and associated Uniprot Accession Numbers or Gene IDs are collected from the KEGGREST package

User manual / Documentation: KEGGREST Bioconductor package https://bioconductor.org/packages/release/bioc/html/KEGGREST.html


-----

.. class:: infomark

**Galaxy integration**

David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.

-----

compute_KEGG_pathways R script

| Arguments:
|   --**help**           Print this text
|   --**input**          tab file
|   --**id_list**        id list ',' separated
|   --**id_type**        type of input ids (uniprot or geneID)
|   --**id_column**      number of the column containing the ids of interest
|   --**org**            organism : Hs , Mm, ...
|   --**nb_pathways**    number of pathways to return
|   --**header**         boolean
|   --**output**         output path
|   --**ref**            ref file (l.hsa.gene.RData, l.hsa.up.RData)
|
| Example:
|   Rscript compute_kegg_pathways.R --id_list='P31946,P62258' --id_type='uniprot' --nb_pathways='10' --ref='l.hsa.up.RData' --output='output.tsv'
|

    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubKEGGREST,
  title = {KEGGREST: Client-side REST access to KEGG},
  author = {Dan Tenenbaum},
  year = {2018},
  note = {R package version 1.18.1},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/Bioconductor/KEGGREST},
}</citation>
    </citations>
</tool>