view get_expression_profiles.xml @ 13:2173192d4824 draft default tip

"planemo upload commit 59b014e9f6e2d668cbd7c4844b10db3d59baefd8"
author proteore
date Thu, 07 May 2020 10:01:54 -0400
parents 56c93d1fc8fd
children
line wrap: on
line source

<tool id="sel_ann_hpa" name="Get expression profiles" version="2020.05.07">
    <!-- Short summary of what the tool does. -->
    <description>by (normal or tumor) tissue/cell type [Human Protein Atlas]
    </description>
    <!-- Runtime dependencies. Each <requirement> has to be a child of
         <requirements>; a <requirement> placed directly under <tool> is not
         part of the dependency list, which would leave R undeclared. -->
    <requirements>
        <requirement type="package" version="3.6.1">r-base</requirement>
    </requirements>
    <!-- Any exit status of 1 or above from the command marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal"/>
    </stdio>
    <command><![CDATA[
        ## Run the R script that ships in the same directory as this wrapper.
        ## The interpreter is named explicitly because the "interpreter"
        ## attribute of <command> is deprecated.
        Rscript '$__tool_directory__/get_expression_profiles.R'

        ## Every substituted value below is wrapped in single quotes: the text
        ## sanitizer of the "list" parameter maps a single quote to __sq__, so a
        ## user-supplied value can never close the quoting, whereas inside double
        ## quotes the shell would still expand dollar signs and backticks.

        ## Reference table: used as given when one of its path components is
        ## "protein_atlas", otherwise looked up inside the tool directory.
        #if "protein_atlas" in str($options.proteinatlas).split("/")
          --ref_file='$options.proteinatlas'
        #else
          --ref_file='$__tool_directory__/$options.proteinatlas'
        #end if

        ## Identifiers: either a pasted list or a column of an input file.
        --input_type='$input.ids'
        #if $input.ids == "list"
            --input='$input.list'
        #else
            --input='$input.file'
            --column_number='$input.ncol'
            --header='$input.header'
        #end if

        ## Atlas-specific filters (normal tissue vs. tumor tissue).
        #if $options.database == "normal"
            --atlas='normal'
            --tissue='$options.normal_tissue'
            --level='$options.level'
            --reliability='$options.reliability'
        #else if $options.database == "tumor"
            --atlas='cancer'
            --cancer='$options.cancer_tissue'
        #end if

        --not_mapped='$not_mapped'
        --output='$hpa_output'
    ]]></command>
       <inputs>
           <!-- Source of the Ensembl gene IDs: pasted text or a column of a dataset. -->
           <conditional name="input">
            <param name="ids" type="select" label="Enter your IDs (Ensembl gene ID only; e.g. ENSG00000163631)" help="Copy/paste or from a file (e.g. table)">
                <option value="list">Copy/paste your IDs</option>
                <option value="file" selected="true">Input file containing your IDs</option>
            </param>
            <when value="list">
                <param name="list" type="text" label="Copy/paste your IDs" help='IDs must be separated by spaces into the form field, for example: ENSG00000174876 ENSG00000178372 ENSG00000159763'>
                    <!-- Accept every printable character except the single quote,
                         which is rewritten to __sq__ before reaching the command line. -->
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file" type="data" format="txt,tabular" label="" help="" />
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
                <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
                    <!-- Anchored at both ends so that only an optional "c" followed by
                         digits is accepted; an unanchored pattern also lets through
                         values with trailing garbage such as "c1abc". -->
                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                </param>
            </when>
        </conditional>
        <!-- Choice of Human Protein Atlas resource; each branch offers its own
             reference table (taken from a Galaxy data table) and its own filters. -->
        <conditional name="options">
            <param name="database" type="select" label="Human Protein Atlas (normal or tumor tissue)">
                <option value="normal">Human Normal Tissue</option>
                <option value="tumor">Human Tumor Tissue</option>
            </param>

            <!-- Normal tissue: filter by tissue, expression level and reliability score. -->
            <when value="normal">
                <param name="proteinatlas" type="select" label="Normal tissue HPA version">
                    <options from_data_table="proteore_protein_atlas_normal_tissue">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
                <param name="normal_tissue" type="select" label="Select tissue(s)" multiple="True" display="checkboxes" optional="False">
                    <option value="adrenal gland">Adrenal gland</option>
                    <option value="appendix">Appendix</option>
                    <option value="bone marrow">Bone marrow</option>
                    <option value="breast">Breast</option>
                    <option value="bronchus">Bronchus</option>
                    <option value="caudate">Caudate</option>
                    <option value="cerebellum">Cerebellum</option>
                    <option value="cerebral cortex">Cerebral cortex</option>
                    <option value="cervix">Cervix</option>
                    <option value="colon">Colon</option>
                    <option value="duodenum">Duodenum</option>
                    <option value="endometrium 1">Endometrium 1</option>
                    <option value="endometrium 2">Endometrium 2</option>
                    <option value="epididymis">Epididymis</option>
                    <option value="esophagus">Esophagus</option>
                    <option value="fallopian tube">Fallopian tube</option>
                    <option value="gallbladder">Gallbladder</option>
                    <option value="heart muscle">Heart muscle</option>
                    <option value="hippocampus">Hippocampus</option>
                    <option value="kidney">Kidney</option>
                    <option value="liver">Liver</option>
                    <option value="lung">Lung</option>
                    <option value="lymph node">Lymph node</option>
                    <option value="nasopharynx">Nasopharynx</option>
                    <option value="oral mucosa">Oral mucosa</option>
                    <option value="ovary">Ovary</option>
                    <option value="pancreas">Pancreas</option>
                    <option value="parathyroid gland">Parathyroid gland</option>
                    <option value="placenta">Placenta</option>
                    <option value="prostate">Prostate</option>
                    <option value="rectum">Rectum</option>
                    <option value="salivary gland">Salivary gland</option>
                    <option value="seminal vesicle">Seminal vesicle</option>
                    <option value="skeletal muscle">Skeletal muscle</option>
                    <option value="skin 1">Skin 1</option>
                    <option value="skin 2">Skin 2</option>
                    <option value="small intestine">Small intestine</option>
                    <option value="smooth muscle">Smooth muscle</option>
                    <option value="soft tissue 1">Soft tissue 1</option>
                    <option value="soft tissue 2">Soft tissue 2</option>
                    <option value="spleen">Spleen</option>
                    <option value="stomach 1">Stomach 1</option>
                    <option value="stomach 2">Stomach 2</option>
                    <option value="testis">Testis</option>
                    <option value="thyroid gland">Thyroid gland</option>
                    <option value="tonsil">Tonsil</option>
                    <option value="urinary bladder">Urinary bladder</option>
                    <option value="vagina">Vagina</option>
                </param>
                <param name="level" type="select" label="Expression level" display="checkboxes" multiple="True" optional="False">
                    <option value="High" selected="true">High</option>
                    <option value="Medium" selected="true">Medium</option>
                    <option value="Low">Low</option>
                    <option value="Not detected">Not detected</option>
                </param>
                <param name="reliability" type="select" label="Reliability score" display="checkboxes" multiple="True" optional="False">
                    <option value="Enhanced" selected="true">Enhanced</option>
                    <option value="Supported" selected="true">Supported</option>
                    <option value="Approved">Approved</option>
                    <option value="Uncertain">Uncertain</option>
                </param>
            </when>

            <!-- Tumor tissue: filter by cancer type only. -->
            <when value="tumor">
                <param name="proteinatlas" type="select" label="Tumor tissue HPA version">
                    <options from_data_table="proteore_protein_atlas_tumor_tissue">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
                <param name="cancer_tissue" type="select" label="Select cancer tissue(s)" multiple="True" display="checkboxes" optional="False">
                    <option value="breast cancer">Breast cancer</option>
                    <option value="carcinoid">Carcinoid</option>
                    <option value="cervical cancer">Cervical cancer</option>
                    <option value="colorectal cancer">Colorectal cancer</option>
                    <option value="endometrial cancer">Endometrial cancer</option>
                    <option value="glioma">Glioma</option>
                    <option value="head and neck cancer">Head and neck cancer</option>
                    <option value="liver cancer">Liver cancer</option>
                    <option value="lung cancer">Lung cancer</option>
                    <option value="lymphoma">Lymphoma</option>
                    <option value="melanoma">Melanoma</option>
                    <option value="ovarian cancer">Ovarian cancer</option>
                    <option value="pancreatic cancer">Pancreatic cancer</option>
                    <option value="prostate cancer">Prostate cancer</option>
                    <option value="renal cancer">Renal cancer</option>
                    <option value="skin cancer">Skin cancer</option>
                    <option value="stomach cancer">Stomach cancer</option>
                    <option value="testis cancer">Testis cancer</option>
                    <option value="thyroid cancer">Thyroid cancer</option>
                    <option value="urothelial cancer">Urothelial cancer</option>
                </param>
            </when>
        </conditional>
        <!-- Checked by default; the value ("true" or "false") is forwarded to the R script. -->
        <param name="not_mapped"
               type="boolean"
               checked="true"
               truevalue="true"
               falsevalue="false"
               label="Keep IDs not found in HPA?"/>
    </inputs>
    <!-- Single result dataset; the command template passes its path to the R script. -->
    <outputs>
        <data name="hpa_output"
              format="tsv"
              label=""/>
    </outputs>
    <!-- Functional test: normal-tissue lookup on IDs read from column 8 of a file with a header line. -->
    <tests>
        <test>
            <!-- IDs come from a dataset rather than from pasted text. -->
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c8"/>
            </conditional>

            <!-- Normal-tissue atlas restricted to four tissues. -->
            <conditional name="options">
                <param name="database" value="normal"/>
                <param name="proteinatlas" value="normal_tissue.tsv"/>
                <param name="normal_tissue" value="bronchus,lung,nasopharynx,salivary gland"/>
                <param name="level" value="Not detected,Medium,High,Low"/>
                <param name="reliability" value="Approved,Supported,Uncertain"/>
            </conditional>

            <param name="not_mapped" value="true"/>

            <!-- Expected result file to compare the produced dataset against. -->
            <output name="hpa_output" file="Expres_levels_Lacombe_et_al_2017_OK.txt"/>
        </test>
    </tests>
    <help><![CDATA[

**Description**

This tool retrieves expression profiles (normal or tumor tissue) from the Human Protein Atlas (https://www.proteinatlas.org/).
 
**Input**

A list of Ensembl gene IDs (e.g. ENSG00000163631) must be entered (either via a copy/paste or by choosing a file); if it's not the case, please use the ID_Converter tool of ProteoRE.

A line can contain multiple IDs if they are separated by ";".

.. class:: warningmark
	
In copy/paste mode, the number of IDs considered in input is limited to 5000.

-----

**Parameters**

"Human Protein Atlas (normal or tumor tissue)": two resources are currently available 

* **Human normal tissue data**: expression profiles for proteins in human tissues based on immunohistochemistry using tissue micro arrays.

**Output** will be in the form of a tab-separated file that includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability").
 
* **Human tumor tissue data**: staining profiles for proteins in human tumor tissue based on immunohistochemistry using tissue micro arrays and log-rank P value for Kaplan-Meier analysis of correlation between mRNA expression level and patient survival.

**Output** will be in the form of a tab-separated file that includes Ensembl gene identifier ("Gene"), gene name ("Gene name"), tumor name ("Cancer"), the number of patients annotated for different staining levels ("High", "Medium", "Low" & "Not detected") and log-rank p values for patient survival and mRNA correlation ("prognostic - favourable", "unprognostic - favourable", "prognostic - unfavourable", "unprognostic - unfavourable").

"Select tissue(s)": information from more than one tissue can be retrieved 

"Keep IDs not found in HPA?": ENSG ID not found in Human Protein Atlas will be returned in the output file in the form of "NA" (default is "No")

-----

**Reliability score (only for normal tissue)**

Reliability score is divided into Enhanced, Supported, Approved, or Uncertain with respect to the definitions from HPA:

Enhanced - One or several antibodies with non-overlapping epitopes targeting the same gene have obtained enhanced validation based on orthogonal or independent antibody validation method.

Supported - Consistency with RNA-seq and/or protein/gene characterization data, in combination with similar staining pattern if independent antibodies are available.

Approved - Consistency with RNA-seq data in combination with inconsistency with, or lack of, protein/gene characterization data. Alternatively, consistency with protein/gene characterization data in combination with inconsistency with RNA-seq data. If independent antibodies are available, the staining pattern is partly similar or dissimilar.

Uncertain - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, in combination with dissimilar staining pattern if independent antibodies are available.

-----

**Data sources (release date)**

23/10/2018: Both normal and tumor tissues data are based on the Human Protein Atlas version 18.1 and Ensembl version 88.38.

12/12/2019: Both normal and tumor tissues data are based on the Human Protein Atlas version 19 and Ensembl version 92.38.

More information: https://www.proteinatlas.org/about/download

-----

.. class:: infomark

**Authors**

David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Help: contact@proteore.org for any questions or concerns about this tool.
        
    ]]></help>
    <!-- No citation is registered for this tool. -->
    <citations/>
</tool>