view sel_ann_hpa.xml @ 5:69cf9e6283f8 draft

planemo upload commit 1aa1bc2601a18344f518f8852ed8f1b0a36ae8b9-dirty
author proteore
date Tue, 20 Mar 2018 07:01:46 -0400
parents f294fd77b143
children f7e93829327e
line wrap: on
line source

<tool id="sel_ann_hpa" name="Get expression data by tissue" version="0.1.0">
    <!-- Short summary of the tool; kept on a single line so that no newline or indentation whitespace becomes part of the text content. -->
    <description>finds tissue in which your proteins are expressed (or not) - Human Protein Atlas</description>
    <!-- No dependency is declared for this tool. NOTE(review): the command runs an R script through Rscript, so R presumably has to be available on the job PATH; confirm and declare it here. -->
    <requirements />
    <!-- An exit status of 1 or greater marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>
    <!--
        Command template (Cheetah) that runs sel_ann_hpa.R from the tool directory.

        Options passed to the script:
          * ref_file: the Human Protein Atlas file selected in the "options" conditional.
          * input_type / input: either the pasted identifier list, or an input file
            together with the column holding the IDs and the header flag.
          * atlas: "normal" (with tissue, level and reliability selections) or
            "cancer" (with the cancer tissue selection).
          * not_mapped: whether IDs that do not match are kept in the output.
          * output: path of the resulting dataset.

        Every substituted value is wrapped in single quotes so that the shell never
        interprets characters coming from user input; the sanitizer of the "list"
        parameter already replaces single quotes with __sq__.
        The script is called through an explicit Rscript invocation instead of the
        deprecated "interpreter" attribute.
    -->
    <command><![CDATA[
        Rscript '$__tool_directory__/sel_ann_hpa.R'
        --ref_file='$options.proteinatlas'
        --input_type='$input.ids'
        #if $input.ids == "list"
            --input='$input.list'
        #else
            --input='$input.file'
            --column_number='$input.ncol'
            --header='$input.header'
        #end if

        #if $options.database == "normal"
            --atlas='normal'
            --tissue='$options.normal_tissue'
            --level='$options.level'
            --reliability='$options.reliability'
        #else if $options.database == "tumor"
            --atlas='cancer'
            --cancer='$options.cancer_tissue'
        #end if
        --not_mapped='$not_mapped'
        --output='$hpa_output'
    ]]></command>
       <inputs>
        <!-- Identifier source: either a pasted list or one column of a history file. -->
        <conditional name="input">
            <param name="ids"
                   type="select"
                   label="Provide your identifiers"
                   help="Copy/paste or ID list from a file (e.g. table)">
                <option value="list">Copy/paste your identifiers</option>
                <option value="file">Input file containing your identifiers</option>
            </param>
            <when value="list">
                <param name="list"
                       type="text"
                       label="Copy/paste your identifiers"
                       help="IDs must be separated by spaces into the form field, for example: ENSG00000174876 ENSG00000178372 ENSG00000159763">
                    <!-- All printable characters are accepted except the single quote, which is mapped to __sq__. -->
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file"
                       type="data"
                       format="txt,tabular"
                       label="Choose a file that contains your list of ENSG IDs"
                       help=""/>
                <param name="header"
                       type="boolean"
                       checked="true"
                       truevalue="true"
                       falsevalue="false"
                       label="Does your input file contain header?"/>
                <!-- The column is given as "c" followed by its position, e.g. c1. -->
                <param name="ncol"
                       type="text"
                       value="c1"
                       label="The column number of ENSG IDs to map"
                       help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'/>
            </when>
        </conditional>

        <!-- Atlas selection: each branch lists entries of proteinatlas.loc (display name from column 1, value from column 2) after applying its remove_value filters. -->
        <conditional name="options">
            <param name="database"
                   type="select"
                   label="Human Protein Atlas (data from normal or tumor tissue)">
                <option value="normal">Human Normal Tissue</option>
                <option value="tumor">Human Tumor Tissue</option>
            </param>
            <when value="normal">
                <param name="proteinatlas" type="select" label="Human Protein Atlas">
                    <options from_file="proteinatlas.loc">
                        <column name="name" index="1"/>
                        <column name="value" index="2"/>
                        <filter type="remove_value" meta_ref="proteinatlas" key="name" value="Full Human Protein Atlas"/>
                        <filter type="remove_value" meta_ref="proteinatlas" key="name" value="HPA Tumor Tissue"/>
                    </options>
                </param>
                <param name="normal_tissue"
                       type="select"
                       label="Select tissue(s)"
                       multiple="True"
                       display="checkboxes"
                       optional="False">
                    <option value="adrenal gland">Adrenal gland</option>
                    <option value="appendix">Appendix</option>
                    <option value="bone marrow">Bone marrow</option>
                    <option value="breast">Breast</option>
                    <option value="bronchus">Bronchus</option>
                    <option value="caudate">Caudate</option>
                    <option value="cerebellum">Cerebellum</option>
                    <option value="cerebral cortex">Cerebral cortex</option>
                    <option value="cervix">Cervix</option>
                    <option value="colon">Colon</option>
                    <option value="duodenum">Duodenum</option>
                    <option value="endometrium 1">Endometrium 1</option>
                    <option value="endometrium 2">Endometrium 2</option>
                    <option value="epididymis">Epididymis</option>
                    <option value="esophagus">Esophagus</option>
                    <option value="fallopian tube">Fallopian tube</option>
                    <option value="gallbladder">Gallbladder</option>
                    <option value="heart muscle">Heart muscle</option>
                    <option value="hippocampus">Hippocampus</option>
                    <option value="kidney">Kidney</option>
                    <option value="liver">Liver</option>
                    <option value="lung">Lung</option>
                    <option value="lymph node">Lymph node</option>
                    <option value="nasopharynx">Nasopharynx</option>
                    <option value="oral mucosa">Oral mucosa</option>
                    <option value="ovary">Ovary</option>
                    <option value="pancreas">Pancreas</option>
                    <option value="parathyroid gland">Parathyroid gland</option>
                    <option value="placenta">Placenta</option>
                    <option value="prostate">Prostate</option>
                    <option value="rectum">Rectum</option>
                    <option value="salivary gland">Salivary gland</option>
                    <option value="seminal vesicle">Seminal vesicle</option>
                    <option value="skeletal muscle">Skeletal muscle</option>
                    <option value="skin 1">Skin 1</option>
                    <option value="skin 2">Skin 2</option>
                    <option value="small intestine">Small intestine</option>
                    <option value="smooth muscle">Smooth muscle</option>
                    <option value="soft tissue 1">Soft tissue 1</option>
                    <option value="soft tissue 2">Soft tissue 2</option>
                    <option value="spleen">Spleen</option>
                    <option value="stomach 1">Stomach 1</option>
                    <option value="stomach 2">Stomach 2</option>
                    <option value="testis">Testis</option>
                    <option value="thyroid gland">Thyroid gland</option>
                    <option value="tonsil">Tonsil</option>
                    <option value="urinary bladder">Urinary bladder</option>
                    <option value="vagina">Vagina</option>
                </param>
                <!-- "High" is the only level preselected. -->
                <param name="level"
                       type="select"
                       label="Expression level"
                       display="checkboxes"
                       multiple="True"
                       optional="False">
                    <option value="High" selected="true">High</option>
                    <option value="Medium">Medium</option>
                    <option value="Low">Low</option>
                    <option value="Not detected">Not detected</option>
                </param>
                <!-- "Enhanced" and "Supported" are preselected. -->
                <param name="reliability"
                       type="select"
                       label="Reliability score"
                       display="checkboxes"
                       multiple="True"
                       optional="False">
                    <option value="Enhanced" selected="true">Enhanced</option>
                    <option value="Supported" selected="true">Supported</option>
                    <option value="Approved">Approved</option>
                    <option value="Uncertain">Uncertain</option>
                </param>
            </when>
            <when value="tumor">
                <param name="proteinatlas" type="select" label="Human Protein Atlas">
                    <options from_file="proteinatlas.loc">
                        <column name="name" index="1"/>
                        <column name="value" index="2"/>
                        <filter type="remove_value" meta_ref="proteinatlas" key="name" value="Full Human Protein Atlas"/>
                        <filter type="remove_value" meta_ref="proteinatlas" key="name" value="HPA Normal Tissue"/>
                    </options>
                </param>
                <param name="cancer_tissue"
                       type="select"
                       label="Keep and annotate genes present in the following tissue(s)"
                       multiple="True"
                       display="checkboxes"
                       optional="False">
                    <option value="breast cancer">Breast cancer</option>
                    <option value="carcinoid">Carcinoid</option>
                    <option value="cervical cancer">Cervical cancer</option>
                    <option value="colorectal cancer">Colorectal cancer</option>
                    <option value="endometrial cancer">Endometrial cancer</option>
                    <option value="glioma">Glioma</option>
                    <option value="head and neck cancer">Head and neck cancer</option>
                    <option value="liver cancer">Liver cancer</option>
                    <option value="lung cancer">Lung cancer</option>
                    <option value="lymphoma">Lymphoma</option>
                    <option value="melanoma">Melanoma</option>
                    <option value="ovarian cancer">Ovarian cancer</option>
                    <option value="pancreatic cancer">Pancreatic cancer</option>
                    <option value="prostate cancer">Prostate cancer</option>
                    <option value="renal cancer">Renal cancer</option>
                    <option value="skin cancer">Skin cancer</option>
                    <option value="stomach cancer">Stomach cancer</option>
                    <option value="testis cancer">Testis cancer</option>
                    <option value="thyroid cancer">Thyroid cancer</option>
                    <option value="urothelial cancer">Urothelial cancer</option>
                </param>
            </when>
        </conditional>

        <!-- Checked by default: IDs that do not match the criteria, or are not mapped, are kept in the output. -->
        <param name="not_mapped"
               type="boolean"
               truevalue="true"
               falsevalue="false"
               label="Would you like to include in output the IDs that do not match criteria or not mapped in HPA?"
               checked="true"/>
    </inputs>
    <outputs>
        <!-- Single tabular dataset; its path is handed to the script through the output option of the command. -->
        <data name="hpa_output"
              format="tabular"
              label=""/>
    </outputs>
    <tests>
        <!--
            Normal-tissue query on column c8 of a file with a header line:
            four tissues, all four expression levels, three reliability scores,
            unmatched IDs kept in the output.
        -->
        <test>
            <conditional name="input">
                <param name="ids" value="file" />
                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt" />
                <param name="header" value="true" />
                <param name="ncol" value="c8" />
            </conditional>
            <conditional name="options">
                <param name="database" value="normal" />
                <param name="proteinatlas" value="normal_tissue.tsv" />
                <param name="normal_tissue" value="bronchus,lung,nasopharynx,salivary gland" />
                <param name="level" value="Not detected,Medium,High,Low" />
                <param name="reliability" value="Approved,Supported,Uncertain" />
            </conditional>
            <param name="not_mapped" value="true" />
            <output name="hpa_output" file="Expres_levels_Lacombe_et_al_2017_OK.txt" />
        </test>
    </tests>
    <help><![CDATA[
This tool retrieves information from the Human Protein Atlas (https://www.proteinatlas.org/)
regarding the expression profiles of human genes at both the mRNA and protein levels.

A list of ENSG (Ensembl gene) IDs must be entered (either via copy/paste or by choosing a file).
If your identifiers are not ENSG IDs, please convert them first with the ID_Convert tool from ProteoRE.

The resources from Human Protein Atlas that can be queried are the following: 

* **Human normal tissue data**: expression profiles for proteins in human tissues based on immunohistochemistry using tissue microarrays.

  The tab-separated file includes Ensembl gene identifier ("Gene"), tissue name ("Tissue"), annotated cell type ("Cell type"), expression value ("Level"), and the gene reliability of the expression value ("Reliability"). 

  The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38.

* **Human tumor tissue data**: staining profiles for proteins in human tumor tissue based on immunohistochemistry using tissue microarrays, and log-rank P values for Kaplan-Meier analysis of correlation between mRNA expression level and patient survival.

  The tab-separated file includes Ensembl gene identifier ("Gene"), gene name ("Gene name"), tumor name ("Cancer"), the number of patients annotated for different staining levels ("High", "Medium", "Low" & "Not detected") and log-rank p values for patient survival and mRNA correlation ("prognostic - favourable", "unprognostic - favourable", "prognostic - unfavourable", "unprognostic - unfavourable").

  The data is based on The Human Protein Atlas version 18 and Ensembl version 88.38.

-----

**Reliability score**

Reliability score is divided into Enhanced, Supported, Approved, or Uncertain with respect 
to the definitions from HPA:

Enhanced - One or several antibodies with non-overlapping epitopes targeting the same gene 
have obtained enhanced validation based on orthogonal or independent antibody validation method.

Supported - Consistency with RNA-seq and/or protein/gene characterization data, 
in combination with similar staining pattern if independent antibodies are available.

Approved - Consistency with RNA-seq data in combination with inconsistency with, or lack of, 
protein/gene characterization data. Alternatively, consistency with protein/gene characterization data 
in combination with inconsistency with RNA-seq data. If independent antibodies are available, 
the staining pattern is partly similar or dissimilar.

Uncertain - Inconsistency with, or lack of, RNA-seq and/or protein/gene characterization data, 
in combination with dissimilar staining pattern if independent antibodies are available.

-----

.. class:: infomark

**Authors**

T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
        
    ]]></help>
    <!-- No citation entries are declared for this tool. -->
    <citations />
</tool>