view add_expression_data.xml @ 15:208b87582b4c draft default tip

"planemo upload commit 9a871575622a1318adc64dad63fa5e3a9d2a19fd"
author proteore
date Wed, 29 Apr 2020 08:51:13 -0400
parents 133309fd6875
children
line wrap: on
line source

<tool id="rna_abbased_data" name="Add expression data" version="2020.04.06">
<!-- Short description of the tool: source of the annotation and kind of data added. -->
<description> (RNAseq or Immuno-assays)[Human Protein Atlas]
</description>
<!-- Runtime dependency: the command runs an R script through Rscript. -->
<requirements>
  <requirement type="package" version="3.6.1">r-base</requirement>
</requirements>
<!-- Exit-code rule covering every exit code of 1 or greater. -->
<stdio>
  <exit_code range="1:" />
</stdio>
<command><![CDATA[

    ## Run the annotation script shipped next to this wrapper. The script path is
    ## quoted so that an installation directory containing spaces does not split it.
    Rscript '$__tool_directory__/add_expression_HPA.R'
    --input='$inputtype.genelist'
    --select='$options.hpaparams'
    --output='$output'

    ## Copy/paste mode passes the IDs inline; file mode also needs the header flag
    ## and the column holding the IDs.
    #if $inputtype.filetype == "copy_paste":
        --inputtype="copypaste"
    #else
        --inputtype="tabfile"
        --header='$inputtype.header'
        --column='$inputtype.column'
    #end if

    ## A data-table value whose path has a "protein_atlas" component is passed as is;
    ## any other value is resolved relative to the tool directory. Both branches use
    ## the same --atlas option name.
    #if "protein_atlas" in str($ref_file).split("/")
        --atlas='$ref_file'
    #else
        --atlas='$__tool_directory__/$ref_file'
    #end if

]]></command>

<inputs>
    <!-- HPA reference file, chosen among the entries of the proteore_protein_full_atlas data table. -->
    <param name="ref_file" type="select" label="HPA source file version" >
        <options from_data_table="proteore_protein_full_atlas">
            <filter type="sort_by" column="0"/>
        </options>
    </param>
  <!-- IDs are provided either as a dataset (default) or as pasted text. -->
  <conditional name="inputtype">
    <param name="filetype" type="select" label="Enter your IDs (Ensembl gene IDs only, e.g. ENSG00000064787)" help="Copy/paste or from a file (e.g. table)"> 
      <option value="file_all" selected="true">Input file containing your IDs</option>
      <option value="copy_paste">Copy/paste your list of IDs</option> 
    </param>
    <when value="copy_paste">
      <param name="genelist" type="text" label="Enter a list of IDs">
        <!-- Single quotes are replaced by __sq__ because the value is single-quoted on the command line. -->
        <sanitizer>
            <valid initial="string.printable">
                <remove value="&apos;"/>
            </valid>
            <mapping initial="none">
                <add source="&apos;" target="__sq__"/>
            </mapping>
        </sanitizer>
      </param>
    </when>
    <when value="file_all">
      <param name="genelist" type="data" format="txt,tabular" label="Select your file" help=""/>
      <param name="column" type="text" label="Column IDs (e.g : Enter c1 if ENSG ID are in column n°1)" value="c1">
        <!-- Anchored at both ends so that only an optional "c" followed by digits is accepted. -->
        <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
      </param>
      <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?"/>
    </when>
  </conditional>
  <!-- Annotation columns to add; the selected option values are passed to the script through the select option. -->
  <section name="options" title="RNAseq/Ab-based expression data" expanded="True">   
      <param name="hpaparams" type="select" label="Select information to add to your list" multiple="True" display="checkboxes" optional="false" > 
          <option value="Gene" selected="true">Gene name</option>
          <option value="Gene description" selected="false">Gene description</option>
          <option value="Evidence">Evidence (at protein level, at transcript level or no evidence)</option>
          <option value="Antibody">Antibody reference</option>
          <option value="RNA tissue specificity">RNA tissue category</option>
          <option value="Reliability (IH)">IH detection level</option>
          <option value="Reliability (IF)">IF detection level</option>
          <option value="Subcellular location">Subcellular location</option>
          <option value="RNA tissue specific NX">RNA tissue specificity abundance in 'Transcript Per Million'</option>
          <option value="TPM max in non-specific">RNA non-specific tissue abundance in 'Transcript Per Million' (only for 23/10/2018 release)</option>
      </param>
  </section>

</inputs>


<outputs>
  <!-- Single result dataset; its path is handed to the script as the output option of the command. -->
  <data name="output" format="tsv" label=""/>
</outputs>

<tests>
  <!-- File-mode run: IDs are read from column 8 of a tabular file with a header line. -->
  <test>
    <conditional name="inputtype">
      <param name="filetype" value="file_all"/>
      <param name="genelist" value="ID_Converter_Lacombe_et_al_2017_OK.txt"/>
      <param name="column" value="c8"/>
      <param name="header" value="true"/>
    </conditional>
    <section name="options">
      <!-- Values must be option values of the hpaparams select, not their display labels. -->
      <param name="hpaparams" value="Gene,Gene description,Evidence,Antibody,RNA tissue specificity,Reliability (IH),Reliability (IF),Subcellular location,RNA tissue specific NX,TPM max in non-specific"/>
    </section>
    <output name="output" file="Get_annotation_RNAseq.txt"/>
  </test>
</tests>

<help><![CDATA[
**Description**

This tool adds expression annotation (RNAseq- or antibody-based experimental data - see "Parameters" below) from the Human Protein Atlas (HPA) database (https://www.proteinatlas.org/) to your gene/protein list.

-----

**Input**

Input can be either a list of Ensembl gene (ENSG) IDs (copy/paste mode) or a file containing multiple fields with at least one column of Ensembl gene IDs. If your input file contains other types of IDs, please use the ID_Converter tool to create a column of Ensembl gene IDs.  

.. class:: warningmark
	
In copy/paste mode, the number of IDs considered in input is limited to 5000.

-----

**Parameters** 

"Select information to add to your list": choose by clicking the following information: 

- Gene name: according to the HGNC (Hugo Gene Nomenclature Committee) 

- Gene description: entry description (full text)  

- Evidence: at protein level, at transcript level or no evidence

- Antibody reference: reference of the HPA antibody used for immunohistochemistry and immunocytochemistry/IF

- RNA tissue category: categories based on RNA-Seq data to estimate the transcript abundance of each protein-coding gene in tissues. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#rna .

- IH detection level: level of detection of the protein associated to the coding gene tissues based on immunohistochemistry. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#ih .

- IF detection level: level of detection of the protein associated to the coding gene tissues based on immunofluorescence. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#if .

- Subcellular location: according to HPA data. For more information, please refer to https://www.proteinatlas.org/about/assays+annotation#ifa

- RNA tissue specificity abundance in 'Transcript Per Million': For each gene is reported the tissue specificity abundance in 'Transcript Per Million' (TPM) as the sum of the TPM values of all its protein-coding transcripts.

- RNA non-specific tissue abundance in 'Transcript Per Million' (only available from 23/10/2018 release): please refer to http://www.proteinatlas.org/about/assays+annotation#rna.

-----

**Output**

The output is a tabular file containing initial columns and new columns with annotation from HPA.  

-----

**HPA source file version (release date)**

Release date 23/10/2018: data are based on the Human Protein Atlas version 18.1 and Ensembl version 88.38

Release date 21/01/2020: a subset of the data in the Human Protein Atlas version 19.1

downloaded from:  http://www.proteinatlas.org/download/proteinatlas.tab.gz

-----

.. class:: infomark

**Authors**

Lisa Perus, David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Help: contact@proteore.org for any questions or concerns about this tool.

]]></help>

<!-- No citation entries are declared for this tool. -->
<citations>
</citations>

</tool>