diff add_expression_data.xml @ 9:5c260bd3552e draft

planemo upload commit eb7450f36863f02f036cbc52bf5525d68f22bd9e
author proteore
date Tue, 18 Dec 2018 08:23:48 -0500
parents
children 7b9a4ec7ec54
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/add_expression_data.xml	Tue Dec 18 08:23:48 2018 -0500
@@ -0,0 +1,157 @@
+<tool id="rna_abbased_data" name="Add expression data" version="2018.12.12">
+<description> (RNAseq or Immuno-assays)[Human Protein Atlas]
+</description>
+<requirements>
+  <requirement type="package" version="3.4.1">R</requirement>
+</requirements>
+<stdio>
+  <exit_code range="1:" />
+</stdio>
+<command><![CDATA[
+
+  #if $inputtype.filetype == "copy_paste": 
+  
+    Rscript $__tool_directory__/add_expression_HPA.R --inputtype="copypaste" --input='$inputtype.genelist' --atlas="$__tool_directory__/proteinatlas.csv" --select='$options.hpaparams' --output='$output'  
+
+  #else 
+  
+    Rscript $__tool_directory__/add_expression_HPA.R --inputtype="tabfile" --input='$inputtype.genelist' --header='$inputtype.header' --atlas="$__tool_directory__/proteinatlas.csv" --column='$inputtype.column' --select='$options.hpaparams' --output='$output'  
+
+  #end if
+   
+
+]]></command>
+
+<inputs>
+  <conditional name="inputtype">
+    <param name="filetype" type="select" label="Enter your IDs (Ensembl gene ENSG IDs only)" help="Copy/paste or from a file (e.g. table)"> 
+      <option value="file_all" selected="true">Input file containing your IDs</option>
+      <option value="copy_paste">Copy/paste your list of IDs</option> 
+    </param>
+    <when value="copy_paste">
+      <param name="genelist" type="text" label="Enter a list of IDs">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="__sq__"/>
+            </mapping>
+        </sanitizer>
+      </param>
+    </when>
+    <when value="file_all">
+      <param name="genelist" type="data" format="txt,tabular" label="Select your file" help=""/>
+      <param name="column" type="text" label="Column IDs (e.g : Enter c1 if ENSG ID are in column n°1)" value="c1"/> 
+      <param name="header" type="select" label="Does file contain header?" multiple="false" optional="false"> 
+ 		      <option value="true" selected="true">Yes</option>
+          <option value="false" selected="false">No</option>
+      </param>
+    </when>
+  </conditional>
+  <section name="options" title="RNAseq/Ab-based expression data" expanded="True">   
+      <param name="hpaparams" type="select" label="Select information to add to your list" multiple="True" display="checkboxes" optional="false" > 
+          <option value="Gene" selected="true">Gene name</option>
+          <option value="Gene description" selected="false">Gene description</option>
+          <option value="Evidence">Evidence (at protein level, at transcript level or no evidence)</option>
+          <option value="Antibody">Antibody reference</option>
+          <option value="RNA tissue category">RNA tissue category</option>
+ 		  <option value="Reliability (IH)">IH detection level</option>
+          <option value="Reliability (IF)">IF detection level</option>
+          <option value="Subcellular location">Subcellular location</option>
+          <option value="RNA TS TPM">RNA tissue specificity abundance in 'Transcript Per Million'</option>
+          <option value="TPM max in non-specific">RNA non-specific tissue abundance in 'Transcript Per Million'</option>
+    </param>
+  </section>
+
+</inputs>
+
+
+<outputs>
+  <data name="output" format="tsv" label=""/>
+</outputs>
+
+<tests>
+  <test>
+    <conditional name="inputtype">
+      <param name="filetype " value="file_all"/>
+      <param name="genelist" value="ID_Converter_Lacombe_et_al_2017_OK.txt"/>
+      <param name="column" value="c8"/>
+      <param name="header" value="TRUE"/>
+    </conditional>
+    <section name="options">
+      <param name="hpaparams" value="Gene,Gene.description,Evidence,Antibody,RNA.tissue.category,Reliability.IH,Reliability.IF,Subcellular.location,RNA.TS.TPM,TPM.max.in.non.specific"/>
+    </section>
+    <output name="output" file="Get_annotation_RNAseq.txt"/>
+  </test>
+</tests>
+
+<help><![CDATA[
+**Description**
+
+This tool adds expression annotation (RNAseq- or antibody-based experimental data - see "Parameters" below) from the Human Protein Atlas (HPA) database (https://www.proteinatlas.org/) to your gene/protein list.
+
+-----
+
+**Input**
+
+Input can be either a list of Ensembl gene (ENSG) IDsds (copy/paste mode) or a file containing multiple fields with at least one column of Ensembl gene IDs. If your input file contains other type of IDs, please use the ID_Converter tool to create a column of Ensembl gene IDs.  
+
+-----
+
+**Parameters** 
+
+"Select information to add to your list": choose by clicking the following information: 
+
+- Gene name: according to the HGNC (Hugo Gene Nomenclature Committee) 
+
+- Gene description: entry description (full text)  
+
+- Evidence: at protein level, at transcript level or no evidence
+
+- Antibody reference: reference of the HPA antibody used for immunohistochemistry and immunocytochemistry/IF
+
+- RNA tissue category: categories based on RNA-Seq data to estimate the transcript abundance of each protein-coding gene in tissues. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#rna .
+
+- IH detection level: level of detection of the protein associated to the coding gene tissues based on immunofluorescency. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#if .
+
+- IF detection level:level of detection of the protein associated to the coding gene tissues based on immunohistochemistry. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#ih .
+
+- Subcellular location:according to HPA data. For more information, please refer to https://www.proteinatlas.org/about/assays+annotation#ifa
+
+- RNA tissue specificity abundance in 'Transcript Per Million': For each gene is reported the tissue specificity abundance in 'Transcript Per Million' (TPM) as the sum of the TPM values of all its protein-coding transcripts.
+
+- RNA non-specific tissue abundance in 'Transcript Per Million': please refer to http://www.proteinatlas.org/about/assays+annotation#rna.
+
+-----
+
+**Output**
+
+The output is a tabular file containing original columns and new columns including selected annotation.  
+
+-----
+
+**Data sources (release date)**
+
+HPA source file (Human Protein Atlas version 18):  http://www.proteinatlas.org/download/proteinatlas.tab.gz
+
+-----
+
+.. class:: infomark
+
+**Authors**
+
+Lisa Peru, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+
+]]></help>
+
+<citations>
+</citations>
+
+</tool>