diff expression_rnaseq_abbased.xml @ 1:8dd24f13f923 draft

planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author proteore
date Fri, 16 Feb 2018 04:09:32 -0500
parents cf2fa609625b
children 5cdffe94464e
line wrap: on
line diff
--- a/expression_rnaseq_abbased.xml	Sun Nov 26 20:49:17 2017 -0500
+++ b/expression_rnaseq_abbased.xml	Fri Feb 16 04:09:32 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="rna_abbased_data" name="Expression from RNAseq/Ab-based data (Human Protein Atlas)" version="0.1.0">
+<tool id="rna_abbased_data" name="Get annotation from RNAseq/Ab-based experiments (Human species)" version="0.1.0">
 <description>
 </description>
 <requirements>
@@ -23,31 +23,31 @@
 
 <inputs>
   <conditional name="inputtype">
-    <param name="filetype" type="select" label="Select your type of input file"> 
-      <option value="file_all">Input file containing your identifiers</option>
+    <param name="filetype" type="select" label="Enter your list of Ensembl gene ID"> 
+      <option value="file_all">Input file containing your IDs</option>
       <option value="copy_paste">Copy/paste your list of IDs</option> 
     </param>
     <when value="copy_paste">
       <param name="genelist" type="text" label="Enter a list of identifiers"/>
     </when>
     <when value="file_all">
-      <param name="genelist" type="data" format="txt,tabular" label="Choose a multiple-columns file" help="This file must imperatively have 1 column filled with IDs consistent with the database that will be used. Please use the MappingIDs component if this is not the case."/>
-      <param name="column" type="text" label="Please specify the column where you would like to apply the comparison (e.g : Enter c1)" value="c1"/> 
+      <param name="genelist" type="data" format="txt,tabular" label="Choose your file" help="This file must imperatively have 1 column filled with Ensembl Gene IDs (ENSG). Please use the ID_Converter tool if this is not the case."/>
+      <param name="column" type="text" label="Please specify the column where are your Ensembl gene IDs (e.g : Enter c1 if ENSG ID are in column n°1)" value="c1"/> 
       <param name="header" type="select" label="Does your file have a header?" multiple="false" optional="false"> 
  		      <option value="TRUE" selected="true">Yes</option>
           <option value="FALSE" selected="false">No</option>
       </param>
     </when>
   </conditional>
-  <section name="options" title="RNAseq/Ab-based data expression options" expanded="True">   
-      <param name="hpaparams" type="select" label="Choose the expression from RNAseq/ab-based data you would like to add to your input" multiple="True" display="checkboxes"> 
+  <section name="options" title="RNAseq/Ab-based expression data" expanded="True">   
+      <param name="hpaparams" type="select" label="Choose the information from RNAseq/ab-based data you want to add to your list (see below for details)" multiple="True" display="checkboxes"> 
           <option value="Gene" selected="true">Gene name</option>
           <option value="Gene.description" selected="false">Gene description</option>
           <option value="Evidence">Evidence (at protein level, at transcript level or no evidence)</option>
           <option value="Antibody">Antibody reference</option>
           <option value="RNA.tissue.category">RNA tissue category</option>
- 		  <option value="Reliability..IH.">IH detection level</option>
-          <option value="Reliability..IF.">IF detection level</option>
+ 		  <option value="Reliability.IH">IH detection level</option>
+          <option value="Reliability.IF">IF detection level</option>
           <option value="Subcellular.location">Subcellular location</option>
           <option value="RNA.TS.TPM">RNA tissue specificity abundance in 'Transcript Per Million'</option>
           <option value="TPM.max.in.non.specific">RNA non-specific tissue abundance in 'Transcript Per Million'</option>
@@ -58,65 +58,76 @@
 
 
 <outputs>
-  <data name="output" format="tabular" label="abc"/>
+  <data name="output" format="tabular" label=""/>
 </outputs>
 
 <tests>
   <test>
     <conditional name="inputtype">
       <param name="filetype " value="file_all"/>
-      <param name="genelist" value="mitochondrion_enzymes_Nextprot.txt"/>
-      <param name="column" value="c1"/>
+      <param name="genelist" value="ID_Converter_Lacombe_et_al_2017_OK.txt"/>
+      <param name="column" value="c8"/>
       <param name="header" value="TRUE"/>
     </conditional>
     <section name="options">
-      <param name="hpaparams" value="Gene,Gene.description,Reliability..IH.,Subcellular.location,TPM.max.in.non.specific"/>
+      <param name="hpaparams" value="Gene,Gene.description,Evidence,Antibody,RNA.tissue.category,Reliability.IH,Reliability.IF,Subcellular.location,RNA.TS.TPM,TPM.max.in.non.specific"/>
     </section>
-    <output name="output" file="output_expression_rnaseq_abbased_data.tab"/>
+    <output name="output" file="Get_annotation_RNAseq.txt"/>
   </test>
 </tests>
 
 <help><![CDATA[
 
-This tool filters an input **tabular** file according to different databases.
+This tool adds expression information (RNAseq- or antibody-based experiments) from the Human Protein Atlas (HPA) database (https://www.proteinatlas.org/) to your protein list.
 
 **Input**
 
-Input can be a file containing multiple fields but with **at least one column of Ensembl gene IDs** or a list of Ensembl gene ids. If your input file contains other kind of IDs, please refer to the MappingIDs component to create a column of Ensembl gene IDs.  
+Input can be either a list of Ensembl gene IDs (copy/paste) or a file containing multiple fields but with **at least one column of Ensembl gene IDs**. If your input file contains other types of IDs, please use the ID_Converter tool to create a column of Ensembl gene IDs.
 
 **Databases**
 
-The input file will be filtered using information from different sources : 
+HPA source file:  http://www.proteinatlas.org/download/proteinatlas.tab.gz
 
-- HPA normal tissue : will filter the input according to the data contained in the Human Protein Atlas webservice. Pertinent information, such as tissular location, will be added for each gene to your input file.  
+**Annotation**
+
+- Gene name: according to the HGNC (HUGO Gene Nomenclature Committee)
 
-- HPA cancer tissue :  will filter the input according to the data contained in the Human Protein Atlas webservice for cancer. Pertinent information, such as tumor type, will be added for each gene to your input file.  
+- Gene description: entry description (full text)  
 
-**Parameters**
+- Evidence: at protein level, at transcript level or no evidence
 
-For HPA normal tissue :
+- Antibody reference: reference of the HPA antibody used for immunohistochemistry and immunocytochemistry/IF
 
-- tissue category : categories based on RNA-Seq data to estimate the transcript abundance of each protein-coding gene in tissues. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#rna .
+- RNA tissue category: categories based on RNA-Seq data to estimate the transcript abundance of each protein-coding gene in tissues. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#rna .
 
-- level of detection IF : level of detection of the protein associated to the coding gene tissues based on immunofluorescency. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#if .
+- IH detection level: level of detection of the protein associated to the coding gene tissues based on immunohistochemistry. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#ih .
+
+- IF detection level: level of detection of the protein associated to the coding gene tissues based on immunofluorescence. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#if .
 
-- level of detection IH :  level of detection of the protein associated to the coding gene tissues based on immunohistochemistry. For more information, please refer to http://www.proteinatlas.org/about/assays+annotation#if .
+- Subcellular location: according to HPA data. For more information, please refer to https://www.proteinatlas.org/about/assays+annotation#ifa
 
-For HPA cancer tissue : 
+- RNA tissue specificity abundance in 'Transcript Per Million': for each gene, the tissue-specific abundance is reported in 'Transcript Per Million' (TPM) as the sum of the TPM values of all its protein-coding transcripts.
 
-- tumors : which tumors are associated with your protein-coding genes according to the Human Protein Atlas.
-
+- RNA non-specific tissue abundance in 'Transcript Per Million': please refer to http://www.proteinatlas.org/about/assays+annotation#rna.
 
 **Outputs**
 
-The output will be a tabular file. The initial columns will be kept, but lines can be deleted due to the filtering process. Additional columns will be added according to which data you chose to filter your input with.  
+The output is a tabular file. The initial columns are kept and new columns are added according to the types of annotation you selected.
+
+-----
 
+.. class:: infomark
+
+**Authors**
 
-**Data sources**
+Lisa Peru, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
 
-The data for HPA normal tissue was retrieved from the Human Protein Atlas downloadable data repository (http://www.proteinatlas.org/download/proteinatlas.tab.gz).
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
 
-The data for HPA cancer was retrieved from the Human Protein Atlas downloadable data repository (http://www.proteinatlas.org/download/cancer.csv.zip).
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+
 ]]></help>
 
 <citations>