view add_protein_features.xml @ 18:0a9ae3d7dbf2 draft default tip

"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"
author proteore
date Thu, 20 Aug 2020 03:09:52 -0400
parents 2952bae8a1ea
children
line wrap: on
line source

<tool id="prot_features" name="Add protein features" version="2020.08.17">
<!-- Short qualifier for the tool: data source and organism covered. -->
<description>[neXtProt, Human]</description>
<!-- Runtime dependency: the command below launches the wrapper through Rscript. -->
<requirements>
  <requirement version="3.4.1" type="package">R</requirement>
</requirements>
<!-- Any exit status of 1 or higher is reported as a fatal job error. -->
<stdio>
  <exit_code range="1:" level="fatal"/>
</stdio>
<command><![CDATA[

  ## Run the R wrapper that ships next to this tool definition.
  ## Every substituted value is wrapped in single quotes so the shell passes it
  ## through verbatim (no word splitting, globbing or variable expansion).
  Rscript '$__tool_directory__/add_protein_features.R'
  --inputtype='$inputtype.filetype'
  --input='$inputtype.genelist'

  ## The column and header options only exist in the "file" input mode.
  #if $inputtype.filetype == "file"
    --column='$inputtype.column'
    --header='$inputtype.header'
  #end if

  --type='$idtype'
  --pc_features='$pc_features'
  --output='$output'

  ## A reference path that contains a "proteore_nextprot_ref" directory component
  ## is used as given; any other value is resolved relative to the tool directory.
  #if 'proteore_nextprot_ref' in str($ref_file).split("/")
    --nextprot='$ref_file'
  #else
    --nextprot='$__tool_directory__/$ref_file'
  #end if

]]></command>

<inputs>
  <!-- Input mode: the IDs come either from a column of a dataset or from a pasted list. -->
  <conditional name="inputtype">
    <param name="filetype" type="select" label="Enter your IDs (neXtProt or UniProt; e.g. NX_P05090 or P05090)" help="Copy/paste or from a file">
      <option value="file" selected="true">Input file containing your IDs </option>
      <option value="copy_paste">Copy/paste your list of IDs</option>
    </param>

    <!-- Copy/paste mode: "genelist" holds the raw text typed by the user. -->
    <when value="copy_paste">
      <param name="genelist" type="text" label="Enter a list of IDs separated by tab, space or carriage return into the form field" help="for example : A0AVI2 A6NGB0">
        <!--
          The value is placed between single quotes on the command line, so the
          single quote is removed from the allowed characters and rewritten to
          __sq__. Characters that are neither allowed nor mapped become the empty
          string (invalid_char="").
          NOTE(review): the whitespace characters mapped below are also part of
          string.printable; confirm whether these mappings ever take effect.
        -->
        <sanitizer invalid_char="">
          <valid initial="string.printable">
            <remove value="&apos;"/>
          </valid>
          <mapping initial="none">
            <add source="&apos;" target="__sq__"/>
            <add source="&#x20;" target=""/>
            <add source="&#xA;" target=""/>
            <add source="&#xD;" target=""/>
            <add source="&#x9;" target=""/>
          </mapping>
        </sanitizer>
      </param>
    </when>

    <!-- File mode: "genelist" is a dataset; "column" and "header" describe where the IDs are. -->
    <when value="file">
      <param name="genelist" type="data" format="txt,tabular" label="Select your file" help=""/>
      <param name="column" type="text" label="Column IDs (e.g : Enter c1 for column n°1)" value="c1">
        <!--
          Anchored at both ends: only an optional leading "c" followed by digits
          is accepted. Without the trailing anchor, a value such as "c1foo"
          would pass because the pattern is only matched from the start.
        -->
        <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
      </param>
      <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does input file have header?"/>
    </when>
  </conditional>

    <!-- Kind of identifier supplied by the user; passed to the script as its "type" option. -->
    <param name="idtype" type="select" optional="false" multiple="false" label="Type of IDs">
        <option selected="true" value="NextprotID">neXtProt IDs</option>
        <option selected="false" value="Uniprot_AC">UniProt accession number</option>
    </param>
    <!-- Annotation columns to add; several can be ticked and none is pre-selected. -->
    <param name="pc_features" type="select" display="checkboxes" multiple="true" optional="false" label="Features/Annotation" help="">
        <option selected="false" value="ProteinName">Protein name</option>
        <option selected="false" value="SeqLength">Sequence Length</option>
        <option selected="false" value="MW">Molecular Weight</option>
        <option selected="false" value="IsoPoint">Isoelectric point</option>
        <option selected="false" value="TMDomains">Number of transmembrane domains</option>
        <option selected="false" value="SubcellLocations">Subcellular Location</option>
        <option selected="false" value="Diseases">Disease information</option>
        <option selected="false" value="Function">Protein function</option>
        <option selected="false" value="PostTranslationalModifications">Post translational modifications</option>
        <option selected="false" value="ProteinFamily">Protein family</option>
        <option selected="false" value="Pathway">Pathway(s)</option>
        <option selected="false" value="ProteinExistence">Protein Existence (evidence score from 1 to 5)</option>
        <option selected="false" value="Chr">Chromosome</option>
    </param>

    <!-- Reference release, listed from the proteore_nextprot_ref data table and sorted on its first column. -->
    <param name="ref_file" type="select" label="Release of neXtProt reference file to use">
        <options from_data_table="proteore_nextprot_ref">
            <filter column="0" type="sort_by"/>
        </options>
    </param>

</inputs>


<outputs>
  <!--
    Single result dataset, referenced as $output in the command.
    This section previously declared "output" twice. Output names must be
    unique, and the first declaration carried the filter inputtype=="file",
    which compares the whole conditional (not its "filetype" value) to a string
    and therefore never matches. Keeping only the unfiltered definition yields
    one valid output in both input modes.
    NOTE(review): if the label should mention the source dataset in file mode,
    add that to this single definition after checking how it renders in
    copy/paste mode.
  -->
  <data name="output" format="tsv" label="Add_information_from_neXtProt"/>
</outputs>

<tests>
  <!--
    File mode: IDs are read from column c1 of a dataset with a header row,
    treated as UniProt accessions, and five features are requested.
  -->
  <test>
    <conditional name="inputtype">
      <param name="filetype" value="file"/>
      <param name="genelist" value="Lacombe_et_al_2017.tsv"/>
      <param name="column" value="c1"/>
      <param name="header" value="true"/>
    </conditional>
    <param name="ref_file" value="test-data/cached_locally/nextprot_ref_07-05-2019.tsv"/>
    <param name="idtype" value="Uniprot_AC"/>
    <param name="pc_features" value="SeqLength,MW,IsoPoint,TMDomains,ProteinExistence"/>
    <output name="output" file="Add_information_from_neXtProt.tsv"/>
  </test>
</tests>

<help><![CDATA[

**Description**

This tool retrieves annotation (protein features) from the neXtProt database (knowledgebase on human proteins) to enrich and better inform your protein IDs list.

-----

**Input**

A list of UniProt accession numbers (e.g. P05090) or neXtProt IDs (e.g. NX_P05090), either entered in copy/paste mode or provided in a file containing one or more columns, with **at least one column of UniProt accession numbers or neXtProt IDs**. If your input file contains other types of IDs, please use the ID_Converter tool.

.. class:: warningmark
	
In copy/paste mode, the number of IDs considered in input is limited to 5000.

-----

**Parameters**

"Features/Annotation": select each feature of interest by ticking the corresponding checkbox. No feature is selected by default, and at least one must be chosen.

-----

**Output**

Output is a tabular file containing both original columns and new columns including the annotation requested. Only features/annotations assigned with a "GOLD" quality criterion are considered, otherwise a "NA" value is returned.

.. class:: warningmark

The "Protein name", "Protein function", "Post translational modifications", "Protein family" and "Pathway(s)" options will not work with releases older than 08/2020.

-----

**Data source (release date)**

Annotations have been retrieved using a REST API (https://academic.oup.com/nar/article/43/D1/D764/2439066#40348985) (Gaudet et al., 2017).

neXtProt releases:

- neXtProt release 31-07-2020 (release Feb, 2020)
- neXtProt release 07-05-2019 (release Feb, 2019)
- neXtProt release 08-10-2018 (release Feb, 2018)
  
.. class:: warningmark

For "Protein function", "Post translational modifications" and "Pathway(s)", only results with gold quality are returned.

-----

.. class:: infomark

**Authors**

David Christiany, Lisa Perus, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Help: contact@proteore.org for any questions or concerns about this tool.
 
    ]]></help>
    <!-- No citation entries are declared for this tool. -->
    <citations/>

</tool>