view build_protein_interaction_maps.xml @ 3:30dda8d454ce draft

planemo upload commit b8671ffe2e12dc6612b971a3e6e1dc71496aefd0
author proteore
date Fri, 24 Jan 2020 05:21:52 -0500
parents 99f6a94c1ade
children 4db07f536a32
line wrap: on
line source

<tool id="build_protein_interaction_maps" name="Build Protein interaction network" version="2019.01.23">
<description>[BioGRID, BioPlex, HuMAP]</description>
    <requirements>
        <!-- No dependency is declared here, although the command section invokes "python".
             NOTE(review): presumably the interpreter comes from the job environment; confirm
             and consider pinning a python requirement. -->
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Builds the command line for build_protein_interaction_maps.py.
        ## Every substituted value is wrapped in single quotes: the sanitizer of the
        ## pasted-IDs field rewrites the single quote to __sq__ but keeps the other
        ## printable characters (double quote, dollar sign, backtick, backslash), so
        ## double-quoted or unquoted interpolation could be broken out of by pasted text.
        python '$__tool_directory__/build_protein_interaction_maps.py'

            ## Identifiers: either the pasted list or a column of a dataset.
            #if $input.ids == "text"
                --input='$input.txt'
                --input_type='text'
            #else
                --input='$input.file'
                --ncol='$input.ncol'
                --header='$input.header'
                --input_type='file'
            #end if

            ## Species is read from the selected data table entry for BioGRID only;
            ## the two other databases are always run with Human.
            --database='$database.ref'
            #if $database.ref=="biogrid" 
                --species='$database.ref_file.fields.species'
            #else 
                --species='Human'
            #end if

            ## A dictionary path containing a PPI_dictionaries component is passed as is;
            ## any other value is resolved relative to the tool directory.
            #if 'PPI_dictionaries' in str($database.ref_file).split('/') : 
                --dict_path='$database.ref_file'
            #else
                --dict_path='$__tool_directory__/$database.ref_file'
            #end if

            ## The output pair matches the input mode (see the filters in the outputs section).
            #if $input.ids == "file"
                --network_output='$network_output'
                --nodes_output='$nodes_output'
            #else 
                --network_output='$network_output_from_list'
                --nodes_output='$nodes_output_from_list'
            #end if
            --id_type='$database.id_type'

    ]]></command>
    <inputs>
        <!-- Source of the identifiers: a pasted list ("text") or a column of a dataset ("file"). -->
        <conditional name="input">
            <param name="ids" type="select" label="Enter IDs" help="Copy/paste or from a file (e.g. table)">
                <option value="text">Copy/paste your identifiers</option>
                <option value="file" selected="true">Input file containing IDs</option>
            </param>
            <when value="text">
                <param name="txt" type="text" label="Copy/paste IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258' >
                    <!-- Printable characters are kept, except the ones that are special inside a
                         quoted shell argument: the single quote is rewritten to __sq__, while the
                         double quote, dollar sign, backtick and backslash are dropped
                         (invalid_char is empty). Whitespace separators stay valid. -->
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                            <remove value="&quot;"/>
                            <remove value="$"/>
                            <remove value="`"/>
                            <remove value="\"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                            <add source="&#x20;" target=""/>
                            <add source="&#xA;" target=""/>
                            <add source="&#xD;" target=""/>
                            <add source="&#x9;" target=""/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file" >
                <param name="file" type="data" format="txt,tabular" label="Select your file" help="" />
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
                <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
                    <!-- Anchored at both ends so that only an optional "c" followed by digits is accepted. -->
                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                </param>
            </when>
        </conditional>
        <!-- Interaction database; each branch offers its accepted ID types and the
             dictionaries listed in the matching data table. -->
        <conditional name="database">
            <param name="ref" type="select" label="Select database">
                <option value="biogrid">BioGRID</option>
                <option value="bioplex">Human Bioplex 2.0</option>
                <option value="humap">Human Protein Complex Map (Hu.MAP)</option>
            </param>
            <when value="biogrid">
                <param name="id_type" type="select" label="Type/source of IDs" optional="false" >
                    <option value="GeneID" >Entrez gene ID</option>
                </param>
                <param name="ref_file" type="select" label="Species">
                    <options from_data_table="proteore_biogrid_dictionaries">
                        <filter type="sort_by" column="1"/>
                    </options>
                </param>
            </when>
            <when value="bioplex">
                <param name="id_type" type="select" label="Type/source of IDs" optional="false">
                    <option value="UniProt-AC" selected="True" >UniProt accession number</option>
                    <option value="GeneID">Entrez gene ID</option>
                </param>
                <param name="ref_file" type="select" label="Species">
                    <options from_data_table="proteore_bioplex_dictionaries">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
            </when>
            <when value="humap">
                <param name="id_type" type="select" label="Type/source of IDs" optional="false" >
                    <option value="GeneID" >Entrez gene ID</option>
                </param>
                <param name="ref_file" type="select" label="Species">
                    <options from_data_table="proteore_humap_dictionaries">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- A network file and a nodes file are declared twice: one pair is kept when
             the IDs were pasted ("text"), the other when they come from a dataset
             ("file"). The filters below select the pair for the chosen input mode. -->

        <!-- Network, pasted IDs -->
        <data format="tsv" name="network_output_from_list" label="Network_${database.ref}_from_ids_list">
            <filter>input['ids'] == "text" </filter>
        </data>

        <!-- Network, IDs from a dataset (label carries the dataset name) -->
        <data format="tsv" name="network_output" label="Network_${database.ref}_from_${input.file.name}">
            <filter>input['ids'] == "file" </filter>
        </data>

        <!-- Nodes, pasted IDs -->
        <data format="tsv" name="nodes_output_from_list" label="Nodes_${database.ref}_from_ids_list">
            <filter>input['ids'] == "text"</filter>
        </data>

        <!-- Nodes, IDs from a dataset (label carries the dataset name) -->
        <data format="tsv" name="nodes_output" label="Nodes_${database.ref}_from_${input.file.name}">
            <filter>input['ids'] == "file"</filter>
        </data>
    </outputs>
    <tests>
        <!-- All tests use the "file" input mode with a header line. -->

        <!-- BioPlex dictionary, UniProt-AC identifiers taken from column c1. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_geneID.tsv"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c1"/>
            </conditional>
            <conditional name="database">
                <param name="ref" value="bioplex"/>
                <param name="id_type" value="UniProt-AC"/>
                <param name="ref_file" value="tool-data/Human_bioplex_2019-03-01"/>
            </conditional>
            <output name="network_output" value="Network_bioplex_from_Lacombe_geneID.tsv"/>
            <output name="nodes_output" value="Nodes_bioplex_from_Lacombe_geneID.tsv"/>
        </test>

        <!-- Hu.MAP dictionary, GeneID identifiers taken from column c4. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_geneID.tsv"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c4"/>
            </conditional>
            <conditional name="database">
                <param name="ref" value="humap"/>
                <param name="id_type" value="GeneID"/>
                <param name="ref_file" value="tool-data/Human_humap_2019-03-01"/>
            </conditional>
            <output name="network_output" value="Network_humap_from_Lacombe_geneID.tsv"/>
            <output name="nodes_output" value="Nodes_humap_from_Lacombe_geneID.tsv"/>
        </test>

        <!-- BioGRID Human dictionary, GeneID identifiers taken from column c4. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_geneID.tsv"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c4"/>
            </conditional>
            <conditional name="database">
                <param name="ref" value="biogrid"/>
                <param name="id_type" value="GeneID"/>
                <param name="ref_file" value="tool-data/Human_biogrid_2019-03-01"/>
            </conditional>
            <output name="network_output" value="Network_biogrid_from_Lacombe_geneID.tsv"/>
            <output name="nodes_output" value="Nodes_biogrid_from_Lacombe_geneID.tsv"/>
        </test>

        <!-- BioGRID Mouse dictionary, GeneID identifiers taken from column c2.
             NOTE(review): the input file name contains "-up" while the expected
             output file names do not; confirm the test-data file names. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Wilson-foie-souris-up_geneID.tsv"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c2"/>
            </conditional>
            <conditional name="database">
                <param name="ref" value="biogrid"/>
                <param name="id_type" value="GeneID"/>
                <param name="ref_file" value="tool-data/Mouse_biogrid_2019-03-01"/>
            </conditional>
            <output name="network_output" value="Network_biogrid_from_Wilson-foie-souris_geneID.tsv"/>
            <output name="nodes_output" value="Nodes_biogrid_from_Wilson-foie-souris_geneID.tsv"/>
        </test>

        <!-- BioGRID Rat dictionary, GeneID identifiers taken from column c4. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Rattus_Hameza_dataset_geneID.tsv"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c4"/>
            </conditional>
            <conditional name="database">
                <param name="ref" value="biogrid"/>
                <param name="id_type" value="GeneID"/>
                <param name="ref_file" value="tool-data/Rat_biogrid_2019-03-01"/>
            </conditional>
            <output name="network_output" value="Network_biogrid_from_Rattus_Hameza_dataset_geneID.tsv"/>
            <output name="nodes_output" value="Nodes_biogrid_from_Rattus_Hameza_dataset_geneID.tsv"/>
        </test>
    </tests>
    <help><![CDATA[
**Description**

As elementary constituents of cellular protein complexes and pathways, protein–protein interactions (PPIs) are key determinants of protein function. This tool builds interaction maps by mapping your list of protein or gene identifiers onto different public resources; depending on your needs and the species of interest, different PPI resources are available (for details see the “Parameters” section). The two result files generated (network and node attributes) can be used for viewing and further exploring the resulting protein interaction network by importing them directly into dedicated software (e.g. Cytoscape).

-----

**Input**

"Enter IDs": A list of IDs must be entered either via a copy/paste or by choosing a file. The type of identifiers allowed depends on the public resource you select (see below).  

.. class:: warningmark
	
In copy/paste mode, the number of IDs considered in input is limited to 5000.

-----

**Parameters**

"Select database": three databases are currently proposed according to your need and listed below

1. **BioGRID** is an interaction repository with data compiled through comprehensive curation efforts. Homo sapiens, Mus musculus and Rattus norvegicus species are currently available (for more details, https://thebiogrid.org/).
2. **Bioplex** (biophysical interactions of ORFeome-based complexes) network is the result of creating thousands of cell lines with each expressing a tagged version of a protein from the ORFeome collection. Immunopurification of the tagged protein and detection of associated proteins by mass spectrometry are the building blocks of the network (for more details, http://bioplex.hms.harvard.edu/)
3. **HuMAP** (Human Protein Complex Map) is one of the most comprehensive views of human protein complexes, built by integrating large-scale affinity purification mass spectrometry (AP/MS) datasets with datasets of large-scale biochemical fractionations (for more details, http://proteincomplexes.org/about). We recommend selecting this resource for exploring human protein complexes.

"Type/source of IDs": corresponds to the type of identifiers you have. Note that only Entrez gene ID and UniProt accession number are allowed. If your identifiers are of another type, please use the "ID-converter" tool from ProteoRE.

"Species": must be specified if using BioGRID as PPI database (i.e. Homo sapiens, Mus musculus or Rattus norvegicus). If Bioplex or HuMAP is selected, the species is automatically set to Human (Homo sapiens) and the release date is displayed.

-----

**Output:**

Two output files are created with the following prefixes: "Network_PPIdatabaseName\_" and "Nodes_PPIdatabaseName\_" (where "PPIdatabaseName" corresponds to the PPI database selected). 
The "Network" file contains information related to each interaction between two proteins (one row per binary interaction) while the "Nodes" file contains attributes (i.e. annotation, information) related to each gene/protein.
Below is shown a brief example of each output file when BioGRID is selected. Note that a "NA" is added when there is no available information.

"Network" output file (example): 

.. csv-table:: Network file (if BioGRID database selected - simulated data)
   :header: "Entrez_Gene_Interactor_A","Entrez Gene Interactor B","Gene symbol Interactor A","Gene symbol Interactor B","Experimental System","Experimental Type","Pubmed ID","Interaction Score","Phenotypes"

    1,368,"A1BG","ABCC6","Two-hybrid","physical",21988832,"NA","Growth abnormality"
    1,10549,"A1BG","PRDX4","Negative Genetic","genetic",21988832,"NA","NA"
    1,9923,"A1BG","ZBTB40","Affinity Capture-MS","physical",28514442,0.99977983,"NA"  


**"Interaction Score"**: a positive or negative value recorded by the original publication depicting P-Values, Confidence Score, SGA Score, etc. Will be “NA” if no score is reported.

"Nodes" output file (example):

.. csv-table:: Nodes file (if BioGRID database selected - simulated data)
   :header: "Entrez gene ID","Official Symbol Interactor","Present in user input ids","ID present in Biogrid Human","Pathway"
   
	"1","A1BG","True","True","Platelet degranulation ;Neutrophil degranulation"
	"10","NAT2","False","True","Acetylation"
	"12","SERPINA3","True","False","NA"



- **"Present in user input ids"**: a boolean value (i.e. True/False) telling you whether the ID was present in your initial list ("True") or not ("False"); a "False" indicates that this ID is not present in your list but has been reported in BioGRID.    
- **"ID present in Biogrid Human"**: a boolean value (i.e. True/False) telling you whether the ID was present in the BioGRID release you selected ("True") or not ("False"); a "False" value indicates that one ID from your list is not reported in the BioGRID release used.  

These 2 files can be directly imported into a visualization software (such as Cytoscape - https://cytoscape.org/download.html) for further exploration and analysis of the newly created biological network.

-----

.. class:: infomark

**Data source (release date)**

This tool uses the following public resources:

- **BioGRID**:

The release 3.5.167 was compiled on Nov. 25th, 2018 and contains all curated interaction data processed prior to this date. Source files are: 

BIOGRID-ORGANISM-Homo_sapiens-3.5.167.tab2.txt

BIOGRID-ORGANISM-Mus_musculus-3.5.167.tab2.txt

BIOGRID-ORGANISM-Rattus_norvegicus-3.5.167.tab2.txt

- **Bioplex**: Version 2.0 

BioPlex_interactionList_v4a.tsv: http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv

- **Human protein complex Map (Hu.map)**:

nodeTable.txt: http://proteincomplexes.org/static/downloads/nodeTable.txt

pairsWprob: http://proteincomplexes.org/static/downloads/pairsWprob.txt

Mapping files linking the source database identifier (Entrez gene ID and Uniprot Accession Number) to the Reactome pathways are based on 2018-12-07 version, NCBI2Reactome.txt and UniProt2Reactome.txt (from https://www.reactome.org/download-data).

-----

.. class:: infomark

**Galaxy integration**

David Christiany, Lisa Perus, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Help: contact@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
    ]]></help>
    <citations>
        <!-- No citation is declared for this tool. -->
    </citations>
 </tool>