view freqSAP.xml @ 0:ddabfd6ee2a2 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
author recetox
date Fri, 18 Jul 2025 13:21:36 +0000
parents
children
line wrap: on
line source

<tool id="freqsap" name="freqSAP" version="1.0.0+galaxy0" profile="23.0" license="MIT">
    <!-- One-line summary displayed next to the tool name in the Galaxy tool panel. -->
    <description>Get frequencies of single amino-acid polymorphisms based on nucleic-acid polymorphism for different populations from UniProt and dbSNP</description>

    <creator>
        <!-- Individual author of the wrapper. NOTE(review): identifier has the shape of an ORCID iD; confirm. -->
        <person identifier="0000-0001-6744-996X" givenName="Helge" familyName="Hecht" url="https://github.com/hechth" />
        <!-- Organization credited for the tool, with its contact address. -->
        <organization name="RECETOX MUNI" url="https://www.recetox.muni.cz/" email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" />
    </creator>

    <!-- EDAM ontology topic annotations used to classify the tool. -->
    <!-- NOTE(review): topic_0121 is presumably "Proteomics" and topic_3366 "Data integration and warehousing"; verify against the EDAM browser. -->
    <edam_topics>
        <edam_topic>topic_0121</edam_topic>
        <edam_topic>topic_3366</edam_topic>
    </edam_topics>

    <!-- EDAM ontology operation annotations describing what the tool does. -->
    <!-- NOTE(review): presumably operation_3197 = genetic variation analysis, operation_2479 = protein sequence analysis, operation_2422 = data retrieval; verify against the EDAM browser. -->
    <edam_operations>
        <edam_operation>operation_3197</edam_operation>
        <edam_operation>operation_2479</edam_operation>
        <edam_operation>operation_2422</edam_operation>
    </edam_operations>

    <!-- Packages (with pinned versions) that form the runtime environment of the tool. -->
    <!-- NOTE(review): requests presumably performs the remote queries and openpyxl presumably backs the xlsx output option; confirm in freqSAP.py. -->
    <requirements>
        <requirement type="package" version="2.3.0">pandas</requirement>
        <requirement type="package" version="2.32.4">requests</requirement>
        <requirement type="package" version="3.1.5">openpyxl</requirement>
    </requirements>

    <!-- Files from the tool directory that the job needs; freqSAP.py is the script invoked by the command section. -->
    <required_files>
        <include path="freqSAP.py"/>
    </required_files>

    <command detect_errors="exit_code"><![CDATA[
        ## Run the bundled script; every flag receives the tool parameter of the same name.
        ## All values are single-quoted so that entries containing spaces stay one argument.
        python3 '$__tool_directory__/freqSAP.py'
            -a '$accession'
            -p '$populations'
            -f '$output_format'
            -o '$output_file'
    ]]></command>
    <inputs>
        <!-- UniProt accession. The sanitizer drops every character that is not a letter, digit or underscore. -->
        <param argument="--accession" type="text" label="Protein Accession in UniProt" help="UniProt accession of the protein to fetch variation data for.">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                </valid>
            </sanitizer>
            <!-- Galaxy's regex validator only matches from the start of the value, so the pattern is
                 anchored with ^...$ to reject accessions followed by trailing characters. -->
            <validator type="regex" message="Please enter a valid UniProt accession, e.g. P05067.">^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$</validator>
        </param>
        <!-- One or more regions; at least one must be selected (optional="false"), "Global" is preselected. -->
        <param argument="--populations" type="select" multiple="true" optional="false" label="Regions from which to choose the populations" help="The regions are broken down into sub-populations based on available entries in dbSNP.">
            <option value="Africa">Africa</option>
            <option value="North America">North America</option>
            <option value="Asia">Asia</option>
            <option value="Europe">Europe</option>
            <option value="South America">South America</option>
            <option value="Middle East">Middle East</option>
            <option value="Other">Other</option>
            <option value="Global" selected="true">Global</option>
        </param>
        <!-- File format of the result; the outputs section switches the dataset format based on this value. -->
        <param argument="--output_format" type="select" label="Output Format" help="Format in which to store the output file.">
            <option value="tabular" selected="true">Tab Separated Value (tabular)</option>
            <option value="csv">Comma Separated Value (csv)</option>
            <option value="xlsx">Excel (xlsx)</option>
        </param>
    </inputs>
    <outputs>
        <!-- Single result dataset: tabular by default, switched to csv or xlsx to follow the output_format selection. -->
        <data name="output_file" label="${tool.name} of ${accession}" format="tabular">
            <change_format>
                <when input="output_format" value="csv" format="csv"/>
                <when input="output_format" value="xlsx" format="xlsx"/>
            </change_format>
        </data>
    </outputs>

    <!-- NOTE(review): no local input files are used; the expectations presumably depend on live
         UniProt/dbSNP responses, and the delta tolerances presumably absorb upstream data changes. Confirm. -->
    <tests>
        <!-- Two regions, output_format left unset so the default option (tabular) applies. -->
        <test>
            <param name="accession" value="P0DJI9"/>
            <param name="populations" value="Europe,Asia"/>
            <output name="output_file" ftype="tabular">
                <assert_contents>
                    <has_n_columns n="21"/>
                    <has_n_lines n="807" delta="20"/>
                    <has_text text="rs1044068853"/>
                </assert_contents>
            </output>
        </test>
        <!-- Global population with Excel output; only the file size is checked. -->
        <!-- NOTE(review): size and delta carry no unit suffix; confirm the intended unit, since 161 looks small for an xlsx workbook. -->
        <test>
            <param name="accession" value="P05067"/>
            <param name="populations" value="Global"/>
            <param name="output_format" value="xlsx"/>
            <output name="output_file" ftype="xlsx">
                <assert_contents>
                    <has_size size="161" delta="10"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
freqSAP: Frequency of Single Amino-Acid Polymorphisms
=====================================================

This tool retrieves the frequencies of single amino-acid polymorphisms (SAPs) for a given protein accession from UniProt and dbSNP, broken down by population regions.

Inputs
------

- **Protein Accession in UniProt:**  
  Enter the UniProt accession (e.g., ``P12345``) for the protein of interest.

- **Regions from which to choose the populations:**  
  Select one or more population regions. The tool will fetch SAP frequencies for the selected regions.

- **Output Format:**  
  Choose the format for the output file:
  
  - Tab Separated Value (``tabular``)
  - Comma Separated Value (``csv``)
  - Excel (``xlsx``)

Outputs
-------

- A table listing SAP frequencies for the selected protein and populations, in the chosen format.

Example Usage
-------------

1. Enter a UniProt accession (e.g., ``P69905``).
2. Select ``Europe`` and ``Asia`` as populations.
3. Choose ``csv`` as the output format.
4. Run the tool to download the SAP frequency table.
    ]]></help>

    <!-- Publications to cite when using this tool. -->
    <!-- NOTE(review): the first DOI presumably refers to the dbSNP paper and the second to a UniProt publication; verify by resolving the DOIs. -->
    <citations>
        <citation type="doi">10.1093/nar/29.1.308</citation>
        <citation type="doi">10.1093/nar/gkaf394</citation>
    </citations>
</tool>