diff freqSAP.xml @ 0:ddabfd6ee2a2 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
author recetox
date Fri, 18 Jul 2025 13:21:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/freqSAP.xml	Fri Jul 18 13:21:36 2025 +0000
@@ -0,0 +1,138 @@
+<tool id="freqsap" name="freqSAP" version="1.0.0+galaxy0" profile="23.0" license="MIT">
+    <description>Get frequencies of single amino-acid polymorphisms based on nucleid-acid polymorphism for different populations from UniProt and DbSNP</description>
+
+    <creator>
+        <person
+            givenName="Helge"
+            familyName="Hecht"
+            url="https://github.com/hechth"
+            identifier="0000-0001-6744-996X" />
+        <organization
+            url="https://www.recetox.muni.cz/"
+            email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+            name="RECETOX MUNI" />
+    </creator>
+
+    <edam_topics>
+        <edam_topic>topic_0121</edam_topic>
+        <edam_topic>topic_3366</edam_topic>
+    </edam_topics>
+
+    <edam_operations>
+        <edam_operation>operation_3197</edam_operation>
+        <edam_operation>operation_2479</edam_operation>
+        <edam_operation>operation_2422</edam_operation>
+    </edam_operations>
+
+    <requirements>
+        <requirement type="package" version="2.3.0">pandas</requirement>
+        <requirement type="package" version="2.32.4">requests</requirement>
+        <requirement type="package" version="3.1.5">openpyxl</requirement>
+    </requirements>
+
+    <required_files>
+        <include path="freqSAP.py"/>
+    </required_files>
+
+    <command detect_errors="exit_code"><![CDATA[
+    python3 '${__tool_directory__}/freqSAP.py' -a '$accession' -p '$populations' -f '$output_format' -o '$output_file'
+    ]]></command>
+    <inputs>
+        <param argument="--accession" type="text" label="Protein Accession in UniProt" help="UniProt accession of the protein to fetch variation data for.">
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+            <validator type="regex">[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}</validator>
+        </param>
+        <param argument="--populations" type="select" multiple="true" optional="false" label="Regions from which to choose the populations" help="The regions are broken down into sub-populations based on available entries in DbSNP.">
+            <option value="Africa">Africa</option>
+            <option value="North America">North America</option>
+            <option value="Asia">Asia</option>
+            <option value="Europe">Europe</option>
+            <option value="South America">South America</option>
+            <option value="Middle East">Middle East</option>
+            <option value="Other">Other</option>
+            <option value="Global" selected="true">Global</option>
+        </param>
+        <param argument="--output_format" type="select" label="Output Format" help="Format in which to store the output file.">
+            <option value="tabular" selected="true">Tab Separated Value (tabular)</option>
+            <option value="csv">Comma Separated Value (csv)</option>
+            <option value="xlsx">Excel (xlsx)</option>
+       </param>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="tabular" label="${tool.name} of ${accession}">
+            <change_format>
+                <when input="output_format" value="xlsx" format="xlsx"/>
+                <when input="output_format" value="csv" format="csv"/>
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="accession" value="P0DJI9"/>
+            <param name="populations" value="Europe,Asia"/>
+            <output name="output_file" ftype="tabular">
+                <assert_contents>
+                    <has_n_columns n="21"/>
+                    <has_n_lines n="807" delta="20"/>
+                    <has_text text="rs1044068853"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="accession" value="P05067"/>
+            <param name="populations" value="Global"/>
+            <param name="output_format" value="xlsx"/>
+            <output name="output_file" ftype="xlsx">
+                <assert_contents>
+                    <has_size size="161" delta="10"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+freqSAP: Frequency of Single Amino-Acid Polymorphisms
+=====================================================
+
+This tool retrieves the frequencies of single amino-acid polymorphisms (SAPs) for a given protein accession from UniProt and DbSNP, broken down by population regions.
+
+Inputs
+------
+
+- **Protein Accession in UniProt:**  
+  Enter the UniProt accession (e.g., ``P12345``) for the protein of interest.
+
+- **Regions from which to choose the populations:**  
+  Select one or more population regions. The tool will fetch SAP frequencies for the selected regions.
+
+- **Output Format:**  
+  Choose the format for the output file:
+  
+  - Tab Separated Value (``tabular``)
+  - Comma Separated Value (``csv``)
+  - Excel (``xlsx``)
+
+Outputs
+-------
+
+- A table listing SAP frequencies for the selected protein and populations, in the chosen format.
+
+Example Usage
+-------------
+
+1. Enter a UniProt accession (e.g., ``P69905``).
+2. Select ``Europe`` and ``Asia`` as populations.
+3. Choose ``csv`` as the output format.
+4. Run the tool to download the SAP frequency table.
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1093/nar/29.1.308</citation>
+        <citation type="doi">10.1093/nar/gkaf394</citation>
+    </citations>
+</tool>
\ No newline at end of file