Mercurial > repos > iuc > data_manager_clair3_models

diff data_manager/install_clair3_models.xml @ 0:11e42265a9b0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
author: iuc
date: Thu, 20 Feb 2025 17:57:11 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/install_clair3_models.xml	Thu Feb 20 17:57:11 2025 +0000
@@ -0,0 +1,102 @@
+<tool id="data_manager_clair3_models" name="Clair3 model downloader" version="0.0.1" tool_type="manage_data" profile="23.2">
+    <requirements>
+        <requirement type="package" version="3.12">python</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+            ## this code looks up the existing table and uses it to build a list of known models
+            ## because models are uniquely identified by their name, downloading duplicate models
+            ## would be an error, so it is useful to know which models we already have
+            ##
+            ## $__app__.tool_data_tables is a dictionary where the keys are data table names and the values 
+            ## are TabularToolDataTable objects (from lib/galaxy/tools/data/__init__.py)
+            ##
+            ## the get_fields() method on the TabularToolDataTable returns a list of lists, with one list
+            ## per line of the tool data table, so row[0] is the first field (i.e. the value column)
+            ##
+
+            #set $data_table = $__app__.tool_data_tables.get("clair3_models")
+            #if $data_table is not None and len($data_table.get_fields()) > 0:
+                #set $known_models = ','.join([ row[0] for row in $data_table.get_fields() ])
+                #set $sha256_sums = ','.join([ row[1] for row in $data_table.get_fields() ])
+            #else
+                #set $known_models = None
+                #set $sha256_sums = None
+            #end if
+
+        python '$__tool_directory__/model_fetcher.py'
+            '${output_file}'
+            #if $known_models is not None
+                --known_models '$known_models'
+                --sha256_sums '$sha256_sums'
+            #end if
+            #if $model_selection.source == 'latest'
+                --download_latest
+            #elif $model_selection.source == 'chosen'
+                --download_models '$model_selection.model_list'
+            #end if
+    ]]></command>
+    <inputs>
+        <conditional name="model_selection">
+            <param name="source" label="Select the source of the list of models to download" type="select">
+                <option value="latest">Latest models from Rerio page</option>
+                <option value="chosen">User provided list of models</option>
+            </param>
+            <when value="latest">
+            </when>
+            <when value="chosen">
+                <param name="model_list" type="text" label="List of models to download" help="A comma separated list of model to download, e.g. 'r1041_e82_400bps_sup_v430,r1041_e82_400bps_hac_v430'">
+                    <validator type="regex" message="Invalid model list. Format is a comma separated list of model names (e.g. 'r1041_e82_400bps_sup_v430,r1041_e82_400bps_hac_v430')">^[a-z_0-9,]+$</validator>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json" label="Data Manager Output (JSON)" />
+    </outputs>
+    <tests>
+        <test> <!-- test1 -->
+            <conditional name="model_selection">
+                <param name="source" value="chosen"/>
+                <param name="model_list" value="r1041_e82_400bps_sup_v500,r1041_e82_400bps_hac_v500" />
+            </conditional>
+            <output name="output_file">
+                <assert_contents>
+                    <!-- the text 'r1041_e82_400bps_sup_v500' is only there if the test is run for the first time (i.e. empty test-data/clair3_models.loc) so need to look for something else -->
+                    <has_text text='clair3_models' />
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- test2 -->
+            <conditional name="model_selection">
+                <param name="source" value="latest"/>
+            </conditional>
+            <output name="output_file">
+                <assert_contents>
+                    <!-- because we don't know what the names of the latest models are we can only test to see if the data table output is created -->
+                    <has_text text='data_tables' />  
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+    Clair3_ is a variant caller for long read data developed at the University of Hong Kong. This tool makes use of models trained to match particular
+    sequencing technologies and basecallers. Oxford Nanopore provides a set of models for Clair3 on their Rerio_ page. These tools are designed for
+    "research release" under the terms of the "Oxford Nanopore Technologies, Ltd. Public License Version 1.0" license_. This data manager allows
+    downloading model files from the Rerio page and installing them on a Galaxy server.
+
+    .. _Clair3: https://github.com/HKU-BAL/Clair3
+    .. _Rerio: https://github.com/nanoporetech/rerio
+    .. _license: https://github.com/nanoporetech/rerio/blob/master/LICENCE.txt
+    ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1101/2021.12.29.474431v2</citation>
+        <citation type="bibtex"><![CDATA[@misc{ONT2024,
+            title        = {Rerio},
+            author       = {Oxford Nanopore Technologies},
+            year         = 2024,
+            howpublished = {\url{https://github.com/nanoporetech/rerio}},
+            commit       = {c0c8ce6}
+    }]]></citation>
+    </citations>
+</tool>