changeset 5:6ab91825be22 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_busco/ commit d46d2ec37b9b8f08d58472978c51b4c46e7ed18c
author iuc
date Fri, 04 Apr 2025 10:12:43 +0000
parents 828d961faea7
children 709faa13c793
files data_manager/busco_fetcher.xml data_manager/busco_options.xml data_manager/extract.py data_manager_conf.xml test-data/busco_database.loc test-data/busco_database_options.loc tool-data/busco_database.loc.sample tool-data/busco_database_options.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 10 files changed, 176 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/busco_fetcher.xml	Fri Apr 04 10:12:36 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-<tool id="busco_fetcher" name="Busco" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01">
-    <description>dataset dowloader</description>
-        <macros>
-        <token name="@TOOL_VERSION@">5.8.0</token>
-        <token name="@VERSION_SUFFIX@">1</token>
-    </macros>
-    <requirements>
-        <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        busco --download_path '$out_file.extra_files_path' --download '$lineage' &&
-        ## remove unwanted lineage folders and placement files
-        ## https://gitlab.com/ezlab/busco/-/issues/784
-        if [ -d '$out_file.extra_files_path'/lineages ]; then
-            find '$out_file.extra_files_path'/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_$datasets_version*' -exec rm -rf {} \;;
-        fi &&
-        if [ -d '$out_file.extra_files_path'/placement_files ]; then
-            find '$out_file.extra_files_path'/placement_files/ -mindepth 1 -maxdepth 1 ! -name '*_$datasets_version*' -delete;
-        fi &&
-        ## unpack faa.gz files ready to use
-        ## https://gitlab.com/ezlab/busco/-/issues/789
-        if [ -d '$out_file.extra_files_path'/lineages ]; then
-            find '$out_file.extra_files_path'/lineages/ -name "*.faa.gz" -exec gunzip {} \;;
-        fi
-        && cp '$dmjson' '$out_file' 
-    ]]></command>
-    <configfiles>
-        <configfile name="dmjson"><![CDATA[
-#from datetime import datetime
-    {
-      "data_tables":{
-        "busco_database":[
-          {
-            "value":"$lineage $datasets_version #echo datetime.now().strftime('%Y-%m-%d-%H%M%S')#",
-            "name":"$lineage $datasets_version #echo datetime.now().strftime('%Y-%m-%d-%H%M%S')#",
-            "version":"@TOOL_VERSION@",
-            "path":"$out_file.extra_files_path"
-          }
-        ]
-      }
-    }]]>
-            </configfile>
-        </configfiles>
-    <inputs>
-        <param name="lineage" argument="--download" type="select" label="Select the lineage to be downloaded">
-            <option value="all">All</option>
-            <option value="prokaryota">Prokaryota</option>
-            <option value="eukaryota">Eukaryota</option>
-            <option value="virus">Virus</option>
-        </param>
-        <param argument="--datasets_version" type="select" label="OrthoDB version" help="By March 2025 OrthoDB 12 datasets were only available for Prokaryota (https://busco-data2.ezlab.org/v5/data/)">
-            <option value="odb10" selected="true">OrthoDB 10</option>
-            <option value="odb12">OrthoDB 12</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="out_file" format="data_manager_json" label="BUSCO data manager: JSON"/>
-    </outputs>
-    <tests>
-        <test expect_num_outputs="1">
-            <param name="lineage" value="virus"/>
-            <output name="out_file">
-                <assert_contents>
-                    <has_text text="virus"/>
-                    <has_text text="5.8.0"/>
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>This tool downloads the BUSCO databases.</help>
-    <citations>
-        <citation type="doi">10.1093/bioinformatics/btv351</citation>
-    </citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/busco_options.xml	Fri Apr 04 10:12:43 2025 +0000
@@ -0,0 +1,47 @@
+<tool id="busco_fetcher_options" name="Busco options" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01">
+    <description></description>
+    <macros> <!-- tokens substituted into the tool version and the busco requirement below -->
+        <token name="@TOOL_VERSION@">5.8.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #if $test:
+            busco --download_path test/ --download virus > /dev/null &&
+            #set db_path="./test"
+        #else
+            #set db_path=$cached_db.fields.path
+        #end if
+        python '$__tool_directory__/extract.py' '$db_path' '$cached_db' '$out_file'
+    ]]></command>
+    <inputs>
+        <param name="test" type="hidden"/> <!-- switch set to true by the tool test: download the virus lineages into test/ instead of reading a cached DB -->
+        <param name="cached_db" label="Cached database with lineage" type="select"> <!-- choices come from the busco_database data table -->
+            <options from_data_table="busco_database">
+                <validator message="No BUSCO database is available" type="no_options"/>
+            </options>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" label="BUSCO options data manager: JSON"/> <!-- JSON written by extract.py -->
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- runs the download branch, then checks the JSON names the table and a known virus lineage -->
+            <param name="test" value="true"/>
+            <param name="cached_db" value="test"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="busco_database_options"/>
+                    <has_text text="Chordopoxvirinae"/>
+                    <has_text text="chordopoxvirinae_"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>This tool determines the possible choices of BUSCO lineages in a given BUSCO DB.</help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv351</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/extract.py	Fri Apr 04 10:12:43 2025 +0000
@@ -0,0 +1,23 @@
+import json
+import os
+import re
+import sys
+
+# argv: <BUSCO download dir> <busco_database value> <path of the JSON to write>
+busco_db = os.path.join(sys.argv[1], "lineages")
+busco_db_value = sys.argv[2]
+dmjson = sys.argv[3]
+
+content = []
+# scandir order is arbitrary; sort by name so the emitted rows are reproducible
+for d in sorted(os.scandir(busco_db), key=lambda entry: entry.name):
+    if not d.is_dir() or not os.path.exists(os.path.join(d, "dataset.cfg")):
+        continue  # skipped: not a directory, or no dataset.cfg inside it
+    name = re.sub(r"_odb\d+", "", d.name)  # drop the "_odb<N>" suffix from the label
+    name = name.replace("_", " ").capitalize()
+    content.append({'value': d.name, 'name': name, 'db_value': busco_db_value})
+
+with open(dmjson, "w") as fh:
+    json.dump({"data_tables": {"busco_database_options": content}}, fh)
+
+print(f'{len(content)} x busco_db\n')
--- a/data_manager_conf.xml	Fri Apr 04 10:12:36 2025 +0000
+++ b/data_manager_conf.xml	Fri Apr 04 10:12:43 2025 +0000
@@ -1,18 +1,12 @@
 <data_managers>
-    <data_manager tool_file="data_manager/busco_fetcher.xml" id="busco_fetcher" version="5.4.6+galaxy0">
-        <data_table name="busco_database">
+    <data_manager tool_file="data_manager/busco_options.xml" id="busco_fetcher_options" version="5.4.6+galaxy0">
+        <data_table name="busco_database_options"> <!-- table key emitted by data_manager/extract.py -->
             <output>
                 <column name="value" />
                 <column name="name" />
-                <column name="version"/>
-                <column name="path" output_ref="out_file">
-                    <move type="directory" relativize_symlinks="True">
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">busco/${value}</target>
-                    </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/busco/${value}</value_translation>
-                    <value_translation type="function">abspath</value_translation>
-                </column>
+                <column name="db_value"/> <!-- value of the busco_database entry the lineage was found in -->
             </output>
         </data_table>
+
     </data_manager>
 </data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/busco_database.loc	Fri Apr 04 10:12:43 2025 +0000
@@ -0,0 +1,1 @@
+test	test name	test	test
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/busco_database_options.loc	Fri Apr 04 10:12:43 2025 +0000
@@ -0,0 +1,81 @@
+iridoviridae_odb10	Iridoviridae	test
+betabaculovirus_odb10	Betabaculovirus	test
+betaherpesvirinae_odb10	Betaherpesvirinae	test
+skunavirus_odb10	Skunavirus	test
+chordopoxvirinae_odb10	Chordopoxvirinae	test
+alphabaculovirus_odb10	Alphabaculovirus	test
+rudiviridae_odb10	Rudiviridae	test
+tevenvirinae_odb10	Tevenvirinae	test
+tunavirinae_odb10	Tunavirinae	test
+varicellovirus_odb10	Varicellovirus	test
+pahexavirus_odb10	Pahexavirus	test
+guernseyvirinae_odb10	Guernseyvirinae	test
+baculoviridae_odb10	Baculoviridae	test
+alphaherpesvirinae_odb10	Alphaherpesvirinae	test
+spounavirinae_odb10	Spounavirinae	test
+peduovirus_odb10	Peduovirus	test
+enquatrovirus_odb10	Enquatrovirus	test
+teseptimavirus_odb10	Teseptimavirus	test
+gammaherpesvirinae_odb10	Gammaherpesvirinae	test
+herpesviridae_odb10	Herpesviridae	test
+cheoctovirus_odb10	Cheoctovirus	test
+poxviridae_odb10	Poxviridae	test
+fromanvirus_odb10	Fromanvirus	test
+bclasvirinae_odb10	Bclasvirinae	test
+simplexvirus_odb10	Simplexvirus	test
+tequatrovirus_odb10	Tequatrovirus	test
+aviadenovirus_odb10	Aviadenovirus	test
+iridoviridae_odb10	Iridoviridae	test
+betabaculovirus_odb10	Betabaculovirus	test
+betaherpesvirinae_odb10	Betaherpesvirinae	test
+skunavirus_odb10	Skunavirus	test
+chordopoxvirinae_odb10	Chordopoxvirinae	test
+alphabaculovirus_odb10	Alphabaculovirus	test
+rudiviridae_odb10	Rudiviridae	test
+tevenvirinae_odb10	Tevenvirinae	test
+tunavirinae_odb10	Tunavirinae	test
+varicellovirus_odb10	Varicellovirus	test
+pahexavirus_odb10	Pahexavirus	test
+guernseyvirinae_odb10	Guernseyvirinae	test
+baculoviridae_odb10	Baculoviridae	test
+alphaherpesvirinae_odb10	Alphaherpesvirinae	test
+spounavirinae_odb10	Spounavirinae	test
+peduovirus_odb10	Peduovirus	test
+enquatrovirus_odb10	Enquatrovirus	test
+teseptimavirus_odb10	Teseptimavirus	test
+gammaherpesvirinae_odb10	Gammaherpesvirinae	test
+herpesviridae_odb10	Herpesviridae	test
+cheoctovirus_odb10	Cheoctovirus	test
+poxviridae_odb10	Poxviridae	test
+fromanvirus_odb10	Fromanvirus	test
+bclasvirinae_odb10	Bclasvirinae	test
+simplexvirus_odb10	Simplexvirus	test
+tequatrovirus_odb10	Tequatrovirus	test
+aviadenovirus_odb10	Aviadenovirus	test
+iridoviridae_odb10	Iridoviridae	test
+betabaculovirus_odb10	Betabaculovirus	test
+betaherpesvirinae_odb10	Betaherpesvirinae	test
+skunavirus_odb10	Skunavirus	test
+chordopoxvirinae_odb10	Chordopoxvirinae	test
+alphabaculovirus_odb10	Alphabaculovirus	test
+rudiviridae_odb10	Rudiviridae	test
+tevenvirinae_odb10	Tevenvirinae	test
+tunavirinae_odb10	Tunavirinae	test
+varicellovirus_odb10	Varicellovirus	test
+pahexavirus_odb10	Pahexavirus	test
+guernseyvirinae_odb10	Guernseyvirinae	test
+baculoviridae_odb10	Baculoviridae	test
+alphaherpesvirinae_odb10	Alphaherpesvirinae	test
+spounavirinae_odb10	Spounavirinae	test
+peduovirus_odb10	Peduovirus	test
+enquatrovirus_odb10	Enquatrovirus	test
+teseptimavirus_odb10	Teseptimavirus	test
+gammaherpesvirinae_odb10	Gammaherpesvirinae	test
+herpesviridae_odb10	Herpesviridae	test
+cheoctovirus_odb10	Cheoctovirus	test
+poxviridae_odb10	Poxviridae	test
+fromanvirus_odb10	Fromanvirus	test
+bclasvirinae_odb10	Bclasvirinae	test
+simplexvirus_odb10	Simplexvirus	test
+tequatrovirus_odb10	Tequatrovirus	test
+aviadenovirus_odb10	Aviadenovirus	test
--- a/tool-data/busco_database.loc.sample	Fri Apr 04 10:12:36 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of busco files.  
-#file has this format (white space characters are TAB characters)
-# - value
-# - name
-# - version
-# - /path/to/data 
-#virus_lineage_1.0  Virus_lineage_1.0	5.4.6	/path/to/data
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/busco_database_options.loc.sample	Fri Apr 04 10:12:43 2025 +0000
@@ -0,0 +1,6 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to fill the select with the available BUSCO lineages
+#file has this format (white space characters are TAB characters)
+# - value: value used in the options
+# - name: name shown to the user in options
+# - db_value: the id/value of the busco db (in the busco_database data table)
\ No newline at end of file
--- a/tool_data_table_conf.xml.sample	Fri Apr 04 10:12:36 2025 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Apr 04 10:12:43 2025 +0000
@@ -3,4 +3,8 @@
         <columns>value, name, version, path</columns>
         <file path="tool-data/busco_database.loc" />
     </table>
+    <table name="busco_database_options" comment_char="#"> <!-- lineages available per busco_database entry (linked through db_value) -->
+        <columns>value, name, db_value</columns>
+        <file path="tool-data/busco_database_options.loc" />
+    </table>
 </tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Apr 04 10:12:43 2025 +0000
@@ -0,0 +1,10 @@
+<tables> <!-- data tables used by the tool tests; both .loc files are read from test-data/ -->
+    <table name="busco_database" comment_char="#">
+        <columns>value, name, version, path</columns>
+        <file path="${__HERE__}/test-data/busco_database.loc" />
+    </table>
+    <table name="busco_database_options" comment_char="#">
+        <columns>value, name, db_value</columns>
+        <file path="${__HERE__}/test-data/busco_database_options.loc" />
+    </table>
+</tables>