changeset 7:8ef19ca5d289 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_busco/ commit 32f1099f18b41bf1efd1a039daf732809c77269e
author iuc
date Tue, 22 Apr 2025 13:11:57 +0000
parents 709faa13c793
children
files data_manager/busco_fetcher.xml data_manager/busco_options.xml data_manager/extract.py data_manager_conf.xml test-data/busco_database.loc test-data/busco_database_options.loc tool-data/busco_database.loc.sample tool-data/busco_database_options.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 10 files changed, 85 insertions(+), 177 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/busco_fetcher.xml	Tue Apr 22 13:11:57 2025 +0000
@@ -0,0 +1,74 @@
+<tool id="busco_fetcher" name="Busco" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="23.1">
+    <description>dataset downloader</description>
+    <macros>
+        <token name="@TOOL_VERSION@">5.8.0</token>
+        <token name="@VERSION_SUFFIX@">1</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        busco --download_path '$out_file.extra_files_path' --download '$lineage' &&
+        ## keep only lineage folders / placement files whose name contains _<datasets_version>; remove every other entry
+        ## https://gitlab.com/ezlab/busco/-/issues/784
+        if [ -d '$out_file.extra_files_path'/lineages ]; then
+            find '$out_file.extra_files_path'/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_${datasets_version}*' -exec rm -rf {} \;;
+        fi &&
+        if [ -d '$out_file.extra_files_path'/placement_files ]; then
+            find '$out_file.extra_files_path'/placement_files/ -mindepth 1 -maxdepth 1 ! -name '*_${datasets_version}*' -delete;
+        fi &&
+        ## gunzip every *.faa.gz below lineages/ so the files are ready to use
+        ## https://gitlab.com/ezlab/busco/-/issues/789
+        if [ -d '$out_file.extra_files_path'/lineages ]; then
+            find '$out_file.extra_files_path'/lineages/ -name "*.faa.gz" -exec gunzip {} \;;
+        fi &&
+        cp '$dmjson' '$out_file'
+    ]]></command>
+    <configfiles>
+        <configfile name="dmjson"><![CDATA[
+#from datetime import datetime
+#set $timestamp = datetime.now().strftime('%Y-%m-%d-%H%M%S')
+    {
+      "data_tables":{
+        "busco_database":[
+          {
+            "value":"$lineage $datasets_version ${timestamp}",
+            "name":"$lineage $datasets_version ${timestamp}",
+            "version":"@TOOL_VERSION@",
+            "path":"$out_file.extra_files_path"
+          }
+        ]
+      }
+    }]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name="lineage" argument="--download" type="select" label="Select the lineage to be downloaded">
+            <option value="all">All</option>
+            <option value="prokaryota">Prokaryota</option>
+            <option value="eukaryota">Eukaryota</option>
+            <option value="virus">Virus</option>
+        </param>
+        <param argument="--datasets_version" type="select" label="OrthoDB version" help="By March 2025 OrthoDB 12 datasets were only available for Prokaryota (https://busco-data2.ezlab.org/v5/data/)">
+            <option value="odb10" selected="true">OrthoDB 10</option>
+            <option value="odb12">OrthoDB 12</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" label="BUSCO data manager: JSON"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="lineage" value="virus"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="virus"/>
+                    <has_text text="5.8.0"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>This tool downloads the BUSCO databases.</help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv351</citation>
+    </citations>
+</tool>
--- a/data_manager/busco_options.xml	Fri Apr 04 11:18:19 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-<tool id="busco_fetcher_options" name="Busco options" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01">
-    <description></description>
-        <macros>
-        <token name="@TOOL_VERSION@">5.8.0</token>
-        <token name="@VERSION_SUFFIX@">0</token>
-    </macros>
-    <requirements>
-        <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        #if $test:
-            busco --download_path test/ --download virus > /dev/null &&
-            #set db_path="./test"
-        #else
-            #set db_path=$cached_db.fields.path
-        #end if
-        python '$__tool_directory__/extract.py' '$db_path' '$cached_db' '$out_file'
-    ]]></command>
-    <inputs>
-        <param name="test" type="hidden"/>
-        <param name="cached_db" label="Cached database with lineage" type="select">
-            <options from_data_table="busco_database">
-                <validator message="No BUSCO database is available" type="no_options"/>
-            </options>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="out_file" format="data_manager_json" label="BUSCO options data manager: JSON"/>
-    </outputs>
-    <tests>
-        <test expect_num_outputs="1">
-            <param name="test" value="true"/>
-            <param name="cached_db" value="test"/>
-            <output name="out_file">
-                <assert_contents>
-                    <has_text text="busco_database_options"/>
-                    <has_text text="Chordopoxvirinae"/>
-                    <has_text text="chordopoxvirinae_"/>
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>This tool determines the possible choices of BUSCO lineages in a given BUSCO DB.</help>
-    <citations>
-        <citation type="doi">10.1093/bioinformatics/btv351</citation>
-    </citations>
-</tool>
--- a/data_manager/extract.py	Fri Apr 04 11:18:19 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-import json
-import os
-import re
-import sys
-
-busco_db = os.path.join(sys.argv[1], "lineages")
-busco_db_value = sys.argv[2]
-dmjson = sys.argv[3]
-
-content = []
-for d in os.scandir(busco_db):
-    if not d.is_dir():
-        continue
-    if not os.path.exists(os.path.join(d, "dataset.cfg")):
-        continue
-    name = re.sub(r"_odb\d+", "", d.name)
-    name = name.replace("_", " ").capitalize()
-    content.append({'value': d.name, 'name': name, 'db_value': busco_db_value})
-
-with open(dmjson, "w") as fh:
-    json.dump({"data_tables": {"busco_database_options": content}}, fh)
-
-print(f'{len(content)} x busco_db\n')
--- a/data_manager_conf.xml	Fri Apr 04 11:18:19 2025 +0000
+++ b/data_manager_conf.xml	Tue Apr 22 13:11:57 2025 +0000
@@ -1,12 +1,18 @@
 <data_managers>
-    <data_manager tool_file="data_manager/busco_options.xml" id="busco_fetcher_options" version="5.4.6+galaxy0">
-        <data_table name="busco_database_options">
+    <data_manager tool_file="data_manager/busco_fetcher.xml" id="busco_fetcher" version="5.4.6+galaxy0">
+        <data_table name="busco_database">
             <output>
                 <column name="value" />
                 <column name="name" />
-                <column name="db_value"/>
+                <column name="version"/>
+                <column name="path" output_ref="out_file">
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">busco/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/busco/${value}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
             </output>
         </data_table>
-
     </data_manager>
 </data_managers>
--- a/test-data/busco_database.loc	Fri Apr 04 11:18:19 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-test	test name	test	test
\ No newline at end of file
--- a/test-data/busco_database_options.loc	Fri Apr 04 11:18:19 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-iridoviridae_odb10	Iridoviridae	test
-betabaculovirus_odb10	Betabaculovirus	test
-betaherpesvirinae_odb10	Betaherpesvirinae	test
-skunavirus_odb10	Skunavirus	test
-chordopoxvirinae_odb10	Chordopoxvirinae	test
-alphabaculovirus_odb10	Alphabaculovirus	test
-rudiviridae_odb10	Rudiviridae	test
-tevenvirinae_odb10	Tevenvirinae	test
-tunavirinae_odb10	Tunavirinae	test
-varicellovirus_odb10	Varicellovirus	test
-pahexavirus_odb10	Pahexavirus	test
-guernseyvirinae_odb10	Guernseyvirinae	test
-baculoviridae_odb10	Baculoviridae	test
-alphaherpesvirinae_odb10	Alphaherpesvirinae	test
-spounavirinae_odb10	Spounavirinae	test
-peduovirus_odb10	Peduovirus	test
-enquatrovirus_odb10	Enquatrovirus	test
-teseptimavirus_odb10	Teseptimavirus	test
-gammaherpesvirinae_odb10	Gammaherpesvirinae	test
-herpesviridae_odb10	Herpesviridae	test
-cheoctovirus_odb10	Cheoctovirus	test
-poxviridae_odb10	Poxviridae	test
-fromanvirus_odb10	Fromanvirus	test
-bclasvirinae_odb10	Bclasvirinae	test
-simplexvirus_odb10	Simplexvirus	test
-tequatrovirus_odb10	Tequatrovirus	test
-aviadenovirus_odb10	Aviadenovirus	test
-iridoviridae_odb10	Iridoviridae	test
-betabaculovirus_odb10	Betabaculovirus	test
-betaherpesvirinae_odb10	Betaherpesvirinae	test
-skunavirus_odb10	Skunavirus	test
-chordopoxvirinae_odb10	Chordopoxvirinae	test
-alphabaculovirus_odb10	Alphabaculovirus	test
-rudiviridae_odb10	Rudiviridae	test
-tevenvirinae_odb10	Tevenvirinae	test
-tunavirinae_odb10	Tunavirinae	test
-varicellovirus_odb10	Varicellovirus	test
-pahexavirus_odb10	Pahexavirus	test
-guernseyvirinae_odb10	Guernseyvirinae	test
-baculoviridae_odb10	Baculoviridae	test
-alphaherpesvirinae_odb10	Alphaherpesvirinae	test
-spounavirinae_odb10	Spounavirinae	test
-peduovirus_odb10	Peduovirus	test
-enquatrovirus_odb10	Enquatrovirus	test
-teseptimavirus_odb10	Teseptimavirus	test
-gammaherpesvirinae_odb10	Gammaherpesvirinae	test
-herpesviridae_odb10	Herpesviridae	test
-cheoctovirus_odb10	Cheoctovirus	test
-poxviridae_odb10	Poxviridae	test
-fromanvirus_odb10	Fromanvirus	test
-bclasvirinae_odb10	Bclasvirinae	test
-simplexvirus_odb10	Simplexvirus	test
-tequatrovirus_odb10	Tequatrovirus	test
-aviadenovirus_odb10	Aviadenovirus	test
-iridoviridae_odb10	Iridoviridae	test
-betabaculovirus_odb10	Betabaculovirus	test
-betaherpesvirinae_odb10	Betaherpesvirinae	test
-skunavirus_odb10	Skunavirus	test
-chordopoxvirinae_odb10	Chordopoxvirinae	test
-alphabaculovirus_odb10	Alphabaculovirus	test
-rudiviridae_odb10	Rudiviridae	test
-tevenvirinae_odb10	Tevenvirinae	test
-tunavirinae_odb10	Tunavirinae	test
-varicellovirus_odb10	Varicellovirus	test
-pahexavirus_odb10	Pahexavirus	test
-guernseyvirinae_odb10	Guernseyvirinae	test
-baculoviridae_odb10	Baculoviridae	test
-alphaherpesvirinae_odb10	Alphaherpesvirinae	test
-spounavirinae_odb10	Spounavirinae	test
-peduovirus_odb10	Peduovirus	test
-enquatrovirus_odb10	Enquatrovirus	test
-teseptimavirus_odb10	Teseptimavirus	test
-gammaherpesvirinae_odb10	Gammaherpesvirinae	test
-herpesviridae_odb10	Herpesviridae	test
-cheoctovirus_odb10	Cheoctovirus	test
-poxviridae_odb10	Poxviridae	test
-fromanvirus_odb10	Fromanvirus	test
-bclasvirinae_odb10	Bclasvirinae	test
-simplexvirus_odb10	Simplexvirus	test
-tequatrovirus_odb10	Tequatrovirus	test
-aviadenovirus_odb10	Aviadenovirus	test
--- a/tool-data/busco_database.loc.sample	Fri Apr 04 11:18:19 2025 +0000
+++ b/tool-data/busco_database.loc.sample	Tue Apr 22 13:11:57 2025 +0000
@@ -5,4 +5,4 @@
 # - name
 # - version
 # - /path/to/data 
-#virus_lineage_1.0	 Virus_Lineage_1.0  5.4.6	/path/to/data
\ No newline at end of file
+#virus_lineage_1.0	Virus_lineage_1.0	5.4.6	/path/to/data
\ No newline at end of file
--- a/tool-data/busco_database_options.loc.sample	Fri Apr 04 11:18:19 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use fill the select for the available BUSCO lineages
-#file has this format (white space characters are TAB characters)
-# - value: value used in the options
-# - name: name shown to the user in options
-# - db_value: the id/value of the busco db (in the busco_database data table)
\ No newline at end of file
--- a/tool_data_table_conf.xml.sample	Fri Apr 04 11:18:19 2025 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Apr 22 13:11:57 2025 +0000
@@ -3,8 +3,4 @@
         <columns>value, name, version, path</columns>
         <file path="tool-data/busco_database.loc" />
     </table>
-    <table name="busco_database_options" comment_char="#">
-        <columns>value, name, db_value</columns>
-        <file path="tool-data/busco_database_options.loc" />
-    </table>
 </tables>
--- a/tool_data_table_conf.xml.test	Fri Apr 04 11:18:19 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-<tables>
-    <table name="busco_database" comment_char="#">
-        <columns>value, name, version, path</columns>
-        <file path="${__HERE__}/test-data/busco_database.loc" />
-    </table>
-    <table name="busco_database_options" comment_char="#">
-        <columns>value, name, db_value</columns>
-        <file path="${__HERE__}/test-data/busco_database_options.loc" />
-    </table>
-</tables>