Mercurial > repos > iuc > data_manager_fetch_busco
changeset 5:6ab91825be22 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_busco/ commit d46d2ec37b9b8f08d58472978c51b4c46e7ed18c
author | iuc |
---|---|
date | Fri, 04 Apr 2025 10:12:43 +0000 |
parents | 828d961faea7 |
children | 709faa13c793 |
files | data_manager/busco_fetcher.xml data_manager/busco_options.xml data_manager/extract.py data_manager_conf.xml test-data/busco_database.loc test-data/busco_database_options.loc tool-data/busco_database.loc.sample tool-data/busco_database_options.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 10 files changed, 176 insertions(+), 92 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/busco_fetcher.xml Fri Apr 04 10:12:36 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ -<tool id="busco_fetcher" name="Busco" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01"> - <description>dataset dowloader</description> - <macros> - <token name="@TOOL_VERSION@">5.8.0</token> - <token name="@VERSION_SUFFIX@">1</token> - </macros> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">busco</requirement> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - busco --download_path '$out_file.extra_files_path' --download '$lineage' && - ## remove unwanted lineage folders and placement files - ## https://gitlab.com/ezlab/busco/-/issues/784 - if [ -d '$out_file.extra_files_path'/lineages ]; then - find '$out_file.extra_files_path'/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_$datasets_version*' -exec rm -rf {} \;; - fi && - if [ -d '$out_file.extra_files_path'/placement_files ]; then - find '$out_file.extra_files_path'/placement_files/ -mindepth 1 -maxdepth 1 ! 
-name '*_$datasets_version*' -delete; - fi && - ## unpack faa.gz files ready to use - ## https://gitlab.com/ezlab/busco/-/issues/789 - if [ -d '$out_file.extra_files_path'/lineages ]; then - find '$out_file.extra_files_path'/lineages/ -name "*.faa.gz" -exec gunzip {} \;; - fi - && cp '$dmjson' '$out_file' - ]]></command> - <configfiles> - <configfile name="dmjson"><![CDATA[ -#from datetime import datetime - { - "data_tables":{ - "busco_database":[ - { - "value":"$lineage $datasets_version #echo datetime.now().strftime('%Y-%m-%d-%H%M%S')#", - "name":"$lineage $datasets_version #echo datetime.now().strftime('%Y-%m-%d-%H%M%S')#", - "version":"@TOOL_VERSION@", - "path":"$out_file.extra_files_path" - } - ] - } - }]]> - </configfile> - </configfiles> - <inputs> - <param name="lineage" argument="--download" type="select" label="Select the lineage to be downloaded"> - <option value="all">All</option> - <option value="prokaryota">Prokaryota</option> - <option value="eukaryota">Eukaryota</option> - <option value="virus">Virus</option> - </param> - <param argument="--datasets_version" type="select" label="OrthoDB version" help="By March 2025 OrthoDB 12 datasets were only available for Prokaryota (https://busco-data2.ezlab.org/v5/data/)"> - <option value="odb10" selected="true">OrthoDB 10</option> - <option value="odb12">OrthoDB 12</option> - </param> - </inputs> - <outputs> - <data name="out_file" format="data_manager_json" label="BUSCO data manager: JSON"/> - </outputs> - <tests> - <test expect_num_outputs="1"> - <param name="lineage" value="virus"/> - <output name="out_file"> - <assert_contents> - <has_text text="virus"/> - <has_text text="5.8.0"/> - </assert_contents> - </output> - </test> - </tests> - <help>This tool downloads the BUSCO databases.</help> - <citations> - <citation type="doi">10.1093/bioinformatics/btv351</citation> - </citations> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/busco_options.xml Fri Apr 04 10:12:43 2025 +0000 @@ -0,0 +1,47 @@ +<tool id="busco_fetcher_options" name="Busco options" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01"> + <description></description> + <macros> + <token name="@TOOL_VERSION@">5.8.0</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">busco</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #if $test: + busco --download_path test/ --download virus > /dev/null && + #set db_path="./test" + #else + #set db_path=$cached_db.fields.path + #end if + python '$__tool_directory__/extract.py' '$db_path' '$cached_db' '$out_file' + ]]></command> + <inputs> + <param name="test" type="hidden"/> + <param name="cached_db" label="Cached database with lineage" type="select"> + <options from_data_table="busco_database"> + <validator message="No BUSCO database is available" type="no_options"/> + </options> + </param> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json" label="BUSCO options data manager: JSON"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="test" value="true"/> + <param name="cached_db" value="test"/> + <output name="out_file"> + <assert_contents> + <has_text text="busco_database_options"/> + <has_text text="Chordopoxvirinae"/> + <has_text text="chordopoxvirinae_"/> + </assert_contents> + </output> + </test> + </tests> + <help>This tool determines the possible choices of BUSCO lineages in a given BUSCO DB.</help> + <citations> + <citation type="doi">10.1093/bioinformatics/btv351</citation> + </citations> +</tool>
# data_manager/extract.py -- new file added by this changeset (@@ -0,0 +1,23 @@)
"""Enumerate the lineages of a downloaded BUSCO database for Galaxy.

Usage: extract.py <busco_db_path> <busco_db_value> <output_json>

Every sub-directory of ``<busco_db_path>/lineages`` that contains a
``dataset.cfg`` file becomes one row of the ``busco_database_options``
data table, written as data manager JSON to ``<output_json>``.
"""
import json
import os
import re
import sys

# Matches the OrthoDB version suffix of a lineage directory, e.g. "_odb10".
_ODB_SUFFIX = re.compile(r"_odb\d+")


def lineage_display_name(dirname):
    """Return the human readable label for a lineage directory name.

    The ``_odb<N>`` suffix is dropped, underscores become spaces and the
    first letter is capitalised (``chordopoxvirinae_odb10`` ->
    ``Chordopoxvirinae``).
    """
    return _ODB_SUFFIX.sub("", dirname).replace("_", " ").capitalize()


def collect_lineages(db_path, db_value):
    """Return the data table rows for all lineages found below *db_path*.

    Only directories in ``<db_path>/lineages`` that contain a
    ``dataset.cfg`` are reported.  Rows are sorted by directory name so the
    result does not depend on the file system's listing order.

    Raises ``OSError`` (e.g. ``FileNotFoundError``) if the ``lineages``
    directory cannot be read.
    """
    lineages_dir = os.path.join(db_path, "lineages")
    # The context manager closes the scandir iterator deterministically.
    with os.scandir(lineages_dir) as entries:
        dirnames = sorted(
            entry.name
            for entry in entries
            if entry.is_dir()
            and os.path.exists(os.path.join(entry.path, "dataset.cfg"))
        )
    return [
        {'value': dirname, 'name': lineage_display_name(dirname), 'db_value': db_value}
        for dirname in dirnames
    ]


def main(argv=None):
    """Command line entry point; *argv* defaults to ``sys.argv[1:]``."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit("usage: extract.py <busco_db_path> <busco_db_value> <output_json>")
    # Additional arguments are ignored, exactly as the original script did.
    db_path, busco_db_value, dmjson = args[:3]

    content = collect_lineages(db_path, busco_db_value)

    with open(dmjson, "w") as fh:
        json.dump({"data_tables": {"busco_database_options": content}}, fh)

    print(f'{len(content)} x busco_db\n')


if __name__ == "__main__":
    main()
--- a/data_manager_conf.xml Fri Apr 04 10:12:36 2025 +0000 +++ b/data_manager_conf.xml Fri Apr 04 10:12:43 2025 +0000 @@ -1,18 +1,12 @@ <data_managers> - <data_manager tool_file="data_manager/busco_fetcher.xml" id="busco_fetcher" version="5.4.6+galaxy0"> - <data_table name="busco_database"> + <data_manager tool_file="data_manager/busco_options.xml" id="busco_fetcher_options" version="5.4.6+galaxy0"> + <data_table name="busco_database_options"> <output> <column name="value" /> <column name="name" /> - <column name="version"/> - <column name="path" output_ref="out_file"> - <move type="directory" relativize_symlinks="True"> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">busco/${value}</target> - </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/busco/${value}</value_translation> - <value_translation type="function">abspath</value_translation> - </column> + <column name="db_value"/> </output> </data_table> + </data_manager> </data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/busco_database.loc Fri Apr 04 10:12:43 2025 +0000 @@ -0,0 +1,1 @@ +test test name test test \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/busco_database_options.loc Fri Apr 04 10:12:43 2025 +0000 @@ -0,0 +1,81 @@ +iridoviridae_odb10 Iridoviridae test +betabaculovirus_odb10 Betabaculovirus test +betaherpesvirinae_odb10 Betaherpesvirinae test +skunavirus_odb10 Skunavirus test +chordopoxvirinae_odb10 Chordopoxvirinae test +alphabaculovirus_odb10 Alphabaculovirus test +rudiviridae_odb10 Rudiviridae test +tevenvirinae_odb10 Tevenvirinae test +tunavirinae_odb10 Tunavirinae test +varicellovirus_odb10 Varicellovirus test +pahexavirus_odb10 Pahexavirus test +guernseyvirinae_odb10 Guernseyvirinae test +baculoviridae_odb10 Baculoviridae test +alphaherpesvirinae_odb10 Alphaherpesvirinae test +spounavirinae_odb10 Spounavirinae test +peduovirus_odb10 Peduovirus test +enquatrovirus_odb10 Enquatrovirus test +teseptimavirus_odb10 Teseptimavirus test +gammaherpesvirinae_odb10 Gammaherpesvirinae test +herpesviridae_odb10 Herpesviridae test +cheoctovirus_odb10 Cheoctovirus test +poxviridae_odb10 Poxviridae test +fromanvirus_odb10 Fromanvirus test +bclasvirinae_odb10 Bclasvirinae test +simplexvirus_odb10 Simplexvirus test +tequatrovirus_odb10 Tequatrovirus test +aviadenovirus_odb10 Aviadenovirus test +iridoviridae_odb10 Iridoviridae test +betabaculovirus_odb10 Betabaculovirus test +betaherpesvirinae_odb10 Betaherpesvirinae test +skunavirus_odb10 Skunavirus test +chordopoxvirinae_odb10 Chordopoxvirinae test +alphabaculovirus_odb10 Alphabaculovirus test +rudiviridae_odb10 Rudiviridae test +tevenvirinae_odb10 Tevenvirinae test +tunavirinae_odb10 Tunavirinae test +varicellovirus_odb10 Varicellovirus test +pahexavirus_odb10 Pahexavirus test +guernseyvirinae_odb10 Guernseyvirinae test +baculoviridae_odb10 Baculoviridae test +alphaherpesvirinae_odb10 Alphaherpesvirinae test +spounavirinae_odb10 Spounavirinae test +peduovirus_odb10 Peduovirus test +enquatrovirus_odb10 Enquatrovirus test +teseptimavirus_odb10 Teseptimavirus test +gammaherpesvirinae_odb10 
Gammaherpesvirinae test +herpesviridae_odb10 Herpesviridae test +cheoctovirus_odb10 Cheoctovirus test +poxviridae_odb10 Poxviridae test +fromanvirus_odb10 Fromanvirus test +bclasvirinae_odb10 Bclasvirinae test +simplexvirus_odb10 Simplexvirus test +tequatrovirus_odb10 Tequatrovirus test +aviadenovirus_odb10 Aviadenovirus test +iridoviridae_odb10 Iridoviridae test +betabaculovirus_odb10 Betabaculovirus test +betaherpesvirinae_odb10 Betaherpesvirinae test +skunavirus_odb10 Skunavirus test +chordopoxvirinae_odb10 Chordopoxvirinae test +alphabaculovirus_odb10 Alphabaculovirus test +rudiviridae_odb10 Rudiviridae test +tevenvirinae_odb10 Tevenvirinae test +tunavirinae_odb10 Tunavirinae test +varicellovirus_odb10 Varicellovirus test +pahexavirus_odb10 Pahexavirus test +guernseyvirinae_odb10 Guernseyvirinae test +baculoviridae_odb10 Baculoviridae test +alphaherpesvirinae_odb10 Alphaherpesvirinae test +spounavirinae_odb10 Spounavirinae test +peduovirus_odb10 Peduovirus test +enquatrovirus_odb10 Enquatrovirus test +teseptimavirus_odb10 Teseptimavirus test +gammaherpesvirinae_odb10 Gammaherpesvirinae test +herpesviridae_odb10 Herpesviridae test +cheoctovirus_odb10 Cheoctovirus test +poxviridae_odb10 Poxviridae test +fromanvirus_odb10 Fromanvirus test +bclasvirinae_odb10 Bclasvirinae test +simplexvirus_odb10 Simplexvirus test +tequatrovirus_odb10 Tequatrovirus test +aviadenovirus_odb10 Aviadenovirus test
--- a/tool-data/busco_database.loc.sample Fri Apr 04 10:12:36 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of busco files. -#file has this format (white space characters are TAB characters) -# - value -# - name -# - version -# - /path/to/data -#virus_lineage_1.0 Virus_lineage_1.0 5.4.6 /path/to/data \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/busco_database_options.loc.sample Fri Apr 04 10:12:43 2025 +0000 @@ -0,0 +1,6 @@ +#This is a sample file distributed with Galaxy that enables tools +#to fill the select with the available BUSCO lineages +#file has this format (white space characters are TAB characters) +# - value: value used in the options +# - name: name shown to the user in options +# - db_value: the id/value of the busco db (in the busco_database data table) \ No newline at end of file
--- a/tool_data_table_conf.xml.sample Fri Apr 04 10:12:36 2025 +0000 +++ b/tool_data_table_conf.xml.sample Fri Apr 04 10:12:43 2025 +0000 @@ -3,4 +3,8 @@ <columns>value, name, version, path</columns> <file path="tool-data/busco_database.loc" /> </table> + <table name="busco_database_options" comment_char="#"> + <columns>value, name, db_value</columns> + <file path="tool-data/busco_database_options.loc" /> + </table> </tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Apr 04 10:12:43 2025 +0000 @@ -0,0 +1,10 @@ +<tables> + <table name="busco_database" comment_char="#"> + <columns>value, name, version, path</columns> + <file path="${__HERE__}/test-data/busco_database.loc" /> + </table> + <table name="busco_database_options" comment_char="#"> + <columns>value, name, db_value</columns> + <file path="${__HERE__}/test-data/busco_database_options.loc" /> + </table> +</tables>