Mercurial > repos > iuc > data_manager_fetch_busco
changeset 7:8ef19ca5d289 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_busco/ commit 32f1099f18b41bf1efd1a039daf732809c77269e
author | iuc |
---|---|
date | Tue, 22 Apr 2025 13:11:57 +0000 |
parents | 709faa13c793 |
children | |
files | data_manager/busco_fetcher.xml data_manager/busco_options.xml data_manager/extract.py data_manager_conf.xml test-data/busco_database.loc test-data/busco_database_options.loc tool-data/busco_database.loc.sample tool-data/busco_database_options.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 10 files changed, 85 insertions(+), 177 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/busco_fetcher.xml Tue Apr 22 13:11:57 2025 +0000 @@ -0,0 +1,74 @@ +<tool id="busco_fetcher" name="Busco" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="23.1"> + <description>dataset dowloader</description> + <macros> + <token name="@TOOL_VERSION@">5.8.0</token> + <token name="@VERSION_SUFFIX@">1</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">busco</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + busco --download_path '$out_file.extra_files_path' --download '$lineage' && + ## remove unwanted lineage folders and placement files + ## https://gitlab.com/ezlab/busco/-/issues/784 + if [ -d '$out_file.extra_files_path'/lineages ]; then + find '$out_file.extra_files_path'/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_$datasets_version*' -exec rm -rf {} \;; + fi && + if [ -d '$out_file.extra_files_path'/placement_files ]; then + find '$out_file.extra_files_path'/placement_files/ -mindepth 1 -maxdepth 1 ! -name '*_$datasets_version*' -delete; + fi && + ## unpack faa.gz files ready to use + ## https://gitlab.com/ezlab/busco/-/issues/789 + if [ -d '$out_file.extra_files_path'/lineages ]; then + find '$out_file.extra_files_path'/lineages/ -name "*.faa.gz" -exec gunzip {} \;; + fi + && cp '$dmjson' '$out_file' + ]]></command> + <configfiles> + <configfile name="dmjson"><![CDATA[ +#from datetime import datetime + { + "data_tables":{ + "busco_database":[ + { + "value":"$lineage $datasets_version #echo datetime.now().strftime('%Y-%m-%d-%H%M%S')#", + "name":"$lineage $datasets_version #echo datetime.now().strftime('%Y-%m-%d-%H%M%S')#", + "version":"@TOOL_VERSION@", + "path":"$out_file.extra_files_path" + } + ] + } + }]]> + </configfile> + </configfiles> + <inputs> + <param name="lineage" argument="--download" type="select" label="Select the lineage to be downloaded"> + <option value="all">All</option> + <option value="prokaryota">Prokaryota</option> + <option value="eukaryota">Eukaryota</option> + <option value="virus">Virus</option> + </param> + <param argument="--datasets_version" type="select" label="OrthoDB version" help="By March 2025 OrthoDB 12 datasets were only available for Prokaryota (https://busco-data2.ezlab.org/v5/data/)"> + <option value="odb10" selected="true">OrthoDB 10</option> + <option value="odb12">OrthoDB 12</option> + </param> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json" label="BUSCO data manager: JSON"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="lineage" value="virus"/> + <output name="out_file"> + <assert_contents> + <has_text text="virus"/> + <has_text text="5.8.0"/> + </assert_contents> + </output> + </test> + </tests> + <help>This tool downloads the BUSCO databases.</help> + <citations> + <citation type="doi">10.1093/bioinformatics/btv351</citation> + </citations> +</tool>
--- a/data_manager/busco_options.xml Fri Apr 04 11:18:19 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -<tool id="busco_fetcher_options" name="Busco options" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01"> - <description></description> - <macros> - <token name="@TOOL_VERSION@">5.8.0</token> - <token name="@VERSION_SUFFIX@">0</token> - </macros> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">busco</requirement> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - #if $test: - busco --download_path test/ --download virus > /dev/null && - #set db_path="./test" - #else - #set db_path=$cached_db.fields.path - #end if - python '$__tool_directory__/extract.py' '$db_path' '$cached_db' '$out_file' - ]]></command> - <inputs> - <param name="test" type="hidden"/> - <param name="cached_db" label="Cached database with lineage" type="select"> - <options from_data_table="busco_database"> - <validator message="No BUSCO database is available" type="no_options"/> - </options> - </param> - </inputs> - <outputs> - <data name="out_file" format="data_manager_json" label="BUSCO options data manager: JSON"/> - </outputs> - <tests> - <test expect_num_outputs="1"> - <param name="test" value="true"/> - <param name="cached_db" value="test"/> - <output name="out_file"> - <assert_contents> - <has_text text="busco_database_options"/> - <has_text text="Chordopoxvirinae"/> - <has_text text="chordopoxvirinae_"/> - </assert_contents> - </output> - </test> - </tests> - <help>This tool determines the possible choices of BUSCO lineages in a given BUSCO DB.</help> - <citations> - <citation type="doi">10.1093/bioinformatics/btv351</citation> - </citations> -</tool>
--- a/data_manager/extract.py Fri Apr 04 11:18:19 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -import json -import os -import re -import sys - -busco_db = os.path.join(sys.argv[1], "lineages") -busco_db_value = sys.argv[2] -dmjson = sys.argv[3] - -content = [] -for d in os.scandir(busco_db): - if not d.is_dir(): - continue - if not os.path.exists(os.path.join(d, "dataset.cfg")): - continue - name = re.sub(r"_odb\d+", "", d.name) - name = name.replace("_", " ").capitalize() - content.append({'value': d.name, 'name': name, 'db_value': busco_db_value}) - -with open(dmjson, "w") as fh: - json.dump({"data_tables": {"busco_database_options": content}}, fh) - -print(f'{len(content)} x busco_db\n')
--- a/data_manager_conf.xml Fri Apr 04 11:18:19 2025 +0000 +++ b/data_manager_conf.xml Tue Apr 22 13:11:57 2025 +0000 @@ -1,12 +1,18 @@ <data_managers> - <data_manager tool_file="data_manager/busco_options.xml" id="busco_fetcher_options" version="5.4.6+galaxy0"> - <data_table name="busco_database_options"> + <data_manager tool_file="data_manager/busco_fetcher.xml" id="busco_fetcher" version="5.4.6+galaxy0"> + <data_table name="busco_database"> <output> <column name="value" /> <column name="name" /> - <column name="db_value"/> + <column name="version"/> + <column name="path" output_ref="out_file"> + <move type="directory" relativize_symlinks="True"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">busco/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/busco/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> </output> </data_table> - </data_manager> </data_managers>
--- a/test-data/busco_database.loc Fri Apr 04 11:18:19 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -test test name test test \ No newline at end of file
--- a/test-data/busco_database_options.loc Fri Apr 04 11:18:19 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,81 +0,0 @@ -iridoviridae_odb10 Iridoviridae test -betabaculovirus_odb10 Betabaculovirus test -betaherpesvirinae_odb10 Betaherpesvirinae test -skunavirus_odb10 Skunavirus test -chordopoxvirinae_odb10 Chordopoxvirinae test -alphabaculovirus_odb10 Alphabaculovirus test -rudiviridae_odb10 Rudiviridae test -tevenvirinae_odb10 Tevenvirinae test -tunavirinae_odb10 Tunavirinae test -varicellovirus_odb10 Varicellovirus test -pahexavirus_odb10 Pahexavirus test -guernseyvirinae_odb10 Guernseyvirinae test -baculoviridae_odb10 Baculoviridae test -alphaherpesvirinae_odb10 Alphaherpesvirinae test -spounavirinae_odb10 Spounavirinae test -peduovirus_odb10 Peduovirus test -enquatrovirus_odb10 Enquatrovirus test -teseptimavirus_odb10 Teseptimavirus test -gammaherpesvirinae_odb10 Gammaherpesvirinae test -herpesviridae_odb10 Herpesviridae test -cheoctovirus_odb10 Cheoctovirus test -poxviridae_odb10 Poxviridae test -fromanvirus_odb10 Fromanvirus test -bclasvirinae_odb10 Bclasvirinae test -simplexvirus_odb10 Simplexvirus test -tequatrovirus_odb10 Tequatrovirus test -aviadenovirus_odb10 Aviadenovirus test -iridoviridae_odb10 Iridoviridae test -betabaculovirus_odb10 Betabaculovirus test -betaherpesvirinae_odb10 Betaherpesvirinae test -skunavirus_odb10 Skunavirus test -chordopoxvirinae_odb10 Chordopoxvirinae test -alphabaculovirus_odb10 Alphabaculovirus test -rudiviridae_odb10 Rudiviridae test -tevenvirinae_odb10 Tevenvirinae test -tunavirinae_odb10 Tunavirinae test -varicellovirus_odb10 Varicellovirus test -pahexavirus_odb10 Pahexavirus test -guernseyvirinae_odb10 Guernseyvirinae test -baculoviridae_odb10 Baculoviridae test -alphaherpesvirinae_odb10 Alphaherpesvirinae test -spounavirinae_odb10 Spounavirinae test -peduovirus_odb10 Peduovirus test -enquatrovirus_odb10 Enquatrovirus test -teseptimavirus_odb10 Teseptimavirus test -gammaherpesvirinae_odb10 Gammaherpesvirinae test -herpesviridae_odb10 Herpesviridae test -cheoctovirus_odb10 Cheoctovirus test -poxviridae_odb10 Poxviridae test -fromanvirus_odb10 Fromanvirus test -bclasvirinae_odb10 Bclasvirinae test -simplexvirus_odb10 Simplexvirus test -tequatrovirus_odb10 Tequatrovirus test -aviadenovirus_odb10 Aviadenovirus test -iridoviridae_odb10 Iridoviridae test -betabaculovirus_odb10 Betabaculovirus test -betaherpesvirinae_odb10 Betaherpesvirinae test -skunavirus_odb10 Skunavirus test -chordopoxvirinae_odb10 Chordopoxvirinae test -alphabaculovirus_odb10 Alphabaculovirus test -rudiviridae_odb10 Rudiviridae test -tevenvirinae_odb10 Tevenvirinae test -tunavirinae_odb10 Tunavirinae test -varicellovirus_odb10 Varicellovirus test -pahexavirus_odb10 Pahexavirus test -guernseyvirinae_odb10 Guernseyvirinae test -baculoviridae_odb10 Baculoviridae test -alphaherpesvirinae_odb10 Alphaherpesvirinae test -spounavirinae_odb10 Spounavirinae test -peduovirus_odb10 Peduovirus test -enquatrovirus_odb10 Enquatrovirus test -teseptimavirus_odb10 Teseptimavirus test -gammaherpesvirinae_odb10 Gammaherpesvirinae test -herpesviridae_odb10 Herpesviridae test -cheoctovirus_odb10 Cheoctovirus test -poxviridae_odb10 Poxviridae test -fromanvirus_odb10 Fromanvirus test -bclasvirinae_odb10 Bclasvirinae test -simplexvirus_odb10 Simplexvirus test -tequatrovirus_odb10 Tequatrovirus test -aviadenovirus_odb10 Aviadenovirus test
--- a/tool-data/busco_database.loc.sample Fri Apr 04 11:18:19 2025 +0000 +++ b/tool-data/busco_database.loc.sample Tue Apr 22 13:11:57 2025 +0000 @@ -5,4 +5,4 @@ # - name # - version # - /path/to/data -#virus_lineage_1.0 Virus_Lineage_1.0 5.4.6 /path/to/data \ No newline at end of file +#virus_lineage_1.0 Virus_lineage_1.0 5.4.6 /path/to/data \ No newline at end of file
--- a/tool-data/busco_database_options.loc.sample Fri Apr 04 11:18:19 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use fill the select for the available BUSCO lineages -#file has this format (white space characters are TAB characters) -# - value: value used in the options -# - name: name shown to the user in options -# - db_value: the id/value of the busco db (in the busco_database data table) \ No newline at end of file
--- a/tool_data_table_conf.xml.sample Fri Apr 04 11:18:19 2025 +0000 +++ b/tool_data_table_conf.xml.sample Tue Apr 22 13:11:57 2025 +0000 @@ -3,8 +3,4 @@ <columns>value, name, version, path</columns> <file path="tool-data/busco_database.loc" /> </table> - <table name="busco_database_options" comment_char="#"> - <columns>value, name, db_value</columns> - <file path="tool-data/busco_database_options.loc" /> - </table> </tables>
--- a/tool_data_table_conf.xml.test Fri Apr 04 11:18:19 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -<tables> - <table name="busco_database" comment_char="#"> - <columns>value, name, version, path</columns> - <file path="${__HERE__}/test-data/busco_database.loc" /> - </table> - <table name="busco_database_options" comment_char="#"> - <columns>value, name, db_value</columns> - <file path="${__HERE__}/test-data/busco_database_options.loc" /> - </table> -</tables>