Mercurial > repos > iuc > data_manager_dada2
diff data_manager/dada2_fetcher.xml @ 8:0181efacb40a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_dada2 commit 5f2ed0407e3adaafc6070fc0594a4685ade094db
| author | iuc |
|---|---|
| date | Sat, 22 Nov 2025 20:07:44 +0000 |
| parents | 678176bff643 |
| children |
line wrap: on
line diff
--- a/data_manager/dada2_fetcher.xml Tue May 21 07:35:56 2024 +0000 +++ b/data_manager/dada2_fetcher.xml Sat Nov 22 20:07:44 2025 +0000 @@ -1,139 +1,361 @@ -<?xml version="1.0"?> -<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.1.1" profile="23.0"> +<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.1.2" profile="24.2"> <description>Download reference databases</description> <requirements> - <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="1.25.0">wget</requirement> + <requirement type="package" version="6.0">unzip</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - python '$__tool_directory__/data_manager.py' - --out '$out_file' - #set dataset = str($db_cond.db_select) + '_' + str($db_cond.version_select) - --dataset '$dataset' +## see https://github.com/benjjneb/dada2/issues/2053 for the distinction of the _toGenus and _toSpecies datasets +#set $FILE2TAXURL = { + "silva_138.2_toGenus": "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toGenus_trainset.fa.gz?download=1", + "silva_138.2_toSpecies": "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toSpecies_trainset.fa.gz?download=1", + + "silva_138.2_fungi_18S_toGenus": "https://zenodo.org/records/15044434/files/SILVA_SSUfungi_nr99_v138_2_toGenus_trainset.fasta?download=1", + "silva_138.2_fungi_18S_toSpecies": "https://zenodo.org/records/15044434/files/SILVA_SSUfungi_nr99_v138_2_toSpecies_trainset.fasta?download=1", + + "silva_138.2_fungi_26S_toGenus": "https://zenodo.org/records/15044501/files/SILVA_LSUfungi_nr99_v138_2_toGenus_trainset.fasta?download=1", + "silva_138.2_fungi_26S_toSpecies": "https://zenodo.org/records/15044501/files/SILVA_LSUfungi_nr99_v138_2_toSpecies_trainset.fasta?download=1", + + "silva_138": "https://zenodo.org/record/3731176/files/silva_nr_v138_train_set.fa.gz?download=1", + "silva_132": 
"https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1", + "silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1", + "rdp_19_toGenus": "https://zenodo.org/records/14168771/files/rdp_19_toGenus_trainset.fa.gz?download=1", + "rdp_19_toSpecies": "https://zenodo.org/records/14168771/files/rdp_19_toSpecies_trainset.fa.gz?download=1", + "rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1", + "rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1", + "unite_8.0_fungi": "https://s3.hpc.ut.ee/plutof-public/original/9f7b41c3-825b-4db8-9c52-74a4603a860a.zip", + "unite_8.0_fungi_singletons": "https://s3.hpc.ut.ee/plutof-public/original/53dfc9ce-9cb5-4205-84bb-f47faff26462.zip", + "greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1", + "greengenes2_2024.09_toGenus": "https://zenodo.org/records/14169078/files/gg2_2024_09_toGenus_trainset.fa.gz?download=1", + "greengenes2_2024.09_toSpecies": "https://zenodo.org/records/14169078/files/gg2_2024_09_toSpecies_trainset.fa.gz?download=1", + "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1", + + "gtdb_202_toGenus": "https://zenodo.org/records/4735821/files/GTDB_bac120_arc122_ssu_r202_Genus.fa.gz?download=1", + "gtdb_202_toSpecies": "https://zenodo.org/records/4735821/files/GTDB_bac120_arc122_ssu_r202_Species.fa.gz?download=1", + "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1", + + "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1", + "silva_euk_18S_132": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1", + "PR2_4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz" +} +## rdp_19 not available for assignSpecies 
https://github.com/benjjneb/dada2/issues/2053#issuecomment-2512185929 +#set FILE2SPECIESURL = { + "silva_138.2": "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz?download=1", + "silva_138.2_fungi_18S": "https://zenodo.org/records/15044434/files/SILVA_SSUfungi_assignSpecies.fasta?download=1", + "silva_138.2_fungi_26S": "https://zenodo.org/records/15044501/files/SILVA_LSUfungi_assignSpecies.fasta?download=1", + "silva_138": "https://zenodo.org/record/3731176/files/silva_species_assignment_v138.fa.gz?download=1", + "silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1", + "silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1", + "rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1", + "rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1", + "gtdb_202": "https://zenodo.org/records/4735821/files/GTDB_bac120_arc122_ssu_r202_fullTaxo.fa.gz?download=1", + "gtdb_2018_11": "https://zenodo.org/records/2658728/files/GTDB_dada2_assignment_species.fa.gz?download=1" +} +mkdir -p '${out_file.extra_files_path}' && +cd '${out_file.extra_files_path}' && +#for $database in $taxonomy_databases: + wget '$FILE2TAXURL[str($database)]' -O '${database}.taxonomy' && + #if str($database).startswith("unite") + unzip -p '${database}.taxonomy' "*.fasta" -x "developer/*" > tmp && + mv tmp '${database}.taxonomy' && + #end if +#end for +#if $species_databases + #for $database in $species_databases: + wget '$FILE2SPECIESURL[str($database)]' -O '${database}.species' && + #end for +#end if + cp '$dmjson' '$out_file' ]]> </command> + <configfiles> + <configfile name="dmjson"><![CDATA[#slurp +#set FILE2NAME = { + "silva_138.2_toGenus": "Silva version 138.2 to Genus (for short reads)", + "silva_138.2_toSpecies": "Silva version 138.2 to Species (for long reads)", + "silva_138.2": "Silva version 138.2", + + 
"silva_138.2_fungi_18S_toGenus": "Silva version 138.2 for Fungi 18S to Genus (for short reads)", + "silva_138.2_fungi_18S_toSpecies": "Silva version 138.2 for Fungi 18S to Species (for long reads)", + "silva_138.2_fungi_18S": "Silva version 138.2 for Fungi 18S", + + "silva_138.2_fungi_26S_toGenus": "Silva version 138.2 for Fungi 26S to Genus (for short reads)", + "silva_138.2_fungi_26S_toSpecies": "Silva version 138.2 for Fungi 26S to Species (for long reads)", + "silva_138.2_fungi_26S": "Silva version 138.2 for Fungi 26S", + + "silva_138": "Silva version 138", + "silva_132": "Silva version 132", + "silva_128": "Silva version 128", + "rdp_19_toGenus": "RDP trainset 19 to Genus (for short reads)", + "rdp_19_toSpecies": "RDP trainset 19 to Species (for long reads)", + "rdp_16": "RDP trainset 16", + "rdp_14": "RDP trainset 14", + "greengenes_13.84": "GreenGenes version 13.84", + "greengenes2_2024.09_toGenus": "GreenGenes2 release 2024.09 to Genus (for short reads)", + "greengenes2_2024.09_toSpecies": "GreenGenes2 release 2024.09 to Species (for long reads)", + "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi", + "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons", + "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)", + + "gtdb_202_toGenus": "GTDB Version 202 to Genus (for short reads)", + "gtdb_202_toSpecies": "GTDB Version 202 to Species (for long reads)", + "gtdb_202": "GTDB Version 202", + + "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)", + "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)", + "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S", + "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1" +} +#set FILE2TAXLEVELS = { + "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species", +} +{ + "data_tables":{ + "dada2_taxonomy": [ +#for $i, $database in enumerate($taxonomy_databases): + 
#set taxlevels = FILE2TAXLEVELS.get(str($database), "Kingdom,Phylum,Class,Order,Family,Genus,Species") + { + "value": "$database", + "name": "$FILE2NAME[str($database)]", + "path": "${database}.taxonomy", + "taxlevels": "$taxlevels" + }#slurp + #if i + 1 < len(str($taxonomy_databases).split(",")) +, + #end if +#end for + ] +#if $species_databases + , "dada2_species": [ +#for $i, $database in enumerate($species_databases): + { + "value": "$database", + "name": "$FILE2NAME[str($database)]", + "path": "${database}.species" + }#slurp + #if i + 1 < len(str($species_databases).split(",")) +, + #end if +#end for + ] +#end if + } +} +]]></configfile> + </configfiles> <inputs> - <conditional name="db_cond"> - <param name="db_select" type="select" label="Taxonomic database"> - <option value="silva">Silva</option> - <option value="rdp">RDP</option> - <option value="greengenes">GreenGenes</option> - <option value="unite">UNITE Fungi: General Fasta</option> - <!-- UNITE Eukaryotes not yet supported https://github.com/benjjneb/dada2/issues/702 --> - <option value="RefSeq_RDP">NCBI RefSeq 16S rRNA database supplemented by RDP</option> - <option value="gtdb">GTDB: Genome Taxonomy Database (Bacteria &amp; Archaea)</option> - <option value="hitdb">HitDB (Human InTestinal 16S)</option> - <option value="silva_euk_18S">Silva Eukaryotic 18S</option> - <option value="PR2">Protist Ribosomal Reference database (PR2)</option> - </param> - <when value="silva"> - <param name="version_select" type="select" label="Database version"> - <option value="138">138</option> - <option value="132">132</option> - <option value="128">128</option> - </param> - </when> - <when value="rdp"> - <param name="version_select" type="select" label="Database version"> - <option value="16">16</option> - <option value="14">14</option> - </param> - </when> - <when value="greengenes"> - <param name="version_select" type="select" label="Database version"> - <option value="13.84">13.84</option> - </param> - </when> - <when 
value="unite"> - <param name="version_select" type="select" label="Database version"> - <option value="8.0_fungi">release 8.0 for Fungi</option> - <option value="8.0_fungi_singletons">release 8.0 for Fungi including global and 97% singletons</option> - </param> - </when> - <when value="RefSeq_RDP"> - <param name="version_select" type="select" label="Database version"> - <option value="2018_05">05/2018</option> - </param> - </when> - <when value="gtdb"> - <param name="version_select" type="select" label="Database version"> - <option value="2018_11">11/2018</option> - </param> - </when> - <when value="hitdb"> - <param name="version_select" type="select" label="Database version"> - <option value="1">1</option> - </param> - </when> - <when value="silva_euk_18S"> - <param name="version_select" type="select" label="Database version"> - <option value="132">132</option> - </param> - </when> - <when value="PR2"> - <param name="version_select" type="select" label="Database version"> - <option value="4.11.1">4.11.1</option> - </param> - </when> - </conditional> + <param name="taxonomy_databases" multiple="true" type="select" label="Taxonomic database" help="for assignTaxonomy"> + <option value="silva_138.2_toGenus">Silva 138.2 to Genus (for short reads)</option> + <option value="silva_138.2_toSpecies">Silva 138.2 to Species (for long reads)</option> + + <option value="silva_138.2_fungi_18S_toGenus">Silva 138.2 for Fungi 18S to Genus (for short reads)</option> + <option value="silva_138.2_fungi_18S_toSpecies">Silva 138.2 for Fungi 18S to Species (for long read)</option> + + <option value="silva_138.2_fungi_26S_toGenus">Silva 138.2 for Fungi 26S to Genus (for short reads)</option> + <option value="silva_138.2_fungi_26S_toSpecies">Silva 138.2 for Fungi 26S to Species (for long reads)</option> + + <option value="silva_138">Silva 138</option> + <option value="silva_132">Silva 132</option> + <option value="silva_128">Silva 128</option> + <option value="rdp_19_toGenus">RDP 19 to 
Genus (for short reads)</option> + <option value="rdp_19_toSpecies">RDP 19 to Species (for long reads)</option> + <option value="rdp_16">RDP 16</option> + <option value="rdp_14">RDP 14</option> + <option value="greengenes2_2024.09_toGenus">GreenGenes2 2024.09 to Genus (for short reads)</option> + <option value="greengenes2_2024.09_toSpecies">GreenGenes2 2024.09 to Species (for long reads)</option> + <option value="greengenes_13.84">GreenGenes 13.84</option> + <option value="unite_8.0_fungi">UNITE Fungi: General Fasta 8.0 for Fungi</option> + <option value="unite_8.0_fungi_singletons">UNITE Fungi: General Fasta release 8.0 for Fungi including global and 97% singletons</option> + <!-- UNITE Eukaryotes not yet supported https://github.com/benjjneb/dada2/issues/702 --> + <option value="RefSeq_RDP_2018_05">NCBI RefSeq 16S rRNA database supplemented by RDP 05/2018</option> + <option value="gtdb_202_toGenus">GTDB: Genome Taxonomy Database 202 to Genus (for short reads) </option> + <option value="gtdb_202_toSpecies">GTDB: Genome Taxonomy Database 202 to Species (for long reads) </option> + <option value="gtdb_2018_11">GTDB: Genome Taxonomy Database 11/2018</option> + <option value="hitdb_1">HitDB (Human InTestinal 16S) 1</option> + <option value="silva_euk_18S_132">Silva Eukaryotic 18S 132</option> + <option value="PR2_4.11.1">Protist Ribosomal Reference database (PR2) 4.11.1</option> + </param> + <param name="species_databases" type="select" multiple="true" optional="true" label="Species databases" help="for addSpecies"> + <option value="silva_138.2">Silva 138.2</option> + <option value="silva_138.2_fungi_18S">Silva 138.2 for Fungi 18S</option> + <option value="silva_138.2_fungi_26S">Silva 138.2 for Fungi 26S</option> + <option value="silva_138">Silva 138</option> + <option value="silva_132">Silva 132</option> + <option value="silva_128">Silva 128</option> + <option value="rdp_16">RDP 16</option> + <option value="rdp_14">RDP 14</option> + <option value="gtdb_202">GTDB: 
Genome Taxonomy Database 202</option> + <option value="gtdb_2018_11">GTDB: Genome Taxonomy Database 11/2018</option> + </param> </inputs> <outputs> <data name="out_file" format="data_manager_json" /> </outputs> <tests> <test> - <param name="db_cond|db_select" value="silva"/> - <param name="db_cond|version_select" value="138"/> - <output name="out_file" file="silva138_json"/> + <param name="taxonomy_databases" value="silva_138.2_toGenus,silva_138.2_toSpecies"/> + <param name="species_databases" value="silva_138.2"/> + <output name="out_file"> + <assert_contents> + <has_text text="&quot;Silva version 138.2 to Genus (for short reads)&quot;"/> + <has_text text="&quot;Silva version 138.2 to Species (for long reads)&quot;"/> + <has_text text="&quot;Silva version 138.2&quot;"/> + </assert_contents> + </output> + <assert_command> + <has_text text="wget" n="3"/> + </assert_command> </test> + <test> - <param name="db_cond|db_select" value="silva"/> - <param name="db_cond|version_select" value="132"/> - <output name="out_file" file="silva132_json"/> + <param name="taxonomy_databases" value="silva_138.2_fungi_18S_toGenus,silva_138.2_fungi_18S_toSpecies"/> + <param name="species_databases" value="silva_138.2_fungi_18S"/> + <output name="out_file"> + <assert_contents> + <has_text text="&quot;Silva version 138.2 for Fungi 18S to Genus (for short reads)&quot;"/> + <has_text text="&quot;Silva version 138.2 for Fungi 18S to Species (for long reads)&quot;"/> + <has_text text="&quot;Silva version 138.2 for Fungi 18S&quot;"/> + </assert_contents> + </output> + <assert_command> + <has_text text="wget" n="3"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="rdp"/> - <param name="db_cond|version_select" value="16"/> - <output name="out_file" file="rdp16_json"/> + <param name="taxonomy_databases" value="silva_138.2_fungi_26S_toGenus,silva_138.2_fungi_26S_toSpecies"/> + <param name="species_databases" value="silva_138.2_fungi_26S"/> + <output name="out_file"> + <assert_contents> + <has_text text="&quot;Silva version 138.2 for 
Fungi 26S to Genus (for short reads)&quot;"/> + <has_text text="&quot;Silva version 138.2 for Fungi 26S to Species (for long reads)&quot;"/> + <has_text text="&quot;Silva version 138.2 for Fungi 26S&quot;"/> + </assert_contents> + </output> + <assert_command> + <has_text text="wget" n="3"/> + </assert_command> + </test> + + <test> + <param name="taxonomy_databases" value="silva_138"/> + <param name="species_databases" value="silva_138"/> + <output name="out_file" file="silva138_json"/> + <assert_command> + <has_text text="wget" n="2"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="greengenes"/> - <param name="db_cond|version_select" value="13.84"/> - <output name="out_file" file="greengenes13.84_json"/> + <param name="taxonomy_databases" value="silva_132"/> + <param name="species_databases" value="silva_132"/> + <output name="out_file" file="silva132_json"/> + <assert_command> + <has_text text="wget" n="2"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="unite"/> - <param name="db_cond|version_select" value="8.0_fungi"/> - <output name="out_file" file="unite8fungi_json"/> + <param name="taxonomy_databases" value="rdp_19_toSpecies,rdp_19_toGenus"/> + <output name="out_file"> + <assert_contents> + <has_text text="RDP trainset 19 to Species (for long reads)"/> + <has_text text="RDP trainset 19 to Genus (for short reads)"/> + </assert_contents> + </output> + <assert_command> + <has_text text="wget" n="2"/> + </assert_command> + </test> + <test> + <param name="taxonomy_databases" value="rdp_16"/> + <param name="species_databases" value="rdp_16"/> + <output name="out_file" file="rdp16_json"/> + <assert_command> + <has_text text="wget" n="2"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="unite"/> - <param name="db_cond|version_select" value="8.0_fungi_singletons"/> - <output name="out_file" file="unite8fungisingletons_json"/> + <param name="taxonomy_databases" 
value="greengenes2_2024.09_toSpecies,greengenes2_2024.09_toGenus"/> + <output name="out_file"> + <assert_contents> + <has_text text="GreenGenes2 release 2024.09 to Genus (for short reads)"/> + <has_text text="GreenGenes2 release 2024.09 to Species (for long reads)"/> + </assert_contents> + </output> + <assert_command> + <has_text text="wget" n="2"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="RefSeq_RDP"/> - <param name="db_cond|version_select" value="2018_05"/> - <output name="out_file" file="RefSeq_RDP2018_json"/> + <param name="taxonomy_databases" value="greengenes_13.84"/> + <output name="out_file" file="greengenes13.84_json"/> + <assert_command> + <has_text text="wget" n="1"/> + </assert_command> + </test> + <test> + <param name="taxonomy_databases" value="unite_8.0_fungi"/> + <output name="out_file" file="unite8fungi_json"/> + <assert_command> + <has_text text="unzip"/> + </assert_command> + </test> + <test> + <param name="taxonomy_databases" value="unite_8.0_fungi_singletons"/> + <output name="out_file" file="unite8fungisingletons_json"/> + <assert_command> + <has_text text="wget" n="1"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="gtdb"/> - <param name="db_cond|version_select" value="2018_11"/> + <param name="taxonomy_databases" value="RefSeq_RDP_2018_05"/> + <output name="out_file" file="RefSeq_RDP2018_json"/> + <assert_command> + <has_text text="wget" n="1"/> + </assert_command> + </test> + + <test> + <param name="taxonomy_databases" value="gtdb_202_toGenus,gtdb_202_toSpecies"/> + <param name="species_databases" value="gtdb_202"/> + <output name="out_file"> + <assert_contents> + <has_text text="&quot;GTDB Version 202 to Genus (for short reads)&quot;"/> + <has_text text="&quot;GTDB Version 202 to Species (for long reads)&quot;"/> + <has_text text="&quot;GTDB Version 202&quot;"/> + </assert_contents> + </output> + <assert_command> + <has_text text="wget" n="3"/> + </assert_command> + </test> + + <test> + <param 
name="taxonomy_databases" value="gtdb_2018_11"/> + <param name="species_databases" value="gtdb_2018_11"/> <output name="out_file" file="gtdb2018_json"/> + <assert_command> + <has_text text="wget" n="2"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="hitdb"/> - <param name="db_cond|version_select" value="1"/> + <param name="taxonomy_databases" value="hitdb_1"/> <output name="out_file" file="hitdb1_json"/> + <assert_command> + <has_text text="wget" n="1"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="silva_euk_18S"/> - <param name="db_cond|version_select" value="132"/> + <param name="taxonomy_databases" value="silva_euk_18S_132"/> <output name="out_file" file="silvaeuk132_json"/> + <assert_command> + <has_text text="wget" n="1"/> + </assert_command> </test> <test> - <param name="db_cond|db_select" value="PR2"/> - <param name="db_cond|version_select" value="4.11.1"/> + <param name="taxonomy_databases" value="PR2_4.11.1"/> <output name="out_file" file="PR24.11.1_json"/> + <assert_command> + <has_text text="wget" n="1"/> + </assert_command> </test> </tests> <help><