Mercurial > repos > iuc > data_manager_dada2
view data_manager/dada2_fetcher.xml @ 8:0181efacb40a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_dada2 commit 5f2ed0407e3adaafc6070fc0594a4685ade094db
| author | iuc |
|---|---|
| date | Sat, 22 Nov 2025 20:07:44 +0000 |
| parents | 678176bff643 |
| children |
line wrap: on
line source
<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.1.2" profile="24.2">
    <!--
        Galaxy data manager for DADA2 reference databases.
        For every selected database the command downloads the file into the
        extra files directory of the output dataset (UNITE archives are
        additionally unpacked), then copies the rendered "dmjson" config file
        to the output. That JSON lists the new entries for the
        dada2_taxonomy and dada2_species data tables.
    -->
    <description>Download reference databases</description>
    <requirements>
        <requirement type="package" version="1.25.0">wget</requirement>
        <requirement type="package" version="6.0">unzip</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Cheetah directives and comments are line-oriented: keep every directive on
## its own line, otherwise this leading comment swallows the whole template.
## see https://github.com/benjjneb/dada2/issues/2053 for the distinction of the _toGenus and _toSpecies datasets
#set $FILE2TAXURL = {
    "silva_138.2_toGenus": "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toGenus_trainset.fa.gz?download=1",
    "silva_138.2_toSpecies": "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toSpecies_trainset.fa.gz?download=1",
    "silva_138.2_fungi_18S_toGenus": "https://zenodo.org/records/15044434/files/SILVA_SSUfungi_nr99_v138_2_toGenus_trainset.fasta?download=1",
    "silva_138.2_fungi_18S_toSpecies": "https://zenodo.org/records/15044434/files/SILVA_SSUfungi_nr99_v138_2_toSpecies_trainset.fasta?download=1",
    "silva_138.2_fungi_26S_toGenus": "https://zenodo.org/records/15044501/files/SILVA_LSUfungi_nr99_v138_2_toGenus_trainset.fasta?download=1",
    "silva_138.2_fungi_26S_toSpecies": "https://zenodo.org/records/15044501/files/SILVA_LSUfungi_nr99_v138_2_toSpecies_trainset.fasta?download=1",
    "silva_138": "https://zenodo.org/record/3731176/files/silva_nr_v138_train_set.fa.gz?download=1",
    "silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp_19_toGenus": "https://zenodo.org/records/14168771/files/rdp_19_toGenus_trainset.fa.gz?download=1",
    "rdp_19_toSpecies": "https://zenodo.org/records/14168771/files/rdp_19_toSpecies_trainset.fa.gz?download=1",
    "rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "unite_8.0_fungi": "https://s3.hpc.ut.ee/plutof-public/original/9f7b41c3-825b-4db8-9c52-74a4603a860a.zip",
    "unite_8.0_fungi_singletons": "https://s3.hpc.ut.ee/plutof-public/original/53dfc9ce-9cb5-4205-84bb-f47faff26462.zip",
    "greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
    "greengenes2_2024.09_toGenus": "https://zenodo.org/records/14169078/files/gg2_2024_09_toGenus_trainset.fa.gz?download=1",
    "greengenes2_2024.09_toSpecies": "https://zenodo.org/records/14169078/files/gg2_2024_09_toSpecies_trainset.fa.gz?download=1",
    "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
    "gtdb_202_toGenus": "https://zenodo.org/records/4735821/files/GTDB_bac120_arc122_ssu_r202_Genus.fa.gz?download=1",
    "gtdb_202_toSpecies": "https://zenodo.org/records/4735821/files/GTDB_bac120_arc122_ssu_r202_Species.fa.gz?download=1",
    "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
    "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
    "silva_euk_18S_132": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1",
    "PR2_4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz"
}
## rdp_19 not available for assignSpecies https://github.com/benjjneb/dada2/issues/2053#issuecomment-2512185929
#set FILE2SPECIESURL = {
    "silva_138.2": "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz?download=1",
    "silva_138.2_fungi_18S": "https://zenodo.org/records/15044434/files/SILVA_SSUfungi_assignSpecies.fasta?download=1",
    "silva_138.2_fungi_26S": "https://zenodo.org/records/15044501/files/SILVA_LSUfungi_assignSpecies.fasta?download=1",
    "silva_138": "https://zenodo.org/record/3731176/files/silva_species_assignment_v138.fa.gz?download=1",
    "silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
    "gtdb_202": "https://zenodo.org/records/4735821/files/GTDB_bac120_arc122_ssu_r202_fullTaxo.fa.gz?download=1",
    "gtdb_2018_11": "https://zenodo.org/records/2658728/files/GTDB_dada2_assignment_species.fa.gz?download=1"
}

## All downloads are stored next to the output dataset; the data table
## paths written by the dmjson config file are relative to this directory.
mkdir -p '${out_file.extra_files_path}' &&
cd '${out_file.extra_files_path}' &&
#for $database in $taxonomy_databases:
    wget '$FILE2TAXURL[str($database)]' -O '${database}.taxonomy' &&
    ## UNITE is shipped as a zip archive: stream the FASTA member(s) outside
    ## of developer/ to stdout and replace the archive with that content.
    #if str($database).startswith("unite")
        unzip -p '${database}.taxonomy' "*.fasta" -x "developer/*" > tmp &&
        mv tmp '${database}.taxonomy' &&
    #end if
#end for
## species_databases is optional, so guard the loop against an empty selection.
#if $species_databases
    #for $database in $species_databases:
        wget '$FILE2SPECIESURL[str($database)]' -O '${database}.species' &&
    #end for
#end if
cp '$dmjson' '$out_file'
    ]]></command>
    <configfiles>
        <!--
            Data manager JSON describing the new data table rows.
            FILE2NAME maps each database key to its display name and
            FILE2TAXLEVELS overrides the default list of taxonomic levels.
            NOTE(review): the whitespace of the JSON skeleton below was
            reconstructed; confirm it against the expected files in test-data
            that are compared via the file attribute of the tests.
        -->
        <configfile name="dmjson"><![CDATA[#slurp
#set FILE2NAME = {
    "silva_138.2_toGenus": "Silva version 138.2 to Genus (for short reads)",
    "silva_138.2_toSpecies": "Silva version 138.2 to Species (for long reads)",
    "silva_138.2": "Silva version 138.2",
    "silva_138.2_fungi_18S_toGenus": "Silva version 138.2 for Fungi 18S to Genus (for short reads)",
    "silva_138.2_fungi_18S_toSpecies": "Silva version 138.2 for Fungi 18S to Species (for long reads)",
    "silva_138.2_fungi_18S": "Silva version 138.2 for Fungi 18S",
    "silva_138.2_fungi_26S_toGenus": "Silva version 138.2 for Fungi 26S to Genus (for short reads)",
    "silva_138.2_fungi_26S_toSpecies": "Silva version 138.2 for Fungi 26S to Species (for long reads)",
    "silva_138.2_fungi_26S": "Silva version 138.2 for Fungi 26S",
    "silva_138": "Silva version 138",
    "silva_132": "Silva version 132",
    "silva_128": "Silva version 128",
    "rdp_19_toGenus": "RDP trainset 19 to Genus (for short reads)",
    "rdp_19_toSpecies": "RDP trainset 19 to Species (for long reads)",
    "rdp_16": "RDP trainset 16",
    "rdp_14": "RDP trainset 14",
    "greengenes_13.84": "GreenGenes version 13.84",
    "greengenes2_2024.09_toGenus": "GreenGenes2 release 2024.09 to Genus (for short reads)",
    "greengenes2_2024.09_toSpecies": "GreenGenes2 release 2024.09 to Species (for long reads)",
    "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
    "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
    "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
    "gtdb_202_toGenus": "GTDB Version 202 to Genus (for short reads)",
    "gtdb_202_toSpecies": "GTDB Version 202 to Species (for long reads)",
    "gtdb_202": "GTDB Version 202",
    "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)",
    "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)",
    "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S",
    "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1"
}
#set FILE2TAXLEVELS = {
    "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species",
}
{
  "data_tables":{
    "dada2_taxonomy": [
#for $i, $database in enumerate($taxonomy_databases):
#set taxlevels = FILE2TAXLEVELS.get(str($database), "Kingdom,Phylum,Class,Order,Family,Genus,Species")
      {
        "value": "$database",
        "name": "$FILE2NAME[str($database)]",
        "path": "${database}.taxonomy",
        "taxlevels": "$taxlevels"
      }#slurp
#if i + 1 < len(str($taxonomy_databases).split(","))
      ,
#end if
#end for
    ]
#if $species_databases
    , "dada2_species": [
#for $i, $database in enumerate($species_databases):
      {
        "value": "$database",
        "name": "$FILE2NAME[str($database)]",
        "path": "${database}.species"
      }#slurp
#if i + 1 < len(str($species_databases).split(","))
      ,
#end if
#end for
    ]
#end if
  }
}
]]></configfile>
    </configfiles>
    <inputs>
        <!-- option values must match the keys of FILE2TAXURL and FILE2NAME -->
        <param name="taxonomy_databases" multiple="true" type="select" label="Taxonomic database" help="for assignTaxonomy">
            <option value="silva_138.2_toGenus">Silva 138.2 to Genus (for short reads)</option>
            <option value="silva_138.2_toSpecies">Silva 138.2 to Species (for long reads)</option>
            <option value="silva_138.2_fungi_18S_toGenus">Silva 138.2 for Fungi 18S to Genus (for short reads)</option>
            <option value="silva_138.2_fungi_18S_toSpecies">Silva 138.2 for Fungi 18S to Species (for long reads)</option>
            <option value="silva_138.2_fungi_26S_toGenus">Silva 138.2 for Fungi 26S to Genus (for short reads)</option>
            <option value="silva_138.2_fungi_26S_toSpecies">Silva 138.2 for Fungi 26S to Species (for long reads)</option>
            <option value="silva_138">Silva 138</option>
            <option value="silva_132">Silva 132</option>
            <option value="silva_128">Silva 128</option>
            <option value="rdp_19_toGenus">RDP 19 to Genus (for short reads)</option>
            <option value="rdp_19_toSpecies">RDP 19 to Species (for long reads)</option>
            <option value="rdp_16">RDP 16</option>
            <option value="rdp_14">RDP 14</option>
            <option value="greengenes2_2024.09_toGenus">GreenGenes2 2024.09 to Genus (for short reads)</option>
            <option value="greengenes2_2024.09_toSpecies">GreenGenes2 2024.09 to Species (for long reads)</option>
            <option value="greengenes_13.84">GreenGenes 13.84</option>
            <option value="unite_8.0_fungi">UNITE Fungi: General Fasta 8.0 for Fungi</option>
            <option value="unite_8.0_fungi_singletons">UNITE Fungi: General Fasta release 8.0 for Fungi including global and 97% singletons</option>
            <!-- UNITE Eukaryotes not yet supported https://github.com/benjjneb/dada2/issues/702 -->
            <option value="RefSeq_RDP_2018_05">NCBI RefSeq 16S rRNA database supplemented by RDP 05/2018</option>
            <option value="gtdb_202_toGenus">GTDB: Genome Taxonomy Database 202 to Genus (for short reads)</option>
            <option value="gtdb_202_toSpecies">GTDB: Genome Taxonomy Database 202 to Species (for long reads)</option>
            <option value="gtdb_2018_11">GTDB: Genome Taxonomy Database 11/2018</option>
            <option value="hitdb_1">HitDB (Human InTestinal 16S) 1</option>
            <option value="silva_euk_18S_132">Silva Eukaryotic 18S 132</option>
            <option value="PR2_4.11.1">Protist Ribosomal Reference database (PR2) 4.11.1</option>
        </param>
        <!-- option values must match the keys of FILE2SPECIESURL and FILE2NAME -->
        <param name="species_databases" type="select" multiple="true" optional="true" label="Species databases" help="for addSpecies">
            <option value="silva_138.2">Silva 138.2</option>
            <option value="silva_138.2_fungi_18S">Silva 138.2 for Fungi 18S</option>
            <option value="silva_138.2_fungi_26S">Silva 138.2 for Fungi 26S</option>
            <option value="silva_138">Silva 138</option>
            <option value="silva_132">Silva 132</option>
            <option value="silva_128">Silva 128</option>
            <option value="rdp_16">RDP 16</option>
            <option value="rdp_14">RDP 14</option>
            <option value="gtdb_202">GTDB: Genome Taxonomy Database 202</option>
            <option value="gtdb_2018_11">GTDB: Genome Taxonomy Database 11/2018</option>
        </param>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <!--
            The assertions wrapped in &quot; match the complete quoted JSON
            value, so that e.g. the species entry "Silva version 138.2" is
            not satisfied by the longer taxonomy names alone.
        -->
        <test>
            <param name="taxonomy_databases" value="silva_138.2_toGenus,silva_138.2_toSpecies"/>
            <param name="species_databases" value="silva_138.2"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="&quot;Silva version 138.2 to Genus (for short reads)&quot;"/>
                    <has_text text="&quot;Silva version 138.2 to Species (for long reads)&quot;"/>
                    <has_text text="&quot;Silva version 138.2&quot;"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="wget" n="3"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="silva_138.2_fungi_18S_toGenus,silva_138.2_fungi_18S_toSpecies"/>
            <param name="species_databases" value="silva_138.2_fungi_18S"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="&quot;Silva version 138.2 for Fungi 18S to Genus (for short reads)&quot;"/>
                    <has_text text="&quot;Silva version 138.2 for Fungi 18S to Species (for long reads)&quot;"/>
                    <has_text text="&quot;Silva version 138.2 for Fungi 18S&quot;"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="wget" n="3"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="silva_138.2_fungi_26S_toGenus,silva_138.2_fungi_26S_toSpecies"/>
            <param name="species_databases" value="silva_138.2_fungi_26S"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="&quot;Silva version 138.2 for Fungi 26S to Genus (for short reads)&quot;"/>
                    <has_text text="&quot;Silva version 138.2 for Fungi 26S to Species (for long reads)&quot;"/>
                    <has_text text="&quot;Silva version 138.2 for Fungi 26S&quot;"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="wget" n="3"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="silva_138"/>
            <param name="species_databases" value="silva_138"/>
            <output name="out_file" file="silva138_json"/>
            <assert_command>
                <has_text text="wget" n="2"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="silva_132"/>
            <param name="species_databases" value="silva_132"/>
            <output name="out_file" file="silva132_json"/>
            <assert_command>
                <has_text text="wget" n="2"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="rdp_19_toSpecies,rdp_19_toGenus"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="RDP trainset 19 to Species (for long reads)"/>
                    <has_text text="RDP trainset 19 to Genus (for short reads)"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="wget" n="2"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="rdp_16"/>
            <param name="species_databases" value="rdp_16"/>
            <output name="out_file" file="rdp16_json"/>
            <assert_command>
                <has_text text="wget" n="2"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="greengenes2_2024.09_toSpecies,greengenes2_2024.09_toGenus"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="GreenGenes2 release 2024.09 to Genus (for short reads)"/>
                    <has_text text="GreenGenes2 release 2024.09 to Species (for long reads)"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="wget" n="2"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="greengenes_13.84"/>
            <output name="out_file" file="greengenes13.84_json"/>
            <assert_command>
                <has_text text="wget" n="1"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="unite_8.0_fungi"/>
            <output name="out_file" file="unite8fungi_json"/>
            <assert_command>
                <has_text text="unzip"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="unite_8.0_fungi_singletons"/>
            <output name="out_file" file="unite8fungisingletons_json"/>
            <assert_command>
                <has_text text="wget" n="1"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="RefSeq_RDP_2018_05"/>
            <output name="out_file" file="RefSeq_RDP2018_json"/>
            <assert_command>
                <has_text text="wget" n="1"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="gtdb_202_toGenus,gtdb_202_toSpecies"/>
            <param name="species_databases" value="gtdb_202"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="&quot;GTDB Version 202 to Genus (for short reads)&quot;"/>
                    <has_text text="&quot;GTDB Version 202 to Species (for long reads)&quot;"/>
                    <has_text text="&quot;GTDB Version 202&quot;"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="wget" n="3"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="gtdb_2018_11"/>
            <param name="species_databases" value="gtdb_2018_11"/>
            <output name="out_file" file="gtdb2018_json"/>
            <assert_command>
                <has_text text="wget" n="2"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="hitdb_1"/>
            <output name="out_file" file="hitdb1_json"/>
            <assert_command>
                <has_text text="wget" n="1"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="silva_euk_18S_132"/>
            <output name="out_file" file="silvaeuk132_json"/>
            <assert_command>
                <has_text text="wget" n="1"/>
            </assert_command>
        </test>
        <test>
            <param name="taxonomy_databases" value="PR2_4.11.1"/>
            <output name="out_file" file="PR24.11.1_json"/>
            <assert_command>
                <has_text text="wget" n="1"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
Public Reference databases maintained by the DADA2 project
..........................................................

The following reference databases, which are described as maintained by the DADA2 project (https://benjjneb.github.io/dada2/training.html), are available:

- Silva
- RDP
- GreenGenes (https://web.archive.org/web/20240703054246/https://greengenes.secondgenome.com/)
- Greengenes2 (https://greengenes2.ucsd.edu/)
- UNITE general FASTA (https://unite.ut.ee/repository.php)

While Silva and RDP contain reference databases for taxonomy and species assignment, the greengenes and UNITE databases only contain a reference database for taxonomy assignment.

Silva is licensed under Creative Commons Attribution 4.0 (CC-BY 4.0) since release 138.

Except for UNITE, all reference databases are downloaded from the corresponding zenodo links that are listed on the DADA2 website. The UNITE databases are taken from the links provided on the UNITE website.

More detailed information on the reference databases can be found on the DADA2 website and contained links: https://benjjneb.github.io/dada2/training.html.

Further public Reference databases listed by the DADA2 project
..............................................................

Several contributed reference databases are listed on the DADA2 project website (https://benjjneb.github.io/dada2/training.html):

- RefSeq + RDP (NCBI RefSeq 16S rRNA database supplemented by RDP)
- GTDB: Genome Taxonomy Database (More info: http://gtdb.ecogenomic.org/)
- HitDB version 1 (Human InTestinal 16S rRNA) (https://github.com/microbiome/HITdb)
- RDP fungi LSU
- Silva Eukaryotic 18S
- PR2 (https://github.com/pr2database/pr2database)

Except for PR2, all reference databases are downloaded from the corresponding zenodo links that are listed on the DADA2 website. The PR2 database is taken from their github page.

More detailed information on the reference databases can be found on the DADA2 website and contained links: https://benjjneb.github.io/dada2/training.html.
    ]]></help>
    <citations>
        <!-- silva -->
        <citation type="doi">10.1093/nar/gks1219</citation>
        <!-- rdp -->
        <citation type="doi">10.1093/nar/gkt1244</citation>
        <!-- greengenes2 -->
        <citation type="doi">10.1038/s41587-023-01845-1</citation>
        <!-- greengenes -->
        <citation type="doi">10.1128/AEM.03006-05</citation>
        <!-- unite -->
        <citation type="doi">10.15156/BIO/786343</citation>
        <!-- TODO gtdb ??? -->
        <!-- hitdb -->
        <citation type="doi">10.1186/s12864-015-2265-y</citation>
        <!-- PR2 -->
        <citation type="doi">10.1093/nar/gks1160</citation>
    </citations>
</tool>
