Mercurial > repos > iuc > data_manager_build_bracken_database
view test-data/reproduce_test_dataset.sh @ 6:84cc0dc92b0c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit c0f150357d52e21443197b2ec1427feb9fc7971e
author | iuc |
---|---|
date | Wed, 06 Mar 2024 14:09:08 +0000 |
parents | c2e4127fb5bf |
children |
line wrap: on
line source
#!/bin/bash
#
# Produce a small kraken2 database (./test_db) containing only a ~1kb portion
# each of a Salmonella (NC_003198.1) and an E. coli (NC_011750.1) genome.
#
# Requirements:
#   - kraken2 and entrez-direct on PATH (both available on bioconda)
#   - node_patterns.txt in the current working directory (grep pattern file
#     used to cut nodes.dmp down)
#   - network access (taxonomy download and NCBI sequence fetch)
#
# Outputs (all relative to the current working directory):
#   - test_db/                      the built kraken2 database
#   - <accession>.fasta             full sequence as fetched from NCBI
#   - <accession>_1kb.fasta         truncated sequence added to the library

# Abort on the first failing command, on unset variables and on a failure in
# any pipeline stage, so a failed download or an empty grep result can never
# silently lead to a broken database being built.
set -euo pipefail

readonly DB='test_db'
readonly TAXONOMY="${DB}/taxonomy"
readonly -a ACCESSIONS=('NC_003198.1' 'NC_011750.1')

# Print a message to stderr and terminate with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fail early, with a clear message, when a prerequisite is missing.
for tool in kraken2-build esearch efetch; do
  command -v "$tool" >/dev/null || die "required tool not found on PATH: ${tool}"
done
[[ -f node_patterns.txt ]] || die "node_patterns.txt not found in ${PWD}"

# The documented kraken2-build option is spelled with hyphens
# (--download-taxonomy), like --add-to-library below.
kraken2-build --db "$DB" --download-taxonomy

# Keep each full taxonomy file under a *_full name and replace it with a
# version reduced to the entries needed for the two test genomes.
# Under `set -e` a grep that matches nothing (exit status 1) aborts the script.

# Accession -> taxid map: only the two accessions. -F makes the accession IDs
# literal strings so the '.' in them is not treated as a regex wildcard.
mv -- "${TAXONOMY}/nucl_gb.accession2taxid" "${TAXONOMY}/nucl_gb.accession2taxid_full"
accession_patterns=()
for acc in "${ACCESSIONS[@]}"; do
  accession_patterns+=(-e "$acc")
done
grep -F "${accession_patterns[@]}" "${TAXONOMY}/nucl_gb.accession2taxid_full" \
  > "${TAXONOMY}/nucl_gb.accession2taxid"

# Taxonomy nodes: lines selected by the patterns listed in node_patterns.txt.
mv -- "${TAXONOMY}/nodes.dmp" "${TAXONOMY}/nodes.dmp_full"
grep -f node_patterns.txt "${TAXONOMY}/nodes.dmp_full" > "${TAXONOMY}/nodes.dmp"

# Taxonomy names: only lines whose first field is one of the two taxids.
mv -- "${TAXONOMY}/names.dmp" "${TAXONOMY}/names.dmp_full"
grep -e '^220341\s' -e '^585057\s' "${TAXONOMY}/names.dmp_full" \
  > "${TAXONOMY}/names.dmp"

# Fetch each genome, keep only its first 14 lines (the FASTA header plus the
# leading sequence lines, ~1kb) and add that fragment to the database library.
for acc in "${ACCESSIONS[@]}"; do
  esearch -db nucleotide -query "$acc" \
    | efetch -format fasta > "${acc}.fasta"
  # Guard against a fetch that produced no data without a useful error.
  [[ -s "${acc}.fasta" ]] || die "no sequence data fetched for ${acc}"
  head -n 14 "${acc}.fasta" > "${acc}_1kb.fasta"
  kraken2-build --db "$DB" --add-to-library "${acc}_1kb.fasta"
done

kraken2-build --db "$DB" --build