Mercurial > repos > dfornika > data_manager_build_bracken_database
diff test-data/reproduce_test_dataset.sh @ 0:911bb6c95bf8 draft default tip
"planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_bracken_database/ commit d0b060e7d9cc9fd89926dbc07fc93c8a1471b3fe-dirty"
author | dfornika |
---|---|
date | Thu, 24 Oct 2019 17:44:45 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reproduce_test_dataset.sh Thu Oct 24 17:44:45 2019 -0400 @@ -0,0 +1,18 @@ +#!/bin/bash + +# This script produces a small kraken2 database containing only a ~1kb portion each of a salmonella and ecoli genome +# It requires kraken2, and entrez-direct (available on bioconda) +kraken2-build --db test_db --download_taxonomy +mv test_db/taxonomy/nucl_gb.accession2taxid test_db/taxonomy/nucl_gb.accession2taxid_full +grep -e 'NC_003198.1' -e 'NC_011750.1' test_db/taxonomy/nucl_gb.accession2taxid_full > test_db/taxonomy/nucl_gb.accession2taxid +mv test_db/taxonomy/nodes.dmp test_db/taxonomy/nodes.dmp_full +grep -f node_patterns.txt test_db/taxonomy/nodes.dmp_full > test_db/taxonomy/nodes.dmp +mv test_db/taxonomy/names.dmp test_db/taxonomy/names.dmp_full +grep -e '^220341\s' -e '^585057\s' test_db/taxonomy/names.dmp_full > test_db/taxonomy/names.dmp +esearch -db nucleotide -query "NC_003198.1" | efetch -format fasta > NC_003198.1.fasta +esearch -db nucleotide -query "NC_011750.1" | efetch -format fasta > NC_011750.1.fasta +head -n 14 NC_003198.1.fasta > NC_003198.1_1kb.fasta +head -n 14 NC_011750.1.fasta > NC_011750.1_1kb.fasta +kraken2-build --db test_db --add-to-library NC_003198.1_1kb.fasta +kraken2-build --db test_db --add-to-library NC_011750.1_1kb.fasta +kraken2-build --db test_db --build