Mercurial > repos > iuc > data_manager_build_bracken_database
annotate test-data/reproduce_test_dataset.sh @ 7:174a754bd3b6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_bracken_database commit a108f20aebc04574a8bd0a90b955064439a50852
| author | iuc |
|---|---|
| date | Wed, 05 Nov 2025 13:32:09 +0000 |
| parents | c2e4127fb5bf |
| children |
| rev | line source |
|---|---|
|
0
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
1 #!/bin/bash |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
2 |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
3 # This script produces a small kraken2 database containing only a ~1 kb portion of each of two genomes: one Salmonella and one E. coli |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
4 # It requires kraken2 and entrez-direct (available on Bioconda), and a node_patterns.txt file in the working directory |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
5 kraken2-build --db test_db --download-taxonomy  # populates test_db/taxonomy, which the following steps filter down |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
6 mv test_db/taxonomy/nucl_gb.accession2taxid test_db/taxonomy/nucl_gb.accession2taxid_full |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
7 grep -e 'NC_003198.1' -e 'NC_011750.1' test_db/taxonomy/nucl_gb.accession2taxid_full > test_db/taxonomy/nucl_gb.accession2taxid |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
8 mv test_db/taxonomy/nodes.dmp test_db/taxonomy/nodes.dmp_full |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
9 grep -f node_patterns.txt test_db/taxonomy/nodes.dmp_full > test_db/taxonomy/nodes.dmp |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
10 mv test_db/taxonomy/names.dmp test_db/taxonomy/names.dmp_full |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
11 grep -e '^220341\s' -e '^585057\s' test_db/taxonomy/names.dmp_full > test_db/taxonomy/names.dmp |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
12 esearch -db nucleotide -query "NC_003198.1" | efetch -format fasta > NC_003198.1.fasta |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
13 esearch -db nucleotide -query "NC_011750.1" | efetch -format fasta > NC_011750.1.fasta |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
14 head -n 14 NC_003198.1.fasta > NC_003198.1_1kb.fasta |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
15 head -n 14 NC_011750.1.fasta > NC_011750.1_1kb.fasta |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
16 kraken2-build --db test_db --add-to-library NC_003198.1_1kb.fasta |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
17 kraken2-build --db test_db --add-to-library NC_011750.1_1kb.fasta |
|
c2e4127fb5bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
iuc
parents:
diff
changeset
|
18 kraken2-build --db test_db --build |
