annotate test-data/reproduce_test_data.sh @ 8:1d4bd12f01cf draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 2d68ebc8d29e6046811970c6ef04f683e9916857
author iuc
date Thu, 18 Jan 2024 15:38:05 +0000
parents b7b1c8bf7ae0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
1 #!/bin/bash
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
2
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
3 # This script produces a small kraken2 database containing only a ~1 kb portion each of a Salmonella genome and an E. coli genome
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
4 # It requires kraken2, bracken, art and entrez-direct (all available on bioconda)
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
5 kraken2-build --db test_db --download-taxonomy  # fetch the taxonomy files into test_db/taxonomy (option is hyphenated, like --add-to-library)
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
6 mv test_db/taxonomy/nucl_gb.accession2taxid test_db/taxonomy/nucl_gb.accession2taxid_full
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
7 grep -e 'NC_003198.1' -e 'NC_011750.1' test_db/taxonomy/nucl_gb.accession2taxid_full > test_db/taxonomy/nucl_gb.accession2taxid
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
8 esearch -db nucleotide -query "NC_003198.1" | efetch -format fasta > NC_003198.1.fasta
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
9 esearch -db nucleotide -query "NC_011750.1" | efetch -format fasta > NC_011750.1.fasta
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
10 head -n 14 NC_003198.1.fasta > NC_003198.1_1kb.fasta
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
11 head -n 14 NC_011750.1.fasta > NC_011750.1_1kb.fasta
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
12 kraken2-build --db test_db --add-to-library NC_003198.1_1kb.fasta
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
13 kraken2-build --db test_db --add-to-library NC_011750.1_1kb.fasta
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
14 kraken2-build --db test_db --build
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
15
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
16 # Simulate 100bp reads from ~1kb portions of genomes
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
17 art_illumina -sam -i NC_011750.1_1kb.fasta -p -m 300 -f 10 -s 10 -l 100 -o NC_011750.1_simulated_R
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
18 art_illumina -sam -i NC_003198.1_1kb.fasta -p -m 300 -f 10 -s 10 -l 100 -o NC_003198.1_simulated_R
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
19
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
20 # Generate kraken reports
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
21 kraken2 --db test_db --report NC_011750.1_simulated_kraken_report.txt --paired NC_011750.1_simulated_R1.fastq NC_011750.1_simulated_R2.fastq
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
22 kraken2 --db test_db --report NC_003198.1_simulated_kraken_report.txt --paired NC_003198.1_simulated_R1.fastq NC_003198.1_simulated_R2.fastq
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
23
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
24 # Build bracken kmer distribution files using default kmer-len=35 and read-len=100
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
25 bracken-build -d test_db
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
26
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
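27 # Estimate species-level (--level S) abundances from the kraken reports; the commands below are left commented out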
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
28 # est_abundance.py --kmer_distr test_db/database100mers.kmer_distrib --level S -i NC_003198.1_simulated_kraken_report.txt -o NC_003198.1_simulated_bracken_report.txt
b7b1c8bf7ae0 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 851f81495c875ac09d936537ffd2b32e6af2c8c5"
iuc
parents:
diff changeset
29 # est_abundance.py --kmer_distr test_db/database100mers.kmer_distrib --level S -i NC_011750.1_simulated_kraken_report.txt -o NC_011750.1_simulated_bracken_report.txt