# HG changeset patch
# User iuc
# Date 1571578641 14400
# Node ID c2e4127fb5bf5eb3069bb614ac7e52867821eed5
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 74e81c69c8806d98beb15a889741bcd702866ce3"
diff -r 000000000000 -r c2e4127fb5bf data_manager/bracken_build_database.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/bracken_build_database.py Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import errno
+import json
+import os
+import subprocess
+import uuid
+
+
+DATA_TABLE_NAME = "bracken_databases"
+
+
+def bracken_build_database(target_directory, bracken_build_args, database_name, data_table_name=DATA_TABLE_NAME):
+    """Run ``bracken-build`` and return the data table entry for the result.
+
+    ``bracken_build_args`` must provide 'threads', 'kmer_len', 'read_len' and
+    'kraken_database'. ``target_directory`` is accepted but not used here.
+    Raises ``subprocess.CalledProcessError`` if ``bracken-build`` fails.
+    """
+    database_value = str(uuid.uuid4())
+    kraken_database = bracken_build_args['kraken_database']
+    read_len = str(bracken_build_args['read_len'])
+    # Path recorded in the data table: the database<read_len>mers.kmer_distrib
+    # file located inside the Kraken database directory.
+    database_path = os.path.join(kraken_database, 'database' + read_len + 'mers.kmer_distrib')
+
+    # argv items must be strings; argparse defaults (e.g. threads=1) may be ints.
+    bracken_build_args_list = [
+        '-t', str(bracken_build_args['threads']),
+        '-k', str(bracken_build_args['kmer_len']),
+        '-l', read_len,
+        '-d', kraken_database,
+    ]
+    subprocess.check_call(['bracken-build'] + bracken_build_args_list)
+
+    return {
+        "data_tables": {
+            data_table_name: [
+                {"value": database_value, "name": database_name, "path": database_path},
+            ]
+        }
+    }
+
+
+def main():
+    """Parse the command line, run bracken-build and write the data manager JSON.
+
+    The JSON file named on the command line is read for the output dataset's
+    ``extra_files_path`` and is then overwritten with the new data table entry.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('data_manager_json')
+    parser.add_argument('--threads', dest='threads', default=1, help='threads')
+    parser.add_argument('--kmer-len', dest='kmer_len', help='K-mer length')
+    parser.add_argument('--read-len', dest='read_len', help='Read length')
+    parser.add_argument('--kraken-db', dest='kraken_database', help='Kraken Database')
+    parser.add_argument('--database-name', dest='database_name', help='Database Name')
+    args = parser.parse_args()
+
+    # Close the input handle before the same path is reopened for writing below.
+    with open(args.data_manager_json) as handle:
+        data_manager_input = json.load(handle)
+
+    target_directory = data_manager_input['output_data'][0]['extra_files_path']
+
+    bracken_build_args = {
+        'threads': args.threads,
+        'kmer_len': args.kmer_len,
+        'read_len': args.read_len,
+        'kraken_database': args.kraken_database,
+    }
+
+    try:
+        os.mkdir(target_directory)
+    except OSError as exc:
+        # An already-existing directory is fine; anything else is a real error.
+        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
+            raise
+
+    data_manager_output = bracken_build_database(target_directory, bracken_build_args, args.database_name)
+
+    with open(args.data_manager_json, 'w') as out:
+        out.write(json.dumps(data_manager_output, sort_keys=True))
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r c2e4127fb5bf data_manager/bracken_build_database.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/bracken_build_database.xml Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,48 @@
+
+
+ bracken database builder
+
+ bracken
+ kraken2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.7717/peerj-cs.104
+
+
diff -r 000000000000 -r c2e4127fb5bf data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
diff -r 000000000000 -r c2e4127fb5bf test-data/kraken2_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken2_databases.loc Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,6 @@
+# Tab separated with three columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - name (Galaxy shows this in the UI)
+# - path (folder name containing the Kraken DB)
+#
+test_entry "Test Database" ${__HERE__}/test_db
diff -r 000000000000 -r c2e4127fb5bf test-data/nodes_patterns.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nodes_patterns.txt Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,15 @@
+^220341\s
+^90370\s
+^59201\s
+^28901\s
+^590\s
+^543\s
+^91347\s
+^1236\s
+^1224\s
+^2\s
+^131567\s
+^1\s
+^585057\s
+^562\s
+^561\s
diff -r 000000000000 -r c2e4127fb5bf test-data/reproduce_test_dataset.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reproduce_test_dataset.sh Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# This script produces a small kraken2 database containing only a ~1kb portion each of a Salmonella and an E. coli genome.
+# It requires kraken2 and entrez-direct (both available on bioconda).
+kraken2-build --db test_db --download-taxonomy
+mv test_db/taxonomy/nucl_gb.accession2taxid test_db/taxonomy/nucl_gb.accession2taxid_full
+grep -e 'NC_003198.1' -e 'NC_011750.1' test_db/taxonomy/nucl_gb.accession2taxid_full > test_db/taxonomy/nucl_gb.accession2taxid
+mv test_db/taxonomy/nodes.dmp test_db/taxonomy/nodes.dmp_full
+grep -f nodes_patterns.txt test_db/taxonomy/nodes.dmp_full > test_db/taxonomy/nodes.dmp
+mv test_db/taxonomy/names.dmp test_db/taxonomy/names.dmp_full
+grep -e '^220341\s' -e '^585057\s' test_db/taxonomy/names.dmp_full > test_db/taxonomy/names.dmp
+esearch -db nucleotide -query "NC_003198.1" | efetch -format fasta > NC_003198.1.fasta
+esearch -db nucleotide -query "NC_011750.1" | efetch -format fasta > NC_011750.1.fasta
+head -n 14 NC_003198.1.fasta > NC_003198.1_1kb.fasta
+head -n 14 NC_011750.1.fasta > NC_011750.1_1kb.fasta
+kraken2-build --db test_db --add-to-library NC_003198.1_1kb.fasta
+kraken2-build --db test_db --add-to-library NC_011750.1_1kb.fasta
+kraken2-build --db test_db --build
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/hash.k2d
Binary file test-data/test_db/hash.k2d has changed
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/9C7DdW7GAD.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/library/added/9C7DdW7GAD.fna Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,17 @@
+>NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome
+AGAGATTACGTCTGGTTGCAAGAGATCATAACAGGGGAAATTGATTGAAAATAAATATAT
+CGCCAGCAGCACATGAACAAGTTTCGGAATGTGATCAATTTAAAAATTTATTGACTTAGG
+CGGGCAGATACTTTAACCAATATAGGAATACAAGACAGACAAATAAAAATGACAGAGTAC
+ACAACATCCATGAACCGCATCAGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxAGGT
+AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGAACAGTGCGG
+GCxxxxxxxxCGACCAGAGATCACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT
+ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATTCC
+AGGCAAGGGCAGGTAGCGACCGTACTTTCCGCCCCCGCGAAAATTACCAACCATCTGGTG
+GCGATGATTGAAAAAACTATCGGCGGCCAGGATGCTTTGCCGAATATCAGCGATGCCGAA
+CGTATTTTTTCTGACCTGCTCGCAGGACTTGCCAGCGCGCAGCCGGGATTCCCGCTTGCA
+CGGTTGAAAATGGTTGTCGAACAAGAATTCGCTCAGATCAAACATGTTTTGCATGGTATC
+AGCCTGCTGGGTCAGTGCCCGGATAGCATCAACGCCGCGCTGATTTGCCGTGGCGAAAAA
+ATGTCGATCGCGATTATGGCGGGACTCCTGGAGGCGCGTGGACATCGCGTCACGGTGATC
+GATCCGGTAGAAAAACTGCTGGCGGTGGGCCATTACCTTGAATCTACCGTCGATATCGCG
+GAATCGACTCGCCGTATCGCCGCCAGCCAGATCCCGGCCGATCACATGATCCTGATGGCG
+GGCTTTACTG
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/9C7DdW7GAD.fna.masked
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/cWk1IBlK73.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/library/added/cWk1IBlK73.fna Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,17 @@
+>NC_011750.1 Escherichia coli IAI39 chromosome, complete genome
+GCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTxxxxxxxGAGTGTCT
+GATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGT
+CACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACA
+CAACATCCATGAAACGCATTAGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxAGGTA
+ACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGC
+xxxxxxxxCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTAC
+ATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAG
+GCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGC
+GATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACG
+TATTTTTGCCGAACTTCTGACGGGACTCGCCGCTGCCCAACCGGGATTCCCGCTGGCGCA
+ACTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAG
+TTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAAT
+GTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACCGTTATCGA
+TCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGA
+GTCCACCCGCCGTATTGCGGCAAGTCGTATTCCGGCTGATCACATGGTGCTGATGGCAGG
+TTTCACCGCC
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/cWk1IBlK73.fna.masked
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/prelim_map.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/library/added/prelim_map.txt Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,2 @@
+ACCNUM NC_011750.1 NC_011750
+ACCNUM NC_003198.1 NC_003198
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/prelim_map_QXr8C5PiOX.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/library/added/prelim_map_QXr8C5PiOX.txt Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,1 @@
+ACCNUM NC_003198.1 NC_003198
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/library/added/prelim_map_l8ftMYsZv0.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/library/added/prelim_map_l8ftMYsZv0.txt Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,1 @@
+ACCNUM NC_011750.1 NC_011750
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/opts.k2d
Binary file test-data/test_db/opts.k2d has changed
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/seqid2taxid.map
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/seqid2taxid.map Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,1 @@
+NC_011750.1 585057
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/taxo.k2d
Binary file test-data/test_db/taxo.k2d has changed
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/taxonomy/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/names.dmp Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,5 @@
+220341 | Salmonella enterica subsp. enterica serovar Typhi CT18 | | equivalent name |
+220341 | Salmonella enterica subsp. enterica serovar Typhi str. CT18 | | scientific name |
+220341 | Salmonella enterica subsp. enterica serovar Typhi strain CT18 | | equivalent name |
+220341 | Salmonella typhi CT18 | | equivalent name |
+585057 | Escherichia coli IAI39 | | scientific name |
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/taxonomy/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/nodes.dmp Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,15 @@
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+590 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+28901 | 590 | species | SE | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+59201 | 28901 | subspecies | SE | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+90370 | 59201 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+220341 | 90370 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+585057 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/taxonomy/nucl_gb.accession2taxid
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/nucl_gb.accession2taxid Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,2 @@
+NC_003198 NC_003198.1 220341 16758993
+NC_011750 NC_011750.1 585057 218698419
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/taxonomy/prelim_map.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/prelim_map.txt Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,2 @@
+ACCNUM NC_011750.1 NC_011750
+ACCNUM NC_003198.1 NC_003198
diff -r 000000000000 -r c2e4127fb5bf test-data/test_db/unmapped.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/unmapped.txt Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,1 @@
+NC_003198
diff -r 000000000000 -r c2e4127fb5bf tool-data/bracken_databases.loc.sample
diff -r 000000000000 -r c2e4127fb5bf tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+
diff -r 000000000000 -r c2e4127fb5bf tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sun Oct 20 09:37:21 2019 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+