diff data_manager/kraken2_build_database.py @ 12:90b4d4f0a3a4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 9835da32741d05d129a1a44835f66e32713770ad
author iuc
date Fri, 18 Oct 2024 17:08:15 +0000
parents 9002633b4737
children e9ee4d074d5d
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py	Mon Jul 15 09:21:32 2024 +0000
+++ b/data_manager/kraken2_build_database.py	Fri Oct 18 17:08:15 2024 +0000
@@ -1,12 +1,11 @@
 #!/usr/bin/env python
 
-from __future__ import print_function
-
 import argparse
 import datetime
 import errno
 import json
 import os
+import re
 import shutil
 import subprocess
 import sys
@@ -321,12 +320,33 @@
     return data_table_entry
 
 
-def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME):
+    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
+
+    database_value = "_".join([
+        now,
+        re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'),
+        "kmer-len", str(kraken2_args["kmer_len"]),
+        "minimizer-len", str(kraken2_args["minimizer_len"]),
+        "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
+        "load-factor", str(kraken2_args["load_factor"]),
+    ])
+
+    database_name = " ".join([
+        custom_database_name,
+        "(" + custom_source_info + ",",
+        "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
+        "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
+        "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",",
+        "load-factor=" + str(kraken2_args["load_factor"]) + ")",
+    ])
+
+    database_path = database_value
 
     args = [
         '--threads', str(kraken2_args["threads"]),
         '--download-taxonomy',
-        '--db', custom_database_name,
+        '--db', database_path,
     ]
 
     if kraken2_args['skip_maps']:
@@ -337,7 +357,7 @@
     args = [
         '--threads', str(kraken2_args["threads"]),
         '--add-to-library', kraken2_args["custom_fasta"],
-        '--db', custom_database_name
+        '--db', database_path,
     ]
 
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
@@ -349,7 +369,7 @@
         '--minimizer-len', str(kraken2_args["minimizer_len"]),
         '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
         '--load-factor', str(kraken2_args["load_factor"]),
-        '--db', custom_database_name
+        '--db', database_path,
     ]
 
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
@@ -358,7 +378,7 @@
         args = [
             '--threads', str(kraken2_args["threads"]),
             '--clean',
-            '--db', custom_database_name
+            '--db', database_path,
         ]
 
         subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
@@ -367,9 +387,9 @@
         'data_tables': {
             data_table_name: [
                 {
-                    "value": custom_database_name,
-                    "name": custom_database_name,
-                    "path": custom_database_name
+                    "value": database_value,
+                    "name": database_name,
+                    "path": database_path,
                 }
             ]
         }
@@ -393,6 +413,7 @@
     parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
     parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
     parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
+    parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)')
     parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
     parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files')
     args = parser.parse_args()
@@ -464,6 +485,7 @@
         data_manager_output = kraken2_build_custom(
             kraken2_args,
             args.custom_database_name,
+            args.custom_source_info,
             target_directory,
         )
     else: