changeset 12:90b4d4f0a3a4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 9835da32741d05d129a1a44835f66e32713770ad
author iuc
date Fri, 18 Oct 2024 17:08:15 +0000
parents 1e34d2e3d285
children e9ee4d074d5d
files data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml
diffstat 2 files changed, 43 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py	Mon Jul 15 09:21:32 2024 +0000
+++ b/data_manager/kraken2_build_database.py	Fri Oct 18 17:08:15 2024 +0000
@@ -1,12 +1,11 @@
 #!/usr/bin/env python
 
-from __future__ import print_function
-
 import argparse
 import datetime
 import errno
 import json
 import os
+import re
 import shutil
 import subprocess
 import sys
@@ -321,12 +320,33 @@
     return data_table_entry
 
 
-def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME):
+    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
+
+    database_value = "_".join([
+        now,
+        re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'),
+        "kmer-len", str(kraken2_args["kmer_len"]),
+        "minimizer-len", str(kraken2_args["minimizer_len"]),
+        "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
+        "load-factor", str(kraken2_args["load_factor"]),
+    ])
+
+    database_name = " ".join([
+        custom_database_name,
+        "(" + custom_source_info + ",",
+        "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
+        "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
+        "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",",
+        "load-factor=" + str(kraken2_args["load_factor"]) + ")",
+    ])
+
+    database_path = database_value
 
     args = [
         '--threads', str(kraken2_args["threads"]),
         '--download-taxonomy',
-        '--db', custom_database_name,
+        '--db', database_path,
     ]
 
     if kraken2_args['skip_maps']:
@@ -337,7 +357,7 @@
     args = [
         '--threads', str(kraken2_args["threads"]),
         '--add-to-library', kraken2_args["custom_fasta"],
-        '--db', custom_database_name
+        '--db', database_path,
     ]
 
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
@@ -349,7 +369,7 @@
         '--minimizer-len', str(kraken2_args["minimizer_len"]),
         '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
         '--load-factor', str(kraken2_args["load_factor"]),
-        '--db', custom_database_name
+        '--db', database_path,
     ]
 
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
@@ -358,7 +378,7 @@
         args = [
             '--threads', str(kraken2_args["threads"]),
             '--clean',
-            '--db', custom_database_name
+            '--db', database_path,
         ]
 
         subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
@@ -367,9 +387,9 @@
         'data_tables': {
             data_table_name: [
                 {
-                    "value": custom_database_name,
-                    "name": custom_database_name,
-                    "path": custom_database_name
+                    "value": database_value,
+                    "name": database_name,
+                    "path": database_path,
                 }
             ]
         }
@@ -393,6 +413,7 @@
     parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
     parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
     parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
+    parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)')
     parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
     parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files')
     args = parser.parse_args()
@@ -464,6 +485,7 @@
         data_manager_output = kraken2_build_custom(
             kraken2_args,
             args.custom_database_name,
+            args.custom_source_info,
             target_directory,
         )
     else:
--- a/data_manager/kraken2_build_database.xml	Mon Jul 15 09:21:32 2024 +0000
+++ b/data_manager/kraken2_build_database.xml	Fri Oct 18 17:08:15 2024 +0000
@@ -3,12 +3,12 @@
     <description>database builder</description>
     <macros>
         <token name="@TOOL_VERSION@">2.1.3</token>
-        <token name="@VERSION_SUFFIX@">2</token>
+        <token name="@VERSION_SUFFIX@">3</token>
         <token name="@PROFILE@">22.01</token>
         <xml name="common_params">
             <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
             <param name="minimizer_len" type="integer" value="31" label="Minimizer length" />
-            <param name="minimizer_spaces" type="integer" value="6" label="Minimizer spaces" />
+            <param name="minimizer_spaces" type="integer" value="7" label="Minimizer spaces" />
             <param name="load_factor" type="float" value="0.7" min="0" max="1" label="Load factor" help="Proportion of the hash table to be populated" />
             <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" />
         </xml>
@@ -46,9 +46,12 @@
             <option value="pluspfp_16gb">PlusPFP-16 (PlusPFP with DB capped at 16 GB; ~15 GB)</option>
         </xml>
     </macros>
+    <xrefs>
+        <xref type="bio.tools">kraken2</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">kraken2</requirement>
-        <requirement type="package" version="3.7">python</requirement>
+        <requirement type="package" version="3.13">python</requirement>
     </requirements>
     <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command>
     <command detect_errors="exit_code"><![CDATA[
@@ -79,6 +82,7 @@
     --threads \${GALAXY_SLOTS:-1}
     --custom-fasta '$database_type.custom_fasta'
     --custom-database-name '$database_type.custom_database_name'
+    --custom-source-info '$database_type.custom_source_info'
     $database_type.skip_maps
     --kmer-len $database_type.kmer_len
     --minimizer-len $database_type.minimizer_len
@@ -236,6 +240,7 @@
             <when value="custom">
                 <param name="custom_fasta" type="data" format="fasta" multiple="False" label="Select history item" />
                 <param name="custom_database_name" type="text" label="Name for this database" />
+                <param name="custom_source_info" type="text" label="Database source info" help="Concise description of how this build has been sourced. This description will be appended (in parentheses) to the user-facing name of the build. Example: https://doi.org/10.5281/zenodo.8339822, from v1 assembly_summary.txt sequences" />
                 <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." />
                 <expand macro="common_params" />
             </when>
@@ -250,6 +255,7 @@
                 <param name="database_type" value="custom" />
                 <param name="custom_fasta" value="adapter.fa" />
                 <param name="custom_database_name" value="custom_database" />
+                <param name="custom_source_info" value="from adapter.fa test data" />
                 <param name="skip_maps" value="true" />
                 <param name="kmer_len" value="35" />
                 <param name="minimizer_spaces" value="6"/>
@@ -260,7 +266,7 @@
                 <assert_contents>
                     <has_text text="kraken2_databases"/>
                     <has_text text="path"/>
-                    <has_text text="custom_database"/>
+                    <has_text text="custom_database (from adapter.fa test data, kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7)"/>
                 </assert_contents>
             </output>
         </test>
@@ -328,6 +334,6 @@
 Build Kraken2 databases or download `prebuilt Kraken2 RefSeq indexes <https://benlangmead.github.io/aws-indexes/k2>`__
     ]]></help>
     <citations>
-        <citation type="doi">10.1186/gb-2014-15-3-r46</citation>
+        <citation type="doi">10.1186/s13059-019-1891-0</citation>
     </citations>
 </tool>