Mercurial > repos > iuc > data_manager_build_kraken2_database
changeset 12:90b4d4f0a3a4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 9835da32741d05d129a1a44835f66e32713770ad
author | iuc |
---|---|
date | Fri, 18 Oct 2024 17:08:15 +0000 |
parents | 1e34d2e3d285 |
children | e9ee4d074d5d |
files | data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml |
diffstat | 2 files changed, 43 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py Mon Jul 15 09:21:32 2024 +0000 +++ b/data_manager/kraken2_build_database.py Fri Oct 18 17:08:15 2024 +0000 @@ -1,12 +1,11 @@ #!/usr/bin/env python -from __future__ import print_function - import argparse import datetime import errno import json import os +import re import shutil import subprocess import sys @@ -321,12 +320,33 @@ return data_table_entry -def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME): + now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") + + database_value = "_".join([ + now, + re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'), + "kmer-len", str(kraken2_args["kmer_len"]), + "minimizer-len", str(kraken2_args["minimizer_len"]), + "minimizer-spaces", str(kraken2_args["minimizer_spaces"]), + "load-factor", str(kraken2_args["load_factor"]), + ]) + + database_name = " ".join([ + custom_database_name, + "(" + custom_source_info + ",", + "kmer-len=" + str(kraken2_args["kmer_len"]) + ",", + "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",", + "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",", + "load-factor=" + str(kraken2_args["load_factor"]) + ")", + ]) + + database_path = database_value args = [ '--threads', str(kraken2_args["threads"]), '--download-taxonomy', - '--db', custom_database_name, + '--db', database_path, ] if kraken2_args['skip_maps']: @@ -337,7 +357,7 @@ args = [ '--threads', str(kraken2_args["threads"]), '--add-to-library', kraken2_args["custom_fasta"], - '--db', custom_database_name + '--db', database_path, ] subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) @@ -349,7 +369,7 @@ '--minimizer-len', str(kraken2_args["minimizer_len"]), '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), '--load-factor', str(kraken2_args["load_factor"]), - '--db', custom_database_name + '--db', database_path, ] subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) @@ -358,7 +378,7 @@ args = [ '--threads', str(kraken2_args["threads"]), '--clean', - '--db', custom_database_name + '--db', database_path, ] subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) @@ -367,9 +387,9 @@ 'data_tables': { data_table_name: [ { - "value": custom_database_name, - "name": custom_database_name, - "path": custom_database_name + "value": database_value, + "name": database_name, + "path": database_path, } ] } @@ -393,6 +413,7 @@ parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') + parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)') parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') args = parser.parse_args() @@ -464,6 +485,7 @@ data_manager_output = kraken2_build_custom( kraken2_args, args.custom_database_name, + args.custom_source_info, target_directory, ) else:
--- a/data_manager/kraken2_build_database.xml Mon Jul 15 09:21:32 2024 +0000 +++ b/data_manager/kraken2_build_database.xml Fri Oct 18 17:08:15 2024 +0000 @@ -3,12 +3,12 @@ <description>database builder</description> <macros> <token name="@TOOL_VERSION@">2.1.3</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@PROFILE@">22.01</token> <xml name="common_params"> <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" /> <param name="minimizer_len" type="integer" value="31" label="Minimizer length" /> - <param name="minimizer_spaces" type="integer" value="6" label="Minimizer spaces" /> + <param name="minimizer_spaces" type="integer" value="7" label="Minimizer spaces" /> <param name="load_factor" type="float" value="0.7" min="0" max="1" label="Load factor" help="Proportion of the hash table to be populated" /> <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" /> </xml> @@ -46,9 +46,12 @@ <option value="pluspfp_16gb">PlusPFP-16 (PlusPFP with DB capped at 16 GB; ~15 GB)</option> </xml> </macros> + <xrefs> + <xref type="bio.tools">kraken2</xref> + </xrefs> <requirements> <requirement type="package" version="@TOOL_VERSION@">kraken2</requirement> - <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="3.13">python</requirement> </requirements> <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command> <command detect_errors="exit_code"><![CDATA[ @@ -79,6 +82,7 @@ --threads \${GALAXY_SLOTS:-1} --custom-fasta '$database_type.custom_fasta' --custom-database-name '$database_type.custom_database_name' + --custom-source-info '$database_type.custom_source_info' $database_type.skip_maps --kmer-len $database_type.kmer_len --minimizer-len $database_type.minimizer_len @@ -236,6 +240,7 @@ <when value="custom"> <param name="custom_fasta" type="data" format="fasta" multiple="False" label="Select history item" /> <param name="custom_database_name" type="text" label="Name for this database" /> + <param name="custom_source_info" type="text" label="Database source info" help="Concise description of how this build has been sourced. This description will be appended (in parentheses) to the user-facing name of the build. Example: https://doi.org/10.5281/zenodo.8339822, from v1 assembly_summary.txt sequences" /> <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> <expand macro="common_params" /> </when> @@ -250,6 +255,7 @@ <param name="database_type" value="custom" /> <param name="custom_fasta" value="adapter.fa" /> <param name="custom_database_name" value="custom_database" /> + <param name="custom_source_info" value="from adapter.fa test data" /> <param name="skip_maps" value="true" /> <param name="kmer_len" value="35" /> <param name="minimizer_spaces" value="6"/> @@ -260,7 +266,7 @@ <assert_contents> <has_text text="kraken2_databases"/> <has_text text="path"/> - <has_text text="custom_database"/> + <has_text text="custom_database (from adapter.fa test data, kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7)"/> </assert_contents> </output> </test> @@ -328,6 +334,6 @@ Build Kraken2 databases or download `prebuilt Kraken2 RefSeq indexes <https://benlangmead.github.io/aws-indexes/k2>`__ ]]></help> <citations> - <citation type="doi">10.1186/gb-2014-15-3-r46</citation> + <citation type="doi">10.1186/s13059-019-1891-0</citation> </citations> </tool>