Mercurial > repos > iuc > data_manager_build_kma_index
changeset 3:b221a6c2ed00 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kma_index/ commit d0b3d69f25fe8419e5b98165ae53b59651d711b1
author | iuc |
---|---|
date | Fri, 10 Oct 2025 12:03:10 +0000 |
parents | 2c532a6eee3f |
children | |
files | data_manager/kma_build_index.py data_manager/kma_build_index.xml data_manager_conf.xml test-data/kma_data_manager.json test-data/kma_index.loc tool-data/kma_index.loc.sample tool_data_table_conf.xml.test |
diffstat | 6 files changed, 69 insertions(+), 128 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/kma_build_index.py Sun Nov 22 12:45:39 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import argparse -import errno -import json -import os -import subprocess -import uuid - - -DATA_TABLE_NAME = "kma_index" - - -def kma_build_index(kma_index_args, index_name, target_directory, data_table_name=DATA_TABLE_NAME): - UUID = str(uuid.uuid4()) - - os.mkdir(os.path.join(target_directory, UUID)) - - args = [ - '-k', str(kma_index_args["k"]), - '-k_t', str(kma_index_args["k_t"]), - '-k_i', str(kma_index_args["k_i"]), - '-ML', str(kma_index_args["ML"]), - '-ht', str(kma_index_args["ht"]), - '-hq', str(kma_index_args["hq"]), - '-o', os.path.join(UUID, "index"), - '-i', " ".join(kma_index_args["fasta"]), - ] - - subprocess.check_call(' '.join(['kma index'] + args), cwd=target_directory, shell=True) - - data_table_entry = { - 'data_tables': { - data_table_name: [ - { - "value": UUID, - "name": index_name, - "path": os.path.join(UUID, "index"), - } - ] - } - } - - return data_table_entry - - -def main(args): - with open(args.data_manager_json) as fh: - data_manager_input = json.load(fh) - - target_directory = data_manager_input['output_data'][0]['extra_files_path'] - - try: - os.mkdir(target_directory) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(target_directory): - pass - else: - raise - - data_manager_output = {} - - kma_index_args = { - "k": args.k, - "k_t": args.k_t, - "k_i": args.k_i, - "ML": args.ML, - "ht": args.ht, - "hq": args.hq, - "fasta": args.fasta, - } - - data_manager_output = kma_build_index( - kma_index_args, - args.index_name, - target_directory, - ) - - with open(args.data_manager_json, 'w') as fh: - json.dump(data_manager_output, fh, sort_keys=True) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('data_manager_json') - parser.add_argument('--k', dest='k', type=int, default=16, help='') - parser.add_argument('--k_t', dest='k_t', type=int, default=16, help='') - parser.add_argument('--k_i', dest='k_i', type=int, default=16, help='') - parser.add_argument('--ML', dest='ML', type=int, default=16, help='') - parser.add_argument('--ht', dest='ht', type=float, default=1.0, help='') - parser.add_argument('--hq', dest='hq', type=float, default=1.0, help='') - parser.add_argument('--name', dest='index_name', help='') - parser.add_argument('fasta', nargs='+', help='fasta file(s) to index') - args = parser.parse_args() - main(args)
--- a/data_manager/kma_build_index.xml Sun Nov 22 12:45:39 2020 +0000 +++ b/data_manager/kma_build_index.xml Fri Oct 10 12:03:10 2025 +0000 @@ -1,47 +1,70 @@ <?xml version="1.0"?> -<tool id="kma_build_index" name="KMA Index Builder" tool_type="manage_data" version="1.2.21+galaxy0" profile="19.01"> +<tool id="kma_build_index" name="KMA Index Builder" tool_type="manage_data" version="1.6.6+galaxy0" profile="24.1"> <description>kma index builder</description> <requirements> - <requirement type="package" version="1.2.21">kma</requirement> - <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="1.6.6">kma</requirement> </requirements> <version_command>kma -v</version_command> <command detect_errors="exit_code"> <![CDATA[ - python '${__tool_directory__}/kma_build_index.py' - '${out_file}' - --k ${k} - --k_t ${k_t} - --k_i ${k_i} - --ML ${ML} - --ht ${ht} - --hq ${hq} - --name '${name}' - '${fasta}' - ]]> - </command> +mkdir -p '${out_file.extra_files_path}'/index_folder && +kma_index + -i '$fasta' + -o '${out_file.extra_files_path}'/index_folder/index + ## index options + -k $k + -k_t $k_t + -k_i $k_i + -ML $ML + -ht $ht + -hq $hq + ]]></command> + <configfiles> + <configfile filename="dm.json"><![CDATA[#slurp +#from datetime import datetime +#from hashlib import md5 +#set value=datetime.now().strftime("%Y-%m-%d") + "_" + md5($name.encode()).hexdigest() +{"data_tables": {"kma_index": [{"value": "$value", "name": "$name", "path": "index_folder"}]}} +]]></configfile> + </configfiles> <inputs> - <param name="k" type="integer" value="16" min="8" max="32" label="kmer size used for indexing the database." /> - <param name="k_t" type="integer" value="16" min="8" max="32" label="kmer size used to identify template candidates when running KMA." /> - <param name="k_i" type="integer" value="16" min="8" max="32" label="kmer size used when performing alignments between two sequences." /> - <param name="ML" type="integer" value="16" label="Minimum length of templates" /> - <param name="ht" type="float" value="1.0" min="0.0" max="1.0" label="Homology template" /> - <param name="hq" type="float" value="1.0" min="0.0" max="1.0" label="Homology query" /> - <param name="name" type="text" label="Name for index" /> - <param name="fasta" type="data" format="fasta" label="Sequence to index (fasta)" /> + <param argument="-k" type="integer" value="16" min="8" max="32" label="kmer size used for indexing the database." /> + <param argument="-k_t" type="integer" value="16" min="8" max="32" label="kmer size used to identify template candidates when running KMA." /> + <param argument="-k_i" type="integer" value="16" min="8" max="32" label="kmer size used when performing alignments between two sequences." /> + <param argument="-ML" type="integer" value="16" label="Minimum length of templates" /> + <param argument="-ht" type="float" value="1.0" min="0.0" max="1.0" label="Homology template" /> + <param argument="-hq" type="float" value="1.0" min="0.0" max="1.0" label="Homology query" /> + <param name="fasta" type="data" format="fasta" label="Sequence to index (fasta)" /> + <param name="name" type="text" optional="false" label="Name for index" help="Choose a name that describes the indexed sequence and possibly any non-default indexing parameters."> + <validator type="empty_field"/> + </param> </inputs> <outputs> - <data name="out_file" format="data_manager_json" /> + <data name="out_file" format="data_manager_json" from_work_dir="dm.json"/> </outputs> <tests> <test> <param name="fasta" value="phiX174.fasta"/> <param name="name" value="index"/> - <output name="out_file" value="kma_data_manager.json" compare="sim_size" /> + <output name="out_file" ftype="data_manager_json"> + <assert_contents> + <has_text text='"kma_index":' /> + <has_text text='"name": "index"' /> + <has_text text='"path": "index_folder"' /> + <has_text_matching expression='"value": "\d{4}-\d{2}-\d{2}_6a992d5529f459a44fee58c733255e86"' /> + </assert_contents> + </output> </test> </tests> - <help> - </help> + <help><![CDATA[ +**Data manager for building an index for the kma aligner** + +Runs the kma index command on the input sequence with the chosen options. + +.. class:: infomark + + You may want to capture details about the reference sequence and any non-default indexing parameters in the name of the index so that users have a better idea what they are selecting for tool runs. + ]]></help> <citations> <citation type="doi">10.1186/s12859-018-2336-6</citation> </citations>
--- a/data_manager_conf.xml Sun Nov 22 12:45:39 2020 +0000 +++ b/data_manager_conf.xml Fri Oct 10 12:03:10 2025 +0000 @@ -6,10 +6,10 @@ <column name="name"/> <column name="path" output_ref="out_file"> <move type="directory"> - <source>#import os#${os.path.dirname($path)}</source> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kma_index/#import os#${os.path.dirname($path)}</target> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kma_index/${value}</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kma_index/${path}</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kma_index/${value}/index</value_translation> <value_translation type="function">abspath</value_translation> </column> </output>
--- a/test-data/kma_data_manager.json Sun Nov 22 12:45:39 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -{"data_tables": {"kma_index": [{"name": "b153d490-f0f2-4980-8d1b-c7365e4d69ec/index", "path": "index", "value": "b153d490-f0f2-4980-8d1b-c7365e4d69ec"}]}}
--- a/tool-data/kma_index.loc.sample Sun Nov 22 12:45:39 2020 +0000 +++ b/tool-data/kma_index.loc.sample Fri Oct 10 12:03:10 2025 +0000 @@ -0,0 +1,8 @@ +# This data table has records in the format +# value name path +# where paths point to folders with kma index files but include the common prefix (always index) of these files. +# A record could look like this: +# 2025-10-10_abbf9e73ffdc7e46450db9781b23e605 an_example_index /tool-data/kma_index/2025-10-10_abbf9e73ffdc7e46450db9781b23e605/index +# with the following files inside /tool-data/kma_index/2025-10-10_abbf9e73ffdc7e46450db9781b23e605/: +# index.comp.b index.length.b index.name index.seq.b +# The entire recorded path (including the /index) can be passed to kma unaltered and it will discover the index files from it.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Oct 10 12:03:10 2025 +0000 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of kma indexes in the required format --> + <table name="kma_index" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/kma_index.loc" /> + </table> +</tables>