changeset 0:5f9d6aee2256 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_malt_index_builder commit 710e56e0e522b0ed060dab2fecf05ed1c79c928f"
author iuc
date Wed, 17 Nov 2021 08:22:56 +0000
parents
children
files data_manager/malt_index_builder.py data_manager/malt_index_builder.xml data_manager_conf.xml test-data/all_fasta.loc test-data/malt_index_builder1.json test-data/malt_index_builder2.json test-data/malt_indices.loc test-data/phiX174.fasta tool-data/all_fasta.loc.sample tool-data/malt_indices.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 11 files changed, 407 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/malt_index_builder.py	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+import json
+import optparse
+import os
+import subprocess
+import sys
+
+
+def get_id_name(params, dbkey, fasta_description=None):
+    # Resolve the (sequence_id, sequence_name) pair used for the data table
+    # entry.  Both values are read from params['param_dict'] and may be
+    # empty, in which case they are filled in from the other arguments.
+    #
+    # Returns a 2-tuple: (sequence_id, sequence_name).
+    sequence_id = params['param_dict']['sequence_id']
+    if not sequence_id:
+        # No explicit ID supplied: fall back to the dbkey.
+        sequence_id = dbkey
+
+    sequence_name = params['param_dict']['sequence_name']
+    if not sequence_name:
+        # No explicit name supplied: prefer the fasta description and,
+        # if that is empty as well, fall back to the dbkey.
+        sequence_name = fasta_description
+        if not sequence_name:
+            sequence_name = dbkey
+    return sequence_id, sequence_name
+
+
+def build_malt_index(data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, sequence_type, shapes, max_hits_per_seed, protein_reduct):
+    # Run malt-build on fasta_filename, writing the index into
+    # target_directory, then record a "malt_indices" data table entry
+    # in data_manager_dict.  Exits the process via sys.exit() if
+    # malt-build returns a non-zero code.
+    #
+    # shapes, max_hits_per_seed and protein_reduct are optional: each is
+    # passed to malt-build only when it is not None.
+    # NOTE(review): params is accepted but not used in this function.
+    #
+    # The malt-build program produces a directory of files,
+    # so the data table path entry will be a directory and
+    # not an index file.
+    fasta_base_name = os.path.split(fasta_filename)[-1]
+    # Symlink the fasta file into the target directory and hand that
+    # link to malt-build as its input.
+    sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
+    os.symlink(fasta_filename, sym_linked_fasta_filename)
+    args = ['malt-build', '--input', sym_linked_fasta_filename, '--sequenceType', sequence_type, '--index', target_directory]
+    # GALAXY_SLOTS, when set in the environment, is forwarded as the
+    # thread count (kept as a string, as read from the environment).
+    threads = os.environ.get('GALAXY_SLOTS')
+    if threads:
+        args.extend(['--threads', threads])
+    if shapes is not None:
+        args.extend(['--shapes', shapes])
+    if max_hits_per_seed is not None:
+        args.extend(['--maxHitsPerSeed', max_hits_per_seed])
+    if protein_reduct is not None:
+        args.extend(['--proteinReduct', protein_reduct])
+    # Run without a shell, with the target directory as working directory.
+    proc = subprocess.Popen(args=args, shell=False, cwd=target_directory)
+    return_code = proc.wait()
+    if return_code:
+        # NOTE(review): on failure the symlink created above is not removed.
+        sys.exit('Error building index, return_code: %d' % return_code)
+    # Remove unwanted files from the output directory.
+    os.remove(sym_linked_fasta_filename)
+    # The path entry here is the directory
+    # where the index files will be located,
+    # not a single index file (malt-build
+    # produces a directory of files, which
+    # is considered an index).
+    # NOTE(review): path is None here; presumably the data manager
+    # configuration fills in the final path - confirm.
+    data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=None)
+    _add_data_table_entry(data_manager_dict, data_table_entry)
+
+
+def _add_data_table_entry(data_manager_dict, data_table_entry):
+    # Append data_table_entry to the "malt_indices" list stored under
+    # data_manager_dict['data_tables'], creating the 'data_tables' dict
+    # and the per-table list when they do not exist yet.
+    #
+    # data_manager_dict is modified in place and also returned.
+    data_table_name = "malt_indices"
+    data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
+    data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
+    data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
+    return data_manager_dict
+
+
+def main():
+    # Command-line entry point: parse the options, read the JSON
+    # parameter file given as the first positional argument, build the
+    # MALT index, and overwrite that same file with the resulting
+    # data manager dictionary.
+    parser = optparse.OptionParser()
+    parser.add_option('-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", help='fasta filename')
+    parser.add_option('-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", help='fasta dbkey')
+    parser.add_option('-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta description')
+    parser.add_option('-e', '--sequence_type', dest='sequence_type', action='store', type="string", help='DNA or Protein sequences')
+    parser.add_option('-p', '--shapes', dest='shapes', action='store', type="string", default=None, help='Comma-separated list of seed shapes')
+    parser.add_option('-m', '--max_hits_per_seed', dest='max_hits_per_seed', action='store', type="string", default=None, help='Maximum number of hits per seed')
+    parser.add_option('-r', '--protein_reduct', dest='protein_reduct', action='store', type="string", default=None, help='Name or definition of protein alphabet reduction')
+    (options, args) = parser.parse_args()
+
+    # The first positional argument is the JSON file; it is read here
+    # and rewritten at the end of this function.
+    filename = args[0]
+
+    with open(filename) as fh:
+        params = json.load(fh)
+    # The index is written to the extra files path of the first output
+    # dataset.  os.mkdir raises if the directory already exists.
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
+    data_manager_dict = {}
+
+    dbkey = options.fasta_dbkey
+
+    # Reject a missing, empty or '?' dbkey.
+    # NOTE(review): this check runs after target_directory was created.
+    if dbkey in [None, '', '?']:
+        raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey))
+
+    sequence_id, sequence_name = get_id_name(params, dbkey=dbkey, fasta_description=options.fasta_description)
+
+    # Build the index.
+    build_malt_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, options.sequence_type, options.shapes, options.max_hits_per_seed, options.protein_reduct)
+
+    # Save info to json file.
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
+
+
+# Run main() only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/malt_index_builder.xml	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,131 @@
+<tool id="data_manager_malt_index_builder" name="MALT index builder" tool_type="manage_data" version="@TOOL_VERSION@+galaxy0" profile="21.01">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="3.8">python</requirement>
+        <requirement type="package" version="0.53">malt</requirement>
+    </requirements>
+    <macros>
+        <token name="@TOOL_VERSION@">0.5.3</token>
+    </macros>
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/malt_index_builder.py'
+'${out_file}'
+--fasta_filename '${all_fasta_source.fields.path}'
+--fasta_dbkey '${all_fasta_source.fields.dbkey}'
+--fasta_description '${all_fasta_source.fields.name}'
+--sequence_type '${sequence_type}'
+#if str($protein_reduct_setting_cond.protein_reduct_setting) == 'yes':
+    --protein_reduct '${protein_reduct_setting_cond.protein_reduct}'
+#end if
+#if str($seed_setting_cond.seed_setting) == 'yes':
+    --shapes '${seed_setting_cond.shapes}'
+    ## malt-build requires a string here.
+    --max_hits_per_seed '${seed_setting_cond.max_hits_per_seed}'
+#end if
+    ]]></command>
+    <inputs>
+        <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
+            <options from_data_table="all_fasta"/>
+        </param>
+        <param name="sequence_name" type="text" value="" label="Name of sequence"/>
+        <param name="sequence_id" type="text" value="" label="ID for sequence"/>
+        <param  name="sequence_type" type="select" label="Reference sequences type" help="Use the DNA setting for RNA sequences">
+            <option value="DNA" selected="true">DNA</option>
+            <option value="Protein">Protein</option>
+        </param>
+        <conditional name="protein_reduct_setting_cond">
+            <param name="protein_reduct_setting" type="select" label="Specify protein alphabet reduction?" help="Used only if the reference sequences are Protein sequences">
+                <option selected="true" value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="protein_reduct" type="select" label="Name or definition of protein alphabet reduction">
+                    <option selected="true" value="DIAMOND_11">DIAMOND_11</option>
+                    <option value="BLOSUM50_10">BLOSUM50_10</option>
+                    <option value="BLOSUM50_11">BLOSUM50_11</option>
+                    <option value="BLOSUM50_15">BLOSUM50_15</option>
+                    <option value="BLOSUM50_4">BLOSUM50_4</option>
+                    <option value="BLOSUM50_8">BLOSUM50_8</option>
+                    <option value="GBMR4">GBMR4</option>
+                    <option value="HSDM17">HSDM17</option>
+                    <option value="MALT_10">MALT_10</option>
+                    <option value="SDM12">SDM12</option>
+                    <option value="UNREDUCED">UNREDUCED</option>
+                </param>
+            </when>
+        </conditional>
+        <conditional name="seed_setting_cond">
+            <param name="seed_setting" type="select" label="Specify seed settings?">
+                <option selected="true" value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="shapes" type="text" value="" label="Comma-separated list of seed shapes" help="See help text below">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+                <param name="max_hits_per_seed" type="integer" value="1" min="1" label="Maximum number of hits per seed"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="all_fasta_source" value="phiX174"/>
+            <param name="sequence_name" value="Pretty name for phiX174"/>
+            <param name="sequence_id" value="phiX174-1"/>
+            <output name="out_file" value="malt_index_builder1.json"/>
+        </test>
+        <test>
+            <param name="all_fasta_source" value="phiX174"/>
+            <param name="sequence_name" value="Pretty name for phiX174"/>
+            <param name="sequence_id" value="phiX174-1"/>
+            <param name="seed_setting" value="yes"/>
+            <param name="shapes" value="111110111011110110111111"/>
+            <output name="out_file" value="malt_index_builder1.json"/>
+        </test>
+        <test>
+            <param name="all_fasta_source" value="phiX174"/>
+            <param name="sequence_name" value="Pretty name for phiX174"/>
+            <param name="sequence_id" value="phiX174-2"/>
+            <param name="sequence_type" value="Protein"/>
+            <param name="protein_reduct_setting" value="yes"/>
+            <param name="protein_reduct" value="BLOSUM50_10"/>
+            <output name="out_file" value="malt_index_builder2.json"/>
+        </test>
+    </tests>
+    <help>
+.. class:: infomark
+
+**Notice:** Values for Name and ID of sequence will be generated automatically if left blank.
+
+**What it does**
+
+Takes a reference sequence database (represented by a FastA file, possibly in gzip format) as input and produces an index that
+can be used by the malt tool as input. If MALT is to be used as a taxonomic and/or functional analysis tool as well as an
+alignment tool, then this MALT index builder tool must be provided with a number of mapping files that are used to map reference
+sequences to taxonomic or functional classes or to locate genes in DNA reference sequences.
+
+**Options**
+
+ * **Specify protein alphabet reduction** - specify the alphabet reduction in the case of protein reference sequences.
+ * **Specify seed settings** - specify the settings for controlling how MALT uses its seed-and-extend approach based on “spaced seeds”.
+
+  * **Shapes** - specify the seed shapes used. For DNA sequences, the default seed shape is: 111110111011110110111111. For protein sequences, by default MALT uses the following four shapes: 111101101110111, 1111000101011001111, 11101001001000100101111 and 11101001000010100010100111.
+  * **Maximum hits per seed** - specify the maximum number of hits per seed - MALT uses this to calculate a maximum number of hits per hash value.
+
+    </help>
+    <citations>
+        <citation type="doi">https://doi.org/10.1101/050559</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,18 @@
+<data_managers>
+    <data_manager tool_file="data_manager/malt_index_builder.xml" id="data_manager_malt_index_builder">
+        <data_table name="malt_indices">
+            <output>
+                <column name="value"/>
+                <column name="dbkey"/>
+                <column name="name"/>
+                <column name="path" output_ref="out_file">
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">malt_index/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/malt_index/${value}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+phiX174	phiX174	phiX174	${__HERE__}/phiX174.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/malt_index_builder1.json	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"malt_indices": [{"dbkey": "phiX174", "name": "Pretty name for phiX174", "path": null, "value": "phiX174-1"}]}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/malt_index_builder2.json	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"malt_indices": [{"dbkey": "phiX174", "name": "Pretty name for phiX174", "path": null, "value": "phiX174-2"}]}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174.fasta	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,79 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/malt_indices.loc.sample	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,23 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use a directory of MALT indexed data files.  The malt_indices.loc
+# file has this format (longer white space characters are TAB characters):
+#
+# <unique_build_id>   <dbkey>   <display_name>   <directory_path>
+#
+# So, for example, if you have the phiX174 index stored in
+# /depot/data2/galaxy/phiX/base/,
+# then the malt_indices.loc entry would look like this:
+#
+# phiX174   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base
+#
+# and your /depot/data2/galaxy/phiX/base/ directory
+# would contain MALT index files for phiX174, something like this:
+#
+# -rw-r----- 1 galaxy galaxy    69 Oct 15 09:51 index0.idx
+# -rw-r----- 1 galaxy galaxy  5442 Oct 15 09:51 ref.db
+# -rw-r----- 1 galaxy galaxy     8 Oct 15 09:51 ref.idx
+# -rw-r----- 1 galaxy galaxy    25 Oct 15 09:51 ref.inf
+# -rw-r----- 1 galaxy galaxy 24952 Oct 15 09:51 table0.db
+# -rw-r----- 1 galaxy galaxy 65536 Oct 15 09:51 table0.idx
+#
+phiX174	NC_001422	Coliphage phiX174	/depot/data2/galaxy/tool-data/malt_index/phiX174
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of indices MALT versions 0.5.3 and higher -->
+    <table name="malt_indices" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/malt_indices.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Nov 17 08:22:56 2021 +0000
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Location of index files for malt_build version 0.5.3 and higher -->
+    <table name="malt_indices" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/malt_indices.loc" />
+    </table>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>