changeset 0:d505990b8c89 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/malt commit 04aaa266f5b2e557f42c7feca4e081c7e9c1dae6"
author iuc
date Fri, 12 Nov 2021 18:39:23 +0000
parents
children 87a1202c03e8
files macros.xml malt_run.xml test-data/alignments_output1.sam test-data/alignments_output2.txt test-data/input.fastq.gz test-data/malt_indices.loc test-data/phiX/index0.idx test-data/phiX/ref.db test-data/phiX/ref.idx test-data/phiX/ref.inf test-data/phiX/table0.db test-data/phiX/table0.idx test-data/unaligned_output2.fasta test-data/viral.1.protein.fasta.gz tool-data/malt_indices.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 17 files changed, 436 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,29 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.5.3</token> <!-- first part of the version attribute of the tool in malt_run.xml -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- appended to the tool version after "+galaxy" -->
+    <token name="@PROFILE@">20.09</token> <!-- value of the tool's profile attribute -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.53">malt</requirement> <!-- NOTE(review): written "0.53" while @TOOL_VERSION@ is "0.5.3"; presumably the package channel's spelling, confirm before unifying -->
+        </requirements>
+    </xml>
+    <macro name="sub_matrix"> <!-- expanded in the BlastP and BlastX branches of mode_cond in malt_run.xml; the chosen value is passed as the subMatrix argument -->
+        <param name="sub_matrix" type="select" label="Select the protein substitution matrix to use">
+            <option value="BLOSUM62" selected="True">BLOSUM62</option>
+            <option value="BLOSUM45">BLOSUM45</option>
+            <option value="BLOSUM50">BLOSUM50</option>
+            <option value="BLOSUM80">BLOSUM80</option>
+            <option value="BLOSUM90">BLOSUM90</option>
+        </param>
+    </macro>
+    <macro name="forward_reverse_only"> <!-- expanded in the BlastN and BlastX branches of mode_cond; NOTE(review): nothing here prevents both flags from being checked at once -->
+        <param name="forward_only" type="boolean" truevalue="--forwardOnly" falsevalue="" checked="false" label="Align query forward strand only?"/>
+        <param name="reverse_only" type="boolean" truevalue="--reverseOnly" falsevalue="" checked="false" label="Align query reverse strand only?"/>
+    </macro>
+    <xml name="citations"> <!-- a type="doi" citation takes the bare DOI, not a resolver URL -->
+        <citations>
+            <citation type="doi">10.1101/050559</citation>
+        </citations>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/malt_run.xml	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,274 @@
+<tool id="malt_run" name="MALT analyzer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+
+## Path field of the selected malt_indices row; it points to the index directory.
+#set ref = str($reference.fields.path)
+
+#set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier))
+## malt-run uses the file extension to determine the input format, so the sanitized link name gets .fasta or .fastq, plus .gz for compressed datatypes.
+#if $input.is_of_type('fasta', 'fasta.gz'):
+    #set input_identifier = $input_identifier + '.fasta'
+#else:
+    #set input_identifier = $input_identifier + '.fastq'
+#end if
+#if $input.ext.endswith('.gz'):
+    #set input_identifier = $input_identifier + '.gz'
+#end if
+ln -s '${input}' '${input_identifier}' &&
+
+malt-run 
+--mode '$mode_cond.mode'
+
+#if str($mode_cond.mode) == 'BlastN':
+    --matchScore '$mode_cond.matchScore'
+    --mismatchScore '$mode_cond.mismatchScore'
+    --setLambda '$mode_cond.setLambda'
+    --setK '$mode_cond.setK'
+    $mode_cond.forward_only
+    $mode_cond.reverse_only
+#else if str($mode_cond.mode) == 'BlastP':
+    --subMatrix '$mode_cond.sub_matrix'
+#else if str($mode_cond.mode) == 'BlastX':
+    --subMatrix '$mode_cond.sub_matrix'
+    $mode_cond.forward_only
+    $mode_cond.reverse_only
+#end if
+
+--alignmentType '$alignmentType'
+--inFile '$input_identifier'
+--index '$ref'
+## malt-run requires correct output file extensions, so write to a fixed .rma6 name here and move it to the Galaxy output afterwards.
+--output './output.rma6'
+--numThreads \${GALAXY_SLOTS:-12}
+
+--memoryMode '$advanced_options_performance.memoryMode'
+--maxTables '$advanced_options_performance.maxTables'
+$advanced_options_performance.replicateQueryCache
+
+--minBitScore '$advanced_options_filter.minBitScore'
+--maxExpected '$advanced_options_filter.maxExpected'
+--minPercentIdentity '$advanced_options_filter.minPercentIdentity'
+--maxAlignmentsPerQuery '$advanced_options_filter.maxAlignmentsPerQuery'
+--maxAlignmentsPerRef '$advanced_options_filter.maxAlignmentsPerRef'
+
+--topPercent '$advanced_options_lca.topPercent'
+--minSupportPercent '$advanced_options_lca.minSupportPercent'
+--minSupport '$advanced_options_lca.minSupport'
+--minPercentIdentityLCA '$advanced_options_lca.minPercentIdentityLCA'
+$advanced_options_lca.useMinPercentIdentityFilterLCA
+$advanced_options_lca.weightedLCA
+$advanced_options_lca.magnitudes
+
+--maxSeedsPerFrame '$advanced_options_heuristics.maxSeedsPerFrame'
+--maxSeedsPerRef '$advanced_options_heuristics.maxSeedsPerRef'
+--seedShift '$advanced_options_heuristics.seedShift'
+
+--gapOpen '$advanced_options_alignment.gapOpen'
+--gapExtend '$advanced_options_alignment.gapExtend'
+--band '$advanced_options_alignment.band'
+
+#if str($additional_outputs_cond.additional_outputs) == 'yes':
+    $additional_outputs_cond.includeUnaligned
+    #if $additional_outputs_cond.output_alignments_cond.output_alignments:
+        ## malt-run requires correct output file extensions; the selected format name plus .gz is used as the extension.
+        #set alignments_ext = $additional_outputs_cond.output_alignments_cond.format + '.gz'
+        ## This param value must be a path, not a bare file name, so we use the ./ prefix.
+        --alignments './alignments_output.${alignments_ext}'
+        --format '$additional_outputs_cond.output_alignments_cond.format'
+    #end if
+    #if $additional_outputs_cond.output_aligned:
+        ## malt-run requires correct output file extensions.
+        #set aligned_ext = 'fna.gz'
+        ## This param value must be a path, not a bare file name, so we use the ./ prefix.
+        --outAligned './aligned_output.${aligned_ext}'
+    #end if
+    #if $additional_outputs_cond.output_unaligned:
+        ## malt-run requires correct output file extensions.
+        ## This param value must be a path, not a bare file name, so we use the ./ prefix.
+        --outUnaligned './unaligned_output.fna.gz'
+    #end if
+#end if
+
+&& mv 'output.rma6' '$rma6_output'
+
+#if str($additional_outputs_cond.additional_outputs) == 'yes':
+    #if $additional_outputs_cond.output_alignments_cond.output_alignments:
+        ## malt-run always compresses these outputs, so decompress into the Galaxy dataset.
+        && gunzip -c 'alignments_output.${alignments_ext}' > '$alignments_output'
+    #end if
+    #if $additional_outputs_cond.output_aligned:
+        ## malt-run always compresses these outputs, so decompress into the Galaxy dataset.
+        && gunzip -c 'aligned_output.${aligned_ext}' > '$aligned_output'
+    #end if
+    #if $additional_outputs_cond.output_unaligned:
+        ## malt-run always compresses these outputs, so decompress into the Galaxy dataset.
+        && gunzip -c 'unaligned_output.fna.gz' > '$unaligned_output'
+    #end if
+#end if
+]]></command>
+    <inputs>
+        <param name="input" type="data" format="fasta,fasta.gz,fastqsanger.gz,fastqsanger" label="Input file containing DNA or protein sequences"/> <!-- the command section derives the staged file's extension from this dataset's datatype -->
+        <param name="reference" type="select" label="Reference genome"> <!-- options come from the malt_indices data table; the command section reads the selected row's path field -->
+            <options from_data_table="malt_indices">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="A cached reference genome is not available"/>
+            </options>
+        </param>
+        <conditional name="mode_cond" label="Select alignment mode"> <!-- selects the malt-run mode; each branch exposes only the options the command section emits for that mode -->
+            <param argument="--mode" type="select" label="Alignment mode">
+                <option value="BlastN" selected="True">BlastN</option>
+                <option value="BlastP">BlastP</option>
+                <option value="BlastX">BlastX</option>
+            </param>
+            <when value="BlastN">
+                <param argument="--matchScore" type="integer" value="2" label="Alignment match score"/>
+                <param argument="--mismatchScore" type="integer" value="-3" label="Alignment mismatch score"/>
+                <param argument="--setLambda" type="float" value="0.625" label="Parameter Lambda for BLASTN statistics"/>
+                <param argument="--setK" type="float" value="0.41" label="Parameter K for BLASTN statistics"/>
+                <expand macro="forward_reverse_only"/>
+            </when>
+            <when value="BlastP">
+                <expand macro="sub_matrix"/>
+            </when>
+            <when value="BlastX">
+                <expand macro="sub_matrix"/>
+                <expand macro="forward_reverse_only"/>
+            </when>
+        </conditional>
+        <param argument="--alignmentType" type="select" label="Alignment type"> <!-- the selected value is passed verbatim as the alignmentType argument -->
+            <option value="Local" selected="True">Local</option>
+            <option value="SemiGlobal">SemiGlobal</option>
+        </param>
+        <section name="advanced_options_performance" title="Advanced options for performance" expanded="false"> <!-- memoryMode and maxTables are always written to the command line, so the defaults here are what malt-run receives -->
+            <param argument="--memoryMode" type="select" display="radio" label="Memory mode">
+                <option value="load" selected="True">Load all indices into memory</option>
+                <option value="page">Load indices page by page when needed</option>
+                <option value="map">Use memory mapping</option>
+            </param>
+            <param argument="--maxTables" type="integer" value="0" min="0" label="Maximum number of seed tables to use" help="Zero value uses all tables"/>
+            <param argument="--replicateQueryCache" type="boolean" truevalue="--replicateQueryCache" falsevalue="" checked="false" label="Cache results for replicated queries?"/>
+        </section>
+        <section name="advanced_options_filter" title="Advanced options for filter" expanded="false"> <!-- thresholds an alignment must meet to be reported; all five values are always passed to malt-run -->
+            <param argument="--minBitScore" type="float" value="50.0" label="Minimum bit score"/>
+            <param argument="--maxExpected" type="float" value="1.0" label="Maximum expected score"/>
+            <param argument="--minPercentIdentity" type="float" value="0.0" label="Minimum percent identity"/>
+            <param argument="--maxAlignmentsPerQuery" type="integer" value="25" label="Maximum number of alignments per query"/>
+            <param argument="--maxAlignmentsPerRef" type="integer" value="1" label="Maximum number of (non-overlapping) alignments per reference" help="MALT reports up to this many best scoring matches for each hit reference sequence"/>
+        </section>
+        <section name="advanced_options_lca" title="Advanced options for LCA" expanded="false"> <!-- settings for the LCA taxonomic assignment; the four numeric values are always passed, the three flags only when checked -->
+            <param argument="--topPercent" type="float" value="10.0" label="Top percent value for LCA algorithm" help="For each read, matches are used for taxonomic placement whose bit score is within this percentage of the best score"/>
+            <param argument="--minSupportPercent" type="float" value="0.001" min="0" label="Minimum support value for LCA algorithm as a percent of assigned reads" help="Zero value ignores this option"/>
+            <param argument="--minSupport" type="integer" value="0" min="0" label="Minimum support value for LCA algorithm" help="Overrides the above parameter"/>
+            <param argument="--minPercentIdentityLCA" type="float" value="0.0" min="0" label="Minimum percent identity used by the LCA algorithm"/>
+            <param argument="--useMinPercentIdentityFilterLCA" type="boolean" truevalue="--useMinPercentIdentityFilterLCA" falsevalue="" checked="false" label="Use percent identity assignment filter?"/>
+            <param argument="--weightedLCA" type="boolean" truevalue="--weightedLCA" falsevalue="" checked="false" label="Use the weighted LCA for taxonomic assignment?"/>
+            <param argument="--magnitudes" type="boolean" truevalue="--magnitudes" falsevalue="" checked="false" label="Reads have magnitudes (to be used in taxonomic or functional analysis)?"/>
+        </section>
+        <section name="advanced_options_heuristics" title="Advanced options for heuristics" expanded="false"> <!-- seed-matching limits; all three values are always written to the command line -->
+            <param argument="--maxSeedsPerFrame" type="integer" value="100" label="Maximum number of seed matches per offset per read frame"/>
+            <param argument="--maxSeedsPerRef" type="integer" value="20" label="Maximum number of seed matches per read and reference"/>
+            <param argument="--seedShift" type="integer" value="1" label="Seed shift"/>
+        </section>
+        <section name="advanced_options_alignment" title="Advanced options for alignment" expanded="false"> <!-- gap penalties and band width; every argument attribute carries its leading dashes, the derived parameter names are unchanged -->
+            <param argument="--gapOpen" type="integer" value="11" label="Gap open penalty"/>
+            <param argument="--gapExtend" type="integer" value="1" label="Gap extension penalty"/>
+            <param argument="--band" type="integer" value="4" label="Band width/2 for banded alignment"/>
+        </section>
+        <conditional name="additional_outputs_cond"> <!-- gates the optional outputs; the output filters and the command section both test additional_outputs == 'yes' -->
+            <param name="additional_outputs" type="select" label="Specify additional outputs?">
+                <option value="no" selected="True">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param argument="--includeUnaligned" type="boolean" truevalue="--includeUnaligned" falsevalue="" checked="false" label="Include unaligned queries in RMA output file?"/>
+                <conditional name="output_alignments_cond">
+                    <param name="output_alignments" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output alignments?"/>
+                    <when value="false"/>
+                    <when value="true">
+                        <param argument="--format" type="select" label="Alignment format"> <!-- parameter name stays "format"; the value also picks the output datatype through change_format -->
+                            <option value="SAM" selected="True">SAM</option>
+                            <option value="Tab">Tab (tabulated BLAST format)</option>
+                            <option value="Text">Text (full text BLAST matches)</option>
+                        </param>
+                    </when>
+                </conditional>
+                <param name="output_aligned" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output all reads that have at least one significant alignment to the reference?"/>
+                <param name="output_unaligned" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output all reads that do not have any significant alignment to the reference?"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="rma6_output" format="rma6" label="${tool.name} on ${on_string} (RMA6)"/> <!-- the only output without a filter; the others depend on additional_outputs_cond -->
+        <data name="alignments_output" format="sam" label="${tool.name} on ${on_string} (alignments)"> <!-- sam by default; change_format switches to tabular or txt for the Tab and Text formats -->
+            <filter>additional_outputs_cond['additional_outputs'] == 'yes' and additional_outputs_cond['output_alignments_cond']['output_alignments']</filter>
+            <change_format>
+                <when input="additional_outputs_cond.output_alignments_cond.format" value="Tab" format="tabular"/>
+                <when input="additional_outputs_cond.output_alignments_cond.format" value="Text" format="txt"/>
+            </change_format>
+        </data>
+        <data name="aligned_output" format="fasta" label="${tool.name} on ${on_string} (aligned)"> <!-- filled from the gunzipped aligned_output.fna.gz written by malt-run -->
+            <filter>additional_outputs_cond['additional_outputs'] == 'yes' and additional_outputs_cond['output_aligned']</filter>
+        </data>
+        <data name="unaligned_output" format="fasta" label="${tool.name} on ${on_string} (unaligned)"> <!-- filled from the gunzipped unaligned_output.fna.gz written by malt-run -->
+            <filter>additional_outputs_cond['additional_outputs'] == 'yes' and additional_outputs_cond['output_unaligned']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2"> <!-- default BlastN mode on gzipped FASTQ with SAM alignments requested -->
+            <param name="input" value="input.fastq.gz" ftype="fastqsanger.gz" dbkey="phiX"/>
+            <param name="additional_outputs" value="yes"/>
+            <param name="includeUnaligned" value="true"/>
+            <param name="output_alignments" value="true"/>
+            <output name="rma6_output" ftype="rma6">
+                <assert_contents>
+                    <has_size value="76465"/>
+                </assert_contents>
+                <metadata name="dbkey" value="phiX"/>
+            </output>
+            <output name="alignments_output" file="alignments_output1.sam" ftype="sam" compare="contains"/>
+        </test>
+        <test expect_num_outputs="3"> <!-- SemiGlobal run on gzipped FASTA with Text alignments and the unaligned reads output -->
+            <param name="input" value="viral.1.protein.fasta.gz" ftype="fasta.gz" dbkey="phiX"/>
+            <param name="alignmentType" value="SemiGlobal"/>
+            <param name="additional_outputs" value="yes"/>
+            <param name="includeUnaligned" value="true"/>
+            <param name="output_alignments" value="true"/>
+            <param name="format" value="Text"/>
+            <param name="output_unaligned" value="true"/>
+            <output name="rma6_output" ftype="rma6">
+                <assert_contents>
+                    <has_size value="29338"/>
+                </assert_contents>
+                <metadata name="dbkey" value="phiX"/>
+            </output>
+            <output name="alignments_output" file="alignments_output2.txt" ftype="txt" compare="contains"/>
+            <output name="unaligned_output" file="unaligned_output2.fasta" ftype="fasta" compare="contains"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Align one or more files of input sequences (DNA or proteins) against an index representing a collection of reference DNA
+or protein sequences. Depending on the type of input and reference sequences, the program can be run in BLASTN, BLASTP
+or BLASTX mode.
+
+**Options**
+
+  **Input file** - select the file of query sequences, which must be in FastA or FastQ format and may be gzipped.
+  **Reference genome** - select the index built by the **MALT index builder** tool.
+  **Alignment mode** - run the program in BlastN, BlastP or BlastX mode, that is, align DNA against DNA, protein against protein, or DNA reads against protein references, respectively.  BlastN mode can only be used if the employed index contains DNA sequences, whereas the BlastP and BlastX modes are only applicable to an index based on protein reference sequences.
+  **Alignment type** - specify the type of alignments to be performed. By default, this is set to Local and the program performs local alignment just like BLAST programs do.  Alternatively, this can be set to SemiGlobal and the program will perform semi-global alignment in which reads are aligned end-to-end.
+  **Include unaligned queries in RMA output file** - ensure that all unaligned queries are placed into the output RMA file. By default, only queries that have an alignment are included.
+    </help>
+    <citations> <!-- a type="doi" citation takes the bare DOI, not a resolver URL -->
+        <citation type="doi">10.1101/050559</citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alignments_output1.sam	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,5 @@
+@HD	VN:1.5	SO:unsorted	GO:query
+@PG	ID:1	PN:MALT	CL:--mode BlastN --matchScore 2 --mismatchScore -3 --setLambda 0.625 --setK 0.41 --alignmentType Local --inFile input_fastq_gz.fastq.gz --index
+@RG	ID:1	PL:unknown	SM:unknown
+@CO	BlastN-like alignments
+@CO	Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alignments_output2.txt	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,50 @@
+BLASTN output produced by MALT
+
+
+Query= YP_009137150.1
+
+***** No hits found ******
+
+Query= YP_009137151.1
+
+***** No hits found ******
+
+Query= YP_009137152.1
+
+***** No hits found ******
+
+Query= YP_009137153.1
+
+***** No hits found ******
+
+Query= YP_009137154.1
+
+***** No hits found ******
+
+Query= YP_009137155.1
+
+***** No hits found ******
+
+Query= YP_009137156.1
+
+***** No hits found ******
+
+Query= YP_009137157.1
+
+***** No hits found ******
+
+Query= YP_009137158.1
+
+***** No hits found ******
+
+Query= YP_009137159.1
+
+***** No hits found ******
+
+Query= YP_009137160.1
+
+***** No hits found ******
+
+Query= YP_009137161.1
+
+***** No hits found ******
Binary file test-data/input.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/malt_indices.loc	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,18 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use a directory of MALT indexed data files.  The malt_indices.loc
+# file has this format (longer white space characters are TAB characters):
+#
+# <unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+# So, for example, if you have the phiX index stored in
+# /depot/data2/galaxy/phiX/base/,
+# then the malt_indices.loc entry would look like this:
+#
+# phiX   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base
+#
+# and your /depot/data2/galaxy/phiX/base/ directory
+# would contain the MALT index files, for example:
+#
+# index0.idx  ref.db  ref.idx  ref.inf  table0.db  table0.idx
+#...etc...
+phiX	phiX	Coliphage phiX	${__HERE__}/phiX
Binary file test-data/phiX/index0.idx has changed
Binary file test-data/phiX/ref.db has changed
Binary file test-data/phiX/ref.idx has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX/ref.inf	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,2 @@
+sequences	1
+letters	5386
Binary file test-data/phiX/table0.db has changed
Binary file test-data/phiX/table0.idx has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unaligned_output2.fasta	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,26 @@
+>YP_009137150.1
+NNNNNGNNNNGNNNNNNNGANANNNNGANANNNNGANNTANNNNNNANNNGTANNNANAANNNNNNNNNNNNANNNNNANNAGNNNANNANNNNNNGGGNANNANNAANAAACNNNNNNNNNGNNNNNNNNNANNNNTTTNNNANNNNNNNNNNANNNANANNGNNCNNNNNNNNNNNANNTAANNANNGNNANNNANNNNNNNNNAAANANNGNCNNNNANANANANANANNNGGNANNNNAAAAANGNNAAAGNGNNAN
+>YP_009137151.1
+NNNNNGTNNNANNGNNNNNNNTNGTNNAANNANGNNNNNNNNANNNNNNNTNNGNNNNNNNNNNTNNAGNTNTNNNNAGNNNAATNNANNNANNNGNNTNANANGNCGGGNNGNNNANAGGGGNNCANCTNNNANNNNCNNNNCNNNNCNNCNNTNNNNNNTCNNCNTNNANNNNGNTANGNNNTNNNNNNNNTNNNANAANNAGTANNNNNTGNNNTANNNNNNGGNTNNANNNTNNNNGTNNNNNNNANNNNNNNANNNANNNGGGGAGATNGTNNNAATNNANNGANNNNNNGGANNNAGNGNGNGGGNANAANNNNNANNNNAAGGGNANANNNGNNAAAANGNTNNAGNNNAANNNNNNNNNNNNNNNNNNAGNGNNNNNNNNNANNNNGNGGGGNNNNNGNAANNNAANANNNNNNNNAAAANNNNANANAAGNANNANNNNANNANNNNNTNANTNTNANNNGNAGATNANGNGGNGANGGNGNNNGTNTNGAANNAANGAAANNNNNNGNNNGNAANNNANNNAANNNNNANNGNGANNAANNNANNNNNGNNGNGNNANANAGAANNNANNNNNAANAAAANNNNNAANNNNNAANNNNNAANNNNNAANNNNAANNNNNNNNANNNAGGAGGNNANANGAGNNNNTNNGNNAAANNGNNNCANNTNNANGGNNNGANNNANGNTNNNNNAGNNNNNANANNNNNTNTGNCNNNNNNNTGNNGANNNNNNNTGNNANNNNAAANANNNNTNNNNNANNCNNNNNNNTNNANNNNNNNNTNNGNNNNNNGTNNGANNNNGNNNNNNNNNNNGANANAGNANAGNGN
+>YP_009137152.1
+NGNNCNNGNNNNGANGANGGNNATNNNNNNNNANNNGNNNNNNCNNTNANNNNNNNNANNNNNNANNNGNNNNNNCNGNNTNNNNNNANNANNNNNNNNAAGNNNNNNNNNNNANTNNTTTNNANNNNNNNANGNNNNANNNANNNAGCNNNNNNNTNNCNGNNNNNNANTTNTNNNNNNNNNNANNNNNNNATNNNNNANNNANNNNNNNTNGNNNNTNNNNN
+>YP_009137153.1
+NNNANNNNNNNNNNNNNNNNTNNNGGNNNNNNGAGNNANNATNNNCNANNNGNGAGNNTNNAAGNANNNNNNNGCNNGNNNNANTTNNNNNGNNGNATNNNNTNNNNNNAANNNANNNNNANNNNNNNNNANNNTANNNANNNNNCNTNNNNNNNNNNNNNTNNCTNNNNNNNNNGNNNNNNNGNANGNANNNNANNNNNNNNNNNNAANNNCNNNANNNGNGCNNNNANNGNNNNNTTNTNNNGAAANNNNNGNNNNNGGNNNNNAANNNGNNNNNNGANANNANNNNNNNNNNNNNNNNNNNNNNNNGTCNNNNAANNNNNTNNNNNNNNNN
+>YP_009137154.1
+NNGNGNNNNNNANTNNANNGNTNANNNNNNGANNNTTNNNNNAGTANNANNGNNNGNNNNNANNNNTNNNAGNNNGANGNNNTNNTNNNNNNNNNNGNNNNTNTNNNNNNNNNANNNNACTNTNNNNGTAANNNNNGANNNNTCNNNNNNNNNNNNNCNNANAANNNNNNNANNNNNNNNNNNTNNNNGNNCNANNNNNNNNNNNNNNNNNNNNNNNNTNNN
+>YP_009137155.1
+NGNNNTTNANNNNNNNANNTNANNNAANNNNNNNNGTNANNTNGNANNNNNNNGGNNNNNTNNGNNNNNNCNANNCANNNNGNANTNNNNNNNNANNNANNNNNNNNNNNGNNNNNATAGNNTNNNTNNTNNNTATNNGTANNAANGCNNANNNNNNTTNNANNNNTAGNNNANTTNNNNNNNNNNNNNNTNNNANNTNGT
+>YP_009137156.1
+NAANGGNGNNNNNANGNNNNNNGANNANNNNNNANNNNTNNNGNNNNNANNNNNNNNNNNNTNNNNNNNNNNNAANNNNTGNNNNNNNNAANNNTGNAGNGNNTCNNTNNNNNNCNNTGATNNAANNNNNNNNGANNNNNNNTNNNNNGNNGNNNNANNGNNNNTNANNNANNNNNNNNNNTNNNNNNNNNTNNANAANGGNNANNNNNANTANNNTNGNGNGANTNNANNTNGANNANTNNNNNNNNNAGNNGNNNNTTNNNCNNNNNANNNTNNNAGNNNNNNNCNGNNTNTANNNNTNNNNNNNCNNNNNNNNNTNNNCNNTNNNNTNNNNNNANNNNNNNCNNNNNGNNNNNNNNGNNNTNNNNNNNNNNNNNNNNNTNNANNNGNTNNNNNNNNNNANNANNNANNNNTNNGNNNNNTNNNNTNNNNNNNNNNNNNTNNNTNTNNNNNTANANNNTNNNNNNNNNAGNNNCNNNNNNNNNNANNNNTNNNNNNNANTANNNNNNNGNNGTNNTNNANNNNNNNNNTNGNTNNNNANNNNNNNNNGGNNNNNNNNNNNGNNATNNTNANGNNGNNTANNNNNNNNAAGANATNAANTNNNNNGNNANNNNNNGNNNGGNNNNNNNNNNNNNAGNNNNNNNNNNCNNANNNNNTTAANNANNGNNNNANNGNNNNNNNNNGNNNNNANNNTNNNNNNNNGCNNNTGNNNGGNNNNANNTNNNTNNGNTNTNNNANANNNNNNNATAGNANNNNNNNNNNNNNNNNNGNNNNNNTNNNNNNNNNNNTNNANANNANNGNNNNNANTNTNNNGNNNNNNANCNTNGNNNNNNANNANNNTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNANNNNNNNNNN
+>YP_009137157.1
+NAANNANANANNTNGGNAANCANNNGNNNNNNTNGTNNNNNNNNGNNGNTNGNGNNNNNNNNNAATNNNNAANNNANNNATTNNNNNNNNNNNNNANGNNNNNNNNNNNNANNGNNAGNANNNNNTNNCTNNTTNNNNANGNNNCNANGGNNGNTNNNNNNNNNTCNGNNNNNNNTNNGNATNNNGANNNNNTNNNNNATNAGAANNAGNGNNNNANANNNTNNANNNNNNNNNNNNNGNNNNNNNNNGNNGNCNNNCNNNNTGNNNNNNNNNNNNCNNNNACNANNNNANNCNNNNTANNNNNNGNNNNNNNGNAGAANNNNNANGNNNNTNAGNAANNNNNNNNNNNGNNNNGNNNNTNNANNNNAGGNNNNTNANNNTNNGNGNGGTGNGNANNNNGANNNNNNNANNTANNNNNNGNNNGNNNNNNGTNNNNNNTNAGNATNNNANNNNNNNANAGANNNNNNAANNAAGGGAGNNANANNNNANNNNNNNNNNNNNNTNNANNNNNNNNNANGNNNNNNNNNNNNNNNNCNNNNNNNNNNGNNTNNNNNNGANANNNANNNNNNNNANNAGANNTGNNNNNGNNNNNNANNNNTNNNTNNTNNAANNNANNNNAANNNNNNNTNANNNANANNNGGNNNNTNTNNNNNGNTNNGANNNGNGNNNNNGNNNANN
+>YP_009137158.1
+NANNTNANNGTAAANNNNANAGNNNNNNNANGNNNNANNNNACNNNNNNNNNNNNTATNNNNNNNTNNGNNNNNNNGNNNNANNANNNNNNNCGNNNTNNGNANNNNTANNNNNNNNANNNNNNNNNNNNNNNTNNNNNNNNCNNNNNNNCNNNNATNNNNNNNNANNGNNANNANNNNNNNCNNNNNCNNNNNTNNCNAGNNNNNNNNNNNNNNNANNNNANNNNNNNNANNCATNNNNANNNNGNNNNTNCNANNNAGNNGNCNNNNNNNNANNNNNNNGNNNNNTNNNNNNNC
+>YP_009137159.1
+NNANGNNNNNNNNNANTNNANNNNNNTNNCNNANNNNNCNNAAGNANANNANNNNGNNNNNNNNGNNNNNAAGANAAANAATANAANNNNNNGNNTNNNANNACNNAANNNNTGNNANNNNNNNGNNNNTGNNNNNNNANNGNNAANNAANNNNNANNNNNNNANNANNANNNGANNANANNAANNNNNNCANNANNANTNTTNTANNANNNTTNANNNTNNNGANANNNNNCNNGNNNNGNNNNNTANNNNNNGNNNNNNAGNGNNANANNANNNNNNTTNNAANGANANNNANNGNGNNNNGGNNNNNANNGNNGNNTNTNNAAAAANNANNNNNGAAATNAACNGANNANGANNNNNNNANNANANNAAGNNNNANNNANANNNNTATTNNNNNNNAANGNNNTANNCNNGNNANNNAANAGNGNAGNNNANGNAGNNNNNANNAAANNNNNANANNNNNNNNACNACNANNNNNGGNNAANCNNNNNTANNNNNANCGGTGAANNGNNNNNNGNANAANGANNNANNANNANNNANNNANGNNNNNANNNANNGNNTNNNTNNNTGANNNNNNNNTNNNNGNNNNGNANAAAANNNNNANNNNNNNNAAGNNNNANCAANGNNNNACNNNNNNANGNNNNANNNNNNTNNGNANNNNNNTANNGGANNNANNNNNNNNNNCNGGGNNNGNNNNNNNNNNNNNNNNCANNNNNNNNNNNNNNTGNNNGNNGNGGNNNNNNNNNNNNNNN
+>YP_009137160.1
+NANTNNNATCTNNTNNAANANGATNANGAANGNNNGANNNNGNNNCNNNTNNNNNANNNNGGNNANNNNNNNNNTTNNNNNNGNNTNNNANANTANCNTNNNANNGNGNTTANNNNNGNANNNNNTNNNNNNCNNNNTNTNATNNANNGNNNNNTNNNNTNNNNNNNNNNNNNNNNNNNNNNGNNNNNNNNNNNNNNNNNTNGNNNNNTNNNNGNNNANNNNNNNTCNNNNANNATANANNNNNNCNNNGNNNNNNNNGNNANNGNNANNCNNNNNNGNNNNNAANNNNGNAGGANNNNANNNATNNGNNNANNAGGNNNCNNNNTNNNANNNANNCNNNTNNNNNNNNNTNNGNNTTNGNNNNNNNTTNNTNGNNNNNNNNNNNNANNNNNNNGNNNNNNNNNNGNNNTNNNGNNNNNNNGNGANNNNNNTNNNNNNNNNANGNNNANNNNNTNNNCNNNNGNCNANNANAANANGNNNNNNNNNNNNNNNCTNACNANNNNNNNNNNTNNCNNNNNNGNNAANTNNNNCNNNNGNNNNANNANNNNNNNNCNNNNTNNNANAANTNNNGNNNNNNNNNNNANANNNANNNGNNNNNGNTNNNNNNNNNACNNNNNAANNNANNNNNNNNNATGNNNTNNAAGNNNNNANNNTNNNANNNNNNNNCNTNAACNNNNNNAGGNGNTNNNNNNNNNNNNNNNNNNNNNNNNGNCNNTNGNNNNNNANNNNANANNGCNNNGNNNNTNNANNNNNNNNGNNNGNNNANNTNTNNGAGNNNNNTNNANNNNNNANNNNNNACNTNNNNNANNNANNNNNAGANNNNNANANGNCNNTNANGANNNTNNNNNAANNNNNTNANNNNNGANNNNTN
+>YP_009137161.1
+NGNNANNGNNNAANGANNANGANAANNNNCNNNATNNNNANCNNGNNNNANNNNNNNNCNNANATNNANANATNNNNGGNANNNNNNTNNNNATNANTNTNNNAAANNAANGANTNNNNNANNAANNNAAANNANNNATNNAGNNCANNNNNTNNNNANNNNNNANNNNNNNNACNNNNAANNCTNGNNNGTNNNNNNGNNNNANTNNNNNGNNNANNTNANNNGTNNCTAAAANNNNTNAANNNNNNANNNNNCNTTNNANNNNNNNNNNNGNNCNNNNNNNGNNNGANAATGNNGNACNNNNTGGNNNNNNNNNGANTGNNNANANNAANANANANNNCTNANNNNNNNNTNNNNNNNNTNNNANNANNNNNNNNNGNNNGGNNGNNGNANTNNANNNNNANNNNNGNNNGNNNNNNNANNNNANNNANNNNNGNNNNANNNNNTNNGNANNNAGNNNNNTNNNN
+>YP_009137162.1
+NGNANNGANNCCCNNNNNTTNGGNNNNNTANNNNNNNNNNNNNGNNNNNNNNNNNTNANGNNNNNANNNNNNNTNNNNNNNANNNATNTNNNNNNN
Binary file test-data/viral.1.protein.fasta.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/malt_indices.loc.sample	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,19 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use a directory of MALT indexed data files.  The malt_indices.loc
+# file has this format (longer white space characters are TAB characters):
+#
+# <unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+# So, for example, if you have the phiX index stored in
+# /depot/data2/galaxy/phiX/base/,
+# then the malt_indices.loc entry would look like this:
+#
+# phiX   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base
+#
+# and your /depot/data2/galaxy/phiX/base/ directory
+# would contain the MALT index files, for example:
+#
+# index0.idx  ref.db  ref.idx  ref.inf  table0.db  table0.idx
+#...etc...
+
+#phiX	phiX	Coliphage phiX	/phiX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of MALT index directories; each malt_indices.loc row supplies value, dbkey, name and path -->
+    <table name="malt_indices" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/malt_indices.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Nov 12 18:39:23 2021 +0000
@@ -0,0 +1,6 @@
+<tables> <!-- test variant of the malt_indices table; it reads the loc file kept under test-data -->
+    <table name="malt_indices" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/malt_indices.loc" />
+    </table>
+</tables>