changeset 0:4f0a8e401e2e draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 5386f7bb4bf5bdd4b5303d0686c97fe5d9b99ca0"
author iuc
date Sat, 11 Dec 2021 11:52:57 +0000
parents
children ef0443c0eaba
files macros.xml sam2rma.xml test-data/13-1941-6_S4_L001_R1_600000.fastq.gz test-data/13-1941-6_S4_L001_R2_600000.fastq.gz test-data/blast_R1.txt test-data/blast_R2.txt test-data/contaminants.txt test-data/daa2info_output1.txt test-data/daa2info_output2.txt test-data/daa2info_output_summary2.txt test-data/input.daa test-data/input1.sam test-data/input2.sam test-data/input_meganized.daa test-data/kegg_output.txt test-data/read_extractor_input.rma6 test-data/read_extractor_output.txt test-data/taxonomy_output.txt
diffstat 18 files changed, 1459 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,140 @@
+<macros>
+    <!-- Version tokens: sam2rma.xml builds its version attribute as @TOOL_VERSION@+galaxy@VERSION_SUFFIX@,
+         and @TOOL_VERSION@ also pins the package requirement declared in this file. -->
+    <token name="@TOOL_VERSION@">6.21.7</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <!-- Value substituted into the profile attribute of the tool element. -->
+    <token name="@PROFILE@">20.09</token>
+    <!-- Cross-reference of type bio.tools pointing at the identifier "megan". -->
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">megan</xref>
+        </xrefs>
+    </xml>
+    <!-- Package requirement for megan, pinned to the @TOOL_VERSION@ token defined in this file. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">megan</requirement>
+        </requirements>
+    </xml>
+    <!-- Conditional choosing how reads and their Blast results are supplied: a single
+         dataset, two separate datasets, or one paired collection.  The two paired
+         variants additionally ask for the read-name suffix length. -->
+    <macro name="input_type_cond">
+        <conditional name="input_type_cond">
+            <param name="input_type" type="select"
+                   label="Choose the category of the reads files to be analyzed">
+                <option value="single" selected="true">Single dataset</option>
+                <option value="pair">Dataset pair</option>
+                <option value="paired">List of dataset pairs</option>
+            </param>
+            <when value="single">
+                <param name="read1" type="data"
+                       format="fasta,fasta.gz,fastqsanger.gz,fastqsanger"
+                       label="Forward read file"
+                       help="This read file should be the one used by Blast to generate the Blast file below"/>
+                <param name="blast1" type="data"
+                       format="daa,blastxml,sam,tabular,txt"
+                       label="Output file of Blast on input forward read file"/>
+            </when>
+            <when value="pair">
+                <param name="read1" type="data"
+                       format="fasta,fasta.gz,fastqsanger.gz,fastqsanger"
+                       label="Forward read file"
+                       help="This read file should be the one used by Blast to generate the Blast file below"/>
+                <param name="read2" type="data"
+                       format="fasta,fasta.gz,fastqsanger.gz,fastqsanger"
+                       label="Reverse read file"
+                       help="This read file should be the one used by Blast to generate the Blast file below"/>
+                <param argument="--pairedSuffixLength" type="integer" value="0"
+                       label="Length of name suffix used to distinguish read names"
+                       help="Use 0 if read and mate have the same name"/>
+                <param name="blast1" type="data"
+                       format="daa,blastxml,sam,tabular,txt"
+                       label="Output file of Blast on input forward read file"/>
+                <param name="blast2" type="data"
+                       format="daa,blastxml,sam,tabular,txt"
+                       label="Output file of Blast on input reverse read file"/>
+            </when>
+            <when value="paired">
+                <param name="reads_collection" type="data_collection" collection_type="paired"
+                       format="fasta,fasta.gz,fastqsanger,fastqsanger.gz"
+                       label="Collection of paired read files"/>
+                <param argument="--pairedSuffixLength" type="integer" value="0"
+                       label="Length of name suffix used to distinguish read names"
+                       help="Use 0 if read and mate have the same name"/>
+                <param name="blast1" type="data"
+                       format="daa,blastxml,sam,tabular,txt"
+                       label="Blast file for forward read"/>
+                <param name="blast2" type="data"
+                       format="daa,blastxml,sam,tabular,txt"
+                       label="Blast file for reverse read"/>
+            </when>
+        </conditional>
+    </macro>
+    <!-- Required DAA dataset handed to the wrapped command's "in" argument. -->
+    <macro name="input_daa_param">
+        <param argument="--in"
+               type="data" format="daa"
+               label="Input DAA file"/>
+    </macro>
+    <!-- Boolean switch, off by default: its value is the literal flag when checked and empty otherwise. -->
+    <macro name="long_reads_param">
+        <param argument="--longReads" type="boolean"
+               truevalue="--longReads" falsevalue="" checked="false"
+               label="Parse and analyse input reads as long reads?"/>
+    </macro>
+    <!-- Integer cap on matches per read; defaults to 100. -->
+    <macro name="max_matches_per_read_param">
+        <param argument="--maxMatchesPerRead"
+               type="integer" value="100"
+               label="Maximum matches per read"/>
+    </macro>
+    <!-- Free-text list of classification names; an empty value means "use all".
+         The sanitizer starts from string.printable, the same default the sanitize_query
+         macro declares.  It is spelled out here because the VALIDINITIAL token is scoped
+         to sanitize_query, so a token placeholder used in this macro would reach the
+         sanitizer unexpanded.
+         A single quote is removed from the valid set and mapped to the sequence
+         quote, double-quote, quote, double-quote, quote. -->
+    <macro name="only_named_classifications_param">
+        <param argument="--only" type="text" value="" label="Names of classifications to use" help="Leave blank to use all classifications">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;" />
+                    <add value="|" />
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;" />
+                </mapping>
+            </sanitizer>
+        </param>
+    </macro>
+    <!-- Option list of classification names for a select parameter; EC is preselected. -->
+    <macro name="classification_options">
+        <option value="EC" selected="true">EC</option>
+        <option value="EGGNOG">EGGNOG</option>
+        <option value="GTDB">GTDB</option>
+        <option value="INTERPRO2GO">INTERPRO2GO</option>
+        <option value="KEGG">KEGG</option>
+        <option value="SEED">SEED</option>
+        <option value="Taxonomy">Taxonomy</option>
+    </macro>
+    <!-- Option list of Blast modes for a select parameter; "Unknown" is preselected. -->
+    <macro name="blast_mode_options">
+        <option value="Unknown" selected="true">Unknown</option>
+        <option value="BlastN">BlastN</option>
+        <option value="BlastP">BlastP</option>
+        <option value="BlastX">BlastX</option>
+        <option value="Classifier">Classifier</option>
+    </macro>
+    <!-- Boolean switch, on by default: its value is the literal flag when checked and empty otherwise. -->
+    <macro name="classify_param">
+        <param argument="--classify" type="boolean"
+               truevalue="--classify" falsevalue="" checked="true"
+               label="Run classification algorithm?"/>
+    </macro>
+    <!-- Float threshold for the minimum score; defaults to 50.0. -->
+    <macro name="min_score_param">
+        <param argument="--minScore"
+               type="float" value="50.0"
+               label="Minimum score"/>
+    </macro>
+    <!-- Float threshold for the maximum expected value; defaults to 0.01. -->
+    <macro name="max_expected_param">
+        <param argument="--maxExpected"
+               type="float" value="0.01"
+               label="Maximum expected"/>
+    </macro>
+    <!-- Percentage in [0, 100]; defaults to 0.0. -->
+    <macro name="min_percent_identity_param">
+        <param argument="--minPercentIdentity"
+               type="float" value="0.0" min="0.0" max="100.0"
+               label="Minimum percent identity"/>
+    </macro>
+    <!-- Percentage in [0, 100]; defaults to 10.0. -->
+    <macro name="top_percent_param">
+        <param argument="--topPercent"
+               type="float" value="10.0" min="0.0" max="100.0"
+               label="Top percent"/>
+    </macro>
+    <!-- Support and coverage thresholds.  The percentage parameters are bounded to
+         [0, 100]; per their help text, a value of 0 disables the two support filters. -->
+    <macro name="min_max_params">
+        <param argument="--minSupportPercent"
+               type="float" value="0.05" min="0.0" max="100.0"
+               label="Minimum support as percent of assigned reads"
+               help="0 value ignores"/>
+        <param argument="--minSupport"
+               type="integer" value="0"
+               label="Minimum support"
+               help="0 value ignores"/>
+        <param argument="--minPercentReadCover"
+               type="float" value="0.0" min="0.0" max="100.0"
+               label="Minimum percent of read length to be covered by alignments"/>
+        <param argument="--minPercentReferenceCover"
+               type="float" value="0.0" min="0.0" max="100.0"
+               label="Minimum percent of reference length to be covered by alignments"/>
+        <param argument="--minReadLength"
+               type="integer" value="0"
+               label="Minimum read length"/>
+    </macro>
+    <!-- LCA settings: the algorithm choice ("naive" preselected) and the coverage
+         percentage, bounded to [0, 100] with a default of 100.0. -->
+    <macro name="lca_params">
+        <param argument="--lcaAlgorithm" type="select"
+               label="Select the LCA algorithm to use for taxonomic assignment">
+            <option value="naive" selected="true">naive</option>
+            <option value="weighted">weighted</option>
+            <option value="longReads">longReads</option>
+        </param>
+        <param argument="--lcaCoveragePercent"
+               type="float" value="100.0" min="0.0" max="100.0"
+               label="Percent for the LCA to cover"/>
+    </macro>
+    <!-- Select between the two read assignment modes; "alignedBases" is preselected. -->
+    <macro name="read_assignment_mode_param">
+        <param argument="--readAssignmentMode" type="select"
+               label="Select the read assignment mode">
+            <option value="alignedBases" selected="true">alignedBases</option>
+            <option value="readCount">readCount</option>
+        </param>
+    </macro>
+    <!-- Optional text dataset listing contaminant taxa, one id or name per line. -->
+    <macro name="con_file_param">
+        <param argument="--conFile"
+               type="data" format="txt" optional="true"
+               label="File of contaminant taxa (one id or name per line)"
+               help="Optional, no selection ignores"/>
+    </macro>
+    <!-- Optional sqlite dataset holding the MEGAN mapping database. -->
+    <macro name="mapdb_param">
+        <param argument="--mapDB"
+               type="data" format="sqlite" optional="true"
+               label="MEGAN mapping db"
+               help="Optional, no selection ignores"/>
+    </macro>
+    <!-- Reusable sanitizer for text parameters.  The starting set of valid characters
+         is taken from the VALIDINITIAL token, which defaults to string.printable and
+         can be overridden by the expanding element.  A single quote is dropped from
+         the valid set and mapped to the sequence quote, double-quote, quote,
+         double-quote, quote. -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;"/>
+                <add value="|"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+    <!-- Publications cited by the tools, each identified by DOI. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/nmeth.3176</citation>
+            <citation type="doi">10.1101/gr.120618.111</citation>
+            <citation type="doi">10.1101/gr.5969107</citation>
+        </citations>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sam2rma.xml	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,249 @@
+<tool id="megan_sam2rma" name="MEGAN: Generate a MEGAN rma6 file" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>from a DIAMOND or MALT sam file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+## Select the forward reads for the chosen input type.  Every input type
+## supplies the forward SAM file as 'sam1'.
+#if str($input_type_cond.input_type) == 'paired':
+    ## Processing paired reads is tricky if we're
+    ## downstream from MALT.  MALT doesn't have a
+    ## paired-read mode, so it won't attempt to analyze
+    ## reads in pairs.  To do paired read processing,
+    ## set MALT to generate SAM files and then import the
+    ## SAM files into MEGAN, specifying paired read mode
+    ## there. If you have multiple SAM files for the same
+    ## sample, then import them all at the same time to
+    ## create one unified rma6 file.
+    #set read1 = $input_type_cond.reads_collection['forward']
+#else:
+    #set read1 = $input_type_cond.read1
+#end if
+#set sam1 = $input_type_cond.sam1
+
+## Derive a file extension that reflects the datatype of the reads
+## (fasta or fastq, optionally gzipped).  The reverse reads reuse it.
+#if $read1.is_of_type('fasta', 'fasta.gz'):
+    #set read_ext = '.fasta'
+#else:
+    #set read_ext = '.fastq'
+#end if
+#if $read1.ext.endswith('.gz'):
+    #set read_ext = $read_ext + '.gz'
+#end if
+
+## Link the inputs into the working directory under fixed names.
+#set read1_identifier = 'read1' + $read_ext
+ln -s '${read1}' '${read1_identifier}' &&
+
+#set sam1_identifier = 'sam1.' + $sam1.ext
+ln -s '${sam1}' '${sam1_identifier}' &&
+
+#if str($input_type_cond.input_type) in ['pair', 'paired']:
+    #if str($input_type_cond.input_type) == 'pair':
+        #set read2 = $input_type_cond.read2
+    #else:
+        #set read2 = $input_type_cond.reads_collection['reverse']
+    #end if
+    #set sam2 = $input_type_cond.sam2
+    #set read2_identifier = 'read2' + $read_ext
+    ln -s '${read2}' '${read2_identifier}' &&
+    #set sam2_identifier = 'sam2.' + $sam2.ext
+    ln -s '${sam2}' '${sam2_identifier}' &&
+#end if
+
+sam2rma
+#if str($input_type_cond.input_type) == 'single':
+    --in '${sam1_identifier}'
+    --reads '${read1_identifier}'
+    --out '${output_single}'
+    ## With a single SAM file, paired-read handling is opt-in through the
+    ## advanced options.  It is emitted only in this branch because the
+    ## pair/paired branch below already passes both paired options, so
+    ## repeating them there would duplicate arguments.
+    #if str($advanced_options.paired_reads_cond.paired_reads) == 'yes':
+        --paired
+        --pairedSuffixLength $advanced_options.paired_reads_cond.pairedSuffixLength
+    #end if
+#else:
+    ## 'pair' and 'paired' inputs are handled identically from here on.
+    --in '${sam1_identifier}' '${sam2_identifier}'
+    --reads '${read1_identifier}' '${read2_identifier}'
+    --paired
+    --pairedSuffixLength $input_type_cond.pairedSuffixLength
+    ## With two input files the output must be a directory.  The rma6
+    ## files written there inherit the base names of the linked SAM
+    ## files (sam1.rma6, sam2.rma6) and are moved to the Galaxy outputs
+    ## at the end of this command.
+    --out '.'
+#end if
+#if $advanced_options.metaDataFile:
+    ## The parameter accepts several datasets: pass each one as its own
+    ## quoted argument instead of a single comma-joined string.
+    --metaDataFile
+    #for $meta_data_file in $advanced_options.metaDataFile:
+        '${meta_data_file}'
+    #end for
+#end if
+$advanced_options.longReads
+--maxMatchesPerRead $advanced_options.maxMatchesPerRead
+$advanced_options.classify
+--minScore $advanced_options.minScore
+--maxExpected $advanced_options.maxExpected
+--topPercent $advanced_options.topPercent
+--minSupportPercent $advanced_options.minSupportPercent
+--minSupport $advanced_options.minSupport
+--minPercentReadCover $advanced_options.minPercentReadCover
+--minPercentReferenceCover $advanced_options.minPercentReferenceCover
+--minReadLength $advanced_options.minReadLength
+--lcaAlgorithm '$advanced_options.lcaAlgorithm'
+--lcaCoveragePercent $advanced_options.lcaCoveragePercent
+--readAssignmentMode '$advanced_options.readAssignmentMode'
+#if $advanced_options.conFile:
+    --conFile '$advanced_options.conFile'
+#end if
+#if $advanced_options.mapDB:
+    --mapDB '$advanced_options.mapDB'
+#end if
+#if str($advanced_options.only) != '':
+    --only '$advanced_options.only'
+#end if
+--useCompression 'false'
+--threads \${GALAXY_SLOTS:-8}
+--tempStoreDir '.'
+#if str($input_type_cond.input_type) in ['pair', 'paired']:
+    && mv 'sam1.rma6' '$output_forward'
+    && mv 'sam2.rma6' '$output_reverse'
+#end if
+    ]]></command>
+    <inputs>
+        <!-- How reads and their SAM files are supplied: one dataset, two separate
+             datasets, or one paired collection.  Every variant names the forward SAM
+             file sam1; the two paired variants add sam2 and the read-name suffix length. -->
+        <conditional name="input_type_cond">
+            <param name="input_type" type="select" label="Choose the category of the reads files to be analyzed">
+                <option value="single" selected="true">Single dataset</option>
+                <option value="pair">Dataset pair</option>
+                <option value="paired">List of dataset pairs</option>
+            </param>
+            <when value="single">
+                <param name="read1" type="data" format="fasta,fasta.gz,fastqsanger.gz,fastqsanger" label="Forward read file" help="This read file should be the one used by DIAMOND or MALT to generate the SAM file below"/>
+                <param name="sam1" type="data" format="sam" label="Output file of DIAMOND or MALT on input forward read file"/>
+            </when>
+            <when value="pair">
+                <param name="read1" type="data" format="fasta,fasta.gz,fastqsanger.gz,fastqsanger" label="Forward read file" help="This read file should be the one used by DIAMOND or MALT to generate the SAM file below"/>
+                <param name="sam1" type="data" format="sam" label="Output file of DIAMOND or MALT on input forward read file"/>
+                <param name="read2" type="data" format="fasta,fasta.gz,fastqsanger.gz,fastqsanger" label="Reverse read file" help="This read file should be the one used by DIAMOND or MALT to generate the SAM file below"/>
+                <param name="sam2" type="data" format="sam" label="Output file of DIAMOND or MALT on input reverse read file"/>
+                <param argument="--pairedSuffixLength" type="integer" value="0" label="Length of name suffix used to distinguish read names" help="Use 0 if read and mate have the same name"/>
+            </when>
+            <when value="paired">
+                <param name="reads_collection" type="data_collection" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of paired read files"/>
+                <param name="sam1" type="data" format="sam" label="Output file of DIAMOND or MALT on input forward read file"/>
+                <param name="sam2" type="data" format="sam" label="Output file of DIAMOND or MALT on input reverse read file"/>
+                <param argument="--pairedSuffixLength" type="integer" value="0" label="Length of name suffix used to distinguish read names" help="Use 0 if read and mate have the same name"/>
+            </when>
+        </conditional>
+        <section name="advanced_options" title="Advanced options" expanded="false">
+            <param argument="--metaDataFile" type="data" format="tabular" multiple="true" optional="true" label="Files containing metadata to be included in the output files"/>
+            <!-- This tool consumes SAM input, so the label refers to the SAM file. -->
+            <conditional name="paired_reads_cond">
+                <param name="paired_reads" type="select" label="SAM file was created using paired reads?">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <when value="no"/>
+                <when value="yes">
+                    <param argument="--pairedSuffixLength" type="integer" value="0" label="Length of name suffix used to distinguish read names" help="Use 0 if read and mate have the same name"/>
+                </when>
+            </conditional>
+            <expand macro="long_reads_param"/>
+            <expand macro="max_matches_per_read_param"/>
+            <expand macro="classify_param"/>
+            <expand macro="min_score_param"/>
+            <expand macro="max_expected_param"/>
+            <expand macro="top_percent_param"/>
+            <expand macro="min_max_params"/>
+            <expand macro="lca_params"/>
+            <expand macro="read_assignment_mode_param"/>
+            <expand macro="con_file_param"/>
+            <expand macro="mapdb_param"/>
+            <expand macro="only_named_classifications_param"/>
+        </section>
+    </inputs>
+    <!-- One rma6 output for single-dataset input; otherwise a forward and a reverse
+         rma6 output.  The filters key on the selected input type. -->
+    <outputs>
+        <data name="output_single" format="rma6">
+            <filter>input_type_cond['input_type'] == 'single'</filter>
+        </data>
+        <data name="output_forward" format="rma6" label="${tool.name} on ${on_string} (forward)">
+            <filter>input_type_cond['input_type'] != 'single'</filter>
+        </data>
+        <data name="output_reverse" format="rma6" label="${tool.name} on ${on_string} (reverse)">
+            <filter>input_type_cond['input_type'] != 'single'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Default input type (single dataset): one rma6 output -->
+        <test expect_num_outputs="1">
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="sam1" value="input1.sam" ftype="sam"/>
+            <output name="output_single" ftype="rma6">
+                <assert_contents>
+                    <has_size value="885"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Two separate read datasets: forward and reverse rma6 outputs -->
+        <test expect_num_outputs="2">
+            <param name="input_type" value="pair"/>
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="sam1" value="input1.sam" ftype="sam"/>
+            <param name="sam2" value="input2.sam" ftype="sam"/>
+            <output name="output_forward" ftype="rma6">
+                <assert_contents>
+                    <has_size value="805"/>
+                </assert_contents>
+            </output>
+            <output name="output_reverse" ftype="rma6">
+                <assert_contents>
+                    <has_size value="805"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Reads supplied as one paired collection: forward and reverse rma6 outputs -->
+        <test expect_num_outputs="2">
+            <param name="input_type" value="paired"/>
+            <param name="reads_collection">
+                <collection type="paired">
+                    <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/>
+                    <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/>
+                </collection>
+            </param>
+            <param name="sam1" value="input1.sam" ftype="sam"/>
+            <param name="sam2" value="input2.sam" ftype="sam"/>
+            <output name="output_forward" ftype="rma6">
+                <assert_contents>
+                    <has_size value="805"/>
+                </assert_contents>
+            </output>
+            <output name="output_reverse" ftype="rma6">
+                <assert_contents>
+                    <has_size value="805"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Generates a MEGAN RMA (read-match archive) file from a SAM file that was generated by DIAMOND or MALT.  MEGAN uses an
+update of the original RMA file format known as RMA6.
+
+Inputs consist of reads in fasta or fastqsanger format (gzip compression is supported) and associated SAM files.
+Each read file should have been used previously as the input to DIAMOND or MALT to produce the associated SAM file
+for this tool.
+    </help>
+    <expand macro="citations"/>
+</tool>
Binary file test-data/13-1941-6_S4_L001_R1_600000.fastq.gz has changed
Binary file test-data/13-1941-6_S4_L001_R2_600000.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blast_R1.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,404 @@
+BLASTN output produced by MALT
+
+
+Query= XXXXXXXXXX:7:1101:1582:1835#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1610:1859#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1743:1871#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1536:1878#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2990:100153#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1624:1906#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1666:1926#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2921:100163#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1513:1929#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2759:100170#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1708:1937#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2981:100211#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1688:1946#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2767:100225#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1536:1959#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2797:100234#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1552:1976#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1748:1978#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2779:100239#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1593:1980#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2946:100242#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1987:1781#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3046:100006#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1900:1788#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3214:100027#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1848:1879#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3237:100032#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3027:100049#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1756:1891#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3238:100065#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1915:1901#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3198:100082#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1964:1931#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3088:100091#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1840:1948#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3105:100094#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1958:1952#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3190:100106#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1993:1999#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3117:100110#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2159:1798#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3147:100111#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2152:1838#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3065:100152#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2180:1843#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3154:100159#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2125:1861#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3198:100173#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2076:1911#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3166:100190#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2196:1920#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3225:100207#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2115:1927#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3019:100219#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2179:1937#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3202:100230#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2149:1945#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3211:100242#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2169:1964#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3168:100244#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3005:100246#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2313:1789#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3253:100014#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2361:1794#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2337:1794#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3284:100039#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2477:1795#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3310:100056#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2355:1821#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3420:100060#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2418:1834#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3267:100061#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2378:1838#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3416:100083#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2481:1853#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3411:100111#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3258:100128#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2252:1856#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3428:100129#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2394:1871#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3387:100138#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2269:1904#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3444:100163#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2259:1943#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3371:100179#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2371:1957#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3311:100186#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2394:1961#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3438:100192#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2333:1962#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3479:100209#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2459:1990#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3417:100210#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2372:1994#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3452:100214#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2677:1830#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3354:100219#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2603:1846#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3600:100019#/1
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2535:1848#/1
+
+***** No hits found ******
+
+EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blast_R2.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,404 @@
+BLASTN output produced by MALT
+
+
+Query= XXXXXXXXXX:7:1101:1582:1835#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1610:1859#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1743:1871#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1536:1878#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2990:100153#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1624:1906#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1666:1926#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2921:100163#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1513:1929#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2759:100170#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1708:1937#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2981:100211#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1688:1946#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2767:100225#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1536:1959#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2797:100234#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1552:1976#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1748:1978#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2779:100239#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1593:1980#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2946:100242#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1987:1781#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3046:100006#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1900:1788#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3214:100027#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1848:1879#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3237:100032#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3027:100049#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1756:1891#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3238:100065#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1915:1901#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3198:100082#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1964:1931#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3088:100091#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1840:1948#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3105:100094#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1958:1952#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3190:100106#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:1993:1999#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3117:100110#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2159:1798#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3147:100111#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2152:1838#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3065:100152#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2180:1843#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3154:100159#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2125:1861#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3198:100173#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2076:1911#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3166:100190#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2196:1920#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3225:100207#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2115:1927#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3019:100219#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2179:1937#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3202:100230#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2149:1945#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3211:100242#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2169:1964#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3168:100244#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3005:100246#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2313:1789#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3253:100014#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2361:1794#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2337:1794#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3284:100039#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2477:1795#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3310:100056#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2355:1821#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3420:100060#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2418:1834#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3267:100061#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2378:1838#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3416:100083#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2481:1853#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3411:100111#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3258:100128#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2252:1856#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3428:100129#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2394:1871#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3387:100138#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2269:1904#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3444:100163#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2259:1943#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3371:100179#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2371:1957#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3311:100186#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2394:1961#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3438:100192#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2333:1962#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3479:100209#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2459:1990#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3417:100210#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2372:1994#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3452:100214#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2677:1830#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3354:100219#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2603:1846#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:3600:100019#/2
+
+***** No hits found ******
+
+Query= XXXXXXXXXX:7:1101:2535:1848#/2
+
+***** No hits found ******
+
+EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contaminants.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,12 @@
+Illumina Single End Adapter 1					ACACTCTTTCCCTACACGACGCTGTTCCATCT
+Illumina Single End Adapter 2					CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
+Illumina Single End PCR Primer 1				AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Single End PCR Primer 2				CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
+Illumina Single End Sequencing Primer			ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+
+Illumina Paired End Adapter 1					ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Paired End Adapter 2					CTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
+Illumina Paried End PCR Primer 1				AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Paired End PCR Primer 2				CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
+Illumina Paried End Sequencing Primer 1			ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Paired End Sequencing Primer 2			CGGTCTCGGCATTCCTACTGAACCGCTCTTCCGATCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/daa2info_output1.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,3 @@
+# Number of reads: 1
+# Alignment mode:  BLASTP
+# Is meganized:    false
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/daa2info_output2.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,19 @@
+# Number of reads: 1
+# Alignment mode:  BLASTP
+# Is meganized:    true
+# Classifications: Taxonomy
+# Meganization summary:
+## @Creator	DAA2Info
+## @CreationDate
+## @ContentType	Summary4
+## @Names	input
+## @BlastMode	BlastP
+## @Uids
+## @Sizes	1.0
+## @TotalReads	1
+## @AdditionalReads	0
+## Classifications:
+##  Taxonomy (1 classes)
+## @Algorithm	Taxonomy	merge
+## @Parameters	
+## 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/daa2info_output_summary2.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,16 @@
+@Creator	DAA2Info
+@CreationDate
+@ContentType	Summary4
+@Names	input
+@BlastMode	BlastP
+@Uids
+@Sizes	1
+@TotalReads	1
+@AdditionalReads	0
+@Algorithm	Taxonomy	merge
+@Parameters	
+@ColorTable	Fews8	White-Green
+TAX	-2	1
+END_OF_DATA_TABLE
+#SampleID	@Source
+input.daa	input.DAA
Binary file test-data/input.daa has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input1.sam	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,5 @@
+@HD	VN:1.5	SO:unsorted	GO:query
+@PG	ID:1	PN:MALT	CL:--mode BlastN --matchScore 2 --mismatchScore -3 --setLambda 0.625 --setK 0.41 --alignmentType Local --inFile 13-1941-6_S4_L001_R1_600000_fastq_gz.fastq.gz --index /home/galaxy/tool-data/malt_index/AF2122-1 --output ./output.rma6 --numThreads 1 --memoryMode load --maxTables 0 --minBitScore 50.0 --maxExpected 1.0 --minPercentIdentity 0.0 --maxAlignmentsPerQuery 25 --maxAlignmentsPerRef 1 --topPercent 10.0 --minSupportPercent 0.001 --minSupport 0 --minPercentIdentityLCA 0.0 --maxSeedsPerFrame 100 --maxSeedsPerRef 20 --seedShift 1 --gapOpen 11 --gapExtend 1 --band 4 --alignments ./alignments_output.SAM.gz --format SAM --outAligned ./aligned_output.fna.gz --outUnaligned ./unaligned_output.fna.gz	DS:BlastN
+@RG	ID:1	PL:unknown	SM:unknown
+@CO	BlastN-like alignments
+@CO	Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input2.sam	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,5 @@
+@HD	VN:1.5	SO:unsorted	GO:query
+@PG	ID:1	PN:MALT	CL:--mode BlastN --matchScore 2 --mismatchScore -3 --setLambda 0.625 --setK 0.41 --alignmentType Local --inFile 13-1941-6_S4_L001_R2_600000_fastq_gz.fastq.gz --index /home/galaxy/tool-data/malt_index/AF2122-1 --output ./output.rma6 --numThreads 1 --memoryMode load --maxTables 0 --minBitScore 50.0 --maxExpected 1.0 --minPercentIdentity 0.0 --maxAlignmentsPerQuery 25 --maxAlignmentsPerRef 1 --topPercent 10.0 --minSupportPercent 0.001 --minSupport 0 --minPercentIdentityLCA 0.0 --maxSeedsPerFrame 100 --maxSeedsPerRef 20 --seedShift 1 --gapOpen 11 --gapExtend 1 --band 4 --alignments ./alignments_output.SAM.gz --format SAM --outAligned ./aligned_output.fna.gz --outUnaligned ./unaligned_output.fna.gz	DS:BlastN
+@RG	ID:1	PL:unknown	SM:unknown
+@CO	BlastN-like alignments
+@CO	Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length
Binary file test-data/input_meganized.daa has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kegg_output.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,100 @@
+XXXXXXXXXX:7:1101:1582:1835#/1; ;
+XXXXXXXXXX:7:1101:1610:1859#/1; ;
+XXXXXXXXXX:7:1101:1743:1871#/1; ;
+XXXXXXXXXX:7:1101:1536:1878#/1; ;
+XXXXXXXXXX:7:1101:2990:100153#/1; ;
+XXXXXXXXXX:7:1101:1624:1906#/1; ;
+XXXXXXXXXX:7:1101:1666:1926#/1; ;
+XXXXXXXXXX:7:1101:2921:100163#/1; ;
+XXXXXXXXXX:7:1101:1513:1929#/1; ;
+XXXXXXXXXX:7:1101:2759:100170#/1; ;
+XXXXXXXXXX:7:1101:1708:1937#/1; ;
+XXXXXXXXXX:7:1101:2981:100211#/1; ;
+XXXXXXXXXX:7:1101:1688:1946#/1; ;
+XXXXXXXXXX:7:1101:2767:100225#/1; ;
+XXXXXXXXXX:7:1101:1536:1959#/1; ;
+XXXXXXXXXX:7:1101:2797:100234#/1; ;
+XXXXXXXXXX:7:1101:1552:1976#/1; ;
+XXXXXXXXXX:7:1101:1748:1978#/1; ;
+XXXXXXXXXX:7:1101:2779:100239#/1; ;
+XXXXXXXXXX:7:1101:1593:1980#/1; ;
+XXXXXXXXXX:7:1101:2946:100242#/1; ;
+XXXXXXXXXX:7:1101:1987:1781#/1; ;
+XXXXXXXXXX:7:1101:3046:100006#/1; ;
+XXXXXXXXXX:7:1101:1900:1788#/1; ;
+XXXXXXXXXX:7:1101:3214:100027#/1; ;
+XXXXXXXXXX:7:1101:1848:1879#/1; ;
+XXXXXXXXXX:7:1101:3237:100032#/1; ;
+XXXXXXXXXX:7:1101:3027:100049#/1; ;
+XXXXXXXXXX:7:1101:1756:1891#/1; ;
+XXXXXXXXXX:7:1101:3238:100065#/1; ;
+XXXXXXXXXX:7:1101:1915:1901#/1; ;
+XXXXXXXXXX:7:1101:3198:100082#/1; ;
+XXXXXXXXXX:7:1101:1964:1931#/1; ;
+XXXXXXXXXX:7:1101:3088:100091#/1; ;
+XXXXXXXXXX:7:1101:1840:1948#/1; ;
+XXXXXXXXXX:7:1101:3105:100094#/1; ;
+XXXXXXXXXX:7:1101:1958:1952#/1; ;
+XXXXXXXXXX:7:1101:3190:100106#/1; ;
+XXXXXXXXXX:7:1101:1993:1999#/1; ;
+XXXXXXXXXX:7:1101:3117:100110#/1; ;
+XXXXXXXXXX:7:1101:2159:1798#/1; ;
+XXXXXXXXXX:7:1101:3147:100111#/1; ;
+XXXXXXXXXX:7:1101:2152:1838#/1; ;
+XXXXXXXXXX:7:1101:3065:100152#/1; ;
+XXXXXXXXXX:7:1101:2180:1843#/1; ;
+XXXXXXXXXX:7:1101:3154:100159#/1; ;
+XXXXXXXXXX:7:1101:2125:1861#/1; ;
+XXXXXXXXXX:7:1101:3198:100173#/1; ;
+XXXXXXXXXX:7:1101:2076:1911#/1; ;
+XXXXXXXXXX:7:1101:3166:100190#/1; ;
+XXXXXXXXXX:7:1101:2196:1920#/1; ;
+XXXXXXXXXX:7:1101:3225:100207#/1; ;
+XXXXXXXXXX:7:1101:2115:1927#/1; ;
+XXXXXXXXXX:7:1101:3019:100219#/1; ;
+XXXXXXXXXX:7:1101:2179:1937#/1; ;
+XXXXXXXXXX:7:1101:3202:100230#/1; ;
+XXXXXXXXXX:7:1101:2149:1945#/1; ;
+XXXXXXXXXX:7:1101:3211:100242#/1; ;
+XXXXXXXXXX:7:1101:2169:1964#/1; ;
+XXXXXXXXXX:7:1101:3168:100244#/1; ;
+XXXXXXXXXX:7:1101:3005:100246#/1; ;
+XXXXXXXXXX:7:1101:2313:1789#/1; ;
+XXXXXXXXXX:7:1101:3253:100014#/1; ;
+XXXXXXXXXX:7:1101:2361:1794#/1; ;
+XXXXXXXXXX:7:1101:2337:1794#/1; ;
+XXXXXXXXXX:7:1101:3284:100039#/1; ;
+XXXXXXXXXX:7:1101:2477:1795#/1; ;
+XXXXXXXXXX:7:1101:3310:100056#/1; ;
+XXXXXXXXXX:7:1101:2355:1821#/1; ;
+XXXXXXXXXX:7:1101:3420:100060#/1; ;
+XXXXXXXXXX:7:1101:2418:1834#/1; ;
+XXXXXXXXXX:7:1101:3267:100061#/1; ;
+XXXXXXXXXX:7:1101:2378:1838#/1; ;
+XXXXXXXXXX:7:1101:3416:100083#/1; ;
+XXXXXXXXXX:7:1101:2481:1853#/1; ;
+XXXXXXXXXX:7:1101:3411:100111#/1; ;
+XXXXXXXXXX:7:1101:3258:100128#/1; ;
+XXXXXXXXXX:7:1101:2252:1856#/1; ;
+XXXXXXXXXX:7:1101:3428:100129#/1; ;
+XXXXXXXXXX:7:1101:2394:1871#/1; ;
+XXXXXXXXXX:7:1101:3387:100138#/1; ;
+XXXXXXXXXX:7:1101:2269:1904#/1; ;
+XXXXXXXXXX:7:1101:3444:100163#/1; ;
+XXXXXXXXXX:7:1101:2259:1943#/1; ;
+XXXXXXXXXX:7:1101:3371:100179#/1; ;
+XXXXXXXXXX:7:1101:2371:1957#/1; ;
+XXXXXXXXXX:7:1101:3311:100186#/1; ;
+XXXXXXXXXX:7:1101:2394:1961#/1; ;
+XXXXXXXXXX:7:1101:3438:100192#/1; ;
+XXXXXXXXXX:7:1101:2333:1962#/1; ;
+XXXXXXXXXX:7:1101:3479:100209#/1; ;
+XXXXXXXXXX:7:1101:2459:1990#/1; ;
+XXXXXXXXXX:7:1101:3417:100210#/1; ;
+XXXXXXXXXX:7:1101:2372:1994#/1; ;
+XXXXXXXXXX:7:1101:3452:100214#/1; ;
+XXXXXXXXXX:7:1101:2677:1830#/1; ;
+XXXXXXXXXX:7:1101:3354:100219#/1; ;
+XXXXXXXXXX:7:1101:2603:1846#/1; ;
+XXXXXXXXXX:7:1101:3600:100019#/1; ;
+XXXXXXXXXX:7:1101:2535:1848#/1; ;
Binary file test-data/read_extractor_input.rma6 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read_extractor_output.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,2 @@
+>sequence
+LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLVEWIWGGFSVDKATLNRFFAFHFILFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLGLXXXXXXXXXXXXXSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVILGLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGXIENY
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxonomy_output.txt	Sat Dec 11 11:52:57 2021 +0000
@@ -0,0 +1,100 @@
+XXXXXXXXXX:7:1101:1582:1835#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1610:1859#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1743:1871#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1536:1878#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2990:100153#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1624:1906#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1666:1926#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2921:100163#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1513:1929#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2759:100170#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1708:1937#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2981:100211#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1688:1946#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2767:100225#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1536:1959#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2797:100234#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1552:1976#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1748:1978#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2779:100239#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1593:1980#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2946:100242#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1987:1781#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3046:100006#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1900:1788#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3214:100027#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1848:1879#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3237:100032#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3027:100049#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1756:1891#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3238:100065#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1915:1901#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3198:100082#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1964:1931#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3088:100091#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1840:1948#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3105:100094#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1958:1952#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3190:100106#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:1993:1999#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3117:100110#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2159:1798#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3147:100111#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2152:1838#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3065:100152#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2180:1843#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3154:100159#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2125:1861#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3198:100173#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2076:1911#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3166:100190#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2196:1920#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3225:100207#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2115:1927#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3019:100219#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2179:1937#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3202:100230#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2149:1945#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3211:100242#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2169:1964#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3168:100244#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3005:100246#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2313:1789#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3253:100014#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2361:1794#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2337:1794#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3284:100039#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2477:1795#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3310:100056#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2355:1821#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3420:100060#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2418:1834#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3267:100061#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2378:1838#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3416:100083#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2481:1853#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3411:100111#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3258:100128#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2252:1856#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3428:100129#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2394:1871#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3387:100138#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2269:1904#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3444:100163#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2259:1943#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3371:100179#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2371:1957#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3311:100186#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2394:1961#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3438:100192#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2333:1962#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3479:100209#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2459:1990#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3417:100210#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2372:1994#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3452:100214#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2677:1830#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3354:100219#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2603:1846#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:3600:100019#/1; ; No hits; 100;
+XXXXXXXXXX:7:1101:2535:1848#/1; ; No hits; 100;