changeset 0:c19015f577a5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqkit commit 76c1a289f15cc9a9a7d9a49dc132af62cc1d5af2
author iuc
date Fri, 26 Sep 2025 16:48:57 +0000
parents
children 911de3a36b31
files macros.xml seqkit_split2.xml test-data/fx2tab_output1.tabular test-data/fx2tab_output2.tabular test-data/fx2tab_output3.tabular test-data/fx2tab_output4.tabular test-data/grep_output1.fasta.gz test-data/grep_output2.fasta.gz test-data/grep_output3.fastq.gz test-data/grep_output4.fasta.gz test-data/grep_pattern.fasta test-data/hairpin.fa.gz test-data/head_output1.fastq.gz test-data/head_output2.fasta.gz test-data/input1.fasta.gz test-data/input1.fastq.gz test-data/locate_output1.tabular test-data/locate_output2.bed test-data/locate_output3.gtf test-data/motif_sequence.fasta test-data/reads_1.fq.gz test-data/reads_2.fq.gz test-data/sort_output1.fastq.gz test-data/sort_output2.fasta.gz test-data/sort_output3.fasta.gz test-data/sort_output4.fasta.gz test-data/sort_output5.fastq.gz test-data/sort_output6.fastq.gz test-data/stats_output1.tabular test-data/stats_output2.tabular test-data/translate_output1.fasta.gz test-data/translate_output2.fastq.gz test-data/translate_output3.fastq.gz
diffstat 33 files changed, 619 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,22 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.10.1</token> <!-- version of the seqkit package requirement below; first half of the wrapper's version string -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended to the tool version as +galaxyN -->
+    <token name="@PROFILE@">24.0</token> <!-- value used for the tool's profile attribute -->
+    <token name="@FASTQ_TYPES@">fasta,fasta.gz,fastqsanger,fastqsanger.gz</token> <!-- accepted input datatypes; despite the name this also lists FASTA -->
+    <xml name="bio_tools"> <!-- cross-reference to the bio.tools entry -->
+        <xrefs>
+            <xref type="bio.tools">seqkit</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements"> <!-- package dependency, pinned through @TOOL_VERSION@ -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">seqkit</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations"> <!-- publication cited for the tool, given as a DOI -->
+        <citations>
+            <citation type="doi">10.1371/journal.pone.0163962</citation>
+        </citations>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqkit_split2.xml	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,268 @@
+<tool id="seqkit_split2" name="Seqkit Split2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Split sequences into files by part size, number of parts, or length</description>
+    <macros> <!-- macros.xml supplies the @...@ tokens and the bio_tools / requirements / citations macros expanded in this file -->
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+    #import re
+    ## seqkit writes every part into ./out; the "outputs_files" collection is discovered from that directory.
+    mkdir -p out &&
+
+    ## The preprocessing steps below are adapted from the cutadapt.xml tool wrapper.
+    ## Set things up for handling inputs and outputs in single- vs paired-end modes
+    #set input_type = str($input_file_type.type)
+    #set paired = $input_type != 'single'
+
+    ## Build shell-safe link names: anything other than word characters, "-" and whitespace becomes "_".
+    #if $input_type == 'paired_collection':
+        #set input_1 = $input_file_type.input_1.forward
+        #set input_2 = $input_file_type.input_1.reverse
+        #set safe_name = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.name))
+        #set read1 = $safe_name + "_1"
+        #set read2 = $safe_name + "_2"
+    #else:
+        #set input_1 = $input_file_type.input_1
+        #set read1 = re.sub('[^\w\-\s]', '_', str($input_file_type.input_1.element_identifier))
+    #end if
+
+    ## The link extension is a Galaxy datatype name; the tests expect it back as the datatype of the discovered parts.
+    #if $input_1.is_of_type("fastq", "fastq.gz"):
+        #set ext = ".fastqsanger"
+    #else:
+        #set ext = ".fasta"
+    #end if
+    #if $input_1.ext.endswith(".gz"):
+        #set ext = $ext + ".gz"
+    #end if
+    #set read1 = $read1 + $ext
+    #if $paired:
+        #if $input_2.is_of_type("fastq", "fastq.gz"):
+            #set ext2 = ".fastqsanger"
+        #else:
+            #set ext2 = ".fasta"
+        #end if
+        #if $input_2.ext.endswith(".gz"):
+            #set ext2 = $ext2 + ".gz"
+        #end if
+        #set read2 = $read2 + $ext2
+    #end if
+
+    ## Link in the input files
+    ln -fs '$input_1' '$read1' &&
+    #if $paired:
+        ln -fs '$input_2' '$read2' &&
+    #end if
+
+    ## NOTE(review): in paired mode both mates share the same -o prefix, yet Test 02 expects only two parts; confirm the read 2 parts are collected and not overwritten.
+    seqkit split2
+    #if $paired:
+        -1 '$read1'
+        -2 '$read2'
+    #else:
+        '$read1'
+    #end if
+    #if str($split_type.split_selector) == 'by_part':
+        -p $split_type.by_part
+    #else if str($split_type.split_selector) == 'by_size':
+        -s $split_type.by_size
+    #else if str($split_type.split_selector) == 'by_length':
+        ## Free-text parameter: quote it so the shell always receives exactly one argument.
+        -l '$split_type.by_length'
+    #end if
+    -o seqkit_split2
+    -O out
+    -j "\${GALAXY_SLOTS:-4}"
+    ]]></command>
+    <inputs>
+        <conditional name="input_file_type"> <!-- decides whether input_1 is a single dataset or a paired collection in the command template -->
+            <param name="type" type="select" label="Single-end or Paired-end reads?">
+                <option value="single">Single-end</option>
+                <option value="paired_collection">Paired-end Collection</option>
+            </param>
+            <when value="single">
+                <param name="input_1" type="data" format="@FASTQ_TYPES@" label="Input FASTQ/A file" help="Select a single FASTA or FASTQ file (gzipped or uncompressed)"/>
+            </when>
+            <when value="paired_collection">
+                <param name="input_1" format="@FASTQ_TYPES@" type="data_collection" collection_type="paired" label="Paired Collection" help="Select a paired collection of FASTA or FASTQ files (gzipped or uncompressed)" />
+            </when>
+        </conditional>
+        <conditional name="split_type"> <!-- exactly one of -p / -s / -l is passed to seqkit split2 -->
+            <param name="split_selector" type="select" label="Split sequences by">
+                <option value="by_part" selected="true">Number of parts</option>
+                <option value="by_size">Number of sequences per part</option>
+                <option value="by_length">Length of sequences</option>
+            </param>
+            <when value="by_part">
+                <param name="by_part" type="integer" value="" min="1" label="Number of parts" help="Split sequences into N parts using round-robin distribution." />
+            </when>
+            <when value="by_size">
+                <param name="by_size" type="integer" value="" min="1" label="Number of sequences per part" help="Split sequences into parts with N sequences each." />
+            </when>
+            <when value="by_length">
+                <param name="by_length" type="text" value="" label="Chunk size" help="Split sequences into chunks of >=N bases. Supports K/M/G suffix (e.g., 10K, 1M)">
+                    <validator type="regex" message="Enter a whole number, optionally followed by K, M or G (e.g., 10K)">^[0-9]+[KMG]?$</validator> <!-- digits first, then at most one unit letter; rejects values such as K or 1K2 -->
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <collection name="outputs_files" type="list" label="${tool.name} on ${on_string}: Split files"> <!-- one list element per part file found in ./out -->
+            <discover_datasets pattern="(?P&lt;designation&gt;seqkit_split2\.part_\d+)\.(?P&lt;ext&gt;.+)" directory="out"/> <!-- designation group names the element; ext group (everything after it) sets the datatype -->
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Test 01: single-end gzipped FASTQ split into 2 parts (-p 2). NOTE(review): the expected line counts are not multiples of 4, so they presumably count newlines in the compressed bytes; confirm, and consider decompress="true" with record-based counts. -->
+        <test expect_num_outputs="1">
+            <conditional name="input_file_type">
+                <param name="type" value="single"/>
+                <param name="input_1" value="reads_1.fq.gz"/>
+            </conditional>
+            <conditional name="split_type">
+                <param name="split_selector" value="by_part"/>
+                <param name="by_part" value="2"/>
+            </conditional>
+            <output_collection name="outputs_files" type="list" count="2">
+                <element name="seqkit_split2.part_001" ftype="fastqsanger.gz">
+                    <assert_contents>
+                        <has_n_lines n="4958"/>
+                    </assert_contents>
+                </element>
+                <element name="seqkit_split2.part_002" ftype="fastqsanger.gz">
+                    <assert_contents>
+                        <has_n_lines n="4949"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        
+        <!-- Test 02: paired FASTQ collection split into 2 parts (-p 2). Elements are uploaded as fastqsanger.gz so they match the formats the tool declares. NOTE(review): only two parts are expected for two mates; confirm the read 2 parts are collected. -->
+        <test expect_num_outputs="1">
+            <conditional name="input_file_type">
+                <param name="type" value="paired_collection"/>
+                <param name="input_1">
+                    <collection type="paired">
+                        <element name="forward" ftype="fastqsanger.gz" value="reads_1.fq.gz"/>
+                        <element name="reverse" ftype="fastqsanger.gz" value="reads_2.fq.gz"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="split_type">
+                <param name="split_selector" value="by_part"/>
+                <param name="by_part" value="2"/>
+            </conditional>
+            <output_collection name="outputs_files" type="list" count="2">
+                <element name="seqkit_split2.part_001" ftype="fastqsanger.gz">
+                    <assert_contents>
+                        <has_n_lines n="4958"/>
+                    </assert_contents>
+                </element>
+                <element name="seqkit_split2.part_002" ftype="fastqsanger.gz">
+                    <assert_contents>
+                        <has_n_lines n="4949"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 03: single-end gzipped FASTA (hairpin.fa.gz) split into 2 parts (-p 2); only the datatype and line count of each part are asserted. -->
+        <test expect_num_outputs="1">
+            <conditional name="input_file_type">
+                <param name="type" value="single"/>
+                <param name="input_1" value="hairpin.fa.gz"/>
+            </conditional>
+            <conditional name="split_type">
+                <param name="split_selector" value="by_part"/>
+                <param name="by_part" value="2"/>
+            </conditional>
+            <output_collection name="outputs_files" type="list" count="2">
+                <element name="seqkit_split2.part_001" ftype="fasta.gz">
+                    <assert_contents>
+                        <has_n_lines n="2988"/>
+                    </assert_contents>
+                </element>
+                <element name="seqkit_split2.part_002" ftype="fasta.gz">
+                    <assert_contents>
+                        <has_n_lines n="2987"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 04: single-end gzipped FASTA split into parts of 200 sequences (-s 200); 25 parts are expected and only the first two are inspected. NOTE(review): 200 FASTA records would normally span at least 400 lines, so n=224 suggests the count is taken on the compressed bytes; confirm. -->
+        <test expect_num_outputs="1">
+            <conditional name="input_file_type">
+                <param name="type" value="single"/>
+                <param name="input_1" value="hairpin.fa.gz"/>
+            </conditional>
+            <conditional name="split_type">
+                <param name="split_selector" value="by_size"/>
+                <param name="by_size" value="200"/>
+            </conditional>
+            <output_collection name="outputs_files" type="list" count="25">
+                <element name="seqkit_split2.part_001" ftype="fasta.gz">
+                    <assert_contents>
+                        <has_n_lines n="224"/>
+                    </assert_contents>
+                </element>
+                <element name="seqkit_split2.part_002" ftype="fasta.gz">
+                    <assert_contents>
+                        <has_n_lines n="281"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 05: single-end gzipped FASTA split into chunks of at least 50K bases (-l 50K); 10 parts are expected and only the first two are inspected. -->
+        <test expect_num_outputs="1">
+            <conditional name="input_file_type">
+                <param name="type" value="single"/>
+                <param name="input_1" value="hairpin.fa.gz"/>
+            </conditional>
+            <conditional name="split_type">
+                <param name="split_selector" value="by_length"/>
+                <param name="by_length" value="50K"/>
+            </conditional>
+            <output_collection name="outputs_files" type="list" count="10">
+                <element name="seqkit_split2.part_001" ftype="fasta.gz">
+                    <assert_contents>
+                        <has_n_lines n="642"/>
+                    </assert_contents>
+                </element>
+                <element name="seqkit_split2.part_002" ftype="fasta.gz">
+                    <assert_contents>
+                        <has_n_lines n="589"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**Seqkit Split2**
+
+This tool splits FASTA or FASTQ files (single-end or paired-end) into multiple files by number of parts, number of sequences per part, or a minimum number of bases per part. It is designed for fast processing with low memory usage.
+
+**Input type**: Choose between a single FASTA/FASTQ file and a paired collection of FASTA/FASTQ files (gzipped or uncompressed).
+
+**Split sequences by**:
+  - **Number of parts**: Split into N parts using round-robin distribution.
+  - **Number of sequences per part**: Split into parts with N sequences each.
+  - **Length of sequences**: Split into chunks of >=N bases (supports K/M/G suffix, e.g., 10K, 1M).
+
+**Outputs**
+
+- A collection of split FASTA/FASTQ files
+
+For more details, see the Seqkit Split2 documentation_
+
+.. _documentation: https://bioinf.shenwei.me/seqkit/usage/#split2
+
+    ]]></help>
+    <expand macro="citations"/>
+    <creator> <!-- wrapper author and the organization credited for it -->
+        <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12" identifier="https://orcid.org/0009-0003-9935-828X"/>
+        <organization name="Galaxy Europe" url="https://galaxyproject.org/eu/"/>
+    </creator>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fx2tab_output1.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,5 @@
+1/2	TATACTACTGTCATGTTTGCTTTTTTCGTGCTCATTACCTTATCGTATGCTTCCATCCAAAGATCTAGTTGTTTATAATATTCTCCCGGCCTTACTTCAAC	?@<DDDADDFAAC@>GG<FDCBHIIGB:8??DEAHGAFEDF<B?F9DBFFCDHF>48BBCF)8877=@=@C7=E??EBD:AADA;>C3;BB6:A@>>@:3>
+2/2	AAAAAAACTTTCTTTACAGGCGTAAAGAAAGTGAAATTGACAGTATTTATACATGAAATAGCAATGTCTTTCCCACTTCCCTACGCTGGCATTAACCAGAT	<@@DA6DDF42ABGF9F?F@C<EDDDFBGI>04BGC>BFF><?*88BDFDEAFFDGCGEFEEFIFFFEF>EBDBB@@:ACCCAB8@?=;B<@BABBBBB?#
+3/2	GTGCCATCATTTTCTATCCATTATTATGGATTATTGGCTCATCGTTTAATCCGGGTGATAGTTTATCTGGATCAAGTATTATTCCACAAAATGCAACGTTA	=BBFFFFFHHHHHJJJJJJJJJJJJIJIJEIIGIJJJHGIGGIIGHIDIJGHIIJ?FFHGIIJJJJJJJJHGIHHHCEHFFFFFFFFEAECCDADDDDDDD
+4/2	TTCAAAACACATAAAGCTAATTGCCGCATATGACAATATTGCTAAAATAATTTTTTTACCAGATATCGGTGTTAATCGAAATAATGTACTTTCGGTCATTT	BBCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJIIJJJJJJJIJJHHHHHFFDDCDEEEDDDDDDDDEDDDFFFEDDDDDDDDE
+5/2	ACTTGCCAATGCGATGCACCAATCTTTTCAGCAATAATCGGCAAAATTGGGTCGACTACTCCTATACCTGAAAAGGCAAGGAAAGTAGCCAACACTGTAAT	BCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIHIIJIJHHHHFFFFFFEEEEEEDDDDDDDDDDDDDCDDDDDDDDDDDDDED
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fx2tab_output2.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,6 @@
+#id	length	avg.qual
+1/2	101	24.98
+2/2	101	20.44
+3/2	101	37.13
+4/2	101	37.61
+5/2	101	37.28
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fx2tab_output3.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,3 @@
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAGCCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCACGGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTCCCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGATACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGCTGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCATTAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCACTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAATGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTACATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCACTCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTATAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATTTGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGGGCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAATTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGAATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAGTTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAACTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCCAGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTTCAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAAACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGCCCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATATAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAGCCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTCTATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCCCCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTAGGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTTATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTAGGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATTCTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTTGGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATATTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATACT
TTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACAAGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATGGATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGGAACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAATTTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCTTGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTTTTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGGCCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAACCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGTTCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATACTTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACATGTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATTAATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGTTATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGTGATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTTTCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAAGTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAATTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAATATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTTAGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGTTCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATTACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCTGGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTAGAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGTGACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATTTTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTTTTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTTAGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTGAGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGCTGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTTCACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAAGACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACATTATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCTGAGAGGGAATAATCTGAGCAAA
GGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCTTTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGGGCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAACAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCACGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTAGAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGAGGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCTGCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATGGAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCAATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCAAAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAAATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAAGGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGATAAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATAACTCAGTTTTTTTTTCTCCTACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT	
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	GGGGGGCTGCGCGGCCGGGTCGGTGCGCACACGAGAAGGACGCGCGGCCCCCAGCGCTCTTGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGACCCCGCGCTCCCGCAGCCATGGGCACCGGGGGCCGGCGGGGGGCGGCGGCCGCGCCGCTGCTGGTGGCGGTGGCCGCGCTGCTACTGGGCGCCGCGGGCCACCTGTACCCCGGAGAGGTGTGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCTGTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTATGGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTGTTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTACAACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTACTTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTGAACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACCAACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCACTGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGTTGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCCTGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCACTTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAGAACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAGTGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCCTGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCCCAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGCAACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTATCTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAACCTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTCTTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGAACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCCTGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTGAGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAACAGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAACCACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAGACCCTGGTCACCTTTT
CGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTATGTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCATCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCACTACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGCCTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCTCAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAGACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGATTACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGACCCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTGCCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAGCACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACACTTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGCAGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTTGGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCGAAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAGGAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGTGGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGCTCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATTGCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCTTCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGACGAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTCGGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTGGCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAGGCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCCAAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTACCTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAAGAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAGTTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGCAAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACCACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAACAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTATCTGGATCAACCCGACAACTGTCCAGAGAGAGTCACT
GACCTCATGCGCATGTGCTGGCAATTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTGCACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGTGAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCACTGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGCTACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTGACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTTCCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCTACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGACTCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAAGGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGTTTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAAAACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA	
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGCCACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTGCCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACAGCCATCCCACCAG	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fx2tab_output4.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,4 @@
+#id	length	GC	GC-Skew	A	AT	alphabet	avg.qual	seq.hash
+ENA|AB011145|AB011145.1	4796	38.47	11.65	1545	61.53	ACGT	0.00	c19cf05cadbdbc26e22efc2201acfcec
+ENA|M10051|M10051.1	4723	55.24	0.50	1068	44.76	ACGT	0.00	4f6bbf79e427ef90b6f31de5023ad241
+ENA|BC112106|BC112106.1	1213	58.78	-15.01	233	41.22	ACGT	0.00	3b099f7df389373bb7e3269efc819599
Binary file test-data/grep_output1.fasta.gz has changed
Binary file test-data/grep_output2.fasta.gz has changed
Binary file test-data/grep_output3.fastq.gz has changed
Binary file test-data/grep_output4.fasta.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/grep_pattern.fasta	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,2 @@
+ENA|AB011145|AB011145.1
+ENA|BC112106|BC112106.1
Binary file test-data/hairpin.fa.gz has changed
Binary file test-data/head_output1.fastq.gz has changed
Binary file test-data/head_output2.fasta.gz has changed
Binary file test-data/input1.fasta.gz has changed
Binary file test-data/input1.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/locate_output1.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,95 @@
+seqID	patternName	pattern	strand	start	end	matched
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	251	256	ATAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	435	440	AGAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	660	665	ATAGAG
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	741	746	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	852	857	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	1142	1147	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	1320	1325	ATAGGT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	1621	1626	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	1778	1783	AGAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	1784	1789	ATACAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	1810	1815	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2038	2043	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2120	2125	CTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2235	2240	ATAGTT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2243	2248	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2318	2323	CTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2527	2532	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2549	2554	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2603	2608	ATATAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2651	2656	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2726	2731	ATAGTT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2744	2749	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	2902	2907	TTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3041	3046	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3488	3493	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3539	3544	TTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3675	3680	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3739	3744	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3770	3775	ATAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3825	3830	AGAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3862	3867	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	3870	3875	ATAGGT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4052	4057	ATATAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4068	4073	AGAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4080	4085	ACAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4388	4393	TTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4446	4451	ATAGAG
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4500	4505	AAAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4563	4568	ATAGAA
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4607	4612	ATAGAA
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4675	4680	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	+	4692	4697	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	4603	4608	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	4459	4464	TTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	4223	4228	GTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	4072	4077	ATGGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	4052	4057	ATATAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	3634	3639	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	3566	3571	ATAGAC
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	2918	2923	ACAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	2778	2783	ATACAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	2709	2714	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	2644	2649	ATAAAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	2603	2608	ATATAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	2531	2536	ATTGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1734	1739	ATTGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1601	1606	ACAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1438	1443	ATAGAA
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1422	1427	ATAGGT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1329	1334	ATAGAG
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1146	1151	ATACAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	1121	1126	ATACAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	811	816	ATAGCT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	764	769	GTAGAT
+ENA|AB011145|AB011145.1	ATAGAT	ATAGAT	-	656	661	ATAGTT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	250	255	ATGGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	664	669	AAAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	1987	1992	ACAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	3540	3545	AGAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	3561	3566	AGAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	3886	3891	ATGGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	4400	4405	AGAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	+	4413	4418	ATACAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	3688	3693	ATAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	3065	3070	ATAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	3061	3066	ATAAAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	3025	3030	AAAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	2956	2961	ATAGGT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	2828	2833	AGAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	2719	2724	AAAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	1975	1980	ATAAAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	1951	1956	ATAGGT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	1909	1914	AAAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	1659	1664	AAAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	1417	1422	ATAGAA
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	1319	1324	TTAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	802	807	ACAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	691	696	ACAGAT
+ENA|M10051|M10051.1	ATAGAT	ATAGAT	-	415	420	ATAGAC
+ENA|BC112106|BC112106.1	ATAGAT	ATAGAT	+	386	391	ATGGAT
+ENA|BC112106|BC112106.1	ATAGAT	ATAGAT	-	1000	1005	ATAGAT
+ENA|BC112106|BC112106.1	ATAGAT	ATAGAT	-	985	990	GTAGAT
+ENA|BC112106|BC112106.1	ATAGAT	ATAGAT	-	742	747	AAAGAT
+ENA|BC112106|BC112106.1	ATAGAT	ATAGAT	-	700	705	GTAGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/locate_output2.bed	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,202 @@
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	136	139	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	174	177	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	254	257	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	269	272	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	275	278	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	296	299	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	322	325	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	353	356	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	377	380	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	479	482	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	484	487	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	487	490	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	490	493	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	692	695	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	695	698	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	725	728	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	796	799	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	814	817	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	827	830	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	855	858	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	890	893	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	940	943	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	977	980	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1028	1031	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1120	1123	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1124	1127	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1145	1148	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1220	1223	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1265	1268	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1346	1349	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1441	1444	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1510	1513	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1629	1632	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1693	1696	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1709	1712	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1730	1733	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1866	1869	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1902	1905	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1972	1975	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	1991	1994	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2028	2031	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2037	2040	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2041	2044	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2060	2063	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2123	2126	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2147	2150	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2294	2297	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2358	2361	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2578	2581	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2590	2593	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2597	2600	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2606	2609	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2650	2653	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2661	2664	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2701	2704	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2705	2708	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2747	2750	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2777	2780	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2811	2814	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2875	2878	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2889	2892	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2908	2911	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2933	2936	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	2981	2984	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3093	3096	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3125	3128	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3135	3138	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3191	3194	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3278	3281	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3342	3345	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3346	3349	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3451	3454	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3487	3490	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3491	3494	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3554	3557	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3668	3671	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3671	3674	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3678	3681	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3738	3741	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3744	3747	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3753	3756	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3773	3776	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3865	3868	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3925	3928	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	3938	3941	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4000	4003	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4100	4103	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4262	4265	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4377	4380	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4391	4394	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4452	4455	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4483	4486	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4531	4534	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4577	4580	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4613	4616	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4674	4677	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4691	4694	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4750	4753	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4769	4772	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4774	4777	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4788	4791	A[TU]G	0	+
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.	4794	4797	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	78	81	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	138	141	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	249	252	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	280	283	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	330	333	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	384	387	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	418	421	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	510	513	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	546	549	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	586	589	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	667	670	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	761	764	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1013	1016	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1098	1101	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1127	1130	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1214	1217	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1420	1423	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1542	1545	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1603	1606	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1627	1630	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1682	1685	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1728	1731	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1759	1762	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1774	1777	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1783	1786	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1875	1878	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1903	1906	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1939	1942	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1954	1957	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1978	1981	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	1990	1993	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2083	2086	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2242	2245	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2261	2264	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2434	2437	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2443	2446	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2670	2673	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2689	2692	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2713	2716	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2745	2748	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2770	2773	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2788	2791	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2809	2812	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	2815	2818	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3091	3094	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3148	3151	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3158	3161	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3243	3246	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3250	3253	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3259	3262	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3355	3358	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3369	3372	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3444	3447	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3453	3456	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3543	3546	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3552	3555	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3576	3579	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3633	3636	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3643	3646	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3675	3678	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3691	3694	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3744	3747	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3766	3769	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3792	3795	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3862	3865	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3885	3888	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3889	3892	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3942	3945	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3948	3951	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	3972	3975	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4092	4095	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4107	4110	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4114	4117	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4168	4171	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4227	4230	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4383	4386	A[TU]G	0	+
+ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.	4590	4593	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	87	90	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	91	94	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	130	133	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	188	191	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	201	204	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	216	219	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	342	345	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	385	388	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	413	416	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	513	516	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	541	544	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	549	552	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	573	576	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	705	708	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	732	735	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	754	757	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	843	846	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	855	858	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	948	951	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	1008	1011	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	1011	1014	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	1035	1038	A[TU]G	0	+
+ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.	1078	1081	A[TU]G	0	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/locate_output3.gtf	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,4 @@
+ENA|AB011145|AB011145.1	SeqKit	location	2270	2276	0	+	.	gene_id "test_motif2"; 
+ENA|AB011145|AB011145.1	SeqKit	location	2642	2648	0	+	.	gene_id "test_motif2"; 
+ENA|AB011145|AB011145.1	SeqKit	location	2655	2661	0	+	.	gene_id "test_motif2"; 
+ENA|AB011145|AB011145.1	SeqKit	location	2771	2777	0	+	.	gene_id "test_motif2"; 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_sequence.fasta	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,4 @@
+>test_motif
+ATATACTAT
+>test_motif2
+ATATTTA
Binary file test-data/reads_1.fq.gz has changed
Binary file test-data/reads_2.fq.gz has changed
Binary file test-data/sort_output1.fastq.gz has changed
Binary file test-data/sort_output2.fasta.gz has changed
Binary file test-data/sort_output3.fasta.gz has changed
Binary file test-data/sort_output4.fasta.gz has changed
Binary file test-data/sort_output5.fastq.gz has changed
Binary file test-data/sort_output6.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_output1.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,2 @@
+file	format	type	num_seqs	sum_len	min_len	avg_len	max_len	Q1	Q2	Q3	sum_gap	N50	N50_num	Q20(%)	Q30(%)	AvgQual	GC(%)	sum_n
+input1_fastq_gz	FASTQ	DNA	5	505	101	101.0	101	101	101	101	0	101	1	98	89	25.93	35.45	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats_output2.tabular	Fri Sep 26 16:48:57 2025 +0000
@@ -0,0 +1,2 @@
+file	format	type	num_seqs	sum_len	min_len	avg_len	max_len	Q1	Q2	Q3	sum_gap	N50	N50_num	Q20(%)	Q30(%)	AvgQual	GC(%)	sum_n
+input1_fasta_gz	FASTA	DNA	3	10732	1213	3577.3	4796	2968	4723	4760	0	4723	2	0	0	0.00	48.15	0
Binary file test-data/translate_output1.fasta.gz has changed
Binary file test-data/translate_output2.fastq.gz has changed
Binary file test-data/translate_output3.fastq.gz has changed