Repository 'smgu_frameshift_deletions_checks'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/smgu_frameshift_deletions_checks

Changeset 1:029d90b0c4f6 (2023-07-14)
Previous changeset 0:f079716f598c (2023-05-31) Next changeset 2:e8971ca74398 (2023-08-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit e702dcdbc7c3235ef3c4ee8998c7247d1af49465
modified:
frameshift_deletions_checks.xml
macros.xml
test-data/frameshift_deletions_check.tsv
added:
annotations_NC_045512.2.gff3
frameshift_deletions_report_fixer.py
ref_NC_045512.2.fasta
test-data/NC_045512.2.fasta
test-data/frameshift_deletions_check_no_english.tsv
removed:
test-data/NC_045512.2.fasta.gz
b
diff -r f079716f598c -r 029d90b0c4f6 annotations_NC_045512.2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/annotations_NC_045512.2.gff3 Fri Jul 14 22:07:22 2023 +0000
b
@@ -0,0 +1,36 @@
+##gff-version 3
+#!gff-spec-version 1.21
+#!processor NCBI annotwriter
+##sequence-region NC_045512.2 1 29903
+NC_045512.2 RefSeq five_prime_UTR 1 265 . + . ID=id-NC_045512.2:1..265;gbkey=5'UTR
+NC_045512.2 RefSeq gene 266 21555 . + . ID=gene-GU280_gp01;Dbxref=GeneID:43740578;Name=ORF1ab;gbkey=Gene;gene=ORF1ab;gene_biotype=protein_coding;locus_tag=GU280_gp01
+NC_045512.2 RefSeq CDS 266 13468 . + 1 ID=cds-YP_009724389.1;Parent=gene-GU280_gp01;Dbxref=Genbank:YP_009724389.1,GeneID:43740578;Name=YP_009724389.1;Note=pp1ab%3B translated by -1 ribosomal frameshift;exception=ribosomal slippage;gbkey=CDS;gene=ORF1ab;locus_tag=GU280_gp01;product=ORF1ab polyprotein;protein_id=YP_009724389.1
+NC_045512.2 RefSeq CDS 13468 21555 . + 1 ID=cds-YP_009724389.1;Parent=gene-GU280_gp01;Dbxref=Genbank:YP_009724389.1,GeneID:43740578;Name=YP_009724389.1;Note=pp1ab%3B translated by -1 ribosomal frameshift;exception=ribosomal slippage;gbkey=CDS;gene=ORF1ab;locus_tag=GU280_gp01;product=ORF1ab polyprotein;protein_id=YP_009724389.1
+NC_045512.2 RefSeq CDS 266 13483 . + 1 ID=cds-YP_009725295.1;Parent=gene-GU280_gp01;Dbxref=Genbank:YP_009725295.1,GeneID:43740578;Name=YP_009725295.1;Note=pp1a;gbkey=CDS;gene=ORF1ab;locus_tag=GU280_gp01;product=ORF1a polyprotein;protein_id=YP_009725295.1
+NC_045512.2 RefSeq stem_loop 13476 13503 . + . ID=id-GU280_gp01;Dbxref=GeneID:43740578;function=Coronavirus frameshifting stimulation element stem-loop 1;gbkey=stem_loop;gene=ORF1ab;inference=COORDINATES: profile:Rfam-release-14.1:RF00507%2CInfernal:1.1.2;locus_tag=GU280_gp01
+NC_045512.2 RefSeq stem_loop 13488 13542 . + . ID=id-GU280_gp01-2;Dbxref=GeneID:43740578;function=Coronavirus frameshifting stimulation element stem-loop 2;gbkey=stem_loop;gene=ORF1ab;inference=COORDINATES: profile:profile:Rfam-release-14.1:RF00507%2CInfernal:1.1.2;locus_tag=GU280_gp01
+NC_045512.2 RefSeq gene 21563 25384 . + . ID=gene-GU280_gp02;Dbxref=GeneID:43740568;Name=S;gbkey=Gene;gene=S;gene_biotype=protein_coding;gene_synonym=spike glycoprotein;locus_tag=GU280_gp02
+NC_045512.2 RefSeq CDS 21563 25384 . + 1 ID=cds-YP_009724390.1;Parent=gene-GU280_gp02;Dbxref=Genbank:YP_009724390.1,GeneID:43740568;Name=YP_009724390.1;Note=structural protein%3B spike protein;gbkey=CDS;gene=S;locus_tag=GU280_gp02;product=surface glycoprotein;protein_id=YP_009724390.1
+NC_045512.2 RefSeq gene 25393 26220 . + . ID=gene-GU280_gp03;Dbxref=GeneID:43740569;Name=ORF3a;gbkey=Gene;gene=ORF3a;gene_biotype=protein_coding;locus_tag=GU280_gp03
+NC_045512.2 RefSeq CDS 25393 26220 . + 1 ID=cds-YP_009724391.1;Parent=gene-GU280_gp03;Dbxref=Genbank:YP_009724391.1,GeneID:43740569;Name=YP_009724391.1;gbkey=CDS;gene=ORF3a;locus_tag=GU280_gp03;product=ORF3a protein;protein_id=YP_009724391.1
+NC_045512.2 RefSeq gene 26245 26472 . + . ID=gene-GU280_gp04;Dbxref=GeneID:43740570;Name=E;gbkey=Gene;gene=E;gene_biotype=protein_coding;locus_tag=GU280_gp04
+NC_045512.2 RefSeq CDS 26245 26472 . + 1 ID=cds-YP_009724392.1;Parent=gene-GU280_gp04;Dbxref=Genbank:YP_009724392.1,GeneID:43740570;Name=YP_009724392.1;Note=ORF4%3B structural protein%3B E protein;gbkey=CDS;gene=E;locus_tag=GU280_gp04;product=envelope protein;protein_id=YP_009724392.1
+NC_045512.2 RefSeq gene 26523 27191 . + . ID=gene-GU280_gp05;Dbxref=GeneID:43740571;Name=M;gbkey=Gene;gene=M;gene_biotype=protein_coding;locus_tag=GU280_gp05
+NC_045512.2 RefSeq CDS 26523 27191 . + 1 ID=cds-YP_009724393.1;Parent=gene-GU280_gp05;Dbxref=Genbank:YP_009724393.1,GeneID:43740571;Name=YP_009724393.1;Note=ORF5%3B structural protein;gbkey=CDS;gene=M;locus_tag=GU280_gp05;product=membrane glycoprotein;protein_id=YP_009724393.1
+NC_045512.2 RefSeq gene 27202 27387 . + . ID=gene-GU280_gp06;Dbxref=GeneID:43740572;Name=ORF6;gbkey=Gene;gene=ORF6;gene_biotype=protein_coding;locus_tag=GU280_gp06
+NC_045512.2 RefSeq CDS 27202 27387 . + 1 ID=cds-YP_009724394.1;Parent=gene-GU280_gp06;Dbxref=Genbank:YP_009724394.1,GeneID:43740572;Name=YP_009724394.1;gbkey=CDS;gene=ORF6;locus_tag=GU280_gp06;product=ORF6 protein;protein_id=YP_009724394.1
+NC_045512.2 RefSeq gene 27394 27759 . + . ID=gene-GU280_gp07;Dbxref=GeneID:43740573;Name=ORF7a;gbkey=Gene;gene=ORF7a;gene_biotype=protein_coding;locus_tag=GU280_gp07
+NC_045512.2 RefSeq CDS 27394 27759 . + 1 ID=cds-YP_009724395.1;Parent=gene-GU280_gp07;Dbxref=Genbank:YP_009724395.1,GeneID:43740573;Name=YP_009724395.1;gbkey=CDS;gene=ORF7a;locus_tag=GU280_gp07;product=ORF7a protein;protein_id=YP_009724395.1
+NC_045512.2 RefSeq gene 27756 27887 . + . ID=gene-GU280_gp08;Dbxref=GeneID:43740574;Name=ORF7b;gbkey=Gene;gene=ORF7b;gene_biotype=protein_coding;locus_tag=GU280_gp08
+NC_045512.2 RefSeq CDS 27756 27887 . + 1 ID=cds-YP_009725318.1;Parent=gene-GU280_gp08;Dbxref=Genbank:YP_009725318.1,GeneID:43740574;Name=YP_009725318.1;gbkey=CDS;gene=ORF7b;locus_tag=GU280_gp08;product=ORF7b;protein_id=YP_009725318.1
+NC_045512.2 RefSeq gene 27894 28259 . + . ID=gene-GU280_gp09;Dbxref=GeneID:43740577;Name=ORF8;gbkey=Gene;gene=ORF8;gene_biotype=protein_coding;locus_tag=GU280_gp09
+NC_045512.2 RefSeq CDS 27894 28259 . + 1 ID=cds-YP_009724396.1;Parent=gene-GU280_gp09;Dbxref=Genbank:YP_009724396.1,GeneID:43740577;Name=YP_009724396.1;gbkey=CDS;gene=ORF8;locus_tag=GU280_gp09;product=ORF8 protein;protein_id=YP_009724396.1
+NC_045512.2 RefSeq gene 28274 29533 . + . ID=gene-GU280_gp10;Dbxref=GeneID:43740575;Name=N;gbkey=Gene;gene=N;gene_biotype=protein_coding;locus_tag=GU280_gp10
+NC_045512.2 RefSeq CDS 28274 29533 . + 1 ID=cds-YP_009724397.2;Parent=gene-GU280_gp10;Dbxref=Genbank:YP_009724397.2,GeneID:43740575;Name=YP_009724397.2;Note=ORF9%3B structural protein;gbkey=CDS;gene=N;locus_tag=GU280_gp10;product=nucleocapsid phosphoprotein;protein_id=YP_009724397.2
+NC_045512.2 RefSeq gene 29558 29674 . + . ID=gene-GU280_gp11;Dbxref=GeneID:43740576;Name=ORF10;gbkey=Gene;gene=ORF10;gene_biotype=protein_coding;locus_tag=GU280_gp11
+NC_045512.2 RefSeq CDS 29558 29674 . + 1 ID=cds-YP_009725255.1;Parent=gene-GU280_gp11;Dbxref=Genbank:YP_009725255.1,GeneID:43740576;Name=YP_009725255.1;gbkey=CDS;gene=ORF10;locus_tag=GU280_gp11;product=ORF10 protein;protein_id=YP_009725255.1
+NC_045512.2 RefSeq stem_loop 29609 29644 . + . ID=id-GU280_gp11;Dbxref=GeneID:43740576;function=Coronavirus 3' UTR pseudoknot stem-loop 1;gbkey=stem_loop;gene=ORF10;inference=COORDINATES: profile::Rfam-release-14.1:RF00165%2CInfernal:1.1.2;locus_tag=GU280_gp11
+NC_045512.2 RefSeq stem_loop 29629 29657 . + . ID=id-GU280_gp11-2;Dbxref=GeneID:43740576;function=Coronavirus 3' UTR pseudoknot stem-loop 2;gbkey=stem_loop;gene=ORF10;inference=COORDINATES: profile::Rfam-release-14.1:RF00165%2CInfernal:1.1.2;locus_tag=GU280_gp11
+NC_045512.2 RefSeq three_prime_UTR 29675 29903 . + . ID=id-NC_045512.2:29675..29903;gbkey=3'UTR
+NC_045512.2 RefSeq stem_loop 29728 29768 . + . ID=id-NC_045512.2:29728..29768;Note=basepair exception: alignment to the Rfam model implies coordinates 29740:29758 form a noncanonical C:T basepair%2C but the homologous positions form a highly conserved C:G basepair in other viruses%2C including SARS (NC_004718.3);function=Coronavirus 3' stem-loop II-like motif (s2m);gbkey=stem_loop;inference=COORDINATES: profile:Rfam-release-14.1:RF00164%2CInfernal:1.1.2
+###
b
diff -r f079716f598c -r 029d90b0c4f6 frameshift_deletions_checks.xml
--- a/frameshift_deletions_checks.xml Wed May 31 17:10:11 2023 +0000
+++ b/frameshift_deletions_checks.xml Fri Jul 14 22:07:22 2023 +0000
[
b'@@ -11,52 +11,104 @@\n     <command detect_errors="exit_code">\n     <![CDATA[\n #if $input.is_of_type("cram"):\n-echo \'is CRAM\' >&2 &&\n- ln -vs \'$input\' \'input.${input.ext}\' >&2 &&\n- ln -vs \'$input.metadata.cram_index\' \'input.${input.ext}.crai\' >&2 &&\n+ln -s \'$input\' input.cram &&\n+ln -s \'$input.metadata.cram_index\' input.cram.crai &&\n #elif $input.is_of_type("bam"):\n- echo \'is BAM\' &&\n- ln -s \'$input\' \'input.${input.ext}\' >&2 &&\n- ln -s \'$input.metadata.bam_index\' \'input.${input.ext}.bai\' >&2 &&\n+ln -s \'$input\' input.bam &&\n+ln -s \'$input.metadata.bam_index\' input.bam.bai &&\n #else:\n # raise TypeError(\'Unknown input alignment type ${input.ext}\')\n #end if\n-ln -vs \'$consensus\' \'consensus.${consensus.ext}\' >&2 &&\n-ln -vs \'$reference\' \'reference.${reference.ext}\' >&2 &&\n+ln -s \'$consensus\' consensus.fasta &&\n+#if str($ref_data.choice) == \'custom\':\n+ln -s \'$reference\' reference.fa &&\n+#else:\n+ln -s \'$__tool_directory__/ref_NC_045512.2.fasta\' reference.fa &&\n+#end if\n \n frameshift_deletions_checks \n-    --input=\'input.${input.ext}\'\n-    --consensus=\'consensus.${consensus.ext}\'\n-    --reference=\'reference.${reference.ext}\'\n-    --genes=\'$genes\'\n-    --output=\'$report\' \n-#if $orf1ab != \'\':\n-    --orf1ab=\'$orf1ab\'\n+    --input=input.${input.ext}\n+    --consensus=consensus.fasta\n+    --reference=reference.fa\n+#if str($ref_data.choice) == \'standard\':\n+    --genes=\'$__tool_directory__\'/annotations_NC_045512.2.gff3\n+    --orf1ab=\'cds-YP_009724389.1\'\n+#else:\n+    --genes=\'$ref_data.genes\'\n+    --orf1ab=\'$ref_data.orf1ab\'\n #end if\n-    \'$english\'\n-    $zero_based\n+    $out_options.english\n+    $out_options.zero_based\n+    --output=report.tsv &&\n+python \'$__tool_directory__/frameshift_deletions_report_fixer.py\' report.tsv \'$report\'\n ]]>\n     <!-- ##cores \\${GALAXY_SLOTS:-4} -->\n     </command>\n     <inputs>\n         <param argument="--consensus" 
type="data" format="fasta" label="Consensus" help="Fasta file containing the sample\'s consensus sequence (majority, with indels)" />\n         <param argument="--input" type="data" format="bam,cram" label="Input BAM" help="Input BAM file with sample\'s sequencing reads, aligned against the reference" />\n-        <param argument="--reference" type="data" format="fasta" label="Reference" help="Fasta file containing the reference sequence (used during alignment) to compare against" />\n-        <param argument="--genes" type="data" format="gff" label="Genes GFF" help="GFF file listing genes positions on the reference sequence" />\n-        \n-        <param argument="--english" type="boolean" truevalue="--english" falsevalue="--no-english" checked="true" optional="true" label="Write Summary Diagnosis?" help="If checked writes english summary diagnosis." />\n-        <param argument="--zero-based" type="boolean" truevalue="--zero-based" falsevalue="" checked="false" optional="true" label="Use 0-based Sequence Positions?" help="Use 0-based (python) instead of 1-based (standard) seq positions" />\n-        <param argument="--orf1ab" type="text" value="cds-YP_009724389.1" optional="true" label="ORF1AB" help="CDS ID for the full Orf1ab CDS, comprising the ribosomal shift. In the GFF this CDS should consist of 2 entries with the same CDS ID due to the partial overlap caused by the ribosomal shift at translation time."/>\n+        <conditional name="ref_data">\n+            <param name="choice" type="select" label="Reference data selection" help="Select built-in genome files to base reported positions and annotations on the SARS-CoV-2 reference sequence NC_045512.2. 
If you have mapped to a different reference, select custom genome files and provide the reference sequence and genomic feature annotations for it in fasta and gff format, respectively.">\n+                <option value="standard">Use built-in genome files</option>\n+                <option value="custom">Provide custom genome files</option>\n+            </param>\n+            <when value="standard" />\n+            <when value="custom">\n+                <param ar'..b'evalue="--zero-based" falsevalue="" checked="false" optional="true" label="Use 0-based Sequence Positions?" help="Use 0-based instead of 1-based genome positions" />\n+        </section>\n     </inputs>\n     <outputs>\n-        <data name="report" format="tabular" />\n+        <data name="report" format="tabular">\n+            <actions>\n+                <conditional name="out_options.english">\n+                    <!-- The "english" flag removes certain numerical columns and collapses them into new text columns -->\n+                    <when value="--english">\n+                        <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id,variant_position_english,variant_diagnosis" />\n+                    </when>\n+                    <when value="--no-english">\n+                        <action name="column_names" type="metadata" 
default="ref_id,start_position,length,VARIANT,gene_region,aa_position,stop_mismatches,stoploss_nt,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id" />\n+                    </when>\n+                </conditional>\n+            </actions>\n+        </data>\n     </outputs>\n     <tests>\n         <test>\n             <param name="consensus" value="consensus.bcftools.fasta.gz" />\n             <param name="input" value="REF_aln_trim.cram" />\n-            <param name="reference" value="NC_045512.2.fasta.gz" />\n-            <param name="genes" value="Genes_NC_045512.2.GFF3" />\n+            <conditional name="ref_data">\n+                <param name="choice" value="standard" />\n+            </conditional>\n+            <output name="report" value="frameshift_deletions_check.tsv" />\n+        </test>\n+        <test>\n+            <param name="consensus" value="consensus.bcftools.fasta.gz" />\n+            <param name="input" value="REF_aln_trim.cram" />\n+            <conditional name="ref_data">\n+                <param name="choice" value="standard" />\n+            </conditional>\n+            <section name="out_options">\n+                <param name="english" value="false" />\n+            </section>\n+            <output name="report" value="frameshift_deletions_check_no_english.tsv" />\n+        </test>\n+        <test>\n+            <param name="consensus" value="consensus.bcftools.fasta.gz" />\n+            <param name="input" value="REF_aln_trim.cram" />\n+            <conditional name="ref_data">\n+                <param name="choice" value="custom" />\n+                <param name="reference" value="NC_045512.2.fasta" />\n+                <param name="genes" 
value="Genes_NC_045512.2.GFF3" />\n+            </conditional>\n             <output name="report" value="frameshift_deletions_check.tsv" />\n         </test>\n     </tests>\n@@ -71,8 +123,8 @@\n \n * *gene_region*: Gene in which the deletion is found according to ``--genes`` argument;\n * *reads_all*: Total number of reads covering the indel;\n-* *reads_fwd*: Total nubmer of forward reads covering the indel;\n-* *reads_rev*: Total nubmer of reverse reads covering the indel;\n+* *reads_fwd*: Total number of forward reads covering the indel;\n+* *reads_rev*: Total number of reverse reads covering the indel;\n * *deletions/insertions*: Number of reads supporting the deletion/insertion;\n * *freq_del/freq_insert*: Fraction of reads supporting the deletion/insertion;\n * *matches_ref*: number of reads that match the reference base;\n'
b
diff -r f079716f598c -r 029d90b0c4f6 frameshift_deletions_report_fixer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/frameshift_deletions_report_fixer.py Fri Jul 14 22:07:22 2023 +0000
[
@@ -0,0 +1,63 @@
+"""Polish the output of the frameshift_deletions_check command.
+
+- Drops the first index column, which is rather pointless to include
+- Turns ref bases printed as literal bytes strings into plain output
+- Removes [] around pos lists and spaces after comma separating list elements
+- Turns None and empty list values into . as a cell placeholder
+"""
+
+import re
+import sys
+
+# Single pattern with three alternatives; exactly one group is set per match:
+#   1. the content of a bytes literal such as b'A' - [^']+ keeps the match
+#      inside one literal even if more quotes follow later on the line
+#   2. the content of a [...] list, which may be empty
+#   3. a cell that consists solely of None - the lookarounds do not consume
+#      the neighbouring tabs, so runs of adjacent None cells and a None in
+#      the first or last column are all matched
+CELL_REGEX = re.compile(
+    r"b'([^']+)'|\[([^\]]*)\]|(?<![^\t])(None)(?![^\t\n])"
+)
+
+
+def matchrepl(matchobj):
+    """Return the replacement text for a single CELL_REGEX match.
+
+    Bytes literals are reduced to their content, lists lose their brackets
+    and the space after each comma, and empty lists and None cells become
+    the placeholder ".".
+
+    Raises ValueError if none of the three groups took part in the match.
+    """
+    bytes_string_content = matchobj.group(1)
+    if bytes_string_content is not None:
+        return bytes_string_content
+    list_content = matchobj.group(2)
+    if list_content is not None:
+        if list_content == '':
+            return '.'
+        return list_content.replace(', ', ',')
+    if matchobj.group(3) is not None:
+        # only the None token itself was matched, the tabs stay in place
+        return '.'
+
+    raise ValueError('Error in regex parsing code')
+
+
+def polish_line(line):
+    """Return one report line without its index column and with clean cells.
+
+    Lines without any tab (e.g. blank lines) have no index column to drop
+    and are returned unchanged.
+    """
+    _index, tab, cells = line.partition('\t')
+    if not tab:
+        return line
+    return CELL_REGEX.sub(matchrepl, cells)
+
+
+if __name__ == '__main__':
+    with open(sys.argv[1]) as i, open(sys.argv[2], 'w') as o:
+        for line in i:
+            o.write(polish_line(line))
b
diff -r f079716f598c -r 029d90b0c4f6 macros.xml
--- a/macros.xml Wed May 31 17:10:11 2023 +0000
+++ b/macros.xml Fri Jul 14 22:07:22 2023 +0000
b
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">0.3.9</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">21.09</token>
     <xml name="xrefs">
         <xrefs>
b
diff -r f079716f598c -r 029d90b0c4f6 ref_NC_045512.2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ref_NC_045512.2.fasta Fri Jul 14 22:07:22 2023 +0000
b
b'@@ -0,0 +1,430 @@\n+>NC_045512.2 Wuhan seafood market pneumonia virus isolate Wuhan-Hu-1, complete genome\n+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA\n+CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC\n+TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG\n+TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC\n+CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC\n+GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG\n+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT\n+GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC\n+GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT\n+TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA\n+GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG\n+TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG\n+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG\n+TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG\n+CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA\n+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA\n+CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC\n+CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA\n+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT\n+ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG\n+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG\n+CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA\n+CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA\n+ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA\n+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT\n+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAA
AAAAG\n+GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG\n+TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC\n+GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG\n+ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG\n+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT\n+AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA\n+TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT\n+AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA\n+GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC\n+TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT\n+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA\n+GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT\n+ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA\n+GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT\n+GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA\n+ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC\n+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA\n+TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG\n+AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT\n+TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA\n+CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC\n+AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT\n+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA\n+GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA\n+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG\n+TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT\n+GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT\n+TATCAGCTGGTATTTTTGGTG
CTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA\n+TGTCT'..b'GACTATTACCAGCTGTACTCA\n+ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC\n+CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT\n+TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC\n+TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT\n+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT\n+GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT\n+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA\n+TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC\n+CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA\n+AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT\n+AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC\n+ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC\n+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT\n+GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA\n+GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG\n+ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG\n+CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC\n+TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA\n+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC\n+CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA\n+GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA\n+TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT\n+TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT\n+GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT\n+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG\n+CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA\n+GATCATAATGAAACTT
GTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG\n+TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC\n+GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA\n+TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT\n+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA\n+AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG\n+ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG\n+TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT\n+GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC\n+CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG\n+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT\n+GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA\n+AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC\n+ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT\n+AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA\n+ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG\n+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG\n+CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC\n+AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA\n+ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG\n+TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC\n+TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC\n+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT\n+TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG\n+CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT\n+GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT\n+TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC\n+GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGG
AAGAGCCCTAATGTGTAAAATTAAT\n+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA\n+AAAAAAAAAAAAA\n+\n'
b
diff -r f079716f598c -r 029d90b0c4f6 test-data/NC_045512.2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NC_045512.2.fasta Fri Jul 14 22:07:22 2023 +0000
b
b'@@ -0,0 +1,430 @@\n+>NC_045512.2 Wuhan seafood market pneumonia virus isolate Wuhan-Hu-1, complete genome\n+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA\n+CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC\n+TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG\n+TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC\n+CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC\n+GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG\n+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT\n+GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC\n+GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT\n+TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA\n+GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG\n+TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG\n+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG\n+TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG\n+CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA\n+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA\n+CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC\n+CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA\n+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT\n+ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG\n+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG\n+CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA\n+CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA\n+ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA\n+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT\n+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAA
AAAAG\n+GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG\n+TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC\n+GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG\n+ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG\n+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT\n+AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA\n+TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT\n+AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA\n+GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC\n+TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT\n+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA\n+GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT\n+ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA\n+GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT\n+GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA\n+ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC\n+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA\n+TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG\n+AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT\n+TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA\n+CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC\n+AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT\n+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA\n+GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA\n+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG\n+TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT\n+GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT\n+TATCAGCTGGTATTTTTGGTG
CTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA\n+TGTCT'..b'GACTATTACCAGCTGTACTCA\n+ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC\n+CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT\n+TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC\n+TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT\n+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT\n+GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT\n+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA\n+TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC\n+CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA\n+AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT\n+AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC\n+ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC\n+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT\n+GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA\n+GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG\n+ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG\n+CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC\n+TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA\n+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC\n+CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA\n+GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA\n+TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT\n+TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT\n+GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT\n+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG\n+CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA\n+GATCATAATGAAACTT
GTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG\n+TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC\n+GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA\n+TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT\n+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA\n+AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG\n+ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG\n+TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT\n+GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC\n+CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG\n+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT\n+GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA\n+AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC\n+ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT\n+AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA\n+ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG\n+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG\n+CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC\n+AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA\n+ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG\n+TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC\n+TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC\n+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT\n+TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG\n+CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT\n+GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT\n+TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC\n+GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGG
AAGAGCCCTAATGTGTAAAATTAAT\n+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA\n+AAAAAAAAAAAAA\n+\n'
b
diff -r f079716f598c -r 029d90b0c4f6 test-data/NC_045512.2.fasta.gz
b
Binary file test-data/NC_045512.2.fasta.gz has changed
b
diff -r f079716f598c -r 029d90b0c4f6 test-data/frameshift_deletions_check.tsv
--- a/test-data/frameshift_deletions_check.tsv Wed May 31 17:10:11 2023 +0000
+++ b/test-data/frameshift_deletions_check.tsv Fri Jul 14 22:07:22 2023 +0000
[
b"@@ -1,140 +1,140 @@\n-\tref_id\tstart_position\tlength\tVARIANT\tgene_region\treads_all\treads_fwd\treads_rev\tdeletions\tfreq_del\tfreq_del_fwd\tfreq_del_rev\tdeletions_fwd\tdeletions_rev\tinsertions\tfreq_insert\tfreq_insert_fwd\tfreq_insert_rev\tinsertions_fwd\tinsertions_rev\tstops\tfreq_stop\tfreq_stop_fwd\tfreq_stop_rev\tstops_fwd\tstops_rev\tmatches_ref\tpos_critical_inserts\tpos_critical_dels\thomopolymeric\tref_base\tcons_id\tvariant_position_english\tvariant_diagnosis\n-0\tNC_045512.2\t5600\t2\tdeletion\tORF1ab\t40\t40\t0\t23\t0.575\t0.575\t0\t23\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t17\t[21101]\t[8740, 18800]\t1\tb'T'\tNC_045512.2\tGap of 2 nucleotide(s) found at refpos 5600\tonly fwd or rev reads available, deletion supported by the majority of them; homopolymeric; neighboring indels may restore reading frame\n-1\tNC_045512.2\t5602\t3\tstopgain\tORF1ab\t40\t40\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t40\t1.0\t1.0\t0\t40\t0\t40\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5602, aminoacid position 1779\tonly fwd or rev reads available, stopgain supported by the majority of them\n-2\tNC_045512.2\t5635\t3\tstopgain\tORF1ab\t40\t40\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t40\t1.0\t1.0\t0\t40\t0\t40\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5635, aminoacid position 1790\tonly fwd or rev reads available, stopgain supported by the majority of them\n-3\tNC_045512.2\t5707\t3\tstopgain\tORF1ab\t63\t39\t24\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t63\t1.0\t1.0\t1.0\t39\t24\t63\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5707, aminoacid position 1814\tstopgain supported by majority of fwd and rev reads\n-4\tNC_045512.2\t5713\t3\tstopgain\tORF1ab\t72\t39\t33\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t72\t1.0\t1.0\t1.0\t39\t33\t72\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5713, aminoacid position 1816\tstopgain supported by majority of fwd and rev 
reads\n-5\tNC_045512.2\t5737\t3\tstopgain\tORF1ab\t79\t39\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t79\t1.0\t1.0\t1.0\t39\t40\t79\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5737, aminoacid position 1824\tstopgain supported by majority of fwd and rev reads\n-6\tNC_045512.2\t5740\t3\tstopgain\tORF1ab\t79\t39\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t79\t1.0\t1.0\t1.0\t39\t40\t79\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5740, aminoacid position 1825\tstopgain supported by majority of fwd and rev reads\n-7\tNC_045512.2\t5752\t3\tstopgain\tORF1ab\t79\t39\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t79\t1.0\t1.0\t1.0\t39\t40\t79\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5752, aminoacid position 1829\tstopgain supported by majority of fwd and rev reads\n-8\tNC_045512.2\t5773\t3\tstopgain\tORF1ab\t78\t38\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t78\t1.0\t1.0\t1.0\t38\t40\t78\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5773, aminoacid position 1836\tstopgain supported by majority of fwd and rev reads\n-9\tNC_045512.2\t5788\t3\tstopgain\tORF1ab\t74\t34\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t74\t1.0\t1.0\t1.0\t34\t40\t74\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5788, aminoacid position 1841\tstopgain supported by majority of fwd and rev reads\n-10\tNC_045512.2\t5911\t3\tstopgain\tORF1ab\t45\t5\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t45\t1.0\t1.0\t1.0\t5\t40\t45\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 5911, aminoacid position 1882\tstopgain supported by majority of fwd and rev reads\n-11\tNC_045512.2\t6043\t3\tstopgain\tORF1ab\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 6043, aminoacid position 1926\tstopgain supported by majority of fwd and rev 
reads\n-12\tNC_045512.2\t6049\t3\tstopgain\tORF1ab\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 6049, aminoacid position 1928\tstopgain supported by majority of fwd and rev reads\n-13\tNC_045512.2\t6061\t3\tstopgain\tORF1ab\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t[]\t[]\t0\tb'T'\tNC_045512.2\tEarly Stopgain found at refpos 6061, aminoacid position 1932\tstopgain supported by majority of fwd and rev reads\n-14\tNC_045512.2\t6064\t3\t"..b'y the majority of them\n+NC_045512.2\t20780\t3\tstopgain\tORF1ab\t250\t0\t250\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t250\t1.0\t0\t1.0\t0\t250\t250\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20780, aminoacid position 6838\tonly fwd or rev reads available, stopgain supported by the majority of them\n+NC_045512.2\t20783\t3\tstopgain\tORF1ab\t250\t0\t250\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t250\t1.0\t0\t1.0\t0\t250\t250\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20783, aminoacid position 6839\tonly fwd or rev reads available, stopgain supported by the majority of them\n+NC_045512.2\t20819\t3\tstopgain\tORF1ab\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t275\t1.0\t1.0\t1.0\t24\t251\t275\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20819, aminoacid position 6851\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t20828\t3\tstopgain\tORF1ab\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t274\t0.9963636363636363\t1.0\t0.9960159362549801\t24\t250\t274\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20828, aminoacid position 6854\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t20834\t3\tstopgain\tORF1ab\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t275\t1.0\t1.0\t1.0\t24\t251\t275\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20834, 
aminoacid position 6856\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t20852\t3\tstopgain\tORF1ab\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t275\t1.0\t1.0\t1.0\t24\t251\t275\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20852, aminoacid position 6862\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t20912\t3\tstopgain\tORF1ab\t24\t24\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t24\t1.0\t1.0\t0\t24\t0\t24\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20912, aminoacid position 6882\tonly fwd or rev reads available, stopgain supported by the majority of them\n+NC_045512.2\t20990\t3\tstopgain\tORF1ab\t48\t24\t24\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t48\t1.0\t1.0\t1.0\t24\t24\t48\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 20990, aminoacid position 6908\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t21140\t3\tstopgain\tORF1ab\t41\t17\t24\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t41\t1.0\t1.0\t1.0\t17\t24\t41\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21140, aminoacid position 6958\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t21164\t3\tstopgain\tORF1ab\t17\t17\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t17\t1.0\t1.0\t0\t17\t0\t17\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21164, aminoacid position 6966\tonly fwd or rev reads available, stopgain supported by the majority of them\n+NC_045512.2\t21170\t3\tstopgain\tORF1ab\t17\t17\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t17\t1.0\t1.0\t0\t17\t0\t17\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21170, aminoacid position 6968\tonly fwd or rev reads available, stopgain supported by the majority of them\n+NC_045512.2\t21248\t3\tstopgain\tORF1ab\t34\t17\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t34\t1.0\t1.0\t1.0\t17\t17\t34\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21248, aminoacid position 
6994\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t21275\t3\tstopgain\tORF1ab\t34\t17\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t34\t1.0\t1.0\t1.0\t17\t17\t34\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21275, aminoacid position 7003\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t21314\t3\tstopgain\tORF1ab\t31\t14\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t31\t1.0\t1.0\t1.0\t14\t17\t31\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21314, aminoacid position 7016\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t21398\t3\tstopgain\tORF1ab\t21\t4\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t21\t1.0\t1.0\t1.0\t4\t17\t21\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21398, aminoacid position 7044\tstopgain supported by majority of fwd and rev reads\n+NC_045512.2\t21419\t3\tstopgain\tORF1ab\t21\t4\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t21\t1.0\t1.0\t1.0\t4\t17\t21\t.\t.\t0\tT\tNC_045512.2\tEarly Stopgain found at refpos 21419, aminoacid position 7051\tstopgain supported by majority of fwd and rev reads\n'
b
diff -r f079716f598c -r 029d90b0c4f6 test-data/frameshift_deletions_check_no_english.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/frameshift_deletions_check_no_english.tsv Fri Jul 14 22:07:22 2023 +0000
b
b'@@ -0,0 +1,140 @@\n+ref_id\tstart_position\tlength\tVARIANT\tgene_region\taa_position\tstop_mismatches\tstoploss_nt\treads_all\treads_fwd\treads_rev\tdeletions\tfreq_del\tfreq_del_fwd\tfreq_del_rev\tdeletions_fwd\tdeletions_rev\tinsertions\tfreq_insert\tfreq_insert_fwd\tfreq_insert_rev\tinsertions_fwd\tinsertions_rev\tstops\tfreq_stop\tfreq_stop_fwd\tfreq_stop_rev\tstops_fwd\tstops_rev\tmatches_ref\tpos_critical_inserts\tpos_critical_dels\thomopolymeric\tref_base\tcons_id\n+NC_045512.2\t5600\t2\tdeletion\tORF1ab\t.\t0\t.\t40\t40\t0\t23\t0.575\t0.575\t0\t23\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t17\t21101\t8740,18800\t1\tT\tNC_045512.2\n+NC_045512.2\t5602\t3\tstopgain\tORF1ab\t1779\t0\t.\t40\t40\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t40\t1.0\t1.0\t0\t40\t0\t40\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5635\t3\tstopgain\tORF1ab\t1790\t0\t.\t40\t40\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t40\t1.0\t1.0\t0\t40\t0\t40\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5707\t3\tstopgain\tORF1ab\t1814\t0\t.\t63\t39\t24\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t63\t1.0\t1.0\t1.0\t39\t24\t63\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5713\t3\tstopgain\tORF1ab\t1816\t0\t.\t72\t39\t33\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t72\t1.0\t1.0\t1.0\t39\t33\t72\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5737\t3\tstopgain\tORF1ab\t1824\t0\t.\t79\t39\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t79\t1.0\t1.0\t1.0\t39\t40\t79\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5740\t3\tstopgain\tORF1ab\t1825\t0\t.\t79\t39\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t79\t1.0\t1.0\t1.0\t39\t40\t79\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5752\t3\tstopgain\tORF1ab\t1829\t0\t.\t79\t39\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t79\t1.0\t1.0\t1.0\t39\t40\t79\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5773\t3\tstopgain\tORF1ab\t1836\t0\t.\t78\t38\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t78\t1.0\t1.0\t1.0\t38\t40\t78\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\
t5788\t3\tstopgain\tORF1ab\t1841\t0\t.\t74\t34\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t74\t1.0\t1.0\t1.0\t34\t40\t74\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t5911\t3\tstopgain\tORF1ab\t1882\t0\t.\t45\t5\t40\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t45\t1.0\t1.0\t1.0\t5\t40\t45\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6043\t3\tstopgain\tORF1ab\t1926\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6049\t3\tstopgain\tORF1ab\t1928\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6061\t3\tstopgain\tORF1ab\t1932\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6064\t3\tstopgain\tORF1ab\t1933\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6079\t3\tstopgain\tORF1ab\t1938\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6082\t3\tstopgain\tORF1ab\t1939\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6106\t3\tstopgain\tORF1ab\t1947\t0\t.\t10\t5\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t10\t1.0\t1.0\t1.0\t5\t5\t10\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6202\t3\tstopgain\tORF1ab\t1979\t0\t.\t86\t81\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t86\t1.0\t1.0\t1.0\t81\t5\t86\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6214\t3\tstopgain\tORF1ab\t1983\t0\t.\t86\t81\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t86\t1.0\t1.0\t1.0\t81\t5\t86\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6226\t3\tstopgain\tORF1ab\t1987\t0\t.\t86\t81\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t86\t1.0\t1.0\t1.0\t81\t5\t86\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6247\t3
\tstopgain\tORF1ab\t1994\t0\t.\t86\t81\t5\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t86\t1.0\t1.0\t1.0\t81\t5\t86\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6259\t3\tstopgain\tORF1ab\t1998\t0\t.\t81\t81\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t81\t1.0\t1.0\t0\t81\t0\t81\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6262\t3\tstopgain\tORF1ab\t1999\t0\t.\t81\t81\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t81\t1.0\t1.0\t0\t81\t0\t81\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6274\t3\tstopgain\tORF1ab\t2003\t0\t.\t80\t80\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t80\t1.0\t1.0\t0\t80\t0\t80\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6322\t3\tstopgain\tORF1ab\t2019\t0\t.\t153\t80\t73\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t153\t1.0\t1.0\t1.0\t80\t73\t153\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t6340\t3\tstopgain\tORF1ab\t2025\t0\t.\t157\t80\t77\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t157\t1.0\t1.0\t1.0\t80\t77\t157\t.\t.\t0\tT\tNC_045512.2\n+NC_04551'..b'\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t108\t1.0\t1.0\t1.0\t69\t39\t108\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t19931\t3\tstopgain\tORF1ab\t6555\t0\t.\t69\t69\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t69\t1.0\t1.0\t0\t69\t0\t69\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t19940\t3\tstopgain\tORF1ab\t6558\t0\t.\t69\t69\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t68\t0.9855072463768116\t0.9855072463768116\t0\t68\t0\t68\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20012\t3\tstopgain\tORF1ab\t6582\t0\t.\t131\t69\t62\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t131\t1.0\t1.0\t1.0\t69\t62\t131\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20084\t3\tstopgain\tORF1ab\t6606\t0\t.\t133\t64\t69\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t133\t1.0\t1.0\t1.0\t64\t69\t133\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20120\t3\tstopgain\tORF1ab\t6618\t0\t.\t69\t0\t69\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t69\t1.0\t0\t1.0\t0\t69\t69\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20135\t3\tstopgain\tORF1ab\t6623\t0\t.\t69\t0\t
69\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t69\t1.0\t0\t1.0\t0\t69\t69\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20534\t3\tstopgain\tORF1ab\t6756\t0\t.\t251\t251\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t251\t1.0\t1.0\t0\t251\t0\t251\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20537\t3\tstopgain\tORF1ab\t6757\t0\t.\t251\t251\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t251\t1.0\t1.0\t0\t251\t0\t251\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20558\t3\tstopgain\tORF1ab\t6764\t0\t.\t250\t250\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t250\t1.0\t1.0\t0\t250\t0\t250\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20579\t3\tstopgain\tORF1ab\t6771\t0\t.\t250\t250\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t250\t1.0\t1.0\t0\t250\t0\t250\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20633\t3\tstopgain\tORF1ab\t6789\t0\t.\t250\t249\t1\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t250\t1.0\t1.0\t1.0\t249\t1\t250\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20723\t3\tstopgain\tORF1ab\t6819\t0\t.\t442\t194\t248\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t442\t1.0\t1.0\t1.0\t194\t248\t442\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20777\t3\tstopgain\tORF1ab\t6837\t0\t.\t250\t0\t250\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t250\t1.0\t0\t1.0\t0\t250\t250\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20780\t3\tstopgain\tORF1ab\t6838\t0\t.\t250\t0\t250\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t250\t1.0\t0\t1.0\t0\t250\t250\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20783\t3\tstopgain\tORF1ab\t6839\t0\t.\t250\t0\t250\t0\t0.0\t0\t0.0\t0\t0\t0\t0.0\t0\t0.0\t0\t0\t250\t1.0\t0\t1.0\t0\t250\t250\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20819\t3\tstopgain\tORF1ab\t6851\t0\t.\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t275\t1.0\t1.0\t1.0\t24\t251\t275\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20828\t3\tstopgain\tORF1ab\t6854\t0\t.\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t274\t0.9963636363636363\t1.0\t0.9960159362549801\t24\t250\t274\t.\t.\t0\tT\tNC_045512.2\n
+NC_045512.2\t20834\t3\tstopgain\tORF1ab\t6856\t0\t.\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t275\t1.0\t1.0\t1.0\t24\t251\t275\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20852\t3\tstopgain\tORF1ab\t6862\t0\t.\t275\t24\t251\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t275\t1.0\t1.0\t1.0\t24\t251\t275\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20912\t3\tstopgain\tORF1ab\t6882\t0\t.\t24\t24\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t24\t1.0\t1.0\t0\t24\t0\t24\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t20990\t3\tstopgain\tORF1ab\t6908\t0\t.\t48\t24\t24\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t48\t1.0\t1.0\t1.0\t24\t24\t48\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21140\t3\tstopgain\tORF1ab\t6958\t0\t.\t41\t17\t24\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t41\t1.0\t1.0\t1.0\t17\t24\t41\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21164\t3\tstopgain\tORF1ab\t6966\t0\t.\t17\t17\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t17\t1.0\t1.0\t0\t17\t0\t17\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21170\t3\tstopgain\tORF1ab\t6968\t0\t.\t17\t17\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t17\t1.0\t1.0\t0\t17\t0\t17\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21248\t3\tstopgain\tORF1ab\t6994\t0\t.\t34\t17\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t34\t1.0\t1.0\t1.0\t17\t17\t34\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21275\t3\tstopgain\tORF1ab\t7003\t0\t.\t34\t17\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t34\t1.0\t1.0\t1.0\t17\t17\t34\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21314\t3\tstopgain\tORF1ab\t7016\t0\t.\t31\t14\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t31\t1.0\t1.0\t1.0\t14\t17\t31\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21398\t3\tstopgain\tORF1ab\t7044\t0\t.\t21\t4\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t21\t1.0\t1.0\t1.0\t4\t17\t21\t.\t.\t0\tT\tNC_045512.2\n+NC_045512.2\t21419\t3\tstopgain\tORF1ab\t7051\t0\t.\t21\t4\t17\t0\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\t0.0\t0\t0\t21\t1.0\t1.0\t1.0\t4\t17\t21\t.\t
.\t0\tT\tNC_045512.2\n'