Repository 'tn93_filter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/tn93_filter

Changeset 0:ba95715078c9 (2021-04-23)
Next changeset 1:cf50aeb956f2 (2022-04-20)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
added:
macros.xml
test-data/cluster-in1-1.fa
test-data/cluster-in1-2.fa
test-data/cluster-out1.fa
test-data/cluster-out1.json
test-data/filter-in1-clusters.fa
test-data/filter-in1-reads.fa
test-data/filter-in1-reference.fa
test-data/filter-out1.fasta
test-data/readreduce-in1.fa
test-data/readreduce-in2.fa
test-data/readreduce-out1.fa
test-data/readreduce-out2.fa
test-data/tn93-in1.fa
test-data/tn93-in2-alpha.fa
test-data/tn93-in2-beta.fa
test-data/tn93-out1.csv
test-data/tn93-out2.csv
tn93_cluster.py
tn93_filter.py
tn93_filter.xml
b
diff -r 000000000000 -r ba95715078c9 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@VERSION@">1.0.6</token>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @UNPUBLISHED{spond,
+                    author = "Sergei Kosakovsky Pond",
+                    title = "HyPhy: Hypothesis Testing using Phylogenies",
+                    year = "2000",
+                    note = "http://hyphy.org/",
+                    url = "http://hyphy.org/"}
+            </citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r ba95715078c9 test-data/cluster-in1-1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cluster-in1-1.fa Fri Apr 23 03:05:08 2021 +0000
b
b'@@ -0,0 +1,14 @@\n+>NC_045512.2:21563-25384\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAAT
AGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>gb_MW540268_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MA_MASPHL_01380_2020_Segmen
t_nu'..b'CTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>gb_MW518841_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_CA_CDC_STM_220_2020_Segment_null_1\n+ATGTTAGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTTGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATCATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTTAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATA
CTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTA
CATTACACA\n'
b
diff -r 000000000000 -r ba95715078c9 test-data/cluster-in1-2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cluster-in1-2.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,2 @@
+>NC_045512.2:21563-25384 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1
+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTG
GTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA
b
diff -r 000000000000 -r ba95715078c9 test-data/cluster-out1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cluster-out1.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,3 @@
+>gb_MW540268_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MA_MASPHL_01380_2020_Segment_null_4
+CTTGTTTTTTTTTTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGATTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTG
GTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCTATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA
+
b
diff -r 000000000000 -r ba95715078c9 test-data/cluster-out1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cluster-out1.json Fri Apr 23 03:05:08 2021 +0000
[
@@ -0,0 +1,7 @@
+[
+ {
+ "size" : 6, 
+ "members" : ["gb_MW540268_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MA_MASPHL_01380_2020_Segment_null_4","gb_MW467454_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_EGY_EGY_CCHE57357_A_46_2020_Segment_null_1","gb_MT496989_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_IND_GBRC63_2020_Segment_null_3","gb_MW206333_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_UT_UPHL_2009646_2020_Segment_null_4","gb_MW525081_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MO_CDC_STM_0000025_G03_2021_Segment_null_1","gb_MW518841_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_CA_CDC_STM_220_2020_Segment_null_1"],
+ "centroid" : ">gb_MW540268_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MA_MASPHL_01380_2020_Segment_null_4\nCTTGTTTTTTTTTTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGATTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGT
TAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCTATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n"
+ }
+]
b
diff -r 000000000000 -r ba95715078c9 test-data/filter-in1-clusters.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter-in1-clusters.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,4 @@
+>epi_isl_1041406/hCoV-19/USA/NY-PRL-2021_02_08_05H12/2021
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGACG
+>epi_isl_1041403/hCoV-19/USA/NY-PRL-2021_02_08_05H08/2021
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
b
diff -r 000000000000 -r ba95715078c9 test-data/filter-in1-reads.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter-in1-reads.fa Fri Apr 23 03:05:08 2021 +0000
b
b'@@ -0,0 +1,15 @@\n+>gb_MW540268_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MA_MASPHL_01380_2020_Segment_null_4\n+CTTGTTTTTTTTTTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGATTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCA
GGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCTATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+\n+>gb_MW467454_
Organ'..b'GCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+\n+>gb_MW518841_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_CA_CDC_STM_220_2020_Segment_null_1\n+ATGTTAGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTTGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATCATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTTAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATA
CTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTA
CATTACACA\n+\n'
b
diff -r 000000000000 -r ba95715078c9 test-data/filter-in1-reference.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter-in1-reference.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,4 @@
+>epi_isl_1041406/hCoV-19/USA/NY-PRL-2021_02_08_05H12/2021
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGACG
+>epi_isl_1041403/hCoV-19/USA/NY-PRL-2021_02_08_05H08/2021
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
b
diff -r 000000000000 -r ba95715078c9 test-data/filter-out1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter-out1.fasta Fri Apr 23 03:05:08 2021 +0000
b
b'@@ -0,0 +1,15 @@\n+>gb_MW540268_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_MA_MASPHL_01380_2020_Segment_null_4\n+CTTGTTTTTTTTTTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGATTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCA
GGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCTATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+\n+>gb_MW467454_
Organ'..b'GCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+\n+>gb_MW518841_Organism_Severe_acute_respiratory_syndrome_coronavirus_2_Strain_Name_SARS_CoV_2_human_USA_CA_CDC_STM_220_2020_Segment_null_1\n+ATGTTAGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTTGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATCATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTTAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATA
CTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTA
CATTACACA\n+\n'
b
diff -r 000000000000 -r ba95715078c9 test-data/readreduce-in1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/readreduce-in1.fa Fri Apr 23 03:05:08 2021 +0000
b
b'@@ -0,0 +1,224 @@\n+>B_FR_83_HXB2_ACC_K03455_5\n+CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT\n+TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT\n+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA\n+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC\n+AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA\n+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA\n+AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC\n+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC\n+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC\n+AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA\n+GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG\n+CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC\n+AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA\n+ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG\n+GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT\n+TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA\n+TAGTAGGAGCAGAAACCTTC\n+>B_US_83_RF_ACC_M17451\n+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT\n+TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT\n+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA\n+AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC\n+AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA\n+GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA\n+CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAA
AAAT\n+CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA\n+AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC\n+GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC\n+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC\n+AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG\n+CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC\n+AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA\n+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG\n+GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA\n+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT\n+CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA\n+TAATAGGAGCAGAAACTTTC\n+>B_US_86_JRFL_ACC_U63632\n+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT\n+TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT\n+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA\n+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA\n+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA\n+AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC\n+AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG\n+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA\n+AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC\n+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC\n+TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC\n+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC\n+AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG\n+CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC\n+AAGGCCAATGGACATATCAAATTTATCAAGAGCCA
TTTAAAATTCTGAAA\n+ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAG'..b'CCAATATTTGCCATAAAGAAAAAAGA\n+CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGATTTCTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGGCTG\n+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTC\n+AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTCCCA\n+CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA\n+AAAATAGAGGAATTAAGAGAACATCTATTGAGGTGGGGATTTACCACACC\n+AGATAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAA\n+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGGAAATTAAACTGGGC\n+AAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAA\n+TTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAACAAGGGG\n+ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAAAA\n+ACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGG\n+GAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT\n+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA\n+TAATAGGAGCAGAAACTTTC\n+>D_CD_84_84ZR085_ACC_U88822\n+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAT\n+TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT\n+GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTCTGGGAAGTTCAATTAGGGATACCACATCCTGCAGGATTA\n+AAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTC\n+AATTCCCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCA\n+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAAT\n+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGACAGCATAGAGCA\n+AAAATAGAGAAATTAAGAGAACATCTGTTG
AGGTGGGGGCTTACCACACC\n+AGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAA\n+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC\n+AAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAA\n+TTAGAACTGGCAGAAAACAGGGAGATTCTAAAGGAACCAATGCATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAGAAACAAGGGC\n+AAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAA\n+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA\n+GTTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGG\n+GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT\n+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA\n+TAATAGGAGCAGAAACTTTC\n+>D_UG_94_94UG114_ACC_U88824\n+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGGATGGA\n+TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAC\n+TAATAGAAATTTGTTCAGAACTAGAAAAGGAAGGAAAAATTTCAAAAATT\n+GGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTTTGGGAAGTTCAACTAGGAATACCACATCCTGCAGGGCTA\n+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTC\n+AGTTCCCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTA\n+GTACAAACAATGAGACACCAGGAATTAGATATCAGTACAATGTGCTTCCA\n+CAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAATA\n+AAAATAGAGGAATTAAGGGGACACCTCTTGAAGTGGGGATTTACCACACC\n+AGACAAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAA\n+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC\n+AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAAATGCCTTA\n+GGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAA\n+TTAGAACTGGCAGAAAACAGGGAAATACTAAAAGAACCAGTACATGGAGC\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC\n+AAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAATCTGAAA\n+ACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGG\n+GAAAGACTCCTAAATT
TAGACTACCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT\n+CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCA\n+TAGTAGGAGCAGAAACTTTC\n'
b
diff -r 000000000000 -r ba95715078c9 test-data/readreduce-in2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/readreduce-in2.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,112 @@
+>B_FR_83_HXB2_ACC_K03455_5
+CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT
+TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT
+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA
+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA
+AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC
+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC
+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC
+AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA
+GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG
+CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC
+AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA
+ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG
+GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA
+TAGTAGGAGCAGAAACCTTC
+>B_US_83_RF_ACC_M17451
+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT
+TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT
+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA
+AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA
+GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA
+CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAAT
+CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA
+AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC
+GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC
+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC
+AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA
+GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG
+CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC
+AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA
+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG
+GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA
+TAATAGGAGCAGAAACTTTC
+>B_US_86_JRFL_ACC_U63632
+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT
+TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT
+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA
+AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA
+GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA
+AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC
+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC
+TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC
+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC
+AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA
+GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG
+CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC
+AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAATTCTGAAA
+ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGG
+GAAAGATTCCTAAATTTAAATTACCCATACAAAAAGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA
+TAGTAGGAGCAGAAACTTTC
+>B_US_90_WEAU160_ACC_U21135
+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAGAAAATAAAAGCAT
+TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATT
+GGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTTCAGGGTTA
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTACCCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTA
+GTATAAACAATGAAACACCAGGGATTAGATATCAGTACAATGTGCTTCCA
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+ATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA
+AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGATTTACCACACC
+AGACAAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAA
+AGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC
+AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAAACTCCTTA
+GGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAG
+CTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAGCTACAGAAGCAGGGGC
+AAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAA
+ACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAGAAAATAACCACAGAAAGCATAGTAATATGGG
+GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCA
+TAGTAGGAGCAGAAACTTTC
\ No newline at end of file
b
diff -r 000000000000 -r ba95715078c9 test-data/tn93-in1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tn93-in1.fa Fri Apr 23 03:05:08 2021 +0000
b
b'@@ -0,0 +1,224 @@\n+>B_FR_83_HXB2_ACC_K03455_5\n+CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT\n+TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT\n+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA\n+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC\n+AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA\n+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA\n+AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC\n+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC\n+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC\n+AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA\n+GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG\n+CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC\n+AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA\n+ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG\n+GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT\n+TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA\n+TAGTAGGAGCAGAAACCTTC\n+>B_US_83_RF_ACC_M17451\n+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT\n+TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT\n+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA\n+AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC\n+AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA\n+GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA\n+CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAA
AAAT\n+CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA\n+AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC\n+GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC\n+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC\n+AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG\n+CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC\n+AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA\n+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG\n+GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA\n+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT\n+CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA\n+TAATAGGAGCAGAAACTTTC\n+>B_US_86_JRFL_ACC_U63632\n+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT\n+TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT\n+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA\n+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA\n+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA\n+AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC\n+AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG\n+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA\n+AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC\n+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC\n+TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC\n+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC\n+AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG\n+CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC\n+AAGGCCAATGGACATATCAAATTTATCAAGAGCCA
TTTAAAATTCTGAAA\n+ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAG'..b'CCAATATTTGCCATAAAGAAAAAAGA\n+CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGATTTCTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGGCTG\n+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTC\n+AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTCCCA\n+CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA\n+AAAATAGAGGAATTAAGAGAACATCTATTGAGGTGGGGATTTACCACACC\n+AGATAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAA\n+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGGAAATTAAACTGGGC\n+AAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAA\n+TTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAACAAGGGG\n+ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAAAA\n+ACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGG\n+GAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT\n+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA\n+TAATAGGAGCAGAAACTTTC\n+>D_CD_84_84ZR085_ACC_U88822\n+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA\n+TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAT\n+TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT\n+GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTCTGGGAAGTTCAATTAGGGATACCACATCCTGCAGGATTA\n+AAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTC\n+AATTCCCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTA\n+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCA\n+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAAT\n+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGACAGCATAGAGCA\n+AAAATAGAGAAATTAAGAGAACATCTGTTG
AGGTGGGGGCTTACCACACC\n+AGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAA\n+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC\n+AAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA\n+GGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAA\n+TTAGAACTGGCAGAAAACAGGGAGATTCTAAAGGAACCAATGCATGGAGT\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAGAAACAAGGGC\n+AAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAA\n+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA\n+GTTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGG\n+GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT\n+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA\n+TAATAGGAGCAGAAACTTTC\n+>D_UG_94_94UG114_ACC_U88824\n+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGGATGGA\n+TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAC\n+TAATAGAAATTTGTTCAGAACTAGAAAAGGAAGGAAAAATTTCAAAAATT\n+GGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA\n+CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA\n+CTCAAGACTTTTGGGAAGTTCAACTAGGAATACCACATCCTGCAGGGCTA\n+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTC\n+AGTTCCCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTA\n+GTACAAACAATGAGACACCAGGAATTAGATATCAGTACAATGTGCTTCCA\n+CAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT\n+CTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACA\n+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAATA\n+AAAATAGAGGAATTAAGGGGACACCTCTTGAAGTGGGGATTTACCACACC\n+AGACAAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC\n+TCCATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAA\n+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC\n+AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAAATGCCTTA\n+GGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAA\n+TTAGAACTGGCAGAAAACAGGGAAATACTAAAAGAACCAGTACATGGAGC\n+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC\n+AAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAATCTGAAA\n+ACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACA\n+ATTAACAGAGGCAGTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGG\n+GAAAGACTCCTAAATT
TAGACTACCCATACAAAAGGAAACATGGGAAACA\n+TGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT\n+CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCA\n+TAGTAGGAGCAGAAACTTTC\n'
b
diff -r 000000000000 -r ba95715078c9 test-data/tn93-in2-alpha.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tn93-in2-alpha.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,112 @@
+>B_FR_83_HXB2_ACC_K03455_5
+CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT
+TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT
+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA
+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA
+AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC
+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC
+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC
+AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA
+GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG
+CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC
+AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA
+ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG
+GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA
+TAGTAGGAGCAGAAACCTTC
+>B_US_83_RF_ACC_M17451
+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT
+TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT
+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA
+AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA
+GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA
+CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAAT
+CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA
+AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC
+GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC
+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC
+AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA
+GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG
+CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC
+AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA
+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG
+GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA
+TAATAGGAGCAGAAACTTTC
+>B_US_86_JRFL_ACC_U63632
+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT
+TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT
+GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA
+AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA
+GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA
+AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC
+AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC
+TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC
+AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC
+AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA
+GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG
+CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC
+AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAATTCTGAAA
+ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGG
+GAAAGATTCCTAAATTTAAATTACCCATACAAAAAGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA
+TAGTAGGAGCAGAAACTTTC
+>B_US_90_WEAU160_ACC_U21135
+CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAGAAAATAAAAGCAT
+TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATT
+GGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGA
+CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTTCAGGGTTA
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTACCCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTA
+GTATAAACAATGAAACACCAGGGATTAGATATCAGTACAATGTGCTTCCA
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+ATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA
+AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGATTTACCACACC
+AGACAAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAA
+AGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC
+AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAAACTCCTTA
+GGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAG
+CTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAGCTACAGAAGCAGGGGC
+AAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAA
+ACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAGAAAATAACCACAGAAAGCATAGTAATATGGG
+GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT
+CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCA
+TAGTAGGAGCAGAAACTTTC
\ No newline at end of file
b
diff -r 000000000000 -r ba95715078c9 test-data/tn93-in2-beta.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tn93-in2-beta.fa Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,112 @@
+>D_CD_83_ELI_ACC_K03454_7
+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT
+TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT
+GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA
+CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGATTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGCTG
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC
+AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCCTTTACCATATCTA
+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA
+CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATGGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGGACA
+AAAATAGAGAAATTAAGAGAACATCTATTGAGGTGGGGATTTACCAGACC
+AGATAAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGTCTATAAAACTGCCAGAAAAGGAG
+AGCTGGACTGTCAATGATATACAGAACTTAGTGGAGAGATTAAACTGGGC
+AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGTAAACTCCTTA
+GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAA
+TTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC
+ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTGAAA
+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAGCA
+ATTAGCAGAGGCAGTGCAAAGAATATCCACAGAAAGCATAGTGATATGGG
+GAAGGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA
+TGGTGGGCAGAGTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT
+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA
+TAATAGGAGCAGAAACTTTC
+>D_CD_83_NDK_ACC_M27323
+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT
+TAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAGAATT
+GGGCCTGAAAATCCATATAATACTCCAATATTTGCCATAAAGAAAAAAGA
+CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGATTTCTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGGCTG
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTC
+AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTA
+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTCCCA
+CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA
+AAAATAGAGGAATTAAGAGAACATCTATTGAGGTGGGGATTTACCACACC
+AGATAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAA
+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGGAAATTAAACTGGGC
+AAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA
+GGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAA
+TTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAACAAGGGG
+ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAAAA
+ACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGG
+GAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAACA
+TGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT
+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA
+TAATAGGAGCAGAAACTTTC
+>D_CD_84_84ZR085_ACC_U88822
+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA
+TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAT
+TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT
+GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA
+CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTCTGGGAAGTTCAATTAGGGATACCACATCCTGCAGGATTA
+AAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTC
+AATTCCCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTA
+GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCA
+CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAAT
+CTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGACAGCATAGAGCA
+AAAATAGAGAAATTAAGAGAACATCTGTTGAGGTGGGGGCTTACCACACC
+AGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAA
+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC
+AAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA
+GGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAA
+TTAGAACTGGCAGAAAACAGGGAGATTCTAAAGGAACCAATGCATGGAGT
+GTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAGAAACAAGGGC
+AAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAA
+ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA
+GTTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGG
+GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA
+TGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT
+CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA
+TAATAGGAGCAGAAACTTTC
+>D_UG_94_94UG114_ACC_U88824
+CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGGATGGA
+TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAC
+TAATAGAAATTTGTTCAGAACTAGAAAAGGAAGGAAAAATTTCAAAAATT
+GGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA
+CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA
+CTCAAGACTTTTGGGAAGTTCAACTAGGAATACCACATCCTGCAGGGCTA
+AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTC
+AGTTCCCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTA
+GTACAAACAATGAGACACCAGGAATTAGATATCAGTACAATGTGCTTCCA
+CAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT
+CTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACA
+TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAATA
+AAAATAGAGGAATTAAGGGGACACCTCTTGAAGTGGGGATTTACCACACC
+AGACAAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC
+TCCATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAA
+AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC
+AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAAATGCCTTA
+GGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAA
+TTAGAACTGGCAGAAAACAGGGAAATACTAAAAGAACCAGTACATGGAGC
+GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC
+AAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAATCTGAAA
+ACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACA
+ATTAACAGAGGCAGTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGG
+GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA
+TGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT
+CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCA
+TAGTAGGAGCAGAAACTTTC
b
diff -r 000000000000 -r ba95715078c9 test-data/tn93-out1.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tn93-out1.csv Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,29 @@
+B_FR_83_HXB2_ACC_K03455_5,B_US_83_RF_ACC_M17451,0.045156
+B_FR_83_HXB2_ACC_K03455_5,B_US_86_JRFL_ACC_U63632,0.0296218
+B_FR_83_HXB2_ACC_K03455_5,B_US_90_WEAU160_ACC_U21135,0.0327566
+B_FR_83_HXB2_ACC_K03455_5,D_CD_83_ELI_ACC_K03454_7,0.0669206
+B_FR_83_HXB2_ACC_K03455_5,D_CD_83_NDK_ACC_M27323,0.0592586
+B_FR_83_HXB2_ACC_K03455_5,D_CD_84_84ZR085_ACC_U88822,0.0663619
+B_FR_83_HXB2_ACC_K03455_5,D_UG_94_94UG114_ACC_U88824,0.0847988
+B_US_83_RF_ACC_M17451,B_US_86_JRFL_ACC_U63632,0.048328
+B_US_83_RF_ACC_M17451,B_US_90_WEAU160_ACC_U21135,0.0515908
+B_US_83_RF_ACC_M17451,D_CD_83_ELI_ACC_K03454_7,0.0810759
+B_US_83_RF_ACC_M17451,D_CD_83_NDK_ACC_M27323,0.0661066
+B_US_83_RF_ACC_M17451,D_CD_84_84ZR085_ACC_U88822,0.0769146
+B_US_83_RF_ACC_M17451,D_UG_94_94UG114_ACC_U88824,0.0955213
+B_US_86_JRFL_ACC_U63632,B_US_90_WEAU160_ACC_U21135,0.0408994
+B_US_86_JRFL_ACC_U63632,D_CD_83_ELI_ACC_K03454_7,0.0771797
+B_US_86_JRFL_ACC_U63632,D_CD_83_NDK_ACC_M27323,0.0609044
+B_US_86_JRFL_ACC_U63632,D_CD_84_84ZR085_ACC_U88822,0.0705011
+B_US_86_JRFL_ACC_U63632,D_UG_94_94UG114_ACC_U88824,0.0882054
+B_US_90_WEAU160_ACC_U21135,D_CD_83_ELI_ACC_K03454_7,0.0771856
+B_US_90_WEAU160_ACC_U21135,D_CD_83_NDK_ACC_M27323,0.0609097
+B_US_90_WEAU160_ACC_U21135,D_CD_84_84ZR085_ACC_U88822,0.0740203
+B_US_90_WEAU160_ACC_U21135,D_UG_94_94UG114_ACC_U88824,0.0890019
+D_CD_83_ELI_ACC_K03454_7,D_CD_83_NDK_ACC_M27323,0.0287246
+D_CD_83_ELI_ACC_K03454_7,D_CD_84_84ZR085_ACC_U88822,0.055948
+D_CD_83_ELI_ACC_K03454_7,D_UG_94_94UG114_ACC_U88824,0.0742033
+D_CD_83_NDK_ACC_M27323,D_CD_84_84ZR085_ACC_U88822,0.0491974
+D_CD_83_NDK_ACC_M27323,D_UG_94_94UG114_ACC_U88824,0.0726626
+D_CD_84_84ZR085_ACC_U88822,D_UG_94_94UG114_ACC_U88824,0.0805088
+ID1,ID2,Distance
b
diff -r 000000000000 -r ba95715078c9 test-data/tn93-out2.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tn93-out2.csv Fri Apr 23 03:05:08 2021 +0000
b
@@ -0,0 +1,17 @@
+B_FR_83_HXB2_ACC_K03455_5,D_CD_83_ELI_ACC_K03454_7,0.0669206
+B_FR_83_HXB2_ACC_K03455_5,D_CD_83_NDK_ACC_M27323,0.0592586
+B_FR_83_HXB2_ACC_K03455_5,D_CD_84_84ZR085_ACC_U88822,0.0663619
+B_FR_83_HXB2_ACC_K03455_5,D_UG_94_94UG114_ACC_U88824,0.0847988
+B_US_83_RF_ACC_M17451,D_CD_83_ELI_ACC_K03454_7,0.0810759
+B_US_83_RF_ACC_M17451,D_CD_83_NDK_ACC_M27323,0.0661066
+B_US_83_RF_ACC_M17451,D_CD_84_84ZR085_ACC_U88822,0.0769146
+B_US_83_RF_ACC_M17451,D_UG_94_94UG114_ACC_U88824,0.0955213
+B_US_86_JRFL_ACC_U63632,D_CD_83_ELI_ACC_K03454_7,0.0771797
+B_US_86_JRFL_ACC_U63632,D_CD_83_NDK_ACC_M27323,0.0609044
+B_US_86_JRFL_ACC_U63632,D_CD_84_84ZR085_ACC_U88822,0.0705011
+B_US_86_JRFL_ACC_U63632,D_UG_94_94UG114_ACC_U88824,0.0882054
+B_US_90_WEAU160_ACC_U21135,D_CD_83_ELI_ACC_K03454_7,0.0771856
+B_US_90_WEAU160_ACC_U21135,D_CD_83_NDK_ACC_M27323,0.0609097
+B_US_90_WEAU160_ACC_U21135,D_CD_84_84ZR085_ACC_U88822,0.0740203
+B_US_90_WEAU160_ACC_U21135,D_UG_94_94UG114_ACC_U88824,0.0890019
+ID1,ID2,Distance
b
diff -r 000000000000 -r ba95715078c9 tn93_cluster.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tn93_cluster.py Fri Apr 23 03:05:08 2021 +0000
[
@@ -0,0 +1,81 @@
+import argparse
+import json
+import os
+import shlex
+import shutil
+import subprocess
+import sys
+
+
+def cluster_to_fasta(json_file, fasta_file, reference_name=None):
+    """Write the centroid of every cluster in a cluster JSON file as FASTA.
+
+    json_file holds a list of clusters, each with a 'centroid' FASTA record
+    and a 'members' collection.  When reference_name is given, the centroid
+    of any cluster that has reference_name among its members is written with
+    its header line replaced by '>' + reference_name.
+
+    Returns a tuple of (modification time of fasta_file, number of clusters).
+    """
+    with open(json_file, "r") as json_handle:
+        clusters = json.load(json_handle)
+
+    with open(fasta_file, "w") as fasta_handle:
+        for cluster in clusters:
+            holds_reference = reference_name is not None and reference_name in cluster['members']
+            record = cluster['centroid']
+            if holds_reference:
+                # Keep the sequence lines, swap only the header line.
+                record = "\n".join([">" + reference_name] + record.split('\n')[1:])
+            print(record, file=fasta_handle)
+
+    return os.path.getmtime(fasta_file), len(clusters)
+
+
+def run_command(command):
+    """Run a command line (no shell involved) and return its exit status.
+
+    The command string is tokenised with shlex.split.  Its output is captured
+    and only echoed when the exit status is non-zero: captured stdout goes to
+    stdout, the failure notice and captured stderr go to stderr.
+    """
+    completed = subprocess.run(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    exit_code = completed.returncode
+    if exit_code == 0:
+        return int(exit_code)
+
+    captured_out = completed.stdout.decode().replace('\\n', '\n')
+    captured_err = completed.stderr.decode().replace('\\n', '\n')
+    print('Command `%s` failed with exit code %s\n' % (command, exit_code), file=sys.stderr)
+    print('--------------------- STDOUT ---------------------')
+    print(captured_out)
+    print('------------------- END STDOUT -------------------')
+    print('--------------------- STDERR ---------------------', file=sys.stderr)
+    print(captured_err, file=sys.stderr)
+    print('------------------- END STDERR -------------------', file=sys.stderr)
+    return int(exit_code)
+
+
+def main(arguments):
+    """Cluster the input MSA with tn93-cluster, widening the threshold as needed.
+
+    The input is copied into the working directory as reference_msa.fa and
+    clustered repeatedly.  After each run the threshold is raised by 25% of
+    its starting value, until the number of clusters is at most
+    arguments.cluster_count or the threshold passes 1.  The centroids of the
+    last run are copied to arguments.compressed and the cluster JSON to
+    arguments.output.
+
+    Returns 0 on success, the exit status of tn93-cluster if it fails, or 1
+    when the starting threshold is above 1.
+    """
+    threshold = arguments.threshold
+    # Written as "not <=" so NaN is rejected too.  Without this check the loop
+    # below never runs and the later copy of clusters.json fails confusingly.
+    if not threshold <= 1:
+        print('Threshold %s must not be greater than 1' % threshold, file=sys.stderr)
+        return 1
+    step = threshold * 0.25
+    shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa'))
+    shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa.bak'))
+    # Stays None when the reference file has no FASTA header line; in that case
+    # cluster_to_fasta leaves centroid names untouched (previously: NameError).
+    _ref_seq_name = None
+    with open(arguments.reference) as fh:
+        for line in fh:
+            if line[0] == '>':
+                _ref_seq_name = line[1:].split(' ')[0].strip()
+                break
+    while threshold <= 1:
+        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f reference_msa.fa' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction)
+        return_code = run_command(command)
+        if return_code != 0:
+            return return_code
+        _, cluster_count = cluster_to_fasta('clusters.json', 'reference_msa.fa.bak', _ref_seq_name)
+        if cluster_count <= arguments.cluster_count or threshold == 1:
+            break
+        # Report before widening so the count is paired with the threshold
+        # that actually produced it.
+        print('Found %d clusters at threshold %f' % (cluster_count, threshold))
+        if step <= 0:
+            # A zero or negative starting threshold can never grow; stop here
+            # instead of re-running the same clustering forever.
+            print('Threshold %g cannot be widened any further; keeping %d clusters' % (threshold, cluster_count))
+            break
+        threshold += step
+    shutil.copy('reference_msa.fa.bak', arguments.compressed)
+    shutil.copy('clusters.json', arguments.output)
+    os.remove('reference_msa.fa.bak')
+    return 0
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the options, run main() and use its
+    # return value as the process exit status.
+    parser = argparse.ArgumentParser(description='Cluster an MSA with tn93-cluster, raising the threshold until at most --cluster-count clusters remain')
+    parser.add_argument('--input', help='Input MSA', required=True, type=str)
+    parser.add_argument('--reference', help='Reference sequence', required=True, type=str)
+    # main() copies clusters.json here, so this is an output, not an input MSA.
+    parser.add_argument('--output', help='File to write the cluster JSON to', required=True, type=str)
+    parser.add_argument('--threshold', help='Threshold', required=True, type=float)
+    parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str)
+    parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str)
+    parser.add_argument('--overlap', help='Overlap', required=True, type=int)
+    parser.add_argument('--fraction', help='Fraction', required=True, type=float)
+    # main() stops widening the threshold once the cluster count is <= this value.
+    parser.add_argument('--cluster-count', help='Maximum number of clusters', required=True, type=int)
+    parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str)
+    arguments = parser.parse_args()
+    # sys.exit instead of the interactive-only site builtin exit().
+    sys.exit(main(arguments))
b
diff -r 000000000000 -r ba95715078c9 tn93_filter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tn93_filter.py Fri Apr 23 03:05:08 2021 +0000
[
@@ -0,0 +1,43 @@
+import argparse
+import csv
+
+from Bio import SeqIO
+
+arguments = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well')
+
+arguments.add_argument('-f', '--reference', help='Reference sequence', required=True, type=str)
+arguments.add_argument('-d', '--distances', help='Calculated pairwise distances', required=True, type=str)
+arguments.add_argument('-r', '--reads', help='Output file for filtered reads', required=True, type=str)
+arguments.add_argument('-q', '--clusters', help='Compressed clusters', required=True, type=str)
+settings = arguments.parse_args()
+
+reference_name = 'REFERENCE'
+reference_seq = ''
+
+with open(settings.reference) as seq_fh:
+    for seq_record in SeqIO.parse(seq_fh, 'fasta'):
+        reference_name = seq_record.name
+        reference_seq = seq_record.seq
+        break
+
+with open(settings.distances) as fh:
+    reader = csv.reader(fh, delimiter=',')
+    next(reader)
+    seqs_to_filter = set()
+    for line in reader:
+        if line[1] not in seqs_to_filter:
+            seqs_to_filter.add(line[1])
+    if reference_name in seqs_to_filter:
+        seqs_to_filter.remove(reference_name)
+
+with open(settings.reads, "a+") as fh:
+    seqs_filtered = list()
+    for seq_record in SeqIO.parse(settings.clusters, "fasta"):
+        if seq_record.name not in seqs_to_filter:
+            if seq_record.name == reference_name:
+                if seq_record.name not in seqs_filtered:
+                    seqs_filtered.append(seq_record.name)
+                else:
+                    continue
+    if reference_name not in seqs_filtered:
+        fh.write('\n>REFERENCE\n%s' % reference_seq)
b
diff -r 000000000000 -r ba95715078c9 tn93_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tn93_filter.xml Fri Apr 23 03:05:08 2021 +0000
[
@@ -0,0 +1,47 @@
+<tool id="tn93_filter" name="TN93 Filter" version="@VERSION@">
+    <description>- remove sequences from a reference that are within a given distance of a cluster</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@VERSION@">tn93</requirement>
+        <requirement type="package" version="1.70">biopython</requirement>
+    </requirements>
+    <version_command><![CDATA[tn93 --version]]></version_command>
+    <!-- Pipeline: copy the reads into the job directory, run tn93 to write
+         pairwise.csv, then run tn93_filter.py, which appends to the copied
+         reads. The copy (filtered_msa.fa) is what the output below collects. -->
+    <command detect_errors="exit_code"><![CDATA[
+    cat '$reads' > filtered_msa.fa &&
+    tn93 -o pairwise.csv -s filtered_msa.fa -t $threshold '$clusters' &&
+    python '$__tool_directory__/tn93_filter.py' --reference '$reference'
+        --distances pairwise.csv
+        --reads filtered_msa.fa
+        --clusters '$clusters'
+    ]]></command>
+    <inputs>
+        <param name="reads" type="data" format="fasta" label="Aligned reads" />
+        <param name="reference" type="data" format="fasta" label="Reference sequence" />
+        <param name="clusters" type="data" format="fasta" label="FASTA file with compressed clusters" />
+        <param name="threshold" type="float" value="0.015" label="Pairwise distance threshold" />
+    </inputs>
+    <outputs>
+        <data name="filtered_reference" format="fasta" from_work_dir="filtered_msa.fa" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="reads" value="filter-in1-reads.fa" />
+            <param name="reference" value="filter-in1-reference.fa" />
+            <param name="clusters" value="filter-in1-clusters.fa" />
+            <param name="threshold" value="0.35" />
+            <output name="filtered_reference" file="filter-out1.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
+TN93-Filter
+===========
+
+Removes aligned sequences that are within a given distance from a reference
+sequence using the 1993 Tamura-Nei distance calculation.
+]]></help>
+    <expand macro="citations">
+        <citation type="doi">10.1093/oxfordjournals.molbev.a040023</citation>
+    </expand>
+</tool>