Repository 'sarscov2formatter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/sarscov2formatter

Changeset 0:1c664ff29354 (2020-05-04)
Next changeset 1:e3a7995dce75 (2020-11-20)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sarscov2formatter commit a9dbee25f6ad053c0c9b78f85276c7c839d2ab7a"
added:
sarscov2formatter.xml
test-data/align.fasta
test-data/dups.json
test-data/meta.json
test-data/msa.fasta
b
diff -r 000000000000 -r 1c664ff29354 sarscov2formatter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sarscov2formatter.xml Mon May 04 05:39:35 2020 -0400
[
@@ -0,0 +1,66 @@
+<tool id="sarscov2formatter" name="sarscov2formatter" version="0.1" profile="18.01">
+    <requirements> <!-- pins the sarscov2formatter package at 0.5.2; presumably this supplies the executable called in the command section (confirm) -->
+        <requirement type="package" version="0.5.2">sarscov2formatter</requirement>
+    </requirements>
+    <command><![CDATA[
+## Dataset paths are single-quoted so each one reaches the tool as exactly one shell argument.
+sarscov2formatter -a '$align'
+#if $source.source_choice == 'ncbi':
+    -m ncbi
+#else:
+    -m '$source.meta'
+#end if
+    ]]></command>
+    <inputs> <!-- the alignment to format, plus a selector for where its metadata comes from -->
+        <param name="align" type="data" format="fasta" label="Multiple Sequence Alignment (MSA)" help="MSA to be used in HyPhy analysis"/>
+        <conditional name="source">
+            <param name="source_choice" type="select" label="NCBI datasource or other?">
+                <option value="ncbi">NCBI</option>
+                <option value="other">Other</option>
+            </param>
+            <when value="ncbi"/> <!-- no extra input: the command passes the literal "-m ncbi" -->
+            <when value="other">
+                <param name="meta" type="data" format="tabular" label="Metadata source" help="Tabular file with metadata with the correct columns (see below)"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- each dataset is collected from a fixed file name in the job working directory (from_work_dir) -->
+        <data name="msa" format="fasta" from_work_dir="msa.fasta" label="${tool.name}: MSA" />
+        <data name="dups" format="json" from_work_dir="duplicates.json" label="${tool.name}: Duplicates" />
+        <data name="outmeta" format="json" from_work_dir="meta.json" label="${tool.name}: Metadata" />
+    </outputs>
+    <tests> <!-- single test: align.fasta with the NCBI source; all three outputs are diffed against the test-data files -->
+        <test>
+            <param name="align" ftype="fasta" value="align.fasta" />
+            <param name="source_choice" value="ncbi" />
+            <output name="msa" ftype="fasta" compare="diff" value="msa.fasta" />
+            <output name="dups" ftype="json" compare="diff" value="dups.json" />
+            <output name="outmeta" ftype="json" compare="diff" value="meta.json" />
+        </test>
+    </tests>
+        <help><![CDATA[
+
+=================
+sarscov2formatter
+=================
+
+Custom script that performs necessary formatting operations for the SARS-CoV2 Selection Analysis workflow.
+
+If using non-NCBI data, the metadata input file must be tabular with the following columns: ID, collection_date, country, state (optional), and locality (optional). Optional columns should still be created even if they are not used.
+
+Dates should be of the format: YYYYMMDD (example: May 1 2020 = 20200501).
+
+
+    ]]></help>
+    <citations> <!-- one BibTeX entry that points at the upstream GitHub repository of the script -->
+        <citation type="bibtex">
+            @misc{githubsarscov2formatter,
+            author = {Nicholas Keener},
+            year = {2020},
+            title = {sarscov2formatter},
+            publisher = {Github},
+            journal = {Github repository},
+            url = {https://github.com/nickeener/sarscov2formatter},
+        }</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 1c664ff29354 test-data/align.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/align.fasta Mon May 04 05:39:35 2020 -0400
b
b'@@ -0,0 +1,910 @@\n+>NC_045512.2 Wuhan seafood market pneumonia virus isolate Wuhan-Hu-1, complete genome\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCA\n+
GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTG
TATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTG'..b'-CoV-2/human/USA/VA-DCLS-0063/2020, complete genome\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTG
TTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATTTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGTGACA
ATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n'
b
diff -r 000000000000 -r 1c664ff29354 test-data/dups.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dups.json Mon May 04 05:39:35 2020 -0400
b
@@ -0,0 +1,1 @@
+{"MultiSeq0_24": {"1": "MT019530", "2": "MT027062", "3": "MT159705", "4": "MT159710", "5": "MT192773", "6": "MT240479", "7": "MT246472", "8": "MT253699", "9": "MT259275", "10": "MT262910", "11": "MT262912", "12": "MT263399", "13": "MT263435", "14": "MT263451", "15": "MT304482", "16": "MT304483", "17": "MT326151", "18": "MT325578", "19": "MT325609", "20": "MT334533", "21": "MT334534", "22": "MT350276"}, "MultiSeq1_15": {"1": "MT276327", "2": "MT293173", "3": "MT293195", "4": "MT326113", "5": "MT326162", "6": "MT326189", "7": "MT325626", "8": "MT345825", "9": "MT345827", "10": "MT345834", "11": "MT345835", "12": "MT345857", "13": "MT344957"}, "MT350282": {"0": "MT350282"}, "MT345866": {"0": "MT345866"}, "MT293207": {"0": "MT293207"}, "MT292573": {"0": "MT292573"}, "MT292570": {"0": "MT292570"}, "MT263469": {"0": "MT263469"}, "MT263450": {"0": "MT263450"}, "MT263436": {"0": "MT263436"}, "MN988713": {"0": "MN988713"}}
\ No newline at end of file
b
diff -r 000000000000 -r 1c664ff29354 test-data/meta.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meta.json Mon May 04 05:39:35 2020 -0400
b
@@ -0,0 +1,1 @@
+{"NC_045512": {"collected": "20200117", "location": {"subregion": "Asia", "country": "China", "state": null, "locality": null}}, "MT350282": {"collected": "20200318", "location": {"subregion": "South America", "country": "Brazil", "state": null, "locality": null}}, "MT350276": {"collected": "20200319", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT345866": {"collected": "20200323", "location": {"subregion": "North America", "country": "USA", "state": "ID", "locality": null}}, "MT345857": {"collected": "20200324", "location": {"subregion": "North America", "country": "USA", "state": "ID", "locality": null}}, "MT345835": {"collected": "20200325", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT345834": {"collected": "20200326", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT345827": {"collected": "20200325", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT345825": {"collected": "20200325", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT344957": {"collected": "20200307", "location": {"subregion": "North America", "country": "USA", "state": "PA", "locality": null}}, "MT334534": {"collected": "20200313", "location": {"subregion": "North America", "country": "USA", "state": "UT", "locality": null}}, "MT334533": {"collected": "20200313", "location": {"subregion": "North America", "country": "USA", "state": "UT", "locality": null}}, "MT326189": {"collected": "20200318", "location": {"subregion": "North America", "country": "USA", "state": null, "locality": null}}, "MT326162": {"collected": "20200321", "location": {"subregion": "North America", "country": "USA", "state": null, "locality": null}}, "MT326151": {"collected": "20200321", "location": {"subregion": "North America", "country": "USA", "state": null, 
"locality": null}}, "MT326113": {"collected": "20200320", "location": {"subregion": "North America", "country": "USA", "state": null, "locality": null}}, "MT325626": {"collected": "20200305", "location": {"subregion": "North America", "country": "USA", "state": "SC", "locality": null}}, "MT325609": {"collected": "20200305", "location": {"subregion": "North America", "country": "USA", "state": "UT", "locality": null}}, "MT325578": {"collected": "20200307", "location": {"subregion": "North America", "country": "USA", "state": "IL", "locality": null}}, "MT304483": {"collected": "20200301", "location": {"subregion": "North America", "country": "USA", "state": "IL", "locality": null}}, "MT304482": {"collected": "20200301", "location": {"subregion": "North America", "country": "USA", "state": "IL", "locality": null}}, "MT293207": {"collected": "20200319", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT293195": {"collected": "20200328", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT293173": {"collected": "20200330", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT292573": {"collected": "20200309", "location": {"subregion": "Europe", "country": "Spain", "state": null, "locality": null}}, "MT292570": {"collected": "20200310", "location": {"subregion": "Europe", "country": "Spain", "state": null, "locality": null}}, "MT276327": {"collected": "20200229", "location": {"subregion": "North America", "country": "USA", "state": "GA", "locality": null}}, "MT263469": {"collected": "20200315", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT263467": {"collected": "20200316", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT263451": {"collected": "20200316", "location": {"subregion": "North America", "country": "USA", "state": "WA", 
"locality": null}}, "MT263450": {"collected": "20200324", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT263436": {"collected": "20200324", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT263435": {"collected": "20200324", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT263399": {"collected": "20200324", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT262912": {"collected": "20200313", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT262910": {"collected": "20200313", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT259275": {"collected": "20200314", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT253699": {"collected": "20200124", "location": {"subregion": "Asia", "country": "China", "state": "Zhejiang", "locality": "Hangzhou"}}, "MT246472": {"collected": "20200312", "location": {"subregion": "North America", "country": "USA", "state": "WA", "locality": null}}, "MT240479": {"collected": "20200304", "location": {"subregion": "Asia", "country": "Pakistan", "state": "Gilgit", "locality": null}}, "MT192773": {"collected": "20200122", "location": {"subregion": "Asia", "country": "Vietnam", "state": "Ho Chi Minh city", "locality": null}}, "MT159710": {"collected": "20200217", "location": {"subregion": "North America", "country": "USA", "state": null, "locality": null}}, "MT159705": {"collected": "20200217", "location": {"subregion": "North America", "country": "USA", "state": null, "locality": null}}, "MT027062": {"collected": "20200129", "location": {"subregion": "North America", "country": "USA", "state": "CA", "locality": null}}, "MT019530": {"collected": "20191230", "location": {"subregion": "Asia", "country": 
"China", "state": "Hubei", "locality": "Wuhan"}}, "MN988713": {"collected": "20200121", "location": {"subregion": "North America", "country": "USA", "state": "Illinois", "locality": null}}, "LR757995": {"collected": "20191226", "location": {"subregion": "Asia", "country": "China", "state": "uhan", "locality": null}}}
\ No newline at end of file
b
diff -r 000000000000 -r 1c664ff29354 test-data/msa.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/msa.fasta Mon May 04 05:39:35 2020 -0400
b
b'@@ -0,0 +1,715 @@\n+>MN988713\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTC
ATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAAYAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC
\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>MultiSeq0_24\n+ATGTTTG'..b'+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>MT350282\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAAAGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTC
AGTGTTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGT
GACAATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n'