Repository 'sarscov2formatter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/sarscov2formatter

Changeset 2:2e993ff8e7dc (2021-11-22)
Previous changeset 1:e3a7995dce75 (2020-11-20)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sarscov2formatter commit dc4d978a42ef881f602b813f9416fd37e3ea3b6a"
modified:
sarscov2formatter.xml
test-data/test.tsv
added:
test-data/dups-other.json
test-data/meta-other.json
test-data/msa-other.fasta
test-data/test.json
b
diff -r e3a7995dce75 -r 2e993ff8e7dc sarscov2formatter.xml
--- a/sarscov2formatter.xml Fri Nov 20 18:21:33 2020 +0000
+++ b/sarscov2formatter.xml Mon Nov 22 10:41:59 2021 +0000
[
@@ -1,12 +1,18 @@
-<tool id="sarscov2formatter" name="sarscov2formatter" version="0.5.3+galaxy1" profile="18.01">
+<tool id="sarscov2formatter" name="sarscov2formatter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01">
+    <macros>
+        <token name="@TOOL_VERSION@">1.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
     <requirements>
-        <requirement type="package" version="0.5.3">sarscov2formatter</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">sarscov2formatter</requirement>
     </requirements>
     <command><![CDATA[
 sarscov2formatter
 -a '$align'
 #if $source.source_choice == 'ncbi':
-    -m ncbi
+    #if $source.meta
+        -n '$source.meta'
+    #end if
 #else:
     -m '$source.meta'
 #end if
@@ -18,9 +24,11 @@
                 <option value="ncbi">NCBI</option>
                 <option value="other">Other</option>
             </param>
-            <when value="ncbi" />
+            <when value="ncbi">
+                 <param name="meta" type="data" format="json" optional="true" label="Metadata from NCBI" help="If not given it will be downloaded" />
+            </when>
             <when value="other" >
-                 <param name="meta" type="data" format="tabular" label="Metadata source" help="Tabular file with metadata with the correct columns (see below)" />
+                 <param name="meta" type="data" format="tabular" label="Metadata" help="Tabular file with metadata with the correct columns (see below)" />
             </when>
         </conditional>
     </inputs>
@@ -30,12 +38,12 @@
         <data name="outmeta" format="json" from_work_dir="meta.json" label="${tool.name}: Metadata" />
     </outputs>
     <tests>
-        <!-- note: test with ncbi yields unstable results for meta -> assert contents.
-             also needs a surprising amount memory (1.7G) which might
-             become impractival in the future -->
+        <!-- note: test with ncbi yields unstable results for meta -> assert contents. -->
         <test>
             <param name="align" ftype="fasta" value="align.fasta" />
-            <param name="source_choice" value="ncbi" />
+            <conditional name="source" >
+                <param name="source_choice" value="ncbi" />
+            </conditional>
             <output name="msa" ftype="fasta" compare="diff" value="msa.fasta" />
             <output name="dups" ftype="json" compare="diff" value="dups.json" />
             <output name="outmeta" ftype="json">
@@ -53,19 +61,41 @@
                 </assert_contents>
             </output>
         </test>
-        <!-- TODO test with tabular input, does not work yet
-             test.tsv has been generated with test.py
-             https://github.com/nickeener/sarscov2formatter/issues/2 -->
-<!--        <test>-->
-<!--            <param name="align" ftype="fasta" value="align.fasta" />-->
-<!--            <conditional name="source" >-->
-<!--                <param name="source_choice" value="other" />-->
-<!--                <param name="meta" ftype="tabular" value="test.tsv" />-->
-<!--            </conditional>-->
-<!--            <output name="msa" ftype="fasta" compare="diff" value="msa-other.fasta" />-->
-<!--            <output name="dups" ftype="json" compare="diff" value="dups-other.json" />-->
-<!--            <output name="outmeta" ftype="json" compare="diff" value="meta-other.json" />-->
-<!--        </test>-->
+        <!-- test with local NCBI data, here a json file containing only the entries relevant for the fasta -->
+        <test>
+            <param name="align" ftype="fasta" value="align.fasta" />
+            <conditional name="source" >
+                <param name="source_choice" value="ncbi" />
+                <param name="meta" ftype="json" value="test.json" />
+            </conditional>
+            <output name="msa" ftype="fasta" compare="diff" value="msa.fasta" />
+            <output name="dups" ftype="json" compare="diff" value="dups.json" />
+            <output name="outmeta" ftype="json">
+                <assert_contents>
+                    <has_line_matching expression="\{"/>
+                    <has_line_matching expression="\}"/>
+                    <has_text_matching expression='"LR757995": \{'/>
+                    <has_text_matching expression='"collected": '/>
+                    <has_text_matching expression='"collected": '/>
+                    <has_text_matching expression='"location": '/>
+                    <has_text_matching expression='"country": '/>
+                    <has_text_matching expression='"locality": '/>
+                    <has_text_matching expression='"state": '/>
+                    <has_text_matching expression='"subregion": '/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- test with tabular (non-NCBI) metadata input -->
+       <test>
+           <param name="align" ftype="fasta" value="align.fasta" />
+           <conditional name="source" >
+               <param name="source_choice" value="other" />
+               <param name="meta" ftype="tabular" value="test.tsv" />
+           </conditional>
+           <output name="msa" ftype="fasta" compare="diff" value="msa-other.fasta" />
+           <output name="dups" ftype="json" compare="diff" value="dups-other.json" />
+           <output name="outmeta" ftype="json" compare="diff" value="meta-other.json" />
+       </test>
     </tests>
         <help><![CDATA[
 
@@ -75,11 +105,13 @@
 
 Custom script that performs necessary formatting operations for the SARS-CoV2 Selection Analysis workflow.
 
+If using NCBI as the data source, the metadata file can be obtained from https://www.ncbi.nlm.nih.gov/projects/genome/sars-cov-2-seqs/ncov-sequences.yaml.
+If it is not given, the tool will download it automatically.
+
 If using non-NCBI data, the metadata input file must be tabular with the following columns: ID, collection_date, country, state (optional), and locality (optional). Optional columns should still be created even if they are not used.
 
 Dates should be of the format: YYYYMMDD (example: May 1 2020 = 20200501).
 
-
     ]]></help>
     <citations>
         <citation type="bibtex">
b
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/dups-other.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dups-other.json Mon Nov 22 10:41:59 2021 +0000
b
@@ -0,0 +1,68 @@
+{
+ "MN988713": {
+  "0": "MN988713"
+ },
+ "MT263436": {
+  "0": "MT263436"
+ },
+ "MT263450": {
+  "0": "MT263450"
+ },
+ "MT263469": {
+  "0": "MT263469"
+ },
+ "MT292570": {
+  "0": "MT292570"
+ },
+ "MT292573": {
+  "0": "MT292573"
+ },
+ "MT293207": {
+  "0": "MT293207"
+ },
+ "MT345866": {
+  "0": "MT345866"
+ },
+ "MT350282": {
+  "0": "MT350282"
+ },
+ "MultiSeq0_24": {
+  "1": "MT019530",
+  "10": "MT262910",
+  "11": "MT262912",
+  "12": "MT263399",
+  "13": "MT263435",
+  "14": "MT263451",
+  "15": "MT304482",
+  "16": "MT304483",
+  "17": "MT326151",
+  "18": "MT325578",
+  "19": "MT325609",
+  "2": "MT027062",
+  "20": "MT334533",
+  "21": "MT334534",
+  "22": "MT350276",
+  "3": "MT159705",
+  "4": "MT159710",
+  "5": "MT192773",
+  "6": "MT240479",
+  "7": "MT246472",
+  "8": "MT253699",
+  "9": "MT259275"
+ },
+ "MultiSeq1_15": {
+  "1": "MT276327",
+  "10": "MT345834",
+  "11": "MT345835",
+  "12": "MT345857",
+  "13": "MT344957",
+  "2": "MT293173",
+  "3": "MT293195",
+  "4": "MT326113",
+  "5": "MT326162",
+  "6": "MT326189",
+  "7": "MT325626",
+  "8": "MT345825",
+  "9": "MT345827"
+ }
+}
\ No newline at end of file
b
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/meta-other.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meta-other.json Mon Nov 22 10:41:59 2021 +0000
b
b'@@ -0,0 +1,425 @@\n+{\n+ "LR757995": {\n+  "collected": "20191226",\n+  "location": {\n+   "country": "China",\n+   "locality": "None",\n+   "state": "Wuhan",\n+   "subregion": "Asia"\n+  }\n+ },\n+ "MN988713": {\n+  "collected": "20200121",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "Illinois",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT019530": {\n+  "collected": "20191230",\n+  "location": {\n+   "country": "China",\n+   "locality": "Wuhan",\n+   "state": "Hubei",\n+   "subregion": "Asia"\n+  }\n+ },\n+ "MT027062": {\n+  "collected": "20200129",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "CA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT159705": {\n+  "collected": "20200217",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT159710": {\n+  "collected": "20200217",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT192773": {\n+  "collected": "20200122",\n+  "location": {\n+   "country": "Vietnam",\n+   "locality": "None",\n+   "state": "Ho Chi Minh city",\n+   "subregion": "Asia"\n+  }\n+ },\n+ "MT240479": {\n+  "collected": "20200304",\n+  "location": {\n+   "country": "Pakistan",\n+   "locality": "None",\n+   "state": "Gilgit",\n+   "subregion": "Asia"\n+  }\n+ },\n+ "MT246472": {\n+  "collected": "20200312",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT253699": {\n+  "collected": "20200124",\n+  "location": {\n+   "country": "China",\n+   "locality": "Hangzhou",\n+   "state": "Zhejiang",\n+   "subregion": "Asia"\n+  }\n+ },\n+ "MT259275": {\n+  "collected": "20200314",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ 
"MT262910": {\n+  "collected": "20200313",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT262912": {\n+  "collected": "20200313",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263399": {\n+  "collected": "20200324",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263435": {\n+  "collected": "20200324",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263436": {\n+  "collected": "20200324",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263450": {\n+  "collected": "20200324",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263451": {\n+  "collected": "20200316",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263467": {\n+  "collected": "20200316",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT263469": {\n+  "collected": "20200315",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT276327": {\n+  "collected": "20200229",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "GA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT292570": {\n+  "collected": "20200310",\n+  "location": {\n+   "country": "Spain",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "Europe"\n+  }\n+ },\n+ "MT292573": {\n+  "collected": 
"20200309",\n+  "location": {\n+   "country": "Spain",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "Europe"\n+  }\n+ },\n+ "MT293173": {\n+  "collected": "20200330",\n+  "location": {\n+   "country": "USA",'..b'": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT293195": {\n+  "collected": "20200328",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT293207": {\n+  "collected": "20200319",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT304482": {\n+  "collected": "20200301",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "IL",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT304483": {\n+  "collected": "20200301",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "IL",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT325578": {\n+  "collected": "20200307",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "IL",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT325609": {\n+  "collected": "20200305",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "UT",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT325626": {\n+  "collected": "20200305",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "SC",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT326113": {\n+  "collected": "20200320",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT326151": {\n+  "collected": "20200321",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT326162": {\n+  "collected": "20200321",\n+  "location": {\n+   "country": 
"USA",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT326189": {\n+  "collected": "20200318",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT334533": {\n+  "collected": "20200313",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "UT",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT334534": {\n+  "collected": "20200313",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "UT",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT344957": {\n+  "collected": "20200307",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "PA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT345825": {\n+  "collected": "20200325",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT345827": {\n+  "collected": "20200325",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT345834": {\n+  "collected": "20200326",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT345835": {\n+  "collected": "20200325",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT345857": {\n+  "collected": "20200324",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "ID",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT345866": {\n+  "collected": "20200323",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   "state": "ID",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT350276": {\n+  "collected": "20200319",\n+  "location": {\n+   "country": "USA",\n+   "locality": "None",\n+   
"state": "WA",\n+   "subregion": "North America"\n+  }\n+ },\n+ "MT350282": {\n+  "collected": "20200318",\n+  "location": {\n+   "country": "Brazil",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "South America"\n+  }\n+ },\n+ "NC_045512": {\n+  "collected": "20200117",\n+  "location": {\n+   "country": "China",\n+   "locality": "None",\n+   "state": "None",\n+   "subregion": "Asia"\n+  }\n+ }\n+}\n\\ No newline at end of file\n'
b
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/msa-other.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/msa-other.fasta Mon Nov 22 10:41:59 2021 +0000
b
b'@@ -0,0 +1,715 @@\n+>MN988713\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTC
ATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAAYAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC
\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>MultiSeq0_24\n+ATGTTTG'..b'+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>MT350282\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAAAGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTC
AGTGTTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGT
GACAATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n'
b
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/test.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.json Mon Nov 22 10:41:59 2021 +0000
[
b'@@ -0,0 +1,307 @@\n+updated: "Tue Nov 16 07:52:53 EST 2021"\n+\n+genbank-sequences: [\n+  {\n+    "accession": "LR757995",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/LR757995\\">LR757995</a>",\n+    "collection-date": "2019-12-26",\n+    "country": "China:Wuhan"\n+  },\n+  {\n+    "accession": "MN988713",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MN988713\\">MN988713</a>",\n+    "collection-date": "2020-01-21",\n+    "country": "USA: Illinois"\n+  },\n+  {\n+    "accession": "MT019530",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT019530\\">MT019530</a>",\n+    "collection-date": "2019-12-30",\n+    "country": "China: Hubei, Wuhan"\n+  },\n+  {\n+    "accession": "MT027062",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT027062\\">MT027062</a>",\n+    "collection-date": "2020-01-29",\n+    "country": "USA: CA"\n+  },\n+  {\n+    "accession": "MT159705",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT159705\\">MT159705</a>",\n+    "collection-date": "2020-02-17",\n+    "country": "USA"\n+  },\n+  {\n+    "accession": "MT159710",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT159710\\">MT159710</a>",\n+    "collection-date": "2020-02-17",\n+    "country": "USA"\n+  },\n+  {\n+    "accession": "MT192773",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT192773\\">MT192773</a>",\n+    "collection-date": "2020-01-22",\n+    "country": "Viet Nam: Ho Chi Minh city"\n+  },\n+  {\n+    "accession": "MT240479",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT240479\\">MT240479</a>",\n+    "collection-date": "2020-03-04",\n+    "country": "Pakistan: Gilgit"\n+  },\n+  {\n+    "accession": "MT246472",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT246472\\">MT246472</a>",\n+    "collection-date": "2020-03-12",\n+    
"country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT253699",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT253699\\">MT253699</a>",\n+    "collection-date": "2020-01-24",\n+    "country": "China: Zhejiang, Hangzhou"\n+  },\n+  {\n+    "accession": "MT259275",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT259275\\">MT259275</a>",\n+    "collection-date": "2020-03-14",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT262910",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT262910\\">MT262910</a>",\n+    "collection-date": "2020-03-13",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT262912",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT262912\\">MT262912</a>",\n+    "collection-date": "2020-03-13",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT263399",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263399\\">MT263399</a>",\n+    "collection-date": "2020-03-24",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT263435",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263435\\">MT263435</a>",\n+    "collection-date": "2020-03-24",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT263436",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263436\\">MT263436</a>",\n+    "collection-date": "2020-03-24",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT263450",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263450\\">MT263450</a>",\n+    "collection-date": "2020-03-24",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT263451",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263451\\">MT263451</a>",\n+    "collection-date": "2020-03-16",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT263467",\n+    "accession-link": 
"<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263467\\">MT263467</a>",\n+    "collection-date": "2020-03-16",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accessi'..b'accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT325609\\">MT325609</a>",\n+    "collection-date": "2020-03-05",\n+    "country": "USA: UT"\n+  },\n+  {\n+    "accession": "MT325626",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT325626\\">MT325626</a>",\n+    "collection-date": "2020-03-05",\n+    "country": "USA: SC"\n+  },\n+  {\n+    "accession": "MT326113",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326113\\">MT326113</a>",\n+    "collection-date": "2020-03-20",\n+    "country": "USA"\n+  },\n+  {\n+    "accession": "MT326151",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326151\\">MT326151</a>",\n+    "collection-date": "2020-03-21",\n+    "country": "USA"\n+  },\n+  {\n+    "accession": "MT326162",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326162\\">MT326162</a>",\n+    "collection-date": "2020-03-21",\n+    "country": "USA"\n+  },\n+  {\n+    "accession": "MT326189",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326189\\">MT326189</a>",\n+    "collection-date": "2020-03-18",\n+    "country": "USA"\n+  },\n+  {\n+    "accession": "MT334533",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT334533\\">MT334533</a>",\n+    "collection-date": "2020-03-13",\n+    "country": "USA: UT"\n+  },\n+  {\n+    "accession": "MT334534",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT334534\\">MT334534</a>",\n+    "collection-date": "2020-03-13",\n+    "country": "USA: UT"\n+  },\n+  {\n+    "accession": "MT344946",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT344946\\">MT344946</a>",\n+    "collection-date": "2020-03",\n+    "country": "USA: GA"\n+  
},\n+  {\n+    "accession": "MT344957",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT344957\\">MT344957</a>",\n+    "collection-date": "2020-03-07",\n+    "country": "USA: PA"\n+  },\n+  {\n+    "accession": "MT345825",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345825\\">MT345825</a>",\n+    "collection-date": "2020-03-25",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT345827",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345827\\">MT345827</a>",\n+    "collection-date": "2020-03-25",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT345834",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345834\\">MT345834</a>",\n+    "collection-date": "2020-03-26",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT345835",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345835\\">MT345835</a>",\n+    "collection-date": "2020-03-25",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT345857",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345857\\">MT345857</a>",\n+    "collection-date": "2020-03-24",\n+    "country": "USA: ID"\n+  },\n+  {\n+    "accession": "MT345866",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345866\\">MT345866</a>",\n+    "collection-date": "2020-03-23",\n+    "country": "USA: ID"\n+  },\n+  {\n+    "accession": "MT350252",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350252\\">MT350252</a>",\n+    "collection-date": "2020-03",\n+    "country": "USA: VA"\n+  },\n+  {\n+    "accession": "MT350255",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350255\\">MT350255</a>",\n+    "collection-date": "2020-04",\n+    "country": "USA: VA"\n+  },\n+  {\n+    "accession": "MT350276",\n+    "accession-link": "<a 
href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350276\\">MT350276</a>",\n+    "collection-date": "2020-03-19",\n+    "country": "USA: WA"\n+  },\n+  {\n+    "accession": "MT350282",\n+    "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350282\\">MT350282</a>",\n+    "collection-date": "2020-03-18",\n+    "country": "Brazil"\n+  }\n+]\n+\n+sra-accessions: [\n+]\n'
b
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/test.tsv
--- a/test-data/test.tsv Fri Nov 20 18:21:33 2020 +0000
+++ b/test-data/test.tsv Mon Nov 22 10:41:59 2021 +0000
b
@@ -1,5 +1,5 @@
 ID collection_date country state locality
-LR757995 20191226 China uhan None
+LR757995 20191226 China Wuhan None
 MT027062 20200129 USA CA None
 MT325626 20200305 USA SC None
 MT325609 20200305 USA UT None