Previous changeset 1:e3a7995dce75 (2020-11-20) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sarscov2formatter commit dc4d978a42ef881f602b813f9416fd37e3ea3b6a" |
modified:
sarscov2formatter.xml test-data/test.tsv |
added:
test-data/dups-other.json test-data/meta-other.json test-data/msa-other.fasta test-data/test.json |
b |
diff -r e3a7995dce75 -r 2e993ff8e7dc sarscov2formatter.xml --- a/sarscov2formatter.xml Fri Nov 20 18:21:33 2020 +0000 +++ b/sarscov2formatter.xml Mon Nov 22 10:41:59 2021 +0000 |
[ |
@@ -1,12 +1,18 @@ -<tool id="sarscov2formatter" name="sarscov2formatter" version="0.5.3+galaxy1" profile="18.01"> +<tool id="sarscov2formatter" name="sarscov2formatter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01"> + <macros> + <token name="@TOOL_VERSION@">1.0</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> <requirements> - <requirement type="package" version="0.5.3">sarscov2formatter</requirement> + <requirement type="package" version="@TOOL_VERSION@">sarscov2formatter</requirement> </requirements> <command><![CDATA[ sarscov2formatter -a '$align' #if $source.source_choice == 'ncbi': - -m ncbi + #if $source.meta + -n '$source.meta' + #end if #else: -m '$source.meta' #end if @@ -18,9 +24,11 @@ <option value="ncbi">NCBI</option> <option value="other">Other</option> </param> - <when value="ncbi" /> + <when value="ncbi"> + <param name="meta" type="data" format="json" optional="true" label="Metadata from NCBI" help="If not given it will be downloaded" /> + </when> <when value="other" > - <param name="meta" type="data" format="tabular" label="Metadata source" help="Tabular file with metadata with the correct columns (see below)" /> + <param name="meta" type="data" format="tabular" label="Metadata" help="Tabular file with metadata with the correct columns (see below)" /> </when> </conditional> </inputs> @@ -30,12 +38,12 @@ <data name="outmeta" format="json" from_work_dir="meta.json" label="${tool.name}: Metadata" /> </outputs> <tests> - <!-- note: test with ncbi yields unstable results for meta -> assert contents. - also needs a surprising amount memory (1.7G) which might - become impractival in the future --> + <!-- note: test with ncbi yields unstable results for meta -> assert contents. --> <test> <param name="align" ftype="fasta" value="align.fasta" /> - <param name="source_choice" value="ncbi" /> + <conditional name="source" > + <param name="source_choice" value="ncbi" /> + </conditional> <output name="msa" ftype="fasta" compare="diff" value="msa.fasta" /> <output name="dups" ftype="json" compare="diff" value="dups.json" /> <output name="outmeta" ftype="json"> @@ -53,19 +61,41 @@ </assert_contents> </output> </test> - <!-- TODO test with tabular input, does not work yet - test.tsv has been generated with test.py - https://github.com/nickeener/sarscov2formatter/issues/2 --> -<!-- <test>--> -<!-- <param name="align" ftype="fasta" value="align.fasta" />--> -<!-- <conditional name="source" >--> -<!-- <param name="source_choice" value="other" />--> -<!-- <param name="meta" ftype="tabular" value="test.tsv" />--> -<!-- </conditional>--> -<!-- <output name="msa" ftype="fasta" compare="diff" value="msa-other.fasta" />--> -<!-- <output name="dups" ftype="json" compare="diff" value="dups-other.json" />--> -<!-- <output name="outmeta" ftype="json" compare="diff" value="meta-other.json" />--> -<!-- </test>--> + <!-- test with local NCBI data, here a json file containing only the entries relevant for the fasta --> + <test> + <param name="align" ftype="fasta" value="align.fasta" /> + <conditional name="source" > + <param name="source_choice" value="ncbi" /> + <param name="meta" ftype="json" value="test.json" /> + </conditional> + <output name="msa" ftype="fasta" compare="diff" value="msa.fasta" /> + <output name="dups" ftype="json" compare="diff" value="dups.json" /> + <output name="outmeta" ftype="json"> + <assert_contents> + <has_line_matching expression="\{"/> + <has_line_matching expression="\}"/> + <has_text_matching expression='"LR757995": \{'/> + <has_text_matching expression='"collected": '/> + <has_text_matching expression='"collected": '/> + <has_text_matching expression='"location": '/> + <has_text_matching expression='"country": '/> + <has_text_matching expression='"locality": '/> + <has_text_matching expression='"state": '/> + <has_text_matching expression='"subregion": '/> + </assert_contents> + </output> + </test> + <!-- TODO test with tabular input --> + <test> + <param name="align" ftype="fasta" value="align.fasta" /> + <conditional name="source" > + <param name="source_choice" value="other" /> + <param name="meta" ftype="tabular" value="test.tsv" /> + </conditional> + <output name="msa" ftype="fasta" compare="diff" value="msa-other.fasta" /> + <output name="dups" ftype="json" compare="diff" value="dups-other.json" /> + <output name="outmeta" ftype="json" compare="diff" value="meta-other.json" /> + </test> </tests> <help><![CDATA[ @@ -75,11 +105,13 @@ Custom sript that performs necessary formatting operations for the SARS-CoV2 Selection Analysis workflow. +If using NCBI as data source the file can be obtained from https://www.ncbi.nlm.nih.gov/projects/genome/sars-cov-2-seqs/ncov-sequences.yaml. +If not given the tool will download it automatically. + If using non-NCBI data, the metadata input file must be tabular with the following columns: ID, collection_date, country, state (optional), and locality (optional). Optional columns should still be created even if they are not used. Dates should be of the format: YYMMDD (example: May 1 2020 = 20200501). - ]]></help> <citations> <citation type="bibtex"> |
b |
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/dups-other.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dups-other.json Mon Nov 22 10:41:59 2021 +0000 |
b |
@@ -0,0 +1,68 @@ +{ + "MN988713": { + "0": "MN988713" + }, + "MT263436": { + "0": "MT263436" + }, + "MT263450": { + "0": "MT263450" + }, + "MT263469": { + "0": "MT263469" + }, + "MT292570": { + "0": "MT292570" + }, + "MT292573": { + "0": "MT292573" + }, + "MT293207": { + "0": "MT293207" + }, + "MT345866": { + "0": "MT345866" + }, + "MT350282": { + "0": "MT350282" + }, + "MultiSeq0_24": { + "1": "MT019530", + "10": "MT262910", + "11": "MT262912", + "12": "MT263399", + "13": "MT263435", + "14": "MT263451", + "15": "MT304482", + "16": "MT304483", + "17": "MT326151", + "18": "MT325578", + "19": "MT325609", + "2": "MT027062", + "20": "MT334533", + "21": "MT334534", + "22": "MT350276", + "3": "MT159705", + "4": "MT159710", + "5": "MT192773", + "6": "MT240479", + "7": "MT246472", + "8": "MT253699", + "9": "MT259275" + }, + "MultiSeq1_15": { + "1": "MT276327", + "10": "MT345834", + "11": "MT345835", + "12": "MT345857", + "13": "MT344957", + "2": "MT293173", + "3": "MT293195", + "4": "MT326113", + "5": "MT326162", + "6": "MT326189", + "7": "MT325626", + "8": "MT345825", + "9": "MT345827" + } +} \ No newline at end of file |
b |
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/meta-other.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meta-other.json Mon Nov 22 10:41:59 2021 +0000 |
b |
b'@@ -0,0 +1,425 @@\n+{\n+ "LR757995": {\n+ "collected": "20191226",\n+ "location": {\n+ "country": "China",\n+ "locality": "None",\n+ "state": "Wuhan",\n+ "subregion": "Asia"\n+ }\n+ },\n+ "MN988713": {\n+ "collected": "20200121",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "Illinois",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT019530": {\n+ "collected": "20191230",\n+ "location": {\n+ "country": "China",\n+ "locality": "Wuhan",\n+ "state": "Hubei",\n+ "subregion": "Asia"\n+ }\n+ },\n+ "MT027062": {\n+ "collected": "20200129",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "CA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT159705": {\n+ "collected": "20200217",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT159710": {\n+ "collected": "20200217",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT192773": {\n+ "collected": "20200122",\n+ "location": {\n+ "country": "Vietnam",\n+ "locality": "None",\n+ "state": "Ho Chi Minh city",\n+ "subregion": "Asia"\n+ }\n+ },\n+ "MT240479": {\n+ "collected": "20200304",\n+ "location": {\n+ "country": "Pakistan",\n+ "locality": "None",\n+ "state": "Gilgit",\n+ "subregion": "Asia"\n+ }\n+ },\n+ "MT246472": {\n+ "collected": "20200312",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT253699": {\n+ "collected": "20200124",\n+ "location": {\n+ "country": "China",\n+ "locality": "Hangzhou",\n+ "state": "Zhejiang",\n+ "subregion": "Asia"\n+ }\n+ },\n+ "MT259275": {\n+ "collected": "20200314",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT262910": {\n+ "collected": "20200313",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT262912": {\n+ "collected": "20200313",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263399": {\n+ "collected": "20200324",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263435": {\n+ "collected": "20200324",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263436": {\n+ "collected": "20200324",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263450": {\n+ "collected": "20200324",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263451": {\n+ "collected": "20200316",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263467": {\n+ "collected": "20200316",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT263469": {\n+ "collected": "20200315",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT276327": {\n+ "collected": "20200229",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "GA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT292570": {\n+ "collected": "20200310",\n+ "location": {\n+ "country": "Spain",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "Europe"\n+ }\n+ },\n+ "MT292573": {\n+ "collected": "20200309",\n+ "location": {\n+ "country": "Spain",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "Europe"\n+ }\n+ },\n+ "MT293173": {\n+ "collected": "20200330",\n+ "location": {\n+ "country": "USA",'..b'": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT293195": {\n+ "collected": "20200328",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT293207": {\n+ "collected": "20200319",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT304482": {\n+ "collected": "20200301",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "IL",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT304483": {\n+ "collected": "20200301",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "IL",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT325578": {\n+ "collected": "20200307",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "IL",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT325609": {\n+ "collected": "20200305",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "UT",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT325626": {\n+ "collected": "20200305",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "SC",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT326113": {\n+ "collected": "20200320",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT326151": {\n+ "collected": "20200321",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT326162": {\n+ "collected": "20200321",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT326189": {\n+ "collected": "20200318",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT334533": {\n+ "collected": "20200313",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "UT",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT334534": {\n+ "collected": "20200313",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "UT",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT344957": {\n+ "collected": "20200307",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "PA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT345825": {\n+ "collected": "20200325",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT345827": {\n+ "collected": "20200325",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT345834": {\n+ "collected": "20200326",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT345835": {\n+ "collected": "20200325",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT345857": {\n+ "collected": "20200324",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "ID",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT345866": {\n+ "collected": "20200323",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "ID",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT350276": {\n+ "collected": "20200319",\n+ "location": {\n+ "country": "USA",\n+ "locality": "None",\n+ "state": "WA",\n+ "subregion": "North America"\n+ }\n+ },\n+ "MT350282": {\n+ "collected": "20200318",\n+ "location": {\n+ "country": "Brazil",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "South America"\n+ }\n+ },\n+ "NC_045512": {\n+ "collected": "20200117",\n+ "location": {\n+ "country": "China",\n+ "locality": "None",\n+ "state": "None",\n+ "subregion": "Asia"\n+ }\n+ }\n+}\n\\ No newline at end of file\n' |
b |
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/msa-other.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/msa-other.fasta Mon Nov 22 10:41:59 2021 +0000 |
b |
b'@@ -0,0 +1,715 @@\n+>MN988713\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAAYAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>MultiSeq0_24\n+ATGTTTG'..b'+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n+>MT350282\n+ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACC\n+AGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGAC\n+AAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCC\n+AATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAAAGGTACTAAGAGGTTTGAT\n+AACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATA\n+ATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTT\n+AATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTT\n+TTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTAT\n+TCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAA\n+GGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTAT\n+TTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTT\n+TCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACT\n+TTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCT\n+GGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAAT\n+GAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAG\n+TGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTC\n+CAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAA\n+GTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAAC\n+TGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTAT\n+GGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTT\n+GTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGAT\n+TATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAT\n+CTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAAT\n+CTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGT\n+AATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACT\n+AATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCA\n+CCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAAT\n+TTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTG\n+CCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG\n+ACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCA\n+GGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTC\n+CCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCT\n+AATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATAT\n+GAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCT\n+CCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGT\n+GCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATT\n+AGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATG\n+TACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGT\n+ACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAA\n+GTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTT\n+AATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGAT\n+CTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGC\n+CTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTT\n+TTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGT\n+ACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATG\n+CAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAA\n+AAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCC\n+ACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAAC\n+ACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATC\n+CTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGA\n+CTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCT\n+TCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTT\n+GATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTA\n+GTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCC\n+ATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACA\n+CACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACA\n+TTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCT\n+TTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACA\n+TCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAA\n+AAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTC\n+CAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTT\n+ATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGC\n+TGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGAC\n+TCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACA\n' |
b |
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/test.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.json Mon Nov 22 10:41:59 2021 +0000 |
[ |
b'@@ -0,0 +1,307 @@\n+updated: "Tue Nov 16 07:52:53 EST 2021"\n+\n+genbank-sequences: [\n+ {\n+ "accession": "LR757995",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/LR757995\\">LR757995</a>",\n+ "collection-date": "2019-12-26",\n+ "country": "China:Wuhan"\n+ },\n+ {\n+ "accession": "MN988713",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MN988713\\">MN988713</a>",\n+ "collection-date": "2020-01-21",\n+ "country": "USA: Illinois"\n+ },\n+ {\n+ "accession": "MT019530",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT019530\\">MT019530</a>",\n+ "collection-date": "2019-12-30",\n+ "country": "China: Hubei, Wuhan"\n+ },\n+ {\n+ "accession": "MT027062",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT027062\\">MT027062</a>",\n+ "collection-date": "2020-01-29",\n+ "country": "USA: CA"\n+ },\n+ {\n+ "accession": "MT159705",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT159705\\">MT159705</a>",\n+ "collection-date": "2020-02-17",\n+ "country": "USA"\n+ },\n+ {\n+ "accession": "MT159710",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT159710\\">MT159710</a>",\n+ "collection-date": "2020-02-17",\n+ "country": "USA"\n+ },\n+ {\n+ "accession": "MT192773",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT192773\\">MT192773</a>",\n+ "collection-date": "2020-01-22",\n+ "country": "Viet Nam: Ho Chi Minh city"\n+ },\n+ {\n+ "accession": "MT240479",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT240479\\">MT240479</a>",\n+ "collection-date": "2020-03-04",\n+ "country": "Pakistan: Gilgit"\n+ },\n+ {\n+ "accession": "MT246472",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT246472\\">MT246472</a>",\n+ "collection-date": "2020-03-12",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT253699",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT253699\\">MT253699</a>",\n+ "collection-date": "2020-01-24",\n+ "country": "China: Zhejiang, Hangzhou"\n+ },\n+ {\n+ "accession": "MT259275",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT259275\\">MT259275</a>",\n+ "collection-date": "2020-03-14",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT262910",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT262910\\">MT262910</a>",\n+ "collection-date": "2020-03-13",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT262912",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT262912\\">MT262912</a>",\n+ "collection-date": "2020-03-13",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT263399",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263399\\">MT263399</a>",\n+ "collection-date": "2020-03-24",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT263435",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263435\\">MT263435</a>",\n+ "collection-date": "2020-03-24",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT263436",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263436\\">MT263436</a>",\n+ "collection-date": "2020-03-24",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT263450",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263450\\">MT263450</a>",\n+ "collection-date": "2020-03-24",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT263451",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263451\\">MT263451</a>",\n+ "collection-date": "2020-03-16",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT263467",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT263467\\">MT263467</a>",\n+ "collection-date": "2020-03-16",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accessi'..b'accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT325609\\">MT325609</a>",\n+ "collection-date": "2020-03-05",\n+ "country": "USA: UT"\n+ },\n+ {\n+ "accession": "MT325626",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT325626\\">MT325626</a>",\n+ "collection-date": "2020-03-05",\n+ "country": "USA: SC"\n+ },\n+ {\n+ "accession": "MT326113",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326113\\">MT326113</a>",\n+ "collection-date": "2020-03-20",\n+ "country": "USA"\n+ },\n+ {\n+ "accession": "MT326151",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326151\\">MT326151</a>",\n+ "collection-date": "2020-03-21",\n+ "country": "USA"\n+ },\n+ {\n+ "accession": "MT326162",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326162\\">MT326162</a>",\n+ "collection-date": "2020-03-21",\n+ "country": "USA"\n+ },\n+ {\n+ "accession": "MT326189",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT326189\\">MT326189</a>",\n+ "collection-date": "2020-03-18",\n+ "country": "USA"\n+ },\n+ {\n+ "accession": "MT334533",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT334533\\">MT334533</a>",\n+ "collection-date": "2020-03-13",\n+ "country": "USA: UT"\n+ },\n+ {\n+ "accession": "MT334534",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT334534\\">MT334534</a>",\n+ "collection-date": "2020-03-13",\n+ "country": "USA: UT"\n+ },\n+ {\n+ "accession": "MT344946",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT344946\\">MT344946</a>",\n+ "collection-date": "2020-03",\n+ "country": "USA: GA"\n+ },\n+ {\n+ "accession": "MT344957",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT344957\\">MT344957</a>",\n+ "collection-date": "2020-03-07",\n+ "country": "USA: PA"\n+ },\n+ {\n+ "accession": "MT345825",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345825\\">MT345825</a>",\n+ "collection-date": "2020-03-25",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT345827",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345827\\">MT345827</a>",\n+ "collection-date": "2020-03-25",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT345834",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345834\\">MT345834</a>",\n+ "collection-date": "2020-03-26",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT345835",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345835\\">MT345835</a>",\n+ "collection-date": "2020-03-25",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT345857",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345857\\">MT345857</a>",\n+ "collection-date": "2020-03-24",\n+ "country": "USA: ID"\n+ },\n+ {\n+ "accession": "MT345866",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT345866\\">MT345866</a>",\n+ "collection-date": "2020-03-23",\n+ "country": "USA: ID"\n+ },\n+ {\n+ "accession": "MT350252",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350252\\">MT350252</a>",\n+ "collection-date": "2020-03",\n+ "country": "USA: VA"\n+ },\n+ {\n+ "accession": "MT350255",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350255\\">MT350255</a>",\n+ "collection-date": "2020-04",\n+ "country": "USA: VA"\n+ },\n+ {\n+ "accession": "MT350276",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350276\\">MT350276</a>",\n+ "collection-date": "2020-03-19",\n+ "country": "USA: WA"\n+ },\n+ {\n+ "accession": "MT350282",\n+ "accession-link": "<a href=\\"https://www.ncbi.nlm.nih.gov/nuccore/MT350282\\">MT350282</a>",\n+ "collection-date": "2020-03-18",\n+ "country": "Brazil"\n+ }\n+]\n+\n+sra-accessions: [\n+]\n' |
b |
diff -r e3a7995dce75 -r 2e993ff8e7dc test-data/test.tsv --- a/test-data/test.tsv Fri Nov 20 18:21:33 2020 +0000 +++ b/test-data/test.tsv Mon Nov 22 10:41:59 2021 +0000 |
b |
@@ -1,5 +1,5 @@ ID collection_date country state locality -LR757995 20191226 China uhan None +LR757995 20191226 China Wuhan None MT027062 20200129 USA CA None MT325626 20200305 USA SC None MT325609 20200305 USA UT None |