Repository 'pangolin'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/pangolin

Changeset 1:f557122d379e (2021-04-25)
Previous changeset 0:0ec813ad2910 (2021-04-12) Next changeset 2:b6abccb1f25b (2021-04-25)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pangolin commit 1522bdb834ffab157d1ca25e6e74db21f62e3aae"
modified:
pangolin.xml
test-data/result1.tsv
added:
fetch_latest_pangolearn.py
test-data/2021-04-23/__init__.py
test-data/2021-04-23/data/decisionTreeHeaders_v1.joblib
test-data/2021-04-23/data/decisionTree_v1.joblib
test-data/2021-04-23/data/decision_tree_rules.txt
test-data/2021-04-23/data/lineage_recall_report.csv
test-data/2021-04-23/data/lineages.downsample.csv
test-data/2021-04-23/data/lineages.metadata.csv
test-data/2021-04-23/supporting_information/data_prep_description.md
test-data/pangolearn.loc
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 0ec813ad2910 -r f557122d379e fetch_latest_pangolearn.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fetch_latest_pangolearn.py Sun Apr 25 20:17:07 2021 +0000
[
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+"""Fetch the latest pangoLEARN release from GitHub and unpack it into ./datadir."""
+
+import json
+import os
+import tarfile
+
+# rely on the fact that pangolin itself uses the requests module
+import requests
+
+LATEST_RELEASE_URL = (
+    "https://api.github.com/repos/cov-lineages/pangoLEARN/releases/latest"
+)
+ARCHIVE_NAME = "pangolearn.tgz"
+# seconds without any response data before a request is abandoned
+REQUEST_TIMEOUT = 60
+
+
+def fetch(url):
+    """Return the response for a GET of url; raise unless the status is 200."""
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    # raises requests.HTTPError for 4xx/5xx responses
+    response.raise_for_status()
+    if response.status_code != 200:
+        # only a 200 response is treated as success; fail loudly rather than
+        # exit quietly without having created datadir
+        raise RuntimeError(
+            "unexpected HTTP status %d from %s" % (response.status_code, url)
+        )
+    return response
+
+
+def main():
+    """Download the latest release tarball and move its model to ./datadir."""
+    details = json.loads(fetch(LATEST_RELEASE_URL).text)
+    tarball = fetch(details["tarball_url"])
+    with open(ARCHIVE_NAME, "wb") as handle:
+        handle.write(tarball.content)
+    # the context manager closes the archive even if extraction fails
+    with tarfile.open(ARCHIVE_NAME) as archive:
+        first_member = archive.next()
+        if first_member is None:
+            raise RuntimeError("downloaded pangoLEARN archive is empty")
+        # the first member is taken to be the archive's top-level directory
+        top_level = first_member.name
+        archive.extractall()
+    os.rename(os.path.join(top_level, "pangoLEARN"), "datadir")
+
+
+if __name__ == "__main__":
+    main()
b
diff -r 0ec813ad2910 -r f557122d379e pangolin.xml
--- a/pangolin.xml Mon Apr 12 20:31:42 2021 +0000
+++ b/pangolin.xml Sun Apr 25 20:17:07 2021 +0000
[
@@ -8,8 +8,16 @@
         <requirement type="package" version="0.22.0">csvtk</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
+        #if str($db.source) == "download"
+            python '$__tool_directory__/fetch_latest_pangolearn.py' &&
+        #else if str($db.source) == "builtin"
+            ln -s '$db.db_release.fields.path' datadir &&
+        #end if
         pangolin
         --threads \${GALAXY_SLOTS:-1}
+        #if str($db.source) == "download" or str($db.source) == "builtin"
+            --datadir 'datadir'
+        #end if
         $alignment
         --outfile report.csv 
         --max-ambig $max_ambig
@@ -28,6 +36,27 @@
           value="0.5" min="0" max="1" help="Maximum proportion of Ns allowed for pangolin to attempt assignment" />
       <param argument="--min-length" type="integer" label="Minimum query length allowed" 
           value="10000" min="0" help="Minimum query length allowed for pangolin to attempt assignment"/>
+      <conditional name="db">
+            <param type="select" name="source" label="pangoLEARN source" help="Where to find the pangoLEARN database">
+                <option value="download">Download latest from web</option>
+                <option value="builtin">Use database from Galaxy server</option>
+                <option value="default">Use default database built in to pangolin (not recommended)</option>
+            </param>
+            <when value="download">
+            </when>
+            <when value="builtin">
+                <param name="db_release" label="pangoLEARN release" type="select">
+                    <options from_data_table="pangolearn">
+                        <column name="value" index="0" />
+                        <column name="name" index="1" />
+                        <column name="path" index="3" />                         
+                        <filter type="sort_by" column="0"/>
+                    </options>
+                </param>
+            </when>
+            <when value="default">
+            </when>
+      </conditional>
     </inputs>
     <outputs>
         <data name="output1" format="tabular" label="pangolin on ${on_string}">
@@ -42,14 +71,41 @@
     <tests>
       <test expect_num_outputs="1">
           <param name="input1" value="test1.fasta"/>
-          <output name="output1" file="result1.tsv" ftype="tabular" />
+          <conditional name="db">
+              <param name="source" value="download" />
+          </conditional>
+          <output name="output1">
+            <assert_contents>
+                <has_text text="B.1.1" />
+                <has_text text="passed_qc" />
+            </assert_contents>
+          </output>
       </test>
       <test expect_num_outputs="2">
           <param name="alignment" value="--alignment" />
           <param name="input1" value="test1.fasta" />
-          <output name="output1" file="result1.tsv" ftype="tabular" />
+          <conditional name="db">
+            <param name="source" value="download" />
+          </conditional>
+          <output name="output1">
+            <assert_contents>
+                <has_text text="B.1.1" />
+                <has_text text="passed_qc" />
+            </assert_contents>
+          </output>
           <output name="align1" file="aln1.fasta" ftype="fasta" />
       </test>
+      <test expect_num_outputs="1">
+        <param name="input1" value="test1.fasta"/>
+        <conditional name="db">
+            <param name="source" value="builtin" />
+        </conditional>
+        <output name="output1">
+            <assert_contents>
+                <has_text text="2021-04-21" />
+            </assert_contents>
+        </output>
+    </test>
     </tests>
     <help><![CDATA[
 
@@ -58,6 +114,13 @@
 `Pangolin <https://cov-lineages.org/pangolin.html>`_ (Phylogenetic Assignment of Named Global Outbreak LINeages) 
 is used to assign a SARS-CoV-2 genome sequence the most likely lineage based on the PANGO nomenclature system.
 
+Pangolin uses the `pangoLEARN <https://github.com/cov-lineages/pangoLEARN>`_ stored model for lineage assignment. This
+model is updated more frequently than the pangolin tool is. In general, one should use the most recent model for lineage
+assignment, and the default option for this tool is to download the latest version of the model before the pangolin
+tool runs. A pangoLEARN data manager exists so that the Galaxy admin can download specific versions of the pangoLEARN
+model as required. Finally, the pangolin tool can use its default built-in model, but this is **not recommended** as the
+default model rapidly becomes out of date.
+
     ]]></help>
     <citations>
       <citation type="bibtex">
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-23/__init__.py Sun Apr 25 20:17:07 2021 +0000
b
@@ -0,0 +1,2 @@
+_program = "pangoLEARN"
+__version__ = "2021-04-21"
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/data/decisionTreeHeaders_v1.joblib
b
Binary file test-data/2021-04-23/data/decisionTreeHeaders_v1.joblib has changed
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/data/decisionTree_v1.joblib
b
Binary file test-data/2021-04-23/data/decisionTree_v1.joblib has changed
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/data/decision_tree_rules.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-23/data/decision_tree_rules.txt Sun Apr 25 20:17:07 2021 +0000
[
b"@@ -0,0 +1,13285 @@\n+['lineage', '285_-', '285_A', '285_C', '285_G', '285_T', '286_-', '286_A', '286_C', '286_G', '286_T', '287_-', '287_A', '287_C', '287_G', '287_T', '288_-', '288_A', '288_C', '288_G', '288_T', '289_-', '289_A', '289_C', '289_G', '289_T', '290_-', '290_A', '290_C', '290_G', '290_T', '291_-', '291_A', '291_C', '291_G', '291_T', '292_-', '292_A', '292_C', '292_G', '292_T', '293_-', '293_A', '293_C', '293_G', '293_T', '294_-', '294_A', '294_C', '294_G', '294_T', '295_-', '295_A', '295_C', '295_G', '295_T', '296_-', '296_A', '296_C', '296_G', '296_T', '297_-', '297_A', '297_C', '297_G', '297_T', '298_-', '298_A', '298_C', '298_G', '298_T', '299_-', '299_A', '299_C', '299_G', '299_T', '300_-', '300_A', '300_C', '300_G', '300_T', '301_-', '301_A', '301_C', '301_G', '301_T', '302_-', '302_A', '302_C', '302_G', '302_T', '303_-', '303_A', '303_C', '303_G', '303_T', '304_-', '304_A', '304_C', '304_G', '304_T', '305_-', '305_A', '305_C', '305_G', '305_T', '306_-', '306_A', '306_C', '306_G', '306_T', '307_-', '307_A', '307_C', '307_G', '307_T', '308_-', '308_A', '308_C', '308_G', '308_T', '309_-', '309_A', '309_C', '309_G', '309_T', '310_-', '310_A', '310_C', '310_G', '310_T', '311_-', '311_A', '311_C', '311_G', '311_T', '312_-', '312_A', '312_C', '312_G', '312_T', '313_-', '313_A', '313_C', '313_G', '313_T', '314_-', '314_A', '314_C', '314_G', '314_T', '315_-', '315_A', '315_C', '315_G', '315_T', '316_-', '316_A', '316_C', '316_G', '316_T', '317_-', '317_A', '317_C', '317_G', '317_T', '318_-', '318_A', '318_C', '318_G', '318_T', '319_-', '319_A', '319_C', '319_G', '319_T', '320_-', '320_A', '320_C', '320_G', '320_T', '321_-', '321_A', '321_C', '321_G', '321_T', '322_-', '322_A', '322_C', '322_G', '322_T', '323_-', '323_A', '323_C', '323_G', '323_T', '324_-', '324_A', '324_C', '324_G', '324_T', '325_-', '325_A', '325_C', '325_G', '325_T', '326_-', '326_A', '326_C', '326_G', '326_T', '327_-', '327_A', '327_C', '327_G', '327_T', '328_-', '328_A', '328_C', 
'328_G', '328_T', '329_-', '329_A', '329_C', '329_G', '329_T', '330_-', '330_A', '330_C', '330_G', '330_T', '331_-', '331_A', '331_C', '331_G', '331_T', '332_-', '332_A', '332_C', '332_G', '332_T', '333_-', '333_A', '333_C', '333_G', '333_T', '334_-', '334_A', '334_C', '334_G', '334_T', '335_-', '335_A', '335_C', '335_G', '335_T', '336_-', '336_A', '336_C', '336_G', '336_T', '337_-', '337_A', '337_C', '337_G', '337_T', '338_-', '338_A', '338_C', '338_G', '338_T', '339_-', '339_A', '339_C', '339_G', '339_T', '340_-', '340_A', '340_C', '340_G', '340_T', '341_-', '341_A', '341_C', '341_G', '341_T', '342_-', '342_A', '342_C', '342_G', '342_T', '343_-', '343_A', '343_C', '343_G', '343_T', '344_-', '344_A', '344_C', '344_G', '344_T', '345_-', '345_A', '345_C', '345_G', '345_T', '346_-', '346_A', '346_C', '346_G', '346_T', '347_-', '347_A', '347_C', '347_G', '347_T', '348_-', '348_A', '348_C', '348_G', '348_T', '349_-', '349_A', '349_C', '349_G', '349_T', '350_-', '350_A', '350_C', '350_G', '350_T', '351_-', '351_A', '351_C', '351_G', '351_T', '352_-', '352_A', '352_C', '352_G', '352_T', '353_-', '353_A', '353_C', '353_G', '353_T', '354_-', '354_A', '354_C', '354_G', '354_T', '355_-', '355_A', '355_C', '355_G', '355_T', '357_-', '357_A', '357_C', '357_G', '357_T', '358_-', '358_A', '358_C', '358_G', '358_T', '359_-', '359_A', '359_C', '359_G', '359_T', '360_-', '360_A', '360_C', '360_G', '360_T', '361_-', '361_A', '361_C', '361_G', '361_T', '362_-', '362_A', '362_C', '362_G', '362_T', '363_-', '363_A', '363_C', '363_G', '363_T', '364_-', '364_A', '364_C', '364_G', '364_T', '365_-', '365_A', '365_C', '365_G', '365_T', '366_-', '366_A', '366_C', '366_G', '366_T', '367_-', '367_A', '367_C', '367_G', '367_T', '368_-', '368_A', '368_C', '368_G', '368_T', '369_-', '369_A', '369_C', '369_G', '369_T', '370_-', '370_A', '370_C', '370_G', '370_T', '371_-', '371_A', '371_C', '371_G', '371_T', '372_-', '372_A', '372_C', '372_G', '372_T', '373_-', '373_A', '373_C', '373_G', '373_T', 
'374_-"..b"43!='G',27943=='A',21254!='A',20177=='A',19705=='-'\n+B.1.177.57\t18423=='-',26800=='C',16241!='-',25613!='G',15479!='G',21613=='A',6806=='G',23643!='G',27943=='A',21254=='A'\n+B.1.177.54\t18423=='-',26800=='C',16241!='-',25613!='G',15479!='G',21613=='A',6806=='G',23643=='G'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488!='C'\n+B.1.177.16\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105!='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826!='A'\n+B.1.177.15\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416!='C'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780!='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143!='A',21254!='A'\n+B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143!='A',21254=='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143=='A'\n+B.1\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072!='A',29644!='G'\n+B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072!='A',29644=='G'\n+B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072=='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532=='C'\n+B.1.177
\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492=='T'\n+B.1.177.18\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310!='C'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869!='C'\n+B.1.177.12\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813!='G'\n+B.1.177.57\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624!='A'\n+B.1.177.69\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627!='C'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627=='C',6285!='A'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627=='C',6285=='A'\n+B.1.177.58\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043=='G'\n+W.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804=='G'\n+B.1.177.81\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118=='G'\n+B.1.177.29\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026=='G'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294!='-',8077!='G'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294!='-',8077=='G'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294=='-'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959=='C'\n+B.1.177.21\t18423=='-',26800=='C',16241=='-',22050!='G',26166!='T'\n+B.1.177\t18423=='-',26800=='C',16241=='-
',22050!='G',26166=='T'\n+AA.1\t18423=='-',26800=='C',16241=='-',22050=='G'\n"
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/data/lineage_recall_report.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-23/data/lineage_recall_report.csv Sun Apr 25 20:17:07 2021 +0000
b
b'@@ -0,0 +1,781 @@\n+lineage,precision,recall,f1_score,support\n+B.1.1.4,0.82,0.9941818181818182,0.8944545454545455,468\n+A,0.8312000000000002,0.9623000000000002,0.8904,665\n+A.1,0.9960000000000001,0.9884000000000001,0.9921999999999999,2508\n+A.10,1.0,0.95,0.9667,14\n+A.11,1.0,1.0,1.0,10\n+A.12,0.8333333333333334,0.8333333333333334,0.8333333333333334,5\n+A.13,1.0,1.0,1.0,21\n+A.14,0.6,0.6,0.6,5\n+A.15,0.9856999999999999,1.0,0.9923,61\n+A.16,0.9833000000000001,1.0,0.9908999999999999,48\n+A.2,0.9953999999999998,0.9907,0.9928999999999999,1058\n+A.2.1,0.2,0.0833,0.1167,21\n+A.2.2,0.9977,0.9977,0.9977,427\n+A.2.3,0.9949999999999999,0.9946999999999999,0.9947000000000001,189\n+A.2.4,0.8116999999999999,1.0,0.8951,84\n+A.3,0.9951000000000001,0.9961,0.9957,793\n+A.4,1.0,1.0,1.0,54\n+A.5,0.9929,0.9879000000000001,0.9902999999999998,411\n+A.6,1.0,1.0,1.0,115\n+A.7,1.0,0.95,0.9667,17\n+A.9,0.3,0.2,0.2334,14\n+B,0.931,0.9460000000000001,0.9382999999999999,3723\n+B.1,0.9252,0.8999,0.9123000000000001,21297\n+B.1.1,0.9598999999999999,0.9279999999999999,0.9431999999999998,12773\n+B.1.1.1,0.9918000000000001,0.9892,0.9905000000000002,5708\n+B.1.1.10,0.9309000000000001,0.9390999999999998,0.9319,493\n+B.1.1.100,1.0,1.0,1.0,17\n+B.1.1.101,0.77,0.85,0.7833,16\n+B.1.1.102,0.6388333333333334,0.8333333333333334,0.6945,6\n+B.1.1.103,0.7,0.45,0.5334999999999999,17\n+B.1.1.104,0.9550000000000001,0.6749999999999999,0.7657,49\n+B.1.1.105,0.47614285714285715,0.5714285714285714,0.5,5\n+B.1.1.106,0.6,0.55,0.5667,15\n+B.1.1.107,0.9875,0.9464,0.9622999999999999,77\n+B.1.1.109,1.0,1.0,1.0,23\n+B.1.1.110,0.8571428571428571,0.8571428571428571,0.8571428571428571,6\n+B.1.1.111,0.9667,1.0,0.9800000000000001,26\n+B.1.1.112,1.0,1.0,1.0,15\n+B.1.1.113,0.8,0.8,0.8,24\n+B.1.1.114,0.8,0.8,0.8,5\n+B.1.1.115,1.0,1.0,1.0,42\n+B.1.1.116,0.85,0.9,0.8667,11\n+B.1.1.117,0.9,0.65,0.7334999999999999,20\n+B.1.1.118,1.0,0.8432999999999999,0.9036,52\n+B.1.1.12,0.9189999999999999,0.9666,0.9343999999999999,64\n+B.1.1.120,1.0,1
.0,1.0,29\n+B.1.1.121,0.6833333333333332,0.8333333333333334,0.6970000000000001,6\n+B.1.1.122,0.95,1.0,0.9667,11\n+B.1.1.123,0.8257,0.9400000000000001,0.8713000000000001,52\n+B.1.1.125,1.0,1.0,1.0,91\n+B.1.1.126,1.0,1.0,1.0,9\n+B.1.1.127,0.8847000000000002,0.8666,0.8529,59\n+B.1.1.128,1.0,0.9334,0.96,26\n+B.1.1.129,0.12,0.15,0.1,15\n+B.1.1.13,1.0,1.0,1.0,15\n+B.1.1.130,0.7765000000000001,0.9826,0.8549999999999999,113\n+B.1.1.131,0.8667,0.9,0.8800000000000001,18\n+B.1.1.132,0.9833000000000001,1.0,0.9908999999999999,53\n+B.1.1.133,0.9,0.8,0.8300000000000001,35\n+B.1.1.134,0.975,0.9667,0.9666,68\n+B.1.1.135,0.95,0.9445,0.9439999999999997,93\n+B.1.1.136,1.0,1.0,1.0,36\n+B.1.1.137,0.9606999999999999,0.9856999999999999,0.9712,69\n+B.1.1.138,1.0,1.0,1.0,129\n+B.1.1.139,1.0,1.0,1.0,9\n+B.1.1.14,1.0,1.0,1.0,58\n+B.1.1.140,0.775,0.9,0.8157,21\n+B.1.1.141,1.0,0.9083,0.9154,123\n+B.1.1.142,1.0,1.0,1.0,21\n+B.1.1.143,0.4,0.1666,0.2334,26\n+B.1.1.144,0.9,0.9,0.9,14\n+B.1.1.145,0.9334,0.95,0.9267,20\n+B.1.1.147,0.9,0.9,0.9,12\n+B.1.1.148,0.875,0.9856999999999999,0.9189999999999999,67\n+B.1.1.149,1.0,1.0,1.0,59\n+B.1.1.15,0.9464,0.9833000000000001,0.9611999999999998,63\n+B.1.1.151,0.8183999999999999,0.9933,0.882,145\n+B.1.1.152,0.9,0.8,0.8333999999999999,12\n+B.1.1.153,1.0,0.975,0.9856999999999999,46\n+B.1.1.154,0.8400000000000001,0.9167,0.8584999999999999,25\n+B.1.1.155,0.8000999999999999,0.85,0.8067,19\n+B.1.1.157,1.0,0.9167,0.9467000000000001,24\n+B.1.1.158,0.9667,0.925,0.9427999999999999,44\n+B.1.1.159,0.8950000000000001,0.95,0.9163,40\n+B.1.1.16,0.9667,1.0,0.9800000000000001,47\n+B.1.1.160,0.95,1.0,0.9667,10\n+B.1.1.161,0.7030000000000001,0.82,0.7424999999999999,43\n+B.1.1.162,0.8667,0.85,0.8467,20\n+B.1.1.163,0.9370999999999998,0.9047000000000001,0.9147000000000001,63\n+B.1.1.164,0.8499000000000001,0.9777999999999999,0.9013,86\n+B.1.1.165,1.0,0.96,0.975,55\n+B.1.1.166,0.8667,0.6167,0.6933999999999999,25\n+B.1.1.167,0.8667,0.8167,0.8267,21\n+B.1.1.168,1.0,0.9167,0.9467000000000
001,22\n+B.1.1.169,0.85,0.9,0.8667,15\n+B.1.1.17,1.0,0.9667,0.9800000000000001,29\n+B.1.1.170,1.0,1'..b'7,0.45,0.5335,21\n+B.1.9,0.9922000000000001,0.9916,0.9917999999999999,242\n+B.1.90,1.0,0.8834,0.9314,34\n+B.1.91,0.9821000000000002,1.0,0.9909000000000001,379\n+B.1.93,0.9969000000000001,0.9968,0.9968999999999999,946\n+B.1.94,0.9167,1.0,0.9467000000000001,18\n+B.1.95,0.8,0.75,0.7501,20\n+B.1.96,1.0,1.0,1.0,45\n+B.1.97,1.0,1.0,1.0,38\n+B.1.98,0.9441,0.9868,0.9645999999999999,606\n+B.10,0.9751,0.9917,0.9827999999999999,112\n+B.11,0.9644,0.9868,0.9743999999999999,305\n+B.12,0.9856999999999999,1.0,0.9923,63\n+B.13,1.0,1.0,1.0,21\n+B.15,1.0,0.975,0.9856999999999999,41\n+B.18,0.9667,1.0,0.9800000000000001,22\n+B.19,1.0,0.9667,0.9800000000000001,26\n+B.20,0.8667,0.7333999999999999,0.7800999999999999,22\n+B.23,0.8412,0.9833999999999999,0.9006999999999998,120\n+B.26,0.6344,0.65,0.6039,40\n+B.27,1.0,1.0,1.0,58\n+B.28,0.9893000000000001,0.9963000000000001,0.9926,263\n+B.29,0.9627000000000001,0.991,0.9759,216\n+B.3,0.9927999999999999,0.9879999999999999,0.9903999999999998,828\n+B.3.1,0.9829999999999999,0.9940000000000001,0.9884000000000001,508\n+B.30,1.0,0.9800000000000001,0.9888999999999999,48\n+B.31,0.9867000000000001,0.9725999999999999,0.9792,216\n+B.32,0.9333,0.9,0.9067000000000001,24\n+B.33,0.9800000000000001,0.975,0.9745999999999999,47\n+B.34,0.9856999999999999,1.0,0.9923,65\n+B.35,0.9789,0.9778,0.977,87\n+B.36,1.0,1.0,1.0,10\n+B.37,0.975,0.9667,0.9657,27\n+B.38,1.0,1.0,1.0,15\n+B.39,0.9975999999999999,0.9753999999999998,0.9861000000000001,404\n+B.4,0.9811,0.8449,0.9066000000000001,361\n+B.4.1,1.0,1.0,1.0,27\n+B.4.2,1.0,1.0,1.0,15\n+B.4.4,0.9667,0.8916999999999999,0.9124000000000001,38\n+B.4.5,0.6001000000000001,1.0,0.7475000000000002,76\n+B.40,0.9915,0.9804999999999999,0.9858,2435\n+B.41,0.975,0.9826,0.9783999999999999,117\n+B.42,0.8667,0.9,0.8800000000000001,11\n+B.43,0.975,1.0,0.9856999999999999,33\n+B.44,0.95,1.0,0.9667,10\n+B.45,0.9962,1.0,0.998,245\n+B.46,0.9800000
000000001,0.9334,0.9489000000000001,32\n+B.47,0.95,1.0,0.9667,19\n+B.48,0.9823000000000001,0.8596,0.9136000000000001,148\n+B.49,1.0,1.0,1.0,19\n+B.5,0.8167,0.8,0.7800999999999999,13\n+B.51,1.0,1.0,1.0,22\n+B.52,1.0,1.0,1.0,74\n+B.53,0.9856999999999999,1.0,0.9923,58\n+B.54,0.7546999999999999,0.4459000000000001,0.5488,88\n+B.6,0.9714,0.9664999999999999,0.9684000000000001,712\n+B.6.1,1.0,0.7833,0.8501,22\n+B.6.2,1.0,1.0,1.0,14\n+B.6.3,0.9,0.7666999999999999,0.8099999999999999,24\n+B.6.4,0.95,0.85,0.8834,22\n+B.6.5,0.8888888888888888,0.8888888888888888,0.8888888888888888,9\n+B.6.6,0.9884999999999999,0.9978000000000001,0.9932000000000002,927\n+C.1,0.9704,0.9732,0.9712999999999999,186\n+C.11,1.0,1.0,1.0,10\n+C.12,1.0,1.0,1.0,122\n+C.13,0.8667999999999999,0.85,0.8200999999999998,20\n+C.14,0.8454,0.785,0.768,45\n+C.15,1.0,1.0,1.0,27\n+C.2,1.0,1.0,1.0,14\n+C.3,0.9929,1.0,0.9963,126\n+C.7,1.0,1.0,1.0,28\n+C.8,0.9875,0.9833000000000001,0.9842000000000001,63\n+C.9,0.835,0.6666000000000001,0.6974,27\n+D.2,1.0,0.9999,1.0,10570\n+D.3,1.0,1.0,1.0,140\n+E.1,1.0,1.0,1.0,41\n+F.1,1.0,1.0,1.0,10\n+G.1,0.9167,0.9,0.8967,19\n+H.1,0.9015000000000001,0.9808999999999999,0.9336999999999998,104\n+I.1,0.9042,1.0,0.9345000000000001,33\n+J.1,0.95,1.0,0.9667,15\n+K.1,1.0,0.9,0.9334,20\n+L.1,1.0,1.0,1.0,165\n+L.2,1.0,1.0,1.0,60\n+M.1,1.0,0.9667,0.9800000000000001,60\n+N.1,1.0,0.9667,0.9800000000000001,23\n+N.2,1.0,1.0,1.0,9\n+N.3,1.0,1.0,1.0,10\n+N.4,1.0,0.95,0.9667,21\n+B.1.1.119,0.3,0.4,0.33340000000000003,4\n+B.1.1.183,0.8333333333333334,0.8333333333333334,0.8333333333333334,6\n+B.1.1.81,0.75,0.75,0.75,4\n+B.1.1.91,0.6666666666666666,0.6666666666666666,0.6666666666666666,5\n+B.1.1.98,0.0,0.0,0.0,2\n+B.1.158,0.0,0.0,0.0,4\n+B.1.269,1.0,1.0,1.0,8\n+B.1.373,1.0,1.0,1.0,8\n+B.1.80,0.0,0.0,0.0,5\n+C.5,1.0,1.0,1.0,7\n+B.1.279,1.0,1.0,1.0,6\n+B.50,0.5,0.5,0.5,6\n+C.10,1.0,1.0,1.0,4\n+C.4,0.9375,1.0,0.958375,8\n+B.1.1.156,0.6666666666666666,0.6666666666666666,0.6666666666666666,6\n+B.1.1.182,1.0,1.0,1.
0,4\n+B.1.1.252,1.0,1.0,1.0,5\n+C.6,0.375,0.5,0.41675,4\n+macro avg,0.9166363636363637,0.9150909090909092,0.9071818181818184,193089\n+weighted avg,0.9636363636363636,0.9598181818181818,0.9593636363636364,193089\n'
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/data/lineages.downsample.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-23/data/lineages.downsample.csv Sun Apr 25 20:17:07 2021 +0000
b
b'@@ -0,0 +1,179715 @@\n+sequence_name,lineage\n+Australia/VIC390/2020,B.6.6\n+Australia/VIC473/2020,B.1.1\n+Australia/VIC475/2020,B.1.23\n+Australia/VIC476/2020,B.1.23\n+Australia/VIC479/2020,B.1.23\n+Australia/NSW14/2020,B.4\n+USA/WA13-UW9/2020,A.1\n+USA/WA-UW-1741/2020,B.1.371\n+England/NOTT-10E5E4/2020,B.23\n+USA/WA-UW-1739/2020,B.1\n+England/NOTT-10E5D5/2020,B.1.1.41\n+USA/WA-UW-1732/2020,B.1\n+USA/WA-UW-1733/2020,B.1.371\n+USA/WA-UW-1722/2020,B.1\n+England/NOTT-10E5A8/2020,B.1.1\n+Netherlands/Oisterwijk_1364072/2020,B\n+USA/WA-UW-1772/2020,B.1\n+England/NOTT-10E599/2020,A.2\n+Netherlands/Oss_1363500/2020,B.55\n+USA/WA-UW-1708/2020,A.1\n+England/NOTT-10E58A/2020,B\n+Netherlands/Rotterdam_1363790/2020,B\n+USA/WA-UW-1731/2020,A.1\n+England/NOTT-10E57B/2020,B\n+Netherlands/Rotterdam_1364040/2020,B.55\n+USA/WA-UW-1709/2020,A.1\n+England/NOTT-10E55D/2020,B.3\n+England/NOTT-10E54E/2020,B.23\n+Netherlands/Tilburg_1363354/2020,B.11\n+Netherlands/Utrecht_1363564/2020,B.1.383\n+Netherlands/Utrecht_1363628/2020,B.1.383\n+USA/WA-UW-1724/2020,B.1.371\n+England/NOTT-10E520/2020,B\n+USA/WA-UW-1706/2020,B.1.371\n+England/NOTT-10E511/2020,B.39\n+USA/WA-UW-1705/2020,A.1\n+USA/WA-UW-1707/2020,B.1\n+England/NOTT-10E4F6/2020,B.1\n+USA/WA-UW-1745/2020,B.30\n+England/NOTT-10E4C9/2020,B.52\n+Netherlands/Diemen_1363454/2020,B.1.1\n+England/NOTT-10E4AB/2020,B.23\n+Netherlands/Eindhoven_1363782/2020,B.1.1\n+USA/WA-UW-1729/2020,A.1\n+England/NOTT-10E49C/2020,B.35\n+Netherlands/Haarlem_1363688/2020,B.1\n+Netherlands/Houten_1363498/2020,B.1\n+Netherlands/Loon_op_zand_1363512/2020,B.11\n+Netherlands/Delft_1363424/2020,B.1.1\n+USA/WA-UW-1682/2020,A.1\n+Australia/VIC908/2020,B.1.1\n+USA/WA-UW-1775/2020,B.1.1\n+England/NOTT-10E757/2020,B.39\n+USA/WA-UW-1774/2020,B.1.371\n+USA/un-UW-1832/2020,B.30\n+England/NOTT-10E739/2020,B.39\n+USA/WA-UW-1796/2020,B.1\n+England/NOTT-10E72A/2020,B.35\n+USA/WA-UW-1792/2020,B.1\n+England/NOTT-10E70C/2020,B.1.1.369\n+USA/WA-UW-1785/2020,A.1\n+Australia/VIC912/202
0,B.1.23\n+Australia/VIC913/2020,B.6.6\n+Czech_Republic/IAB_1/2020,B.1\n+Czech_Republic/IAB_4/2020,B.1\n+Czech_Republic/IAB_8/2020,B.1.1\n+Czech_Republic/IAB_9/2020,B.1.1\n+Czech_Republic/IAB_10/2020,B.1\n+Czech_Republic/IAB_12/2020,B.1\n+England/NOTT-10E6A5/2020,B.1.391\n+England/NOTT-10E696/2020,B.1\n+England/NOTT-10E678/2020,B.61\n+USA/WA-UW-1770/2020,B.1\n+England/NOTT-10E669/2020,B.1.1.369\n+England/NOTT-10E65A/2020,B.1\n+USA/WA-UW-1753/2020,B.1.371\n+USA/WA-UW-1735/2020,A.1\n+England/NOTT-10E63C/2020,B.3\n+Australia/NSW09/2020,B.4\n+USA/WA-UW-1730/2020,A.1\n+England/NOTT-10E61E/2020,B.1.250\n+Australia/NSW11/2020,B.4\n+Australia/NSW13/2020,B.4.6\n+Czech_Republic/IAB_15/2020,B.1\n+Czech_Republic/IAB_20/2020,B.1.1\n+USA/WA-UW-1850/2020,A.1\n+USA/WA-UW-1858/2020,B.1\n+USA/WA-UW-1824/2020,B.1.371\n+USA/WA-UW-1826/2020,B.1.162\n+USA/WA-UW-1827/2020,B.1.319\n+USA/WA-UW-1828/2020,A.1\n+England/NOTT-10E809/2020,B.29\n+USA/un-UW-1834/2020,A.1\n+England/NOTT-10E7FD/2020,B\n+USA/WA-UW-1799/2020,B.1.320\n+USA/WA-UW-1782/2020,A.1\n+USA/WA-UW-1784/2020,A.1\n+England/NOTT-10E793/2020,B.23\n+USA/WA-UW-1779/2020,B.1\n+England/NOTT-10E784/2020,B.40\n+USA/WA-UW-1904/2020,B.1\n+USA/WA-UW-1920/2020,A.1\n+USA/WA-UW-1913/2020,A.1\n+USA/WA-UW-1919/2020,A.1\n+USA/WA-UW-1905/2020,A.1\n+USA/WA-UW-1872/2020,B.1.320\n+USA/OR-UW-1849/2020,A.1\n+USA/WA-UW-1863/2020,A.1\n+USA/WA-UW-1946/2020,A.1\n+USA/WA-UW-1835/2020,A.1\n+USA/WA-UW-1868/2020,B.4.4\n+Australia/VIC551/2020,B.1.434\n+Australia/VIC554/2020,B.1.434\n+Australia/VIC555/2020,B.1\n+Australia/VIC557/2020,B.1.23\n+Australia/VIC559/2020,B.1\n+Australia/VIC560/2020,B.1\n+Australia/VIC561/2020,B.1.1\n+Australia/VIC562/2020,B.1\n+Australia/VIC565/2020,A.2.2\n+Australia/VIC567/2020,B.1\n+Australia/VIC568/2020,A.1\n+Australia/VIC569/2020,B\n+Australia/VIC570/2020,B.1\n+Australia/VIC571/2020,B.1\n+Australia/VIC572/2020,B.40\n+Australia/VIC574/2020,B.1.1\n+Australia/VIC575/2020,B.1.1\n+England/NOTT-10E12C/2020,B.61\n+Beijing/233/2020,A\n+Indi
a/MH-1-27/2020,B\n+India/MH-1-31/2020,B\n+Australia/VIC534/2020,B.1.23\n+Australia/VIC535/2020,B\n+Australia/VIC536/2020,B.1\n+Australia/V'..b'/WB-1930400401314/2021,B.1.617.1\n+Belgium/UZA-UA-CV2132091726/2021,B.1.617.1\n+Canada/MB-NML-21570/2021,B.1.438.1\n+Canada/MB-NML-21579/2021,B.1.438.1\n+Canada/MB-NML-21589/2021,B.1.438.1\n+Canada/MB-NML-17472/2021,B.1.438.1\n+Canada/MB-NML-17747/2021,B.1.438.1\n+Canada/MB-NML-17706/2021,B.1.438.1\n+Canada/MB-NML-17692/2021,B.1.438.1\n+England/CAMC-14E335E/2021,B.1.617.1\n+England/CAMC-14E0166/2021,B.1.617.1\n+England/CAMC-14E338B/2021,B.1.617.2\n+England/CAMC-14E2F97/2021,B.1.617.2\n+England/MILK-14E0272/2021,B.1.617.2\n+USA/GA-CDC-STM-000046368/2021,B.1.617.1\n+India/KA-NIMH-SEQ-236/2021,B.1.617.1\n+India/KA-NIMH-SEQ-239/2021,B.1.617.1\n+India/KA-NIMH-SEQ-249/2021,B.1.617.1\n+India/KA-NIMH-SEQ-250/2021,B.1.617.1\n+India/KA-NIMH-SEQ-253/2021,B.1.617.1\n+India/KA-NIMH-SEQ-254/2021,B.1.617.1\n+India/KA-NIMH-SEQ-271/2021,B.1.617.1\n+India/KA-NIMH-SEQ-274/2021,B.1.617.1\n+India/KA-NIMH-SEQ-279/2021,B.1.617.1\n+India/KA-NIMH-SEQ-280/2021,B.1.617.1\n+India/KA-NIMH-SEQ-284/2021,B.1.617.1\n+India/KA-NIMH-SEQ-288/2021,B.1.617.1\n+India/KA-NIMH-SEQ-291/2021,B.1.617.1\n+India/KA-NIMH-SEQ-295/2021,B.1.617.1\n+India/KA-NIMH-SEQ-302/2021,B.1.617.1\n+USA/MA-CDC-STM-000044850/2021,B.1.617.2\n+USA/MA-CDC-STM-000044887/2021,B.1.617.2\n+Sint_Maarten/SX-RIVM-23089/2021,B.1.617\n+England/CAMC-14C2C5A/2021,B.1.617.2\n+Belgium/MBLG36792/2021,B.1.617.1\n+USA/ND-NDDH-0594/2021,B.1.438.1\n+USA/ND-NDDH-0620/2021,B.1.438.1\n+USA/ND-NDDH-0621/2021,B.1.438.1\n+USA/NJ-CDC-LC0035972/2021,B.1\n+USA/NJ-CDC-LC0036132/2021,B.1\n+USA/WI-CDC-LC0035686/2021,B.1.617.1\n+USA/CA-CDC-FG-018898/2021,B.1.617.1\n+USA/CA-CDC-FG-018335/2021,B.1.617.2\n+USA/NJ-CDC-LC0038223/2021,B.1.617.2\n+USA/WA-UW-2021033003742/2021,B.1.617.1\n+USA/CA-CDC-FG-019301/2021,B.1.617.1\n+Singapore/535/2021,B.1.617.2\n+Singapore/524/2021,B.1.617.2\n+Singapore/533/2021,B.1.617.2\n+Singapore/53
4/2021,B.1.617.2\n+Singapore/525/2021,B.1.617.1\n+Singapore/526/2021,B.1.617.1\n+Singapore/527/2021,B.1.617.1\n+Singapore/528/2021,B.1.617.1\n+Singapore/529/2021,B.1.617.1\n+England/CAMC-14E79FE/2021,B.1.617.1\n+England/CAMC-14E7B61/2021,B.1.617.2\n+England/CAMC-14E792B/2021,B.1.617.1\n+England/CAMC-14E7CF5/2021,B.1.617.1\n+England/CAMC-14E7C22/2021,B.1.617.1\n+Scotland/CAMC-14E0157/2021,B.1.617.1\n+Australia/NSW4471/2021,B.1.617.2\n+USA/IN-CDC-STM-000045992/2021,B.1.617.2\n+Belgium/Aalst-OLVZ-8042639/2021,B.1.620\n+USA/ND-NDDH-0641/2021,B.1.438.1\n+USA/WA-UW-2021040102602/2021,B.1.617.1\n+USA/WA-UW-2021040308606/2021,B.1.617.2\n+USA/WA-UW-2021040107121/2021,B.1.617.1\n+New_Zealand/21MV0313/2021,B.1.617\n+New_Zealand/21MV0277/2021,B.1.617\n+New_Zealand/21MV0339/2021,B.1.617\n+New_Zealand/21MV0256/2021,B.1.617\n+New_Zealand/21MV0340/2021,B.1.617\n+New_Zealand/21MV0261/2021,B.1.617\n+New_Zealand/21MV0270/2021,B.1.617\n+New_Zealand/21MV0343/2021,B.1.617\n+New_Zealand/21MV0334/2021,B.1.617\n+Ireland/D-NVRL-21IRL49397/2021,B.1.617.1\n+Ireland/D-NVRL-21IRL49399/2021,B.1.617.1\n+France/ARA-HCL021061596501/2021,B.1.620\n+France/ARA-HCL021061598501/2021,B.1.620\n+USA/WV-WVU-WV064773/2021,B.1.620\n+USA/ND-NDDH-0710/2021,B.1.438.1\n+Reunion/PIMIT_00914/2021,B.1.438.2\n+England/RAND-14F19F1/2021,B.1.617.2\n+England/RAND-14F1AD0/2021,B.1.617.2\n+England/ALDP-14EDD1A/2021,B.1.617.2\n+England/CAMC-14E7563/2021,B.1.617.2\n+England/RAND-14F1A67/2021,B.1.617.1\n+England/CAMC-14DECA6/2021,B.1.617.2\n+England/CAMC-14DEBC7/2021,B.1.617.2\n+England/CAMC-14DEBA9/2021,B.1.617.2\n+England/CAMC-14DE9DC/2021,B.1.617.1\n+England/CAMC-14DEE37/2021,B.1.617.1\n+England/CAMC-14DEBF4/2021,B.1.617.2\n+England/CAMC-14E726C/2021,B.1.617.1\n+England/MILK-14BF397/2021,B.1.617.2\n+England/RAND-14EB338/2021,B.1.617.1\n+England/RAND-14E21BF/2021,B.1.617.2\n+England/RAND-14E1D70/2021,B.1.617.2\n+Australia/WA668/2021,B.1.617.2\n+Australia/WA672/2021,B.1.617.2\n+Australia/NSW4474/2021,B.1.617.2\n+Singapore/53
7/2021,B.1.617.1\n+Singapore/539/2021,B.1.617.1\n+Singapore/541/2021,B.1.617.2\n+Singapore/544/2021,B.1.617.1\n+Singapore/545/2021,B.1.617.1\n+Singapore/546/2021,B.1.617.2\n+USA/NY-PRL-2021_0412_01A06/2021,B.1.617\n+USA/NY-PRL-2021_0414_00O18/2021,B.1\n'
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/data/lineages.metadata.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-23/data/lineages.metadata.csv Sun Apr 25 20:17:07 2021 +0000
b
b'@@ -0,0 +1,452755 @@\n+sequence_name,lineage,probability,pangolearn_version,status,note,covv_accession_id,country,sample_date,epi_week,travel_history,constellation\n+Brazil/SP-1750/2021,N.9,1.0,2021-04-14,passed_qc,,EPI_ISL_1079159,Brazil,2021-02-03,58.0,,G-K---\n+England/MILK-129BE47/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1072353,UK,2021-02-10,59.0,,G-----\n+Switzerland/BL-ETHZ-490801/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080499,Switzerland,2021-02-09,59.0,,G-----\n+Switzerland/BS-ETHZ-490849/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080500,Switzerland,2021-02-09,59.0,,G-----\n+Italy/CAM-AMES-1-23/2021,A.2.5.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080740,Italy,2021-02-15,60.0,,GXXX--\n+Belgium/ULG-12383/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081135,Belgium,2021-02-14,60.0,,G-----\n+Belgium/ULG-12395/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081137,Belgium,2021-02-14,60.0,,G-----\n+Belgium/ULG-12381/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081138,Belgium,2021-02-13,59.0,,G-----\n+Belgium/ULG-12398/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081139,Belgium,2021-02-13,59.0,,G-----\n+Belgium/ULG-12428/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081142,Belgium,2021-02-12,59.0,,G-----\n+Belgium/ULG-12415/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081144,Belgium,2021-02-11,59.0,,G-----\n+Belgium/ULG-12357/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081146,Belgium,2021-02-09,59.0,,G-----\n+Belgium/ULG-12363/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081147,Belgium,2021-02-09,59.0,,G-----\n+Belgium/ULG-12370/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081148,Belgium,2021-02-04,58.0,,G-----\n+USA/MD-MDH-1057/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081232,USA,2021-02-16,60.0,,GXX---\n+Belgium/IPG-19/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081842,Belgium,2021-02-18,60.0,,GXX-X-\n+Italy/CAM-AMES-3-82/2021,P.1.1,1.0,2021-04-14,passed_qc,13/17 P.1 (B.1.1.28.1) 
SNPs,EPI_ISL_1082468,Italy,2021-02-15,60.0,,GYK---\n+Switzerland/GE-33292942/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1084765,Switzerland,2021-02-16,60.0,,G-----\n+Italy/CAM-AMES-6-43/2021,A.2.5.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085201,Italy,2021-02-16,60.0,,G-----\n+Italy/CAM-AMES-6-48/2021,P.1.1,1.0,2021-04-14,passed_qc,14/17 P.1 (B.1.1.28.1) SNPs,EPI_ISL_1085205,Italy,2021-02-16,60.0,,GYK---\n+Italy/CAM-AMES-6-50/2021,P.1.1,1.0,2021-04-14,passed_qc,11/17 P.1 (B.1.1.28.1) SNPs,EPI_ISL_1085207,Italy,2021-02-16,60.0,,G-X---\n+France/un-HMN-21022170010/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085233,France,2021-02-17,60.0,,G-----\n+France/un-HMN-21022220115/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085246,France,2021-02-22,61.0,,GXX---\n+France/un-HMN-21022180510/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085379,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022030415/2021,B.1.619,1.0,2021-04-14,passed_qc,,EPI_ISL_1085533,France,2021-02-03,58.0,,G-K---\n+France/un-HMN-21022110141/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085554,France,2021-02-10,59.0,,GXX---\n+France/un-HMN-21022020529/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085559,France,2021-02-02,58.0,,G-----\n+France/un-HMN-21022080622/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085627,France,2021-02-08,59.0,,G-----\n+France/un-HMN-21022080646/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085628,France,2021-02-08,59.0,,G-----\n+France/un-HMN-21022100410/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085784,France,2021-02-10,59.0,,G-----\n+France/un-HMN-21022170216/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085907,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022160227/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085908,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022180359/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085909,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022180249/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,
EPI_ISL_1085910,France,2021-02-16,60.0,,G-'..b'I_ISL_935442,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2922/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935443,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2923/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935444,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2924/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935445,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2925/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935446,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2926/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935447,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2927/2020,B.1.1.244,1.0,2021-04-14,passed_qc,,EPI_ISL_935448,USA,2020-11-17,47.0,,G-----\n+USA/FL-BPHL-2928/2020,B.1.1.222,1.0,2021-04-14,passed_qc,,EPI_ISL_935449,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2929/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935450,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2930/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935451,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2931/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935452,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2932/2020,B.1.265,1.0,2021-04-14,passed_qc,,EPI_ISL_935453,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2933/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935454,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2934/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935455,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2935/2020,B.1.595,1.0,2021-04-14,passed_qc,,EPI_ISL_935456,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2936/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935457,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2937/2020,B.1.1.192,1.0,2021-04-14,passed_qc,,EPI_ISL_935458,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2938/2020,B.1.1.192,1.0,2021-04-14,passed_qc,,EPI_ISL_935459,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2939/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935460,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2940/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935461,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-294
1/2020,B.1.509,1.0,2021-04-14,passed_qc,,EPI_ISL_935462,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2942/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935463,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2943/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935464,USA,2020-11-20,47.0,,G-----\n+USA/FL-BPHL-2944/2020,B.1.564,1.0,2021-04-14,passed_qc,,EPI_ISL_935465,USA,2020-11-20,47.0,,G-----\n+USA/FL-BPHL-2945/2020,B.1.499.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935466,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2946/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935467,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2947/2020,B.1.361,1.0,2021-04-14,passed_qc,,EPI_ISL_935468,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2948/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935469,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2949/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935470,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2950/2020,B.1.596,1.0,2021-04-14,passed_qc,,EPI_ISL_935471,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2951/2020,B.1.596,1.0,2021-04-14,passed_qc,,EPI_ISL_935472,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2952/2020,B.1.588,1.0,2021-04-14,passed_qc,,EPI_ISL_935473,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2953/2020,B.1.565,1.0,2021-04-14,passed_qc,,EPI_ISL_935474,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2954/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935475,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2955/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935476,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2956/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935477,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2957/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935478,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2958/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935479,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2959/2020,B.1.582,1.0,2021-04-14,passed_qc,,EPI_ISL_935480,USA,2020-12-02,49.0,,G-----\n+USA/FL-BPHL-2960/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935481,USA,2020-12-02,49.0,,G-----\n+USA/FL-BPHL
-2961/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935482,USA,2020-12-01,49.0,,G-----\n'
b
diff -r 0ec813ad2910 -r f557122d379e test-data/2021-04-23/supporting_information/data_prep_description.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-23/supporting_information/data_prep_description.md Sun Apr 25 20:17:07 2021 +0000
[
@@ -0,0 +1,16 @@
+# Data preparation
+
+### Source
+
+All GISAID data is downloaded and run through [`grapevine`](https://github.com/cov-ert/grapevine) which excludes records without proper dates, removes duplicate sequences (taking the earliest sample of the duplicates), omits some sequences with known issues, filters by length and coverage, and trims the sequences to CDS.
+
+It also aligns the sequences using `mafft` and builds an ML tree using `iqtree`. A lineage is assigned to each sequence using `pangolin` with the previous data release.
+
+### Lineage Curation
+
+The phylogeny is annotated with lineage and then in `FigTree` the lineages are manually curated, drawing together a number of pieces of information including monophyly in the ML phylogeny (generally a bootstrap > 70 is required) and epidemiological data such as country and travel history. Any changes to lineage definitions and new lineages are documented during this process.
+
+- The lineage may have been defined earlier in the outbreak and with added sequence data, there is less support for that lineage. In these cases the associated epidemiological metadata is examined and the lineage may be refined or even dropped entirely. The lineage number will not be 'recycled', but the members will get reassigned the parent lineage designation.
+- The lineage may have very clear epidemiological support and ambiguities or homoplasies in the sequences/tree could contribute to low bootstrap values. In these cases, if the support is strong, the lineages are called. Recall rates for these lineages within `pangolin` may be lower, however.
+
+
b
diff -r 0ec813ad2910 -r f557122d379e test-data/pangolearn.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pangolearn.loc Sun Apr 25 20:17:07 2021 +0000
b
@@ -0,0 +1,9 @@
+# this is a tab separated file describing the location of pangoLEARN databases used for the
+# pangolin SARS-CoV-2 lineage typing tool
+#
+# the columns are:
+# value  description format_version path
+#
+# for example
+# 2021-04-14 pangoLEARN data release 2021-04-14 1.0 /tmp/database/pangolearn/pangolearn/2021-04-14
+2021-04-23 pangoLEARN data release 2021-04-23 1.0 ${__HERE__}/2021-04-23
b
diff -r 0ec813ad2910 -r f557122d379e test-data/result1.tsv
--- a/test-data/result1.tsv Mon Apr 12 20:31:42 2021 +0000
+++ b/test-data/result1.tsv Sun Apr 25 20:17:07 2021 +0000
b
@@ -1,1 +1,1 @@
-Consensus_EB232-crude-prep_S297.primertrimmed.consensus_threshold_0.75_quality_20 B.1.1 1.0 2021-04-01 passed_qc
+Consensus_EB232-crude-prep_S297.primertrimmed.consensus_threshold_0.75_quality_20 B.1.1 1.0 2021-04-14 passed_qc
b
diff -r 0ec813ad2910 -r f557122d379e tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sun Apr 25 20:17:07 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="pangolearn" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, format_version, path</columns>
+        <file path="tool-data/pangolearn.loc" />
+    </table>
+</tables>
b
diff -r 0ec813ad2910 -r f557122d379e tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sun Apr 25 20:17:07 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="pangolearn" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, format_version, path</columns>
+        <file path="${__HERE__}/test-data/pangolearn.loc" />
+    </table>
+</tables>