Repository 'pangolin'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/pangolin

Changeset 4:42126b414951 (2021-06-03)
Previous changeset 3:514a786baaa9 (2021-05-19) Next changeset 5:42a174224817 (2021-06-21)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pangolin commit ab174c9f8cbfc741501068dfa4f6ccf229a54489"
modified:
pangolin.xml
test-data/pangolearn.loc
added:
test-data/2021-04-21/__init__.py
test-data/2021-04-21/data/decisionTreeHeaders_v1.joblib
test-data/2021-04-21/data/decisionTree_v1.joblib
test-data/2021-04-21/data/decision_tree_rules.txt
test-data/2021-04-21/data/lineage_recall_report.csv
test-data/2021-04-21/data/lineages.downsample.csv
test-data/2021-04-21/data/lineages.metadata.csv
test-data/2021-04-21/supporting_information/data_prep_description.md
removed:
test-data/2021-04-23/__init__.py
test-data/2021-04-23/data/decisionTreeHeaders_v1.joblib
test-data/2021-04-23/data/decisionTree_v1.joblib
test-data/2021-04-23/data/decision_tree_rules.txt
test-data/2021-04-23/data/lineage_recall_report.csv
test-data/2021-04-23/data/lineages.downsample.csv
test-data/2021-04-23/data/lineages.metadata.csv
test-data/2021-04-23/supporting_information/data_prep_description.md
b
diff -r 514a786baaa9 -r 42126b414951 pangolin.xml
--- a/pangolin.xml Wed May 19 13:32:34 2021 +0000
+++ b/pangolin.xml Thu Jun 03 06:38:26 2021 +0000
b
@@ -1,7 +1,7 @@
-<tool id="pangolin" name="Pangolin" version="@TOOL_VERSION@+galaxy1" profile="20.01">
+<tool id="pangolin" name="Pangolin" version="@TOOL_VERSION@+galaxy0" profile="20.01">
     <description>Phylogenetic Assignment of Outbreak Lineages</description>
     <macros>
-        <token name="@TOOL_VERSION@">2.3.8</token>
+        <token name="@TOOL_VERSION@">3.0.3</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">pangolin</requirement>
@@ -18,6 +18,7 @@
         #if str($db.source) == "download" or str($db.source) == "builtin"
             --datadir 'datadir'
         #end if
+        $usher
         $alignment
         --outfile report.csv 
         --max-ambig $max_ambig
@@ -34,7 +35,8 @@
     ]]></command>
     <inputs>
         <param type="data" name="input1" format="fasta" label="Input FASTA File(s)" /> 
-        <param name="alignment" type="boolean" label="Generate output alignment" 
+        <param argument="--usher" type="boolean" label="Use UShER model" truevalue="--usher" falsevalue="" help="Use UShER model instead of default pangoLEARN model" />
+        <param argument="--alignment" type="boolean" label="Generate output alignment" 
             truevalue="--alignment" falsevalue="" /> 
         <param argument="--max-ambig" type="float" label="Maximum proportion of Ns allowed" 
             value="0.5" min="0" max="1" help="Maximum proportion of Ns allowed for pangolin to attempt assignment" />
@@ -56,7 +58,8 @@
                         <column name="value" index="0" />
                         <column name="name" index="1" />
                         <column name="path" index="3" />
-                        <filter type="sort_by" column="0"/>
+                        <filter type="sort_by" column="0" />
+                        <filter type="static_value" column="2" value="3.0" />
                     </options>
                 </param>
             </when>
@@ -67,7 +70,7 @@
     <outputs>
         <data name="output1" format="tabular" label="pangolin on ${on_string}">
             <actions>
-                <action name="column_names" type="metadata" default="taxon,lineage,probability,pangoLEARN_version,status,note" />
+                <action name="column_names" type="metadata" default="taxon,lineage,conflict,ambiguity_score,scorpio_call,scorpio_support,scorpio_conflict,version,pangolin_version,pangoLEARN_version,pango_version,status,note" />
             </actions>
         </data>
         <data name="align1" format="fasta" label="pangolin alignment on ${on_string}">
@@ -82,7 +85,22 @@
             </conditional>
             <output name="output1" ftype="tabular">
                 <assert_contents>
-                    <has_text text="B.1.1" />
+                    <has_text_matching expression="B.1.1\t0.0" />
+                    <has_text text="passed_qc" />
+                    <has_n_lines n="1" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- test UShER mode -->
+        <test expect_num_outputs="1">
+            <param name="input1" value="test1.fasta"/>
+            <param name="usher" value="true" />
+            <conditional name="db">
+                <param name="source" value="download" />
+            </conditional>
+            <output name="output1" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="B.1.1.409\t\t*PUSHER" />
                     <has_text text="passed_qc" />
                     <has_n_lines n="1" />
                 </assert_contents>
@@ -96,7 +114,7 @@
             </conditional>
             <output name="output1" ftype="tabular">
                 <assert_contents>
-                    <has_text text="B.1.1" />
+                    <has_text_matching expression="B.1.1\t0.0" />
                     <has_text text="passed_qc" />
                     <has_n_lines n="1" />
                 </assert_contents>
@@ -108,7 +126,8 @@
                 </assert_contents>
             </output>
         </test>
-        <test expect_num_outputs="1">
+        <!-- testing with builtin data is too large -->
+        <!-- <test expect_num_outputs="1">
             <param name="input1" value="test1.fasta"/>
             <conditional name="db">
                 <param name="source" value="builtin" />
@@ -119,7 +138,7 @@
                     <has_n_lines n="1" />
                 </assert_contents>
             </output>
-        </test>
+        </test> 
         <test expect_num_outputs="1">
             <param name="input1" value="multiple_alignment.fasta.gz"/>
             <conditional name="db">
@@ -133,13 +152,13 @@
                     <has_n_lines n="34" />
                 </assert_contents>
             </output>
-        </test>
+        </test> -->
         <!-- test include-header option -->
         <test expect_num_outputs="1">
             <param name="input1" value="multiple_alignment.fasta.gz"/>
             <param name="include_header" value="true" />
             <conditional name="db">
-                <param name="source" value="builtin" />
+                <param name="source" value="download" />
             </conditional>
             <output name="output1" ftype="tabular">
                 <assert_contents>
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-21/__init__.py Thu Jun 03 06:38:26 2021 +0000
b
@@ -0,0 +1,2 @@
+_program = "pangoLEARN"
+__version__ = "2021-04-21"
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/data/decisionTreeHeaders_v1.joblib
b
Binary file test-data/2021-04-21/data/decisionTreeHeaders_v1.joblib has changed
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/data/decisionTree_v1.joblib
b
Binary file test-data/2021-04-21/data/decisionTree_v1.joblib has changed
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/data/decision_tree_rules.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-21/data/decision_tree_rules.txt Thu Jun 03 06:38:26 2021 +0000
[
b"@@ -0,0 +1,13285 @@\n+['lineage', '285_-', '285_A', '285_C', '285_G', '285_T', '286_-', '286_A', '286_C', '286_G', '286_T', '287_-', '287_A', '287_C', '287_G', '287_T', '288_-', '288_A', '288_C', '288_G', '288_T', '289_-', '289_A', '289_C', '289_G', '289_T', '290_-', '290_A', '290_C', '290_G', '290_T', '291_-', '291_A', '291_C', '291_G', '291_T', '292_-', '292_A', '292_C', '292_G', '292_T', '293_-', '293_A', '293_C', '293_G', '293_T', '294_-', '294_A', '294_C', '294_G', '294_T', '295_-', '295_A', '295_C', '295_G', '295_T', '296_-', '296_A', '296_C', '296_G', '296_T', '297_-', '297_A', '297_C', '297_G', '297_T', '298_-', '298_A', '298_C', '298_G', '298_T', '299_-', '299_A', '299_C', '299_G', '299_T', '300_-', '300_A', '300_C', '300_G', '300_T', '301_-', '301_A', '301_C', '301_G', '301_T', '302_-', '302_A', '302_C', '302_G', '302_T', '303_-', '303_A', '303_C', '303_G', '303_T', '304_-', '304_A', '304_C', '304_G', '304_T', '305_-', '305_A', '305_C', '305_G', '305_T', '306_-', '306_A', '306_C', '306_G', '306_T', '307_-', '307_A', '307_C', '307_G', '307_T', '308_-', '308_A', '308_C', '308_G', '308_T', '309_-', '309_A', '309_C', '309_G', '309_T', '310_-', '310_A', '310_C', '310_G', '310_T', '311_-', '311_A', '311_C', '311_G', '311_T', '312_-', '312_A', '312_C', '312_G', '312_T', '313_-', '313_A', '313_C', '313_G', '313_T', '314_-', '314_A', '314_C', '314_G', '314_T', '315_-', '315_A', '315_C', '315_G', '315_T', '316_-', '316_A', '316_C', '316_G', '316_T', '317_-', '317_A', '317_C', '317_G', '317_T', '318_-', '318_A', '318_C', '318_G', '318_T', '319_-', '319_A', '319_C', '319_G', '319_T', '320_-', '320_A', '320_C', '320_G', '320_T', '321_-', '321_A', '321_C', '321_G', '321_T', '322_-', '322_A', '322_C', '322_G', '322_T', '323_-', '323_A', '323_C', '323_G', '323_T', '324_-', '324_A', '324_C', '324_G', '324_T', '325_-', '325_A', '325_C', '325_G', '325_T', '326_-', '326_A', '326_C', '326_G', '326_T', '327_-', '327_A', '327_C', '327_G', '327_T', '328_-', '328_A', '328_C', 
'328_G', '328_T', '329_-', '329_A', '329_C', '329_G', '329_T', '330_-', '330_A', '330_C', '330_G', '330_T', '331_-', '331_A', '331_C', '331_G', '331_T', '332_-', '332_A', '332_C', '332_G', '332_T', '333_-', '333_A', '333_C', '333_G', '333_T', '334_-', '334_A', '334_C', '334_G', '334_T', '335_-', '335_A', '335_C', '335_G', '335_T', '336_-', '336_A', '336_C', '336_G', '336_T', '337_-', '337_A', '337_C', '337_G', '337_T', '338_-', '338_A', '338_C', '338_G', '338_T', '339_-', '339_A', '339_C', '339_G', '339_T', '340_-', '340_A', '340_C', '340_G', '340_T', '341_-', '341_A', '341_C', '341_G', '341_T', '342_-', '342_A', '342_C', '342_G', '342_T', '343_-', '343_A', '343_C', '343_G', '343_T', '344_-', '344_A', '344_C', '344_G', '344_T', '345_-', '345_A', '345_C', '345_G', '345_T', '346_-', '346_A', '346_C', '346_G', '346_T', '347_-', '347_A', '347_C', '347_G', '347_T', '348_-', '348_A', '348_C', '348_G', '348_T', '349_-', '349_A', '349_C', '349_G', '349_T', '350_-', '350_A', '350_C', '350_G', '350_T', '351_-', '351_A', '351_C', '351_G', '351_T', '352_-', '352_A', '352_C', '352_G', '352_T', '353_-', '353_A', '353_C', '353_G', '353_T', '354_-', '354_A', '354_C', '354_G', '354_T', '355_-', '355_A', '355_C', '355_G', '355_T', '357_-', '357_A', '357_C', '357_G', '357_T', '358_-', '358_A', '358_C', '358_G', '358_T', '359_-', '359_A', '359_C', '359_G', '359_T', '360_-', '360_A', '360_C', '360_G', '360_T', '361_-', '361_A', '361_C', '361_G', '361_T', '362_-', '362_A', '362_C', '362_G', '362_T', '363_-', '363_A', '363_C', '363_G', '363_T', '364_-', '364_A', '364_C', '364_G', '364_T', '365_-', '365_A', '365_C', '365_G', '365_T', '366_-', '366_A', '366_C', '366_G', '366_T', '367_-', '367_A', '367_C', '367_G', '367_T', '368_-', '368_A', '368_C', '368_G', '368_T', '369_-', '369_A', '369_C', '369_G', '369_T', '370_-', '370_A', '370_C', '370_G', '370_T', '371_-', '371_A', '371_C', '371_G', '371_T', '372_-', '372_A', '372_C', '372_G', '372_T', '373_-', '373_A', '373_C', '373_G', '373_T', 
'374_-"..b"43!='G',27943=='A',21254!='A',20177=='A',19705=='-'\n+B.1.177.57\t18423=='-',26800=='C',16241!='-',25613!='G',15479!='G',21613=='A',6806=='G',23643!='G',27943=='A',21254=='A'\n+B.1.177.54\t18423=='-',26800=='C',16241!='-',25613!='G',15479!='G',21613=='A',6806=='G',23643=='G'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488!='C'\n+B.1.177.16\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105!='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826!='A'\n+B.1.177.15\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416!='C'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780!='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143!='A',21254!='A'\n+B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143!='A',21254=='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143=='A'\n+B.1\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072!='A',29644!='G'\n+B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072!='A',29644=='G'\n+B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072=='A'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532=='C'\n+B.1.177
\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492=='T'\n+B.1.177.18\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310!='C'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869!='C'\n+B.1.177.12\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813!='G'\n+B.1.177.57\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624!='A'\n+B.1.177.69\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627!='C'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627=='C',6285!='A'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627=='C',6285=='A'\n+B.1.177.58\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043=='G'\n+W.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804=='G'\n+B.1.177.81\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118=='G'\n+B.1.177.29\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026=='G'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294!='-',8077!='G'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294!='-',8077=='G'\n+B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294=='-'\n+B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959=='C'\n+B.1.177.21\t18423=='-',26800=='C',16241=='-',22050!='G',26166!='T'\n+B.1.177\t18423=='-',26800=='C',16241=='-
',22050!='G',26166=='T'\n+AA.1\t18423=='-',26800=='C',16241=='-',22050=='G'\n"
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/data/lineage_recall_report.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-21/data/lineage_recall_report.csv Thu Jun 03 06:38:26 2021 +0000
b
b'@@ -0,0 +1,781 @@\n+lineage,precision,recall,f1_score,support\n+B.1.1.4,0.82,0.9941818181818182,0.8944545454545455,468\n+A,0.8312000000000002,0.9623000000000002,0.8904,665\n+A.1,0.9960000000000001,0.9884000000000001,0.9921999999999999,2508\n+A.10,1.0,0.95,0.9667,14\n+A.11,1.0,1.0,1.0,10\n+A.12,0.8333333333333334,0.8333333333333334,0.8333333333333334,5\n+A.13,1.0,1.0,1.0,21\n+A.14,0.6,0.6,0.6,5\n+A.15,0.9856999999999999,1.0,0.9923,61\n+A.16,0.9833000000000001,1.0,0.9908999999999999,48\n+A.2,0.9953999999999998,0.9907,0.9928999999999999,1058\n+A.2.1,0.2,0.0833,0.1167,21\n+A.2.2,0.9977,0.9977,0.9977,427\n+A.2.3,0.9949999999999999,0.9946999999999999,0.9947000000000001,189\n+A.2.4,0.8116999999999999,1.0,0.8951,84\n+A.3,0.9951000000000001,0.9961,0.9957,793\n+A.4,1.0,1.0,1.0,54\n+A.5,0.9929,0.9879000000000001,0.9902999999999998,411\n+A.6,1.0,1.0,1.0,115\n+A.7,1.0,0.95,0.9667,17\n+A.9,0.3,0.2,0.2334,14\n+B,0.931,0.9460000000000001,0.9382999999999999,3723\n+B.1,0.9252,0.8999,0.9123000000000001,21297\n+B.1.1,0.9598999999999999,0.9279999999999999,0.9431999999999998,12773\n+B.1.1.1,0.9918000000000001,0.9892,0.9905000000000002,5708\n+B.1.1.10,0.9309000000000001,0.9390999999999998,0.9319,493\n+B.1.1.100,1.0,1.0,1.0,17\n+B.1.1.101,0.77,0.85,0.7833,16\n+B.1.1.102,0.6388333333333334,0.8333333333333334,0.6945,6\n+B.1.1.103,0.7,0.45,0.5334999999999999,17\n+B.1.1.104,0.9550000000000001,0.6749999999999999,0.7657,49\n+B.1.1.105,0.47614285714285715,0.5714285714285714,0.5,5\n+B.1.1.106,0.6,0.55,0.5667,15\n+B.1.1.107,0.9875,0.9464,0.9622999999999999,77\n+B.1.1.109,1.0,1.0,1.0,23\n+B.1.1.110,0.8571428571428571,0.8571428571428571,0.8571428571428571,6\n+B.1.1.111,0.9667,1.0,0.9800000000000001,26\n+B.1.1.112,1.0,1.0,1.0,15\n+B.1.1.113,0.8,0.8,0.8,24\n+B.1.1.114,0.8,0.8,0.8,5\n+B.1.1.115,1.0,1.0,1.0,42\n+B.1.1.116,0.85,0.9,0.8667,11\n+B.1.1.117,0.9,0.65,0.7334999999999999,20\n+B.1.1.118,1.0,0.8432999999999999,0.9036,52\n+B.1.1.12,0.9189999999999999,0.9666,0.9343999999999999,64\n+B.1.1.120,1.0,1
.0,1.0,29\n+B.1.1.121,0.6833333333333332,0.8333333333333334,0.6970000000000001,6\n+B.1.1.122,0.95,1.0,0.9667,11\n+B.1.1.123,0.8257,0.9400000000000001,0.8713000000000001,52\n+B.1.1.125,1.0,1.0,1.0,91\n+B.1.1.126,1.0,1.0,1.0,9\n+B.1.1.127,0.8847000000000002,0.8666,0.8529,59\n+B.1.1.128,1.0,0.9334,0.96,26\n+B.1.1.129,0.12,0.15,0.1,15\n+B.1.1.13,1.0,1.0,1.0,15\n+B.1.1.130,0.7765000000000001,0.9826,0.8549999999999999,113\n+B.1.1.131,0.8667,0.9,0.8800000000000001,18\n+B.1.1.132,0.9833000000000001,1.0,0.9908999999999999,53\n+B.1.1.133,0.9,0.8,0.8300000000000001,35\n+B.1.1.134,0.975,0.9667,0.9666,68\n+B.1.1.135,0.95,0.9445,0.9439999999999997,93\n+B.1.1.136,1.0,1.0,1.0,36\n+B.1.1.137,0.9606999999999999,0.9856999999999999,0.9712,69\n+B.1.1.138,1.0,1.0,1.0,129\n+B.1.1.139,1.0,1.0,1.0,9\n+B.1.1.14,1.0,1.0,1.0,58\n+B.1.1.140,0.775,0.9,0.8157,21\n+B.1.1.141,1.0,0.9083,0.9154,123\n+B.1.1.142,1.0,1.0,1.0,21\n+B.1.1.143,0.4,0.1666,0.2334,26\n+B.1.1.144,0.9,0.9,0.9,14\n+B.1.1.145,0.9334,0.95,0.9267,20\n+B.1.1.147,0.9,0.9,0.9,12\n+B.1.1.148,0.875,0.9856999999999999,0.9189999999999999,67\n+B.1.1.149,1.0,1.0,1.0,59\n+B.1.1.15,0.9464,0.9833000000000001,0.9611999999999998,63\n+B.1.1.151,0.8183999999999999,0.9933,0.882,145\n+B.1.1.152,0.9,0.8,0.8333999999999999,12\n+B.1.1.153,1.0,0.975,0.9856999999999999,46\n+B.1.1.154,0.8400000000000001,0.9167,0.8584999999999999,25\n+B.1.1.155,0.8000999999999999,0.85,0.8067,19\n+B.1.1.157,1.0,0.9167,0.9467000000000001,24\n+B.1.1.158,0.9667,0.925,0.9427999999999999,44\n+B.1.1.159,0.8950000000000001,0.95,0.9163,40\n+B.1.1.16,0.9667,1.0,0.9800000000000001,47\n+B.1.1.160,0.95,1.0,0.9667,10\n+B.1.1.161,0.7030000000000001,0.82,0.7424999999999999,43\n+B.1.1.162,0.8667,0.85,0.8467,20\n+B.1.1.163,0.9370999999999998,0.9047000000000001,0.9147000000000001,63\n+B.1.1.164,0.8499000000000001,0.9777999999999999,0.9013,86\n+B.1.1.165,1.0,0.96,0.975,55\n+B.1.1.166,0.8667,0.6167,0.6933999999999999,25\n+B.1.1.167,0.8667,0.8167,0.8267,21\n+B.1.1.168,1.0,0.9167,0.9467000000000
001,22\n+B.1.1.169,0.85,0.9,0.8667,15\n+B.1.1.17,1.0,0.9667,0.9800000000000001,29\n+B.1.1.170,1.0,1'..b'7,0.45,0.5335,21\n+B.1.9,0.9922000000000001,0.9916,0.9917999999999999,242\n+B.1.90,1.0,0.8834,0.9314,34\n+B.1.91,0.9821000000000002,1.0,0.9909000000000001,379\n+B.1.93,0.9969000000000001,0.9968,0.9968999999999999,946\n+B.1.94,0.9167,1.0,0.9467000000000001,18\n+B.1.95,0.8,0.75,0.7501,20\n+B.1.96,1.0,1.0,1.0,45\n+B.1.97,1.0,1.0,1.0,38\n+B.1.98,0.9441,0.9868,0.9645999999999999,606\n+B.10,0.9751,0.9917,0.9827999999999999,112\n+B.11,0.9644,0.9868,0.9743999999999999,305\n+B.12,0.9856999999999999,1.0,0.9923,63\n+B.13,1.0,1.0,1.0,21\n+B.15,1.0,0.975,0.9856999999999999,41\n+B.18,0.9667,1.0,0.9800000000000001,22\n+B.19,1.0,0.9667,0.9800000000000001,26\n+B.20,0.8667,0.7333999999999999,0.7800999999999999,22\n+B.23,0.8412,0.9833999999999999,0.9006999999999998,120\n+B.26,0.6344,0.65,0.6039,40\n+B.27,1.0,1.0,1.0,58\n+B.28,0.9893000000000001,0.9963000000000001,0.9926,263\n+B.29,0.9627000000000001,0.991,0.9759,216\n+B.3,0.9927999999999999,0.9879999999999999,0.9903999999999998,828\n+B.3.1,0.9829999999999999,0.9940000000000001,0.9884000000000001,508\n+B.30,1.0,0.9800000000000001,0.9888999999999999,48\n+B.31,0.9867000000000001,0.9725999999999999,0.9792,216\n+B.32,0.9333,0.9,0.9067000000000001,24\n+B.33,0.9800000000000001,0.975,0.9745999999999999,47\n+B.34,0.9856999999999999,1.0,0.9923,65\n+B.35,0.9789,0.9778,0.977,87\n+B.36,1.0,1.0,1.0,10\n+B.37,0.975,0.9667,0.9657,27\n+B.38,1.0,1.0,1.0,15\n+B.39,0.9975999999999999,0.9753999999999998,0.9861000000000001,404\n+B.4,0.9811,0.8449,0.9066000000000001,361\n+B.4.1,1.0,1.0,1.0,27\n+B.4.2,1.0,1.0,1.0,15\n+B.4.4,0.9667,0.8916999999999999,0.9124000000000001,38\n+B.4.5,0.6001000000000001,1.0,0.7475000000000002,76\n+B.40,0.9915,0.9804999999999999,0.9858,2435\n+B.41,0.975,0.9826,0.9783999999999999,117\n+B.42,0.8667,0.9,0.8800000000000001,11\n+B.43,0.975,1.0,0.9856999999999999,33\n+B.44,0.95,1.0,0.9667,10\n+B.45,0.9962,1.0,0.998,245\n+B.46,0.9800000
000000001,0.9334,0.9489000000000001,32\n+B.47,0.95,1.0,0.9667,19\n+B.48,0.9823000000000001,0.8596,0.9136000000000001,148\n+B.49,1.0,1.0,1.0,19\n+B.5,0.8167,0.8,0.7800999999999999,13\n+B.51,1.0,1.0,1.0,22\n+B.52,1.0,1.0,1.0,74\n+B.53,0.9856999999999999,1.0,0.9923,58\n+B.54,0.7546999999999999,0.4459000000000001,0.5488,88\n+B.6,0.9714,0.9664999999999999,0.9684000000000001,712\n+B.6.1,1.0,0.7833,0.8501,22\n+B.6.2,1.0,1.0,1.0,14\n+B.6.3,0.9,0.7666999999999999,0.8099999999999999,24\n+B.6.4,0.95,0.85,0.8834,22\n+B.6.5,0.8888888888888888,0.8888888888888888,0.8888888888888888,9\n+B.6.6,0.9884999999999999,0.9978000000000001,0.9932000000000002,927\n+C.1,0.9704,0.9732,0.9712999999999999,186\n+C.11,1.0,1.0,1.0,10\n+C.12,1.0,1.0,1.0,122\n+C.13,0.8667999999999999,0.85,0.8200999999999998,20\n+C.14,0.8454,0.785,0.768,45\n+C.15,1.0,1.0,1.0,27\n+C.2,1.0,1.0,1.0,14\n+C.3,0.9929,1.0,0.9963,126\n+C.7,1.0,1.0,1.0,28\n+C.8,0.9875,0.9833000000000001,0.9842000000000001,63\n+C.9,0.835,0.6666000000000001,0.6974,27\n+D.2,1.0,0.9999,1.0,10570\n+D.3,1.0,1.0,1.0,140\n+E.1,1.0,1.0,1.0,41\n+F.1,1.0,1.0,1.0,10\n+G.1,0.9167,0.9,0.8967,19\n+H.1,0.9015000000000001,0.9808999999999999,0.9336999999999998,104\n+I.1,0.9042,1.0,0.9345000000000001,33\n+J.1,0.95,1.0,0.9667,15\n+K.1,1.0,0.9,0.9334,20\n+L.1,1.0,1.0,1.0,165\n+L.2,1.0,1.0,1.0,60\n+M.1,1.0,0.9667,0.9800000000000001,60\n+N.1,1.0,0.9667,0.9800000000000001,23\n+N.2,1.0,1.0,1.0,9\n+N.3,1.0,1.0,1.0,10\n+N.4,1.0,0.95,0.9667,21\n+B.1.1.119,0.3,0.4,0.33340000000000003,4\n+B.1.1.183,0.8333333333333334,0.8333333333333334,0.8333333333333334,6\n+B.1.1.81,0.75,0.75,0.75,4\n+B.1.1.91,0.6666666666666666,0.6666666666666666,0.6666666666666666,5\n+B.1.1.98,0.0,0.0,0.0,2\n+B.1.158,0.0,0.0,0.0,4\n+B.1.269,1.0,1.0,1.0,8\n+B.1.373,1.0,1.0,1.0,8\n+B.1.80,0.0,0.0,0.0,5\n+C.5,1.0,1.0,1.0,7\n+B.1.279,1.0,1.0,1.0,6\n+B.50,0.5,0.5,0.5,6\n+C.10,1.0,1.0,1.0,4\n+C.4,0.9375,1.0,0.958375,8\n+B.1.1.156,0.6666666666666666,0.6666666666666666,0.6666666666666666,6\n+B.1.1.182,1.0,1.0,1.
0,4\n+B.1.1.252,1.0,1.0,1.0,5\n+C.6,0.375,0.5,0.41675,4\n+macro avg,0.9166363636363637,0.9150909090909092,0.9071818181818184,193089\n+weighted avg,0.9636363636363636,0.9598181818181818,0.9593636363636364,193089\n'
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/data/lineages.downsample.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-21/data/lineages.downsample.csv Thu Jun 03 06:38:26 2021 +0000
b
b'@@ -0,0 +1,179715 @@\n+sequence_name,lineage\n+Australia/VIC390/2020,B.6.6\n+Australia/VIC473/2020,B.1.1\n+Australia/VIC475/2020,B.1.23\n+Australia/VIC476/2020,B.1.23\n+Australia/VIC479/2020,B.1.23\n+Australia/NSW14/2020,B.4\n+USA/WA13-UW9/2020,A.1\n+USA/WA-UW-1741/2020,B.1.371\n+England/NOTT-10E5E4/2020,B.23\n+USA/WA-UW-1739/2020,B.1\n+England/NOTT-10E5D5/2020,B.1.1.41\n+USA/WA-UW-1732/2020,B.1\n+USA/WA-UW-1733/2020,B.1.371\n+USA/WA-UW-1722/2020,B.1\n+England/NOTT-10E5A8/2020,B.1.1\n+Netherlands/Oisterwijk_1364072/2020,B\n+USA/WA-UW-1772/2020,B.1\n+England/NOTT-10E599/2020,A.2\n+Netherlands/Oss_1363500/2020,B.55\n+USA/WA-UW-1708/2020,A.1\n+England/NOTT-10E58A/2020,B\n+Netherlands/Rotterdam_1363790/2020,B\n+USA/WA-UW-1731/2020,A.1\n+England/NOTT-10E57B/2020,B\n+Netherlands/Rotterdam_1364040/2020,B.55\n+USA/WA-UW-1709/2020,A.1\n+England/NOTT-10E55D/2020,B.3\n+England/NOTT-10E54E/2020,B.23\n+Netherlands/Tilburg_1363354/2020,B.11\n+Netherlands/Utrecht_1363564/2020,B.1.383\n+Netherlands/Utrecht_1363628/2020,B.1.383\n+USA/WA-UW-1724/2020,B.1.371\n+England/NOTT-10E520/2020,B\n+USA/WA-UW-1706/2020,B.1.371\n+England/NOTT-10E511/2020,B.39\n+USA/WA-UW-1705/2020,A.1\n+USA/WA-UW-1707/2020,B.1\n+England/NOTT-10E4F6/2020,B.1\n+USA/WA-UW-1745/2020,B.30\n+England/NOTT-10E4C9/2020,B.52\n+Netherlands/Diemen_1363454/2020,B.1.1\n+England/NOTT-10E4AB/2020,B.23\n+Netherlands/Eindhoven_1363782/2020,B.1.1\n+USA/WA-UW-1729/2020,A.1\n+England/NOTT-10E49C/2020,B.35\n+Netherlands/Haarlem_1363688/2020,B.1\n+Netherlands/Houten_1363498/2020,B.1\n+Netherlands/Loon_op_zand_1363512/2020,B.11\n+Netherlands/Delft_1363424/2020,B.1.1\n+USA/WA-UW-1682/2020,A.1\n+Australia/VIC908/2020,B.1.1\n+USA/WA-UW-1775/2020,B.1.1\n+England/NOTT-10E757/2020,B.39\n+USA/WA-UW-1774/2020,B.1.371\n+USA/un-UW-1832/2020,B.30\n+England/NOTT-10E739/2020,B.39\n+USA/WA-UW-1796/2020,B.1\n+England/NOTT-10E72A/2020,B.35\n+USA/WA-UW-1792/2020,B.1\n+England/NOTT-10E70C/2020,B.1.1.369\n+USA/WA-UW-1785/2020,A.1\n+Australia/VIC912/202
0,B.1.23\n+Australia/VIC913/2020,B.6.6\n+Czech_Republic/IAB_1/2020,B.1\n+Czech_Republic/IAB_4/2020,B.1\n+Czech_Republic/IAB_8/2020,B.1.1\n+Czech_Republic/IAB_9/2020,B.1.1\n+Czech_Republic/IAB_10/2020,B.1\n+Czech_Republic/IAB_12/2020,B.1\n+England/NOTT-10E6A5/2020,B.1.391\n+England/NOTT-10E696/2020,B.1\n+England/NOTT-10E678/2020,B.61\n+USA/WA-UW-1770/2020,B.1\n+England/NOTT-10E669/2020,B.1.1.369\n+England/NOTT-10E65A/2020,B.1\n+USA/WA-UW-1753/2020,B.1.371\n+USA/WA-UW-1735/2020,A.1\n+England/NOTT-10E63C/2020,B.3\n+Australia/NSW09/2020,B.4\n+USA/WA-UW-1730/2020,A.1\n+England/NOTT-10E61E/2020,B.1.250\n+Australia/NSW11/2020,B.4\n+Australia/NSW13/2020,B.4.6\n+Czech_Republic/IAB_15/2020,B.1\n+Czech_Republic/IAB_20/2020,B.1.1\n+USA/WA-UW-1850/2020,A.1\n+USA/WA-UW-1858/2020,B.1\n+USA/WA-UW-1824/2020,B.1.371\n+USA/WA-UW-1826/2020,B.1.162\n+USA/WA-UW-1827/2020,B.1.319\n+USA/WA-UW-1828/2020,A.1\n+England/NOTT-10E809/2020,B.29\n+USA/un-UW-1834/2020,A.1\n+England/NOTT-10E7FD/2020,B\n+USA/WA-UW-1799/2020,B.1.320\n+USA/WA-UW-1782/2020,A.1\n+USA/WA-UW-1784/2020,A.1\n+England/NOTT-10E793/2020,B.23\n+USA/WA-UW-1779/2020,B.1\n+England/NOTT-10E784/2020,B.40\n+USA/WA-UW-1904/2020,B.1\n+USA/WA-UW-1920/2020,A.1\n+USA/WA-UW-1913/2020,A.1\n+USA/WA-UW-1919/2020,A.1\n+USA/WA-UW-1905/2020,A.1\n+USA/WA-UW-1872/2020,B.1.320\n+USA/OR-UW-1849/2020,A.1\n+USA/WA-UW-1863/2020,A.1\n+USA/WA-UW-1946/2020,A.1\n+USA/WA-UW-1835/2020,A.1\n+USA/WA-UW-1868/2020,B.4.4\n+Australia/VIC551/2020,B.1.434\n+Australia/VIC554/2020,B.1.434\n+Australia/VIC555/2020,B.1\n+Australia/VIC557/2020,B.1.23\n+Australia/VIC559/2020,B.1\n+Australia/VIC560/2020,B.1\n+Australia/VIC561/2020,B.1.1\n+Australia/VIC562/2020,B.1\n+Australia/VIC565/2020,A.2.2\n+Australia/VIC567/2020,B.1\n+Australia/VIC568/2020,A.1\n+Australia/VIC569/2020,B\n+Australia/VIC570/2020,B.1\n+Australia/VIC571/2020,B.1\n+Australia/VIC572/2020,B.40\n+Australia/VIC574/2020,B.1.1\n+Australia/VIC575/2020,B.1.1\n+England/NOTT-10E12C/2020,B.61\n+Beijing/233/2020,A\n+Indi
a/MH-1-27/2020,B\n+India/MH-1-31/2020,B\n+Australia/VIC534/2020,B.1.23\n+Australia/VIC535/2020,B\n+Australia/VIC536/2020,B.1\n+Australia/V'..b'/WB-1930400401314/2021,B.1.617.1\n+Belgium/UZA-UA-CV2132091726/2021,B.1.617.1\n+Canada/MB-NML-21570/2021,B.1.438.1\n+Canada/MB-NML-21579/2021,B.1.438.1\n+Canada/MB-NML-21589/2021,B.1.438.1\n+Canada/MB-NML-17472/2021,B.1.438.1\n+Canada/MB-NML-17747/2021,B.1.438.1\n+Canada/MB-NML-17706/2021,B.1.438.1\n+Canada/MB-NML-17692/2021,B.1.438.1\n+England/CAMC-14E335E/2021,B.1.617.1\n+England/CAMC-14E0166/2021,B.1.617.1\n+England/CAMC-14E338B/2021,B.1.617.2\n+England/CAMC-14E2F97/2021,B.1.617.2\n+England/MILK-14E0272/2021,B.1.617.2\n+USA/GA-CDC-STM-000046368/2021,B.1.617.1\n+India/KA-NIMH-SEQ-236/2021,B.1.617.1\n+India/KA-NIMH-SEQ-239/2021,B.1.617.1\n+India/KA-NIMH-SEQ-249/2021,B.1.617.1\n+India/KA-NIMH-SEQ-250/2021,B.1.617.1\n+India/KA-NIMH-SEQ-253/2021,B.1.617.1\n+India/KA-NIMH-SEQ-254/2021,B.1.617.1\n+India/KA-NIMH-SEQ-271/2021,B.1.617.1\n+India/KA-NIMH-SEQ-274/2021,B.1.617.1\n+India/KA-NIMH-SEQ-279/2021,B.1.617.1\n+India/KA-NIMH-SEQ-280/2021,B.1.617.1\n+India/KA-NIMH-SEQ-284/2021,B.1.617.1\n+India/KA-NIMH-SEQ-288/2021,B.1.617.1\n+India/KA-NIMH-SEQ-291/2021,B.1.617.1\n+India/KA-NIMH-SEQ-295/2021,B.1.617.1\n+India/KA-NIMH-SEQ-302/2021,B.1.617.1\n+USA/MA-CDC-STM-000044850/2021,B.1.617.2\n+USA/MA-CDC-STM-000044887/2021,B.1.617.2\n+Sint_Maarten/SX-RIVM-23089/2021,B.1.617\n+England/CAMC-14C2C5A/2021,B.1.617.2\n+Belgium/MBLG36792/2021,B.1.617.1\n+USA/ND-NDDH-0594/2021,B.1.438.1\n+USA/ND-NDDH-0620/2021,B.1.438.1\n+USA/ND-NDDH-0621/2021,B.1.438.1\n+USA/NJ-CDC-LC0035972/2021,B.1\n+USA/NJ-CDC-LC0036132/2021,B.1\n+USA/WI-CDC-LC0035686/2021,B.1.617.1\n+USA/CA-CDC-FG-018898/2021,B.1.617.1\n+USA/CA-CDC-FG-018335/2021,B.1.617.2\n+USA/NJ-CDC-LC0038223/2021,B.1.617.2\n+USA/WA-UW-2021033003742/2021,B.1.617.1\n+USA/CA-CDC-FG-019301/2021,B.1.617.1\n+Singapore/535/2021,B.1.617.2\n+Singapore/524/2021,B.1.617.2\n+Singapore/533/2021,B.1.617.2\n+Singapore/53
4/2021,B.1.617.2\n+Singapore/525/2021,B.1.617.1\n+Singapore/526/2021,B.1.617.1\n+Singapore/527/2021,B.1.617.1\n+Singapore/528/2021,B.1.617.1\n+Singapore/529/2021,B.1.617.1\n+England/CAMC-14E79FE/2021,B.1.617.1\n+England/CAMC-14E7B61/2021,B.1.617.2\n+England/CAMC-14E792B/2021,B.1.617.1\n+England/CAMC-14E7CF5/2021,B.1.617.1\n+England/CAMC-14E7C22/2021,B.1.617.1\n+Scotland/CAMC-14E0157/2021,B.1.617.1\n+Australia/NSW4471/2021,B.1.617.2\n+USA/IN-CDC-STM-000045992/2021,B.1.617.2\n+Belgium/Aalst-OLVZ-8042639/2021,B.1.620\n+USA/ND-NDDH-0641/2021,B.1.438.1\n+USA/WA-UW-2021040102602/2021,B.1.617.1\n+USA/WA-UW-2021040308606/2021,B.1.617.2\n+USA/WA-UW-2021040107121/2021,B.1.617.1\n+New_Zealand/21MV0313/2021,B.1.617\n+New_Zealand/21MV0277/2021,B.1.617\n+New_Zealand/21MV0339/2021,B.1.617\n+New_Zealand/21MV0256/2021,B.1.617\n+New_Zealand/21MV0340/2021,B.1.617\n+New_Zealand/21MV0261/2021,B.1.617\n+New_Zealand/21MV0270/2021,B.1.617\n+New_Zealand/21MV0343/2021,B.1.617\n+New_Zealand/21MV0334/2021,B.1.617\n+Ireland/D-NVRL-21IRL49397/2021,B.1.617.1\n+Ireland/D-NVRL-21IRL49399/2021,B.1.617.1\n+France/ARA-HCL021061596501/2021,B.1.620\n+France/ARA-HCL021061598501/2021,B.1.620\n+USA/WV-WVU-WV064773/2021,B.1.620\n+USA/ND-NDDH-0710/2021,B.1.438.1\n+Reunion/PIMIT_00914/2021,B.1.438.2\n+England/RAND-14F19F1/2021,B.1.617.2\n+England/RAND-14F1AD0/2021,B.1.617.2\n+England/ALDP-14EDD1A/2021,B.1.617.2\n+England/CAMC-14E7563/2021,B.1.617.2\n+England/RAND-14F1A67/2021,B.1.617.1\n+England/CAMC-14DECA6/2021,B.1.617.2\n+England/CAMC-14DEBC7/2021,B.1.617.2\n+England/CAMC-14DEBA9/2021,B.1.617.2\n+England/CAMC-14DE9DC/2021,B.1.617.1\n+England/CAMC-14DEE37/2021,B.1.617.1\n+England/CAMC-14DEBF4/2021,B.1.617.2\n+England/CAMC-14E726C/2021,B.1.617.1\n+England/MILK-14BF397/2021,B.1.617.2\n+England/RAND-14EB338/2021,B.1.617.1\n+England/RAND-14E21BF/2021,B.1.617.2\n+England/RAND-14E1D70/2021,B.1.617.2\n+Australia/WA668/2021,B.1.617.2\n+Australia/WA672/2021,B.1.617.2\n+Australia/NSW4474/2021,B.1.617.2\n+Singapore/53
7/2021,B.1.617.1\n+Singapore/539/2021,B.1.617.1\n+Singapore/541/2021,B.1.617.2\n+Singapore/544/2021,B.1.617.1\n+Singapore/545/2021,B.1.617.1\n+Singapore/546/2021,B.1.617.2\n+USA/NY-PRL-2021_0412_01A06/2021,B.1.617\n+USA/NY-PRL-2021_0414_00O18/2021,B.1\n'
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/data/lineages.metadata.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-21/data/lineages.metadata.csv Thu Jun 03 06:38:26 2021 +0000
b
b'@@ -0,0 +1,452755 @@\n+sequence_name,lineage,probability,pangolearn_version,status,note,covv_accession_id,country,sample_date,epi_week,travel_history,constellation\n+Brazil/SP-1750/2021,N.9,1.0,2021-04-14,passed_qc,,EPI_ISL_1079159,Brazil,2021-02-03,58.0,,G-K---\n+England/MILK-129BE47/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1072353,UK,2021-02-10,59.0,,G-----\n+Switzerland/BL-ETHZ-490801/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080499,Switzerland,2021-02-09,59.0,,G-----\n+Switzerland/BS-ETHZ-490849/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080500,Switzerland,2021-02-09,59.0,,G-----\n+Italy/CAM-AMES-1-23/2021,A.2.5.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080740,Italy,2021-02-15,60.0,,GXXX--\n+Belgium/ULG-12383/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081135,Belgium,2021-02-14,60.0,,G-----\n+Belgium/ULG-12395/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081137,Belgium,2021-02-14,60.0,,G-----\n+Belgium/ULG-12381/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081138,Belgium,2021-02-13,59.0,,G-----\n+Belgium/ULG-12398/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081139,Belgium,2021-02-13,59.0,,G-----\n+Belgium/ULG-12428/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081142,Belgium,2021-02-12,59.0,,G-----\n+Belgium/ULG-12415/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081144,Belgium,2021-02-11,59.0,,G-----\n+Belgium/ULG-12357/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081146,Belgium,2021-02-09,59.0,,G-----\n+Belgium/ULG-12363/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081147,Belgium,2021-02-09,59.0,,G-----\n+Belgium/ULG-12370/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081148,Belgium,2021-02-04,58.0,,G-----\n+USA/MD-MDH-1057/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081232,USA,2021-02-16,60.0,,GXX---\n+Belgium/IPG-19/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081842,Belgium,2021-02-18,60.0,,GXX-X-\n+Italy/CAM-AMES-3-82/2021,P.1.1,1.0,2021-04-14,passed_qc,13/17 P.1 (B.1.1.28.1) 
SNPs,EPI_ISL_1082468,Italy,2021-02-15,60.0,,GYK---\n+Switzerland/GE-33292942/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1084765,Switzerland,2021-02-16,60.0,,G-----\n+Italy/CAM-AMES-6-43/2021,A.2.5.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085201,Italy,2021-02-16,60.0,,G-----\n+Italy/CAM-AMES-6-48/2021,P.1.1,1.0,2021-04-14,passed_qc,14/17 P.1 (B.1.1.28.1) SNPs,EPI_ISL_1085205,Italy,2021-02-16,60.0,,GYK---\n+Italy/CAM-AMES-6-50/2021,P.1.1,1.0,2021-04-14,passed_qc,11/17 P.1 (B.1.1.28.1) SNPs,EPI_ISL_1085207,Italy,2021-02-16,60.0,,G-X---\n+France/un-HMN-21022170010/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085233,France,2021-02-17,60.0,,G-----\n+France/un-HMN-21022220115/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085246,France,2021-02-22,61.0,,GXX---\n+France/un-HMN-21022180510/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085379,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022030415/2021,B.1.619,1.0,2021-04-14,passed_qc,,EPI_ISL_1085533,France,2021-02-03,58.0,,G-K---\n+France/un-HMN-21022110141/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085554,France,2021-02-10,59.0,,GXX---\n+France/un-HMN-21022020529/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085559,France,2021-02-02,58.0,,G-----\n+France/un-HMN-21022080622/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085627,France,2021-02-08,59.0,,G-----\n+France/un-HMN-21022080646/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085628,France,2021-02-08,59.0,,G-----\n+France/un-HMN-21022100410/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085784,France,2021-02-10,59.0,,G-----\n+France/un-HMN-21022170216/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085907,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022160227/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085908,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022180359/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085909,France,2021-02-16,60.0,,G-----\n+France/un-HMN-21022180249/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,
EPI_ISL_1085910,France,2021-02-16,60.0,,G-'..b'I_ISL_935442,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2922/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935443,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2923/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935444,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2924/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935445,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2925/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935446,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2926/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935447,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2927/2020,B.1.1.244,1.0,2021-04-14,passed_qc,,EPI_ISL_935448,USA,2020-11-17,47.0,,G-----\n+USA/FL-BPHL-2928/2020,B.1.1.222,1.0,2021-04-14,passed_qc,,EPI_ISL_935449,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2929/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935450,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2930/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935451,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2931/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935452,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2932/2020,B.1.265,1.0,2021-04-14,passed_qc,,EPI_ISL_935453,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2933/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935454,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2934/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935455,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2935/2020,B.1.595,1.0,2021-04-14,passed_qc,,EPI_ISL_935456,USA,2020-11-18,47.0,,G-----\n+USA/FL-BPHL-2936/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935457,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2937/2020,B.1.1.192,1.0,2021-04-14,passed_qc,,EPI_ISL_935458,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2938/2020,B.1.1.192,1.0,2021-04-14,passed_qc,,EPI_ISL_935459,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2939/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935460,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2940/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935461,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-294
1/2020,B.1.509,1.0,2021-04-14,passed_qc,,EPI_ISL_935462,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2942/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935463,USA,2020-11-19,47.0,,G-----\n+USA/FL-BPHL-2943/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935464,USA,2020-11-20,47.0,,G-----\n+USA/FL-BPHL-2944/2020,B.1.564,1.0,2021-04-14,passed_qc,,EPI_ISL_935465,USA,2020-11-20,47.0,,G-----\n+USA/FL-BPHL-2945/2020,B.1.499.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935466,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2946/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935467,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2947/2020,B.1.361,1.0,2021-04-14,passed_qc,,EPI_ISL_935468,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2948/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935469,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2949/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935470,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2950/2020,B.1.596,1.0,2021-04-14,passed_qc,,EPI_ISL_935471,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2951/2020,B.1.596,1.0,2021-04-14,passed_qc,,EPI_ISL_935472,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2952/2020,B.1.588,1.0,2021-04-14,passed_qc,,EPI_ISL_935473,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2953/2020,B.1.565,1.0,2021-04-14,passed_qc,,EPI_ISL_935474,USA,2020-11-30,49.0,,G-----\n+USA/FL-BPHL-2954/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935475,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2955/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935476,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2956/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935477,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2957/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935478,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2958/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935479,USA,2020-12-01,49.0,,G-----\n+USA/FL-BPHL-2959/2020,B.1.582,1.0,2021-04-14,passed_qc,,EPI_ISL_935480,USA,2020-12-02,49.0,,G-----\n+USA/FL-BPHL-2960/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935481,USA,2020-12-02,49.0,,G-----\n+USA/FL-BPHL
-2961/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935482,USA,2020-12-01,49.0,,G-----\n'
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-21/supporting_information/data_prep_description.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2021-04-21/supporting_information/data_prep_description.md Thu Jun 03 06:38:26 2021 +0000
[
@@ -0,0 +1,16 @@
+# Data preparation
+
+### Source
+
+All GISAID data is downloaded and run through [`grapevine`](https://github.com/cov-ert/grapevine) which excludes records without proper dates, removes duplicate sequences (taking the earliest sample of the duplicates), omits some sequences with known issues, filters by length and coverage, and trims the sequences to CDS.
+
+It also aligns the sequences using `mafft` and builds an ML tree using `iqtree`. A lineage is assigned to each sequence using `pangolin` with the previous data release.
+
+### Lineage Curation
+
+The phylogeny is annotated with lineage and then in `FigTree` the lineages are manually curated, drawing together a number of pieces of information including monophyly in the ML phylogeny (generally a bootstrap > 70 is required) and epidemiological data such as country and travel history. Any changes to lineage definitions and new lineages are documented during this process.
+
+- The lineage may have been defined earlier in the outbreak and with added sequence data, there is less support for that lineage. In these cases the associated epidemiological metadata is examined and the lineage may be refined or even dropped entirely. The lineage number will not be 'recycled', but the members will get reassigned the parent lineage designation.
+- The lineage may have very clear epidemiological support and ambiguities or homoplasies in the sequences/tree could contribute to low bootstrap values. In these cases, if the support is strong, the lineages are called. Recall rates for these lineages within `pangolin` may be lower, however.
+
+
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/__init__.py
--- a/test-data/2021-04-23/__init__.py Wed May 19 13:32:34 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-_program = "pangoLEARN"
-__version__ = "2021-04-21"
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/data/decisionTreeHeaders_v1.joblib
b
Binary file test-data/2021-04-23/data/decisionTreeHeaders_v1.joblib has changed
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/data/decisionTree_v1.joblib
b
Binary file test-data/2021-04-23/data/decisionTree_v1.joblib has changed
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/data/decision_tree_rules.txt
--- a/test-data/2021-04-23/data/decision_tree_rules.txt Wed May 19 13:32:34 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b"@@ -1,13285 +0,0 @@\n-['lineage', '285_-', '285_A', '285_C', '285_G', '285_T', '286_-', '286_A', '286_C', '286_G', '286_T', '287_-', '287_A', '287_C', '287_G', '287_T', '288_-', '288_A', '288_C', '288_G', '288_T', '289_-', '289_A', '289_C', '289_G', '289_T', '290_-', '290_A', '290_C', '290_G', '290_T', '291_-', '291_A', '291_C', '291_G', '291_T', '292_-', '292_A', '292_C', '292_G', '292_T', '293_-', '293_A', '293_C', '293_G', '293_T', '294_-', '294_A', '294_C', '294_G', '294_T', '295_-', '295_A', '295_C', '295_G', '295_T', '296_-', '296_A', '296_C', '296_G', '296_T', '297_-', '297_A', '297_C', '297_G', '297_T', '298_-', '298_A', '298_C', '298_G', '298_T', '299_-', '299_A', '299_C', '299_G', '299_T', '300_-', '300_A', '300_C', '300_G', '300_T', '301_-', '301_A', '301_C', '301_G', '301_T', '302_-', '302_A', '302_C', '302_G', '302_T', '303_-', '303_A', '303_C', '303_G', '303_T', '304_-', '304_A', '304_C', '304_G', '304_T', '305_-', '305_A', '305_C', '305_G', '305_T', '306_-', '306_A', '306_C', '306_G', '306_T', '307_-', '307_A', '307_C', '307_G', '307_T', '308_-', '308_A', '308_C', '308_G', '308_T', '309_-', '309_A', '309_C', '309_G', '309_T', '310_-', '310_A', '310_C', '310_G', '310_T', '311_-', '311_A', '311_C', '311_G', '311_T', '312_-', '312_A', '312_C', '312_G', '312_T', '313_-', '313_A', '313_C', '313_G', '313_T', '314_-', '314_A', '314_C', '314_G', '314_T', '315_-', '315_A', '315_C', '315_G', '315_T', '316_-', '316_A', '316_C', '316_G', '316_T', '317_-', '317_A', '317_C', '317_G', '317_T', '318_-', '318_A', '318_C', '318_G', '318_T', '319_-', '319_A', '319_C', '319_G', '319_T', '320_-', '320_A', '320_C', '320_G', '320_T', '321_-', '321_A', '321_C', '321_G', '321_T', '322_-', '322_A', '322_C', '322_G', '322_T', '323_-', '323_A', '323_C', '323_G', '323_T', '324_-', '324_A', '324_C', '324_G', '324_T', '325_-', '325_A', '325_C', '325_G', '325_T', '326_-', '326_A', '326_C', '326_G', '326_T', '327_-', '327_A', '327_C', '327_G', '327_T', '328_-', '328_A', '328_C', 
'328_G', '328_T', '329_-', '329_A', '329_C', '329_G', '329_T', '330_-', '330_A', '330_C', '330_G', '330_T', '331_-', '331_A', '331_C', '331_G', '331_T', '332_-', '332_A', '332_C', '332_G', '332_T', '333_-', '333_A', '333_C', '333_G', '333_T', '334_-', '334_A', '334_C', '334_G', '334_T', '335_-', '335_A', '335_C', '335_G', '335_T', '336_-', '336_A', '336_C', '336_G', '336_T', '337_-', '337_A', '337_C', '337_G', '337_T', '338_-', '338_A', '338_C', '338_G', '338_T', '339_-', '339_A', '339_C', '339_G', '339_T', '340_-', '340_A', '340_C', '340_G', '340_T', '341_-', '341_A', '341_C', '341_G', '341_T', '342_-', '342_A', '342_C', '342_G', '342_T', '343_-', '343_A', '343_C', '343_G', '343_T', '344_-', '344_A', '344_C', '344_G', '344_T', '345_-', '345_A', '345_C', '345_G', '345_T', '346_-', '346_A', '346_C', '346_G', '346_T', '347_-', '347_A', '347_C', '347_G', '347_T', '348_-', '348_A', '348_C', '348_G', '348_T', '349_-', '349_A', '349_C', '349_G', '349_T', '350_-', '350_A', '350_C', '350_G', '350_T', '351_-', '351_A', '351_C', '351_G', '351_T', '352_-', '352_A', '352_C', '352_G', '352_T', '353_-', '353_A', '353_C', '353_G', '353_T', '354_-', '354_A', '354_C', '354_G', '354_T', '355_-', '355_A', '355_C', '355_G', '355_T', '357_-', '357_A', '357_C', '357_G', '357_T', '358_-', '358_A', '358_C', '358_G', '358_T', '359_-', '359_A', '359_C', '359_G', '359_T', '360_-', '360_A', '360_C', '360_G', '360_T', '361_-', '361_A', '361_C', '361_G', '361_T', '362_-', '362_A', '362_C', '362_G', '362_T', '363_-', '363_A', '363_C', '363_G', '363_T', '364_-', '364_A', '364_C', '364_G', '364_T', '365_-', '365_A', '365_C', '365_G', '365_T', '366_-', '366_A', '366_C', '366_G', '366_T', '367_-', '367_A', '367_C', '367_G', '367_T', '368_-', '368_A', '368_C', '368_G', '368_T', '369_-', '369_A', '369_C', '369_G', '369_T', '370_-', '370_A', '370_C', '370_G', '370_T', '371_-', '371_A', '371_C', '371_G', '371_T', '372_-', '372_A', '372_C', '372_G', '372_T', '373_-', '373_A', '373_C', '373_G', '373_T', 
'374_-"..b"43!='G',27943=='A',21254!='A',20177=='A',19705=='-'\n-B.1.177.57\t18423=='-',26800=='C',16241!='-',25613!='G',15479!='G',21613=='A',6806=='G',23643!='G',27943=='A',21254=='A'\n-B.1.177.54\t18423=='-',26800=='C',16241!='-',25613!='G',15479!='G',21613=='A',6806=='G',23643=='G'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488!='C'\n-B.1.177.16\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105!='A'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826!='A'\n-B.1.177.15\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416!='C'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780!='A'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143!='A',21254!='A'\n-B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143!='A',21254=='A'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931!='G',28143=='A'\n-B.1\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072!='A',29644!='G'\n-B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072!='A',29644=='G'\n-B.1.177.7\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532!='C',4826=='A',11416=='C',12780=='A',28931=='G',16072=='A'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492!='T',11532=='C'\n-B.1.177
\t18423=='-',26800=='C',16241!='-',25613!='G',15479=='G',25488=='C',24105=='A',26492=='T'\n-B.1.177.18\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310!='C'\n-B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869!='C'\n-B.1.177.12\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813!='G'\n-B.1.177.57\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624!='A'\n-B.1.177.69\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627!='C'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627=='C',6285!='A'\n-B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043!='G',23624=='A',28627=='C',6285=='A'\n-B.1.177.58\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804!='G',7043=='G'\n-W.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118!='G',24813=='G',9804=='G'\n-B.1.177.81\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026!='G',12118=='G'\n-B.1.177.29\t18423=='-',26800=='C',16241!='-',25613=='G',1986!='C',23310=='C',10869=='C',29026=='G'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294!='-',8077!='G'\n-B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294!='-',8077=='G'\n-B.1.177.4\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959!='C',28294=='-'\n-B.1.177\t18423=='-',26800=='C',16241!='-',25613=='G',1986=='C',20959=='C'\n-B.1.177.21\t18423=='-',26800=='C',16241=='-',22050!='G',26166!='T'\n-B.1.177\t18423=='-',26800=='C',16241=='-
',22050!='G',26166=='T'\n-AA.1\t18423=='-',26800=='C',16241=='-',22050=='G'\n"
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/data/lineage_recall_report.csv
--- a/test-data/2021-04-23/data/lineage_recall_report.csv Wed May 19 13:32:34 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,781 +0,0 @@\n-lineage,precision,recall,f1_score,support\n-B.1.1.4,0.82,0.9941818181818182,0.8944545454545455,468\n-A,0.8312000000000002,0.9623000000000002,0.8904,665\n-A.1,0.9960000000000001,0.9884000000000001,0.9921999999999999,2508\n-A.10,1.0,0.95,0.9667,14\n-A.11,1.0,1.0,1.0,10\n-A.12,0.8333333333333334,0.8333333333333334,0.8333333333333334,5\n-A.13,1.0,1.0,1.0,21\n-A.14,0.6,0.6,0.6,5\n-A.15,0.9856999999999999,1.0,0.9923,61\n-A.16,0.9833000000000001,1.0,0.9908999999999999,48\n-A.2,0.9953999999999998,0.9907,0.9928999999999999,1058\n-A.2.1,0.2,0.0833,0.1167,21\n-A.2.2,0.9977,0.9977,0.9977,427\n-A.2.3,0.9949999999999999,0.9946999999999999,0.9947000000000001,189\n-A.2.4,0.8116999999999999,1.0,0.8951,84\n-A.3,0.9951000000000001,0.9961,0.9957,793\n-A.4,1.0,1.0,1.0,54\n-A.5,0.9929,0.9879000000000001,0.9902999999999998,411\n-A.6,1.0,1.0,1.0,115\n-A.7,1.0,0.95,0.9667,17\n-A.9,0.3,0.2,0.2334,14\n-B,0.931,0.9460000000000001,0.9382999999999999,3723\n-B.1,0.9252,0.8999,0.9123000000000001,21297\n-B.1.1,0.9598999999999999,0.9279999999999999,0.9431999999999998,12773\n-B.1.1.1,0.9918000000000001,0.9892,0.9905000000000002,5708\n-B.1.1.10,0.9309000000000001,0.9390999999999998,0.9319,493\n-B.1.1.100,1.0,1.0,1.0,17\n-B.1.1.101,0.77,0.85,0.7833,16\n-B.1.1.102,0.6388333333333334,0.8333333333333334,0.6945,6\n-B.1.1.103,0.7,0.45,0.5334999999999999,17\n-B.1.1.104,0.9550000000000001,0.6749999999999999,0.7657,49\n-B.1.1.105,0.47614285714285715,0.5714285714285714,0.5,5\n-B.1.1.106,0.6,0.55,0.5667,15\n-B.1.1.107,0.9875,0.9464,0.9622999999999999,77\n-B.1.1.109,1.0,1.0,1.0,23\n-B.1.1.110,0.8571428571428571,0.8571428571428571,0.8571428571428571,6\n-B.1.1.111,0.9667,1.0,0.9800000000000001,26\n-B.1.1.112,1.0,1.0,1.0,15\n-B.1.1.113,0.8,0.8,0.8,24\n-B.1.1.114,0.8,0.8,0.8,5\n-B.1.1.115,1.0,1.0,1.0,42\n-B.1.1.116,0.85,0.9,0.8667,11\n-B.1.1.117,0.9,0.65,0.7334999999999999,20\n-B.1.1.118,1.0,0.8432999999999999,0.9036,52\n-B.1.1.12,0.9189999999999999,0.9666,0.9343999999999999,64\n-B.1.1.120,1.0,1
.0,1.0,29\n-B.1.1.121,0.6833333333333332,0.8333333333333334,0.6970000000000001,6\n-B.1.1.122,0.95,1.0,0.9667,11\n-B.1.1.123,0.8257,0.9400000000000001,0.8713000000000001,52\n-B.1.1.125,1.0,1.0,1.0,91\n-B.1.1.126,1.0,1.0,1.0,9\n-B.1.1.127,0.8847000000000002,0.8666,0.8529,59\n-B.1.1.128,1.0,0.9334,0.96,26\n-B.1.1.129,0.12,0.15,0.1,15\n-B.1.1.13,1.0,1.0,1.0,15\n-B.1.1.130,0.7765000000000001,0.9826,0.8549999999999999,113\n-B.1.1.131,0.8667,0.9,0.8800000000000001,18\n-B.1.1.132,0.9833000000000001,1.0,0.9908999999999999,53\n-B.1.1.133,0.9,0.8,0.8300000000000001,35\n-B.1.1.134,0.975,0.9667,0.9666,68\n-B.1.1.135,0.95,0.9445,0.9439999999999997,93\n-B.1.1.136,1.0,1.0,1.0,36\n-B.1.1.137,0.9606999999999999,0.9856999999999999,0.9712,69\n-B.1.1.138,1.0,1.0,1.0,129\n-B.1.1.139,1.0,1.0,1.0,9\n-B.1.1.14,1.0,1.0,1.0,58\n-B.1.1.140,0.775,0.9,0.8157,21\n-B.1.1.141,1.0,0.9083,0.9154,123\n-B.1.1.142,1.0,1.0,1.0,21\n-B.1.1.143,0.4,0.1666,0.2334,26\n-B.1.1.144,0.9,0.9,0.9,14\n-B.1.1.145,0.9334,0.95,0.9267,20\n-B.1.1.147,0.9,0.9,0.9,12\n-B.1.1.148,0.875,0.9856999999999999,0.9189999999999999,67\n-B.1.1.149,1.0,1.0,1.0,59\n-B.1.1.15,0.9464,0.9833000000000001,0.9611999999999998,63\n-B.1.1.151,0.8183999999999999,0.9933,0.882,145\n-B.1.1.152,0.9,0.8,0.8333999999999999,12\n-B.1.1.153,1.0,0.975,0.9856999999999999,46\n-B.1.1.154,0.8400000000000001,0.9167,0.8584999999999999,25\n-B.1.1.155,0.8000999999999999,0.85,0.8067,19\n-B.1.1.157,1.0,0.9167,0.9467000000000001,24\n-B.1.1.158,0.9667,0.925,0.9427999999999999,44\n-B.1.1.159,0.8950000000000001,0.95,0.9163,40\n-B.1.1.16,0.9667,1.0,0.9800000000000001,47\n-B.1.1.160,0.95,1.0,0.9667,10\n-B.1.1.161,0.7030000000000001,0.82,0.7424999999999999,43\n-B.1.1.162,0.8667,0.85,0.8467,20\n-B.1.1.163,0.9370999999999998,0.9047000000000001,0.9147000000000001,63\n-B.1.1.164,0.8499000000000001,0.9777999999999999,0.9013,86\n-B.1.1.165,1.0,0.96,0.975,55\n-B.1.1.166,0.8667,0.6167,0.6933999999999999,25\n-B.1.1.167,0.8667,0.8167,0.8267,21\n-B.1.1.168,1.0,0.9167,0.9467000000000
001,22\n-B.1.1.169,0.85,0.9,0.8667,15\n-B.1.1.17,1.0,0.9667,0.9800000000000001,29\n-B.1.1.170,1.0,1'..b'7,0.45,0.5335,21\n-B.1.9,0.9922000000000001,0.9916,0.9917999999999999,242\n-B.1.90,1.0,0.8834,0.9314,34\n-B.1.91,0.9821000000000002,1.0,0.9909000000000001,379\n-B.1.93,0.9969000000000001,0.9968,0.9968999999999999,946\n-B.1.94,0.9167,1.0,0.9467000000000001,18\n-B.1.95,0.8,0.75,0.7501,20\n-B.1.96,1.0,1.0,1.0,45\n-B.1.97,1.0,1.0,1.0,38\n-B.1.98,0.9441,0.9868,0.9645999999999999,606\n-B.10,0.9751,0.9917,0.9827999999999999,112\n-B.11,0.9644,0.9868,0.9743999999999999,305\n-B.12,0.9856999999999999,1.0,0.9923,63\n-B.13,1.0,1.0,1.0,21\n-B.15,1.0,0.975,0.9856999999999999,41\n-B.18,0.9667,1.0,0.9800000000000001,22\n-B.19,1.0,0.9667,0.9800000000000001,26\n-B.20,0.8667,0.7333999999999999,0.7800999999999999,22\n-B.23,0.8412,0.9833999999999999,0.9006999999999998,120\n-B.26,0.6344,0.65,0.6039,40\n-B.27,1.0,1.0,1.0,58\n-B.28,0.9893000000000001,0.9963000000000001,0.9926,263\n-B.29,0.9627000000000001,0.991,0.9759,216\n-B.3,0.9927999999999999,0.9879999999999999,0.9903999999999998,828\n-B.3.1,0.9829999999999999,0.9940000000000001,0.9884000000000001,508\n-B.30,1.0,0.9800000000000001,0.9888999999999999,48\n-B.31,0.9867000000000001,0.9725999999999999,0.9792,216\n-B.32,0.9333,0.9,0.9067000000000001,24\n-B.33,0.9800000000000001,0.975,0.9745999999999999,47\n-B.34,0.9856999999999999,1.0,0.9923,65\n-B.35,0.9789,0.9778,0.977,87\n-B.36,1.0,1.0,1.0,10\n-B.37,0.975,0.9667,0.9657,27\n-B.38,1.0,1.0,1.0,15\n-B.39,0.9975999999999999,0.9753999999999998,0.9861000000000001,404\n-B.4,0.9811,0.8449,0.9066000000000001,361\n-B.4.1,1.0,1.0,1.0,27\n-B.4.2,1.0,1.0,1.0,15\n-B.4.4,0.9667,0.8916999999999999,0.9124000000000001,38\n-B.4.5,0.6001000000000001,1.0,0.7475000000000002,76\n-B.40,0.9915,0.9804999999999999,0.9858,2435\n-B.41,0.975,0.9826,0.9783999999999999,117\n-B.42,0.8667,0.9,0.8800000000000001,11\n-B.43,0.975,1.0,0.9856999999999999,33\n-B.44,0.95,1.0,0.9667,10\n-B.45,0.9962,1.0,0.998,245\n-B.46,0.9800000
000000001,0.9334,0.9489000000000001,32\n-B.47,0.95,1.0,0.9667,19\n-B.48,0.9823000000000001,0.8596,0.9136000000000001,148\n-B.49,1.0,1.0,1.0,19\n-B.5,0.8167,0.8,0.7800999999999999,13\n-B.51,1.0,1.0,1.0,22\n-B.52,1.0,1.0,1.0,74\n-B.53,0.9856999999999999,1.0,0.9923,58\n-B.54,0.7546999999999999,0.4459000000000001,0.5488,88\n-B.6,0.9714,0.9664999999999999,0.9684000000000001,712\n-B.6.1,1.0,0.7833,0.8501,22\n-B.6.2,1.0,1.0,1.0,14\n-B.6.3,0.9,0.7666999999999999,0.8099999999999999,24\n-B.6.4,0.95,0.85,0.8834,22\n-B.6.5,0.8888888888888888,0.8888888888888888,0.8888888888888888,9\n-B.6.6,0.9884999999999999,0.9978000000000001,0.9932000000000002,927\n-C.1,0.9704,0.9732,0.9712999999999999,186\n-C.11,1.0,1.0,1.0,10\n-C.12,1.0,1.0,1.0,122\n-C.13,0.8667999999999999,0.85,0.8200999999999998,20\n-C.14,0.8454,0.785,0.768,45\n-C.15,1.0,1.0,1.0,27\n-C.2,1.0,1.0,1.0,14\n-C.3,0.9929,1.0,0.9963,126\n-C.7,1.0,1.0,1.0,28\n-C.8,0.9875,0.9833000000000001,0.9842000000000001,63\n-C.9,0.835,0.6666000000000001,0.6974,27\n-D.2,1.0,0.9999,1.0,10570\n-D.3,1.0,1.0,1.0,140\n-E.1,1.0,1.0,1.0,41\n-F.1,1.0,1.0,1.0,10\n-G.1,0.9167,0.9,0.8967,19\n-H.1,0.9015000000000001,0.9808999999999999,0.9336999999999998,104\n-I.1,0.9042,1.0,0.9345000000000001,33\n-J.1,0.95,1.0,0.9667,15\n-K.1,1.0,0.9,0.9334,20\n-L.1,1.0,1.0,1.0,165\n-L.2,1.0,1.0,1.0,60\n-M.1,1.0,0.9667,0.9800000000000001,60\n-N.1,1.0,0.9667,0.9800000000000001,23\n-N.2,1.0,1.0,1.0,9\n-N.3,1.0,1.0,1.0,10\n-N.4,1.0,0.95,0.9667,21\n-B.1.1.119,0.3,0.4,0.33340000000000003,4\n-B.1.1.183,0.8333333333333334,0.8333333333333334,0.8333333333333334,6\n-B.1.1.81,0.75,0.75,0.75,4\n-B.1.1.91,0.6666666666666666,0.6666666666666666,0.6666666666666666,5\n-B.1.1.98,0.0,0.0,0.0,2\n-B.1.158,0.0,0.0,0.0,4\n-B.1.269,1.0,1.0,1.0,8\n-B.1.373,1.0,1.0,1.0,8\n-B.1.80,0.0,0.0,0.0,5\n-C.5,1.0,1.0,1.0,7\n-B.1.279,1.0,1.0,1.0,6\n-B.50,0.5,0.5,0.5,6\n-C.10,1.0,1.0,1.0,4\n-C.4,0.9375,1.0,0.958375,8\n-B.1.1.156,0.6666666666666666,0.6666666666666666,0.6666666666666666,6\n-B.1.1.182,1.0,1.0,1.
0,4\n-B.1.1.252,1.0,1.0,1.0,5\n-C.6,0.375,0.5,0.41675,4\n-macro avg,0.9166363636363637,0.9150909090909092,0.9071818181818184,193089\n-weighted avg,0.9636363636363636,0.9598181818181818,0.9593636363636364,193089\n'
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/data/lineages.downsample.csv
--- a/test-data/2021-04-23/data/lineages.downsample.csv Wed May 19 13:32:34 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,179715 +0,0 @@\n-sequence_name,lineage\n-Australia/VIC390/2020,B.6.6\n-Australia/VIC473/2020,B.1.1\n-Australia/VIC475/2020,B.1.23\n-Australia/VIC476/2020,B.1.23\n-Australia/VIC479/2020,B.1.23\n-Australia/NSW14/2020,B.4\n-USA/WA13-UW9/2020,A.1\n-USA/WA-UW-1741/2020,B.1.371\n-England/NOTT-10E5E4/2020,B.23\n-USA/WA-UW-1739/2020,B.1\n-England/NOTT-10E5D5/2020,B.1.1.41\n-USA/WA-UW-1732/2020,B.1\n-USA/WA-UW-1733/2020,B.1.371\n-USA/WA-UW-1722/2020,B.1\n-England/NOTT-10E5A8/2020,B.1.1\n-Netherlands/Oisterwijk_1364072/2020,B\n-USA/WA-UW-1772/2020,B.1\n-England/NOTT-10E599/2020,A.2\n-Netherlands/Oss_1363500/2020,B.55\n-USA/WA-UW-1708/2020,A.1\n-England/NOTT-10E58A/2020,B\n-Netherlands/Rotterdam_1363790/2020,B\n-USA/WA-UW-1731/2020,A.1\n-England/NOTT-10E57B/2020,B\n-Netherlands/Rotterdam_1364040/2020,B.55\n-USA/WA-UW-1709/2020,A.1\n-England/NOTT-10E55D/2020,B.3\n-England/NOTT-10E54E/2020,B.23\n-Netherlands/Tilburg_1363354/2020,B.11\n-Netherlands/Utrecht_1363564/2020,B.1.383\n-Netherlands/Utrecht_1363628/2020,B.1.383\n-USA/WA-UW-1724/2020,B.1.371\n-England/NOTT-10E520/2020,B\n-USA/WA-UW-1706/2020,B.1.371\n-England/NOTT-10E511/2020,B.39\n-USA/WA-UW-1705/2020,A.1\n-USA/WA-UW-1707/2020,B.1\n-England/NOTT-10E4F6/2020,B.1\n-USA/WA-UW-1745/2020,B.30\n-England/NOTT-10E4C9/2020,B.52\n-Netherlands/Diemen_1363454/2020,B.1.1\n-England/NOTT-10E4AB/2020,B.23\n-Netherlands/Eindhoven_1363782/2020,B.1.1\n-USA/WA-UW-1729/2020,A.1\n-England/NOTT-10E49C/2020,B.35\n-Netherlands/Haarlem_1363688/2020,B.1\n-Netherlands/Houten_1363498/2020,B.1\n-Netherlands/Loon_op_zand_1363512/2020,B.11\n-Netherlands/Delft_1363424/2020,B.1.1\n-USA/WA-UW-1682/2020,A.1\n-Australia/VIC908/2020,B.1.1\n-USA/WA-UW-1775/2020,B.1.1\n-England/NOTT-10E757/2020,B.39\n-USA/WA-UW-1774/2020,B.1.371\n-USA/un-UW-1832/2020,B.30\n-England/NOTT-10E739/2020,B.39\n-USA/WA-UW-1796/2020,B.1\n-England/NOTT-10E72A/2020,B.35\n-USA/WA-UW-1792/2020,B.1\n-England/NOTT-10E70C/2020,B.1.1.369\n-USA/WA-UW-1785/2020,A.1\n-Australia/VIC912/202
0,B.1.23\n-Australia/VIC913/2020,B.6.6\n-Czech_Republic/IAB_1/2020,B.1\n-Czech_Republic/IAB_4/2020,B.1\n-Czech_Republic/IAB_8/2020,B.1.1\n-Czech_Republic/IAB_9/2020,B.1.1\n-Czech_Republic/IAB_10/2020,B.1\n-Czech_Republic/IAB_12/2020,B.1\n-England/NOTT-10E6A5/2020,B.1.391\n-England/NOTT-10E696/2020,B.1\n-England/NOTT-10E678/2020,B.61\n-USA/WA-UW-1770/2020,B.1\n-England/NOTT-10E669/2020,B.1.1.369\n-England/NOTT-10E65A/2020,B.1\n-USA/WA-UW-1753/2020,B.1.371\n-USA/WA-UW-1735/2020,A.1\n-England/NOTT-10E63C/2020,B.3\n-Australia/NSW09/2020,B.4\n-USA/WA-UW-1730/2020,A.1\n-England/NOTT-10E61E/2020,B.1.250\n-Australia/NSW11/2020,B.4\n-Australia/NSW13/2020,B.4.6\n-Czech_Republic/IAB_15/2020,B.1\n-Czech_Republic/IAB_20/2020,B.1.1\n-USA/WA-UW-1850/2020,A.1\n-USA/WA-UW-1858/2020,B.1\n-USA/WA-UW-1824/2020,B.1.371\n-USA/WA-UW-1826/2020,B.1.162\n-USA/WA-UW-1827/2020,B.1.319\n-USA/WA-UW-1828/2020,A.1\n-England/NOTT-10E809/2020,B.29\n-USA/un-UW-1834/2020,A.1\n-England/NOTT-10E7FD/2020,B\n-USA/WA-UW-1799/2020,B.1.320\n-USA/WA-UW-1782/2020,A.1\n-USA/WA-UW-1784/2020,A.1\n-England/NOTT-10E793/2020,B.23\n-USA/WA-UW-1779/2020,B.1\n-England/NOTT-10E784/2020,B.40\n-USA/WA-UW-1904/2020,B.1\n-USA/WA-UW-1920/2020,A.1\n-USA/WA-UW-1913/2020,A.1\n-USA/WA-UW-1919/2020,A.1\n-USA/WA-UW-1905/2020,A.1\n-USA/WA-UW-1872/2020,B.1.320\n-USA/OR-UW-1849/2020,A.1\n-USA/WA-UW-1863/2020,A.1\n-USA/WA-UW-1946/2020,A.1\n-USA/WA-UW-1835/2020,A.1\n-USA/WA-UW-1868/2020,B.4.4\n-Australia/VIC551/2020,B.1.434\n-Australia/VIC554/2020,B.1.434\n-Australia/VIC555/2020,B.1\n-Australia/VIC557/2020,B.1.23\n-Australia/VIC559/2020,B.1\n-Australia/VIC560/2020,B.1\n-Australia/VIC561/2020,B.1.1\n-Australia/VIC562/2020,B.1\n-Australia/VIC565/2020,A.2.2\n-Australia/VIC567/2020,B.1\n-Australia/VIC568/2020,A.1\n-Australia/VIC569/2020,B\n-Australia/VIC570/2020,B.1\n-Australia/VIC571/2020,B.1\n-Australia/VIC572/2020,B.40\n-Australia/VIC574/2020,B.1.1\n-Australia/VIC575/2020,B.1.1\n-England/NOTT-10E12C/2020,B.61\n-Beijing/233/2020,A\n-Indi
a/MH-1-27/2020,B\n-India/MH-1-31/2020,B\n-Australia/VIC534/2020,B.1.23\n-Australia/VIC535/2020,B\n-Australia/VIC536/2020,B.1\n-Australia/V'..b'/WB-1930400401314/2021,B.1.617.1\n-Belgium/UZA-UA-CV2132091726/2021,B.1.617.1\n-Canada/MB-NML-21570/2021,B.1.438.1\n-Canada/MB-NML-21579/2021,B.1.438.1\n-Canada/MB-NML-21589/2021,B.1.438.1\n-Canada/MB-NML-17472/2021,B.1.438.1\n-Canada/MB-NML-17747/2021,B.1.438.1\n-Canada/MB-NML-17706/2021,B.1.438.1\n-Canada/MB-NML-17692/2021,B.1.438.1\n-England/CAMC-14E335E/2021,B.1.617.1\n-England/CAMC-14E0166/2021,B.1.617.1\n-England/CAMC-14E338B/2021,B.1.617.2\n-England/CAMC-14E2F97/2021,B.1.617.2\n-England/MILK-14E0272/2021,B.1.617.2\n-USA/GA-CDC-STM-000046368/2021,B.1.617.1\n-India/KA-NIMH-SEQ-236/2021,B.1.617.1\n-India/KA-NIMH-SEQ-239/2021,B.1.617.1\n-India/KA-NIMH-SEQ-249/2021,B.1.617.1\n-India/KA-NIMH-SEQ-250/2021,B.1.617.1\n-India/KA-NIMH-SEQ-253/2021,B.1.617.1\n-India/KA-NIMH-SEQ-254/2021,B.1.617.1\n-India/KA-NIMH-SEQ-271/2021,B.1.617.1\n-India/KA-NIMH-SEQ-274/2021,B.1.617.1\n-India/KA-NIMH-SEQ-279/2021,B.1.617.1\n-India/KA-NIMH-SEQ-280/2021,B.1.617.1\n-India/KA-NIMH-SEQ-284/2021,B.1.617.1\n-India/KA-NIMH-SEQ-288/2021,B.1.617.1\n-India/KA-NIMH-SEQ-291/2021,B.1.617.1\n-India/KA-NIMH-SEQ-295/2021,B.1.617.1\n-India/KA-NIMH-SEQ-302/2021,B.1.617.1\n-USA/MA-CDC-STM-000044850/2021,B.1.617.2\n-USA/MA-CDC-STM-000044887/2021,B.1.617.2\n-Sint_Maarten/SX-RIVM-23089/2021,B.1.617\n-England/CAMC-14C2C5A/2021,B.1.617.2\n-Belgium/MBLG36792/2021,B.1.617.1\n-USA/ND-NDDH-0594/2021,B.1.438.1\n-USA/ND-NDDH-0620/2021,B.1.438.1\n-USA/ND-NDDH-0621/2021,B.1.438.1\n-USA/NJ-CDC-LC0035972/2021,B.1\n-USA/NJ-CDC-LC0036132/2021,B.1\n-USA/WI-CDC-LC0035686/2021,B.1.617.1\n-USA/CA-CDC-FG-018898/2021,B.1.617.1\n-USA/CA-CDC-FG-018335/2021,B.1.617.2\n-USA/NJ-CDC-LC0038223/2021,B.1.617.2\n-USA/WA-UW-2021033003742/2021,B.1.617.1\n-USA/CA-CDC-FG-019301/2021,B.1.617.1\n-Singapore/535/2021,B.1.617.2\n-Singapore/524/2021,B.1.617.2\n-Singapore/533/2021,B.1.617.2\n-Singapore/53
4/2021,B.1.617.2\n-Singapore/525/2021,B.1.617.1\n-Singapore/526/2021,B.1.617.1\n-Singapore/527/2021,B.1.617.1\n-Singapore/528/2021,B.1.617.1\n-Singapore/529/2021,B.1.617.1\n-England/CAMC-14E79FE/2021,B.1.617.1\n-England/CAMC-14E7B61/2021,B.1.617.2\n-England/CAMC-14E792B/2021,B.1.617.1\n-England/CAMC-14E7CF5/2021,B.1.617.1\n-England/CAMC-14E7C22/2021,B.1.617.1\n-Scotland/CAMC-14E0157/2021,B.1.617.1\n-Australia/NSW4471/2021,B.1.617.2\n-USA/IN-CDC-STM-000045992/2021,B.1.617.2\n-Belgium/Aalst-OLVZ-8042639/2021,B.1.620\n-USA/ND-NDDH-0641/2021,B.1.438.1\n-USA/WA-UW-2021040102602/2021,B.1.617.1\n-USA/WA-UW-2021040308606/2021,B.1.617.2\n-USA/WA-UW-2021040107121/2021,B.1.617.1\n-New_Zealand/21MV0313/2021,B.1.617\n-New_Zealand/21MV0277/2021,B.1.617\n-New_Zealand/21MV0339/2021,B.1.617\n-New_Zealand/21MV0256/2021,B.1.617\n-New_Zealand/21MV0340/2021,B.1.617\n-New_Zealand/21MV0261/2021,B.1.617\n-New_Zealand/21MV0270/2021,B.1.617\n-New_Zealand/21MV0343/2021,B.1.617\n-New_Zealand/21MV0334/2021,B.1.617\n-Ireland/D-NVRL-21IRL49397/2021,B.1.617.1\n-Ireland/D-NVRL-21IRL49399/2021,B.1.617.1\n-France/ARA-HCL021061596501/2021,B.1.620\n-France/ARA-HCL021061598501/2021,B.1.620\n-USA/WV-WVU-WV064773/2021,B.1.620\n-USA/ND-NDDH-0710/2021,B.1.438.1\n-Reunion/PIMIT_00914/2021,B.1.438.2\n-England/RAND-14F19F1/2021,B.1.617.2\n-England/RAND-14F1AD0/2021,B.1.617.2\n-England/ALDP-14EDD1A/2021,B.1.617.2\n-England/CAMC-14E7563/2021,B.1.617.2\n-England/RAND-14F1A67/2021,B.1.617.1\n-England/CAMC-14DECA6/2021,B.1.617.2\n-England/CAMC-14DEBC7/2021,B.1.617.2\n-England/CAMC-14DEBA9/2021,B.1.617.2\n-England/CAMC-14DE9DC/2021,B.1.617.1\n-England/CAMC-14DEE37/2021,B.1.617.1\n-England/CAMC-14DEBF4/2021,B.1.617.2\n-England/CAMC-14E726C/2021,B.1.617.1\n-England/MILK-14BF397/2021,B.1.617.2\n-England/RAND-14EB338/2021,B.1.617.1\n-England/RAND-14E21BF/2021,B.1.617.2\n-England/RAND-14E1D70/2021,B.1.617.2\n-Australia/WA668/2021,B.1.617.2\n-Australia/WA672/2021,B.1.617.2\n-Australia/NSW4474/2021,B.1.617.2\n-Singapore/53
7/2021,B.1.617.1\n-Singapore/539/2021,B.1.617.1\n-Singapore/541/2021,B.1.617.2\n-Singapore/544/2021,B.1.617.1\n-Singapore/545/2021,B.1.617.1\n-Singapore/546/2021,B.1.617.2\n-USA/NY-PRL-2021_0412_01A06/2021,B.1.617\n-USA/NY-PRL-2021_0414_00O18/2021,B.1\n'
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/data/lineages.metadata.csv
--- a/test-data/2021-04-23/data/lineages.metadata.csv Wed May 19 13:32:34 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,452755 +0,0 @@\n-sequence_name,lineage,probability,pangolearn_version,status,note,covv_accession_id,country,sample_date,epi_week,travel_history,constellation\n-Brazil/SP-1750/2021,N.9,1.0,2021-04-14,passed_qc,,EPI_ISL_1079159,Brazil,2021-02-03,58.0,,G-K---\n-England/MILK-129BE47/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1072353,UK,2021-02-10,59.0,,G-----\n-Switzerland/BL-ETHZ-490801/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080499,Switzerland,2021-02-09,59.0,,G-----\n-Switzerland/BS-ETHZ-490849/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080500,Switzerland,2021-02-09,59.0,,G-----\n-Italy/CAM-AMES-1-23/2021,A.2.5.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1080740,Italy,2021-02-15,60.0,,GXXX--\n-Belgium/ULG-12383/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081135,Belgium,2021-02-14,60.0,,G-----\n-Belgium/ULG-12395/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081137,Belgium,2021-02-14,60.0,,G-----\n-Belgium/ULG-12381/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081138,Belgium,2021-02-13,59.0,,G-----\n-Belgium/ULG-12398/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081139,Belgium,2021-02-13,59.0,,G-----\n-Belgium/ULG-12428/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081142,Belgium,2021-02-12,59.0,,G-----\n-Belgium/ULG-12415/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081144,Belgium,2021-02-11,59.0,,G-----\n-Belgium/ULG-12357/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081146,Belgium,2021-02-09,59.0,,G-----\n-Belgium/ULG-12363/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081147,Belgium,2021-02-09,59.0,,G-----\n-Belgium/ULG-12370/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081148,Belgium,2021-02-04,58.0,,G-----\n-USA/MD-MDH-1057/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081232,USA,2021-02-16,60.0,,GXX---\n-Belgium/IPG-19/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1081842,Belgium,2021-02-18,60.0,,GXX-X-\n-Italy/CAM-AMES-3-82/2021,P.1.1,1.0,2021-04-14,passed_qc,13/17 P.1 (B.1.1.28.1) 
SNPs,EPI_ISL_1082468,Italy,2021-02-15,60.0,,GYK---\n-Switzerland/GE-33292942/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1084765,Switzerland,2021-02-16,60.0,,G-----\n-Italy/CAM-AMES-6-43/2021,A.2.5.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085201,Italy,2021-02-16,60.0,,G-----\n-Italy/CAM-AMES-6-48/2021,P.1.1,1.0,2021-04-14,passed_qc,14/17 P.1 (B.1.1.28.1) SNPs,EPI_ISL_1085205,Italy,2021-02-16,60.0,,GYK---\n-Italy/CAM-AMES-6-50/2021,P.1.1,1.0,2021-04-14,passed_qc,11/17 P.1 (B.1.1.28.1) SNPs,EPI_ISL_1085207,Italy,2021-02-16,60.0,,G-X---\n-France/un-HMN-21022170010/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085233,France,2021-02-17,60.0,,G-----\n-France/un-HMN-21022220115/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085246,France,2021-02-22,61.0,,GXX---\n-France/un-HMN-21022180510/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085379,France,2021-02-16,60.0,,G-----\n-France/un-HMN-21022030415/2021,B.1.619,1.0,2021-04-14,passed_qc,,EPI_ISL_1085533,France,2021-02-03,58.0,,G-K---\n-France/un-HMN-21022110141/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085554,France,2021-02-10,59.0,,GXX---\n-France/un-HMN-21022020529/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085559,France,2021-02-02,58.0,,G-----\n-France/un-HMN-21022080622/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085627,France,2021-02-08,59.0,,G-----\n-France/un-HMN-21022080646/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085628,France,2021-02-08,59.0,,G-----\n-France/un-HMN-21022100410/2021,B.1.214.3,1.0,2021-04-14,passed_qc,,EPI_ISL_1085784,France,2021-02-10,59.0,,G-----\n-France/un-HMN-21022170216/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085907,France,2021-02-16,60.0,,G-----\n-France/un-HMN-21022160227/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085908,France,2021-02-16,60.0,,G-----\n-France/un-HMN-21022180359/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,EPI_ISL_1085909,France,2021-02-16,60.0,,G-----\n-France/un-HMN-21022180249/2021,B.1.214.2,1.0,2021-04-14,passed_qc,,
EPI_ISL_1085910,France,2021-02-16,60.0,,G-'..b'I_ISL_935442,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2922/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935443,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2923/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935444,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2924/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935445,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2925/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935446,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2926/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935447,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2927/2020,B.1.1.244,1.0,2021-04-14,passed_qc,,EPI_ISL_935448,USA,2020-11-17,47.0,,G-----\n-USA/FL-BPHL-2928/2020,B.1.1.222,1.0,2021-04-14,passed_qc,,EPI_ISL_935449,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2929/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935450,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2930/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935451,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2931/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935452,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2932/2020,B.1.265,1.0,2021-04-14,passed_qc,,EPI_ISL_935453,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2933/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935454,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2934/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935455,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2935/2020,B.1.595,1.0,2021-04-14,passed_qc,,EPI_ISL_935456,USA,2020-11-18,47.0,,G-----\n-USA/FL-BPHL-2936/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935457,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-2937/2020,B.1.1.192,1.0,2021-04-14,passed_qc,,EPI_ISL_935458,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-2938/2020,B.1.1.192,1.0,2021-04-14,passed_qc,,EPI_ISL_935459,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-2939/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935460,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-2940/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935461,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-294
1/2020,B.1.509,1.0,2021-04-14,passed_qc,,EPI_ISL_935462,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-2942/2020,B.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935463,USA,2020-11-19,47.0,,G-----\n-USA/FL-BPHL-2943/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935464,USA,2020-11-20,47.0,,G-----\n-USA/FL-BPHL-2944/2020,B.1.564,1.0,2021-04-14,passed_qc,,EPI_ISL_935465,USA,2020-11-20,47.0,,G-----\n-USA/FL-BPHL-2945/2020,B.1.499.1,1.0,2021-04-14,passed_qc,,EPI_ISL_935466,USA,2020-11-30,49.0,,G-----\n-USA/FL-BPHL-2946/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935467,USA,2020-11-30,49.0,,G-----\n-USA/FL-BPHL-2947/2020,B.1.361,1.0,2021-04-14,passed_qc,,EPI_ISL_935468,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2948/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935469,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2949/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935470,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2950/2020,B.1.596,1.0,2021-04-14,passed_qc,,EPI_ISL_935471,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2951/2020,B.1.596,1.0,2021-04-14,passed_qc,,EPI_ISL_935472,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2952/2020,B.1.588,1.0,2021-04-14,passed_qc,,EPI_ISL_935473,USA,2020-11-30,49.0,,G-----\n-USA/FL-BPHL-2953/2020,B.1.565,1.0,2021-04-14,passed_qc,,EPI_ISL_935474,USA,2020-11-30,49.0,,G-----\n-USA/FL-BPHL-2954/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935475,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2955/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935476,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2956/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935477,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2957/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935478,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2958/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935479,USA,2020-12-01,49.0,,G-----\n-USA/FL-BPHL-2959/2020,B.1.582,1.0,2021-04-14,passed_qc,,EPI_ISL_935480,USA,2020-12-02,49.0,,G-----\n-USA/FL-BPHL-2960/2020,B.1.234,1.0,2021-04-14,passed_qc,,EPI_ISL_935481,USA,2020-12-02,49.0,,G-----\n-USA/FL-BPHL
-2961/2020,B.1.2,1.0,2021-04-14,passed_qc,,EPI_ISL_935482,USA,2020-12-01,49.0,,G-----\n'
b
diff -r 514a786baaa9 -r 42126b414951 test-data/2021-04-23/supporting_information/data_prep_description.md
--- a/test-data/2021-04-23/supporting_information/data_prep_description.md Wed May 19 13:32:34 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,16 +0,0 @@
-# Data preparation
-
-### Source
-
-All GISAID data is downloaded and run through [`grapevine`](https://github.com/cov-ert/grapevine) which excludes records without proper dates, removes duplicate sequences (taking the earliest sample of the duplicates), omits some sequences with known issues, filters by length and coverage, and trims the sequences to CDS.
-
-It also aligns the sequences using `mafft` and builds an ML tree using `iqtree`. A lineage is assigned to each sequence using `pangolin` with the previous data release.
-
-### Lineage Curation
-
-The phylogeny is annotated with lineage and then in `FigTree` the lineages are manually curated, drawing together a number of pieces of information including monophyly in the ML phylogeny (generally a bootstrap > 70 is required) and epidemiological data such as country and travel history. Any changes to lineage definitions and new lineages are documented during this process.
-
-- The lineage may have been defined earlier in the outbreak and with added sequence data, there is less support for that lineage. In these cases the associated epidemiological metadata is examined and the lineage may be refined or even dropped entirely. The lineage number will not be 'recycled', but the members will get reassigned the parent lineage designation.
-- The lineage may have very clear epidemiological support and ambiguities or homoplasies in the sequences/tree could contribute to low bootstrap values. In these cases, if the support is strong, the lineages are called. Recall rates for these lineages within `pangolin` may be lower, however.
-
-
b
diff -r 514a786baaa9 -r 42126b414951 test-data/pangolearn.loc
--- a/test-data/pangolearn.loc Wed May 19 13:32:34 2021 +0000
+++ b/test-data/pangolearn.loc Thu Jun 03 06:38:26 2021 +0000
b
@@ -6,4 +6,5 @@
 #
 # for example
 # 2021-04-14 pangoLEARN data release 2021-04-14 1.0 /tmp/database/pangolearn/pangolearn/2021-04-14
-2021-04-23 pangoLEARN data release 2021-04-23 1.0 ${__HERE__}/2021-04-23
+2021-04-21 pangoLEARN data release 2021-04-21 1.0 ${__HERE__}/2021-04-21
+2021-05-27 pangoLEARN data release 2021-05-27 3.0 ${__HERE__}/2021-05-27