Next changeset 1:99494998688a (2024-02-25) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lineagespot commit 0bc6ed15054577af1089d55ef9aa1071d122eb6b |
added:
NC_045512.2_annot.gff3 convert_lineage_defs.py lineagespot_verbose.R lineagespot_wrapper.xml test-data/AY.1.txt test-data/B.1.351.txt test-data/B.1.617.2.txt test-data/NC_045512.2_annot.gff3 test-data/P.1.txt test-data/SampleA_freebayes_ann.vcf test-data/SampleB_freebayes_ann.vcf test-data/SampleC_freebayes_ann.vcf test-data/lineage_hits.txt test-data/lineage_report.txt test-data/pangolin_constellations.loc test-data/test-constellations/definitions/cAY.4.2.json test-data/test-constellations/definitions/cAY.4.json test-data/test-constellations/definitions/cB.1.351.json test-data/test-constellations/definitions/cB.1.617.2.json test-data/test-constellations/definitions/cP.1.json test-data/variants_table.txt tool-data/pangolin_constellations.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 NC_045512.2_annot.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NC_045512.2_annot.gff3 Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,28 @@ +##sequence-region NC_045512.2 1 29903 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=2697049 +NC_045512.2 Giorgi CDS 266 805 . + . NSP1 Leader protein +NC_045512.2 Giorgi CDS 806 2719 . + . NSP2 Non-Structural protein 2 +NC_045512.2 Giorgi CDS 2720 8554 . + . NSP3 Predicted phosphoesterase, papain-like proteinase +NC_045512.2 Giorgi CDS 8555 10054 . + . NSP4 Transmembrane protein +NC_045512.2 Giorgi CDS 10055 10972 . + . NSP5 3C-like proteinase +NC_045512.2 Giorgi CDS 10973 11842 . + . NSP6 Transmembrane protein +NC_045512.2 Giorgi CDS 11843 12091 . + . NSP7 Non-Structural Protein 7 +NC_045512.2 Giorgi CDS 12092 12685 . + . NSP8 Non-Structural Protein 8 +NC_045512.2 Giorgi CDS 12686 13024 . + . NSP9 ssRNA-binding protein +NC_045512.2 Giorgi CDS 13025 13441 . + . NSP10 Growth-factor-like protein +NC_045512.2 Giorgi CDS 13442 13468 . + . NSP12a RNA-dependent RNA polymerase, pre-ribosomal frameshift +NC_045512.2 Giorgi CDS 13468 16236 . + . NSP12b RNA-dependent RNA polymerase, post-ribosomal frameshift +NC_045512.2 Giorgi CDS 16237 18039 . + . NSP13 Helicase +NC_045512.2 Giorgi CDS 18040 19620 . + . NSP14 3'-to-5' exonuclease +NC_045512.2 Giorgi CDS 19621 20658 . + . NSP15 endoRNAse +NC_045512.2 Giorgi CDS 20659 21552 . + . NSP16 2'-O-ribose methyltransferase +NC_045512.2 Giorgi CDS 21563 25384 . + . S Spike +NC_045512.2 Giorgi CDS 25393 26220 . + . ORF3a ORF3a protein +NC_045512.2 Giorgi CDS 26245 26472 . + . E Envelope +NC_045512.2 Giorgi CDS 26523 27191 . + . M Membrane +NC_045512.2 Giorgi CDS 27202 27387 . + . ORF6 ORF6 protein +NC_045512.2 Giorgi CDS 27394 27759 . + . ORF7a ORF7a protein +NC_045512.2 Giorgi CDS 27756 27887 . + . ORF7b ORF7b protein +NC_045512.2 Giorgi CDS 27894 28259 . + . ORF8 ORF8 protein +NC_045512.2 Giorgi CDS 28274 29533 . + . N Nucleocapsid protein +NC_045512.2 Giorgi CDS 29558 29674 . + . ORF10 ORF10 protein |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 convert_lineage_defs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert_lineage_defs.py Tue Aug 08 15:12:08 2023 +0000 |
[ |
# Try to convert constellations files into the format expected by lineagespot.
# Constellations files can define parent lineages, in which case the script
# parses parent mutations recursively and adds them to the signature of the
# child.

# CURRENT AND GENERAL LIMITATIONS
# Important to understand, please read carefully
# 1. Constellations sometimes uses base instead of amino acid positions for
#    defining mutations. These can take two forms like in these examples:
#    "nuc:C8986T", i.e. a SNV given in base coordinates
#    "del:22029:6", i.e. a deletion of 6 bases given in base coordinates
#    The current version of the script makes no attempt to convert such lines to
#    amino acid coordinates, but simply drops them.
# 2. In other cases, constellations lists deletions in amino acid positions like
#    this:
#    "s:HV69-"
#    While this notation could be parsed such lines are currently *also* dropped
#    because it's not entirely clear how lineagespot describes deletions.
# 3. In some cases, constellation also provides mutations in mature peptide
#    coordinates, like "nsp15:K259R". Lines like this are currently dropped, too.
# 4. The constellations data provided by
#    https://github.com/cov-lineages/constellations
#    lists mostly lineage-defining mutations that can be used to *distinguish*
#    between lineages, but makes no attempt to provide complete lists of mutations
#    (even through parent lineage definitions) for any lineage.

import argparse
import json
import os
import re
import sys


# Maps the gene spellings accepted on input to the canonical gene names
# used in the output files.
genes_names_translation = {
    "orf1a": "ORF1a",
    "orf1ab": "ORF1ab",
    "1ab": "ORF1ab",
    "orf1b": "ORF1b",
    "s": "S",
    "spike": "S",
    "orf3a": "ORF3a",
    "e": "E",
    "m": "M",
    "n": "N",
    "orf6": "ORF6",
    "orf7a": "ORF7a",
    "orf7b": "ORF7b",
    "orf8": "ORF8",
    "8": "ORF8",
    # NOTE: in constellations, mutations are sometimes, but not always, given
    # in nsp coordinates instead of ORF1a/b ones. Currently, we drop these,
    # while we should convert instead!!!
    "nsp2": "NSP2",
    "nsp3": "NSP3",
    "nsp4": "NSP4",
    "nsp5": "NSP5",
    "nsp6": "NSP6",
    "nsp7": "NSP7",
    "nsp8": "NSP8",
    "nsp9": "NSP9",
    "nsp10": "NSP10",
    "nsp12": "NSP12",
    "nsp13": "NSP13",
    "nsp14": "NSP14",
    "nsp15": "NSP15",
    "nsp16": "NSP16",
}


# Only mutations in these genes are written to the output files; anything
# else collected in ``definitions`` (NSP*, "nuc", ...) is dropped on output.
lineagespot_template = dict.fromkeys(
    ["ORF1a", "ORF1b", "S", "ORF3a", "M", "ORF6", "ORF7a", "ORF7b", "ORF8", "E", "N"]
)

# Global result store: {lineage name: {gene: {position: (ref, alt)}}}
definitions = {}

pat = re.compile(r'(?P<gene>.+):(?P<ref>[A-Z]+)(?P<pos>\d+)(?P<alt>[A-Z*]+)')

# Names of lineages whose parsing has started but not finished yet. Used to
# detect circular parent_lineage references instead of recursing forever.
_lineages_in_progress = set()


def _parse_sites(mutations):
    """Return ``{gene: {pos: (ref, alt)}}`` for all parseable *mutations*."""
    sites = {}
    for mut in mutations:
        match = pat.match(mut)
        if match is None:
            # A deletion ("del:22029:6", "s:HV69-") or some other notation
            # the pattern does not cover. Base-level SNVs ("nuc:C8986T") DO
            # match and are stored under the pseudo-gene "nuc"; they are
            # only discarded when the output is written.
            continue
        # try to get a canonical gene name
        gene = genes_names_translation.get(
            match.group('gene'),
            match.group('gene')
        )
        pos = int(match.group('pos'))
        if gene == 'ORF1ab':
            # constellations isn't very consistent in representing ORF1ab
            # mutations. They may be provided in ORF1a or ORF1b coordinates,
            # but could also just be given as ORF1ab.
            if pos <= 4401:
                gene = 'ORF1a'
            else:
                gene = 'ORF1b'
                # 4715 == 314 in constellations
                pos = pos - 4401
        sites.setdefault(gene, {})[pos] = (match.group('ref'), match.group('alt'))
    return sites


def read_lineage_variants(x, lineage_name):
    """Parse one constellations definition and store it in ``definitions``.

    x: open text file object holding the constellations JSON. Its ``name``
       attribute is only used to locate parent definition files, which are
       expected as ``c<parent>.json`` in the same folder.
    lineage_name: key under which the result is stored in ``definitions``.

    Parent lineages that have not been parsed yet are parsed recursively
    and stored in ``definitions``, too. Parent mutations are added to the
    child's signature; where parent and child define the same position of
    a gene, the child's mutation wins.

    Raises FileNotFoundError if a parent definitions file is missing and
    ValueError if the parent_lineage references form a cycle.
    """
    if lineage_name in _lineages_in_progress:
        raise ValueError(
            f"Circular parent_lineage reference detected while parsing "
            f"{lineage_name}."
        )
    _lineages_in_progress.add(lineage_name)
    try:
        data = json.load(x)
        sites = _parse_sites(data["sites"])

        # recursively parse parent lineages and
        # add their mutations to the global definitions
        if "parent_lineage" in data["variant"]:
            x_parent = data["variant"]["parent_lineage"]
            if x_parent not in definitions:
                parent_filename = f"c{x_parent}.json"
                lineage_def_dir = os.path.dirname(x.name)
                parent_file = os.path.join(lineage_def_dir, parent_filename)
                if not os.path.isfile(parent_file):
                    raise FileNotFoundError(
                        f"{x_parent} is defined as a parent of {lineage_name}, but "
                        f"definitions file {parent_filename} not found in "
                        f"{lineage_def_dir}."
                    )
                with open(parent_file, encoding="utf-8") as parent_in:
                    read_lineage_variants(parent_in, x_parent)

            # update the sites dictionary to include also mutations defined
            # for the parent. Mutations are copied into dicts owned by the
            # child so that parent and child never share mutable state.
            for gene, muts in definitions[x_parent].items():
                child_muts = sites.setdefault(gene, {})
                for pos, ref_alt in muts.items():
                    # exotic case of a parent site being affected in the
                    # child lineage again: keep the child site unaltered.
                    child_muts.setdefault(pos, ref_alt)
        # done with this lineage and all of its parents
        definitions[lineage_name] = sites
    finally:
        _lineages_in_progress.discard(lineage_name)


def main(argv=None):
    """Convert all constellations definitions of a folder.

    argv: command line arguments without the program name; defaults to
          ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True,
        help="Name of the input folder"
    )
    parser.add_argument(
        "-o", "--output", required=True,
        help="Name of the output folder"
    )
    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        sys.exit('Please run with -h / --help for help.')

    args = parser.parse_args(argv)

    for definitions_file in os.listdir(args.input):
        # In constellations, the only reliable way to get the lineage name is
        # from the file name by stripping the .json suffix from it and dropping
        # the leading 'c' (e.g. cBA.5.json holds the definition for lineage
        # BA.5).
        if not (definitions_file.startswith('c') and definitions_file.endswith('.json')):
            continue
        lineage_name_from_file = definitions_file[1:-5]
        if lineage_name_from_file in definitions:
            # seems we have parsed this lineage already as a parent of another
            # lineage
            continue
        with open(os.path.join(args.input, definitions_file), encoding="utf-8") as data_in:
            read_lineage_variants(data_in, lineage_name_from_file)

    # make sure the output folder exists before writing into it
    os.makedirs(args.output, exist_ok=True)
    for lineage, sites in definitions.items():
        with open(os.path.join(args.output, lineage) + '.txt', "w", encoding="utf-8") as data_out:
            data_out.write('gene\tamino acid\n')
            for gene, muts in sites.items():
                if gene in lineagespot_template:
                    for pos, ref_alt in muts.items():
                        data_out.write(f'{gene}\t{ref_alt[0]}{pos}{ref_alt[1]}\n')


if __name__ == "__main__":
    main()
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 lineagespot_verbose.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lineagespot_verbose.R Tue Aug 08 15:12:08 2023 +0000 |
b |
## How to execute this tool

# A command-line interface to lineagespot for use with Galaxy
#
# The following arguments are required:
#
# 'in_vcf' a character vector of paths to VCF files object from Galaxy lineagespot/test-data
# 'in_gff3' a character vector of path to GFF3 file containing SARS-CoV-2 gene coordinates object from Galaxy lineagespot/test-data
# 'in_ref' a character vector of path to a folder containing lineage reports object from Galaxy lineagespot/test-data
# 'in_voc' a character vector containing the names of the lineages of interest
# 'in_threshold' a parameter indicating the AF threshold for identifying variants per sample
#
# Rscript ${__tool_directory__}/lineagespot_verbose.R --in_vcf ${__tool_directory__}/test-data/extdata/vcf-files --in_gff3 ${__tool_directory__}/test-data/extdata/NC_045512.2_annot.gff3 --in_ref ${__tool_directory__}/test-data/extdata/ref --in_voc "B.1.617.2, B.1.1.7, B.1.351, P.1" --in_threshold 0.8

# Set up R error handling to go to stderr
options(show.error.messages = FALSE, error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
})

# Import required libraries

library_path <- .libPaths()

suppressPackageStartupMessages({
    library("getopt", lib.loc = library_path)
    library("data.table", lib.loc = library_path)
    library("lineagespot", lib.loc = library_path)
})

# The option is spelled "stringsAsFactors"; a misspelled name is stored by
# options() without complaint but has no effect on data.frame creation.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

# Get options using the spec as defined by the enclosed matrix.
# getopt() reads the command line itself (default: commandArgs(TRUE)).
# Columns: long name, short name, argument mask (1 = required argument,
# 2 = optional argument), data type.
option_specification <- matrix(c(
    "in_vcf", "vcf", 1, "character",
    "in_gff3", "gff3", 1, "character",
    "in_ref", "ref", 1, "character",
    "in_voc", "voc", 2, "character",
    "in_threshold", "thr", 2, "double"
), byrow = TRUE, ncol = 4)

options <- getopt(option_specification)

# --in_voc is a single comma-separated string. Split it into one lineage
# name per element and strip surrounding whitespace so that a value like
# "B.1.617.2, B.1.1.7" does not produce names with a leading blank.
if (!is.null(options$in_voc) && is.character(options$in_voc)) {
    options$in_voc <- trimws(unlist(strsplit(options$in_voc, split = ",")))
}

result <- lineagespot(vcf_folder = options$in_vcf,
                      ref_folder = options$in_ref,
                      gff3_path = options$in_gff3,
                      voc = options$in_voc,
                      AF_threshold = options$in_threshold)


# Write output to new file which will be recognized by Galaxy
fwrite(result$variants.table, sep = "\t", file = "variants_table.txt", row.names = FALSE)
fwrite(result$lineage.hits, sep = "\t", file = "lineage_hits.txt", row.names = FALSE)
fwrite(result$lineage.report, sep = "\t", file = "lineage_report.txt", row.names = FALSE)

cat("\n Process has been completed !\n")
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 lineagespot_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lineagespot_wrapper.xml Tue Aug 08 15:12:08 2023 +0000 |
[ |
b'@@ -0,0 +1,221 @@\n+<tool id="lineagespot" name="lineagespot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">\n+ <description>identifies SARS-CoV-2 lineages contributing to metagenomic samples from per-sample variant files</description>\n+ <macros>\n+ <token name="@TOOL_VERSION@">1.4.0</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ </macros>\n+ <xrefs>\n+ <xref type="bio.tools">lineagespot</xref>\n+ <xref type="bioconductor">lineagespot</xref>\n+ </xrefs>\n+ <requirements>\n+ <requirement type="package" version="4.3.1">r-base</requirement>\n+ <requirement type="package" version="1.20.3">r-getopt</requirement>\n+ <requirement type="package" version="@TOOL_VERSION@">bioconductor-lineagespot</requirement>\n+ <requirement type="package" version="3.11">python</requirement>\n+ </requirements>\n+ <command detect_errors="exit_code"><![CDATA[\n+## Prepare lineage definitions\n+mkdir refs &&\n+#if str($voc_source.choice) == \'cached\':\n+ python \'${__tool_directory__}/convert_lineage_defs.py\' -i \'${voc_source.constellations.fields.path}/definitions\' -o refs &&\n+#end if\n+\n+## Symlink the VCFs of all samples into a vcfs folder\n+## and all custom lineage definitions into refs/\n+sh arrange_custom_inputs.sh &&\n+\n+#if str($ann_data.choice) == \'custom\':\n+ ## lineagespot insists on a .gff3 suffix for the annotation file\n+ ln -s \'$ann_data.in_gff3\' custom.gff3 &&\n+#end if\n+\n+Rscript \'${__tool_directory__}/lineagespot_verbose.R\'\n+ --in_vcf vcfs/\n+ --in_ref refs/\n+ #if str($ann_data.choice) == \'standard\':\n+ --in_gff3 \'${__tool_directory__}/NC_045512.2_annot.gff3\'\n+ #else:\n+ --in_gff3 custom.gff3\n+ #end if\n+ --in_threshold $in_threshold\n+ ]]></command>\n+ <configfiles>\n+ <configfile filename="arrange_custom_inputs.sh"><![CDATA[mkdir vcfs &&\n+#for $vcf in $in_vcf:\n+ #set $sample_name = $vcf.element_identifier.replace("\'", \'_\').replace(\'/\', \'_\')\n+ln -s \'$vcf\' \'vcfs/${sample_name}.vcf\' &&\n+#end for\n+#set 
$num_samples = len($in_vcf)\n+echo "Gathered $num_samples samples for lineagespot run."\n+#if str($voc_source.choice) == \'custom\':\n+ #for $lineage_def in $voc_source.collection:\n+ #set $lineage_name = $lineage_def.element_identifier.replace("\'", \'_\').replace(\'/\', \'_\')\n+ln -s \'$lineage_def\' \'refs/${lineage_name}.txt\' &&\n+#set $num_lineages = len(list($voc_source.collection))\n+ #end for\n+echo "Gathered $num_lineages custom lineage definitions for the analysis."\n+#end if]]></configfile>\n+ </configfiles>\n+ <inputs>\n+ <param type="data" name="in_vcf" format="vcf" multiple="true" label="Per-sample variant calling data" />\n+ <conditional name="voc_source">\n+ <param name="choice" type="select"\n+ label="Source of lineage definitions"\n+ help="Lineagespot detects lineage evidence based on lineage mutation patterns defined in tool-specific definition files. The Galaxy tool also has experimental support for reading lineage definitions from cached constellations data (see tool help below).">\n+ <option value="custom">Collection of lineagespot-specific definitions in history</option>\n+ <option value="cached">Lineage definitions from cached constellations data</option>\n+ </param>\n+ <when value="custom">\n+ <param name="collection" type="data_collection" collection_type="list" format="tabular" label="Collection of lineage definitions" />\n+ </when>\n+ <when value="cached">\n+ <param name="constellations" label="Cached constellations release" type="select">\n+ <options from_data_table="pangolin_constellations">\n+ <column name="value" index="0" />\n+ <column name="description" index="1" />\n+ <column name="date" index="3" />\n+ <column name="path" index="4" />\n+ <fi'..b'_contents>\n+ </output>\n+ <output name="variants_table">\n+ <assert_contents>\n+ <has_n_lines n="1829"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <!-- test with built-in genome annotation and cached constellations -->\n+ <test expect_num_outputs="3">\n+ <param name="in_vcf" 
value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/>\n+ <conditional name="ann_data">\n+ <param name="choice" value="standard"/>\n+ </conditional>\n+ <conditional name="voc_source">\n+ <param name="choice" value="cached"/>\n+ <param name="constellations" value="v0.1.12"/>\n+ </conditional>\n+ <output name="lineage_hits">\n+ <assert_contents>\n+ <has_n_lines n="123"/>\n+ </assert_contents>\n+ </output>\n+ <output name="lineage_report">\n+ <assert_contents>\n+ <has_n_lines n="10"/>\n+ </assert_contents>\n+ </output>\n+ <output name="variants_table">\n+ <assert_contents>\n+ <has_n_lines n="1829"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+**lineagespot** - Detection of SARS-CoV-2 lineages in wastewater samples using next-generation sequencing\n+\n+The Galaxy tool wraps the functionality of the `lineagespot Bioconductor package <https://doi.org/doi:10.18129/B9.bioc.lineagespot>`__.\n+\n+**Inputs**\n+\n+*VCF datasets with per-sample variant calls*\n+\n+The tool accepts any number of input datasets in VCF format, for which it will generate reports of SARS-CoV-2 lineage evidence. These datasets need to be annotated using the VCF annotation standard field \'ANN\' as added, for example, by SnpEff.\n+\n+*Lineage definitions*\n+\n+The tool requires definitions of mutation profiles for lineages of interest (it cannot find evidence for lineages it does not know about). 
These can be provided as a collection of simple 2-columns tabular datasets, like this definition for lineage B.1.617.2::\n+\n+ gene amino acid\n+ ORF1b P314L\n+ ORF1b G662S\n+ ORF1b P1000L\n+ S T19R\n+ S G142D\n+ S E156G\n+ S del157/158\n+ S L452R\n+ S T478K\n+ S D614G\n+ S P681R\n+ S D950N\n+ ORF3a S26L\n+ M I82T\n+ ORF7a V82A\n+ ORF7a T120I\n+ ORF8 D119I\n+ ORF8 del120/121\n+ N D63G\n+ N R203M\n+ N D377Y\n+\n+where the gene names should match those used by the upstream tool producing the VCF ANN field.\n+\n+Alternatively, lineage definitions can be extracted from `constellations <https://github.com/cov-lineages/constellations>`__ data cached on the Galaxy server.\n+\n+.. class:: Warning mark\n+\n+ Please note that extraction of lineage definitions from constellations data is still **experimental**.\n+ The conversion process may drop some lineage defining mutations and shouldn\'t be trusted blindly.\n+ For full and up to date details see the `comments in the conversion script <https://github.com/search?q=repo%3Agalaxyproject%2Ftools-iuc+path%3A%2F%5Etools%5C%2Flineagespot%5C%2F%2F+convert_lineage_defs.py&type=code>`__.\n+\n+*Genome feature annotations*\n+\n+To learn about the position of the genes mentioned in VCF ANN fields and the lineage definitions on the genome, the tool requires an additional genome feature annotation input in gff3 format.\n+You should normally just use the offered built-in genome annotation file.\n+A custom annotation file would only be required if you\'ve mapped your sequencing data to a different reference genome than NC_045512.2 that isn\'t positionally identical to it.\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1038/s41598-022-06625-6</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/AY.1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/AY.1.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,32 @@ +gene amino acid +ORF1a A1306S +ORF1a P2046L +ORF1a P2287S +ORF1a V2930L +ORF1a T3255I +ORF1a T3646A +ORF1b P314L +ORF1b G662S +ORF1b P1000L +ORF1b A1918V +S T19R +S E156G +S del157/158 +S W258L +S K417N +S L452R +S T478K +S D614G +S P681R +S D950N +ORF3a S26L +M I82T +ORF7a V82A +ORF7a T120I +ORF7b T40I +ORF8 D119I +ORF8 del120/121 +N D63G +N R203M +N G215C +N D377Y \ No newline at end of file |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/B.1.351.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B.1.351.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,19 @@ +gene amino acid +ORF1a T265I +ORF1a K1655N +ORF1a K3353R +ORF1a S3675K +ORF1a del3676/3678 +ORF1b P314L +S D80A +S D215G +S del241/243 +S K417N +S E484K +S N501Y +S D614G +S A701V +ORF3a Q57H +ORF3a S171L +E P71L +N T205I \ No newline at end of file |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/B.1.617.2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B.1.617.2.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,22 @@ +gene amino acid +ORF1b P314L +ORF1b G662S +ORF1b P1000L +S T19R +S G142D +S E156G +S del157/158 +S L452R +S T478K +S D614G +S P681R +S D950N +ORF3a S26L +M I82T +ORF7a V82A +ORF7a T120I +ORF8 D119I +ORF8 del120/121 +N D63G +N R203M +N D377Y \ No newline at end of file |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/NC_045512.2_annot.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NC_045512.2_annot.gff3 Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,28 @@ +##sequence-region NC_045512.2 1 29903 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=2697049 +NC_045512.2 Giorgi CDS 266 805 . + . NSP1 Leader protein +NC_045512.2 Giorgi CDS 806 2719 . + . NSP2 Non-Structural protein 2 +NC_045512.2 Giorgi CDS 2720 8554 . + . NSP3 Predicted phosphoesterase, papain-like proteinase +NC_045512.2 Giorgi CDS 8555 10054 . + . NSP4 Transmembrane protein +NC_045512.2 Giorgi CDS 10055 10972 . + . NSP5 3C-like proteinase +NC_045512.2 Giorgi CDS 10973 11842 . + . NSP6 Transmembrane protein +NC_045512.2 Giorgi CDS 11843 12091 . + . NSP7 Non-Structural Protein 7 +NC_045512.2 Giorgi CDS 12092 12685 . + . NSP8 Non-Structural Protein 8 +NC_045512.2 Giorgi CDS 12686 13024 . + . NSP9 ssRNA-binding protein +NC_045512.2 Giorgi CDS 13025 13441 . + . NSP10 Growth-factor-like protein +NC_045512.2 Giorgi CDS 13442 13468 . + . NSP12a RNA-dependent RNA polymerase, pre-ribosomal frameshift +NC_045512.2 Giorgi CDS 13468 16236 . + . NSP12b RNA-dependent RNA polymerase, post-ribosomal frameshift +NC_045512.2 Giorgi CDS 16237 18039 . + . NSP13 Helicase +NC_045512.2 Giorgi CDS 18040 19620 . + . NSP14 3'-to-5' exonuclease +NC_045512.2 Giorgi CDS 19621 20658 . + . NSP15 endoRNAse +NC_045512.2 Giorgi CDS 20659 21552 . + . NSP16 2'-O-ribose methyltransferase +NC_045512.2 Giorgi CDS 21563 25384 . + . S Spike +NC_045512.2 Giorgi CDS 25393 26220 . + . ORF3a ORF3a protein +NC_045512.2 Giorgi CDS 26245 26472 . + . E Envelope +NC_045512.2 Giorgi CDS 26523 27191 . + . M Membrane +NC_045512.2 Giorgi CDS 27202 27387 . + . ORF6 ORF6 protein +NC_045512.2 Giorgi CDS 27394 27759 . + . ORF7a ORF7a protein +NC_045512.2 Giorgi CDS 27756 27887 . + . ORF7b ORF7b protein +NC_045512.2 Giorgi CDS 27894 28259 . + . ORF8 ORF8 protein +NC_045512.2 Giorgi CDS 28274 29533 . + . N Nucleocapsid protein +NC_045512.2 Giorgi CDS 29558 29674 . + . ORF10 ORF10 protein |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/P.1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/P.1.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,24 @@ +gene amino acid +ORF1a S1188L +ORF1a K1795Q +ORF1a S3675K +ORF1a del3676/3678 +ORF1b P314L +ORF1b E1264D +S L18F +S T20N +S P26S +S D138Y +S R190S +S K417T +S E484K +S N501Y +S D614G +S H655Y +S T1027I +S V1176F +ORF3a S253P +ORF8 E92K +N P80R +N R203K +N G204R \ No newline at end of file |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/SampleA_freebayes_ann.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SampleA_freebayes_ann.vcf Tue Aug 08 15:12:08 2023 +0000 |
b |
b'@@ -0,0 +1,645 @@\n+##fileformat=VCFv4.2\r\n+##fileDate=20210401\r\n+##source=freeBayes v1.3.2-dirty\r\n+##reference=NC_045512.fasta\r\n+##contig=<ID=NC_045512.2,length=29903>\r\n+##phasing=none\r\n+##commandline="freebayes -f NC_045512.fasta -F 0.01 -C 1 --pooled-continuous L4_S1_L001_sorted_uniq.bam"\r\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\r\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\r\n+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\r\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\r\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\r\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\r\n+##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype.">\r\n+##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype.">\r\n+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">\r\n+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\r\n+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\r\n+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\r\n+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\r\n+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\r\n+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward 
strand">\r\n+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\r\n+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\r\n+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\r\n+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\r\n+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">\r\n+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL 
and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed '..b'10|protein_coding|1/1|c.494C>A|p.Thr165Lys|494/1260|494/1260|165/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:31:30,1:30:1120:1:9:0,-8.43193,-100.207\r\n+NC_045512.2\t28771\t.\tATTGC\tATTTGC\t1.96799E-13\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1M1I4M;DP=26;DPB=26.2;DPRA=0;EPP=5.18177;EPPR=13.5202;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=30.7266;PAIRED=1;PAIREDR=0.96;PAO=0;PQA=0;PQR=0;PRO=0;QA=38;QR=889;RO=25;RPL=1;RPP=5.18177;RPPR=3.79203;RPR=0;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=7;SRP=13.5202;SRR=18;TYPE=ins;ANN=ATTTGC|frameshift_variant|HIGH|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.500dupT|p.Leu167fs|501/1260|501/1260|167/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:26:25,1:25:889:1:38:0,-4.02951,-76.5246\r\n+NC_045512.2\t28881\t.\tGGG\tAAC\t423.825\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=14;CIGAR=3X;DP=14;DPB=14;DPRA=0;EPP=33.4109;EPPR=0;GTI=0;LEN=3;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=24.0133;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=529;QR=0;RO=0;RPL=7;RPP=3.0103;RPPR=0;RPR=7;RUN=1;SAF=7;SAP=3.0103;SAR=7;SRF=0;SRP=0;SRR=0;TYPE=complex;ANN=AAC|missense_variant|MODERATE|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.608_610delGGGinsAAC|p.ArgGly203LysArg|608/1260|608/1260|203/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:14:0,14:0:0:14:529:-47.9547,-4.21442,0\r\n+NC_045512.2\t28923\t.\tC\tA\t0.0150634\t.\tAB=0.142857;ABP=18.5208;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=14;DPB=14;DPRA=0;EPP=3.0103;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=5.6623;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=448;RO=12;RPL=2;RPP=7.35324;RPPR=29.068;RPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=6;SRP=3.0103;SRR=6;TYPE=snp;ANN=A|missense_variant|MODERATE|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.650C>A|p.Ala217Asp|650/1260|650/1260|217/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:14:12,2:12:448:2:76:-3.00039,0,
-36.4527\r\n+NC_045512.2\t28942\t.\tG\tT\t7.11169E-9\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=14;DPB=14;DPRA=0;EPP=5.18177;EPPR=3.17734;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=20.2301;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=14;QR=476;RO=13;RPL=1;RPP=5.18177;RPPR=31.2394;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=6;SRP=3.17734;SRR=7;TYPE=snp;ANN=T|synonymous_variant|LOW|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.669G>T|p.Leu223Leu|669/1260|669/1260|223/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:14:13,1:13:476:1:14:0,-2.81443,-41.7784\r\n+NC_045512.2\t28977\t.\tC\tT\t400.315\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=14;CIGAR=1X;DP=14;DPB=14;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=24.0133;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=503;QR=0;RO=0;RPL=14;RPP=33.4109;RPPR=0;RPR=0;RUN=1;SAF=7;SAP=3.0103;SAR=7;SRF=0;SRP=0;SRR=0;TYPE=snp;ANN=T|missense_variant|MODERATE|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.704C>T|p.Ser235Phe|704/1260|704/1260|235/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:14:0,14:0:0:14:503:-45.6037,-4.21442,0\r\n+NC_045512.2\t29438\t.\tC\tT\t5.172E-5\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=10;DPB=10;DPRA=0;EPP=5.18177;EPPR=3.25157;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=11.3382;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=30;QR=336;RO=9;RPL=0;RPP=5.18177;RPPR=22.5536;RPR=1;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=4;SRP=3.25157;SRR=5;TYPE=snp;ANN=T|stop_gained|HIGH|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.1165C>T|p.Gln389*|1165/1260|1165/1260|389/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:10:9,1:9:336:1:30:0,-0.0107335,-27.5939\r\n+NC_045512.2\t29682\t.\tTAATC\tTATC\t0.1631\t.\tAB=0.2;ABP=6.91895;AC=1;AF=0.5;AN=2;AO=1;CIGAR=1M1D3M;DP=5;DPB=4.8;DPRA=0;EPP=5.18177;EPPR=11.6962;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=3.26311;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=38;QR=151;RO=4;RPL=1;RPP=5.18177;RPPR=11.6962;RPR=0;RUN=1;SAF=0;SAP=5.18177;SA
R=1;SRF=0;SRP=11.6962;SRR=4;TYPE=del;ANN=TATC|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4300delA|||||4300|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:5:4,1:4:151:1:38:-2.29212,0,-12.4528\r\n' |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/SampleB_freebayes_ann.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SampleB_freebayes_ann.vcf Tue Aug 08 15:12:08 2023 +0000 |
b |
b'@@ -0,0 +1,983 @@\n+##fileformat=VCFv4.2\r\n+##fileDate=20210521\r\n+##source=freeBayes v1.3.2-dirty\r\n+##reference=NC_045512.fasta\r\n+##contig=<ID=NC_045512.2,length=29903>\r\n+##phasing=none\r\n+##commandline="freebayes -f NC_045512.fasta -F 0.01 -C 1 --pooled-continuous L14_S3_L001_sorted_uniq.bam"\r\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\r\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\r\n+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\r\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\r\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\r\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\r\n+##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype.">\r\n+##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype.">\r\n+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">\r\n+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\r\n+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\r\n+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\r\n+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\r\n+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\r\n+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward 
strand">\r\n+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\r\n+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\r\n+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\r\n+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\r\n+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">\r\n+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL 
and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed'..b'.05288;SRR=25;TYPE=snp;ANN=T|missense_variant|MODERATE|ORF10|GU280_gp11|transcript|GU280_gp11|protein_coding|1/1|c.37A>T|p.Ile13Leu|37/117|37/117|13/38||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:52:51,1:51:1885:1:10:0,-14.6536,-168.911\r\n+NC_045512.2\t29607\t.\tT\tC\t2.67497E-15\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=51;DPB=51;DPRA=0;EPP=5.18177;EPPR=4.57376;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=67.9229;PAIRED=1;PAIREDR=0.92;PAO=0;PQA=0;PQR=0;PRO=0;QA=18;QR=1843;RO=50;RPL=1;RPP=5.18177;RPPR=42.0968;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=24;SRP=3.18402;SRR=26;TYPE=snp;ANN=C|missense_variant|MODERATE|ORF10|GU280_gp11|transcript|GU280_gp11|protein_coding|1/1|c.50T>C|p.Leu17Pro|50/117|50/117|17/38||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:51:50,1:50:1843:1:18:0,-13.5526,-164.33\r\n+NC_045512.2\t29735\t.\tA\tC\t3.28521E-15\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=5.18177;EPPR=3.06598;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=54.9889;PAIRED=0;PAIREDR=0.923077;PAO=0;PQA=0;PQR=0;PRO=0;QA=9;QR=1446;RO=39;RPL=1;RPP=5.18177;RPPR=87.6977;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=20;SRP=3.06598;SRR=19;TYPE=snp;ANN=C|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4351A>C|||||4351|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:40:39,1:39:1446:1:9:0,-11.1412,-129.528\r\n+NC_045512.2\t29740\t.\tA\tC\t0.0\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=39;DPB=39;DPRA=0;EPP=5.18177;EPPR=3.23888;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=53.3977;PAIRED=1;PAIREDR=0.921053;PAO=0;PQA=0;PQR=0;PRO=0;QA=10;QR=1417;RO=38;RPL=1;RPP=5.18177;RPPR=85.5263;RPR=0;RUN=1;SAF=0;SAP=5.18177;SAR=1;SRF=20;SRP=3.23888;SRR=18;TYPE=snp;ANN=C|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4356A>C|||||4356|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:39:38,1:38:141
7:1:10:0,-10.7402,-126.821\r\n+NC_045512.2\t29765\t.\tT\tG\t3.01588E-16\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=5.18177;EPPR=3.06598;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=55.2191;PAIRED=1;PAIREDR=0.897436;PAO=0;PQA=0;PQR=0;PRO=0;QA=8;QR=1371;RO=39;RPL=1;RPP=5.18177;RPPR=87.6977;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=20;SRP=3.06598;SRR=19;TYPE=snp;ANN=G|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4381T>G|||||4381|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:40:39,1:39:1371:1:8:0,-11.2412,-122.867\r\n+NC_045512.2\t29784\t.\tC\tG\t5.71063E-15\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=39;DPB=39;DPRA=0;EPP=5.18177;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=54.0884;PAIRED=0;PAIREDR=0.921053;PAO=0;PQA=0;PQR=0;PRO=0;QA=7;QR=1380;RO=38;RPL=1;RPP=5.18177;RPPR=85.5263;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=19;SRP=3.0103;SRR=19;TYPE=snp;ANN=G|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4400C>G|||||4400|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:39:38,1:38:1380:1:7:0,-11.0402,-123.787\r\n+NC_045512.2\t29798\t.\tG\tA\t1.32952E-12\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=21;DPB=21;DPRA=0;EPP=5.18177;EPPR=38.1882;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=28.8331;PAIRED=1;PAIREDR=0.95;PAO=0;PQA=0;PQR=0;PRO=0;QA=11;QR=756;RO=20;RPL=1;RPP=5.18177;RPPR=46.4397;RPR=0;RUN=1;SAF=1;SAP=5.18177;SAR=0;SRF=1;SRP=38.1882;SRR=19;TYPE=snp;ANN=A|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4414G>A|||||4414|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:21:20,1:20:756:1:11:0,-5.22163,-67.2701\r\n+NC_045512.2\t29857\t.\tC\tA\t1.90202E-6\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=1;CIGAR=1X;DP=15;DPB=15;DPRA=0;EPP=5.18177;EPPR=33.4109;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=14.6411;PAIRED=1;PAIREDR=0.928571;PAO=0;PQA=0;PQR=0;PRO=0;QA=38;QR=532;RO=14;RPL=1;RPP=5.18177;RPPR=33.4109;RPR=0;RUN=1;SAF=0;SAP=5.18177;SAR
=1;SRF=0;SRP=33.4109;SRR=14;TYPE=snp;ANN=A|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4473C>A|||||4473|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:15:14,1:14:532:1:38:0,-0.718181,-44.428\r\n' |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/SampleC_freebayes_ann.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SampleC_freebayes_ann.vcf Tue Aug 08 15:12:08 2023 +0000 |
b |
b'@@ -0,0 +1,364 @@\n+##fileformat=VCFv4.2\r\n+##fileDate=20210722\r\n+##source=freeBayes v1.3.2-dirty\r\n+##reference=NC_045512.fasta\r\n+##contig=<ID=NC_045512.2,length=29903>\r\n+##phasing=none\r\n+##commandline="freebayes -f NC_045512.fasta -F 0.01 -C 1 --pooled-continuous L23MM_S3_sorted_uniq.bam"\r\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">\r\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">\r\n+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">\r\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">\r\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\r\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">\r\n+##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype.">\r\n+##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype.">\r\n+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">\r\n+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">\r\n+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">\r\n+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">\r\n+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">\r\n+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">\r\n+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward 
strand">\r\n+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">\r\n+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">\r\n+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">\r\n+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\r\n+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">\r\n+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL 
and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding\'s inequality">\r\n+##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Le'..b'PL=47;RPP=14.748;RPPR=272.678;RPR=27;RUN=1;SAF=36;SAP=3.12768;SAR=38;SRF=1756;SRP=3.15001;SRR=1741;TYPE=snp;ANN=C|synonymous_variant|LOW|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.756T>C|p.Ala252Ala|756/1260|756/1260|252/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:3572:3497,74:3497:120356:74:2455:0,-898.696,-9450.46\r\n+NC_045512.2\t29039\t.\tA\tT\t0.0\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=149;CIGAR=1X;DP=3516;DPB=3516;DPRA=0;EPP=4.77371;EPPR=20.1454;GTI=0;LEN=1;MEANALT=1;MQM=37.7181;MQMR=47.6341;NS=1;NUMALT=1;ODDS=3473.2;PAIRED=0.973154;PAIREDR=0.985744;PAO=0;PQA=0;PQR=0;PRO=0;QA=5164;QR=118588;RO=3367;RPL=86;RPP=10.7198;RPPR=174.061;RPR=63;RUN=1;SAF=76;SAP=3.14146;SAR=73;SRF=1658;SRP=4.68776;SRR=1709;TYPE=snp;ANN=T|stop_gained|HIGH|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.766A>T|p.Lys256*|766/1260|766/1260|256/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:3516:3367,149:3367:118588:149:5164:0,-709.856,-9327.62\r\n+NC_045512.2\t29049\t.\tG\tA\t0.0\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=101;CIGAR=1X;DP=3347;DPB=3347;DPRA=0;EPP=6.64375;EPPR=12.4923;GTI=0;LEN=1;MEANALT=2;MQM=43.099;MQMR=49.1378;NS=1;NUMALT=1;ODDS=3614.81;PAIRED=0.960396;PAIREDR=0.990133;PAO=0;PQA=0;PQR=0;PRO=0;QA=3321;QR=110630;RO=3243;RPL=47;RPP=4.06379;RPPR=70.2965;RPR=54;RUN=1;SAF=48;SAP=3.54779;SAR=53;SRF=1609;SRP=3.42879;SRR=1634;TYPE=snp;ANN=A|missense_variant|MODERATE|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.776G>A|p.Arg259Gln|776/1260|776/1260|259/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:3347:3243,101:3243:110630:101:3321:0,-752.738,-8988.5\r\n+NC_045512.2\t29067\t.\tCTAAAGCATACAATGT\tCT\t2.70012E-13\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=187;CIGAR=1M14D1M;DP=2118;DPB=1955.62;DPRA=0;EPP=3.02191;EPPR=5.31131;GTI=0;LEN=14;MEANALT=13;MQM=48.7647;MQMR=53.2214;NS=1;NUMALT=1;ODDS=1275.56;PAIRED=0.97861;PAIREDR=0.993197;PAO=1;PQA=34;PQR=101;PRO=3;QA=5285;QR
=66815;RO=1911;RPL=76;RPP=17.2352;RPPR=87.6977;RPR=111;RUN=1;SAF=98;SAP=3.95088;SAR=89;SRF=986;SRP=7.23848;SRR=925;TYPE=del;ANN=CT|frameshift_variant|HIGH|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.798_811delAGCATACAATGTAA|p.Lys266fs|798/1260|798/1260|266/419||INFO_REALIGN_3_PRIME\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:2118:1911,187:1911:66815:187:5285:0,-187.495,-5392.39\r\n+NC_045512.2\t29402\t.\tG\tT\t33928.1\t.\tAB=0.380005;ABP=537.172;AC=1;AF=0.5;AN=2;AO=1623;CIGAR=1X;DP=4271;DPB=4271;DPRA=0;EPP=4.13551;EPPR=7.75762;GTI=0;LEN=1;MEANALT=3;MQM=49.4449;MQMR=52.7301;NS=1;NUMALT=1;ODDS=7812.23;PAIRED=0.997535;PAIREDR=0.995079;PAO=0;PQA=0;PQR=0;PRO=0;QA=57174;QR=92750;RO=2642;RPL=779;RPP=8.66309;RPPR=18.6627;RPR=844;RUN=1;SAF=832;SAP=5.25937;SAR=791;SRF=1339;SRP=4.07549;SRR=1303;TYPE=snp;ANN=T|missense_variant|MODERATE|N|GU280_gp10|transcript|GU280_gp10|protein_coding|1/1|c.1129G>T|p.Asp377Tyr|1129/1260|1129/1260|377/419||\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:4271:2642,1623:2642:92750:1623:57174:-3456.1,0,-6754.07\r\n+NC_045512.2\t29681\t.\tTTAATCA\tTTAATCTAATCA\t0.0\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=59;CIGAR=1M5I6M;DP=2108;DPB=2151.14;DPRA=0;EPP=3.34154;EPPR=24.183;GTI=0;LEN=5;MEANALT=6;MQM=52.322;MQMR=51.6699;NS=1;NUMALT=1;ODDS=2266.18;PAIRED=1;PAIREDR=0.995586;PAO=1;PQA=27.5;PQR=27.5;PRO=1;QA=1893;QR=70994;RO=2039;RPL=34;RPP=5.99147;RPPR=724.345;RPR=25;RUN=1;SAF=31;SAP=3.34154;SAR=28;SRF=963;SRP=16.6089;SRR=1076;TYPE=ins;ANN=TTAATCTAATCA|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4302_*4303insTAATC|||||4303|\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:2108:2039,59:2039:70994:59:1893:0,-462.802,-5912.21\r\n+NC_045512.2\t29742\t.\tG\tT\t36691.9\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=1276;CIGAR=1X;DP=1276;DPB=1276;DPRA=0;EPP=3.69101;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=49.6567;MQMR=0;NS=1;NUMALT=1;ODDS=1773.52;PAIRED=0.996082;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=44133;QR=0;RO=0;RPL=591;RPP=18.0472;RPPR=0;RPR=685;RUN=1;SAF=637;SAP=3.01711;SAR=639;SRF=0
;SRP=0;SRR=0;TYPE=snp;ANN=T|downstream_gene_variant|MODIFIER|S|GU280_gp02|transcript|GU280_gp02|protein_coding||c.*4358G>T|||||4358|\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:1276:0,1276:0:0:1276:44133:-3675.72,-384.114,0\r\n' |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/lineage_hits.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lineage_hits.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
b'@@ -0,0 +1,731 @@\n+Gene_Name\tAA_alt\tsample\tDP\tAD_alt\tAF\tlineage\r\n+M\tI82T\tSampleC_freebayes_ann\t3984\t2770\t0.695281124497992\tAY.1\r\n+N\tD63G\tSampleC_freebayes_ann\t2180\t787\t0.361009174311927\tAY.1\r\n+N\tR203M\tSampleC_freebayes_ann\t4147\t4125\t0.994694960212202\tAY.1\r\n+N\tG215C\tSampleC_freebayes_ann\t4477\t2574\t0.574938574938575\tAY.1\r\n+N\tD377Y\tSampleC_freebayes_ann\t4271\t1623\t0.380004682744088\tAY.1\r\n+ORF1a\tA1306S\tSampleC_freebayes_ann\t2202\t1267\t0.575386012715713\tAY.1\r\n+ORF1a\tP2046L\tSampleC_freebayes_ann\t2996\t1763\t0.58845126835781\tAY.1\r\n+ORF1a\tP2287S\tSampleC_freebayes_ann\t2633\t1563\t0.59361944549943\tAY.1\r\n+ORF1a\tV2930L\tSampleC_freebayes_ann\t1082\t913\t0.843807763401109\tAY.1\r\n+ORF1a\tT3255I\tSampleC_freebayes_ann\t857\t437\t0.509918319719953\tAY.1\r\n+ORF1a\tT3646A\tSampleC_freebayes_ann\t801\t744\t0.928838951310862\tAY.1\r\n+ORF3a\tS26L\tSampleC_freebayes_ann\t4510\t3826\t0.848337028824834\tAY.1\r\n+ORF7a\tV82A\tSampleC_freebayes_ann\t4141\t2302\t0.555904370924897\tAY.1\r\n+ORF7a\tT120I\tSampleC_freebayes_ann\t4981\t2730\t0.548082714314395\tAY.1\r\n+S\tT19R\tSampleC_freebayes_ann\t2058\t1448\t0.703595724003887\tAY.1\r\n+S\tL452R\tSampleC_freebayes_ann\t1753\t1147\t0.654306902452938\tAY.1\r\n+S\tT478K\tSampleC_freebayes_ann\t1708\t1088\t0.637002341920375\tAY.1\r\n+S\tD614G\tSampleA_freebayes_ann\t41\t41\t1\tAY.1\r\n+S\tD614G\tSampleB_freebayes_ann\t15\t15\t1\tAY.1\r\n+S\tD614G\tSampleC_freebayes_ann\t3572\t3570\t0.999440089585666\tAY.1\r\n+S\tP681R\tSampleC_freebayes_ann\t536\t536\t1\tAY.1\r\n+S\tD950N\tSampleC_freebayes_ann\t949\t368\t0.387776606954689\tAY.1\r\n+ORF1b\tP314L\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tG662S\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tP1000L\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tA1918V\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF7b\tT40I\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF8\tD119I\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF8\tdel120/121\t
SampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tE156G\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tdel157/158\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tW258L\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tK417N\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tP314L\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tG662S\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tP1000L\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tA1918V\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF7b\tT40I\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF8\tD119I\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF8\tdel120/121\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tE156G\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tdel157/158\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tW258L\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tK417N\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tP314L\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tG662S\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tP1000L\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF1b\tA1918V\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF7b\tT40I\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF8\tD119I\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+ORF8\tdel120/121\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tE156G\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tdel157/158\tSampleC_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tW258L\tSampleA_freebayes_ann\t0\t0\t0\tAY.1\r\n+S\tK417N\tSampleB_freebayes_ann\t0\t0\t0\tAY.1\r\n+M\tI82T\tSampleC_freebayes_ann\t3984\t2770\t0.695281124497992\tAY.2\r\n+N\tD63G\tSampleC_freebayes_ann\t2180\t787\t0.361009174311927\tAY.2\r\n+N\tR203M\tSampleC_freebayes_ann\t4147\t4125\t0.994694960212202\tAY.2\r\n+N\tD377Y\tSampleC_freebayes_ann\t4271\t1623\t0.380004682744088\tAY.2\r\n+ORF1a\tP1640L\tSampleC_freebayes_ann\t1997\t332\t0.166249374061092\tAY.2\r\n+ORF1a\tA3209V\tSampleC_freebayes_ann\t1065\t373\t0.350234741784038\tAY.2\r\n+ORF1a\tV3718A\tSampleC_freebayes_ann\t2727\t444\t0.162816281628163\tAY.2\r\n+ORF1a\tT3750I\tSampleC_freebayes_a
nn\t2189\t349\t0.159433531292828\tAY.2\r\n+ORF3a\tS26L\tSampleC_freebayes_ann\t4510\t3826\t0.848337028824834\tAY.2\r\n+ORF7a\tV82A\tSampleC_freebayes_ann\t4141\t2302\t0.555904370924897\tAY.2\r\n+ORF7a\tT120I\tSampleC_freebayes_ann\t4981\t2730\t0.548082714314395\tAY.2\r\n+S\tT19R\tSampleC_freebayes_ann\t2058\t1448\t0.703595724003887\tAY.2\r\n+S\tL452R\tSampleC_freebayes_ann\t1753\t1147\t0.654306902452938\tAY.2\r\n+S\tT478K\tSampleC_freebayes_ann\t1708\t1088\t0.637002341920375\tAY.2\r\n+S\tD614G\tSampleA_freebayes_ann\t41\t41\t1\tAY.2\r\n+S\tD614G\tSampleB_freebayes_a'..b'_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tE484K\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tH655Y\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tT1027I\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tV1176F\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+N\tP80R\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF1a\tS1188L\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF1a\tK1795Q\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF1a\tS3675K\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF1a\tdel3676/3678\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF1b\tP314L\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF1b\tE1264D\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF3a\tS253P\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+ORF8\tE92K\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tL18F\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tT20N\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tP26S\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tD138Y\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tR190S\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tK417T\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tE484K\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tH655Y\tSampleA_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tT1027I\tSampleB_freebayes_ann\t0\t0\t0\tP.1.2\r\n+S\tV1176F\tSampleC_freebayes_ann\t0\t0\t0\tP.1.2\r\n+N\tRG203KR\tSampleA_freebayes_ann\t14\t14\t1\tP.1\r\n+N\tRG203KR\tSampleB_freebayes_ann\t69\t68\t0.985507246376812\tP.1\r\n+S\tN501Y\tSampleA_freebayes
_ann\t6\t6\t1\tP.1\r\n+S\tN501Y\tSampleB_freebayes_ann\t4\t4\t1\tP.1\r\n+S\tN501Y\tSampleC_freebayes_ann\t1592\t578\t0.363065326633166\tP.1\r\n+S\tD614G\tSampleA_freebayes_ann\t41\t41\t1\tP.1\r\n+S\tD614G\tSampleB_freebayes_ann\t15\t15\t1\tP.1\r\n+S\tD614G\tSampleC_freebayes_ann\t3572\t3570\t0.999440089585666\tP.1\r\n+N\tP80R\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tS1188L\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tK1795Q\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tS3675K\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tdel3676/3678\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1b\tP314L\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1b\tE1264D\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF3a\tS253P\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF8\tE92K\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tL18F\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tT20N\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tP26S\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tD138Y\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tR190S\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tK417T\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tE484K\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tH655Y\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tT1027I\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tV1176F\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+N\tP80R\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tS1188L\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tK1795Q\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tS3675K\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tdel3676/3678\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1b\tP314L\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1b\tE1264D\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF3a\tS253P\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF8\tE92K\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tL18F\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tT20N\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tP26S\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tD138Y\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tR
190S\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tK417T\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tE484K\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tH655Y\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tT1027I\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tV1176F\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+N\tP80R\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tS1188L\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tK1795Q\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tS3675K\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1a\tdel3676/3678\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1b\tP314L\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF1b\tE1264D\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF3a\tS253P\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+ORF8\tE92K\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tL18F\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tT20N\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tP26S\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tD138Y\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tR190S\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tK417T\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tE484K\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tH655Y\tSampleA_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tT1027I\tSampleB_freebayes_ann\t0\t0\t0\tP.1\r\n+S\tV1176F\tSampleC_freebayes_ann\t0\t0\t0\tP.1\r\n' |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/lineage_report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lineage_report.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,40 @@ +lineage sample meanAF meanAF_uniq minAF_uniq_nonzero N lineage N. rules lineage prop. +AY.1 SampleA_freebayes_ann 0.0833333333333333 0 1 31 0.032258064516129 +AY.1 SampleB_freebayes_ann 0.0833333333333333 0 1 31 0.032258064516129 +AY.1 SampleC_freebayes_ann 0.431625679248108 0 6 31 0.193548387096774 +AY.2 SampleA_freebayes_ann 0.0769230769230769 0 1 29 0.0344827586206897 +AY.2 SampleB_freebayes_ann 0.0769230769230769 0 1 29 0.0344827586206897 +AY.2 SampleC_freebayes_ann 0.331178263776345 0.119819132680874 0.159433531292828 4 29 0.137931034482759 +AY.3 SampleA_freebayes_ann 0.0909090909090909 0 1 30 0.0333333333333333 +AY.3 SampleB_freebayes_ann 0.0909090909090909 0 1 30 0.0333333333333333 +AY.3 SampleC_freebayes_ann 0.446013201889711 0 6 30 0.2 +AY.3.1 SampleA_freebayes_ann 0.0833333333333333 0 1 32 0.03125 +AY.3.1 SampleB_freebayes_ann 0.077724358974359 0.00520833333333333 0.0104166666666667 1 32 0.03125 +AY.3.1 SampleC_freebayes_ann 0.431625679248108 0 6 32 0.1875 +B.1.1.7 SampleA_freebayes_ann 0.707868155236576 0 12 21 0.571428571428571 +B.1.1.7 SampleB_freebayes_ann 0.795825304913559 0 16 21 0.761904761904762 +B.1.1.7 SampleC_freebayes_ann 0.367879297939765 0 3 21 0.142857142857143 +B.1.1.7+S_E484K SampleA_freebayes_ann 0.672474747474747 0 12 22 0.545454545454545 +B.1.1.7+S_E484K SampleB_freebayes_ann 0.757928861822438 0 16 22 0.727272727272727 +B.1.1.7+S_E484K SampleC_freebayes_ann 0.348517229627146 0 3 22 0.136363636363636 +B.1.351 SampleA_freebayes_ann 0.171368704970856 0 2 18 0.111111111111111 +B.1.351 SampleB_freebayes_ann 0.133333333333333 0 2 18 0.111111111111111 +B.1.351 SampleC_freebayes_ann 0.0908336944145888 0 1 18 0.0555555555555556 +B.1.351.2 SampleA_freebayes_ann 0.14688746140359 0 2 21 0.0952380952380952 +B.1.351.2 SampleB_freebayes_ann 0.111111111111111 0 2 21 0.0952380952380952 +B.1.351.2 SampleC_freebayes_ann 0.0756947453454907 0 1 21 0.0476190476190476 +B.1.351.3 SampleA_freebayes_ann 0.14688746140359 0 2 22 
0.0909090909090909 +B.1.351.3 SampleB_freebayes_ann 0.111111111111111 0 2 22 0.0909090909090909 +B.1.351.3 SampleC_freebayes_ann 0.102681930767192 0 1 22 0.0454545454545455 +B.1.617.2 SampleA_freebayes_ann 0.111111111111111 0 1 21 0.0476190476190476 +B.1.617.2 SampleB_freebayes_ann 0.111111111111111 0 1 21 0.0476190476190476 +B.1.617.2 SampleC_freebayes_ann 0.417401700987995 0 4 21 0.19047619047619 +P.1 SampleA_freebayes_ann 0.136363636363636 0 3 22 0.136363636363636 +P.1 SampleB_freebayes_ann 0.13570487483531 0 3 22 0.136363636363636 +P.1 SampleC_freebayes_ann 0.0648812102961349 0 1 22 0.0454545454545455 +P.1.1 SampleA_freebayes_ann 0.0952380952380952 0 2 21 0.0952380952380952 +P.1.1 SampleB_freebayes_ann 0.0952380952380952 0 2 21 0.0952380952380952 +P.1.1 SampleC_freebayes_ann 0.0648812102961349 0 1 21 0.0476190476190476 +P.1.2 SampleA_freebayes_ann 0.136363636363636 0 3 22 0.136363636363636 +P.1.2 SampleB_freebayes_ann 0.13570487483531 0 3 22 0.136363636363636 +P.1.2 SampleC_freebayes_ann 0.0648812102961349 0 1 22 0.0454545454545455 |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/pangolin_constellations.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pangolin_constellations.loc Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,11 @@ +# this is a tab separated file describing the location of constellations databases used for the +# scorpio part of the pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_scorpio_version date path +# +# min_scorpio_version is the minimum scorpio tool major version that is needed to read the constellations data +# +# for example +v0.1.12 constellations release v0.1.12 0.3.17 2023-06-28T09:48:05 ${__HERE__}/test-constellations + |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/test-constellations/definitions/cAY.4.2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-constellations/definitions/cAY.4.2.json Tue Aug 08 15:12:08 2023 +0000 |
[ |
@@ -0,0 +1,30 @@ +{ + "label": "Delta (AY.4.2-like)", + "description": "AY.4.2 lineage defining mutations", + "sources": [ + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "AY.4.2" + ], + "mrca_lineage": "AY.4.2", + "lineage_name": "AY.4.2", + "parent_lineage": "AY.4", + "PHE_label": "V-21OCT-01", + "representative_genome": "" + }, + "tags": [ + "AY.4.2", + "V-21OCT-01" + ], + "sites": [ + "nuc:T17040C", + "spike:A222V", + "spike:Y145H" + ], + "rules": { + "min_alt": 2, + "max_ref": 0 + } +} |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/test-constellations/definitions/cAY.4.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-constellations/definitions/cAY.4.json Tue Aug 08 15:12:08 2023 +0000 |
[ |
@@ -0,0 +1,54 @@ +{ + "label": "Delta (AY.4-like)", + "description": "AY.4 lineage defining mutations", + "sources": [ + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "AY.4" + ], + "mrca_lineage": "AY.4", + "lineage_name": "AY.4", + "parent_lineage": "B.1.617.2", + "incompatible_lineage_calls": [ + "AY.4.2" + ], + "representative_genome": "" + }, + "tags": [ + "AY.4" + ], + "sites": [ + "del:28271:1", + "del:22029:6", + "del:28248:6", + "orf1a:A1306S", + "orf1a:P2046L", + "orf1a:P2287S", + "orf1a:A2529V", + "nuc:C8986T", + "orf1a:V2930L", + "orf1a:T3255I", + "orf1a:T3646A", + "nuc:A11332G", + "orf1b:P314L", + "orf1b:G662S", + "orf1b:P1000L", + "orf1b:A1918V", + "nuc:C27874T", + "n:G215C", + "nuc:G29742T" + ], + "intermediate": [ + "nuc:G210T:0.961862", + "nuc:C241T:0.972018", + "nuc:T17040C:0.468120", + "spike:T95I:0.967111" + ], + "rules": { + "min_alt": 12, + "max_ref": 3, + "orf1a:A2529V": "alt" + } +} |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/test-constellations/definitions/cB.1.351.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-constellations/definitions/cB.1.351.json Tue Aug 08 15:12:08 2023 +0000 |
[ |
@@ -0,0 +1,46 @@ +{ + "label": "Beta (B.1.351-like)", + "description": "Defining of lineage B.1.351", + "sources": [ + "https://www.medrxiv.org/content/10.1101/2020.12.21.20248640v1" + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "B.1.351" + ], + "mrca_lineage": "B.1.351", + "PHE_label": "VOC-20DEC-02", + "WHO_label": "Beta", + "representative_genome": "" + }, + "tags": [ + "B.1.351", + "VOC-20DEC-02", + "VOC202012/02", + "Beta", + "501.V2", + "20H", + "GH" + ], + "sites": [ + "NSP2:T85I", + "ORF1ab:K1655N", + "ORF1ab:K3353R", + "S:D80A", + "S:D215G", + "S:E484K", + "S:N501Y", + "S:A701V", + "ORF3a:Q57H", + "ORF3a:S171L", + "E:P71L", + "N:T205I", + "del:22280:9", + "del:11287:9" + ], + "rules": { + "min_alt": 6, + "max_ref": 3 + } +} |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/test-constellations/definitions/cB.1.617.2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-constellations/definitions/cB.1.617.2.json Tue Aug 08 15:12:08 2023 +0000 |
[ |
@@ -0,0 +1,51 @@ +{ + "label": "Delta (B.1.617.2-like)", + "description": "Defining constellation for lineage B.1.617.2", + "sources": [ + "https://github.com/cov-lineages/pango-designation/issues/49" + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "B.1.617.2", + "AY.1", + "AY.2" + ], + "mrca_lineage": "B.1.617.2", + "lineage_name": "B.1.617.2", + "incompatible_lineage_calls": [ + "AY.1", + "AY.2", + "AY.4", + "AY.4.2" + ], + "PHE_label": "VOC-21APR-02", + "WHO_label": "Delta", + "representative_genome": "" + }, + "tags": [ + "B.1.617.2", + "VOC-21APR-02", + "Delta", + "VUI-21APR-02" + ], + "sites": [ + "S:T19R", + "S:G142D", + "S:L452R", + "S:T478K", + "S:P681R", + "S:D950N", + "ORF3a:S26L", + "M:I82T", + "ORF7a:V82A", + "ORF7a:T120I", + "N:D63G", + "N:R203M", + "N:D377Y" + ], + "rules": { + "min_alt": 5, + "max_ref": 3 + } +} |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/test-constellations/definitions/cP.1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-constellations/definitions/cP.1.json Tue Aug 08 15:12:08 2023 +0000 |
[ |
@@ -0,0 +1,45 @@ +{ + "label": "Gamma (P.1-like)", + "description": "Defining constellation for lineage P.1", + "sources": [ + "https://virological.org/t/586", + "https://cov-lineages.org/global_report_P.1.html" + ], + "type": "variant", + "variant": { + "Pango_lineages": [ + "P.1" + ], + "mrca_lineage": "P.1", + "PHE_label": "VOC-21JAN-02", + "WHO_label": "Gamma", + "representative_genome": "" + }, + "tags": [ + "P.1", + "VOC-21JAN-02", + "VOC202101/02" + ], + "sites": [ + "nuc:T733C", + "nuc:C2749T", + "ORF1ab:S1188L", + "ORF1ab:K1795Q", + "del:11287:9", + "nuc:C12778T", + "nuc:C13860T", + "S:T20N", + "S:P26S", + "S:K417T", + "S:E484K", + "S:N501Y", + "S:T1027I", + "ORF8:E92K", + "nuc:28262+AACA", + "N:P80R" + ], + "rules": { + "min_alt": 8, + "max_ref": 3 + } +} |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 test-data/variants_table.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/variants_table.txt Tue Aug 08 15:12:08 2023 +0000 |
b |
b'@@ -0,0 +1,1829 @@\n+CHROM\tPOS\tID\tREF\tALT\tDP\tAD_ref\tAD_alt\tGene_Name\tNt_alt\tAA_alt\tAF\tcodon_num\tsample\r\n+NC_045512.2\t328\tNC_045512.2;328;ACA;ACCA\tACA\tACCA\t36\t34\t1\tORF1a\t64dupC\tQ22fs\t0.0277777777777778\t21\tSampleA_freebayes_ann\r\n+NC_045512.2\t355\tNC_045512.2;355;C;T\tC\tT\t42\t41\t1\tORF1a\t90C>T\tG30G\t0.0238095238095238\t30\tSampleA_freebayes_ann\r\n+NC_045512.2\t366\tNC_045512.2;366;C;T\tC\tT\t42\t28\t14\tORF1a\t101C>T\tS34F\t0.333333333333333\t34\tSampleA_freebayes_ann\r\n+NC_045512.2\t401\tNC_045512.2;401;CTTAA;CTAA\tCTTAA\tCTAA\t37\t35\t2\tORF1a\t138delT\tD48fs\t0.0540540540540541\t46\tSampleA_freebayes_ann\r\n+NC_045512.2\t406\tNC_045512.2;406;AGA;AA\tAGA\tAA\t35\t34\t1\tORF1a\t142delG\tD48fs\t0.0285714285714286\t47\tSampleA_freebayes_ann\r\n+NC_045512.2\t421\tNC_045512.2;421;C;A\tC\tA\t35\t34\t1\tORF1a\t156C>A\tG52G\t0.0285714285714286\t52\tSampleA_freebayes_ann\r\n+NC_045512.2\t442\tNC_045512.2;442;C;G\tC\tG\t64\t63\t1\tORF1a\t177C>G\tG59G\t0.015625\t59\tSampleA_freebayes_ann\r\n+NC_045512.2\t445\tNC_045512.2;445;T;C\tT\tC\t64\t33\t31\tORF1a\t180T>C\tV60V\t0.484375\t60\tSampleA_freebayes_ann\r\n+NC_045512.2\t460\tNC_045512.2;460;A;C\tA\tC\t64\t63\t1\tORF1a\t195A>C\tE65D\t0.015625\t65\tSampleA_freebayes_ann\r\n+NC_045512.2\t505\tNC_045512.2;505;T;A\tT\tA\t64\t63\t1\tORF1a\t240T>A\tP80P\t0.015625\t80\tSampleA_freebayes_ann\r\n+NC_045512.2\t541\tNC_045512.2;541;CGA;CA\tCGA\tCA\t64\t62\t2\tORF1a\t277delG\tE93fs\t0.03125\t92\tSampleA_freebayes_ann\r\n+NC_045512.2\t560\tNC_045512.2;560;C;T\tC\tT\t64\t62\t2\tORF1a\t295C>T\tR99C\t0.03125\t99\tSampleA_freebayes_ann\r\n+NC_045512.2\t577\tNC_045512.2;577;T;G\tT\tG\t64\t63\t1\tORF1a\t312T>G\tL104L\t0.015625\t104\tSampleA_freebayes_ann\r\n+NC_045512.2\t578\tNC_045512.2;578;G;A\tG\tA\t64\t63\t1\tORF1a\t313G>A\tG105S\t0.015625\t105\tSampleA_freebayes_ann\r\n+NC_045512.2\t583\tNC_045512.2;583;CC;TT\tCC\tTT\t64\t30\t6\tORF1a\t318_319delCCinsTT\tL107F\t0.09375\t106\tSampleA_freebayes_ann\r\n+NC
_045512.2\t583\tNC_045512.2;583;CC;TC\tCC\tTC\t64\t30\t28\tORF1a\t318_319delCCinsTT\tL107F\t0.4375\t106\tSampleA_freebayes_ann\r\n+NC_045512.2\t593\tNC_045512.2;593;C;T\tC\tT\t64\t36\t28\tORF1a\t328C>T\tH110Y\t0.4375\t110\tSampleA_freebayes_ann\r\n+NC_045512.2\t643\tNC_045512.2;643;C;T\tC\tT\t39\t25\t14\tORF1a\t378C>T\tN126N\t0.358974358974359\t126\tSampleA_freebayes_ann\r\n+NC_045512.2\t657\tNC_045512.2;657;C;T\tC\tT\t50\t36\t14\tORF1a\t392C>T\tA131V\t0.28\t131\tSampleA_freebayes_ann\r\n+NC_045512.2\t663\tNC_045512.2;663;G;C\tG\tC\t52\t51\t1\tORF1a\t398G>C\tG133A\t0.0192307692307692\t133\tSampleA_freebayes_ann\r\n+NC_045512.2\t666\tNC_045512.2;666;A;T\tA\tT\t52\t51\t1\tORF1a\t401A>T\tH134L\t0.0192307692307692\t134\tSampleA_freebayes_ann\r\n+NC_045512.2\t682\tNC_045512.2;682;T;C\tT\tC\t52\t34\t18\tORF1a\t417T>C\tD139D\t0.346153846153846\t139\tSampleA_freebayes_ann\r\n+NC_045512.2\t721\tNC_045512.2;721;T;G\tT\tG\t48\t46\t2\tORF1a\t456T>G\tD152E\t0.0416666666666667\t152\tSampleA_freebayes_ann\r\n+NC_045512.2\t724\tNC_045512.2;724;T;G\tT\tG\t42\t41\t1\tORF1a\t459T>G\tP153P\t0.0238095238095238\t153\tSampleA_freebayes_ann\r\n+NC_045512.2\t725\tNC_045512.2;725;T;G\tT\tG\t38\t37\t1\tORF1a\t460T>G\tY154D\t0.0263157894736842\t154\tSampleA_freebayes_ann\r\n+NC_045512.2\t729\tNC_045512.2;729;A;G\tA\tG\t37\t36\t1\tORF1a\t464A>G\tE155G\t0.027027027027027\t155\tSampleA_freebayes_ann\r\n+NC_045512.2\t737\tNC_045512.2;737;C;A\tC\tA\t38\t37\t1\tORF1a\t472C>A\tQ158K\t0.0263157894736842\t158\tSampleA_freebayes_ann\r\n+NC_045512.2\t913\tNC_045512.2;913;C;T\tC\tT\t38\t12\t26\tORF1a\t648C>T\tS216S\t0.68421052631579\t216\tSampleA_freebayes_ann\r\n+NC_045512.2\t924\tNC_045512.2;924;A;T\tA\tT\t38\t37\t1\tORF1a\t659A>T\tD220V\t0.0263157894736842\t220\tSampleA_freebayes_ann\r\n+NC_045512.2\t929\tNC_045512.2;929;A;T\tA\tT\t38\t37\t1\tORF1a\t664A>T\tI222F\t0.0263157894736842\t222\tSampleA_freebayes_ann\r\n+NC_045512.2\t932\tNC_045512.2;932;G;T\tG\tT\t38\t37\t1\tORF1a\t667G>T\tD223Y\t0.026315789
4736842\t223\tSampleA_freebayes_ann\r\n+NC_045512.2\t946\tNC_045512.2;946;T;G\tT\tG\t48\t47\t1\tORF1a\t681T>G\tG227G\t0.0208333333333333\t227\tSampleA_freebayes_ann\r\n+NC_045512.2\t949\tNC_045512.2;949;A;G\tA\tG\t53\t52\t1\tORF1a\t684A>G\tV228V\t0.0188679245283019\t228\tSampleA_freebayes_ann\r\n+NC_045512.2\t954\tNC_045512.2;954;G;T\tG\tT\t53\t52\t1\tORF1a\t689G>T\tC230F\t0.0188679245283019\t230\tSampleA_freebayes_ann\r\n+NC_045512.2\t968\tNC_045512.2;968;G;T\tG\tT\t51\t50\t1\tORF1a\t703G>T\tE235*\t0.0196078431372549\t235\tSampleA_fr'..b'\tSampleC_freebayes_ann\r\n+NC_045512.2\t27870\tNC_045512.2;27870;GAAAC;TAAAT\tGAAAC\tTAAAT\t1272\t136\t17\tORF7b\t115G>T\tE39*\t0.0133647798742138\t39\tSampleC_freebayes_ann\r\n+NC_045512.2\t27870\tNC_045512.2;27870;GAAAC;TAAAC\tGAAAC\tTAAAC\t1272\t136\t25\tORF7b\t115G>T\tE39*\t0.0196540880503145\t39\tSampleC_freebayes_ann\r\n+NC_045512.2\t27870\tNC_045512.2;27870;GAAAC;GAAAT\tGAAAC\tGAAAT\t1272\t136\t1088\tORF7b\t115G>T\tE39*\t0.855345911949686\t39\tSampleC_freebayes_ann\r\n+NC_045512.2\t27942\tNC_045512.2;27942;C;T\tC\tT\t1411\t1229\t182\tORF8\t49C>T\tH17Y\t0.128986534372785\t17\tSampleC_freebayes_ann\r\n+NC_045512.2\t27972\tNC_045512.2;27972;C;T\tC\tT\t1308\t1152\t150\tORF8\t79C>T\tQ27*\t0.114678899082569\t27\tSampleC_freebayes_ann\r\n+NC_045512.2\t27981\tNC_045512.2;27981;CCA;CTACA\tCCA\tCTACA\t1259\t1239\t15\tORF8\t88_89insTA\tP30fs\t0.0119142176330421\t30\tSampleC_freebayes_ann\r\n+NC_045512.2\t27987\tNC_045512.2;27987;GTAG;GG\tGTAG\tGG\t1259\t1238\t17\tORF8\t95_96delTA\tV32fs\t0.0135027799841144\t32\tSampleC_freebayes_ann\r\n+NC_045512.2\t28048\tNC_045512.2;28048;G;T\tG\tT\t1234\t1101\t133\tORF8\t155G>T\tR52I\t0.107779578606159\t52\tSampleC_freebayes_ann\r\n+NC_045512.2\t28095\tNC_045512.2;28095;A;T\tA\tT\t2258\t2163\t95\tORF8\t202A>T\tK68*\t0.0420726306465899\t68\tSampleC_freebayes_ann\r\n+NC_045512.2\t28111\tNC_045512.2;28111;A;G\tA\tG\t2841\t1357\t1482\tORF8\t218A>G\tY73C\t0.521647307286167\t73\tSampleC_freebayes_ann\r\n
+NC_045512.2\t28242\tNC_045512.2;28242;GTTTTAGATTTCA;GTTTTAA\tGTTTTAGATTTCA\tGTTTTAA\t6286\t4147\t1587\tORF8\t353dupT\tL118fs\t0.252465797009227\t117\tSampleC_freebayes_ann\r\n+NC_045512.2\t28242\tNC_045512.2;28242;GTTTTAGATTTCA;GTTTTTAGATTTCA\tGTTTTAGATTTCA\tGTTTTTAGATTTCA\t6286\t4147\t509\tORF8\t353dupT\tL118fs\t0.0809735921094496\t117\tSampleC_freebayes_ann\r\n+NC_045512.2\t28280\tNC_045512.2;28280;GAT;CTA\tGAT\tCTA\t6314\t2875\t3423\tN\t7_9delGATinsCTA\tD3L\t0.542128603104213\t3\tSampleC_freebayes_ann\r\n+NC_045512.2\t28295\tNC_045512.2;28295;A;G\tA\tG\t7561\t6578\t980\tN\t22A>G\tN8D\t0.129612485121016\t8\tSampleC_freebayes_ann\r\n+NC_045512.2\t28461\tNC_045512.2;28461;A;G\tA\tG\t2180\t1392\t787\tN\t188A>G\tD63G\t0.361009174311927\t63\tSampleC_freebayes_ann\r\n+NC_045512.2\t28567\tNC_045512.2;28567;C;T\tC\tT\t2372\t2344\t26\tN\t294C>T\tD98D\t0.0109612141652614\t98\tSampleC_freebayes_ann\r\n+NC_045512.2\t28579\tNC_045512.2;28579;A;T\tA\tT\t2340\t2254\t86\tN\t306A>T\tK102N\t0.0367521367521368\t102\tSampleC_freebayes_ann\r\n+NC_045512.2\t28598\tNC_045512.2;28598;T;C\tT\tC\t2328\t2299\t27\tN\t325T>C\tY109H\t0.0115979381443299\t109\tSampleC_freebayes_ann\r\n+NC_045512.2\t28881\tNC_045512.2;28881;GGGG;AACC\tGGGG\tAACC\t4147\t1\t414\tN\t608G>T\tR203M\t0.0998312032794791\t203\tSampleC_freebayes_ann\r\n+NC_045512.2\t28881\tNC_045512.2;28881;GGGG;AACG\tGGGG\tAACG\t4147\t1\t1062\tN\t608G>T\tR203M\t0.25608873884736\t203\tSampleC_freebayes_ann\r\n+NC_045512.2\t28881\tNC_045512.2;28881;GGGG;TGGG\tGGGG\tTGGG\t4147\t1\t2649\tN\t608G>T\tR203M\t0.638775018085363\t203\tSampleC_freebayes_ann\r\n+NC_045512.2\t28915\tNC_045512.2;28915;CG;TT\tCG\tTT\t4477\t1892\t125\tN\t643G>T\tG215C\t0.027920482465937\t214\tSampleC_freebayes_ann\r\n+NC_045512.2\t28915\tNC_045512.2;28915;CG;CT\tCG\tCT\t4477\t1892\t2449\tN\t643G>T\tG215C\t0.547018092472638\t214\tSampleC_freebayes_ann\r\n+NC_045512.2\t28972\tNC_045512.2;28972;A;G\tA\tG\t4047\t3939\t108\tN\t699A>G\tK233K\t0.0266864343958488\t233\tSampleC
_freebayes_ann\r\n+NC_045512.2\t28977\tNC_045512.2;28977;C;T\tC\tT\t3909\t2408\t1501\tN\t704C>T\tS235F\t0.383985674085444\t235\tSampleC_freebayes_ann\r\n+NC_045512.2\t29000\tNC_045512.2;29000;G;T\tG\tT\t3760\t3028\t732\tN\t727G>T\tG243C\t0.19468085106383\t243\tSampleC_freebayes_ann\r\n+NC_045512.2\t29029\tNC_045512.2;29029;T;C\tT\tC\t3572\t3497\t74\tN\t756T>C\tA252A\t0.0207166853303471\t252\tSampleC_freebayes_ann\r\n+NC_045512.2\t29039\tNC_045512.2;29039;A;T\tA\tT\t3516\t3367\t149\tN\t766A>T\tK256*\t0.0423777019340159\t256\tSampleC_freebayes_ann\r\n+NC_045512.2\t29049\tNC_045512.2;29049;G;A\tG\tA\t3347\t3243\t101\tN\t776G>A\tR259Q\t0.0301762772632208\t259\tSampleC_freebayes_ann\r\n+NC_045512.2\t29067\tNC_045512.2;29067;CTAAAGCATACAATGT;CT\tCTAAAGCATACAATGT\tCT\t2118\t1911\t187\tN\t798_811delAGCATACAATGTAA\tK266fs\t0.0882908404154863\t265\tSampleC_freebayes_ann\r\n+NC_045512.2\t29402\tNC_045512.2;29402;G;T\tG\tT\t4271\t2642\t1623\tN\t1129G>T\tD377Y\t0.380004682744088\t377\tSampleC_freebayes_ann\r\n' |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 tool-data/pangolin_constellations.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pangolin_constellations.loc.sample Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,10 @@ +# this is a tab separated file describing the location of constellations databases used for the +# scorpio part of the pangolin SARS-CoV-2 lineage typing tool +# +# the columns are: +# value description min_scorpio_version date path +# +# min_scorpio_version is the minimum scorpio tool major version that is needed to read the constellations data +# +# for example +#v0.1.10 constellations release v0.1.10 0 2022-05-05T13:14:56 /srv/galaxy/tool-data/pangolin_constellations/v0.1.10 |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <table name="pangolin_constellations" comment_char="#" allow_duplicate_entries="False"> + <!-- min_scorpio_version is the minimum scorpio tool major version that is needed to read the constellations data --> + <columns>value, description, min_scorpio_version, date, path</columns> + <file path="tool-data/pangolin_constellations.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 6ddf5a9ce4a5 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Aug 08 15:12:08 2023 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <table name="pangolin_constellations" comment_char="#" allow_duplicate_entries="False"> + <!-- min_scorpio_version is the minimum scorpio tool major version that is needed to read the constellations data --> + <columns>value, description, min_scorpio_version, date, path</columns> + <file path="${__HERE__}/test-data/pangolin_constellations.loc" /> + </table> +</tables> |