Repository 'sansa_annotate'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/sansa_annotate

Changeset 0:fb00426402b0 (2021-01-25)
Next changeset 1:e3862632706d (2021-04-16)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sansa commit b9066cb246cbae9a24020a79cf53d2dd4462b77d"
added:
annotate.xml
macros.xml
test-data/db.gtf
test-data/db.vcf
test-data/input.vcf
b
diff -r 000000000000 -r fb00426402b0 annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/annotate.xml Mon Jan 25 17:50:47 2021 +0000
[
b'@@ -0,0 +1,297 @@\n+<?xml version="1.0"?>\n+<tool id="sansa_annotate" name="sansa annotate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01">\n+    <description>structural variants</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    <expand macro="version_command"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+## initialize\n+## file format gtf.gz is required\n+#if $type_cond.type_sel == \'fg\' or $type_cond.type_sel == \'sfg\'\n+    gzip -c \'$type_cond.fg.gtf\' > \'db.gtf.gz\' &&\n+#end if\n+\n+## run\n+sansa annotate\n+## sv annotation options\n+#if $type_cond.type_sel == \'s\' or $type_cond.type_sel == \'sfg\'\n+    --db \'$type_cond.s.db\'\n+    --bpoffset $type_cond.s.bpoffset\n+    --ratio $type_cond.s.ratio\n+    --strategy \'$type_cond.s.strategy\'\n+    $type_cond.s.notype\n+    $type_cond.s.nomatch\n+#end if\n+## feature and gene annotation options\n+#if $type_cond.type_sel == \'fg\' or $type_cond.type_sel == \'sfg\'\n+    --gtf \'db.gtf.gz\'\n+    --id \'$type_cond.fg.id\'\n+    --feature \'$type_cond.fg.feature\'\n+    --distance $type_cond.fg.distance\n+#end if\n+## input\n+\'$input\'\n+\n+## postprocessing\n+2>&1 | tee -a \'$out_log\'\n+    ]]></command>\n+    <inputs>\n+        <param name="input" type="data" format="vcf" label="Select input file"/>\n+        <conditional name="type_cond">\n+            <param name="type_sel" type="select" label="Select annotation mode(s)">\n+                <option value="s" selected="true">SV</option>\n+                <option value="fg">Feature/Gene</option>\n+                <option value="sfg">SV and Feature/Gene</option>\n+            </param>\n+            <when value="s">\n+                <expand macro="s"/>\n+            </when>\n+            <when value="fg">\n+                <expand macro="fg"/>\n+            </when>\n+            <when value="sfg">\n+                <expand macro="s"/>\n+                
<expand macro="fg"/>\n+            </when>\n+        </conditional>\n+        <section name="oo" title="Output options" expanded="true">\n+            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">\n+                <option value="anno" selected="true">Annotation</option>\n+                <option value="log">Log</option>\n+                <option value="query">Query SV</option>\n+            </param>\n+        </section>\n+    </inputs>\n+    <outputs>\n+        <data name="out_anno" format="bcf" from_work_dir="anno.bcf" label="${tool.name} on ${on_string}: Annotation">\n+            <filter>\'anno\' in oo[\'out\']</filter>\n+        </data>\n+        <data name="out_query" format="tabular.gz" from_work_dir="query.tsv.gz" label="${tool.name} on ${on_string}: Query SV">\n+            <filter>\'query\' in oo[\'out\']</filter>\n+        </data>\n+        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">\n+            <filter>\'log\' in oo[\'out\']</filter>\n+        </data>\n+    </outputs>\n+    <tests>\n+        <!-- #1 s, default -->\n+        <test expect_num_outputs="3">\n+            <param name="input" value="input.vcf"/>\n+            <conditional name="type_cond">\n+                <param name="type_sel" value="s"/>\n+                <section name="s">\n+                    <param name="db" value="db.vcf"/>\n+                </section>\n+            </conditional>\n+            <section name="oo">\n+                <param name="out" value="anno,log,query"/>\n+            </section>\n+            <output name="out_anno">\n+                <assert_contents>\n+                    <has_size value="57072"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_query">\n+                <assert_contents>\n+                    <has_size value="91"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_log">\n+    
            <assert_contents>\n+                    <has_text_matching expression=".+Done.+"/>\n+                </assert_contents>\n+            </ou'..b'ery">\n+                <assert_contents>\n+                    <has_size value="91"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_log">\n+                <assert_contents>\n+                    <has_text_matching expression=".+Done.+"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <!-- #5 sfg, default -->\n+        <test expect_num_outputs="3">\n+            <param name="input" value="input.vcf"/>\n+            <conditional name="type_cond">\n+                <param name="type_sel" value="sfg"/>\n+                <section name="s">\n+                    <param name="db" value="db.vcf"/>\n+                </section>\n+                <section name="fg">\n+                    <param name="gtf" value="db.gtf"/>\n+                </section>\n+            </conditional>\n+            <section name="oo">\n+                <param name="out" value="anno,log,query"/>\n+            </section>\n+            <output name="out_anno">\n+                <assert_contents>\n+                    <has_size value="57072"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_query">\n+                <assert_contents>\n+                    <has_size value="91"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_log">\n+                <assert_contents>\n+                    <has_text_matching expression=".+Done.+"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <!-- #5 sfg, custom -->\n+        <test expect_num_outputs="3">\n+            <param name="input" value="input.vcf"/>\n+            <conditional name="type_cond">\n+                <param name="type_sel" value="sfg"/>\n+                <section name="s">\n+    
                <param name="db" value="db.vcf"/>\n+                    <param name="bpoffset" value="51"/>\n+                    <param name="ratio" value="0.79"/>\n+                    <param name="strategy" value="all"/>\n+                    <param name="notype" value="true"/>\n+                    <param name="nomatch" value="true"/>\n+                </section>\n+                <section name="fg">\n+                    <param name="gtf" value="db.gtf"/>\n+                    <param name="id" value="gene_id"/>\n+                    <param name="feature" value="exon"/>\n+                    <param name="distance" value="1001"/>\n+                </section>\n+            </conditional>\n+            <section name="oo">\n+                <param name="out" value="anno,log,query"/>\n+            </section>\n+            <output name="out_anno">\n+                <assert_contents>\n+                    <has_size value="57072"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_query">\n+                <assert_contents>\n+                    <has_size value="91"/>\n+                </assert_contents>\n+            </output>\n+            <output name="out_log">\n+                <assert_contents>\n+                    <has_text_matching expression=".+Done.+"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+.. class:: infomark\n+\n+**What it does**\n+\n+Sansa *annotate* is a structural variant (SV) annotation tool.\n+\n+**Input**\n+\n+- sample (VCF)\n+- gene annotation file (GTF/GFF2/GFF3)\n+- annotation database, e.g. `gnomAD-SV <https://gnomad.broadinstitute.org/>`_ or `1000 Genomes phase 3 <https://www.internationalgenome.org/phase-3-structural-variant-dataset>`_\n+\n+**Output**\n+\n+- anno (BCF) with annotation SVs augmented by a unique ID (INFO/ANNOID) \n+- query (tabular.gz) with query SVs matched to annotation IDs\n+\n+.. 
class:: infomark\n+\n+**References**\n+\n+More information is available on `GitHub <https://github.com/dellytools/sansa>`_.\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1093/bioinformatics/bts378</citation>\n+    </citations>\n+</tool>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r fb00426402b0 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jan 25 17:50:47 2021 +0000
[
@@ -0,0 +1,50 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Version of the wrapped sansa release; reused as the package requirement version. -->
+    <token name="@TOOL_VERSION@">0.0.7</token>
+    <!-- Wrapper revision; annotate.xml appends it to the tool version after "+galaxy". -->
+    <token name="@VERSION_SUFFIX@">0</token>
+    <!-- Dependency list: a single "sansa" package pinned to @TOOL_VERSION@. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package"
+                         version="@TOOL_VERSION@">sansa</requirement>
+        </requirements>
+    </xml>
+    <!--
+        Version probe: runs "sansa -v", keeps the line(s) containing "Sansa ",
+        and prints the third field when that line is split on the letter "v".
+    -->
+    <xml name="version_command">
+        <version_command><![CDATA[sansa -v | grep "Sansa " | cut -d "v" -f 3]]></version_command>
+    </xml>
+    <!-- Shared citation block holding the single DOI reference for this tool. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/bts378</citation>
+        </citations>
+    </xml>
+
+    <!-- input -->
+
+    <!--
+        Feature and gene annotation parameters.
+        annotate.xml expands this macro in the "fg" and "sfg" branches of the
+        type_cond conditional and reads the values as $type_cond.fg.<name>.
+    -->
+    <xml name="fg">
+        <section name="fg" title="Feature and gene annotation options" expanded="true">
+            <!-- Annotation file; annotate.xml gzips it to db.gtf.gz before running sansa. -->
+            <param argument="--gtf"
+                   type="data"
+                   format="gtf,gff3"
+                   label="Select file"/>
+            <!-- Attribute name passed to sansa as the id option. -->
+            <param argument="--id"
+                   type="text"
+                   value="gene_name"
+                   label="Set GTF/GFF3 attribute">
+                <option value="exon_id">exon_id</option>
+                <option value="gene_id">gene_id</option>
+                <option value="gene_name">gene_name</option>
+                <option value="Name">Name</option>
+            </param>
+            <!-- Feature type passed to sansa as the feature option. -->
+            <param argument="--feature"
+                   type="text"
+                   value="gene"
+                   label="Set GTF/GFF3 feature">
+                <option value="gene">gene</option>
+                <option value="exon">exon</option>
+            </param>
+            <!-- Non-negative distance limit; per the help text, 0 restricts to overlapping features. -->
+            <param argument="--distance"
+                   type="integer"
+                   min="0"
+                   value="1000"
+                   label="Set maximum distance"
+                   help="0: overlapping features only"/>
+        </section>
+    </xml>
+    <!--
+        SV annotation parameters.
+        annotate.xml expands this macro in the "s" and "sfg" branches of the
+        type_cond conditional and reads the values as $type_cond.s.<name>.
+    -->
+    <xml name="s">
+        <section name="s" title="SV annotation options" expanded="true">
+            <!-- Database of known SVs that the input variants are matched against. -->
+            <param argument="--db" type="data" format="vcf,bcf" label="Select database file"/>
+            <!-- An offset is a distance, so negative values are rejected (same bound as the distance option of the fg macro). -->
+            <param argument="--bpoffset" type="integer" min="0" value="50" label="Set maximum breakpoint offset"/>
+            <param argument="--ratio" type="float" min="0.0" max="1.0" value="0.8" label="Set minimum size ratio smaller SV to larger SV"/>
+            <param argument="--strategy" type="select" label="Select matching strategy">
+                <option value="best" selected="true">Best</option>
+                <option value="all">All</option>
+            </param>
+            <!-- Checking this box emits "-n", which makes sansa stop requiring matching SV types; the label is worded to match that effect. -->
+            <param argument="--notype" type="boolean" truevalue="-n" falsevalue="" label="Do not require matching SV types?"/>
+            <!-- Checking this box emits "-m". -->
+            <param argument="--nomatch" type="boolean" truevalue="-m" falsevalue="" label="Report SVs without match in database?"/>
+        </section>
+    </xml>
+</macros>
b
diff -r 000000000000 -r fb00426402b0 test-data/db.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/db.gtf Mon Jan 25 17:50:47 2021 +0000
b
b'@@ -0,0 +1,1000 @@\n+#!genome-build GRCh37.p13\n+#!genome-version GRCh37\n+#!genome-date 2009-02\n+#!genome-build-accession NCBI:GCA_000001405.14\n+#!genebuild-last-updated 2013-09\n+1\tensembl_havana\tgene\t11869\t14412\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene";\n+1\thavana\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; havana_transcript "OTTHUMT00000362751"; havana_transcript_version "1"; tag "basic";\n+1\thavana\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; havana_transcript "OTTHUMT00000362751"; havana_transcript_version "1"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic";\n+1\thavana\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; havana_transcript "OTTHUMT00000362751"; havana_transcript_version "1"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic";\n+1\thavana\texon\t13221\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; transcript_source 
"havana"; transcript_biotype "processed_transcript"; havana_transcript "OTTHUMT00000362751"; havana_transcript_version "1"; exon_id "ENSE00002312635"; exon_version "1"; tag "basic";\n+1\tensembl\ttranscript\t11872\t14412\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000515242"; transcript_version "2"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; transcript_source "ensembl"; transcript_biotype "transcribed_unprocessed_pseudogene";\n+1\tensembl\texon\t11872\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000515242"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; transcript_source "ensembl"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00002234632"; exon_version "1";\n+1\tensembl\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000515242"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; transcript_source "ensembl"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00003608237"; exon_version "1";\n+1\tensembl\texon\t13225\t14412\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000515242"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; transcript_source "ensembl"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00002306041"; exon_version "1";\n+1\tensembl\ttranscript\t11874\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "4"; transcript_id "ENST00000518655"; transcript_version "2"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; 
transcript_source "ensembl"; transcript_biotype "transcribed_unprocessed_pseudogene";\n+1\tensembl\texon\t11'..b' "ENST00000379407"; transcript_version "3"; exon_number "3"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; exon_id "ENSE00001373621"; exon_version "1"; tag "basic";\n+1\tensembl_havana\tCDS\t905657\t905803\t.\t+\t0\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "3"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; protein_id "ENSP00000368717"; protein_version "2"; tag "basic";\n+1\tensembl_havana\texon\t905901\t905981\t.\t+\t.\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "4"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; exon_id "ENSE00001374286"; exon_version "1"; tag "basic";\n+1\tensembl_havana\tCDS\t905901\t905981\t.\t+\t0\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "4"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; 
havana_transcript_version "1"; protein_id "ENSP00000368717"; protein_version "2"; tag "basic";\n+1\tensembl_havana\texon\t906066\t906138\t.\t+\t.\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "5"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; exon_id "ENSE00001385460"; exon_version "1"; tag "basic";\n+1\tensembl_havana\tCDS\t906066\t906138\t.\t+\t0\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "5"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; protein_id "ENSP00000368717"; protein_version "2"; tag "basic";\n+1\tensembl_havana\texon\t906259\t906386\t.\t+\t.\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "6"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; exon_id "ENSE00001374990"; exon_version "1"; tag "basic";\n+1\tensembl_havana\tCDS\t906259\t906386\t.\t+\t2\tgene_id "ENSG00000187583"; gene_version "6"; transcript_id "ENST00000379407"; transcript_version "3"; exon_number "6"; gene_name "PLEKHN1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PLEKHN1-004"; transcript_source "ensembl_havana"; transcript_biotype 
"protein_coding"; tag "CCDS"; ccds_id "CCDS53256"; havana_transcript "OTTHUMT00000473255"; havana_transcript_version "1"; protein_id "ENSP00000368717"; protein_version "2"; tag "basic";\n'
b
diff -r 000000000000 -r fb00426402b0 test-data/db.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/db.vcf Mon Jan 25 17:50:47 2021 +0000
b
b'@@ -0,0 +1,400 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##ALT=<ID=BND,Description="Unresolved non-reference breakpoint junction">\n+##ALT=<ID=CPX,Description="Complex SV">\n+##ALT=<ID=CTX,Description="Reciprocal translocation">\n+##ALT=<ID=DEL,Description="Deletion">\n+##ALT=<ID=DUP,Description="Duplication">\n+##ALT=<ID=INS,Description="Insertion">\n+##ALT=<ID=INS:ME,Description="Mobile element insertion of unspecified ME class">\n+##ALT=<ID=INS:ME:ALU,Description="Alu element insertion">\n+##ALT=<ID=INS:ME:LINE1,Description="LINE1 element insertion">\n+##ALT=<ID=INS:ME:SVA,Description="SVA element insertion">\n+##ALT=<ID=INS:UNK,Description="Sequence insertion of unspecified origin">\n+##ALT=<ID=INV,Description="Inversion">\n+##CPX_TYPE_CCR="Complex chromosomal rearrangement, involving two or more chromosomes and multiple SV signatures."\n+##CPX_TYPE_INS_iDEL="Insertion with deletion at insertion site."\n+##CPX_TYPE_INVdel="Complex inversion with 3\' flanking deletion."\n+##CPX_TYPE_INVdup="Complex inversion with 3\' flanking duplication."\n+##CPX_TYPE_dDUP="Dispersed duplication."\n+##CPX_TYPE_dDUP_iDEL="Dispersed duplication with deletion at insertion site."\n+##CPX_TYPE_delINVdel="Complex inversion with 5\' and 3\' flanking deletions."\n+##CPX_TYPE_delINVdup="Complex inversion with 5\' flanking deletion and 3\' flanking duplication."\n+##CPX_TYPE_delINV="Complex inversion with 5\' flanking deletion."\n+##CPX_TYPE_dupINVdel="Complex inversion with 5\' flanking duplication and 3\' flanking deletion."\n+##CPX_TYPE_dupINVdup="Complex inversion with 5\' and 3\' flanking duplications."\n+##CPX_TYPE_dupINV="Complex inversion with 5\' flanking duplication."\n+##CPX_TYPE_piDUP_FR="Palindromic inverted tandem duplication, forward-reverse orientation."\n+##CPX_TYPE_piDUP_RF="Palindromic inverted tandem duplication, reverse-forward orientation."\n+##FILTER=<ID=LOW_CALL_RATE,Description="Site does not meet minimum requirements for 
fraction of PCR- samples with non-null genotypes. Flags sites more prone to false discoveries.">\n+##FILTER=<ID=MULTIALLELIC,Description="Multiallelic site">\n+##FILTER=<ID=PCRPLUS_ENRICHED,Description="Site enriched for non-reference genotypes among PCR+ samples. Likely reflects technical batch effects. All PCR- samples have been assigned null GTs for these sites.>">\n+##FILTER=<ID=UNRESOLVED,Description="Variant is unresolved">\n+##FILTER=<ID=UNSTABLE_AF_PCRMINUS,Description="Allele frequency for this variant in PCR- samples is sensitive to choice of GQ filtering thresholds. All PCR- samples have been assigned null GTs for these sites.>">\n+##FORMAT=<ID=EV,Number=1,Type=String,Description="Classes of evidence supporting final genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PE_GQ,Number=1,Type=Integer,Description="Paired-end genotype quality">\n+##FORMAT=<ID=PE_GT,Number=1,Type=Integer,Description="Paired-end genotype">\n+##FORMAT=<ID=RD_CN,Number=1,Type=Integer,Description="Predicted copy state">\n+##FORMAT=<ID=RD_GQ,Number=1,Type=Integer,Description="Read-depth genotype quality">\n+##FORMAT=<ID=SR_GQ,Number=1,Type=Integer,Description="Split read genotype quality">\n+##FORMAT=<ID=SR_GT,Number=1,Type=Integer,Description="Split-read genotype">\n+##INFO=<ID=ALGORITHMS,Number=.,Type=String,Description="Source algorithms">\n+##INFO=<ID=BOTHSIDES_SUPPORT,Number=0,Type=Flag,Description="Variant has read-level support for both sides of breakpoint. 
Indicates higher-confidence variants.">\n+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome of second breakpoint position.">\n+##INFO=<ID=CPX_INTERVALS,Number=.,Type=String,Description="Genomic intervals constituting complex variant.">\n+##INFO=<ID=CPX_TYPE,Number=1,Type=String,Description="Class of complex variant.">\n+##INFO=<ID=END2,Number=1,Type=Integer,Description="End coordinate of second breakpoint position.">\n+##INFO=<ID=END,Number=1,Type=Integer,Description='..b'EMALE_N_HOMREF=57;OTH_FEMALE_N_HET=0;OTH_FEMALE_N_HOMALT=0;OTH_FEMALE_FREQ_HOMREF=1;OTH_FEMALE_FREQ_HET=0;OTH_FEMALE_FREQ_HOMALT=0;POPMAX_AF=0.000526\n+1\t754260\tgnomAD-SV_v2.1_DUP_1_35\tN\t<DUP>\t999\tPASS\tEND=857641;SVTYPE=DUP;SVLEN=103381;ALGORITHMS=delly,depth;BOTHSIDES_SUPPORT;EVIDENCE=BAF,PE,RD,SR;PROTEIN_CODING__COPY_GAIN=AL645608.2;AN=21686;AC=99;AF=0.004565;N_BI_GENOS=10843;N_HOMREF=10746;N_HET=95;N_HOMALT=2;FREQ_HOMREF=0.991054;FREQ_HET=0.00876141;FREQ_HOMALT=0.000184451;MALE_AN=11050;MALE_AC=52;MALE_AF=0.004706;MALE_N_BI_GENOS=5525;MALE_N_HOMREF=5474;MALE_N_HET=50;MALE_N_HOMALT=1;MALE_FREQ_HOMREF=0.990769;MALE_FREQ_HET=0.00904977;MALE_FREQ_HOMALT=0.000180995;FEMALE_AN=10590;FEMALE_AC=47;FEMALE_AF=0.004438;FEMALE_N_BI_GENOS=5295;FEMALE_N_HOMREF=5249;FEMALE_N_HET=45;FEMALE_N_HOMALT=1;FEMALE_FREQ_HOMREF=0.991313;FEMALE_FREQ_HET=0.00849858;FEMALE_FREQ_HOMALT=0.000188857;AFR_AN=9530;AFR_AC=38;AFR_AF=0.003987;AFR_N_BI_GENOS=4765;AFR_N_HOMREF=4727;AFR_N_HET=38;AFR_N_HOMALT=0;AFR_FREQ_HOMREF=0.992025;AFR_FREQ_HET=0.00797482;AFR_FREQ_HOMALT=0;AFR_MALE_AN=5226;AFR_MALE_AC=21;AFR_MALE_AF=0.004018;AFR_MALE_N_BI_GENOS=2613;AFR_MALE_N_HOMREF=2592;AFR_MALE_N_HET=21;AFR_MALE_N_HOMALT=0;AFR_MALE_FREQ_HOMREF=0.991963;AFR_MALE_FREQ_HET=0.00803674;AFR_MALE_FREQ_HOMALT=0;AFR_FEMALE_AN=4292;AFR_FEMALE_AC=17;AFR_FEMALE_AF=0.003961;AFR_FEMALE_N_BI_GENOS=2146;AFR_FEMALE_N_HOMREF=2129;AFR_FEMALE_N_HET=17;AFR_FEMALE_N_HOMALT=0;AFR_FEMALE_FREQ_HOMREF=0.992078;AFR_FEMALE_FREQ_HET=0.00792172;AFR_FEMALE_F
REQ_HOMALT=0;AMR_AN=1926;AMR_AC=8;AMR_AF=0.004154;AMR_N_BI_GENOS=963;AMR_N_HOMREF=955;AMR_N_HET=8;AMR_N_HOMALT=0;AMR_FREQ_HOMREF=0.991693;AMR_FREQ_HET=0.00830737;AMR_FREQ_HOMALT=0;AMR_MALE_AN=958;AMR_MALE_AC=3;AMR_MALE_AF=0.003132;AMR_MALE_N_BI_GENOS=479;AMR_MALE_N_HOMREF=476;AMR_MALE_N_HET=3;AMR_MALE_N_HOMALT=0;AMR_MALE_FREQ_HOMREF=0.993737;AMR_MALE_FREQ_HET=0.00626305;AMR_MALE_FREQ_HOMALT=0;AMR_FEMALE_AN=960;AMR_FEMALE_AC=5;AMR_FEMALE_AF=0.005208;AMR_FEMALE_N_BI_GENOS=480;AMR_FEMALE_N_HOMREF=475;AMR_FEMALE_N_HET=5;AMR_FEMALE_N_HOMALT=0;AMR_FEMALE_FREQ_HOMREF=0.989583;AMR_FEMALE_FREQ_HET=0.0104167;AMR_FEMALE_FREQ_HOMALT=0;EAS_AN=2416;EAS_AC=1;EAS_AF=0.000414;EAS_N_BI_GENOS=1208;EAS_N_HOMREF=1207;EAS_N_HET=1;EAS_N_HOMALT=0;EAS_FREQ_HOMREF=0.999172;EAS_FREQ_HET=0.000827815;EAS_FREQ_HOMALT=0;EAS_MALE_AN=1388;EAS_MALE_AC=1;EAS_MALE_AF=0.00072;EAS_MALE_N_BI_GENOS=694;EAS_MALE_N_HOMREF=693;EAS_MALE_N_HET=1;EAS_MALE_N_HOMALT=0;EAS_MALE_FREQ_HOMREF=0.998559;EAS_MALE_FREQ_HET=0.00144092;EAS_MALE_FREQ_HOMALT=0;EAS_FEMALE_AN=1020;EAS_FEMALE_AC=0;EAS_FEMALE_AF=0;EAS_FEMALE_N_BI_GENOS=510;EAS_FEMALE_N_HOMREF=510;EAS_FEMALE_N_HET=0;EAS_FEMALE_N_HOMALT=0;EAS_FEMALE_FREQ_HOMREF=1;EAS_FEMALE_FREQ_HET=0;EAS_FEMALE_FREQ_HOMALT=0;EUR_AN=7624;EUR_AC=51;EUR_AF=0.006689;EUR_N_BI_GENOS=3812;EUR_N_HOMREF=3763;EUR_N_HET=47;EUR_N_HOMALT=2;EUR_FREQ_HOMREF=0.987146;EUR_FREQ_HET=0.0123295;EUR_FREQ_HOMALT=0.000524659;EUR_MALE_AN=3402;EUR_MALE_AC=26;EUR_MALE_AF=0.007643;EUR_MALE_N_BI_GENOS=1701;EUR_MALE_N_HOMREF=1676;EUR_MALE_N_HET=24;EUR_MALE_N_HOMALT=1;EUR_MALE_FREQ_HOMREF=0.985303;EUR_MALE_FREQ_HET=0.0141093;EUR_MALE_FREQ_HOMALT=0.000587889;EUR_FEMALE_AN=4204;EUR_FEMALE_AC=25;EUR_FEMALE_AF=0.005947;EUR_FEMALE_N_BI_GENOS=2102;EUR_FEMALE_N_HOMREF=2078;EUR_FEMALE_N_HET=23;EUR_FEMALE_N_HOMALT=1;EUR_FEMALE_FREQ_HOMREF=0.988582;EUR_FEMALE_FREQ_HET=0.010942;EUR_FEMALE_FREQ_HOMALT=0.000475737;OTH_AN=190;OTH_AC=1;OTH_AF=0.005263;OTH_N_BI_GENOS=95;OTH_N_HOMREF=94;OTH_N_HET=1;OTH_N_HOMALT=0;OTH_FREQ_HOMR
EF=0.989474;OTH_FREQ_HET=0.0105263;OTH_FREQ_HOMALT=0;OTH_MALE_AN=76;OTH_MALE_AC=1;OTH_MALE_AF=0.013158;OTH_MALE_N_BI_GENOS=38;OTH_MALE_N_HOMREF=37;OTH_MALE_N_HET=1;OTH_MALE_N_HOMALT=0;OTH_MALE_FREQ_HOMREF=0.973684;OTH_MALE_FREQ_HET=0.0263158;OTH_MALE_FREQ_HOMALT=0;OTH_FEMALE_AN=114;OTH_FEMALE_AC=0;OTH_FEMALE_AF=0;OTH_FEMALE_N_BI_GENOS=57;OTH_FEMALE_N_HOMREF=57;OTH_FEMALE_N_HET=0;OTH_FEMALE_N_HOMALT=0;OTH_FEMALE_FREQ_HOMREF=1;OTH_FEMALE_FREQ_HET=0;OTH_FEMALE_FREQ_HOMALT=0;POPMAX_AF=0.006689\n'
b
diff -r 000000000000 -r fb00426402b0 test-data/input.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.vcf Mon Jan 25 17:50:47 2021 +0000
b
@@ -0,0 +1,140 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20200728
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=BND,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##FILTER=<ID=LowQual,Description="Poor quality and insufficient number of PEs and SRs.">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="PE confidence interval around END">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="PE confidence interval around POS">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for POS2 coordinate in case of an inter-chromosomal translocation">
+##INFO=<ID=POS2,Number=1,Type=Integer,Description="Genomic position for CHR2 in case of an inter-chromosomal translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=PE,Number=1,Type=Integer,Description="Paired-end support of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=SRMAPQ,Number=1,Type=Integer,Description="Median mapping quality of split-reads">
+##INFO=<ID=SR,Number=1,Type=Integer,Description="Split-read support">
+##INFO=<ID=SRQ,Number=1,Type=Float,Description="Split-read consensus alignment quality">
+##INFO=<ID=CONSENSUS,Number=1,Type=String,Description="Split-read consensus sequence">
+##INFO=<ID=CE,Number=1,Type=Float,Description="Consensus sequence entropy">
+##INFO=<ID=CT,Number=1,Type=String,Description="Paired-end signature induced connection type">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Insertion length for SVTYPE=INS.">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=INSLEN,Number=1,Type=Integer,Description="Predicted length of the insertion">
+##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Predicted microhomology length using a max. edit distance of 2">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Log10-scaled genotype likelihoods for RR,RA,AA genotypes">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter">
+##FORMAT=<ID=RC,Number=1,Type=Integer,Description="Raw high-quality read counts or base counts for the SV">
+##FORMAT=<ID=RCL,Number=1,Type=Integer,Description="Raw high-quality read counts or base counts for the left control region">
+##FORMAT=<ID=RCR,Number=1,Type=Integer,Description="Raw high-quality read counts or base counts for the right control region">
+##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Read-depth based copy-number estimate for autosomal sites">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference pairs">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant pairs">
+##FORMAT=<ID=RR,Number=1,Type=Integer,Description="# high-quality reference junction reads">
+##FORMAT=<ID=RV,Number=1,Type=Integer,Description="# high-quality variant junction reads">
+##reference=../data/v1/genome.fasta
+##contig=<ID=chr10,length=135534747>
+##contig=<ID=chr11,length=135006516>
+##contig=<ID=chr11_gl000202_random,length=40103>
+##contig=<ID=chr12,length=133851895>
+##contig=<ID=chr13,length=115169878>
+##contig=<ID=chr14,length=107349540>
+##contig=<ID=chr15,length=102531392>
+##contig=<ID=chr16,length=90354753>
+##contig=<ID=chr17_ctg5_hap1,length=1680828>
+##contig=<ID=chr17,length=81195210>
+##contig=<ID=chr17_gl000203_random,length=37498>
+##contig=<ID=chr17_gl000204_random,length=81310>
+##contig=<ID=chr17_gl000205_random,length=174588>
+##contig=<ID=chr17_gl000206_random,length=41001>
+##contig=<ID=chr18,length=78077248>
+##contig=<ID=chr18_gl000207_random,length=4262>
+##contig=<ID=chr19,length=59128983>
+##contig=<ID=chr19_gl000208_random,length=92689>
+##contig=<ID=chr19_gl000209_random,length=159169>
+##contig=<ID=chr1,length=249250621>
+##contig=<ID=chr1_gl000191_random,length=106433>
+##contig=<ID=chr1_gl000192_random,length=547496>
+##contig=<ID=chr20,length=63025520>
+##contig=<ID=chr21,length=48129895>
+##contig=<ID=chr21_gl000210_random,length=27682>
+##contig=<ID=chr22,length=51304566>
+##contig=<ID=chr2,length=243199373>
+##contig=<ID=chr3,length=198022430>
+##contig=<ID=chr4_ctg9_hap1,length=590426>
+##contig=<ID=chr4,length=191154276>
+##contig=<ID=chr4_gl000193_random,length=189789>
+##contig=<ID=chr4_gl000194_random,length=191469>
+##contig=<ID=chr5,length=180915260>
+##contig=<ID=chr6_apd_hap1,length=4622290>
+##contig=<ID=chr6_cox_hap2,length=4795371>
+##contig=<ID=chr6_dbb_hap3,length=4610396>
+##contig=<ID=chr6,length=171115067>
+##contig=<ID=chr6_mann_hap4,length=4683263>
+##contig=<ID=chr6_mcf_hap5,length=4833398>
+##contig=<ID=chr6_qbl_hap6,length=4611984>
+##contig=<ID=chr6_ssto_hap7,length=4928567>
+##contig=<ID=chr7,length=159138663>
+##contig=<ID=chr7_gl000195_random,length=182896>
+##contig=<ID=chr8,length=146364022>
+##contig=<ID=chr8_gl000196_random,length=38914>
+##contig=<ID=chr8_gl000197_random,length=37175>
+##contig=<ID=chr9,length=141213431>
+##contig=<ID=chr9_gl000198_random,length=90085>
+##contig=<ID=chr9_gl000199_random,length=169874>
+##contig=<ID=chr9_gl000200_random,length=187035>
+##contig=<ID=chr9_gl000201_random,length=36148>
+##contig=<ID=chrM,length=16571>
+##contig=<ID=chrUn_gl000211,length=166566>
+##contig=<ID=chrUn_gl000212,length=186858>
+##contig=<ID=chrUn_gl000213,length=164239>
+##contig=<ID=chrUn_gl000214,length=137718>
+##contig=<ID=chrUn_gl000215,length=172545>
+##contig=<ID=chrUn_gl000216,length=172294>
+##contig=<ID=chrUn_gl000217,length=172149>
+##contig=<ID=chrUn_gl000218,length=161147>
+##contig=<ID=chrUn_gl000219,length=179198>
+##contig=<ID=chrUn_gl000220,length=161802>
+##contig=<ID=chrUn_gl000221,length=155397>
+##contig=<ID=chrUn_gl000222,length=186861>
+##contig=<ID=chrUn_gl000223,length=180455>
+##contig=<ID=chrUn_gl000224,length=179693>
+##contig=<ID=chrUn_gl000225,length=211173>
+##contig=<ID=chrUn_gl000226,length=15008>
+##contig=<ID=chrUn_gl000227,length=128374>
+##contig=<ID=chrUn_gl000228,length=129120>
+##contig=<ID=chrUn_gl000229,length=19913>
+##contig=<ID=chrUn_gl000230,length=43691>
+##contig=<ID=chrUn_gl000231,length=27386>
+##contig=<ID=chrUn_gl000232,length=40652>
+##contig=<ID=chrUn_gl000233,length=45941>
+##contig=<ID=chrUn_gl000234,length=40531>
+##contig=<ID=chrUn_gl000235,length=34474>
+##contig=<ID=chrUn_gl000236,length=41934>
+##contig=<ID=chrUn_gl000237,length=45867>
+##contig=<ID=chrUn_gl000238,length=39939>
+##contig=<ID=chrUn_gl000239,length=33824>
+##contig=<ID=chrUn_gl000240,length=41933>
+##contig=<ID=chrUn_gl000241,length=42152>
+##contig=<ID=chrUn_gl000242,length=43523>
+##contig=<ID=chrUn_gl000243,length=43341>
+##contig=<ID=chrUn_gl000244,length=39929>
+##contig=<ID=chrUn_gl000245,length=36651>
+##contig=<ID=chrUn_gl000246,length=38154>
+##contig=<ID=chrUn_gl000247,length=36422>
+##contig=<ID=chrUn_gl000248,length=39786>
+##contig=<ID=chrUn_gl000249,length=38502>
+##contig=<ID=chrX,length=155270560>
+##contig=<ID=chrY,length=59373566>
+##INFO=<ID=RDRATIO,Number=1,Type=Float,Description="Read-depth ratio of tumor vs. normal.">
+##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Somatic structural variant.">
+##bcftools_viewVersion=1.10.2+htslib-1.10.2
+##bcftools_viewCommand=view sv.bcf; Date=Mon Sep 21 22:19:50 2020
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR