Mercurial > repos > iuc > snpsift
changeset 5:09d6806c609e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit 70ff70918368ff0deeb596c2190a770abe9e1c9b
author | iuc |
---|---|
date | Wed, 18 Apr 2018 07:28:51 -0400 |
parents | b04635ebfab0 |
children | 2b3e65a4252f |
files | snpSift_annotate.xml snpSift_caseControl.xml snpSift_extractFields.xml snpSift_filter.xml snpSift_int.xml snpSift_macros.xml snpSift_rmInfo.xml snpSift_vartype.xml snpSift_vcfCheck.xml test-data/extFields_test3_in.vcf test-data/extFields_test3_out.vcf |
diffstat | 11 files changed, 354 insertions(+), 128 deletions(-) [+] |
line wrap: on
line diff
--- a/snpSift_annotate.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_annotate.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_annotate" name="SnpSift Annotate" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_annotate" name="SnpSift Annotate" version="@WRAPPER_VERSION@.galaxy0"> <description>SNPs from dbSnp</description> <!-- You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
--- a/snpSift_caseControl.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_caseControl.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="@WRAPPER_VERSION@.galaxy0"> <description>Count samples are in 'case' and 'control' groups.</description> <!-- You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
--- a/snpSift_extractFields.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_extractFields.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.galaxy0"> <options sanitize="False" /> <description>from a VCF file into a tabular file</description> <macros> @@ -9,27 +9,27 @@ <expand macro="version_command" /> <command><![CDATA[ @CONDA_SNPSIFT_JAR_PATH@ && -cat '$input' +cat '${input}' #if $one_effect_per_line: - | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" + | perl "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" #end if | SnpSift -Xmx6G extractFields #if $separator: - -s '$separator' + -s '${separator}' #end if #if $empty_text: - -e '$empty_text' + -e '${empty_text}' #end if - #echo ' '.join(['"%s"' % x for x in $extract.split()]) -> '$output' +> '${output}' ]]></command> <inputs> <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> - <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> + <param name="extract" type="text" label="Fields to extract" value="CHROM POS ID REF ALT FILTER" help="Separated by spaces. See help below for an explanation" /> <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> - <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. 
a comma, rather than a column for each of the multiple values" /> - <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> + <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" argument="-s" /> + <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" argument="-e"/> </inputs> <outputs> <data name="output" format="tabular" /> @@ -40,156 +40,171 @@ <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> <output name="output"> <assert_contents> - <has_text text="INTRAGENIC" /> - <not_has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> + <has_text text="INTRAGENIC" /> + <not_has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> </assert_contents> </output> </test> - <test> <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> <param name="separator" value=","/> <output name="output"> <assert_contents> - <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> + <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> </assert_contents> </output> </test> + <test> + <param name="input" ftype="vcf" value="extFields_test3_in.vcf"/> + <param name="extract" value="CHROM POS ID REF ALT FILTER ANN[*].EFFECT"/> + <param name="one_effect_per_line" value="true"/> + <output name="output" value="extFields_test3_out.vcf"/> + </test> </tests> <help><![CDATA[ -**SnpSift Extract Fields** +**What is does** + +`SnpSift Extract Fields <http://snpeff.sourceforge.net/SnpSift.html#Extract>`_ selects columns from a VCF dataset into a Tab-delimited format. 
+ +------ -Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. +.. class:: infomark + +**How to know which fields to extract?** + +A VCF dataset contains mandatory fields as well as optional fields. Mandatory fields are required by `VCF specifications <https://samtools.github.io/hts-specs/VCFv4.2.pdf>`_ and present in any valid VCF dataset. The **Fields to extract** input box of the tool above is already pre-filled with names of mandatory fields. -http://snpeff.sourceforge.net/SnpSift.html#Extract +To know what other fields are available in a given VCF file simply look at its header. `INFO` and `FORMAT` lines will contain descriptions of existing fields. For example, if you see a line: + +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> + +you can use *NS* as the field name. -You can also use sub-fields and genotype fields / sub-fields such as:: +------ + +**Dealing with fields generated with SnpEff** + +The current version of `SnpEff <http://snpeff.sourceforge.net/SnpEff_manual.html>`_ produces so-called *ANN* fields:: - Standard VCF fields: - CHROM - POS - ID - REF - ALT - FILTER - INFO fields: - AF - AC - DP - MQ - etc. (any info field available) - SnpEff 'ANN' fields: - "ANN[*].ALLELE" (alias GENOTYPE) - "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) - "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } - "ANN[*].GENE" Gene name (e.g. 'PSD3') - "ANN[*].GENEID" Gene ID - "ANN[*].FEATURE" - "ANN[*].FEATUREID" (alias TRID: Transcript ID) - "ANN[*].BIOTYPE" Biotype, as described by the annotations (e.g. 'protein_coding') - "ANN[*].RANK" Exon or Intron rank (i.e. 
exon number in a transcript) - "ANN[*].HGVS_C" (alias HGVS_DNA, CODON): Variant in HGVS (DNA) notation - "ANN[*].HGVS_P" (alias HGVS, HGVS_PROT, AA): Variant in HGVS (protein) notation - "ANN[*].CDNA_POS" (alias POS_CDNA) - "ANN[*].CDNA_LEN" (alias LEN_CDNA) - "ANN[*].CDS_POS" (alias POS_CDS) - "ANN[*].CDS_LEN" (alias LEN_CDS) - "ANN[*].AA_POS" (alias POS_AA) - "ANN[*].AA_LEN" (alias LEN_AA) - "ANN[*].DISTANCE" - "ANN[*].ERRORS" (alias WARNING, INFOS) - SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions, new version use 'ANN' field): - "EFF[*].EFFECT" - "EFF[*].IMPACT" - "EFF[*].FUNCLASS" - "EFF[*].CODON" - "EFF[*].AA" - "EFF[*].AA_LEN" - "EFF[*].GENE" - "EFF[*].BIOTYPE" - "EFF[*].CODING" - "EFF[*].TRID" - "EFF[*].RANK" - SnpEff 'LOF' fields: - "LOF[*].GENE" - "LOF[*].GENEID" - "LOF[*].NUMTR" - "LOF[*].PERC" - SnpEff' NMD' fields: - "NMD[*].GENE" - "NMD[*].GENEID" - "NMD[*].NUMTR" - "NMD[*].PERC" + "ANN[*].ALLELE" (alias GENOTYPE) + "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) + "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } + "ANN[*].GENE" Gene name (e.g. 'PSD3') + "ANN[*].GENEID" Gene ID + "ANN[*].FEATURE" + "ANN[*].FEATUREID" (alias TRID: Transcript ID) + "ANN[*].BIOTYPE" Biotype, as described by the annotations (e.g. 'protein_coding') + "ANN[*].RANK" Exon or Intron rank (i.e. 
exon number in a transcript) + "ANN[*].HGVS_C" (alias HGVS_DNA, CODON): Variant in HGVS (DNA) notation + "ANN[*].HGVS_P" (alias HGVS, HGVS_PROT, AA): Variant in HGVS (protein) notation + "ANN[*].CDNA_POS" (alias POS_CDNA) + "ANN[*].CDNA_LEN" (alias LEN_CDNA) + "ANN[*].CDS_POS" (alias POS_CDS) + "ANN[*].CDS_LEN" (alias LEN_CDS) + "ANN[*].AA_POS" (alias POS_AA) + "ANN[*].AA_LEN" (alias LEN_AA) + "ANN[*].DISTANCE" + "ANN[*].ERRORS" (alias WARNING, INFOS) + +Older versions produced *EFF* fields:: -Some examples: + "EFF[*].EFFECT" + "EFF[*].IMPACT" + "EFF[*].FUNCLASS" + "EFF[*].CODON" + "EFF[*].AA" + "EFF[*].AA_LEN" + "EFF[*].GENE" + "EFF[*].BIOTYPE" + "EFF[*].CODING" + "EFF[*].TRID" + "EFF[*].RANK" + +In addition there are *LOF* and *NMD* fields:: + + "LOF[*].GENE" + "LOF[*].GENEID" + "LOF[*].NUMTR" + "LOF[*].PERC" + + "NMD[*].GENE" + "NMD[*].GENEID" + "NMD[*].NUMTR" + "NMD[*].PERC" -- *Extracting chromosome, position, ID and allele frequency from a VCF file*: +To find out whether your VCF contains *ANN* or *EFF* annotations simply look at its header. + +----- - **CHROM POS ID AF** +**Usage examples** - The result will look something like:: +*Extracting chromosome, position, ID and allele frequency from a VCF file*: - #CHROM POS ID AF - 1 69134 0.086 - 1 69496 rs150690004 0.001 +**CHROM POS ID AF** + +The result will look something like:: -- *Extracting genotype fields*: + #CHROM POS ID AF + 1 69134 0.086 + 1 69496 rs150690004 0.001 - **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** - - This means to extract: +*Extracting genotype fields*: - - CHROM POS ID: regular fields (as in the previous example) - - THETA : This one is from INFO - - GEN[0].GL[1] : Second likelihood from first genotype - - GEN[1].GL : The whole GL fiels (all entries without separating them) - - GEN[3].GL[*] : All likelihoods form genotype 3 (this time they will be tab separated, as opposed to the previous one). 
- - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). +**CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** + +This means to extract: - The result will look something like:: +- CHROM POS ID: regular fields (as in the previous example) +- THETA : This one is from INFO +- GEN[0].GL[1] : Second likelihood from first genotype +- GEN[1].GL : The whole GL field (all entries without separating them) +- GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). +- GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). - #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT - 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 - 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 - 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 +The result will look something like:: -- *Extracting fields with multiple values*: - (notice that there are multiple effect columns per line because there are mutiple effects per variant) + #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT + 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 + 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 + 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 - **CHROM POS REF ALT ANN[*].EFFECT** +*Extracting fields with multiple values*: + (notice that there are multiple effect columns per line because there are multiple effects per variant) - The result will look something like:: +**CHROM POS REF ALT ANN[*].EFFECT** + +The result will look something like:: - #CHROM POS REF ALT ANN[*].EFFECT - 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant - 22 
17072035 C T missense_variant downstream_gene_variant - 22 17072258 C A missense_variant downstream_gene_variant + #CHROM POS REF ALT ANN[*].EFFECT + 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant + 22 17072035 C T missense_variant downstream_gene_variant + 22 17072258 C A missense_variant downstream_gene_variant -- *Extracting fields with multiple values using a comma as a multipe field separator:* +*Extracting fields with multiple values using a comma as a multiple field separator:* - **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** +**CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** - The result will look something like:: +The result will look something like:: - #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P - 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. - 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. - 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. + #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P + 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. + 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. + 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. 
-- *Extracting fields with multiple values, one effect per line:* +*Extracting fields with multiple values, one effect per line:* - **CHROM POS REF ALT ANN[*].EFFECT** +**CHROM POS REF ALT ANN[*].EFFECT** - The result will look something like:: +The result will look something like:: - #CHROM POS REF ALT ANN[*].EFFECT - 22 17071756 T C 3_prime_UTR_variant - 22 17071756 T C downstream_gene_variant - 22 17072035 C T missense_variant - 22 17072035 C T downstream_gene_variant - 22 17072258 C A missense_variant - 22 17072258 C A downstream_gene_variant + #CHROM POS REF ALT ANN[*].EFFECT + 22 17071756 T C 3_prime_UTR_variant + 22 17071756 T C downstream_gene_variant + 22 17072035 C T missense_variant + 22 17072035 C T downstream_gene_variant + 22 17072258 C A missense_variant + 22 17072258 C A downstream_gene_variant @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#Extract
--- a/snpSift_filter.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_filter.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_filter" name="SnpSift Filter" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_filter" name="SnpSift Filter" version="@WRAPPER_VERSION@.galaxy0"> <description>Filter variants using arbitrary expressions</description> <macros> <import>snpSift_macros.xml</import>
--- a/snpSift_int.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_int.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_int" name="SnpSift Intervals" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_int" name="SnpSift Intervals" version="@WRAPPER_VERSION@.galaxy0"> <description>Filter variants using intervals</description> <!-- You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
--- a/snpSift_macros.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_macros.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,7 +1,11 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="4.3.1p">snpsift</requirement> + <requirement type="package" version="4.3.1t">snpsift</requirement> + <!--Coreutils are required to make readlink work across platforms. Previous version did not work on MacOS--> + <requirement type="package" version="8.25">coreutils</requirement> + <!--PERL is required to make SnpSift helper scripts run--> + <requirement type="package" version="5.26">perl</requirement> </requirements> </xml> <xml name="stdio"> @@ -16,7 +20,7 @@ SnpSift dbnsfp 2>&1|head -n 1 ]]></version_command> </xml> - <token name="@WRAPPER_VERSION@">4.3</token> + <token name="@WRAPPER_VERSION@">4.3+t</token> <token name="@EXTERNAL_DOCUMENTATION@"> For details about this tool, please go to:
--- a/snpSift_rmInfo.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_rmInfo.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_rmInfo" name="SnpSift rmInfo" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_rmInfo" name="SnpSift rmInfo" version="@WRAPPER_VERSION@.galaxy0"> <description>remove INFO field annotations</description> <macros> <import>snpSift_macros.xml</import>
--- a/snpSift_vartype.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_vartype.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpsift_vartype" name="SnpSift Variant Type" version="@WRAPPER_VERSION@.1"> +<tool id="snpsift_vartype" name="SnpSift Variant Type" version="@WRAPPER_VERSION@.galaxy0"> <description>Annotate with variant type</description> <macros> <import>snpSift_macros.xml</import>
--- a/snpSift_vcfCheck.xml Tue Oct 24 07:28:17 2017 -0400 +++ b/snpSift_vcfCheck.xml Wed Apr 18 07:28:51 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="@WRAPPER_VERSION@.0"> +<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="@WRAPPER_VERSION@.galaxy0"> <description>basic checks for VCF specification compliance</description> <macros> <import>snpSift_macros.xml</import>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/extFields_test3_in.vcf Wed Apr 18 07:28:51 2018 -0400 @@ -0,0 +1,78 @@ +##fileformat=VCFv4.2 +##fileDate=20180328 +##source=freeBayes v1.1.0-46-g8d2b3a0-dirty +##reference=localref.fa +##contig=<ID=U00096,length=4639675> +##phasing=none +##commandline="freebayes --region U00096:0..4639675 --bam b_0.bam --bam b_1.bam --bam b_2.bam --bam b_3.bam --bam b_4.bam --bam b_5.bam --bam b_6.bam --bam b_7.bam --bam b_8.bam --bam b_9.bam --bam b_10.bam --bam b_11.bam --bam b_12.bam --fasta-reference localref.fa --vcf ./vcf_output/part_U00096:0..4639675.vcf --theta 0.001 --ploidy 1 -K -m 20 -q 20 -R 0 -Y 0 -e 1000 -F 0.2 -C 2 -G 1 --min-coverage 0 --min-alternate-qsum 0" +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype."> +##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype."> +##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally"> +##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally"> +##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred"> +##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred"> 
+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations"> +##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations"> +##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand"> +##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand"> +##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand"> +##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand"> +##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous"> +##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome"> +##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing 
the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele"> +##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele"> +##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality"> +##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without."> +##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best."> +##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout."> +##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex."> +##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. 
Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."> +##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position."> +##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."> +##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length"> +##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles"> +##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles"> +##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments"> +##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments"> +##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record."> +##INFO=<ID=technology.ILLUMINA,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from ILLUMINA"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele"> 
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block."> +##SnpEffVersion="4.3t (build 2017-11-24 10:18), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf -formatEff U00096 /galaxy-repl/main/files/024/679/dataset_24679041.dat " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank | Genotype [ | ERRORS | WARNINGS ] )' "> +##INFO=<ID=LOF,Number=.,Type=String,Description="Predicted loss of function effects for this variant. Format: 'Gene_Name | Gene_ID | Number_of_transcripts_in_gene | Percent_of_transcripts_affected'"> +##INFO=<ID=NMD,Number=.,Type=String,Description="Predicted nonsense mediated decay effects for this variant. Format: 'Gene_Name | Gene_ID | Number_of_transcripts_in_gene | Percent_of_transcripts_affected'"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C +U00096 1518079 . T G 1.82951E-13 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=13;CIGAR=1X;DP=170;DPB=170;DPRA=3.80208;EPP=7.18621;EPPR=80.8099;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=31.1313;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=314;QR=4764;RO=157;RPL=9;RPP=7.18621;RPPR=8.0033;RPR=4;RUN=1;SAF=13;SAP=31.2394;SAR=0;SRF=63;SRP=16.3019;SRR=94;TYPE=snp;technology.ILLUMINA=1;EFF=synonymous_variant(LOW|SILENT|ggT/ggG|p.Gly343Gly/c.1029T>G|345|curA|protein_coding|CODING|b1449|1|G),upstream_gene_variant(MODIFIER||2861|c.-2861A>C|31|yncL|protein_coding|CODING|b4598||G),upstream_gene_variant(MODIFIER||1724|c.-1724A>C|149|ydcZ|protein_coding|CODING|b1447||G),upstream_gene_variant(MODIFIER||1209|c.-1209A>C|172|mnaT|protein_coding|CODING|b1448||G),upstream_gene_variant(MODIFIER||207|c.-207T>G|221|mcbR|protein_coding|CODING|b1450||G),upstream_gene_variant(MODIFIER||3252|c.-3252T>G|353|yncE|protein_coding|CODING|b1452||G),downstream_gene_variant(MODIFIER||4499|c.*4499T>G|264|ydcV|protein_coding|CODING|b1443||G),downstream_gene_variant(MODIFIER||3053|c.*3053T>G|474|patD|protein_coding|CODING|b1444||G),downstream_gene_variant(MODIFIER||2493|c.*2493T>G|57|ydcX|protein_coding|CODING|b1445||G),downstream_gene_variant(MODIFIER||2174|c.*2174T>G|77|ydcY|protein_coding|CODING|b1446||G),downstream_gene_variant(MODIFIER||908|c.*908A>C|700|yncD|protein_coding|CODING|b1451||G),downstream_gene_variant(MODIFIER||4426|c.*4426A>C|499|ansP|protein_coding|CODING|b1453||G) GT:DP:AD:RO:QR:AO:QA:GL 0:4:3,1:3:86:1:20:0,-6.02567 0:8:8,0:8:254:0:0:0,-23.1652 0:13:12,1:12:353:1:20:0,-30.048 +U00096 1518212 . T A 0.0 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=2;CIGAR=1X;DP=489;DPB=489;DPRA=0.250522;EPP=3.0103;EPPR=61.9789;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=87.0721;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=78;QR=18812;RO=487;RPL=0;RPP=7.35324;RPPR=20.7076;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=285;SRP=33.7275;SRR=202;TYPE=snp;technology.ILLUMINA=1;EFF=upstream_gene_variant(MODIFIER||2994|c.-2994A>T|31|yncL|protein_coding|CODING|b4598||A),upstream_gene_variant(MODIFIER||1857|c.-1857A>T|149|ydcZ|protein_coding|CODING|b1447||A),upstream_gene_variant(MODIFIER||1342|c.-1342A>T|172|mnaT|protein_coding|CODING|b1448||A),upstream_gene_variant(MODIFIER||74|c.-74T>A|221|mcbR|protein_coding|CODING|b1450||A),upstream_gene_variant(MODIFIER||3119|c.-3119T>A|353|yncE|protein_coding|CODING|b1452||A),downstream_gene_variant(MODIFIER||4632|c.*4632T>A|264|ydcV|protein_coding|CODING|b1443||A),downstream_gene_variant(MODIFIER||3186|c.*3186T>A|474|patD|protein_coding|CODING|b1444||A),downstream_gene_variant(MODIFIER||2626|c.*2626T>A|57|ydcX|protein_coding|CODING|b1445||A),downstream_gene_variant(MODIFIER||2307|c.*2307T>A|77|ydcY|protein_coding|CODING|b1446||A),downstream_gene_variant(MODIFIER||124|c.*124T>A|345|curA|protein_coding|CODING|b1449||A),downstream_gene_variant(MODIFIER||775|c.*775A>T|700|yncD|protein_coding|CODING|b1451||A),downstream_gene_variant(MODIFIER||4293|c.*4293A>T|499|ansP|protein_coding|CODING|b1453||A),intergenic_region(MODIFIER|||n.1518212T>A|||||||A) GT:DP:AD:RO:QR:AO:QA:GL 0:10:8,2:8:312:2:78:0,-21.0397 0:9:9,0:9:356:0:0:0,-32.4019 0:65:65,0:65:2497:0:0:0,-224.912 +U00096 1523784 . G A 0.0 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=17;CIGAR=1X;DP=283;DPB=283;DPRA=2.07026;EPP=39.9253;EPPR=120.564;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=63.7167;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=375;QR=9560;RO=266;RPL=0;RPP=39.9253;RPPR=6.9614;RPR=17;RUN=1;SAF=0;SAP=39.9253;SAR=17;SRF=189;SRP=105.412;SRR=77;TYPE=snp;technology.ILLUMINA=1;EFF=missense_variant(MODERATE|MISSENSE|tCg/tTg|p.Ser74Leu/c.221C>T|499|ansP|protein_coding|CODING|b1453|1|A),upstream_gene_variant(MODIFIER||2695|c.-2695C>T|700|yncD|protein_coding|CODING|b1451||A),upstream_gene_variant(MODIFIER||487|c.-487G>A|205|yncG|protein_coding|CODING|b1454||A),upstream_gene_variant(MODIFIER||1180|c.-1180G>A|70|yncH|protein_coding|CODING|b1455||A),upstream_gene_variant(MODIFIER||2142|c.-2142G>A|678|rhsE|protein_coding|CODING|b1456||A|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||4162|c.-4162G>A|160|ydcD|protein_coding|CODING|b1457||A),upstream_gene_variant(MODIFIER||4826|c.-4826G>A|350|yncI|protein_coding|CODING|b1459||A|WARNING_TRANSCRIPT_NO_START_CODON),downstream_gene_variant(MODIFIER||4833|c.*4833G>A|221|mcbR|protein_coding|CODING|b1450||A),downstream_gene_variant(MODIFIER||1392|c.*1392G>A|353|yncE|protein_coding|CODING|b1452||A) GT:DP:AD:RO:QR:AO:QA:GL 0:9:9,0:9:327:0:0:0,-29.7663 0:16:16,0:16:580:0:0:0,-52.5218 0:26:24,2:24:916:2:44:0,-78.566 +U00096 1524994 . CTTT CT 0.0 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=2;CIGAR=1M2D1M;DP=302;DPB=301;DPRA=0.368601;EPP=7.35324;EPPR=19.6872;GTI=0;LEN=2;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=68.2413;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=71;QR=11356;RO=300;RPL=1;RPP=3.0103;RPPR=19.6872;RPR=1;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=170;SRP=14.5915;SRR=130;TYPE=del;technology.ILLUMINA=1;EFF=frameshift_variant(HIGH||ttc/|p.Phe12fs/c.34_35delTT|70|yncH|protein_coding|CODING|b1455|1|CT|INFO_REALIGN_3_PRIME),upstream_gene_variant(MODIFIER||3907|c.-3908_-3907delAA|700|yncD|protein_coding|CODING|b1451||CT),upstream_gene_variant(MODIFIER||992|c.-993_-992delAA|499|ansP|protein_coding|CODING|b1453||CT),upstream_gene_variant(MODIFIER||930|c.-930_-929delTT|678|rhsE|protein_coding|CODING|b1456||CT|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||2950|c.-2950_-2949delTT|160|ydcD|protein_coding|CODING|b1457||CT),upstream_gene_variant(MODIFIER||3614|c.-3614_-3613delTT|350|yncI|protein_coding|CODING|b1459||CT|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||4844|c.-4844_-4843delTT|378|ydcC|protein_coding|CODING|b1460||CT),downstream_gene_variant(MODIFIER||2604|c.*2604_*2605delTT|353|yncE|protein_coding|CODING|b1452||CT),downstream_gene_variant(MODIFIER||108|c.*108_*109delTT|205|yncG|protein_coding|CODING|b1454||CT);LOF=(yncH|b1455|1|1.00) GT:DP:AD:RO:QR:AO:QA:GL 0:6:6,0:6:209:0:0:0,-19.1481 0:9:7,2:7:271:2:71:0,-18.0127 0:26:26,0:26:1011:0:0:0,-91.2905 +U00096 1525214 . T A 0.0 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=2;CIGAR=1X;DP=325;DPB=325;DPRA=0.264151;EPP=3.0103;EPPR=21.8947;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=54.1924;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=70;QR=12493;RO=323;RPL=2;RPP=7.35324;RPPR=11.2458;RPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=153;SRP=4.9532;SRR=170;TYPE=snp;technology.ILLUMINA=1;EFF=upstream_gene_variant(MODIFIER||4125|c.-4125A>T|700|yncD|protein_coding|CODING|b1451||A),upstream_gene_variant(MODIFIER||1210|c.-1210A>T|499|ansP|protein_coding|CODING|b1453||A),upstream_gene_variant(MODIFIER||712|c.-712T>A|678|rhsE|protein_coding|CODING|b1456||A|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||2732|c.-2732T>A|160|ydcD|protein_coding|CODING|b1457||A),upstream_gene_variant(MODIFIER||3396|c.-3396T>A|350|yncI|protein_coding|CODING|b1459||A|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||4626|c.-4626T>A|378|ydcC|protein_coding|CODING|b1460||A),downstream_gene_variant(MODIFIER||2822|c.*2822T>A|353|yncE|protein_coding|CODING|b1452||A),downstream_gene_variant(MODIFIER||326|c.*326T>A|205|yncG|protein_coding|CODING|b1454||A),downstream_gene_variant(MODIFIER||38|c.*38T>A|70|yncH|protein_coding|CODING|b1455||A),intergenic_region(MODIFIER|||n.1525214T>A|||||||A) GT:DP:AD:RO:QR:AO:QA:GL 0:6:6,0:6:232:0:0:0,-21.246 0:11:11,0:11:433:0:0:0,-39.3229 0:38:38,0:38:1471:0:0:0,-132.652 +U00096 1525377 . AGG AGGG,AGT 5.68956E-14 . 
AB=0,0;ABP=0,0;AC=0,0;AF=0,0;AN=13;AO=2,3;CIGAR=1M1I2M,2M1X;DP=621;DPB=621.667;DPRA=0.196399,1.09363;EPP=3.0103,3.73412;EPPR=17.9768;GTI=0;LEN=1,1;MEANALT=2,1.5;MQM=60,60;MQMR=60;NS=13;NUMALT=2;ODDS=87.143;PAIRED=1,1;PAIREDR=1;PAO=0,0;PQA=0,0;PQR=0;PRO=0;QA=71,97;QR=23655;RO=613;RPL=2,1;RPP=7.35324,3.73412;RPPR=192.035;RPR=0,2;RUN=1,1;SAF=1,2;SAP=3.0103,3.73412;SAR=1,1;SRF=366;SRP=53.1738;SRR=247;TYPE=ins,snp;technology.ILLUMINA=1,1;EFF=upstream_gene_variant(MODIFIER||4290|c.-4290C>A|700|yncD|protein_coding|CODING|b1451||AGT),upstream_gene_variant(MODIFIER||4291|c.-4291_-4290insC|700|yncD|protein_coding|CODING|b1451||AGGG),upstream_gene_variant(MODIFIER||1375|c.-1375C>A|499|ansP|protein_coding|CODING|b1453||AGT),upstream_gene_variant(MODIFIER||1376|c.-1376_-1375insC|499|ansP|protein_coding|CODING|b1453||AGGG),upstream_gene_variant(MODIFIER||547|c.-547G>T|678|rhsE|protein_coding|CODING|b1456||AGT|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||546|c.-547_-546insG|678|rhsE|protein_coding|CODING|b1456||AGGG|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||2567|c.-2567G>T|160|ydcD|protein_coding|CODING|b1457||AGT),upstream_gene_variant(MODIFIER||2566|c.-2567_-2566insG|160|ydcD|protein_coding|CODING|b1457||AGGG),upstream_gene_variant(MODIFIER||3231|c.-3231G>T|350|yncI|protein_coding|CODING|b1459||AGT|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||3230|c.-3231_-3230insG|350|yncI|protein_coding|CODING|b1459||AGGG|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||4461|c.-4461G>T|378|ydcC|protein_coding|CODING|b1460||AGT),upstream_gene_variant(MODIFIER||4460|c.-4461_-4460insG|378|ydcC|protein_coding|CODING|b1460||AGGG),downstream_gene_variant(MODIFIER||2987|c.*2987G>T|353|yncE|protein_coding|CODING|b1452||AGT),downstream_gene_variant(MODIFIER||2988|c.*2987_*2988insG|353|yncE|protein_coding|CODING|b1452||AGGG),downstream_gene_variant(MODIFIER||491|c.*491G>T|205|yncG|protein_coding|CODING|b1454||AGT),dow
nstream_gene_variant(MODIFIER||492|c.*491_*492insG|205|yncG|protein_coding|CODING|b1454||AGGG),downstream_gene_variant(MODIFIER||203|c.*203G>T|70|yncH|protein_coding|CODING|b1455||AGT),downstream_gene_variant(MODIFIER||204|c.*203_*204insG|70|yncH|protein_coding|CODING|b1455||AGGG),intergenic_region(MODIFIER|||n.1525379G>T|||||||AGT),intergenic_region(MODIFIER|||n.1525379_1525380insG|||||||AGGG) GT:DP:AD:RO:QR:AO:QA:GL 0:19:19,0,0:19:734:0,0:0,0:0,-66.383,-66.383 0:10:6,2,2:6:242:2,2:71,77:0,-15.396,-14.852 0:83:83,0,0:83:3242:0,0:0,0:0,-291.874,-291.874 +U00096 1527079 . A G 7.60942E-10 . AB=0;ABP=0;AC=0;AF=0;AN=13;AO=2;CIGAR=1X;DP=67;DPB=67;DPRA=0.967742;EPP=7.35324;EPPR=8.65613;GTI=0;LEN=1;MEANALT=1;MQM=40;MQMR=39.9846;NS=13;NUMALT=1;ODDS=22.6884;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=44;QR=2138;RO=65;RPL=2;RPP=7.35324;RPPR=7.05258;RPR=0;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=64;SRP=135.604;SRR=1;TYPE=snp;technology.ILLUMINA=1;EFF=missense_variant(MODERATE|MISSENSE|gAg/gGg|p.Glu385Gly/c.1154A>G|678|rhsE|protein_coding|CODING|b1456|1|G|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||3075|c.-3075T>C|499|ansP|protein_coding|CODING|b1453||G),upstream_gene_variant(MODIFIER||867|c.-867A>G|160|ydcD|protein_coding|CODING|b1457||G),upstream_gene_variant(MODIFIER||1531|c.-1531A>G|350|yncI|protein_coding|CODING|b1459||G|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||2761|c.-2761A>G|378|ydcC|protein_coding|CODING|b1460||G),upstream_gene_variant(MODIFIER||3997|c.-3997A>G|77|pptA|protein_coding|CODING|b1461||G),upstream_gene_variant(MODIFIER||4969|c.-4969A>G|281|nhoA|protein_coding|CODING|b1463||G),downstream_gene_variant(MODIFIER||4687|c.*4687A>G|353|yncE|protein_coding|CODING|b1452||G),downstream_gene_variant(MODIFIER||2191|c.*2191A>G|205|yncG|protein_coding|CODING|b1454||G),downstream_gene_variant(MODIFIER||1903|c.*1903A>G|70|yncH|protein_coding|CODING|b1455||G),downstream_gene_variant(MODIFIER||4227|c.*4227T>C|189|yddH|protein_coding
|CODING|b1462||G) GT:DP:AD:RO:QR:AO:QA:GL 0:4:4,0:4:145:0:0:0,-12.7264 0:6:6,0:6:196:0:0:0,-17.3319 0:9:9,0:9:279:0:0:0,-24.5414 +U00096 1527291 . G T 8.98077E-15 . AB=0;ABP=0;AC=0;AF=0;AN=13;AO=2;CIGAR=1X;DP=396;DPB=396;DPRA=0.215938;EPP=3.0103;EPPR=42.9311;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=57.3966;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=75;QR=13943;RO=393;RPL=0;RPP=7.35324;RPPR=39.2623;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=215;SRP=10.5745;SRR=178;TYPE=snp;technology.ILLUMINA=1;EFF=missense_variant(MODERATE|MISSENSE|Ggc/Tgc|p.Gly456Cys/c.1366G>T|678|rhsE|protein_coding|CODING|b1456|1|T|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||3287|c.-3287C>A|499|ansP|protein_coding|CODING|b1453||T),upstream_gene_variant(MODIFIER||655|c.-655G>T|160|ydcD|protein_coding|CODING|b1457||T),upstream_gene_variant(MODIFIER||1319|c.-1319G>T|350|yncI|protein_coding|CODING|b1459||T|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||2549|c.-2549G>T|378|ydcC|protein_coding|CODING|b1460||T),upstream_gene_variant(MODIFIER||3785|c.-3785G>T|77|pptA|protein_coding|CODING|b1461||T),upstream_gene_variant(MODIFIER||4757|c.-4757G>T|281|nhoA|protein_coding|CODING|b1463||T),downstream_gene_variant(MODIFIER||4899|c.*4899G>T|353|yncE|protein_coding|CODING|b1452||T),downstream_gene_variant(MODIFIER||2403|c.*2403G>T|205|yncG|protein_coding|CODING|b1454||T),downstream_gene_variant(MODIFIER||2115|c.*2115G>T|70|yncH|protein_coding|CODING|b1455||T),downstream_gene_variant(MODIFIER||4015|c.*4015C>A|189|yddH|protein_coding|CODING|b1462||T) GT:DP:AD:RO:QR:AO:QA:GL 0:12:12,0:12:422:0:0:0,-38.3029 0:12:12,0:12:418:0:0:0,-37.9417 0:48:48,0:48:1735:0:0:0,-156.391 +U00096 1527328 . A C 5.89448E-14 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=53;CIGAR=1X;DP=371;DPB=371;DPRA=0;EPP=53.2;EPPR=17.4595;GTI=0;LEN=1;MEANALT=1;MQM=59.7547;MQMR=59.9843;NS=13;NUMALT=1;ODDS=49.8508;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1375;QR=11600;RO=318;RPL=9;RPP=53.2;RPPR=3.44733;RPR=44;RUN=1;SAF=0;SAP=118.098;SAR=53;SRF=257;SRP=265.335;SRR=61;TYPE=snp;technology.ILLUMINA=1;EFF=missense_variant(MODERATE|MISSENSE|cAc/cCc|p.His468Pro/c.1403A>C|678|rhsE|protein_coding|CODING|b1456|1|C|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||3324|c.-3324T>G|499|ansP|protein_coding|CODING|b1453||C),upstream_gene_variant(MODIFIER||618|c.-618A>C|160|ydcD|protein_coding|CODING|b1457||C),upstream_gene_variant(MODIFIER||1282|c.-1282A>C|350|yncI|protein_coding|CODING|b1459||C|WARNING_TRANSCRIPT_NO_START_CODON),upstream_gene_variant(MODIFIER||2512|c.-2512A>C|378|ydcC|protein_coding|CODING|b1460||C),upstream_gene_variant(MODIFIER||3748|c.-3748A>C|77|pptA|protein_coding|CODING|b1461||C),upstream_gene_variant(MODIFIER||4720|c.-4720A>C|281|nhoA|protein_coding|CODING|b1463||C),downstream_gene_variant(MODIFIER||4936|c.*4936A>C|353|yncE|protein_coding|CODING|b1452||C),downstream_gene_variant(MODIFIER||2440|c.*2440A>C|205|yncG|protein_coding|CODING|b1454||C),downstream_gene_variant(MODIFIER||2152|c.*2152A>C|70|yncH|protein_coding|CODING|b1455||C),downstream_gene_variant(MODIFIER||3978|c.*3978T>G|189|yddH|protein_coding|CODING|b1462||C) GT:DP:AD:RO:QR:AO:QA:GL 0:22:17,5:17:627:5:112:0,-46.4475 0:11:10,1:10:378:1:22:0,-32.1693 0:33:31,2:31:1151:2:49:0,-99.2161 +U00096 1532370 . G T 7.50367E-15 . 
AB=0;ABP=0;AC=0;AF=0;AN=12;AO=2;CIGAR=1X;DP=378;DPB=378;DPRA=0.268293;EPP=3.0103;EPPR=19.9408;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=12;NUMALT=1;ODDS=74.0204;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=76;QR=13838;RO=374;RPL=2;RPP=7.35324;RPPR=5.82044;RPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=216;SRP=22.5419;SRR=158;TYPE=snp;technology.ILLUMINA=1;EFF=missense_variant(MODERATE|MISSENSE|cGt/cTt|p.Arg108Leu/c.323G>T|281|nhoA|protein_coding|CODING|b1463|1|T),upstream_gene_variant(MODIFIER||495|c.-495C>A|189|yddH|protein_coding|CODING|b1462||T),downstream_gene_variant(MODIFIER||4408|c.*4408G>T|678|rhsE|protein_coding|CODING|b1456||T|WARNING_TRANSCRIPT_NO_START_CODON),downstream_gene_variant(MODIFIER||3942|c.*3942G>T|160|ydcD|protein_coding|CODING|b1457||T),downstream_gene_variant(MODIFIER||2707|c.*2707G>T|350|yncI|protein_coding|CODING|b1459||T|WARNING_TRANSCRIPT_NO_START_CODON),downstream_gene_variant(MODIFIER||1394|c.*1394G>T|378|ydcC|protein_coding|CODING|b1460||T),downstream_gene_variant(MODIFIER||1061|c.*1061G>T|77|pptA|protein_coding|CODING|b1461||T),downstream_gene_variant(MODIFIER||619|c.*619C>A|297|yddE|protein_coding|CODING|b1464||T),downstream_gene_variant(MODIFIER||1591|c.*1591C>A|226|narV|protein_coding|CODING|b1465||T),downstream_gene_variant(MODIFIER||2268|c.*2268C>A|231|narW|protein_coding|CODING|b1466||T),downstream_gene_variant(MODIFIER||2963|c.*2963C>A|514|narY|protein_coding|CODING|b1467||T),downstream_gene_variant(MODIFIER||4504|c.*4504C>A|1246|narZ|protein_coding|CODING|b1468||T) GT:DP:AD:RO:QR:AO:QA:GL 0:16:16,0:16:583:0:0:0,-52.8004 0:9:7,2:7:249:2:76:0,-15.539 0:46:46,0:46:1675:0:0:0,-151.009 +U00096 1533613 . T C 6.56972E-15 . 
AB=0;ABP=0;AC=0;AF=0;AN=13;AO=2;CIGAR=1X;DP=489;DPB=489;DPRA=0.0989691;EPP=3.0103;EPPR=70.4686;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=59.9979;NS=13;NUMALT=1;ODDS=41.1942;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=80;QR=18551;RO=487;RPL=0;RPP=7.35324;RPPR=96.7582;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=203;SRP=32.265;SRR=284;TYPE=snp;technology.ILLUMINA=1;EFF=synonymous_variant(LOW|SILENT|ggA/ggG|p.Gly90Gly/c.270A>G|297|yddE|protein_coding|CODING|b1464|1|C),upstream_gene_variant(MODIFIER||1738|c.-1738A>G|189|yddH|protein_coding|CODING|b1462||C),downstream_gene_variant(MODIFIER||3950|c.*3950T>C|350|yncI|protein_coding|CODING|b1459||C|WARNING_TRANSCRIPT_NO_START_CODON),downstream_gene_variant(MODIFIER||2637|c.*2637T>C|378|ydcC|protein_coding|CODING|b1460||C),downstream_gene_variant(MODIFIER||2304|c.*2304T>C|77|pptA|protein_coding|CODING|b1461||C),downstream_gene_variant(MODIFIER||720|c.*720T>C|281|nhoA|protein_coding|CODING|b1463||C),downstream_gene_variant(MODIFIER||348|c.*348A>G|226|narV|protein_coding|CODING|b1465||C),downstream_gene_variant(MODIFIER||1025|c.*1025A>G|231|narW|protein_coding|CODING|b1466||C),downstream_gene_variant(MODIFIER||1720|c.*1720A>G|514|narY|protein_coding|CODING|b1467||C),downstream_gene_variant(MODIFIER||3261|c.*3261A>G|1246|narZ|protein_coding|CODING|b1468||C) GT:DP:AD:RO:QR:AO:QA:GL 0:26:26,0:26:1013:0:0:0,-91.465 0:11:11,0:11:417:0:0:0,-37.8758 0:58:58,0:58:2210:0:0:0,-199.104
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/extFields_test3_out.vcf Wed Apr 18 07:28:51 2018 -0400 @@ -0,0 +1,129 @@ +CHROM POS ID REF ALT FILTER ANN[*].EFFECT +U00096 1518079 T G synonymous_variant +U00096 1518079 T G upstream_gene_variant +U00096 1518079 T G upstream_gene_variant +U00096 1518079 T G upstream_gene_variant +U00096 1518079 T G upstream_gene_variant +U00096 1518079 T G upstream_gene_variant +U00096 1518079 T G downstream_gene_variant +U00096 1518079 T G downstream_gene_variant +U00096 1518079 T G downstream_gene_variant +U00096 1518079 T G downstream_gene_variant +U00096 1518079 T G downstream_gene_variant +U00096 1518079 T G downstream_gene_variant +U00096 1518212 T A upstream_gene_variant +U00096 1518212 T A upstream_gene_variant +U00096 1518212 T A upstream_gene_variant +U00096 1518212 T A upstream_gene_variant +U00096 1518212 T A upstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A downstream_gene_variant +U00096 1518212 T A intergenic_region +U00096 1523784 G A missense_variant +U00096 1523784 G A upstream_gene_variant +U00096 1523784 G A upstream_gene_variant +U00096 1523784 G A upstream_gene_variant +U00096 1523784 G A upstream_gene_variant +U00096 1523784 G A upstream_gene_variant +U00096 1523784 G A upstream_gene_variant +U00096 1523784 G A downstream_gene_variant +U00096 1523784 G A downstream_gene_variant +U00096 1524994 CTTT CT frameshift_variant +U00096 1524994 CTTT CT upstream_gene_variant +U00096 1524994 CTTT CT upstream_gene_variant +U00096 1524994 CTTT CT upstream_gene_variant +U00096 1524994 CTTT CT upstream_gene_variant +U00096 1524994 CTTT CT upstream_gene_variant +U00096 1524994 CTTT CT upstream_gene_variant +U00096 1524994 CTTT CT downstream_gene_variant +U00096 
1524994 CTTT CT downstream_gene_variant +U00096 1525214 T A upstream_gene_variant +U00096 1525214 T A upstream_gene_variant +U00096 1525214 T A upstream_gene_variant +U00096 1525214 T A upstream_gene_variant +U00096 1525214 T A upstream_gene_variant +U00096 1525214 T A upstream_gene_variant +U00096 1525214 T A downstream_gene_variant +U00096 1525214 T A downstream_gene_variant +U00096 1525214 T A downstream_gene_variant +U00096 1525214 T A intergenic_region +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT upstream_gene_variant +U00096 1525377 AGG AGGG,AGT downstream_gene_variant +U00096 1525377 AGG AGGG,AGT downstream_gene_variant +U00096 1525377 AGG AGGG,AGT downstream_gene_variant +U00096 1525377 AGG AGGG,AGT downstream_gene_variant +U00096 1525377 AGG AGGG,AGT downstream_gene_variant +U00096 1525377 AGG AGGG,AGT downstream_gene_variant +U00096 1525377 AGG AGGG,AGT intergenic_region +U00096 1525377 AGG AGGG,AGT intergenic_region +U00096 1527079 A G missense_variant +U00096 1527079 A G upstream_gene_variant +U00096 1527079 A G upstream_gene_variant +U00096 1527079 A G upstream_gene_variant +U00096 1527079 A G upstream_gene_variant +U00096 1527079 A G upstream_gene_variant +U00096 1527079 A G upstream_gene_variant +U00096 1527079 A G downstream_gene_variant +U00096 1527079 A G downstream_gene_variant +U00096 1527079 A G downstream_gene_variant +U00096 1527079 A G downstream_gene_variant +U00096 1527291 G T missense_variant +U00096 
1527291 G T upstream_gene_variant +U00096 1527291 G T upstream_gene_variant +U00096 1527291 G T upstream_gene_variant +U00096 1527291 G T upstream_gene_variant +U00096 1527291 G T upstream_gene_variant +U00096 1527291 G T upstream_gene_variant +U00096 1527291 G T downstream_gene_variant +U00096 1527291 G T downstream_gene_variant +U00096 1527291 G T downstream_gene_variant +U00096 1527291 G T downstream_gene_variant +U00096 1527328 A C missense_variant +U00096 1527328 A C upstream_gene_variant +U00096 1527328 A C upstream_gene_variant +U00096 1527328 A C upstream_gene_variant +U00096 1527328 A C upstream_gene_variant +U00096 1527328 A C upstream_gene_variant +U00096 1527328 A C upstream_gene_variant +U00096 1527328 A C downstream_gene_variant +U00096 1527328 A C downstream_gene_variant +U00096 1527328 A C downstream_gene_variant +U00096 1527328 A C downstream_gene_variant +U00096 1532370 G T missense_variant +U00096 1532370 G T upstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1532370 G T downstream_gene_variant +U00096 1533613 T C synonymous_variant +U00096 1533613 T C upstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant +U00096 1533613 T C downstream_gene_variant