Mercurial > repos > iuc > snpeff
changeset 29:ca2b512e8d7c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
author | iuc |
---|---|
date | Mon, 21 Oct 2024 13:56:15 +0000 |
parents | 6322be79bd8e |
children | c7275bd8b4d6 |
files | gbk2fa.py snpEff.xml snpEff_create_db.xml snpEff_databases.xml snpEff_download.xml snpEff_macros.xml snpeff_get_chr_names.xml test-data/input.vcf |
diffstat | 8 files changed, 92 insertions(+), 149 deletions(-) [+] |
line wrap: on
line diff
--- a/gbk2fa.py Sat Sep 28 16:27:56 2024 +0000 +++ b/gbk2fa.py Mon Oct 21 13:56:15 2024 +0000 @@ -6,6 +6,9 @@ def get_opener(gbk_filename): + """Determines the appropriate opener for a given file, supporting + bzip2, gzip, or standard open. + """ try: bz2.open(gbk_filename).read(1) return bz2.open @@ -18,30 +21,38 @@ return open -parser = argparse.ArgumentParser() -parser.add_argument( - "genbank_file", - help="GenBank input file. Can be compressed with gzip or bzip2" -) -parser.add_argument( - "fasta_file", help="FASTA output datset" -) -parser.add_argument( - "--remove_version", action="store_true", - help="Remove version number from NCBI form formatted accession numbers. " - "For example, this would convert 'B000657.2' to 'B000657'" -) -args = parser.parse_args() +def main(): + parser = argparse.ArgumentParser( + description="Convert GenBank files to FASTA format. " + "Supports gzip and bzip2 compressed files." + ) + parser.add_argument( + "genbank_file", + help="GenBank input file. Can be compressed with gzip or bzip2" + ) + parser.add_argument( + "fasta_file", + help="FASTA output dataset" + ) + parser.add_argument( + "--remove_version", action="store_true", + help="Remove version number from NCBI formatted accession numbers. " + "For example, this converts 'B000657.2' to 'B000657'." 
+ ) + args = parser.parse_args() + + gbk_open = get_opener(args.genbank_file) + with gbk_open(args.genbank_file, 'rt') as input_handle, \ + open(args.fasta_file, 'w') as output_handle: + for seq_record in SeqIO.parse(input_handle, 'genbank'): + if args.remove_version: + seq_id = seq_record.id.split('.')[0] + else: + seq_id = seq_record.id + print(f'Writing FASTA record: {seq_id}') + output_handle.write(f'>{seq_id}\n') + output_handle.write(f'{seq_record.seq}\n') -gbk_open = get_opener(args.genbank_file) -with gbk_open(args.genbank_file, 'rt') as input_handle, \ - open(args.fasta_file, 'w') as output_handle: - for seq_record in SeqIO.parse(input_handle, 'genbank'): - if args.remove_version: - seq_id = seq_record.id.split('.')[0] - else: - seq_id = seq_record.id - print('Writing FASTA record: {}'.format(seq_id)) - print('>' + seq_id, file=output_handle) - print(seq_record.seq, file=output_handle) +if __name__ == "__main__": + main()
--- a/snpEff.xml Sat Sep 28 16:27:56 2024 +0000 +++ b/snpEff.xml Mon Oct 21 13:56:15 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="snpEff" name="SnpEff eff:" version="@WRAPPER_VERSION@.galaxy2"> +<tool id="snpEff" name="SnpEff eff:" version="@SNPEFF_VERSION@+galaxy@WRAPPER_VERSION@" profile="23.0"> <description> annotate variants</description> <macros> <import>snpEff_macros.xml</import> @@ -13,7 +13,7 @@ ln -s '${intervals}' intervals.bed && #end if snpEff @JAVA_OPTIONS@ eff - -i $inputFormat -o ${outputConditional.outputFormat} -upDownStreamLen $udLength + -i $inputFormat -o ${outputFormat} -upDownStreamLen $udLength #if $spliceSiteSize and str($spliceSiteSize) != '': -spliceSiteSize "$spliceSiteSize" #end if @@ -53,9 +53,6 @@ #if $csvStats: -csvStats '$csvFile' #end if - #if str($offset) != 'default': - ${offset} - #end if #if str($chr).strip() != '': -chr '$chr' #end if @@ -103,35 +100,21 @@ mkdir '$statsFile.files_path' && mv '$genes_file' '#echo os.path.join($statsFile.files_path, $genes_file_name)#' #end if - #if $outputConditional.outputFormat == 'gatk' and $outputConditional.gatk_v1 - && - ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently supported by the GATK. 
Supported versions are: [2.0.5]" - sed -i.bak -e 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' '$snpeff_output' - #end if ]]></command> <inputs> <param name="input" type="data" format="vcf,bed" label="Sequence changes (SNPs, MNPs, InDels)"/> - - <param name="inputFormat" type="select" label="Input format"> + <param argument="-i" name="inputFormat" type="select" label="Input format" help="Specify the format of input dataset(s)"> <option value="vcf" selected="true">VCF</option> - <option value="bed">BED (Deprecated)</option> + <option value="bed">BED</option> </param> - - <conditional name="outputConditional"> - <param name="outputFormat" type="select" label="Output format"> - <option value="vcf" selected="true">VCF (only if input is VCF)</option> - <option value="gatk">GATK-compatible VCF (only if input is VCF)</option> - <option value="bed">BED</option> - <option value="bedAnn">BED annotations</option> - </param> - <when value="vcf" /> - <when value="gatk"> - <param name="gatk_v1" type="boolean" checked="true" label="Compatible with GATK 1.x" /> - </when> - <when value="bed" /> - <when value="bedAnn" /> - </conditional> - <param name="csvStats" type="boolean" truevalue="-csvStats" falsevalue="" checked="false" label="Create CSV report, useful for downstream analysis (-csvStats)" /> + <param argument="-o" name="outputFormat" type="select" label="Output format" help="Specify output format"> + <option value="vcf" selected="true">VCF (only if input is VCF)</option> + <option value="gatk">GATK-compatible VCF (only if input is VCF)</option> + <option value="bed">BED</option> + <option value="bedAnn">BED annotations</option> + </param> + <param argument="-csvStats" type="boolean" truevalue="-csvStats" falsevalue="" checked="false" label="Create CSV report?" 
help="Useful for downstream analyses and report generation" /> + <param argument="-noStats" name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats?" help="Generates an HTML summary of results"/> <conditional name="snpDb"> <param name="genomeSrc" type="select" label="Genome source"> <!-- These options are referenced in the help section of SnpEff download tool. If you change them, change help of SnpEff download as well --> @@ -171,8 +154,7 @@ </section> </when> <when value="named"> - <param name="genome_version" type="text" value="" label="Snpff Genome Version Name (e.g. GRCh38.86)"> - <help>@SNPEFF_DATABASE_URL@</help> + <param name="genome_version" type="text" value="" label="Snpff Genome Version Name (e.g. GRCh38.86)" help="A list of databases can be obtained with 'snpEff download' tool"> <validator type="empty_field" message="A genome version name is required" /> </param> </when> @@ -209,18 +191,16 @@ </param> </when> </conditional> - <param name="udLength" argument="-ud" type="select" label="Upstream / Downstream length"> - <option value="0">No upstream / downstream intervals (0 bases)</option> + <option value="0" selected="true">No upstream / downstream intervals (0 bases)</option> <option value="200">200 bases</option> <option value="500">500 bases</option> <option value="1000">1000 bases</option> <option value="2000">2000 bases</option> - <option value="5000" selected="true">5000 bases</option> + <option value="5000">5000 bases</option> <option value="10000">10000 bases</option> <option value="20000">20000 bases</option> </param> - <param name="spliceSiteSize" argument="-ss" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases"> <option value="1">1 base</option> <option value="2" selected="true">2 bases</option> @@ -232,7 +212,6 @@ <option value="8">8 bases</option> <option value="9">9 bases</option> </param> - <conditional name="spliceRegion"> <param 
name="setSpliceRegions" type="select" label="spliceRegion Settings"> <option value="no">Use Defaults</option> @@ -245,7 +224,6 @@ <param argument="-spliceRegionIntronMax" type="integer" value="" min="1" max="10" optional="true" label="Set maximum number of bases for splice site region within intron. Default: 8 bases" /> </when> </conditional> - <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options"> <option value="-formatEff">Use 'EFF' field compatible with older versions (instead of 'ANN')</option> <option value="-classic">Use Classic Effect names and amino acid variant annotations (NON_SYNONYMOUS_CODING vs missense_variant and G180R vs p.Gly180Arg/c.538G>C)</option> @@ -334,12 +312,6 @@ </param> </when> </conditional> - - <param name="offset" type="select" display="radio" label="Chromosomal position"> - <option value="default" selected="true">Use default (based on input type)</option> - <option value="-0">Force zero-based positions (both input and output)</option> - <option value="-1">Force one-based positions (both input and output)</option> - </param> <param argument="-chr" type="text" label="Text to prepend to chromosome name"> <help> By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'. 
@@ -347,7 +319,6 @@ </help> <validator type="regex" message="No whitespace allowed">^\S*$</validator> </param> - <param name="generate_stats" argument="-noStats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats" /> <param argument="-noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Suppress reporting usage statistics to server" /> </inputs> <outputs> @@ -375,8 +346,8 @@ <param name="generate_stats" value="true"/> <output name="snpeff_output"> <assert_contents> - <has_text_matching expression="KJ660346\t572\t.*missense_variant" /> - <has_text_matching expression="KJ660346\t1024\t.*synonymous_variant" /> + <has_text_matching expression="KJ660346.1\t572\t.*missense_variant" /> + <has_text_matching expression="KJ660346.1\t1024\t.*synonymous_variant" /> </assert_contents> </output> <output name="statsFile"> @@ -398,13 +369,13 @@ <param name="csvStats" value="true"/> <output name="snpeff_output"> <assert_contents> - <has_text_matching expression="KJ660346\t572\t.*missense_variant" /> - <has_text_matching expression="KJ660346\t1024\t.*synonymous_variant" /> + <has_text_matching expression="KJ660346.1\t572\t.*missense_variant" /> + <has_text_matching expression="KJ660346.1\t1024\t.*synonymous_variant" /> </assert_contents> </output> <output name="csvFile"> <assert_contents> - <has_n_lines n="185"/> + <has_n_lines n="134"/> <has_n_columns n="1" sep=","/> </assert_contents> </output>
--- a/snpEff_create_db.xml Sat Sep 28 16:27:56 2024 +0000 +++ b/snpEff_create_db.xml Mon Oct 21 13:56:15 2024 +0000 @@ -1,11 +1,11 @@ -<tool id="snpEff_build_gb" name="SnpEff build:" version="@WRAPPER_VERSION@.galaxy6" profile="22.01"> +<tool id="snpEff_build_gb" name="SnpEff build:" version="@SNPEFF_VERSION@+galaxy@WRAPPER_VERSION@" profile="23.0"> <description> database from Genbank or GFF record</description> <macros> <import>snpEff_macros.xml</import> </macros> <requirements> <expand macro="requirement" /> - <requirement type="package" version="1.79">biopython</requirement> + <requirement type="package" version="1.84">biopython</requirement> </requirements> <expand macro="stdio" /> <expand macro="version_command" /> @@ -36,7 +36,7 @@ ln -s '${input_type.input}' 'snpeff_output/${genome_version}/genes.${input_type.input_type_selector}' && #end if - snpEff @JAVA_OPTIONS@ build -v + snpEff @JAVA_OPTIONS@ build -noCheckCds -noCheckProtein -v -configOption '${genome_version}'.genome='${genome_version}' -configOption '${genome_version}'.codonTable='${codon_table}' #if str($input_type.input_type_selector) == "gb": @@ -186,7 +186,7 @@ <help><![CDATA[ **What it does** -This tool uses `"snpEff build -genbank"` or `"snpEff build -gff3"` commands to create a snpEff database. +This tool uses `snpEff build` to create a snpEff database. ------ @@ -201,7 +201,7 @@ .. class:: warningmark - SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds causing SnpEff to crash. If this is happening use GFF route described below. + SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds causing SnpEff to crash. If this is happening use the GFF route described below. -------
--- a/snpEff_databases.xml Sat Sep 28 16:27:56 2024 +0000 +++ b/snpEff_databases.xml Mon Oct 21 13:56:15 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="snpEff_databases" name="SnpEff databases:" version="@WRAPPER_VERSION@.galaxy2"> +<tool id="snpEff_databases" name="SnpEff databases:" version="@SNPEFF_VERSION@+galaxy@WRAPPER_VERSION@" profile="23.0"> <description> list available databases</description> <macros> <import>snpEff_macros.xml</import> @@ -19,6 +19,10 @@ | grep -v '${exclude_pattern}' #end if + #if str($include_download_path) == "no": + | cut -f 1,2,3,4 + #end if + > '${snpeff_dbs}' ]]></command> <inputs> @@ -38,7 +42,10 @@ </valid> </sanitizer> </param> - + <param name="include_download_path" type="select" display="radio" label="Include download paths?" help="When snpEff dumps the list of available databases, it includes their download paths. These are not needed in the Galaxy context."> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> </inputs> <outputs> <data name="snpeff_dbs" format="tabular" label="${tool.name} @SNPEFF_VERSION@ available databases" /> @@ -63,12 +70,16 @@ <help><![CDATA[ **What it does** -This tool downloads the master list of snpEff databases from @SNPEFF_DATABASE_URL@. You can then look at this list and decide which database to use for your analysis. For example, if **List entries matching the following expression** parameter of this tool is set to *Mouse* the it will produce a tabular dataset with the following content:: +This tool downloads the master list of snpEff databases from a remote SnpEff repository. You can then look at this list and decide which database to use for your analysis. 
For example, if **List entries matching the following expression** parameter of this tool is set to *Mouse*, it will produce a tabular dataset with the following content:: - mm10 Mouse http://downloads.sourceforge.net/project/snpeff/databases/v4_3/snpEff_v4_3_mm10.zip - mm9 Mouse http://downloads.sourceforge.net/project/snpeff/databases/v4_3/snpEff_v4_3_mm9.zip + mm10 Mouse + mm39 Mouse + mm9 Mouse -This means that there two available snpEff databases for mouse genome versions mm9 and mm10. In order to download these databases you should use identifier from the first column (e.g., mm9 or mm10 in this case). +This means that there are three available snpEff databases for mouse genome. If you want to use mm39 in your analysis: + + - set **Genome source** option of **SnpEff eff** Galaxy tool to *Download on demand* + - enter 'mm39' into **Snpff Genome Version Name** text box ------- @@ -83,6 +94,7 @@ @SNPEFF_IN_GALAXY_INFO@ @EXTERNAL_DOCUMENTATION@ + ]]></help> <expand macro="citations" /> </tool>
--- a/snpEff_download.xml Sat Sep 28 16:27:56 2024 +0000 +++ b/snpEff_download.xml Mon Oct 21 13:56:15 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="snpEff_download" name="SnpEff download:" version="@WRAPPER_VERSION@.galaxy2"> +<tool id="snpEff_download" name="SnpEff download:" version="@SNPEFF_VERSION@+galaxy@WRAPPER_VERSION@" profile="23.0"> <description> download a pre-built database</description> <macros> <import>snpEff_macros.xml</import> @@ -42,7 +42,7 @@ <help><![CDATA[ **What it does** -This tool downloads a specified database from @SNPEFF_DATABASE_URL@. It deposits it into the history. +This tool downloads a specified database from a remote SnpEff repository. It deposits it into the history. -------
--- a/snpEff_macros.xml Sat Sep 28 16:27:56 2024 +0000 +++ b/snpEff_macros.xml Mon Oct 21 13:56:15 2024 +0000 @@ -1,6 +1,6 @@ <macros> <xml name="requirement"> - <requirement type="package" version="4.3.1t">snpeff</requirement> + <requirement type="package" version="5.2">snpeff</requirement> <yield/> </xml> <xml name="stdio"> @@ -14,9 +14,8 @@ snpEff -version ]]></version_command> </xml> - <token name="@WRAPPER_VERSION@">4.3+T</token> - <token name="@SNPEFF_VERSION@">SnpEff4.3</token> - <token name="@SNPEFF_DATABASE_URL@">https://sourceforge.net/projects/snpeff/files/databases/v4_3/</token> + <token name="@WRAPPER_VERSION@">0</token> + <token name="@SNPEFF_VERSION@">5.2</token> <token name="@JAVA_OPTIONS@">-Xmx\${GALAXY_MEMORY_MB:-8192}m</token> <xml name="ref_select"> <conditional name="reference_source"> @@ -59,7 +58,7 @@ **Download pre-built databases** -SnpEff project generates large numbers of pre-build databases. These are available at @SNPEFF_DATABASE_URL@ and can downloaded. Follow these steps: +SnpEff project generates large numbers of pre-built databases. To obtain and use them, follow these steps: #. Use **SnpEff databases** tool to generate a list of existing databases. Note the name of the database you need. #. Use **SnpEff download** tool to download the database.
--- a/snpeff_get_chr_names.xml Sat Sep 28 16:27:56 2024 +0000 +++ b/snpeff_get_chr_names.xml Mon Oct 21 13:56:15 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="snpEff_get_chr_names" name="SnpEff chromosome-info:" version="@WRAPPER_VERSION@.galaxy2"> +<tool id="snpEff_get_chr_names" name="SnpEff chromosome-info:" version="@SNPEFF_VERSION@+galaxy@WRAPPER_VERSION@" profile="23.0"> <description>list chromosome names/lengths</description> <macros> <import>snpEff_macros.xml</import>
--- a/test-data/input.vcf Sat Sep 28 16:27:56 2024 +0000 +++ b/test-data/input.vcf Mon Oct 21 13:56:15 2024 +0000 @@ -2,59 +2,9 @@ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> ##contig=<ID="KJ660346",length=18959> ##reference=http://www.ncbi.nlm.nih.gov/nuccore/KJ660346.2 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT EBOV_2014_EM095 EBOV_2014_G3676 EBOV_2014_G3680 EBOV_2014_G3686 EBOV_2014_G3687 EBOV_2014_G3683 EBOV_2014_EM096 EBOV_2014_EM098 EBOV_2014_EM104 EBOV_2014_EM106 EBOV_2014_EM112 EBOV_2014_EM113 EBOV_2014_EM124 EBOV_2014_G3677 EBOV_2014_G3682 EBOV_2014_G3707 EBOV_2014_G3713 EBOV_2014_G3724 EBOV_2014_G3734 EBOV_2014_G3735 EBOV_2014_G3750 EBOV_2014_G3758 EBOV_2014_G3764 EBOV_2014_G3769 EBOV_2014_G3782 EBOV_2014_G3786 EBOV_2014_G3788 EBOV_2014_G3796 EBOV_2014_G3798 EBOV_2014_G3799 EBOV_2014_G3800 EBOV_2014_G3805 EBOV_2014_G3807 EBOV_2014_G3810 EBOV_2014_G3820 EBOV_2014_G3838 EBOV_2014_G3840 EBOV_2014_G3841 EBOV_2014_G3848 EBOV_2014_NM042 EBOV_2014_G3850 EBOV_2014_EM110 EBOV_2014_EM111 EBOV_2014_EM119 EBOV_2014_G3729 EBOV_2014_G3765 EBOV_2014_G3770 EBOV_2014_G3789 EBOV_2014_G3825 EBOV_2014_G3845 EBOV_2014_G3851 EBOV_2014_G3857 EBOV_2014_EM115 EBOV_2014_EM120 EBOV_2014_G3752 EBOV_2014_G3795 EBOV_2014_G3808 EBOV_2014_G3823 EBOV_2014_EM121 EBOV_2014_G3771 EBOV_2014_G3816 EBOV_2014_G3829 EBOV_2014_G3846 EBOV_2014_G3856 EBOV_2014_G3826 EBOV_2014_G3827 EBOV_2014_G3809 EBOV_2014_G3814 EBOV_2014_G3821 EBOV_2014_G3822 EBOV_2014_G3679 EBOV_2014_G3819 EBOV_2014_G3817 EBOV_2014_G3834 EBOV_2014_G3818 EBOV_2014_G3787 EBOV_2014_G3831 EBOV_2014_G3670 EBOV_2014_KJ660346 EBOV_2014_KJ660347 EBOV_2014_KJ660348 -KJ660346 572 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 800 . C T . . . 
GT 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 -KJ660346 1024 . A C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 1288 . A T . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -KJ660346 1492 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 1849 . C T . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 -KJ660346 2124 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -KJ660346 2185 . G A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -KJ660346 2341 . A G . . . GT 0 0 0 0 . 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -KJ660346 2364 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 2497 . A G . . . GT 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 2931 . G A . . . 
GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 -KJ660346 3116 . C G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 3388 . T G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 -KJ660346 3638 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -KJ660346 4340 . C T . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -KJ660346 4505 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 4709 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 4759 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 4976 . C A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 5461 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 6175 . G A . . . 
GT 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 6283 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 -KJ660346 6909 . T A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -KJ660346 8280 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -KJ660346 8928 . A C . . . GT 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 -KJ660346 9390 . A C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 9536 . A G . . . GT 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 9923 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -KJ660346 10005 . G A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 -KJ660346 10218 . G A . . . GT 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 -KJ660346 10252 . A G . . . 
GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -KJ660346 10268 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 10509 . C T . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 10743 . T C . . . GT 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 10801 . A G . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 11142 . G A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 11811 . T C . . . GT 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -KJ660346 11943 . G A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 -KJ660346 12878 . G A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 12885 . A C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 . . . -KJ660346 13856 . G A . . . 
GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 -KJ660346 13923 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -KJ660346 14019 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 -KJ660346 14232 . C T . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 15599 . G A . . . GT 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -KJ660346 15660 . C T . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 -KJ660346 15963 . G A . . . GT 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 -KJ660346 16054 . T A . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 -KJ660346 16455 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -KJ660346 16750 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 17142 . T C . . . 
GT 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 -KJ660346 17985 . T C . . . GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -KJ660346 18412 . T C . . . GT 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -KJ660346 18895 . C T . . . GT 0 0 0 0 0 0 0 . 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 . 0 0 0 0 0 0 0 0 . 0 0 0 0 . 0 . 0 0 0 . 0 0 0 0 0 0 0 0 0 0 0 0 0 0 . 0 0 0 0 0 0 0 0 0 0 0 1 1 1 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT EBOV_2014_EM095 EBOV_2014_G3676 +KJ660346.1 572 . A G . . . GT 0 0 +KJ660346.1 800 . C T . . . GT 0 0 +KJ660346.1 1024 . A C . . . GT 0 0 +KJ660346.1 1288 . A T . . . GT 0 0 +KJ660346.1 1492 . A G . . . GT 0 0