Galaxy |

Changeset 0:e8adfc4c0a6b (2013-12-11)

Next changeset 1:500832f27cbc (2015-01-22)

Commit message:
Uploaded

added:
data_manager/data_manager_snpEff_databases.py
data_manager/data_manager_snpEff_databases.xml
data_manager/data_manager_snpEff_download.py
data_manager/data_manager_snpEff_download.xml
data_manager_conf.xml
datatypes_conf.xml
lib/galaxy/datatypes/snpeff.py
lib/galaxy/datatypes/snpeff.pyc
readme.rst
snpEff.xml
snpEff_download.xml
snpEff_macros.xml
snpSift_annotate.xml
snpSift_caseControl.xml
snpSift_filter.xml
snpSift_int.xml
test-data/annotate_1.vcf
test-data/annotate_5.vcf
test-data/db_test_1.vcf
test-data/interval.bed
test-data/test.private.01.vcf
test-data/test.private.02.vcf
test-data/test01.vcf
test-data/vcf_homhet.vcf
tool-data/snpeff_annotations.loc.sample
tool-data/snpeff_databases.loc.sample
tool-data/snpeff_genomedb.loc.sample
tool-data/snpeff_regulationdb.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml

diff -r 000000000000 -r e8adfc4c0a6b data_manager/data_manager_snpEff_databases.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_snpEff_databases.py Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import re
+import tempfile
+import subprocess
+import fileinput
+import shutil
+import optparse
+import urllib2
+from ftplib import FTP
+import tarfile
+
+from galaxy.util.json import from_json_string, to_json_string
+
+def stop_err(msg):
+    """Report msg on stderr, then terminate the script with exit status 1."""
+    sys.stderr.write(msg)
+    raise SystemExit(1)
+
+def fetch_databases(data_manager_dict, target_directory, jar_path):
+    """Fill the ``snpeff_databases`` data table from ``snpEff databases``.
+
+    Runs ``java -jar <snpEff.jar> databases`` from the jar's directory,
+    captures its stdout in ``<target_directory>/databases.out`` and turns
+    every usable tab-separated row into a ``{value, name}`` entry.
+
+    data_manager_dict -- dict to update in place; also returned
+    target_directory  -- directory for databases.out (created if missing)
+    jar_path          -- path to snpEff.jar
+
+    Exits with java's return code if the command fails, and through
+    stop_err() if the captured listing cannot be parsed.
+    """
+    (snpEff_dir, snpEff_jar) = os.path.split(jar_path)
+    if not os.path.exists(target_directory):
+        os.makedirs(target_directory)
+    databases_path = os.path.join(target_directory, 'databases.out')
+    args = ['java', '-jar', snpEff_jar, 'databases']
+    databases_output = open(databases_path, 'w')
+    try:
+        # A bare "snpEff.jar" yields an empty dirname; Popen cannot chdir to '',
+        # so fall back to the current directory (cwd=None).
+        proc = subprocess.Popen(args=args, shell=False, cwd=snpEff_dir or None, stdout=databases_output.fileno())
+        return_code = proc.wait()
+    finally:
+        # Close the capture file on every path, including a failed launch.
+        databases_output.close()
+    if return_code:
+        sys.exit(return_code)
+    try:
+        data_table_entries = []
+        fh = open(databases_path, 'r')
+        try:
+            for line in fh:
+                fields = line.split('\t')
+                if len(fields) < 2:
+                    continue
+                genome_version = fields[0].strip()
+                organism = fields[1].strip()
+                # Rows starting with "Genome" or "-" are not databases
+                # (presumably the listing's header and separator rows).
+                if genome_version.startswith(('Genome', '-')):
+                    continue
+                # snpEff test genomes
+                if genome_version == '30c2c903' or organism == 'TestCase' or organism.startswith('Test_'):
+                    continue
+                description = organism + ' : ' + genome_version
+                data_table_entries.append(dict(value=genome_version, name=description))
+        finally:
+            fh.close()
+        data_manager_dict.setdefault('data_tables', {})['snpeff_databases'] = data_table_entries
+    except Exception as e:
+        # Report the file that failed to parse (the original referenced an
+        # undefined name here, which masked the real error with a NameError).
+        stop_err('Error parsing %s %s\n' % (databases_path, str(e)))
+    return data_manager_dict
+
+def main():
+    """Command-line entry point: rebuild the snpeff_databases data table.
+
+    Usage: data_manager_snpEff_databases.py --jar_path <snpEff.jar> <json file>
+
+    The positional argument is a JSON file; its
+    ``output_data[0]['extra_files_path']`` names the working directory. The
+    same file is overwritten with the resulting data manager JSON.
+    """
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '-j', '--jar_path', dest='jar_path', action='store', type="string", default=None, help='snpEff.jar path' )
+    (options, args) = parser.parse_args()
+    if not args:
+        parser.error( 'missing path to the data manager JSON file' )
+    if not options.jar_path:
+        parser.error( '--jar_path is required' )
+
+    filename = args[0]
+
+    with open( filename ) as params_file:
+        params = from_json_string( params_file.read() )
+    target_directory = params[ 'output_data' ][0]['extra_files_path']
+    # The directory may already exist (e.g. on a re-run); a bare os.mkdir would raise.
+    if not os.path.isdir( target_directory ):
+        os.makedirs( target_directory )
+
+    #Collect the list of available SnpEff databases
+    data_manager_dict = fetch_databases( {}, target_directory, options.jar_path )
+
+    #save info to json file
+    with open( filename, 'wb' ) as out_file:
+        out_file.write( to_json_string( data_manager_dict ) )
+
+if __name__ == "__main__": main()
+

diff -r 000000000000 -r e8adfc4c0a6b data_manager/data_manager_snpEff_databases.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_snpEff_databases.xml Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,41 @@
+<tool id="data_manager_snpeff_databases" name="SnpEff Databases" version="3.4" tool_type="manage_data">
+ <description>Read the list of available snpEff databases</description>
+ <requirements>
+ <requirement type="package" version="3.4">snpEff</requirement>
+ </requirements>
+ <command interpreter="python">
+        data_manager_snpEff_databases.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar "$out_file"
+        </command>
+ <inputs>
+ </inputs>
+ <outputs>
+           <data name="out_file" format="data_manager_json"/>
+ </outputs>
+        <stdio>
+          <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
+          <exit_code range="1:"  level="fatal"   description="Error" />
+        </stdio>
+        <tests>
+            <test>
+                <output name="out_file">
+                    <assert_contents>
+                        
+                        <has_text text="GRCh37.72" />
+                    </assert_contents>
+                </output>
+            </test>
+        </tests>
+ <help>
+
+This tool updates the list of SnpEff databases for the SnpEff Download data manager.
+It should only need to be run once for a snpEff version,
+since it populates the SnpEff Download data manager from the snpEff config file.
+
+For information about snpEff:    http://snpEff.sourceforge.net
+
+Please cite:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+ </help>
+</tool>
+

diff -r 000000000000 -r e8adfc4c0a6b data_manager/data_manager_snpEff_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_snpEff_download.py Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import re
+import tempfile
+import subprocess
+import fileinput
+import shutil
+import optparse
+import urllib2
+from ftplib import FTP
+import tarfile
+
+from galaxy.util.json import from_json_string, to_json_string
+
+def stop_err(msg):
+    """Report msg on stderr, then terminate the script with exit status 1."""
+    sys.stderr.write(msg)
+    raise SystemExit(1)
+
+"""
+# Download human database 'hg19'
+java -jar snpEff.jar download -v hg19
+
+        <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command>
+
+snpEffectPredictor.bin
+regulation_HeLa-S3.bin
+regulation_pattern = 'regulation_(.+).bin'
+
+
+"""
+def download_database(data_manager_dict, target_directory, jar_path,config,genome_version,organism):
+    """Download one snpEff genome database and register what was fetched.
+
+    Runs ``java -jar <jar_path> download -c <config> -dataDir <target_directory>
+    -v <genome_version>`` and then walks ``<target_directory>/<genome_version>``:
+
+    * a file starting with ``snpEffectPredictor`` adds a ``snpeff_genomedb`` entry
+    * ``regulation_<name>.bin`` adds a ``snpeff_regulationdb`` entry for <name>
+    * ``nextProt.bin`` / ``motif.bin`` add ``snpeff_annotations`` entries
+
+    data_manager_dict -- dict to update in place; also returned
+    target_directory  -- snpEff data directory to download into
+    jar_path          -- path to snpEff.jar
+    config            -- path to snpEff.config
+    genome_version    -- snpEff genome identifier to download
+    organism          -- optional display name appended to the genome entry name
+
+    Exits the process with java's return code if the download fails.
+    """
+    ## The data_dir setting of snpEff.config (default ~/snpEff/data/) is
+    ## overridden on the command line: databases go to target_directory,
+    ## e.g. information for 'hg19' is stored in data_dir/hg19/
+    data_dir = target_directory
+    snpEff_dir = os.path.dirname(jar_path)
+    # The child runs with cwd=snpEff_dir, so hand it absolute paths; relative
+    # ones would otherwise be resolved against the wrong directory.
+    args = [ 'java', '-jar', os.path.abspath(jar_path),
+             'download',
+             '-c', os.path.abspath(config),
+             '-dataDir', os.path.abspath(data_dir),
+             '-v', genome_version ]
+    # An empty dirname (bare "snpEff.jar") must become None: Popen cannot chdir to ''.
+    proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir or None )
+    return_code = proc.wait()
+    if return_code:
+        sys.exit( return_code )
+    ## search data_dir/genome_version for files
+    # Escaped dot and end anchor: only real "regulation_<name>.bin" files match.
+    regulation_pattern = re.compile(r'regulation_(.+)\.bin$')
+    #  annotation files that are included in snpEff by a flag
+    annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
+    genome_path = os.path.join(data_dir,genome_version)
+    if not os.path.isdir(genome_path):
+        return data_manager_dict
+    for root, dirs, files in os.walk(genome_path):
+        for fname in files:
+            if fname.startswith('snpEffectPredictor'):
+                # if snpEffectPredictor.bin download succeeded
+                name = genome_version + (' : ' + organism if organism else '')
+                data_table_entry = dict(value=genome_version, name=name, path=data_dir)
+                _add_data_table_entry( data_manager_dict, 'snpeff_genomedb', data_table_entry )
+                continue
+            m = regulation_pattern.match(fname)
+            if m:
+                name = m.group(1)
+                data_table_entry = dict(genome=genome_version,value=name, name=name)
+                _add_data_table_entry( data_manager_dict, 'snpeff_regulationdb', data_table_entry )
+            elif fname in annotations_dict:
+                value = annotations_dict[fname]
+                name = value.lstrip('-')
+                data_table_entry = dict(genome=genome_version,value=value, name=name)
+                _add_data_table_entry( data_manager_dict, 'snpeff_annotations', data_table_entry )
+    return data_manager_dict
+
+def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ):
+    """Append data_table_entry to data_manager_dict['data_tables'][data_table].
+
+    Missing 'data_tables' and per-table containers are created on demand.
+    The (mutated) data_manager_dict is returned.
+    """
+    tables = data_manager_dict.setdefault( 'data_tables', {} )
+    rows = tables.setdefault( data_table, [] )
+    rows.append( data_table_entry )
+    return data_manager_dict
+
+def main():
+    """Command-line entry point: download the selected snpEff databases.
+
+    Usage: data_manager_snpEff_download.py --jar_path <snpEff.jar>
+           --config <snpEff.config> --genome_version <v1,v2,...>
+           [--organism <n1,n2,...>] <json file>
+
+    --genome_version and --organism are comma-separated lists paired by
+    position. The positional argument is a JSON file; its
+    ``output_data[0]['extra_files_path']`` names the download directory. The
+    same file is overwritten with the resulting data manager JSON.
+    """
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '-j', '--jar_path', dest='jar_path', action='store', type="string", default=None, help='snpEff.jar path' )
+    parser.add_option( '-c', '--config', dest='config', action='store', type="string", default=None, help='snpEff.config path' )
+    parser.add_option( '-g', '--genome_version', dest='genome_version', action='store', type="string", default=None, help='genome_version' )
+    parser.add_option( '-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name' )
+    (options, args) = parser.parse_args()
+    if not args:
+        parser.error( 'missing path to the data manager JSON file' )
+    if not options.genome_version:
+        parser.error( '--genome_version is required' )
+
+    filename = args[0]
+
+    with open( filename ) as params_file:
+        params = from_json_string( params_file.read() )
+    target_directory = params[ 'output_data' ][0]['extra_files_path']
+    # The directory may already exist (e.g. on a re-run); a bare os.mkdir would raise.
+    if not os.path.isdir( target_directory ):
+        os.makedirs( target_directory )
+    data_manager_dict = {}
+
+    #Create SnpEff Reference Data
+    genome_versions = options.genome_version.split(',')
+    organisms = options.organism.split(',') if options.organism else []
+    # Pad with None so zip() never drops a requested genome version when
+    # fewer organism names than genome versions were supplied.
+    organisms += [None] * ( len(genome_versions) - len(organisms) )
+    for genome_version, organism in zip(genome_versions, organisms):
+        download_database( data_manager_dict, target_directory, options.jar_path, options.config, genome_version, organism )
+
+    #save info to json file
+    with open( filename, 'wb' ) as out_file:
+        out_file.write( to_json_string( data_manager_dict ) )
+
+if __name__ == "__main__": main()
+

diff -r 000000000000 -r e8adfc4c0a6b data_manager/data_manager_snpEff_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_snpEff_download.xml Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,51 @@
+<tool id="data_manager_snpeff_download" name="SnpEff Download" version="3.4" tool_type="manage_data">
+    <description>Download a new database</description>
+    <requirements>
+        <requirement type="package" version="3.4">snpEff</requirement>
+    </requirements>
+    <command interpreter="python">
+        data_manager_snpEff_download.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar --config \$SNPEFF_JAR_PATH/snpEff.config
+        --genome_version "${genome_databases.fields.value}"
+        --organism "${genome_databases.fields.name}"
+        "$out_file"
+        </command>
+    <inputs>
+        <param name="genome_databases" type="select" display="checkboxes" multiple="true" label="Genome Version">
+            <options from_data_table="snpeff_databases">
+                <filter type="sort_by" column="0" />
+            </options>
+        </param>
+    </inputs>
+
+    <outputs>
+           <data name="out_file" format="data_manager_json" label="${tool.name} : ${genome_databases.fields.value}"/>
+    </outputs>
+    <stdio>
+        <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
+        <exit_code range="1:"  level="fatal"   description="Error" />
+    </stdio>
+    <tests>
+        <test>
+            <param name="genome_databases" value="GRCh37.71"/>
+            <output name="out_file">
+                <assert_contents>
+                    
+                    <has_text text="GRCh37.71" />
+                    <has_text text="snpeff_regulationdb" />
+                    <has_text text="snpeff_annotations" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+This tool downloads a SnpEff database.
+
+For details about this tool, please go to http://snpEff.sourceforge.net
+
+Please cite:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+    </help>
+</tool>
+

diff -r 000000000000 -r e8adfc4c0a6b data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<data_managers>
+  <data_manager tool_file="data_manager/data_manager_snpEff_databases.xml" id="data_manager_snpeff_databases" >
+    <data_table name="snpeff_databases">  
+      <output> 
+        <column name="value" /> 
+        <column name="name" /> 
+      </output>
+    </data_table>
+  </data_manager>
+  <data_manager tool_file="data_manager/data_manager_snpEff_download.xml" id="data_manager_snpeff_download" >
+    <data_table name="snpeff_genomedb">  
+      <output> 
+        <column name="value" /> 
+        <column name="name" />  
+        <column name="path" output_ref="out_file" >
+          <move type="directory" relativize_symlinks="True">
+            <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">snpEff/data</target>
+          </move>
+          <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/data</value_translation>
+          <value_translation type="function">abspath</value_translation>
+        </column>
+      </output>
+    </data_table>
+    <data_table name="snpeff_regulationdb">  
+      <output> 
+        <column name="genome" /> 
+        <column name="value" /> 
+        <column name="name" />  
+      </output>
+    </data_table>
+    <data_table name="snpeff_annotations">  
+      <output> 
+        <column name="genome" /> 
+        <column name="value" /> 
+        <column name="name" />  
+      </output>
+    </data_table>
+  </data_manager>
+</data_managers>
+
+

diff -r 000000000000 -r e8adfc4c0a6b datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="snpeff.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="snpeffdb" type="galaxy.datatypes.snpeff:SnpEffDb" display_in_upload="True"/>
+    </registration>
+</datatypes>
+

diff -r 000000000000 -r e8adfc4c0a6b lib/galaxy/datatypes/snpeff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/snpeff.py Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,46 @@
+"""
+SnpEff datatypes
+"""
+import os,os.path,re,sys
+import galaxy.datatypes.data
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.metadata import MetadataElement
+
+class SnpEffDb( Text ):
+    """Class describing a SnpEff genome database kept in the dataset's extra files directory"""
+    file_ext = "snpeffdb"
+    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
+    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[] )
+    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[] )
+
+    def __init__( self, **kwd ):
+        Text.__init__( self, **kwd )
+
+    def set_meta( self, dataset, **kwd ):
+        """Derive metadata from the files under dataset.extra_files_path.
+
+        genome_version -- name of the directory holding a snpEffectPredictor* file
+        regulation     -- <name> of every regulation_<name>.bin file found
+        annotation     -- 'nextprot' / 'motif' when nextProt.bin / motif.bin exist
+
+        Nothing beyond Text.set_meta is done when the directory does not exist.
+        """
+        Text.set_meta(self, dataset, **kwd )
+        data_dir = dataset.extra_files_path
+        ## search data_dir/genome_version for files
+        # Escaped dot and end anchor: only real "regulation_<name>.bin" files match.
+        regulation_pattern = re.compile(r'regulation_(.+)\.bin$')
+        #  annotation files that are included in snpEff by a flag
+        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
+        regulations = []
+        annotations = []
+        if data_dir and os.path.isdir(data_dir):
+            for root, dirs, files in os.walk(data_dir):
+                for fname in files:
+                    if fname.startswith('snpEffectPredictor'):
+                        # if snpEffectPredictor.bin download succeeded
+                        genome_version = os.path.basename(root)
+                        dataset.metadata.genome_version = genome_version
+                    else:
+                        m = regulation_pattern.match(fname)
+                        if m:
+                            name = m.group(1)
+                            regulations.append(name)
+                        elif fname in annotations_dict:
+                            value = annotations_dict[fname]
+                            name = value.lstrip('-')
+                            annotations.append(name)
+            dataset.metadata.regulation = regulations
+            dataset.metadata.annotation = annotations
+

diff -r 000000000000 -r e8adfc4c0a6b lib/galaxy/datatypes/snpeff.pyc

Binary file lib/galaxy/datatypes/snpeff.pyc has changed

diff -r 000000000000 -r e8adfc4c0a6b readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,24 @@
+These are Galaxy tools for SnpEff, a variant annotation and effect prediction tool by Pablo Cingolani.
+It annotates and predicts the effects of variants on genes (such as amino acid changes).
+( http://snpeff.sourceforge.net/ )
+
+This repository contains a tool_dependencies.xml file that will attempt to automatically install SnpEff and SnpSift.
+
+This will use the default location for genome reference downloads from the snpEff.config:
+data_dir = ~/snpEff/data/
+You can manually edit the installed snpEff.config and change the location, or you can create a symbolic link to the desired data location from ~/snpEff.
+
+The genome reference options used by the tools:
+    "SnpEff"  snpEff.xml
+    "SnpEff Download" snpEff_download.xml
+are taken from: tool-data/snpeff_genomedb.loc
+
+There are 2 datamanagers to download and install prebuilt SnpEff Genome databases:
+  data_manager_snpeff_databases - generates a list of available SnpEff genome databases into the tool-data/snpeff_databases.loc
+  data_manager_snpeff_download - downloads a SnpEff genome database selected from: tool-data/snpeff_databases.loc and adds entries to snpeff_genomedb.loc,snpeff_regulationdb.loc,snpeff_annotations.loc
+
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.

diff -r 000000000000 -r e8adfc4c0a6b snpEff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff.xml Wed Dec 11 08:53:32 2013 -0500

[

b'@@ -0,0 +1,330 @@\n+<tool id="snpEff" name="SnpEff" version="3.4">\n+ <description>Variant effect and annotation</description>\n+ <expand macro="requirements" />\n+ <macros>\n+ <import>snpEff_macros.xml</import>\n+ </macros>\n+ <command>\n+ java -Xmx6G -jar \\$SNPEFF_JAR_PATH/snpEff.jar eff \n+ -c \\$SNPEFF_JAR_PATH/snpEff.config \n+ -i $inputFormat -o $outputFormat -upDownStreamLen $udLength \n+ #if $spliceSiteSize and $spliceSiteSize.__str__ != \'\':\n+ -spliceSiteSize $spliceSiteSize\n+ #end if\n+ #if $filterIn and $filterIn.__str__ != \'no_filter\':\n+ $filterIn \n+ #end if\n+ #if $filterHomHet and $filterHomHet.__str__ != \'no_filter\':\n+ $filterHomHet \n+ #end if\n+ #if $annotations and $annotations.__str__ != \'\':\n+ #echo " "\n+ #echo \' \'.join($annotations.__str__.split(\',\'))\n+ #end if\n+ #if $filterOut and $filterOut.__str__ != \'\':\n+ #echo " "\n+ #echo \' \'.join($filterOut.__str__.split(\',\'))\n+ #end if\n+ #if str( $transcripts ) != \'None\':\n+ -onlyTr $transcripts\n+ #end if\n+ #if str( $intervals ) != \'None\': ### fix this for multiple dataset input\n+ -interval $intervals\n+ #end if\n+ #if $statsFile:\n+ -stats $statsFile \n+ #end if\n+ #if $offset.__str__ != \'\':\n+ ${offset} \n+ #end if\n+ #if $chr.__str__.strip() != \'\':\n+ -chr "$chr" \n+ #end if\n+ $noLog \n+ #if $snpDb.genomeSrc == \'cached\':\n+ -dataDir ${snpDb.genomeVersion.fields.path}\n+ #if $snpDb.extra_annotations and $snpDb.extra_annotations.__str__ != \'\':\n+ #echo " "\n+ #echo \' \'.join($snpDb.extra_annotations.__str__.split(\',\'))\n+ #end if\n+ #if $snpDb.regulation and $snpDb.regulation.__str__ != \'\':\n+ -reg #echo \' -reg \'.join($snpDb.regulation.__str__.split(\',\'))#\n+ #end if\n+ $snpDb.genomeVersion\n+ #elif $snpDb.genomeSrc == \'history\':\n+ -dataDir ${snpDb.snpeff_db.extra_files_path}\n+ #if $snpDb.extra_annotations and $snpDb.extra_annotations.__str__ != \'\':\n+ #set xannotations = [\' \'] + $snpDb.extra_annotations.__str__.split(\',\')\n+ #echo " 
"\n+ #echo \' -\'.join($xannotations)\n+ #end if\n+ #if $snpDb.regulation and $snpDb.regulation.__str__ != \'\':\n+ -reg #echo \' -reg \'.join($snpDb.regulation.__str__.split(\',\'))#\n+ #end if\n+ ${snpDb.snpeff_db.metadata.genome_version}\n+ #else \n+ -download\n+ $snpDb.genome_version\n+ #end if\n+ $input > $snpeff_output \n+ </command>\n+ <inputs>\n+ <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>\n+\n+ <param name="inputFormat" type="select" label="Input format">\n+ <option value="vcf" selected="true">VCF</option>\n+ <option value="txt">Tabular (Deprecated)</option>\n+ <option value="pileup">Pileup (Deprecated)</option>\n+ <option value="bed">BED (Deprecated)</option>\n+ </param>\n+\n+ <param name="outputFormat" type="select" label="Output format">\n+ <option value="vcf" selected="true">VCF (only if input is VCF)</option>\n+ <option value="txt">Tabular</option>\n+ <option value="bed">BED</option>\n+ <option value="bedAnn">BED Annotations</option>\n+ </param>\n+\n+ <conditional name="snpDb">\n+ <param name="genomeSrc" type="select" label="Genome source">\n+ <option value="cached">Locally installed reference genome</option>\n+ <option value="history">Reference genome from your history</option>\n+ <option value="named">Named on demand</option>\n+ </param>\n+ <when value="cached">\n+ <param name="geno'..b' was added in out VCF -->\n+ \n+  \n+  \n+\n+ <test>\n+ <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>\n+ <param name="inputFormat" value="vcf"/>\n+ <param name="outputFormat" value="vcf"/>\n+ <param name="genomeSrc" value="named"/>\n+ <param name="genome_version" value="testCase"/>\n+ <param name="udLength" value="0"/>\n+ <param name="filterHomHet" value="+-het"/>\n+ <param name="filterIn" value="no_filter"/>\n+ \n+ <param name="generate_stats" value="False"/>\n+ <output name="snpeff_output">\n+ <assert_contents>\n+ \n+ <not_has_text text="EFF=NON_SYNONYMOUS_CODING" />\n+ 
</assert_contents>\n+ </output>\n+ </test>\n+\n+ <test>\n+ <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>\n+ <param name="inputFormat" value="vcf"/>\n+ <param name="outputFormat" value="vcf"/>\n+ <param name="genomeSrc" value="named"/>\n+ <param name="genome_version" value="testCase"/>\n+ <param name="udLength" value="0"/>\n+ <param name="filterHomHet" value="no_filter"/>\n+ <param name="filterIn" value="del"/>\n+ \n+ <param name="generate_stats" value="False"/>\n+ <output name="snpeff_output">\n+ <assert_contents>\n+ \n+ <has_text_matching expression="Y\\t59030478\\t.*EFF=INTERGENIC" />\n+ \n+ <has_text_matching expression="Y\\t59032947\\t.*SF=5\\tGT" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+\n+ \n+ \n+\n+ </tests>\n+ <help>\n+\n+This tool calculate the effect of variants (SNPs/MNPs/Insertions) and deletions.\n+\n+@EXTERNAL_DOCUMENTATION@\n+\n+@CITATION_SECTION@\n+\n+ </help>\n+</tool>\n+\n'

diff -r 000000000000 -r e8adfc4c0a6b snpEff_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff_download.xml Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,31 @@
+<tool id="snpEff_download" name="SnpEff Download" version="3.4">
+    <description>Download a new database</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+    echo $genomeVersion > $snpeff_db;
+    java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config -dataDir $snpeff_db.extra_files_path -v $genomeVersion > $logfile
+    </command>
+    <inputs>
+        <param name="genomeVersion" type="select" label="Select the genome version you want to download">
+            <options from_data_table="snpeff_databases">
+                <filter type="sort_by" column="0" />
+            </options>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="txt" name="logfile" />
+        <data format="snpeffdb" name="snpeff_db" label="${genomeVersion}" />
+    </outputs>
+    <expand macro="stdio" />
+    <help>
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
+

diff -r 000000000000 -r e8adfc4c0a6b snpEff_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff_macros.xml Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,31 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.4">snpEff</requirement>
+        </requirements>
+    </xml>
+  <xml name="stdio">
+    <stdio>
+        <exit_code range=":-1"  level="fatal" description="Error: Cannot open file" />
+        <exit_code range="1:"  level="fatal" description="Error" />
+    </stdio>
+  </xml>
+  <token name="@EXTERNAL_DOCUMENTATION@">
+
+For details about this tool, please go to http://snpeff.sourceforge.net/SnpSift.html#intervals
+
+  </token>
+  <token name="@CITATION_SECTION@">------
+
+**Citation**
+
+For the underlying tool, please cite the following two publications:
+
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+
+  </token>
+</macros>

diff -r 000000000000 -r e8adfc4c0a6b snpSift_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_annotate.xml Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,99 @@
+<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.4">
+    <description>SNPs from dbSnp</description>
+    
+    <expand macro="requirements" />
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar $annotate_cmd
+        #if $annotate.id :
+          -id
+        #elif $annotate.info_ids.__str__.strip() != '' :
+          -info "$annotate.info_ids"
+        #end if
+        -q $dbSnp $input > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param format="vcf" name="dbSnp" type="data" label="VCF File with ID field annotated (e.g. dbSNP.vcf)"
+            help="The ID field for a variant in input will be assigned from a matching variant in this file."/>
+        <conditional name="annotate">
+            <param name="id" type="boolean" truevalue="id" falsevalue="info" checked="True" label="Only annotate ID field (do not add INFO field)" help=""/>
+            <when value="id"/>
+            <when value="info">
+                <param name="info_ids" type="text" value="" size="60" optional="true" label="Limit INFO annotation to these INFO IDs"
+                    help="list is a comma separated list of fields. When blank, all INFO fields are included">
+                    <validator type="regex" message="IDs separated by commas">^(([a-zA-Z][a-zA-Z0-9_-]*)(,[a-zA-Z][a-zA-Z0-9_-]*)*)?$</validator>
+                </param>
+            </when>
+        </conditional>
+        <param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Allow unsorted VCF files">
+            <help>
+                This option will load the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files).
+                Otherwise, both the database and the input VCF files should be sorted by position (Chromosome sort order can differ between files).
+            </help>
+            </param>
+    </inputs>
+    <expand macro="stdio" />
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="annotate_1.vcf"/>
+            <param name="dbSnp" ftype="vcf" value="db_test_1.vcf"/>
+            <param name="annotate_cmd" value="False"/>
+            <param name="id" value="True"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="rs76166080" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+This is typically used to annotate IDs from dbSnp.
+
+Annotating only the ID field from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192
+
+
+
+Annotating both the ID and INFO fields from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244;RSPOS=16346045;GMAF=0.162248628884826;dbSNPBuildID=129;SSR=0;SAO=0;VP=050100000000000100000100;WGT=0;VC=SNV;SLO;GNO
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192;RSPOS=16350245;GMAF=0.230804387568556;dbSNPBuildID=101;SSR=1;SAO=0;VP=050000000000000100000140;WGT=0;VC=SNV;GNO
+
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+
+    </help>
+</tool>
+

diff -r 000000000000 -r e8adfc4c0a6b snpSift_caseControl.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_caseControl.xml Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,121 @@
+<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.4">
+    <description>Count samples that are in 'case' and 'control' groups.</description>
+    
+    <expand macro="requirements" />
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+    java -Xmx1G -jar \$SNPEFF_JAR_PATH/SnpSift.jar caseControl -q
+    #if $name.__str__.strip() != '':
+      -name $name
+    #end if
+    #if $ctrl.ctrl_src == 'caseString':
+      '$ctrl.caseControlStr'
+    #else
+      -tfam "$ctrl.tfam"
+    #end if
+    $input > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <conditional name="ctrl">
+            <param name="ctrl_src" type="select" label="Case Control defined in">
+            <option value="caseString">Case Control String</option>
+            <option value="tfam">TFAM file</option>
+        </param>
+        <when value="caseString">
+            <param name="caseControlStr" type="text" label="Case / Control column designation" size="50">
+            <help>
+                Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral
+            </help>
+            <validator type="regex" message="must be  only plus(+), minus(-), or zero(0) characters">^[-+0]+$</validator> <!-- the hyphen comes first so it is a literal, not a range (the old class "+ to 0" also admitted comma, dot and slash); anchored so the whole value must match -->
+            </param>
+        </when>
+        <when value="tfam">
+            <param format="tabular" name="tfam" type="data" label="PLINK TFAM file" help="Read more about TFAM at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr"/>
+        </when>
+        </conditional>
+        <param name="name" type="text" optional="true" label="name" help="name to append to the 'Cases' or 'Controls' tags">
+            <validator type="regex" message="Use only valid ID characters">^[_a-zA-Z0-9]+$</validator> <!-- anchored at both ends: a valid prefix followed by other characters must be rejected, since the value is placed unquoted on the command line -->
+        </param>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="test.private.01.vcf"/>
+            <param name="ctrl_src" value="caseString"/>
+            <param name="caseControlStr" value="--"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Cases=0,0,0;" />
+                    <has_text text="Controls=0,0,0;" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <test>
+            <param name="input" ftype="vcf" value="test.private.02.vcf"/>
+            <param name="ctrl_src" value="caseString"/>
+            <param name="caseControlStr" value="--"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Cases=0,0,0;" />
+                    <has_text text="Controls=2,0,4;" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <test>
+            <param name="input" ftype="vcf" value="test.private.02.vcf"/>
+            <param name="name" value=""/>
+            <param name="ctrl_src" value="caseString"/>
+            <param name="caseControlStr" value="-+"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Cases=1,0,2;" />
+                    <has_text text="Controls=1,0,2;" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+**SnpSift CaseControl**
+
+Allows you to count how many samples are in a 'case' group and a 'control' group. You can count 'homozygous', 'heterozygous' or 'any' variants.
+
+Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral.
+
+This command adds two annotations to the VCF file:
+
+ - **CaseControl**: Two comma separated numbers representing the number of samples that have the variant in the case and the control group. Example:
+
+  "CaseControl=3,4" *the variant is present in 3 cases and 4 controls.*
+
+
+ - **CaseControlP**: A p-value (Fisher exact test) that the number of cases is N or more. Example:
+
+  "CaseControl=4,0;CaseControlP=3.030303e-02" *in this case the pValue of having 4 or more cases and zero controls is 0.03*
+
+
+For example, if we have ten samples (which means ten genotype columns in the VCF file), the first four are 'case' and the last six are 'control', so the description string would be "++++------".  Let's say we want to distinguish genotypes that are homozygous in 'case' and either homozygous or heterozygous in 'control'.  We would set:
+
+  - Hom/Het case = "hom"
+
+  - Hom/Het control = "any"
+
+  - Case / Control column designation = "++++------"
+
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+  </help>
+</tool>

diff -r 000000000000 -r e8adfc4c0a6b snpSift_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_filter.xml Wed Dec 11 08:53:32 2013 -0500

[

@@ -0,0 +1,148 @@
+<tool id="snpSift_filter" name="SnpSift Filter" version="3.4">
+    <options sanitize="False" />
+    <description>Filter variants using arbitrary expressions</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse
+        #if $filtering.mode == 'field':
+            #if $filtering.replace.pass:
+                --pass
+                #if $filtering.replace.filterId and len($filtering.replace.filterId.__str__.strip()) > 0:
+                    --filterId "$filtering.replace.filterId"
+                #end if
+            #end if
+            #if $filtering.addFilter and len($filtering.addFilter.__str__.strip()) > 0:
+                --addFilter "$filtering.addFilter"
+            #end if
+            #if $filtering.rmFilter and len($filtering.rmFilter.__str__.strip()) > 0:
+                --rmFilter "$filtering.rmFilter"
+            #end if
+        #end if
+         > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param name="expr" type="text" label="Filter criteria" size="160" help="Need help? See below a few examples." />
+        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" />
+        <conditional name="filtering">
+            <param name="mode" type="select" label="Filter mode">
+                <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option>
+                <option value="field">Change the FILTER field, but retain all entries</option>
+            </param>
+            <when value="entries"/>
+            <when value="field">
+                <conditional name="replace">
+                    <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'"
+                           help="appends an ID tag to non-matching entry FILTER "/>
+                    <when value="no"/>
+                    <when value="yes">
+                        <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10"
+                               help="Default ID is 'SnpSift'"/>
+                    </when>
+                </conditional>
+                <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
+                <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
+            </when>
+        </conditional>
+    </inputs>
+    <configfiles>
+        <configfile name="exprFile">
+        $expr
+        </configfile>
+    </configfiles>
+
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="QUAL >= 50"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="28837706" />
+            <not_has_text text="NT_166464" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(CHROM = '19')"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="3205820" />
+            <not_has_text text="NT_16" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(POS >= 20175) & (POS <= 35549)"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="20175" />
+            <has_text text="35549" />
+            <has_text text="22256" />
+            <not_has_text text="18933" />
+            <not_has_text text="37567" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="( DP >= 5 )"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="DP=5;" />
+            <has_text text="DP=6;" />
+            <not_has_text text="DP=1;" />
+            </assert_contents>
+        </output>
+        </test>
+    </tests>
+    <help>
+
+**SnpSift filter**
+
+You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility.
+
+Some examples:
+
+  - *I want to filter out samples with quality less than 30*:
+
+    * **( QUAL > 30 )**
+
+  - *...but we also want InDels that have quality 20 or more*:
+
+    * **(( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )**
+
+  - *...or any homozygous variant present in more than 3 samples*:
+
+    * **(countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )**
+
+  - *...or any heterozygous sample with coverage 25 or more*:
+
+    * **((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )**
+
+  - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:
+
+    * **isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] )**
+
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+    </help>
+</tool>

diff -r 000000000000 -r e8adfc4c0a6b snpSift_int.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_int.xml Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,56 @@
+<tool id="snpSift_int" name="SnpSift Intervals" version="3.4">
+    <description>Filter variants using intervals</description>
+    
+    <expand macro="requirements" />
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar intervals -i $input $exclude $bedFile > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param format="bed" name="bedFile" type="data" label="Intervals (BED file)"/>
+        <param name="exclude" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Exclude Intervals"
+            help="Filter out (exclude) VCF entries that match any interval in the BED files"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="annotate_5.vcf"/>
+            <param name="bedFile" ftype="bed" value="interval.bed"/>
+            <param name="exclude" value="False"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="872687" />
+                    <not_has_text text="1195966" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" ftype="vcf" value="annotate_5.vcf"/>
+            <param name="bedFile" ftype="bed" value="interval.bed"/>
+            <param name="exclude" value="True"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="1195966" />
+                    <not_has_text text="872687" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+You can filter using intervals (BED file).
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+    </help>
+</tool>

diff -r 000000000000 -r e8adfc4c0a6b test-data/annotate_1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_1.vcf Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,1 @@
+1 872687 . C G . . .

diff -r 000000000000 -r e8adfc4c0a6b test-data/annotate_5.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_5.vcf Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,5 @@
+1 872687 rs76166080 C G . . .
+1 970878 . C T . . .
+1 979690 rs115413462 G A . . .
+1 1160967 . C T . . .
+1 1195966 rs114569001 G A . . .

diff -r 000000000000 -r e8adfc4c0a6b test-data/db_test_1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/db_test_1.vcf Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,1 @@
+1 872687 rs76166080 C G 0 . .

diff -r 000000000000 -r e8adfc4c0a6b test-data/interval.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/interval.bed Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,10 @@
+chr1 1 100000
+chr1 100000 200000
+chr1 200000 300000
+chr1 300000 400000
+chr1 400000 500000
+chr1 500000 600000
+chr1 600000 700000
+chr1 700000 800000
+chr1 800000 900000
+chr1 900000 1000000

diff -r 000000000000 -r e8adfc4c0a6b test-data/test.private.01.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.private.01.vcf Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT id1 id2
+1 123456 . A G . . AF=0 GT 0/0 0/0

diff -r 000000000000 -r e8adfc4c0a6b test-data/test.private.02.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.private.02.vcf Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT id1 id2
+1 123456 . A G . . AF=0 GT 1/1 1/1

diff -r 000000000000 -r e8adfc4c0a6b test-data/test01.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test01.vcf Wed Dec 11 08:53:32 2013 -0500

b'@@ -0,0 +1,1000 @@\n+##fileformat=VCFv4.1\n+##samtoolsVersion=0.1.16 (r963:234)\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">\n+##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">\n+##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the site allele frequency of the first ALT allele">\n+##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">\n+##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">\n+##INFO=<ID=CI95,Number=2,Type=Float,Description="Equal-tail Bayesian credible interval of the site allele frequency at the 95% level">\n+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">\n+##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">\n+##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">\n+##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality 
bases">\n+##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">\n+##FORMAT=<ID=PL,Number=-1,Type=Integer,Description="List of Phred-scaled genotype likelihoods, number of values is (#ALT+1)*(#ALT+2)/2">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ts_1_BcA2.sort.rmdup.Q20.noMh.bam\n+NT_166464\t696\t.\tG\tC\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166464\t745\t.\tG\tC\t6.98\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:36,3,0:4\n+NT_166464\t7258\t.\tA\tC\t40\t.\tDP=4;AF1=0.5008;CI95=0.5,0.5;DP4=1,0,2,1;MQ=32;FQ=-4.12;PV4=1,0.28,0.21,0.17\tGT:PL:GQ\t0/1:70,0,25:28\n+NT_166464\t7268\t.\tA\tG\t8.65\t.\tDP=4;AF1=0.5004;CI95=0.5,0.5;DP4=1,0,1,1;MQ=30;FQ=3.32;PV4=1,0.017,0,1\tGT:PL:GQ\t0/1:38,0,28:32\n+NT_166464\t7283\t.\tT\tC\t11.3\t.\tDP=3;AF1=0.501;CI95=0.5,0.5;DP4=1,0,1,1;MQ=30;FQ=-4.81;PV4=1,1,0,1\tGT:PL:GQ\t0/1:41,0,24:28\n+NT_166464\t7335\t.\tG\tA\t18.8\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=25;FQ=-33\tGT:PL:GQ\t1/1:50,6,0:10\n+NT_166464\t8030\t.\tA\tG\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166452\t8268\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166452\t16693\t.\tT\tC\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166480\t12474\t.\tG\tA\t6.2\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:35,3,0:4\n+NT_166480\t12483\t.\tA\tG\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n+NT_166476\t578\t.\tC\tT\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166476\t22223\t.\tA\tC\t3.01\t.\tDP=4;AF1=0.4998;CI95=0.5,0.5;DP4=0,2,2,0;MQ=32;FQ=4.63;PV4=0.33,0.26,0,0.42\tGT:PL:GQ\t0/1:30,0,43:28\n+NT_166476\t22256\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166476\t23076\t.\tA\tT\t8.44\t.\tDP=2;AF1=1;CI95=0.5,
1;DP4=0,0,0,2;MQ=25;FQ=-33\tGT:PL:GQ\t1/1:39,6,0:8\n+NT_166476\t23487\t.\tC\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166454\t64\t.\tT\tA\t8.64\t.\tDP=7;AF1=0.5;CI95=0.5,0.5;DP4=1,4,2,0;MQ=29;FQ=11.3;PV4=0.14,1,1,1\tGT:P'..b'\tG\t28\t.\tDP=4;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=25;FQ=-36\tGT:PL:GQ\t1/1:60,9,0:16\n+19\t25568441\t.\tG\tA\t89\t.\tDP=10;AF1=1;CI95=1,1;DP4=0,0,7,3;MQ=29;FQ=-57\tGT:PL:GQ\t1/1:122,30,0:57\n+19\t25568480\t.\tA\tG\t169\t.\tDP=11;AF1=1;CI95=1,1;DP4=0,0,8,3;MQ=30;FQ=-60\tGT:PL:GQ\t1/1:202,33,0:63\n+19\t25568513\t.\tT\tA\t124\t.\tDP=9;AF1=1;CI95=1,1;DP4=0,0,6,3;MQ=28;FQ=-54\tGT:PL:GQ\t1/1:157,27,0:51\n+19\t25568527\t.\tA\tG\t109\t.\tDP=9;AF1=1;CI95=1,1;DP4=0,0,6,3;MQ=28;FQ=-54\tGT:PL:GQ\t1/1:142,27,0:51\n+19\t25568536\t.\tA\tG\t13.2\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,3,0;MQ=25;FQ=-36\tGT:PL:GQ\t1/1:45,9,0:14\n+19\t25581569\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26697796\t.\tT\tA\t6.2\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:35,3,0:4\n+19\t26728829\t.\tAGG\tATGG,AG\t8.83\t.\tINDEL;DP=5;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=33;FQ=-40.5\tGT:PL:GQ\t1/1:71,30,24,46,0,43:8\n+19\t26747187\t.\tAGG\tAG\t14.4\t.\tINDEL;DP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=37;FQ=-40.5\tGT:PL:GQ\t1/1:53,6,0:10\n+19\t26751288\t.\tG\tGAC\t116\t.\tINDEL;DP=8;AF1=1;CI95=0.5,1;DP4=0,0,5,1;MQ=35;FQ=-52.5\tGT:PL:GQ\t1/1:156,18,0:33\n+19\t26756358\t.\tCA\tCAAA\t11.8\t.\tINDEL;DP=6;AF1=0.5;CI95=0.5,0.5;DP4=1,1,1,1;MQ=37;FQ=14.4;PV4=1,0.41,1,0.0024\tGT:PL:GQ\t0/1:49,0,62:51\n+19\t26758413\t.\tGT\tGTT\t52.4\t.\tINDEL;DP=6;AF1=1;CI95=0.5,1;DP4=0,0,3,0;MQ=37;FQ=-43.5\tGT:PL:GQ\t1/1:92,9,0:16\n+19\t26764380\t.\tC\tT\t13\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:44,6,0:9\n+19\t26765941\t.\tAGG\tAGGGG\t24.2\t.\tINDEL;DP=5;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=37;FQ=-40.5\tGT:PL:GQ\t1/1:63,6,0:10\n+19\t26780556\t.\tA\tAC\t18.3\t.\tINDEL;DP=3;AF1=1;CI95=0.5,1;
DP4=0,0,0,2;MQ=32;FQ=-40.5\tGT:PL:GQ\t1/1:57,6,0:10\n+19\t26787476\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26803166\t.\tA\tG\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26803281\t.\tG\tT\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26827257\t.\tG\tA\t3.54\t.\tDP=2;AF1=0.5;CI95=0.5,0.5;DP4=1,0,1,0;MQ=37;FQ=3.54;PV4=1,1,1,1\tGT:PL:GQ\t0/1:31,0,31:29\n+19\t26847473\t.\tA\tC\t10.2\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,0,2;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:41,6,0:8\n+19\t26852064\t.\tTACACACACACACACACACACACACACACACACACACACA\tTACACACACACACACACACACACACACACACACACACA\t118\t.\tINDEL;DP=55;AF1=0.5;CI95=0.5,0.5;DP4=8,10,4,6;MQ=37;FQ=121;PV4=1,1,1,1\tGT:PL:GQ\t0/1:156,0,255:99\n+19\t27313337\t.\tG\tA\t3.41\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:32,6,0:4\n+19\t27314462\t.\tT\tC\t40\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,1;MQ=37;FQ=-36\tGT:PL:GQ\t1/1:72,9,0:16\n+19\t27466173\t.\tC\tG\t3.54\t.\tDP=2;AF1=0.5;CI95=0.5,0.5;DP4=1,0,1,0;MQ=37;FQ=3.54;PV4=1,1,1,1\tGT:PL:GQ\t0/1:31,0,31:29\n+19\t28220602\t.\tT\tG\t222\t.\tDP=21;AF1=1;CI95=1,1;DP4=0,0,8,13;MQ=37;FQ=-90\tGT:PL:GQ\t1/1:255,63,0:99\n+19\t28220622\t.\tC\tT\t222\t.\tDP=21;AF1=1;CI95=1,1;DP4=0,0,8,13;MQ=37;FQ=-90\tGT:PL:GQ\t1/1:255,63,0:99\n+19\t28220668\t.\tG\tA\t222\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,5,6;MQ=36;FQ=-60\tGT:PL:GQ\t1/1:255,33,0:63\n+19\t28220691\t.\tT\tC\t148\t.\tDP=7;AF1=1;CI95=1,1;DP4=0,0,3,4;MQ=36;FQ=-48\tGT:PL:GQ\t1/1:181,21,0:39\n+19\t28486996\t.\tT\tC\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n+19\t28643319\t.\tC\tT\t3.55\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:31,3,0:4\n+19\t28643329\t.\tC\tT\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n+19\t28714335\t.\tC\tA\t6.98\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:36,3,0:4\n+19\t28837706\t.\tA\
tT\t154\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,3,10;MQ=25;FQ=-66\tGT:PL:GQ\t1/1:187,39,0:75\n+19\t28837717\t.\tG\tA\t154\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,3,10;MQ=25;FQ=-66\tGT:PL:GQ\t1/1:187,39,0:75\n+19\t28837735\t.\tA\tG\t154\t.\tDP=24;AF1=1;CI95=1,1;DP4=0,0,7,14;MQ=25;FQ=-90\tGT:PL:GQ\t1/1:187,63,0:99\n+19\t28837767\t.\tA\tG,T\t177\t.\tDP=53;AF1=1;CI95=1,1;DP4=0,0,21,29;MQ=30;FQ=-175\tGT:PL:GQ\t1/1:210,148,0,204,125,201:99\n+19\t28837787\t.\tC\tT\t161\t.\tDP=66;AF1=1;CI95=1,1;DP4=0,1,30,35;MQ=31;FQ=-206;PV4=1,1,1,1\tGT:PL:GQ\t1/1:194,179,0:99\n+19\t28837805\t.\tA\tG\t222\t.\tDP=54;AF1=1;CI95=1,1;DP4=0,0,26,26;MQ=32;FQ=-184\tGT:PL:GQ\t1/1:255,157,0:99\n'

diff -r 000000000000 -r e8adfc4c0a6b test-data/vcf_homhet.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcf_homhet.vcf Wed Dec 11 08:53:32 2013 -0500

b'@@ -0,0 +1,60 @@\n+##fileformat=VCFv4.0\n+##samtoolsVersion=0.1.15 (r949:203)\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">\n+##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">\n+##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the site allele frequency of the first ALT allele">\n+##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">\n+##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">\n+##INFO=<ID=CI95,Number=2,Type=Float,Description="Equal-tail Bayesian credible interval of the site allele frequency at the 95% level">\n+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">\n+##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">\n+##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">\n+##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality 
bases">\n+##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">\n+##FORMAT=<ID=PL,Number=.,Type=Integer,Description="List of Phred-scaled genotype likelihoods, number of values is (#ALT+1)*(#ALT+2)/2">\n+##source_20110319.1=/wsu/home/eq/eq83/eq8302/tools/vcftools/bin//vcf-merge s_1_ACAGTGA.vcf.gz s_1_CAGATCA.vcf.gz s_1_CGATGTA.vcf.gz s_1_CTTGTAA.vcf.gz s_1_GCCAATA.vcf.gz s_1_TGACCAA.vcf.gz\n+##sourceFiles_20110319.1=0:s_1_ACAGTGA.vcf.gz,1:s_1_CAGATCA.vcf.gz,2:s_1_CGATGTA.vcf.gz,3:s_1_CTTGTAA.vcf.gz,4:s_1_GCCAATA.vcf.gz,5:s_1_TGACCAA.vcf.gz\n+##INFO=<ID=SF,Number=.,Type=String,Description="Source File (index to sourceFiles, f when filtered)">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ts_1_ACAGTGA_sort.bam\ts_1_CAGATCA_sort.bam\ts_1_CGATGTA_sort.bam\ts_1_CTTGTAA_sort.bam\ts_1_GCCAATA_sort.bam\ts_1_TGACCAA_sort.bam\n+Y\t3718196\t.\tC\tT\t7.59\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,2,0;DP=2;FQ=-33;G3=4.617e-16,8.575e-07,1;MQ=39;SF=1\tGT:GQ:PL\t.\t1/1:61:38,6,0\t.\t.\t.\t.\n+Y\t3720217\t.\tA\tG\t8.65\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,1;DP=2;FQ=-30;G3=4.415e-15,5.291e-06,1;MQ=38;SF=5\tGT:GQ:PL\t.\t.\t.\t.\t.\t1/1:53:38,3,0\n+Y\t3720581\t.\tA\tG\t7.80\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,1,0;DP=1;FQ=-30;G3=5.56e-15,5.291e-06,1;MQ=44;SF=1\tGT:GQ:PL\t.\t1/1:53:37,3,0\t.\t.\t.\t.\n+Y\t3721154\t.\tA\tG\t13.90\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,2,0;DP=2;FQ=-33;G3=9.194e-17,8.566e-07,1;MQ=37;SF=3\tGT:GQ:PL\t.\t.\t.\t1/1:61:45,6,0\t.\t.\n+Y\t3721230\t.\tC\tG\t21.80\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,2;DP=2;FQ=-33;G3=1.456e-17,8.564e-07,1;MQ=29;SF=3\tGT:GQ:PL\t.\t.\t.\t1/1:61:53,6,0\t.\t.\n+Y\t3744605\t.\tC\tA\t3.98\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,2;DP=2;FQ=-33;G3=1.468e-15,8.599e-07,1;MQ=19;SF=2\tGT:GQ:PL\t.\t.\t1/1:61:33,6,0\t.\t.\t.
\n+Y\t4433091\t.\tT\tC\t11.10\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,2,0;DP=2;FQ=-33;G3=1.835e-16,8.568e-07,1;MQ=23;SF=5\tGT:GQ:PL\t.\t.\t.\t.\t.\t1/1:61:42,6,0\n+Y\t9945223\t.\tATTT\tATTTT\t19.80\t.\tAC=4;AF1=1;AN=4;CI95='..b',11,0,6;DP=34;FQ=32.5;G3=1.991e-14,1,7.924e-52;INDEL;MQ=35;PV4=0.28,0.049,0.14,0.2;SF=1,3,4,5\tGT:GQ:PL\t.\t0/0:70:67,0,254\t.\t1/1:99:152,0,255\t1/1:83:80,0,255\t1/1:89:86,0,255\n+Y\t10011930\t.\tACT\tA\t90.85\t.\tAC=2;AF1=0.5;AN=4;CI95=0.5,0.5;DP4=2,6,1,3;DP=17;FQ=16.6;G3=3.155e-11,1,1.991e-34;INDEL;MQ=35;PV4=1,0.00044,0.33,1;SF=0,5\tGT:GQ:PL\t0/0:54:51,0,167\t.\t.\t.\t.\t0/0:99:206,0,255\n+Y\t10011935\t.\tC\tCT\t83.83\t.\tAC=3;AF1=0.5;AN=6;CI95=0.5,0.5;DP4=1,8,2,5;DP=23;FQ=90.3;G3=1.256e-28,1,5e-26;INDEL;MQ=39;PV4=0.55,1,0.15,1;SF=1,2,4\tGT:GQ:PL\t.\t0/0:99:138,0,125\t1/1:92:89,0,148\t.\t1/1:99:138,0,171\t.\n+Y\t10011966\t.\tATT\tAT\t79.38\t.\tAC=6;AF1=0.5;AN=12;CI95=0.5,0.5;DP4=1,6,0,2;DP=14;FQ=5.09;G3=1.991e-12,1,1.256e-28;INDEL;MQ=38;PV4=1,1,0.46,0.088;SF=0,1,2,3,4,5\tGT:GQ:PL\t1/1:41:38,0,92\t1/1:76:73,0,109\t1/1:99:181,0,109\t1/1:99:114,0,103\t1/1:99:139,0,171\t1/1:99:155,0,144\n+Y\t10028061\t.\tCA\tCAA\t28.40\t.\tAC=4;AF1=1;AN=4;CI95=0.5,1;DP4=0,0,2,1;DP=9;FQ=-43.5;G3=2.739e-22,5.886e-08,1;INDEL;MQ=37;SF=4,5\tGT:GQ:PL\t.\t.\t.\t.\t1/1:72:83,9,0\t0/0:61:52,6,0\n+Y\t10029194\t.\tCA\tC\t73.47\t.\tAC=10;AF1=0.7304;AN=12;CI95=0.5,1;DP4=2,0,7,3;DP=19;FQ=-32.5;G3=2.922e-150,0.9991,0.000854;INDEL;MQ=25;PV4=1,0.4,1,0.23;SF=0,1,2,3,4,5\tGT:GQ:PL\t0/0:3:93,0,2\t1/1:85:100,17,0\t1/1:99:181,36,0\t1/1:90:107,18,0\t1/1:3:104,0,2\t1/1:70:90,10,0\n+Y\t10029452\t.\tCAA\tCAAA\t7.26\t.\tAC=4;AF1=1;AN=4;CI95=0.5,1;DP4=0,0,4,0;DP=13;FQ=-46.5;G3=2.341e-18,6.106e-08,1;INDEL;MQ=26;SF=3,4\tGT:GQ:PL\t.\t.\t.\t1/1:72:50,12,0\t1/1:72:42,12,0\t.\n+Y\t10037877\t.\tGCCC\tGCCCC\t14.40\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,2;DP=3;FQ=-40.5;G3=1.456e-17,8.564e-07,1;INDEL;MQ=29;SF=2\tGT:GQ:PL\t.\t.\t1/1:61:53,6,0\t.\t.\t.\n+Y\t13266272\t.\tTTTT\tTTTTATTT\t51.50\t.\tAC
=1;AF1=0.5;AN=2;CI95=0.5,0.5;DP4=5,1,7,0;DP=15;FQ=54.5;G3=7.924e-19,1,3.155e-24;INDEL;MQ=30;PV4=0.46,1,0.078,0.00035;SF=3\tGT:GQ:PL\t.\t.\t.\t0/0:92:89,0,116\t.\t.\n+Y\t13268110\t.\tGC\tGCC\t3.66\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,2,0;DP=2;FQ=-40.5;G3=2.911e-16,8.571e-07,1;INDEL;MQ=23;SF=2\tGT:GQ:PL\t.\t.\t1/1:61:40,6,0\t.\t.\t.\n+Y\t13292082\t.\tTCCCCCCCCCC\tTCCCCCCC\t14.40\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,2;DP=2;FQ=-40.5;G3=1.456e-17,8.564e-07,1;INDEL;MQ=29;SF=3\tGT:GQ:PL\t.\t.\t.\t1/1:61:53,6,0\t.\t.\n+Y\t13297070\t.\tAGGTGGTGGTGGT\tAGGTGGTGGT\t12.70\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,1;DP=1;FQ=-37.5;G3=2.782e-16,5.287e-06,1;INDEL;MQ=50;SF=5\tGT:GQ:PL\t.\t.\t.\t.\t.\t1/1:53:50,3,0\n+Y\t13312198\t.\tCGGGGG\tCGGGG\t14.87\t.\tAC=5;AF1=1;AN=6;CI95=0.5,1;DP4=2,0,10,0;DP=12;FQ=-43.5;G3=1.373e-19,5.886e-08,1;INDEL;MQ=24;PV4=1,0.44,1,0.019;SF=1,4,5\tGT:GQ:PL\t.\t1/1:72:56,9,0\t.\t.\t1/1:70:57,10,0\t1/1:44:48,0,42\n+Y\t13312608\t.\tCA\tCAA\t22.50\t.\tAC=1;AF1=0.5032;AN=2;CI95=0.5,0.5;DP4=2,0,7,0;DP=16;FQ=-15.6;G3=4.937e-25,1,1.272e-08;INDEL;MQ=24;PV4=1,1,0.093,1;SF=2\tGT:GQ:PL\t.\t.\t0/0:22:60,0,19\t.\t.\t.\n+Y\t13402810\t.\tTAGAGA\tTAGA\t29.80\t.\tAC=4;AF1=1;AN=4;CI95=0.5,1;DP4=0,0,1,1;DP=2;FQ=-40.5;G3=7.299e-19,8.564e-07,1;INDEL;MQ=33;SF=0,2\tGT:GQ:PL\t1/1:61:66,6,0\t.\t1/1:72:72,9,0\t.\t.\t.\n+Y\t21153016\t.\tAG\tATG\t213.83\t.\tAC=12;AF1=1;AN=12;CI95=1,1;DP4=0,0,6,9;DP=15;FQ=-79.5;G3=7.905e-54,1e-18,1;INDEL;MQ=43;SF=0,1,2,3,4,5\tGT:GQ:PL\t1/1:99:255,45,0\t1/1:99:.,.,0\t1/1:99:255,87,0\t1/1:99:.,.,0\t1/1:99:255,78,0\t1/1:99:.,.,0\n+Y\t21153067\t.\tCCA\tC\t46.50\t.\tAC=1;AF1=0.5;AN=2;CI95=0.5,0.5;DP4=8,4,5,0;DP=18;FQ=49.5;G3=7.924e-18,1,5e-52;INDEL;MQ=39;PV4=0.26,0.08,0.035,1;SF=3\tGT:GQ:PL\t.\t.\t.\t0/0:87:84,0,255\t.\t.\n+Y\t26325233\t.\tTGAGAGAGAGAGA\tTGAGAGAGAGA\t22.20\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,2,0;DP=2;FQ=-40.5;G3=2.308e-18,8.564e-07,1;INDEL;MQ=33;SF=0\tGT:GQ:PL\t1/1:61:61,6,0\t.\t.\t.\t.\t.\n+Y\t28588049\t.\tACATCAT\tACAT\t7.35\t.\tA
C=4;AF1=1;AN=4;CI95=0.5,1;DP4=0,0,1,0;DP=1;FQ=-37.5;G3=1.108e-15,5.288e-06,1;INDEL;MQ=44;SF=1,3\tGT:GQ:PL\t.\t1/1:53:44,3,0\t.\t1/1:53:44,3,0\t.\t.\n+Y\t59030478\t.\tAAAACAAACAAACAAACAAACAAACAAA\tAAAACAAACAAACAAA\t14.40\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,2;DP=2;FQ=-40.5;G3=1.456e-17,8.564e-07,1;INDEL;MQ=29;SF=2\tGT:GQ:PL\t.\t.\t1/1:61:53,6,0\t.\t.\t.\n+Y\t59032947\t.\tGTT\tGTTT\t28.20\t.\tAC=2;AF1=1;AN=2;CI95=0.5,1;DP4=0,0,0,2;DP=2;FQ=-40.5;G3=5.798e-19,8.564e-07,1;INDEL;MQ=37;SF=5\tGT:GQ:PL\t.\t.\t.\t.\t.\t1/1:61:67,6,0\n'

diff -r 000000000000 -r e8adfc4c0a6b tool-data/snpeff_annotations.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeff_annotations.loc.sample Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,5 @@
+## Annotation Databases for SnpEff
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+#genome annotation_name description
+#GRCh37.71 nextprot nextprot
+#GRCh37.71 motif motif

diff -r 000000000000 -r e8adfc4c0a6b tool-data/snpeff_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeff_databases.loc.sample Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,5 @@
+## Available Databases for SnpEff
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+## the Description field in this sample is "Genome : Version"
+#Version Description
+#GRCh37.68 Homo sapiens : GRCh37.68

diff -r 000000000000 -r e8adfc4c0a6b tool-data/snpeff_genomedb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeff_genomedb.loc.sample Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,5 @@
+## Downloaded Databases for SnpEff
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+## the Description field in this sample is "Genome : Version"
+#Version Description data_dir_path
+#GRCh37.68 Homo sapiens : GRCh37.68 /home/galaxy/snpEff/data

diff -r 000000000000 -r e8adfc4c0a6b tool-data/snpeff_regulationdb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeff_regulationdb.loc.sample Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,4 @@
+## Regulation Databases for SnpEff
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+#genome regulation_name description
+#GRCh37.70 CD4 CD4

diff -r 000000000000 -r e8adfc4c0a6b tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,19 @@
+<tables>
+    <table name="snpeff_databases" comment_char="#">
+        <columns>value, name</columns>
+        <file path="tool-data/snpeff_databases.loc" />
+    </table>
+    <table name="snpeff_genomedb" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/snpeff_genomedb.loc" />
+    </table>
+    <table name="snpeff_regulationdb" comment_char="#">
+        <columns>genome, value, name</columns>
+        <file path="tool-data/snpeff_regulationdb.loc" />
+    </table>
+    <table name="snpeff_annotations" comment_char="#">
+        <columns>genome, value, name</columns>
+        <file path="tool-data/snpeff_annotations.loc" />
+    </table>
+</tables>
+

diff -r 000000000000 -r e8adfc4c0a6b tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Dec 11 08:53:32 2013 -0500

@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="snpEff" version="3.4">
+        <repository toolshed="http://toolshed.g2.bx.psu.edu" name="package_snpeff_3_4" owner="iuc" changeset_revision="af7ff8187d09" />
+    </package>
+</tool_dependency>
+