Repository 'snpeff'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/snpeff

Changeset 1:500832f27cbc (2015-01-22)
Previous changeset 0:e8adfc4c0a6b (2013-12-11) Next changeset 2:e09ce114d240 (2016-02-19)
Commit message:
Uploaded
modified:
readme.rst
snpEff.xml
snpEff_download.xml
snpEff_macros.xml
tool_data_table_conf.xml.sample
tool_dependencies.xml
added:
.shed.yml
repository_dependencies.xml
snpEff_databases.xml
tool-data/snpeffv_annotations.loc.sample
tool-data/snpeffv_databases.loc.sample
tool-data/snpeffv_genomedb.loc.sample
tool-data/snpeffv_regulationdb.loc.sample
removed:
data_manager/data_manager_snpEff_databases.py
data_manager/data_manager_snpEff_databases.xml
data_manager/data_manager_snpEff_download.py
data_manager/data_manager_snpEff_download.xml
data_manager_conf.xml
datatypes_conf.xml
lib/galaxy/datatypes/snpeff.py
lib/galaxy/datatypes/snpeff.pyc
snpSift_annotate.xml
snpSift_caseControl.xml
snpSift_filter.xml
snpSift_int.xml
test-data/annotate_1.vcf
test-data/annotate_5.vcf
test-data/db_test_1.vcf
test-data/interval.bed
test-data/test.private.01.vcf
test-data/test.private.02.vcf
test-data/test01.vcf
tool-data/snpeff_annotations.loc.sample
tool-data/snpeff_databases.loc.sample
tool-data/snpeff_genomedb.loc.sample
tool-data/snpeff_regulationdb.loc.sample
b
diff -r e8adfc4c0a6b -r 500832f27cbc .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Thu Jan 22 08:28:37 2015 -0500
b
@@ -0,0 +1,3 @@
+# repository published to https://toolshed.g2.bx.psu.edu/repos/iuc/snpeff
+owner: iuc
+name: snpeff
b
diff -r e8adfc4c0a6b -r 500832f27cbc data_manager/data_manager_snpEff_databases.py
--- a/data_manager/data_manager_snpEff_databases.py Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import os
-import re
-import tempfile
-import subprocess
-import fileinput
-import shutil
-import optparse
-import urllib2
-from ftplib import FTP
-import tarfile
-
-from galaxy.util.json import from_json_string, to_json_string
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-def fetch_databases(data_manager_dict, target_directory, jar_path):
-    (snpEff_dir,snpEff_jar) = os.path.split(jar_path)
-    if not os.path.exists(target_directory):
-        os.makedirs(target_directory)
-    databases_path = os.path.join( target_directory, 'databases.out' )
-    databases_output = open(databases_path,'w')
-    args = [ 'java','-jar', ]
-    args.append( snpEff_jar )
-    args.append( 'databases' )
-    # tmp_stderr = tempfile.NamedTemporaryFile( prefix = "tmp-data-manager-snpEff-stderr" )
-    # databases_output = open(databases_path)
-    # proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir, stdout=databases_output.fileno(), stderr=tmp_stderr.fileno() )
-    proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir, stdout=databases_output.fileno() )
-    return_code = proc.wait()
-    if return_code:
-        sys.exit( return_code )
-    databases_output.close()
-    try:
-        data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
-        data_manager_dict['data_tables']['snpeff_databases'] = data_manager_dict['data_tables'].get( 'snpeff_databases', [] )
-        data_table_entries = []
-        fh = open(databases_path,'r')
-        for i,line in enumerate(fh):
-            fields = line.split('\t')
-            if len(fields) >= 2:
-                genome_version = fields[0].strip()
-                if genome_version.startswith("Genome") or genome_version.startswith("-"):
-                    continue
-                #snpeff test genome
-                if genome_version == '30c2c903' or fields[1].strip() == 'TestCase' or fields[1].strip().startswith('Test_'):
-                    continue
-                description = fields[1].strip() + ' : ' + genome_version
-                data_table_entries.append(dict(value=genome_version, name=description))
-        data_manager_dict['data_tables']['snpeff_databases'] = data_table_entries
-    except Exception, e:
-        stop_err( 'Error parsing %s %s\n' % (config,str( e )) )
-    else:
-        fh.close()
-    return data_manager_dict
-
-def main():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-j', '--jar_path', dest='jar_path', action='store', type="string", default=None, help='snpEff.jar path' )
-    (options, args) = parser.parse_args()
-
-    filename = args[0]
-
-    params = from_json_string( open( filename ).read() )
-    target_directory = params[ 'output_data' ][0]['extra_files_path']
-    os.mkdir( target_directory )
-    data_manager_dict = {}
-
-
-    #Fetch the list of available SnpEff databases
-    data_manager_dict = fetch_databases( data_manager_dict, target_directory, options.jar_path)
-
-    #save info to json file
-    open( filename, 'wb' ).write( to_json_string( data_manager_dict ) )
-
-if __name__ == "__main__": main()
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc data_manager/data_manager_snpEff_databases.xml
--- a/data_manager/data_manager_snpEff_databases.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,41 +0,0 @@
-<tool id="data_manager_snpeff_databases" name="SnpEff Databases" version="3.4" tool_type="manage_data">
- <description>Read the list of available snpEff databases</description>
- <requirements>
- <requirement type="package" version="3.4">snpEff</requirement>
- </requirements>
- <command interpreter="python">
-        data_manager_snpEff_databases.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar "$out_file"
-        </command>
- <inputs>
- </inputs>
- <outputs>
-           <data name="out_file" format="data_manager_json"/>
- </outputs>
-        <stdio>
-          <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
-          <exit_code range="1:"  level="fatal"   description="Error" />
-        </stdio>
-        <tests>
-            <test>
-                <output name="out_file">
-                    <assert_contents>
-                        <!-- Check that a genome was added -->
-                        <has_text text="GRCh37.72" />
-                    </assert_contents>
-                </output>
-            </test>
-        </tests>
- <help>
-
-This tool updates the list of SnpEff databases for the SnpEff Download data manager.
-It should only need to be run once for a snpEff version, 
-since it populates the SnpEff Download data manager from the snpEff config file.
-
-For information about snpEff:    http://snpEff.sourceforge.net
-
-Please cite:
-"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
-
- </help>
-</tool>
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc data_manager/data_manager_snpEff_download.py
--- a/data_manager/data_manager_snpEff_download.py Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,114 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import os
-import re
-import tempfile
-import subprocess
-import fileinput
-import shutil
-import optparse
-import urllib2
-from ftplib import FTP
-import tarfile
-
-from galaxy.util.json import from_json_string, to_json_string
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-"""
-# Download human database 'hg19'
-java -jar snpEff.jar download -v hg19
-
-        <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command>
-
-snpEffectPredictor.bin
-regulation_HeLa-S3.bin
-regulation_pattern = 'regulation_(.+).bin'
-
-
-"""
-def download_database(data_manager_dict, target_directory, jar_path,config,genome_version,organism):
-    ## get data_dir from config 
-    ##---
-    ## Databases are stored here
-    ## E.g.: Information for 'hg19' is stored in data_dir/hg19/
-    ##
-    ## Note: Since version 2.1 you can use tilde ('~') as first character to refer to your home directory
-    ##---
-    #data_dir = ~/snpEff/data/
-    data_dir = target_directory
-    (snpEff_dir,snpEff_jar) = os.path.split(jar_path)
-    args = [ 'java','-jar' ]
-    args.append( jar_path )
-    args.append( 'download' )
-    args.append( '-c' )
-    args.append( config )
-    args.append( '-dataDir' )
-    args.append( data_dir )
-    args.append( '-v' )
-    args.append( genome_version )
-    proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir )
-    return_code = proc.wait()
-    if return_code:
-        sys.exit( return_code )
-    ## search data_dir/genome_version for files
-    regulation_pattern = 'regulation_(.+).bin'
-    #  annotation files that are included in snpEff by a flag
-    annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
-    genome_path = os.path.join(data_dir,genome_version)
-    if os.path.isdir(genome_path):
-        for root, dirs, files in os.walk(genome_path):
-            for fname in files:
-                if fname.startswith('snpEffectPredictor'):
-                    # if snpEffectPredictor.bin download succeeded
-                    name = genome_version + (' : ' + organism if organism else '') 
-                    data_table_entry = dict(value=genome_version, name=name, path=data_dir)
-                    _add_data_table_entry( data_manager_dict, 'snpeff_genomedb', data_table_entry )
-                else:
-                    m = re.match(regulation_pattern,fname)
-                    if m:
-                        name = m.groups()[0]
-                        data_table_entry = dict(genome=genome_version,value=name, name=name)
-                        _add_data_table_entry( data_manager_dict, 'snpeff_regulationdb', data_table_entry )
-                    elif fname in annotations_dict:
-                        value = annotations_dict[fname]
-                        name = value.lstrip('-')
-                        data_table_entry = dict(genome=genome_version,value=value, name=name)
-                        _add_data_table_entry( data_manager_dict, 'snpeff_annotations', data_table_entry )
-    return data_manager_dict
-
-def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ):
-    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
-    data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] )
-    data_manager_dict['data_tables'][data_table].append( data_table_entry )
-    return data_manager_dict
-
-def main():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option( '-j', '--jar_path', dest='jar_path', action='store', type="string", default=None, help='snpEff.jar path' )
-    parser.add_option( '-c', '--config', dest='config', action='store', type="string", default=None, help='snpEff.config path' )
-    parser.add_option( '-g', '--genome_version', dest='genome_version', action='store', type="string", default=None, help='genome_version' )
-    parser.add_option( '-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name' )
-    (options, args) = parser.parse_args()
-
-    filename = args[0]
-
-    params = from_json_string( open( filename ).read() )
-    target_directory = params[ 'output_data' ][0]['extra_files_path']
-    os.mkdir( target_directory )
-    data_manager_dict = {}
-
-
-    #Create SnpEff Reference Data
-    for genome_version, organism in zip(options.genome_version.split(','), options.organism.split(',')):
-        download_database( data_manager_dict, target_directory, options.jar_path, options.config, genome_version, organism )
-
-    #save info to json file
-    open( filename, 'wb' ).write( to_json_string( data_manager_dict ) )
-
-if __name__ == "__main__": main()
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc data_manager/data_manager_snpEff_download.xml
--- a/data_manager/data_manager_snpEff_download.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,51 +0,0 @@
-<tool id="data_manager_snpeff_download" name="SnpEff Download" version="3.4" tool_type="manage_data">
-    <description>Download a new database</description>
-    <requirements>
-        <requirement type="package" version="3.4">snpEff</requirement>
-    </requirements>
-    <command interpreter="python">
-        data_manager_snpEff_download.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar --config \$SNPEFF_JAR_PATH/snpEff.config 
-        --genome_version "${genome_databases.fields.value}"
-        --organism "${genome_databases.fields.name}"
-        "$out_file"
-        </command>
-    <inputs>
-        <param name="genome_databases" type="select" display="checkboxes" multiple="true" label="Genome Version">
-            <options from_data_table="snpeff_databases">
-                <filter type="sort_by" column="0" />
-            </options>
-        </param>
-    </inputs>
-
-    <outputs>
-           <data name="out_file" format="data_manager_json" label="${tool.name} : ${genome_databases.fields.value}"/>
-    </outputs>
-    <stdio>
-        <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
-        <exit_code range="1:"  level="fatal"   description="Error" />
-    </stdio>
-    <tests>
-        <test>
-            <param name="genome_databases" value="GRCh37.71"/>
-            <output name="out_file">
-                <assert_contents>
-                    <!-- Check that a genome was added -->
-                    <has_text text="GRCh37.71" />
-                    <has_text text="snpeff_regulationdb" />
-                    <has_text text="snpeff_annotations" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-
-This tool downloads a SnpEff database.
-
-For details about this tool, please go to http://snpEff.sourceforge.net
-
-Please cite:
-"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
-
-    </help>
-</tool>
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc data_manager_conf.xml
--- a/data_manager_conf.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,42 +0,0 @@
-<?xml version="1.0"?>
-<data_managers>
-  <data_manager tool_file="data_manager/data_manager_snpEff_databases.xml" id="data_manager_snpeff_databases" >
-    <data_table name="snpeff_databases">  <!-- Defines a Data Table to be modified. -->
-      <output> <!-- Handle the output of the Data Manager Tool -->
-        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-      </output>
-    </data_table>
-  </data_manager>
-  <data_manager tool_file="data_manager/data_manager_snpEff_download.xml" id="data_manager_snpeff_download" >
-    <data_table name="snpeff_genomedb">  <!-- Defines a Data Table to be modified. -->
-      <output> <!-- Handle the output of the Data Manager Tool -->
-        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="path" output_ref="out_file" >
-          <move type="directory" relativize_symlinks="True">
-            <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">snpEff/data</target>
-          </move>
-          <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/data</value_translation>
-          <value_translation type="function">abspath</value_translation>
-        </column>
-      </output>
-    </data_table>
-    <data_table name="snpeff_regulationdb">  <!-- Defines a Data Table to be modified. -->
-      <output> <!-- Handle the output of the Data Manager Tool -->
-        <column name="genome" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
-      </output>
-    </data_table>
-    <data_table name="snpeff_annotations">  <!-- Defines a Data Table to be modified. -->
-      <output> <!-- Handle the output of the Data Manager Tool -->
-        <column name="genome" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
-        <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
-      </output>
-    </data_table>
-  </data_manager>
-</data_managers>
-
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc datatypes_conf.xml
--- a/datatypes_conf.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,10 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-    <datatype_files>
-        <datatype_file name="snpeff.py"/>
-    </datatype_files>
-    <registration>
-        <datatype extension="snpeffdb" type="galaxy.datatypes.snpeff:SnpEffDb" display_in_upload="True"/>
-    </registration>
-</datatypes>
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc lib/galaxy/datatypes/snpeff.py
--- a/lib/galaxy/datatypes/snpeff.py Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,46 +0,0 @@
-"""
-SnpEff datatypes
-"""
-import os,os.path,re,sys
-import galaxy.datatypes.data
-from galaxy.datatypes.data import Text
-from galaxy.datatypes.metadata import MetadataElement
-
-class SnpEffDb( Text ):
-    """Class describing a SnpEff genome database dataset (.snpeffdb)"""
-    file_ext = "snpeffdb"
-    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
-    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[] )
-    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[] )
-
-    def __init__( self, **kwd ):
-        Text.__init__( self, **kwd )
-
-    def set_meta( self, dataset, **kwd ):
-        Text.set_meta(self, dataset, **kwd )
-        data_dir = dataset.extra_files_path
-        ## search data_dir/genome_version for files
-        regulation_pattern = 'regulation_(.+).bin'
-        #  annotation files that are included in snpEff by a flag
-        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
-        regulations = []
-        annotations = []
-        if data_dir and os.path.isdir(data_dir):
-            for root, dirs, files in os.walk(data_dir):
-                for fname in files:
-                    if fname.startswith('snpEffectPredictor'):
-                        # if snpEffectPredictor.bin download succeeded
-                        genome_version = os.path.basename(root)
-                        dataset.metadata.genome_version = genome_version
-                    else:
-                        m = re.match(regulation_pattern,fname)
-                        if m:
-                            name = m.groups()[0]
-                            regulations.append(name)
-                        elif fname in annotations_dict:
-                            value = annotations_dict[fname]
-                            name = value.lstrip('-')
-                            annotations.append(name)
-            dataset.metadata.regulation = regulations
-            dataset.metadata.annotation = annotations
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc lib/galaxy/datatypes/snpeff.pyc
b
Binary file lib/galaxy/datatypes/snpeff.pyc has changed
b
diff -r e8adfc4c0a6b -r 500832f27cbc readme.rst
--- a/readme.rst Wed Dec 11 08:53:32 2013 -0500
+++ b/readme.rst Thu Jan 22 08:28:37 2015 -0500
[
@@ -1,24 +1,38 @@
-These are galaxy tools for SnpEff a variant annotation and effect prediction tool by Pablo Cingolani. 
+SnpEff wrappers
+===============
+
+These are galaxy tools for SnpEff_, a variant annotation and effect prediction tool by Pablo Cingolani.
 It annotates and predicts the effects of variants on genes (such as amino acid changes).
-( http://snpeff.sourceforge.net/ )
+
+.. _SnpEff: http://snpeff.sourceforge.net/
+
 
-This repository contains a tool_dependencies.xml file that will attempt to automatically install SnpEff and SnpSift.   
+This repository lets you automatically install SnpEff and SnpSift.
+This will use the default location for genome reference downloads from the ``snpEff.config`` file:
+
+  data_dir = ~/snpEff/data/
 
-This will use the default location for genome reference downloads from the snpEff.config:
-data_dir = ~/snpEff/data/
-You can manually edit the installed snpEff.config and change the location, or you can create a symbolic link to the desired data location from ~/snpEff.
+You can manually edit the installed ``snpEff.config`` file and change the location, or you can create a symbolic link to the desired data location from ``~/snpEff``.
+
+The genome reference options used by the tools "SnpEff" (snpEff.xml) and "SnpEff Download" (snpEff_download.xml) are taken from the ``tool-data/snpeffect_genomedb.loc`` file.
+You can fill this file by running the following command:
 
-The genome reference options used by the tools:
-    "SnpEff"  snpEff.xml
-    "SnpEff Download" snpEff_download.xml
-are taken from: tool-data/snpeffect_genomedb.loc
+  java -jar snpEff.jar databases | tail -n +3 | cut -f 1,2 | awk '{ gsub(/_/, " ", $2); printf "%s\\t%s : %s\\n", $1, $2, $1 }' | sort -k 2 > snpeffect_genomedb.loc
+
+There are 2 datamanagers to download and install prebuilt SnpEff genome databases:
+
+* data_manager_snpeff_databases: generates a list of available SnpEff genome databases into the ``tool-data/snpeff_databases.loc`` file
+* data_manager_snpeff_download: downloads a SnpEff genome database selected from ``tool-data/snpeff_databases.loc`` and adds entries to ``snpeff_genomedb.loc``, ``snpeff_regulationdb.loc`` and ``snpeff_annotations.loc``
+
+SnpEff citation: |Cingolani2012program|_.
 
-There are 2 datamanagers to download and install prebuilt SnpEff Genome databases:
-  data_manager_snpeff_databases - generates a list of available SnpEff genome databases into the tool-data/snpeff_databases.loc 
-  data_manager_snpeff_download - downloads a SnpEff genome database selected from: tool-data/snpeff_databases.loc and adds entries to snpeff_genomedb.loc,snpeff_regulationdb.loc,snpeff_annotations.loc 
+.. |Cingolani2012program| replace:: Cingolani, P., Platts, A., Wang, L. L., Coon, M., Nguyen, T., Wang, L., Land, S. J., Lu, X., Ruden, D. M. (2012) A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of *Drosophila melanogaster* strain w1118; iso-2; iso-3. *Fly* 6(2):80-92
+.. _Cingolani2012program: https://www.landesbioscience.com/journals/fly/article/19695/
+
+SnpSift citation: |Cingolani2012using|_.
 
-SnpEff citation:
-"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+.. |Cingolani2012using| replace:: Cingolani, P., Patel, V. M., Coon, M., Nguyen, T., Land, S. J., Ruden, D. M., Lu, X. (2012) Using *Drosophila melanogaster* as a model for genotoxic chemical mutational studies with a new program, SnpSift. *Front. Genet.* 3:35
+.. _Cingolani2012using: http://journal.frontiersin.org/Journal/10.3389/fgene.2012.00035/
 
-SnpSift citation:
-"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+Wrapper authors: Jim Johnson
+
b
diff -r e8adfc4c0a6b -r 500832f27cbc repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Thu Jan 22 08:28:37 2015 -0500
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the SnpEff datatype definitions.">
+    <repository name="snpeff_datatypes" owner="iuc" changeset_revision="d78b2b2a3388" toolshed="http://toolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpEff.xml
--- a/snpEff.xml Wed Dec 11 08:53:32 2013 -0500
+++ b/snpEff.xml Thu Jan 22 08:28:37 2015 -0500
[
b'@@ -1,13 +1,14 @@\n-<tool id="snpEff" name="SnpEff" version="3.4">\n+<tool id="snpEff" name="SnpEff" version="@WRAPPER_VERSION@.0">\n     <description>Variant effect and annotation</description>\n     <expand macro="requirements" />\n     <macros>\n         <import>snpEff_macros.xml</import>\n     </macros>\n     <command>\n+<![CDATA[\n         java -Xmx6G -jar \\$SNPEFF_JAR_PATH/snpEff.jar eff \n         -c \\$SNPEFF_JAR_PATH/snpEff.config \n-        -i $inputFormat -o $outputFormat -upDownStreamLen $udLength \n+        -i $inputFormat -o ${outputConditional.outputFormat} -upDownStreamLen $udLength\n         #if $spliceSiteSize and $spliceSiteSize.__str__ != \'\':\n           -spliceSiteSize $spliceSiteSize\n         #end if\n@@ -34,7 +35,7 @@\n         #if $statsFile:\n           -stats $statsFile \n         #end if\n-        #if $offset.__str__ != \'\':\n+        #if $offset.__str__ != \'default\':\n           ${offset} \n         #end if\n         #if $chr.__str__.strip() != \'\':\n@@ -66,7 +67,19 @@\n           -download\n           $snpDb.genome_version\n         #end if\n-        $input > $snpeff_output \n+        $input > $snpeff_output ;\n+        #if $statsFile:\n+            #import os\n+            #set $genes_file = str($statsFile) + \'.genes.txt\'\n+            #set $genes_file_name = os.path.split($genes_file)[-1]\n+            mkdir $statsFile.files_path;\n+            mv $genes_file #echo os.path.join($statsFile.files_path, $genes_file_name)#;\n+        #end if\n+        #if $outputConditional.outputFormat == \'gatk\' and $outputConditional.gatk_v1\n+          ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently supported by the GATK. 
Supported versions are: [2.0.5]"\n+          sed -i \'s/^\\#\\#SnpEffVersion="\\(\\S*\\s\\)/\\#\\#SnpEffVersion="2.0.5 - real is \\1/\' $snpeff_output\n+        #end if\n+]]>\n     </command>\n     <inputs>\n         <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>\n@@ -78,12 +91,22 @@\n             <option value="bed">BED (Deprecated)</option>\n         </param>\n \n-        <param name="outputFormat" type="select" label="Output format">\n-            <option value="vcf" selected="true">VCF (only if input is VCF)</option>\n-            <option value="txt">Tabular</option>\n-            <option value="bed">BED</option>\n-            <option value="bedAnn">BED Annotations</option>\n-        </param>\n+        <conditional name="outputConditional">\n+            <param name="outputFormat" type="select" label="Output format">\n+                <option value="vcf" selected="true">VCF (only if input is VCF)</option>\n+                <option value="gatk">GATK-compatible VCF (only if input is VCF)</option>\n+                <option value="txt">Tabular</option>\n+                <option value="bed">BED</option>\n+                <option value="bedAnn">BED annotations</option>\n+            </param>\n+            <when value="vcf" />\n+            <when value="gatk">\n+                <param name="gatk_v1" type="boolean" checked="true" label="Compatible with GATK 1.x" />\n+            </when>\n+            <when value="txt" />\n+            <when value="bed" />\n+            <when value="bedAnn" />\n+        </conditional>\n \n         <conditional name="snpDb">\n             <param name="genomeSrc" type="select" label="Genome source">\n@@ -94,35 +117,41 @@\n             <when value="cached">\n                 <param name="genomeVersion" type="select" label="Genome">\n                     <!--GENOME    DESCRIPTION-->\n-                    <options from_data_table="snpeff_genomedb">\n-                           
<filter type="unique_value" column="0" />\n+                    <options from_data_table="snpeffv_genomedb">\n+                           <filter type="static_value" name="snpeff_version" value="@SNPEFF_VERSION@" column="1"/>\n+                           <filter type="unique_value" column="2" />\n                     </opt'..b'ion value="-ins">Analyze insertions only</option>\n+            <option value="-mnp">Only MNPs (multiple nucleotide polymorphisms)</option>\n+            <option value="-snp">Only SNPs (single nucleotide polymorphisms)</option>\n         </param>\n \n         <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options">\n-            <option value="-cancer">Perform \'cancer\' comparissons (Somatic vs Germline)</option>\n+            <option value="-cancer">Perform \'cancer\' comparisons (somatic vs. germline)</option>\n             <option value="-canon">Only use canonical transcripts</option>\n             <option value="-geneId">Use gene ID instead of gene name (VCF output)</option>\n-            <option value="-hgvs">Use HGVS annotations for amino acid sub-field</option>\n-            <option value="-lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags</option>\n+            <option value="-lof">Add loss of function (LOF) and nonsense mediated decay (NMD) tags</option>\n             <option value="-oicr">Add OICR tag in VCF file</option>\n             <option value="-onlyReg">Only use regulation tracks</option>\n-            <option value="-sequenceOntolgy">Use Sequence Ontolgy terms.</option>\n+            <option value="-classic">Use Classic Effect names and amino acid variant annotations (NON_SYNONYMOUS_CODING vs missense_variant and G180R vs p.Gly180Arg/c.538G>C)</option>\n+            <option value="-hgvs">Override classic and use HGVS annotations for amino acid annotations (p.Gly180Arg/c.538G>C vs G180R)</option>\n+            <option value="-sequenceOntology">Override 
classic and use Sequence Ontolgy terms for effects (missense_variant vs NON_SYNONYMOUS_CODING)</option>\n         </param>\n         <param name="intervals" format="bed" type="data" optional="true" label="Use custom interval file for annotation"/>\n         <param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file." help="Format is one transcript ID per line."/>\n@@ -193,7 +226,7 @@\n         </param>\n \n         <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">\n-            <option value="" selected="true">Use default (based on input type)</option>\n+            <option value="default" selected="true">Use default (based on input type)</option>\n             <option value="-0">Force zero-based positions (both input and output)</option>\n             <option value="-1">Force one-based positions (both input and output)</option>\n         </param>\n@@ -210,13 +243,12 @@\n     <outputs>\n         <data format="vcf" name="snpeff_output" >\n             <change_format>\n-                <when input="outputFormat" value="vcf" format="vcf" />\n-                <when input="outputFormat" value="txt" format="tabular" />\n-                <when input="outputFormat" value="bed" format="bed" />\n-                <when input="outputFormat" value="bedAnn" format="bed" />\n+                <when input="outputConditional.outputFormat" value="txt" format="tabular" />\n+                <when input="outputConditional.outputFormat" value="bed" format="bed" />\n+                <when input="outputConditional.outputFormat" value="bedAnn" format="bed" />\n             </change_format>\n         </data>\n-        <data format="html" name="statsFile">\n+        <data format="html" name="statsFile" label="${tool.name} on ${on_string} - stats">\n             <filter>generate_stats == True</filter>\n         </data>\n     </outputs>\n@@ -280,7 +312,7 @@\n         <param 
name="genome_version" value="testCase"/>\n         <param name="udLength" value="0"/>\n         <param name="filterHomHet" value="no_filter"/>\n-        <param name="filterIn" value="del"/>\n+        <param name="filterIn" value="+-del"/>\n         <!--\n         <param name="filterOut" value=""/>\n         -->\n@@ -326,5 +358,6 @@\n @CITATION_SECTION@\n \n     </help>\n+    <expand macro="citations" />\n </tool>\n \n'
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpEff_databases.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff_databases.xml Thu Jan 22 08:28:37 2015 -0500
[
@@ -0,0 +1,26 @@
+<tool id="snpEff_databases" name="SnpEff Available Databases" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpEff_macros.xml</import>
+    </macros>
+    <command>
+<![CDATA[
+    java -jar \$SNPEFF_JAR_PATH/snpEff.jar databases |  grep -v '^---' | sed 's/^Genome/#Genome/' | sed  's/  *//g' > $snpeff_dbs 
+]]>
+    </command>
+    <inputs>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="snpeff_dbs" label="${tool.name} @SNPEFF_VERSION@ available databases" />
+    </outputs>
+    <expand macro="stdio" />
+    <help>
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
+
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpEff_download.xml
--- a/snpEff_download.xml Wed Dec 11 08:53:32 2013 -0500
+++ b/snpEff_download.xml Thu Jan 22 08:28:37 2015 -0500
[
@@ -1,23 +1,22 @@
-<tool id="snpEff_download" name="SnpEff Download" version="3.4">
+<tool id="snpEff_download" name="SnpEff Download" version="@WRAPPER_VERSION@.0">
     <description>Download a new database</description>
     <expand macro="requirements" />
     <macros>
         <import>snpEff_macros.xml</import>
     </macros>
     <command>
-    echo $genomeVersion > $snpeff_db; 
-    java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config -dataDir $snpeff_db.extra_files_path -v $genomeVersion > $logfile 
+<![CDATA[
+    java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config -dataDir $snpeff_db.files_path -v $genome_version 
+]]>
     </command>
     <inputs>
-        <param name="genomeVersion" type="select" label="Select the genome version you want to download">
-            <options from_data_table="snpeff_databases">
-                <filter type="sort_by" column="0" />
-            </options>
+        <param name="genome_version" type="text" size="40" value="" label="Enter the genome version you want to download (e.g. GRCh37.74)">
+            <help>@SNPEFF_DATABASE_URL@</help>
+            <validator type="regex" message="A genome version name is required">\S+</validator>
         </param>
     </inputs>
     <outputs>
-        <data format="txt" name="logfile" />
-        <data format="snpeffdb" name="snpeff_db" label="${genomeVersion}" />
+        <data format="snpeffdb" name="snpeff_db" label="${tool.name} @SNPEFF_VERSION@ ${genome_version}" />
     </outputs>
     <expand macro="stdio" />
     <help>
@@ -27,5 +26,6 @@
 @CITATION_SECTION@
 
     </help>
+    <expand macro="citations" />
 </tool>
 
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpEff_macros.xml
--- a/snpEff_macros.xml Wed Dec 11 08:53:32 2013 -0500
+++ b/snpEff_macros.xml Thu Jan 22 08:28:37 2015 -0500
[
@@ -1,7 +1,7 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="3.4">snpEff</requirement>
+            <requirement type="package" version="4.0">snpEff</requirement>
         </requirements>
     </xml>
   <xml name="stdio">
@@ -10,9 +10,13 @@
         <exit_code range="1:"  level="fatal" description="Error" />
     </stdio>
   </xml>
+  <token name="@WRAPPER_VERSION@">4.0</token>
+  <token name="@SNPEFF_VERSION@">SnpEff4.0</token>
+  <token name="@SNPEFF_DATABASE_URL@">https://snpeff-data.galaxyproject.org/databases/v4_0/</token>
   <token name="@EXTERNAL_DOCUMENTATION@">
 
-For details about this tool, please go to http://snpeff.sourceforge.net/SnpSift.html#intervals
+For details about this tool, please go to:
+ http://snpeff.sourceforge.net/SnpEff_manual.html
 
   </token>
   <token name="@CITATION_SECTION@">------
@@ -22,10 +26,13 @@
 For the underlying tool, please cite the following two publications:
 
 SnpEff citation:
-"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
-
-SnpSift citation:
-"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly 6(2):80-92, 2012
 
   </token>
+  <xml name="citations">
+      <citations>
+        <citation type="doi">10.4161/fly.19695</citation>
+        <yield />
+      </citations>
+  </xml>
 </macros>
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpSift_annotate.xml
--- a/snpSift_annotate.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,99 +0,0 @@
-<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.4">
-    <description>SNPs from dbSnp</description>
-    <!-- 
-        You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
-    -->
-    <expand macro="requirements" />
-    <macros>
-        <import>snpEff_macros.xml</import>
-    </macros>
-    <command>
-        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar $annotate_cmd 
-        #if $annotate.id :
-          -id
-        #elif $annotate.info_ids.__str__.strip() != '' :
-          -info "$annotate.info_ids"
-        #end if          
-        -q $dbSnp $input > $output 
-    </command>
-    <inputs>
-        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
-        <param format="vcf" name="dbSnp" type="data" label="VCF File with ID field annotated (e.g. dnSNP.vcf)" 
-            help="The ID field for a variant in input will be assigned from a matching variant in this file."/>
-        <conditional name="annotate">
-            <param name="id" type="boolean" truevalue="id" falsevalue="info" checked="True" label="Only annotate ID field (do not add INFO field)" help=""/>
-            <when value="id"/>
-            <when value="info">
-                <param name="info_ids" type="text" value="" size="60" optional="true" label="Limit INFO annotation to these INFO IDs"
-                    help="list is a comma separated list of fields. When blank, all INFO fields are included">    
-                    <validator type="regex" message="IDs separted by commas">^(([a-zA-Z][a-zA-Z0-9_-]*)(,[a-zA-Z][a-zA-Z0-9_-]*)*)?$</validator>
-                </param>
-            </when>
-        </conditional>
-        <param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Allow unsorted VCF files"> 
-            <help>
-                This option will load the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files).
-                Otherwise, both the database and the input VCF files should be sorted by position (Chromosome sort order can differ between files). 
-            </help>
-            </param>
-    </inputs>
-    <expand macro="stdio" />
-    <outputs>
-        <data format="vcf" name="output" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="input" ftype="vcf" value="annotate_1.vcf"/>
-            <param name="dbSnp" ftype="vcf" value="db_test_1.vcf"/>
-            <param name="annotate_cmd" value="False"/>
-            <param name="id" value="True"/>
-            <output name="output">
-                <assert_contents>
-                    <has_text text="rs76166080" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-
-This is typically used to annotate IDs from dbSnp.
-
-Annotatating only the ID field from dbSnp137.vcf ::
-
-    Input VCF:
-    CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
-    22      16157571    .            T    G    0.0    FAIL    NS=53
-    22      16346045    .            T    C    0.0    FAIL    NS=244
-    22      16350245    .            C    A    0.0    FAIL    NS=192
-
-    Annotated Output VCF:
-    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
-    22      16157571    .            T    G    0.0    FAIL    NS=53
-    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244
-    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192
-
-
-
-Annotatating both the ID and INFO fields from dbSnp137.vcf ::
-
-    Input VCF:
-    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
-    22      16157571    .            T    G    0.0    FAIL    NS=53
-    22      16346045    .            T    C    0.0    FAIL    NS=244
-    22      16350245    .            C    A    0.0    FAIL    NS=192
-
-    Annotated Output VCF:
-    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
-    22      16157571    .            T    G    0.0    FAIL    NS=53
-    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244;RSPOS=16346045;GMAF=0.162248628884826;dbSNPBuildID=129;SSR=0;SAO=0;VP=050100000000000100000100;WGT=0;VC=SNV;SLO;GNO
-    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192;RSPOS=16350245;GMAF=0.230804387568556;dbSNPBuildID=101;SSR=1;SAO=0;VP=050000000000000100000140;WGT=0;VC=SNV;GNO
-
-
-@EXTERNAL_DOCUMENTATION@
-
-@CITATION_SECTION@
-
-
-    </help>
-</tool>
-
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpSift_caseControl.xml
--- a/snpSift_caseControl.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,121 +0,0 @@
-<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.4">
-    <description>Count samples are in 'case' and 'control' groups.</description>
-    <!-- 
-        You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
-    -->
-    <expand macro="requirements" />
-    <macros>
-        <import>snpEff_macros.xml</import>
-    </macros>
-    <command>
-    java -Xmx1G -jar \$SNPEFF_JAR_PATH/SnpSift.jar caseControl -q 
-    #if $name.__str__.strip() != '':
-      -name $name
-    #end if
-    #if $ctrl.ctrl_src == 'caseString':
-      '$ctrl.caseControlStr' 
-    #else
-      -tfam "$ctrl.tfam"
-    #end if
-    $input > $output
-    </command>
-    <inputs>
-        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
-        <conditional name="ctrl">
-            <param name="ctrl_src" type="select" label="Case Control defined in">
-            <option value="caseString">Case Control String</option>
-            <option value="tfam">TFAM file</option>
-        </param>
-        <when value="caseString">
-            <param name="caseControlStr" type="text" label="Case / Control column designation" size="50">
-            <help>
-                Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral
-            </help>
-            <validator type="regex" message="must be  only plus(+), minus(-), or zero(0) characters">[+-0]+</validator>
-            </param>
-        </when>
-        <when value="tfam">
-            <param format="tabular" name="tfam" type="data" label="PLINK TFAM file" help="Read more about TFAM at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr"/>
-        </when>
-        </conditional>
-        <param name="name" type="text" optional="true" label="name" help="name to append to the 'Cases' or 'Controls' tags">
-            <validator type="regex" message="Use only valid ID characters">[_a-zA-Z0-9]+</validator>
-        </param>
-    </inputs>
-    <outputs>
-        <data format="vcf" name="output" />
-    </outputs>
-    <expand macro="stdio" />
-    <tests>
-        <test>
-            <param name="input" ftype="vcf" value="test.private.01.vcf"/>
-            <param name="ctrl_src" value="caseString"/>
-            <param name="caseControlStr" value="--"/>
-            <output name="output">
-                <assert_contents>
-                    <has_text text="Cases=0,0,0;" />
-                    <has_text text="Controls=0,0,0;" />
-                </assert_contents>
-            </output>
-        </test>
-
-        <test>
-            <param name="input" ftype="vcf" value="test.private.02.vcf"/>
-            <param name="ctrl_src" value="caseString"/>
-            <param name="caseControlStr" value="--"/>
-            <output name="output">
-                <assert_contents>
-                    <has_text text="Cases=0,0,0;" />
-                    <has_text text="Controls=2,0,4;" />
-                </assert_contents>
-            </output>
-        </test>
-
-        <test>
-            <param name="input" ftype="vcf" value="test.private.02.vcf"/>
-            <param name="name" value=""/>
-            <param name="ctrl_src" value="caseString"/>
-            <param name="caseControlStr" value="-+"/>
-            <output name="output">
-                <assert_contents>
-                    <has_text text="Cases=1,0,2;" />
-                    <has_text text="Controls=1,0,2;" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-
-**SnpSift CaseControl**
-
-Allows you to count how many samples are in 'case' group and a 'control' group. You can count 'homozygous', 'heterozygous' or 'any' variants. 
-
-Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral. 
-
-This command adds two annotations to the VCF file:
-
- - **CaseControl**: Two comma separated numbers numbers representing the number of samples that have the variant in the case and the control group. Example: 
-
-  "CaseControl=3,4" *the variant is present in 3 cases and 4 controls.*
-
-
- - **CaseControlP**: A p-value (Fisher exact test) that the number of cases is N or more. Example:
-
-  "CaseControl=4,0;CaseControlP=3.030303e-02" *in this case the pValue of having 4 or more cases and zero controls is 0.03*
-
-
-For example, if we have ten samples (which means ten genotype columns in the VCF file), the first four are 'case' and the last six are 'control', so the description string would be "++++------".  Let's say we want to distinguish genotypes that are homozygous in 'case' and either homozygous or heterozygous in 'control'.  We would set:
-
-  - Hom/Het case = "hom"
-
-  - Hom/Het control = "any"  
-
-  - Case / Control column designation = ""++++------"
-
-
-@EXTERNAL_DOCUMENTATION@
-
-@CITATION_SECTION@
-
-  </help>
-</tool>
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpSift_filter.xml
--- a/snpSift_filter.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,148 +0,0 @@
-<tool id="snpSift_filter" name="SnpSift Filter" version="3.4">
-    <options sanitize="False" />
-    <description>Filter variants using arbitrary expressions</description>
-    <expand macro="requirements" />
-    <macros>
-        <import>snpEff_macros.xml</import>
-    </macros>
-    <command>
-        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse 
-        #if $filtering.mode == 'field':
-            #if $filtering.replace.pass:
-                --pass
-                #if $filtering.replace.filterId and len($filtering.replace.filterId.__str__.strip()) > 0:
-                    --filterId "$filtering.replace.filterId"
-                #end if
-            #end if
-            #if $filtering.addFilter and len($filtering.addFilter.__str__.strip()) > 0:
-                --addFilter "$filtering.addFilter"
-            #end if
-            #if $filtering.rmFilter and len($filtering.rmFilter.__str__.strip()) > 0:
-                --rmFilter "$filtering.rmFilter"
-            #end if
-        #end if
-         > $output
-    </command>
-    <inputs>
-        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
-        <param name="expr" type="text" label="Filter criteria" size="160" help="Need help? See below a few examples." />
-        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" />
-        <conditional name="filtering">
-            <param name="mode" type="select" label="Filter mode">
-                <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option>
-                <option value="field">Change the FILTER field, but retain all entries</option>
-            </param> 
-            <when value="entries"/>
-            <when value="field">
-                <conditional name="replace">
-                    <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" 
-                           help="appends an ID tag to non-matching entry FILTER "/>
-                    <when value="no"/>
-                    <when value="yes">
-                        <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10"
-                               help="Default ID is 'SnpSift'"/>
-                    </when>
-                </conditional>
-                <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
-                <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
-            </when>
-        </conditional>
-    </inputs>
-    <configfiles>
-        <configfile name="exprFile">
-        $expr
-        </configfile> 
-    </configfiles>
-
-    <outputs>
-        <data format="vcf" name="output" />
-    </outputs>
-    <expand macro="stdio" />
-    <tests>
-        <test>
-        <param name="input" ftype="vcf" value="test01.vcf"/>
-        <param name="expr" value="QUAL >= 50"/>
-        <param name="mode" value="entries"/>
-        <output name="output">
-            <assert_contents>
-            <has_text text="28837706" />
-            <not_has_text text="NT_166464" />
-            </assert_contents>
-        </output>
-        </test>
-
-        <test>
-        <param name="input" ftype="vcf" value="test01.vcf"/>
-        <param name="expr" value="(CHROM = '19')"/>
-        <param name="mode" value="entries"/>
-        <output name="output">
-            <assert_contents>
-            <has_text text="3205820" />
-            <not_has_text text="NT_16" />
-            </assert_contents>
-        </output>
-        </test>
-
-        <test>
-        <param name="input" ftype="vcf" value="test01.vcf"/>
-        <param name="expr" value="(POS >= 20175) &amp; (POS &lt;= 35549)"/>
-        <param name="mode" value="entries"/>
-        <output name="output">
-            <assert_contents>
-            <has_text text="20175" />
-            <has_text text="35549" />
-            <has_text text="22256" />
-            <not_has_text text="18933" />
-            <not_has_text text="37567" />
-            </assert_contents>
-        </output>
-        </test>
-
-        <test>
-        <param name="input" ftype="vcf" value="test01.vcf"/>
-        <param name="expr" value="( DP >= 5 )"/>
-        <param name="mode" value="entries"/>
-        <output name="output">
-            <assert_contents>
-            <has_text text="DP=5;" />
-            <has_text text="DP=6;" />
-            <not_has_text text="DP=1;" />
-            </assert_contents>
-        </output>
-        </test>
-    </tests>
-    <help>
-
-**SnpSift filter**
-
-You can filter ia vcf file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility.
-
-Some examples:
-
-  - *I want to filter out samples with quality less than 30*:
-
-    * **( QUAL &gt; 30 )**
-
-  - *...but we also want InDels that have quality 20 or more*:
-
-    * **(( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )**
-
-  - *...or any homozygous variant present in more than 3 samples*:
-
-    * **(countHom() > 3) | (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )**
-
-  - *...or any heterozygous sample with coverage 25 or more*:
-
-    * **((countHet() > 0) &amp; (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )**
-
-  - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:
-
-    * **isHom( GEN[0] ) &amp; isVariant( GEN[0] ) &amp; isRef( GEN[1] )**
-
-
-@EXTERNAL_DOCUMENTATION@
-
-@CITATION_SECTION@
-
-    </help>
-</tool>
b
diff -r e8adfc4c0a6b -r 500832f27cbc snpSift_int.xml
--- a/snpSift_int.xml Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,56 +0,0 @@
-<tool id="snpSift_int" name="SnpSift Intervals" version="3.4">
-    <description>Filter variants using intervals</description>
-    <!-- 
-        You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
-    -->
-    <expand macro="requirements" />
-    <macros>
-        <import>snpEff_macros.xml</import>
-    </macros>
-    <command>
-        java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar intervals -i $input $exclude $bedFile > $output
-    </command>
-    <inputs>
-        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
-        <param format="bed" name="bedFile" type="data" label="Intervals (BED file)"/>
-        <param name="exclude" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Exclude Intervals" 
-            help="Filter out (exclude) VCF entries that match any interval in the BED files"/>
-    </inputs>
-    <outputs>
-        <data format="vcf" name="output" />
-    </outputs>
-    <expand macro="stdio" />
-    <tests>
-        <test>
-            <param name="input" ftype="vcf" value="annotate_5.vcf"/>
-            <param name="bedFile" ftype="bed" value="interval.bed"/>
-            <param name="exclude" value="False"/>
-            <output name="output">
-                <assert_contents>
-                    <has_text text="872687" />
-                    <not_has_text text="1195966" />
-                </assert_contents>
-            </output>
-        </test>
-        <test>
-            <param name="input" ftype="vcf" value="annotate_5.vcf"/>
-            <param name="bedFile" ftype="bed" value="interval.bed"/>
-            <param name="exclude" value="True"/>
-            <output name="output">
-                <assert_contents>
-                    <has_text text="1195966" />
-                    <not_has_text text="872687" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-
-You can filter using intervals (BED file).
-
-@EXTERNAL_DOCUMENTATION@
-
-@CITATION_SECTION@
-
-    </help>
-</tool>
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/annotate_1.vcf
--- a/test-data/annotate_1.vcf Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-1 872687 . C G . . .
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/annotate_5.vcf
--- a/test-data/annotate_5.vcf Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-1 872687 rs76166080 C G . . .
-1 970878 . C T . . .
-1 979690 rs115413462 G A . . .
-1 1160967 . C T . . .
-1 1195966 rs114569001 G A . . .
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/db_test_1.vcf
--- a/test-data/db_test_1.vcf Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-1 872687 rs76166080 C G 0 . .
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/interval.bed
--- a/test-data/interval.bed Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,10 +0,0 @@
-chr1 1 100000
-chr1 100000 200000
-chr1 200000 300000
-chr1 300000 400000
-chr1 400000 500000
-chr1 500000 600000
-chr1 600000 700000
-chr1 700000 800000
-chr1 800000 900000
-chr1 900000 1000000
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/test.private.01.vcf
--- a/test-data/test.private.01.vcf Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-##fileformat=VCFv4.0
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT id1 id2
-1 123456 . A G . . AF=0 GT 0/0 0/0
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/test.private.02.vcf
--- a/test-data/test.private.02.vcf Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-##fileformat=VCFv4.0
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT id1 id2
-1 123456 . A G . . AF=0 GT 1/1 1/1
b
diff -r e8adfc4c0a6b -r 500832f27cbc test-data/test01.vcf
--- a/test-data/test01.vcf Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1000 +0,0 @@\n-##fileformat=VCFv4.1\n-##samtoolsVersion=0.1.16 (r963:234)\n-##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n-##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">\n-##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">\n-##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">\n-##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the site allele frequency of the first ALT allele">\n-##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">\n-##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">\n-##INFO=<ID=CI95,Number=2,Type=Float,Description="Equal-tail Bayesian credible interval of the site allele frequency at the 95% level">\n-##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">\n-##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n-##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">\n-##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">\n-##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">\n-##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality 
bases">\n-##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">\n-##FORMAT=<ID=PL,Number=-1,Type=Integer,Description="List of Phred-scaled genotype likelihoods, number of values is (#ALT+1)*(#ALT+2)/2">\n-#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ts_1_BcA2.sort.rmdup.Q20.noMh.bam\n-NT_166464\t696\t.\tG\tC\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166464\t745\t.\tG\tC\t6.98\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:36,3,0:4\n-NT_166464\t7258\t.\tA\tC\t40\t.\tDP=4;AF1=0.5008;CI95=0.5,0.5;DP4=1,0,2,1;MQ=32;FQ=-4.12;PV4=1,0.28,0.21,0.17\tGT:PL:GQ\t0/1:70,0,25:28\n-NT_166464\t7268\t.\tA\tG\t8.65\t.\tDP=4;AF1=0.5004;CI95=0.5,0.5;DP4=1,0,1,1;MQ=30;FQ=3.32;PV4=1,0.017,0,1\tGT:PL:GQ\t0/1:38,0,28:32\n-NT_166464\t7283\t.\tT\tC\t11.3\t.\tDP=3;AF1=0.501;CI95=0.5,0.5;DP4=1,0,1,1;MQ=30;FQ=-4.81;PV4=1,1,0,1\tGT:PL:GQ\t0/1:41,0,24:28\n-NT_166464\t7335\t.\tG\tA\t18.8\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=25;FQ=-33\tGT:PL:GQ\t1/1:50,6,0:10\n-NT_166464\t8030\t.\tA\tG\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166452\t8268\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166452\t16693\t.\tT\tC\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166480\t12474\t.\tG\tA\t6.2\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:35,3,0:4\n-NT_166480\t12483\t.\tA\tG\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n-NT_166476\t578\t.\tC\tT\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166476\t22223\t.\tA\tC\t3.01\t.\tDP=4;AF1=0.4998;CI95=0.5,0.5;DP4=0,2,2,0;MQ=32;FQ=4.63;PV4=0.33,0.26,0,0.42\tGT:PL:GQ\t0/1:30,0,43:28\n-NT_166476\t22256\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166476\t23076\t.\tA\tT\t8.44\t.\tDP=2;AF1=1;CI95=0.5,
1;DP4=0,0,0,2;MQ=25;FQ=-33\tGT:PL:GQ\t1/1:39,6,0:8\n-NT_166476\t23487\t.\tC\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-NT_166454\t64\t.\tT\tA\t8.64\t.\tDP=7;AF1=0.5;CI95=0.5,0.5;DP4=1,4,2,0;MQ=29;FQ=11.3;PV4=0.14,1,1,1\tGT:P'..b'\tG\t28\t.\tDP=4;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=25;FQ=-36\tGT:PL:GQ\t1/1:60,9,0:16\n-19\t25568441\t.\tG\tA\t89\t.\tDP=10;AF1=1;CI95=1,1;DP4=0,0,7,3;MQ=29;FQ=-57\tGT:PL:GQ\t1/1:122,30,0:57\n-19\t25568480\t.\tA\tG\t169\t.\tDP=11;AF1=1;CI95=1,1;DP4=0,0,8,3;MQ=30;FQ=-60\tGT:PL:GQ\t1/1:202,33,0:63\n-19\t25568513\t.\tT\tA\t124\t.\tDP=9;AF1=1;CI95=1,1;DP4=0,0,6,3;MQ=28;FQ=-54\tGT:PL:GQ\t1/1:157,27,0:51\n-19\t25568527\t.\tA\tG\t109\t.\tDP=9;AF1=1;CI95=1,1;DP4=0,0,6,3;MQ=28;FQ=-54\tGT:PL:GQ\t1/1:142,27,0:51\n-19\t25568536\t.\tA\tG\t13.2\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,3,0;MQ=25;FQ=-36\tGT:PL:GQ\t1/1:45,9,0:14\n-19\t25581569\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-19\t26697796\t.\tT\tA\t6.2\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:35,3,0:4\n-19\t26728829\t.\tAGG\tATGG,AG\t8.83\t.\tINDEL;DP=5;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=33;FQ=-40.5\tGT:PL:GQ\t1/1:71,30,24,46,0,43:8\n-19\t26747187\t.\tAGG\tAG\t14.4\t.\tINDEL;DP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=37;FQ=-40.5\tGT:PL:GQ\t1/1:53,6,0:10\n-19\t26751288\t.\tG\tGAC\t116\t.\tINDEL;DP=8;AF1=1;CI95=0.5,1;DP4=0,0,5,1;MQ=35;FQ=-52.5\tGT:PL:GQ\t1/1:156,18,0:33\n-19\t26756358\t.\tCA\tCAAA\t11.8\t.\tINDEL;DP=6;AF1=0.5;CI95=0.5,0.5;DP4=1,1,1,1;MQ=37;FQ=14.4;PV4=1,0.41,1,0.0024\tGT:PL:GQ\t0/1:49,0,62:51\n-19\t26758413\t.\tGT\tGTT\t52.4\t.\tINDEL;DP=6;AF1=1;CI95=0.5,1;DP4=0,0,3,0;MQ=37;FQ=-43.5\tGT:PL:GQ\t1/1:92,9,0:16\n-19\t26764380\t.\tC\tT\t13\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:44,6,0:9\n-19\t26765941\t.\tAGG\tAGGGG\t24.2\t.\tINDEL;DP=5;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=37;FQ=-40.5\tGT:PL:GQ\t1/1:63,6,0:10\n-19\t26780556\t.\tA\tAC\t18.3\t.\tINDEL;DP=3;AF1=1;CI95=0.5,1;
DP4=0,0,0,2;MQ=32;FQ=-40.5\tGT:PL:GQ\t1/1:57,6,0:10\n-19\t26787476\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-19\t26803166\t.\tA\tG\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-19\t26803281\t.\tG\tT\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n-19\t26827257\t.\tG\tA\t3.54\t.\tDP=2;AF1=0.5;CI95=0.5,0.5;DP4=1,0,1,0;MQ=37;FQ=3.54;PV4=1,1,1,1\tGT:PL:GQ\t0/1:31,0,31:29\n-19\t26847473\t.\tA\tC\t10.2\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,0,2;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:41,6,0:8\n-19\t26852064\t.\tTACACACACACACACACACACACACACACACACACACACA\tTACACACACACACACACACACACACACACACACACACA\t118\t.\tINDEL;DP=55;AF1=0.5;CI95=0.5,0.5;DP4=8,10,4,6;MQ=37;FQ=121;PV4=1,1,1,1\tGT:PL:GQ\t0/1:156,0,255:99\n-19\t27313337\t.\tG\tA\t3.41\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:32,6,0:4\n-19\t27314462\t.\tT\tC\t40\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,1;MQ=37;FQ=-36\tGT:PL:GQ\t1/1:72,9,0:16\n-19\t27466173\t.\tC\tG\t3.54\t.\tDP=2;AF1=0.5;CI95=0.5,0.5;DP4=1,0,1,0;MQ=37;FQ=3.54;PV4=1,1,1,1\tGT:PL:GQ\t0/1:31,0,31:29\n-19\t28220602\t.\tT\tG\t222\t.\tDP=21;AF1=1;CI95=1,1;DP4=0,0,8,13;MQ=37;FQ=-90\tGT:PL:GQ\t1/1:255,63,0:99\n-19\t28220622\t.\tC\tT\t222\t.\tDP=21;AF1=1;CI95=1,1;DP4=0,0,8,13;MQ=37;FQ=-90\tGT:PL:GQ\t1/1:255,63,0:99\n-19\t28220668\t.\tG\tA\t222\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,5,6;MQ=36;FQ=-60\tGT:PL:GQ\t1/1:255,33,0:63\n-19\t28220691\t.\tT\tC\t148\t.\tDP=7;AF1=1;CI95=1,1;DP4=0,0,3,4;MQ=36;FQ=-48\tGT:PL:GQ\t1/1:181,21,0:39\n-19\t28486996\t.\tT\tC\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n-19\t28643319\t.\tC\tT\t3.55\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:31,3,0:4\n-19\t28643329\t.\tC\tT\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n-19\t28714335\t.\tC\tA\t6.98\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:36,3,0:4\n-19\t28837706\t.\tA\
tT\t154\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,3,10;MQ=25;FQ=-66\tGT:PL:GQ\t1/1:187,39,0:75\n-19\t28837717\t.\tG\tA\t154\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,3,10;MQ=25;FQ=-66\tGT:PL:GQ\t1/1:187,39,0:75\n-19\t28837735\t.\tA\tG\t154\t.\tDP=24;AF1=1;CI95=1,1;DP4=0,0,7,14;MQ=25;FQ=-90\tGT:PL:GQ\t1/1:187,63,0:99\n-19\t28837767\t.\tA\tG,T\t177\t.\tDP=53;AF1=1;CI95=1,1;DP4=0,0,21,29;MQ=30;FQ=-175\tGT:PL:GQ\t1/1:210,148,0,204,125,201:99\n-19\t28837787\t.\tC\tT\t161\t.\tDP=66;AF1=1;CI95=1,1;DP4=0,1,30,35;MQ=31;FQ=-206;PV4=1,1,1,1\tGT:PL:GQ\t1/1:194,179,0:99\n-19\t28837805\t.\tA\tG\t222\t.\tDP=54;AF1=1;CI95=1,1;DP4=0,0,26,26;MQ=32;FQ=-184\tGT:PL:GQ\t1/1:255,157,0:99\n'
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeff_annotations.loc.sample
--- a/tool-data/snpeff_annotations.loc.sample Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-## Regulation Databases for SnpEff 
-## These are from the list on: http://snpeff.sourceforge.net/download.html
-#genome annotation_name description
-#GRCh37.71 nextprot nextprot
-#GRCh37.71 motif motif
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeff_databases.loc.sample
--- a/tool-data/snpeff_databases.loc.sample Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-## Available Databases for SnpEff 
-## These are from the list on: http://snpeff.sourceforge.net/download.html
-## the Description field in this sample is "Genome : Version" 
-#Version Description
-#GRCh37.68 Homo sapiens : GRCh37.68
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeff_genomedb.loc.sample
--- a/tool-data/snpeff_genomedb.loc.sample Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-## Downloaded Databases for SnpEff 
-## These are from the list on: http://snpeff.sourceforge.net/download.html
-## the Description field in this sample is "Genome : Version" 
-#Version        Description data_dir path
-#GRCh37.68      Homo sapiens : GRCh37.68 /home/galaxy/snpEff/data
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeff_regulationdb.loc.sample
--- a/tool-data/snpeff_regulationdb.loc.sample Wed Dec 11 08:53:32 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-## Regulation Databases for SnpEff 
-## These are from the list on: http://snpeff.sourceforge.net/download.html
-#genome regulation_name description
-#GRCh37.70 CD4 CD4
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeffv_annotations.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeffv_annotations.loc.sample Thu Jan 22 08:28:37 2015 -0500
b
@@ -0,0 +1,5 @@
+## Annotation Databases for SnpEff 
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+#key snpeff_version genome annotation_name description
+#SnpEff4.0_GRCh37.75 SnpEff4.0 GRCh37.75 nextprot nextprot
+#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 motif motif
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeffv_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeffv_databases.loc.sample Thu Jan 22 08:28:37 2015 -0500
b
@@ -0,0 +1,5 @@
+## Available Databases for SnpEff 
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+## the Description field in this sample is "Genome : Version" 
+#key snpeff_version Version Description
+#SnpEff4.0_GRCh37.75 SnpEff4.0 GRCh37.75 Homo sapiens : GRCh37.75
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeffv_genomedb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeffv_genomedb.loc.sample Thu Jan 22 08:28:37 2015 -0500
b
@@ -0,0 +1,6 @@
+## Downloaded Databases for SnpEff 
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+## the Description field in this sample is "Genome : Version" 
+#Key snpeff_version Version Description data_dir path
+#SnpEff4.0_GRCh37.74 SnpEff4.0 GRCh37.74 Homo sapiens : GRCh37.74 /home/galaxy/snpEff/v4_0/data
+#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 Homo sapiens : GRCh38.76 /home/galaxy/snpEff/v4_1/data
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool-data/snpeffv_regulationdb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpeffv_regulationdb.loc.sample Thu Jan 22 08:28:37 2015 -0500
b
@@ -0,0 +1,5 @@
+## Regulation Databases for SnpEff 
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+#Key snpeff_version genome regulation_name description
+#SnpEff4.0_GRCh37.74 SnpEff4.0 GRCh37.74 CD4 CD4
+#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 CD4 CD4
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Wed Dec 11 08:53:32 2013 -0500
+++ b/tool_data_table_conf.xml.sample Thu Jan 22 08:28:37 2015 -0500
b
@@ -1,19 +1,19 @@
 <tables>
-    <table name="snpeff_databases" comment_char="#">
-        <columns>value, name</columns>
-        <file path="tool-data/snpeff_databases.loc" />
+    <table name="snpeffv_genomedb" comment_char="#">
+        <columns>key, version, value, name, path</columns>
+        <file path="tool-data/snpeffv_genomedb.loc" />
     </table>
-    <table name="snpeff_genomedb" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/snpeff_genomedb.loc" />
+    <table name="snpeffv_regulationdb" comment_char="#">
+        <columns>key, version, genome, value, name</columns>
+        <file path="tool-data/snpeffv_regulationdb.loc" />
     </table>
-    <table name="snpeff_regulationdb" comment_char="#">
-        <columns>genome, value, name</columns>
-        <file path="tool-data/snpeff_regulationdb.loc" />
+    <table name="snpeffv_annotations" comment_char="#">
+        <columns>key, version, genome, value, name</columns>
+        <file path="tool-data/snpeffv_annotations.loc" />
     </table>
-    <table name="snpeff_annotations" comment_char="#">
-        <columns>genome, value, name</columns>
-        <file path="tool-data/snpeff_annotations.loc" />
+    <table name="snpeffv_databases" comment_char="#">
+        <columns>key, version, value, name</columns>
+        <file path="tool-data/snpeffv_databases.loc" />
     </table>
 </tables>
 
b
diff -r e8adfc4c0a6b -r 500832f27cbc tool_dependencies.xml
--- a/tool_dependencies.xml Wed Dec 11 08:53:32 2013 -0500
+++ b/tool_dependencies.xml Thu Jan 22 08:28:37 2015 -0500
b
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="snpEff" version="3.4">
-        <repository toolshed="http://toolshed.g2.bx.psu.edu" name="package_snpeff_3_4" owner="iuc" changeset_revision="af7ff8187d09" />
+    <package name="snpEff" version="4.0">
+        <repository name="package_snpeff_4_0" owner="iuc" changeset_revision="6bc55957927b" toolshed="http://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>