Repository 'snpsift'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/snpsift

Changeset 0:9e8280e19338 (2015-01-22)
Next changeset 1:98708b88af9f (2016-06-07)
Commit message:
Uploaded
added:
readme.rst
snpSift_annotate.xml
snpSift_caseControl.xml
snpSift_filter.xml
snpSift_int.xml
snpSift_macros.xml
snpSift_rmInfo.xml
snpSift_vartype.xml
snpSift_vcfCheck.xml
test-data/annotate_1.vcf
test-data/annotate_5.vcf
test-data/db_test_1.vcf
test-data/interval.bed
test-data/test.private.01.vcf
test-data/test.private.02.vcf
test-data/test01.vcf
test-data/test_bad.vcf
test-data/test_rmEff.vcf
test-data/test_rmInfo.vcf
tool_dependencies.xml
b
diff -r 000000000000 -r 9e8280e19338 readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,11 @@
+These are galaxy tools for SnpEff a variant annotation and effect prediction tool by Pablo Cingolani. 
+It annotates and predicts the effects of variants on genes (such as amino acid changes).
+( http://snpeff.sourceforge.net/ )
+
+This repository contains a tool_dependencies.xml file that will attempt to automatically install SnpEff and SnpSift.   
+
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
b
diff -r 000000000000 -r 9e8280e19338 snpSift_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_annotate.xml Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,100 @@
+<tool id="snpSift_annotate" name="SnpSift Annotate" version="4.0.0">
+    <description>SNPs from dbSnp</description>
+    <!-- 
+        You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
+    -->
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar $annotate_cmd 
+        #if $annotate.id :
+          -id
+        #elif $annotate.info_ids.__str__.strip() != '' :
+          -info "$annotate.info_ids"
+        #end if          
+        -q $dbSnp $input > $output 
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param format="vcf" name="dbSnp" type="data" label="VCF File with ID field annotated (e.g. dbSNP.vcf)" 
+            help="The ID field for a variant in input will be assigned from a matching variant in this file."/>
+        <conditional name="annotate">
+            <param name="id" type="boolean" truevalue="id" falsevalue="info" checked="True" label="Only annotate ID field (do not add INFO field)" help=""/>
+            <when value="id"/>
+            <when value="info">
+                <param name="info_ids" type="text" value="" size="60" optional="true" label="Limit INFO annotation to these INFO IDs"
+                    help="list is a comma separated list of fields. When blank, all INFO fields are included">    
+                    <validator type="regex" message="IDs separated by commas">^(([a-zA-Z][a-zA-Z0-9_-]*)(,[a-zA-Z][a-zA-Z0-9_-]*)*)?$</validator>
+                </param>
+            </when>
+        </conditional>
+        <param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Allow unsorted VCF files"> 
+            <help>
+                This option will load the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files).
+                Otherwise, both the database and the input VCF files should be sorted by position (Chromosome sort order can differ between files). 
+            </help>
+            </param>
+    </inputs>
+    <expand macro="stdio" />
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="annotate_1.vcf"/>
+            <param name="dbSnp" ftype="vcf" value="db_test_1.vcf"/>
+            <param name="annotate_cmd" value="False"/>
+            <param name="id" value="True"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="rs76166080" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+This is typically used to annotate IDs from dbSnp.
+
+Annotating only the ID field from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192
+
+
+
+Annotating both the ID and INFO fields from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244;RSPOS=16346045;GMAF=0.162248628884826;dbSNPBuildID=129;SSR=0;SAO=0;VP=050100000000000100000100;WGT=0;VC=SNV;SLO;GNO
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192;RSPOS=16350245;GMAF=0.230804387568556;dbSNPBuildID=101;SSR=1;SAO=0;VP=050000000000000100000140;WGT=0;VC=SNV;GNO
+
+
+@EXTERNAL_DOCUMENTATION@
+ http://snpeff.sourceforge.net/SnpSift.html#annotate
+
+@CITATION_SECTION@
+
+
+    </help>
+</tool>
+
b
diff -r 000000000000 -r 9e8280e19338 snpSift_caseControl.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_caseControl.xml Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,122 @@
+<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="4.0.0">
+    <description>Count samples in 'case' and 'control' groups.</description>
+    <!-- 
+        You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
+    -->
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+    java -Xmx1G -jar \$SNPEFF_JAR_PATH/SnpSift.jar caseControl -q 
+    #if $name.__str__.strip() != '':
+      -name $name
+    #end if
+    #if $ctrl.ctrl_src == 'caseString':
+      '$ctrl.caseControlStr' 
+    #else
+      -tfam "$ctrl.tfam"
+    #end if
+    $input > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <conditional name="ctrl">
+            <param name="ctrl_src" type="select" label="Case Control defined in">
+            <option value="caseString">Case Control String</option>
+            <option value="tfam">TFAM file</option>
+        </param>
+        <when value="caseString">
+            <param name="caseControlStr" type="text" label="Case / Control column designation" size="50">
+            <help>
+                Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral
+            </help>
+            <!-- The hyphen is listed first so it is a literal; written as '+-0' it formed a range that also accepted ',', '.' and '/'.
+                 The pattern is anchored at both ends so the whole value must consist of the three allowed characters. -->
+            <validator type="regex" message="must be  only plus(+), minus(-), or zero(0) characters">^[-+0]+$</validator>
+            </param>
+        </when>
+        <when value="tfam">
+            <param format="tabular" name="tfam" type="data" label="PLINK TFAM file" help="Read more about TFAM at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr"/>
+        </when>
+        </conditional>
+        <param name="name" type="text" optional="true" label="name" help="name to append to the 'Cases' or 'Controls' tags">
+            <!-- Anchored so the entire value must be ID characters (the value is passed unquoted on the command line);
+                 '*' rather than '+' so that leaving this optional field blank is accepted. -->
+            <validator type="regex" message="Use only valid ID characters">^[_a-zA-Z0-9]*$</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="test.private.01.vcf"/>
+            <param name="ctrl_src" value="caseString"/>
+            <param name="caseControlStr" value="--"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Cases=0,0,0;" />
+                    <has_text text="Controls=0,0,0;" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <test>
+            <param name="input" ftype="vcf" value="test.private.02.vcf"/>
+            <param name="ctrl_src" value="caseString"/>
+            <param name="caseControlStr" value="--"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Cases=0,0,0;" />
+                    <has_text text="Controls=2,0,4;" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <test>
+            <param name="input" ftype="vcf" value="test.private.02.vcf"/>
+            <param name="name" value=""/>
+            <param name="ctrl_src" value="caseString"/>
+            <param name="caseControlStr" value="-+"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Cases=1,0,2;" />
+                    <has_text text="Controls=1,0,2;" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+**SnpSift CaseControl**
+
+Allows you to count how many samples are in 'case' group and a 'control' group. You can count 'homozygous', 'heterozygous' or 'any' variants. 
+
+Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral. 
+
+This command adds two annotations to the VCF file:
+
+ - **CaseControl**: Two comma separated numbers representing the number of samples that have the variant in the case and the control group. Example: 
+
+  "CaseControl=3,4" *the variant is present in 3 cases and 4 controls.*
+
+
+ - **CaseControlP**: A p-value (Fisher exact test) that the number of cases is N or more. Example:
+
+  "CaseControl=4,0;CaseControlP=3.030303e-02" *in this case the pValue of having 4 or more cases and zero controls is 0.03*
+
+
+For example, if we have ten samples (which means ten genotype columns in the VCF file), the first four are 'case' and the last six are 'control', so the description string would be "++++------".  Let's say we want to distinguish genotypes that are homozygous in 'case' and either homozygous or heterozygous in 'control'.  We would set:
+
+  - Hom/Het case = "hom"
+
+  - Hom/Het control = "any"  
+
+  - Case / Control column designation = "++++------"
+
+
+@EXTERNAL_DOCUMENTATION@
+ http://snpeff.sourceforge.net/SnpSift.html#casecontrol
+
+@CITATION_SECTION@
+
+  </help>
+</tool>
b
diff -r 000000000000 -r 9e8280e19338 snpSift_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_filter.xml Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,189 @@
+<tool id="snpSift_filter" name="SnpSift Filter" version="4.0.0">
+    <options sanitize="False" />
+    <description>Filter variants using arbitrary expressions</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse 
+        #if $filtering.mode == 'field':
+            #if $filtering.replace.pass:
+                --pass
+                #if $filtering.replace.filterId and len($filtering.replace.filterId.__str__.strip()) > 0:
+                    --filterId "$filtering.replace.filterId"
+                #end if
+            #end if
+            #if $filtering.addFilter and len($filtering.addFilter.__str__.strip()) > 0:
+                --addFilter "$filtering.addFilter"
+            #end if
+            #if $filtering.rmFilter and len($filtering.rmFilter.__str__.strip()) > 0:
+                --rmFilter "$filtering.rmFilter"
+            #end if
+        #end if
+         > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param name="expr" type="text" label="Filter criteria" size="160" help="Need help? See below a few examples." />
+        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" />
+        <conditional name="filtering">
+            <param name="mode" type="select" label="Filter mode">
+                <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option>
+                <option value="field">Change the FILTER field, but retain all entries</option>
+            </param> 
+            <when value="entries"/>
+            <when value="field">
+                <conditional name="replace">
+                    <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" 
+                           help="appends an ID tag to non-matching entry FILTER "/>
+                    <when value="no"/>
+                    <when value="yes">
+                        <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10"
+                               help="Default ID is 'SnpSift'"/>
+                    </when>
+                </conditional>
+                <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
+                <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
+            </when>
+        </conditional>
+    </inputs>
+    <configfiles>
+        <configfile name="exprFile">
+$expr#slurp
+        </configfile> 
+    </configfiles>
+
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="QUAL >= 50"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="28837706" />
+            <not_has_text text="NT_166464" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(CHROM = '19')"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="3205820" />
+            <not_has_text text="NT_16" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(POS >= 20175) &amp; (POS &lt;= 35549)"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="20175" />
+            <has_text text="35549" />
+            <has_text text="22256" />
+            <not_has_text text="18933" />
+            <not_has_text text="37567" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="( DP >= 5 )"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="DP=5;" />
+            <has_text text="DP=6;" />
+            <not_has_text text="DP=1;" />
+            </assert_contents>
+        </output>
+        </test>
+    </tests>
+    <help>
+
+**SnpSift filter**
+
+You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility.
+
+Some examples:
+
+  - *I want just the variants from the second million bases of chr1*:
+
+    ::
+
+    ( CHROM = 'chr1' ) &amp; ( POS &gt; 1000000 )  &amp; ( POS &lt; 2000000 )
+
+  - *Filter value is either 'PASS' or it is missing*:
+
+    ::
+
+    (FILTER = 'PASS') | ( na FILTER )  
+
+  - *I want to filter lines with an EFF of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*:
+
+    ::
+  
+    ( EFF[*].EFFECT = 'frameshift_variant' )
+
+  - *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*:
+
+    ::
+  
+    ( EFF[*].EFFECT = 'FRAME_SHIFT' )
+
+  - *I want to filter out samples with quality less than 30*:
+
+    ::
+
+    ( QUAL &gt; 30 )
+
+  - *...but we also want InDels that have quality 20 or more*:
+
+    ::
+
+    (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )
+  
+  - *...or any homozygous variant present in more than 3 samples*:
+
+    ::
+
+    (countHom() > 3) | (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )
+  
+  - *...or any heterozygous sample with coverage 25 or more*:
+
+    ::
+
+    ((countHet() > 0) &amp; (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) &amp; (QUAL >= 20)) | (QUAL >= 30 )
+  
+  - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:
+
+    ::
+  
+    (isHom( GEN[0] ) &amp; isVariant( GEN[0] ) &amp; isRef( GEN[1] ))
+
+
+**For information regarding HGVS and Sequence Ontology terms versus classic names**:
+
+  - http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology
+  - http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence ontology term for each effect
+
+
+@EXTERNAL_DOCUMENTATION@
+ http://snpeff.sourceforge.net/SnpSift.html#filter
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
b
diff -r 000000000000 -r 9e8280e19338 snpSift_int.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_int.xml Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,57 @@
+<tool id="snpSift_int" name="SnpSift Intervals" version="4.0.0">
+    <description>Filter variants using intervals</description>
+    <!-- 
+        You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
+    -->
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+        java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar intervals -i $input $exclude $bedFile > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param format="bed" name="bedFile" type="data" label="Intervals (BED file)"/>
+        <param name="exclude" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Exclude Intervals" 
+            help="Filter out (exclude) VCF entries that match any interval in the BED files"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <test>
+            <param name="input" ftype="vcf" value="annotate_5.vcf"/>
+            <param name="bedFile" ftype="bed" value="interval.bed"/>
+            <param name="exclude" value="False"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="872687" />
+                    <not_has_text text="1195966" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" ftype="vcf" value="annotate_5.vcf"/>
+            <param name="bedFile" ftype="bed" value="interval.bed"/>
+            <param name="exclude" value="True"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="1195966" />
+                    <not_has_text text="872687" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+You can filter using intervals (BED file).
+
+@EXTERNAL_DOCUMENTATION@
+    http://snpeff.sourceforge.net/SnpSift.html#intervals
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
b
diff -r 000000000000 -r 9e8280e19338 snpSift_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_macros.xml Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,35 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="4.0">snpEff</requirement>
+        </requirements>
+    </xml>
+  <xml name="stdio">
+    <stdio>
+        <exit_code range=":-1"  level="fatal" description="Error: Cannot open file" />
+        <exit_code range="1:"  level="fatal" description="Error" />
+    </stdio>
+  </xml>
+  <token name="@EXTERNAL_DOCUMENTATION@">
+
+For details about this tool, please go to:
+ http://snpeff.sourceforge.net/SnpEff_manual.html
+
+  </token>
+  <token name="@CITATION_SECTION@">------
+
+**Citation**
+
+For the underlying tool, please cite the following publication:
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3:35, 2012.
+
+  </token>
+  <xml name="citations">
+      <citations>
+        <citation type="doi">10.3389/fgene.2012.00035</citation>
+        <yield />
+      </citations>
+  </xml>
+</macros>
b
diff -r 000000000000 -r 9e8280e19338 snpSift_rmInfo.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_rmInfo.xml Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,56 @@
+<tool id="snpSift_rmInfo" name="SnpSift rmInfo" version="4.0.0">
+    <description>remove INFO field annotations</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <!-- The comma separated INFO field list is converted to the space separated form passed to rmInfo.
+         The Python expression must sit inside a Cheetah placeholder to be evaluated; outside one it is
+         emitted as literal text. str() is applied so split() operates on the plain parameter value. -->
+    <command>
+      java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar rmInfo $input ${' '.join(str($info_fields).split(','))} > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
+        <param name="info_fields" type="text" value="" label="Info fields to remove, e.g. EFF">
+          <help>Separate multiple INFO fields with a comma, e.g.: EFF,DP</help>
+          <validator type="empty_field" />
+        </param>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <!-- Test file names are given relative to the test-data directory, as in the other SnpSift tools. -->
+        <test>
+            <param name="input" ftype="vcf" value="test_rmInfo.vcf"/>
+            <param name="info_fields" value="EFF"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="DP=29" />
+                    <not_has_text text="EFF=EXON" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" ftype="vcf" value="test_rmInfo.vcf"/>
+            <param name="info_fields" value="EFF"/>
+            <output name="output">
+                <assert_contents>
+                    <not_has_text text="DP=29;EFF=EXON" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+This command removes INFO fields from a VCF file (i.e. removes annotations)
+
+Removing INFO fields is usually done because you want to re-annotate the VCF file, thus removing old INFO fields in order to add new ones later. 
+
+SnpEff &amp; SnpSift only add annotations and do not change current ones. So, in order to re-annotate a file, you should first remove the old annotations and then re-annotate. 
+The reason for this behavior is simply because replacing annotation values is considered a bad practice. Imagine that you have a VCF entry  in your re-annotated file having the value "AA=1": How do you know if this is from the old annotations or from the new ones? This confusion often leads to problems in downstream steps of your pipelines, so it's better to avoid the problem by first removing all the previous annotations and then adding the new ones. 
+
+@EXTERNAL_DOCUMENTATION@
+    http://snpeff.sourceforge.net/SnpSift.html#rmInfo
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
b
diff -r 000000000000 -r 9e8280e19338 snpSift_vartype.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_vartype.xml Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,39 @@
+<tool id="snpsift_vartype" name="SnpSift Variant Type" version="4.0.0">
+    <description>Annotate with variant type</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+        java -jar \$SNPEFF_JAR_PATH/SnpSift.jar varType $input 2&gt; $log &gt; $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant file (VCF)"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" label="${tool.name} on ${on_string}: VCF" />
+        <data format="txt" name="log" label="${tool.name} on ${on_string}: log" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+    </tests>
+    <help>
+**What it does**
+
+This tool uses `SnpSift Variant type`_ to add the variant type (SNP/MNP/INS/DEL/MIXED) in the INFO field. It also adds "HOM/HET", but this last one works if there is only one sample (otherwise it doesn't make any sense).
+
+.. _SnpSift Variant type: http://snpeff.sourceforge.net/SnpSift.html#VariantType
+
+------
+
+**License**
+
+This Galaxy tool is Copyright © 2013-2014 `CRS4 Srl.`_ and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _MIT license: http://opensource.org/licenses/MIT
+
+@CITATION_SECTION@
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 9e8280e19338 snpSift_vcfCheck.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_vcfCheck.xml Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,39 @@
+<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="4.0.0">
+    <description>basic checks for Vcf specification compliance</description>
+    <expand macro="requirements" />
+    <macros>
+        <import>snpSift_macros.xml</import>
+    </macros>
+    <command>
+      java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar vcfCheck $input > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="Variant input file in VCF format to check"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <expand macro="stdio" />
+    <tests>
+        <!-- Test file names are given relative to the test-data directory, as in the other SnpSift tools. -->
+        <test>
+            <param name="input" ftype="vcf" value="test_bad.vcf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Errors" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+Perform some basic check ups on VCF files to spot common problems.
+
+SnpSift vcfCheck checks for some common problems where VCF files are not following the specification. Given that many common VCF problems cause analysis tools and pipelines to behave unexpectedly, this command is intended as a simple debugging tool. 
+
+@EXTERNAL_DOCUMENTATION@
+ http://snpeff.sourceforge.net/SnpSift.html#vcfCheck
+
+@CITATION_SECTION@
+
+    </help>
+</tool>
b
diff -r 000000000000 -r 9e8280e19338 test-data/annotate_1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_1.vcf Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,1 @@
+1 872687 . C G . . .
b
diff -r 000000000000 -r 9e8280e19338 test-data/annotate_5.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_5.vcf Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,5 @@
+1 872687 rs76166080 C G . . .
+1 970878 . C T . . .
+1 979690 rs115413462 G A . . .
+1 1160967 . C T . . .
+1 1195966 rs114569001 G A . . .
b
diff -r 000000000000 -r 9e8280e19338 test-data/db_test_1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/db_test_1.vcf Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,1 @@
+1 872687 rs76166080 C G 0 . .
b
diff -r 000000000000 -r 9e8280e19338 test-data/interval.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/interval.bed Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,10 @@
+chr1 1 100000
+chr1 100000 200000
+chr1 200000 300000
+chr1 300000 400000
+chr1 400000 500000
+chr1 500000 600000
+chr1 600000 700000
+chr1 700000 800000
+chr1 800000 900000
+chr1 900000 1000000
b
diff -r 000000000000 -r 9e8280e19338 test-data/test.private.01.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.private.01.vcf Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT id1 id2
+1 123456 . A G . . AF=0 GT 0/0 0/0
b
diff -r 000000000000 -r 9e8280e19338 test-data/test.private.02.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.private.02.vcf Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT id1 id2
+1 123456 . A G . . AF=0 GT 1/1 1/1
b
diff -r 000000000000 -r 9e8280e19338 test-data/test01.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test01.vcf Thu Jan 22 08:39:07 2015 -0500
b
b'@@ -0,0 +1,1000 @@\n+##fileformat=VCFv4.1\n+##samtoolsVersion=0.1.16 (r963:234)\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">\n+##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">\n+##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the site allele frequency of the first ALT allele">\n+##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">\n+##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">\n+##INFO=<ID=CI95,Number=2,Type=Float,Description="Equal-tail Bayesian credible interval of the site allele frequency at the 95% level">\n+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">\n+##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">\n+##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">\n+##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality 
bases">\n+##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">\n+##FORMAT=<ID=PL,Number=-1,Type=Integer,Description="List of Phred-scaled genotype likelihoods, number of values is (#ALT+1)*(#ALT+2)/2">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ts_1_BcA2.sort.rmdup.Q20.noMh.bam\n+NT_166464\t696\t.\tG\tC\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166464\t745\t.\tG\tC\t6.98\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:36,3,0:4\n+NT_166464\t7258\t.\tA\tC\t40\t.\tDP=4;AF1=0.5008;CI95=0.5,0.5;DP4=1,0,2,1;MQ=32;FQ=-4.12;PV4=1,0.28,0.21,0.17\tGT:PL:GQ\t0/1:70,0,25:28\n+NT_166464\t7268\t.\tA\tG\t8.65\t.\tDP=4;AF1=0.5004;CI95=0.5,0.5;DP4=1,0,1,1;MQ=30;FQ=3.32;PV4=1,0.017,0,1\tGT:PL:GQ\t0/1:38,0,28:32\n+NT_166464\t7283\t.\tT\tC\t11.3\t.\tDP=3;AF1=0.501;CI95=0.5,0.5;DP4=1,0,1,1;MQ=30;FQ=-4.81;PV4=1,1,0,1\tGT:PL:GQ\t0/1:41,0,24:28\n+NT_166464\t7335\t.\tG\tA\t18.8\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=25;FQ=-33\tGT:PL:GQ\t1/1:50,6,0:10\n+NT_166464\t8030\t.\tA\tG\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166452\t8268\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166452\t16693\t.\tT\tC\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166480\t12474\t.\tG\tA\t6.2\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:35,3,0:4\n+NT_166480\t12483\t.\tA\tG\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n+NT_166476\t578\t.\tC\tT\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166476\t22223\t.\tA\tC\t3.01\t.\tDP=4;AF1=0.4998;CI95=0.5,0.5;DP4=0,2,2,0;MQ=32;FQ=4.63;PV4=0.33,0.26,0,0.42\tGT:PL:GQ\t0/1:30,0,43:28\n+NT_166476\t22256\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166476\t23076\t.\tA\tT\t8.44\t.\tDP=2;AF1=1;CI95=0.5,
1;DP4=0,0,0,2;MQ=25;FQ=-33\tGT:PL:GQ\t1/1:39,6,0:8\n+NT_166476\t23487\t.\tC\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+NT_166454\t64\t.\tT\tA\t8.64\t.\tDP=7;AF1=0.5;CI95=0.5,0.5;DP4=1,4,2,0;MQ=29;FQ=11.3;PV4=0.14,1,1,1\tGT:P'..b'\tG\t28\t.\tDP=4;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=25;FQ=-36\tGT:PL:GQ\t1/1:60,9,0:16\n+19\t25568441\t.\tG\tA\t89\t.\tDP=10;AF1=1;CI95=1,1;DP4=0,0,7,3;MQ=29;FQ=-57\tGT:PL:GQ\t1/1:122,30,0:57\n+19\t25568480\t.\tA\tG\t169\t.\tDP=11;AF1=1;CI95=1,1;DP4=0,0,8,3;MQ=30;FQ=-60\tGT:PL:GQ\t1/1:202,33,0:63\n+19\t25568513\t.\tT\tA\t124\t.\tDP=9;AF1=1;CI95=1,1;DP4=0,0,6,3;MQ=28;FQ=-54\tGT:PL:GQ\t1/1:157,27,0:51\n+19\t25568527\t.\tA\tG\t109\t.\tDP=9;AF1=1;CI95=1,1;DP4=0,0,6,3;MQ=28;FQ=-54\tGT:PL:GQ\t1/1:142,27,0:51\n+19\t25568536\t.\tA\tG\t13.2\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,3,0;MQ=25;FQ=-36\tGT:PL:GQ\t1/1:45,9,0:14\n+19\t25581569\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26697796\t.\tT\tA\t6.2\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:35,3,0:4\n+19\t26728829\t.\tAGG\tATGG,AG\t8.83\t.\tINDEL;DP=5;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=33;FQ=-40.5\tGT:PL:GQ\t1/1:71,30,24,46,0,43:8\n+19\t26747187\t.\tAGG\tAG\t14.4\t.\tINDEL;DP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=37;FQ=-40.5\tGT:PL:GQ\t1/1:53,6,0:10\n+19\t26751288\t.\tG\tGAC\t116\t.\tINDEL;DP=8;AF1=1;CI95=0.5,1;DP4=0,0,5,1;MQ=35;FQ=-52.5\tGT:PL:GQ\t1/1:156,18,0:33\n+19\t26756358\t.\tCA\tCAAA\t11.8\t.\tINDEL;DP=6;AF1=0.5;CI95=0.5,0.5;DP4=1,1,1,1;MQ=37;FQ=14.4;PV4=1,0.41,1,0.0024\tGT:PL:GQ\t0/1:49,0,62:51\n+19\t26758413\t.\tGT\tGTT\t52.4\t.\tINDEL;DP=6;AF1=1;CI95=0.5,1;DP4=0,0,3,0;MQ=37;FQ=-43.5\tGT:PL:GQ\t1/1:92,9,0:16\n+19\t26764380\t.\tC\tT\t13\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:44,6,0:9\n+19\t26765941\t.\tAGG\tAGGGG\t24.2\t.\tINDEL;DP=5;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=37;FQ=-40.5\tGT:PL:GQ\t1/1:63,6,0:10\n+19\t26780556\t.\tA\tAC\t18.3\t.\tINDEL;DP=3;AF1=1;CI95=0.5,1;
DP4=0,0,0,2;MQ=32;FQ=-40.5\tGT:PL:GQ\t1/1:57,6,0:10\n+19\t26787476\t.\tG\tA\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26803166\t.\tA\tG\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26803281\t.\tG\tT\t7.8\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:37,3,0:4\n+19\t26827257\t.\tG\tA\t3.54\t.\tDP=2;AF1=0.5;CI95=0.5,0.5;DP4=1,0,1,0;MQ=37;FQ=3.54;PV4=1,1,1,1\tGT:PL:GQ\t0/1:31,0,31:29\n+19\t26847473\t.\tA\tC\t10.2\t.\tDP=2;AF1=1;CI95=0.5,1;DP4=0,0,0,2;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:41,6,0:8\n+19\t26852064\t.\tTACACACACACACACACACACACACACACACACACACACA\tTACACACACACACACACACACACACACACACACACACA\t118\t.\tINDEL;DP=55;AF1=0.5;CI95=0.5,0.5;DP4=8,10,4,6;MQ=37;FQ=121;PV4=1,1,1,1\tGT:PL:GQ\t0/1:156,0,255:99\n+19\t27313337\t.\tG\tA\t3.41\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=37;FQ=-33\tGT:PL:GQ\t1/1:32,6,0:4\n+19\t27314462\t.\tT\tC\t40\t.\tDP=3;AF1=1;CI95=0.5,1;DP4=0,0,2,1;MQ=37;FQ=-36\tGT:PL:GQ\t1/1:72,9,0:16\n+19\t27466173\t.\tC\tG\t3.54\t.\tDP=2;AF1=0.5;CI95=0.5,0.5;DP4=1,0,1,0;MQ=37;FQ=3.54;PV4=1,1,1,1\tGT:PL:GQ\t0/1:31,0,31:29\n+19\t28220602\t.\tT\tG\t222\t.\tDP=21;AF1=1;CI95=1,1;DP4=0,0,8,13;MQ=37;FQ=-90\tGT:PL:GQ\t1/1:255,63,0:99\n+19\t28220622\t.\tC\tT\t222\t.\tDP=21;AF1=1;CI95=1,1;DP4=0,0,8,13;MQ=37;FQ=-90\tGT:PL:GQ\t1/1:255,63,0:99\n+19\t28220668\t.\tG\tA\t222\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,5,6;MQ=36;FQ=-60\tGT:PL:GQ\t1/1:255,33,0:63\n+19\t28220691\t.\tT\tC\t148\t.\tDP=7;AF1=1;CI95=1,1;DP4=0,0,3,4;MQ=36;FQ=-48\tGT:PL:GQ\t1/1:181,21,0:39\n+19\t28486996\t.\tT\tC\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n+19\t28643319\t.\tC\tT\t3.55\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:31,3,0:4\n+19\t28643329\t.\tC\tT\t4.77\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,1,0;MQ=37;FQ=-30\tGT:PL:GQ\t0/1:33,3,0:3\n+19\t28714335\t.\tC\tA\t6.98\t.\tDP=1;AF1=1;CI95=0.5,1;DP4=0,0,0,1;MQ=37;FQ=-30\tGT:PL:GQ\t1/1:36,3,0:4\n+19\t28837706\t.\tA\
tT\t154\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,3,10;MQ=25;FQ=-66\tGT:PL:GQ\t1/1:187,39,0:75\n+19\t28837717\t.\tG\tA\t154\t.\tDP=13;AF1=1;CI95=1,1;DP4=0,0,3,10;MQ=25;FQ=-66\tGT:PL:GQ\t1/1:187,39,0:75\n+19\t28837735\t.\tA\tG\t154\t.\tDP=24;AF1=1;CI95=1,1;DP4=0,0,7,14;MQ=25;FQ=-90\tGT:PL:GQ\t1/1:187,63,0:99\n+19\t28837767\t.\tA\tG,T\t177\t.\tDP=53;AF1=1;CI95=1,1;DP4=0,0,21,29;MQ=30;FQ=-175\tGT:PL:GQ\t1/1:210,148,0,204,125,201:99\n+19\t28837787\t.\tC\tT\t161\t.\tDP=66;AF1=1;CI95=1,1;DP4=0,1,30,35;MQ=31;FQ=-206;PV4=1,1,1,1\tGT:PL:GQ\t1/1:194,179,0:99\n+19\t28837805\t.\tA\tG\t222\t.\tDP=54;AF1=1;CI95=1,1;DP4=0,0,26,26;MQ=32;FQ=-184\tGT:PL:GQ\t1/1:255,157,0:99\n'
b
diff -r 000000000000 -r 9e8280e19338 test-data/test_bad.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_bad.vcf Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.18 (r982:295)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chr4 100239319 rs1229984 T C 94.3 . DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1)
+chr12 32491626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
+chr12 3249626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
+chrX 153010066 rs11803 C T 73.8 . DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)
b
diff -r 000000000000 -r 9e8280e19338 test-data/test_rmEff.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_rmEff.vcf Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.18 (r982:295)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chr4 100239319 rs1229984 T C 94.3 . DP=29
+chr12 32491626 rs1471909 G A 124.0 . DP=22
+chrX 153010066 rs11803 C T 73.8 . DP=34
b
diff -r 000000000000 -r 9e8280e19338 test-data/test_rmInfo.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_rmInfo.vcf Thu Jan 22 08:39:07 2015 -0500
[
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.18 (r982:295)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##SnpEffVersion="3.5 (build 2014-02-12), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf -upDownStreamLen 5000 -spliceSiteSize 1 -stats /Users/jj/gxt/gxt/database/files/004/dataset_4998.dat GRCh37.71 /Users/jj/gxt/gxt/database/files/004/dataset_4996.dat "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_Change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon_Rank  | Genotype_Number [ | ERRORS | WARNINGS ] )' ">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chr4 100239319 rs1229984 T C 94.3 . DP=29;EFF=EXON(MODIFIER|||||ADH1B|processed_transcript|CODING|ENST00000504498|3|1),EXON(MODIFIER|||||ADH1B|retained_intron|CODING|ENST00000515694|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H48R|375|ADH1B|protein_coding|CODING|ENST00000305046|3|1),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|cAc/cGc|H8R|335|ADH1B|protein_coding|CODING|ENST00000394887|3|1),UTR_3_PRIME(MODIFIER||2729|||ADH1B|nonsense_mediated_decay|CODING|ENST00000506651|4|1)
+chr12 32491626 rs1471909 G A 124.0 . DP=22;EFF=DOWNSTREAM(MODIFIER||532|||BICD1|retained_intron|CODING|ENST00000552160||1),INTRON(MODIFIER||||835|BICD1|protein_coding|CODING|ENST00000548411|7|1),INTRON(MODIFIER||||975|BICD1|protein_coding|CODING|ENST00000281474|7|1),INTRON(MODIFIER|||||BICD1|nonsense_mediated_decay|CODING|ENST00000395758|7|1),INTRON(MODIFIER|||||BICD1|retained_intron|CODING|ENST00000552226|1|1)
+chrX 153010066 rs11803 C T 73.8 . DP=34;EFF=DOWNSTREAM(MODIFIER||4008||221|ABCD1|protein_coding|CODING|ENST00000443684||1),INTRAGENIC(MODIFIER|||||ABCD1||CODING|||1),INTRON(MODIFIER|||||U52111.14|antisense|NON_CODING|ENST00000434284|1|1),UTR_3_PRIME(MODIFIER||877||745|ABCD1|protein_coding|CODING|ENST00000218104|10|1)
b
diff -r 000000000000 -r 9e8280e19338 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Jan 22 08:39:07 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="snpEff" version="4.0">
+        <repository changeset_revision="6bc55957927b" name="package_snpeff_4_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>