changeset 0:d3a72e55deca draft

Uploaded
author saskia-hiltemann
date Wed, 18 Sep 2013 10:51:20 -0400
parents
children 7d9353127f8a
files README README~ tool-data/annovar.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml tools/annovar/annovar.sh tools/annovar/annovar.xml
diffstat 7 files changed, 1872 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,213 @@
+ANNOVAR needs to be installed manually in the following way:
+
+
+1)	If you already have ANNOVAR installed on your system, simply edit the tool-data/annovar.loc file to reflect locations of 
+	the perl scripts (annotate_variation.pl and convert2annovar.pl) and humandb directory (directory containing the annovar database files)
+1b) Restart galaxy instance for changes in .loc file to take effect
+
+
+2)	If you do not have ANNOVAR installed, request annovar download and sign license here: 
+		http://www.openbioinformatics.org/annovar/annovar_download_form.php
+
+	3)	Once downloaded, install annovar per the installation instructions and edit annovar.loc file to reflect location of directory containing perl scripts.
+			tool uses annotate_variation.pl  and  convert2annovar.pl
+	
+	4)	Then download all desired databases for all desired builds as follows:
+			annotate_variation.pl -downdb -buildver <build> [-webfrom annovar] <database> <humandb>
+	
+		where <humandb> is location where all database files should be stored
+		and <database> is the database file to download, e.g. refGene (see bottom of document for all available database files at the time of writing this tool)
+		and <build> can be hg18 or hg19 for humans, also other organisms available.
+	
+		list of all available databases can be found here: http://www.openbioinformatics.org/annovar/annovar_db.html
+	
+	5) edit the tool-data/annovar.loc file to reflect location of humandb folder
+	5b) restart galaxy instance for changes in .loc file to take effect
+
+6) Tool uses cgatools join for combining of files, this should be installed automatically with repository. If not, get a copy from Complete Genomics directly:
+		wget http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz
+		tar xvzf cgatools-1.7.1.5-linux_binary-x86_64.tar.gz		
+
+	and place the "cgatools" binary found in bin/ directory on your $PATH
+	
+	
+list of files in my own humandb folder:
+
+	hg18_ALL.sites.2012_04.txt
+	hg18_ALL.sites.2012_04.txt.idx
+	hg18_avsift.txt
+	hg18_avsift.txt.idx
+	hg18_CEU.sites.2010_07.txt
+	hg18_CEU.sites.2010_07.txt.idx
+	hg18_cg46.txt
+	hg18_cg46.txt.idx
+	hg18_cg69.txt
+	hg18_cg69.txt.idx
+	hg18_cytoBand.txt
+	hg18_dgv.txt
+	hg18_ensGeneMrna.fa
+	hg18_ensGene.txt
+	hg18_esp5400_aa.txt
+	hg18_esp5400_aa.txt.idx
+	hg18_esp5400_all.txt
+	hg18_esp5400_all.txt.idx
+	hg18_esp5400_ea.txt
+	hg18_esp5400_ea.txt.idx
+	hg18_esp6500_aa.txt
+	hg18_esp6500_aa.txt.idx
+	hg18_esp6500_all.txt
+	hg18_esp6500_all.txt.idx
+	hg18_esp6500_ea.txt
+	hg18_esp6500_ea.txt.idx
+	hg18_esp6500si_aa.txt
+	hg18_esp6500si_aa.txt.idx
+	hg18_esp6500si_all.txt
+	hg18_esp6500si_all.txt.idx
+	hg18_esp6500si_ea.txt
+	hg18_esp6500si_ea.txt.idx
+	hg18_example_db_generic.txt
+	hg18_example_db_gff3.txt
+	hg18_genomicSuperDups.txt
+	hg18_gerp++gt2.txt
+	hg18_gerp++gt2.txt.idx
+	hg18_gwasCatalog.txt
+	hg18_JPTCHB.sites.2010_07.txt
+	hg18_JPTCHB.sites.2010_07.txt.idx
+	hg18_keggMapDesc.txt
+	hg18_keggPathway.txt
+	hg18_kgXref.txt
+	hg18_knownGeneMrna.fa
+	hg18_knownGene.txt
+	hg18_ljb_all.txt
+	hg18_ljb_all.txt.idx
+	hg18_ljb_lrt.txt
+	hg18_ljb_lrt.txt.idx
+	hg18_ljb_mt.txt
+	hg18_ljb_mt.txt.idx
+	hg18_ljb_phylop.txt
+	hg18_ljb_phylop.txt.idx
+	hg18_ljb_pp2.txt
+	hg18_ljb_pp2.txt.idx
+	hg18_ljb_sift.txt
+	hg18_ljb_sift.txt.idx
+	hg18_phastConsElements44way.txt
+	hg18_refGeneMrna.fa
+	hg18_refGene.txt
+	hg18_refLink.txt
+	hg18_snp128NonFlagged.txt
+	hg18_snp128NonFlagged.txt.idx
+	hg18_snp128.txt
+	hg18_snp128.txt.idx
+	hg18_snp129NonFlagged.txt
+	hg18_snp129NonFlagged.txt.idx
+	hg18_snp129.txt
+	hg18_snp129.txt.idx
+	hg18_snp130NonFlagged.txt
+	hg18_snp130NonFlagged.txt.idx
+	hg18_snp130.txt
+	hg18_snp130.txt.idx
+	hg18_snp131NonFlagged.txt
+	hg18_snp131NonFlagged.txt.idx
+	hg18_snp131.txt
+	hg18_snp131.txt.idx
+	hg18_snp132NonFlagged.txt
+	hg18_snp132NonFlagged.txt.idx
+	hg18_snp132.txt
+	hg18_snp132.txt.idx
+	hg18_tfbsConsSites.txt
+	hg18_YRI.sites.2010_07.txt
+	hg18_YRI.sites.2010_07.txt.idx
+	hg19_AFR.sites.2012_04.txt
+	hg19_AFR.sites.2012_04.txt.idx
+	hg19_ALL.sites.2010_11.txt
+	hg19_ALL.sites.2010_11.txt.idx
+	hg19_ALL.sites.2012_02.txt
+	hg19_ALL.sites.2012_02.txt.idx
+	hg19_ALL.sites.2012_04.txt
+	hg19_ALL.sites.2012_04.txt.idx
+	hg19_AMR.sites.2012_04.txt
+	hg19_AMR.sites.2012_04.txt.idx
+	hg19_ASN.sites.2012_04.txt
+	hg19_ASN.sites.2012_04.txt.idx
+	hg19_avsift.txt
+	hg19_avsift.txt.idx
+	hg19_cg46.txt
+	hg19_cg46.txt.idx
+	hg19_cg69.txt
+	hg19_cg69.txt.idx
+	hg19_cosmic61.txt
+	hg19_cosmic61.txt.idx
+	hg19_cosmic63.txt
+	hg19_cosmic63.txt.idx
+	hg19_cosmic64.txt
+	hg19_cosmic64.txt.idx
+	hg19_cosmic65.txt
+	hg19_cosmic65.txt.idx
+	hg19_cytoBand.txt
+	hg19_dgv.txt
+	hg19_ensGeneMrna.fa
+	hg19_ensGene.txt
+	hg19_esp5400_aa.txt
+	hg19_esp5400_aa.txt.idx
+	hg19_esp5400_all.txt
+	hg19_esp5400_all.txt.idx
+	hg19_esp5400_ea.txt
+	hg19_esp5400_ea.txt.idx
+	hg19_esp6500_aa.txt
+	hg19_esp6500_aa.txt.idx
+	hg19_esp6500_all.txt
+	hg19_esp6500_all.txt.idx
+	hg19_esp6500_ea.txt
+	hg19_esp6500_ea.txt.idx
+	hg19_esp6500si_aa.txt
+	hg19_esp6500si_aa.txt.idx
+	hg19_esp6500si_all.txt
+	hg19_esp6500si_all.txt.idx
+	hg19_esp6500si_ea.txt
+	hg19_esp6500si_ea.txt.idx
+	hg19_EUR.sites.2012_04.txt
+	hg19_EUR.sites.2012_04.txt.idx
+	hg19_genomicSuperDups.txt
+	hg19_gerp++gt2.txt
+	hg19_gerp++gt2.txt.idx
+	hg19_gwasCatalog.txt
+	hg19_keggMapDesc.txt
+	hg19_keggPathway.txt
+	hg19_kgXref.txt
+	hg19_knownGeneMrna.fa
+	hg19_knownGene.txt
+	hg19_ljb_all.txt
+	hg19_ljb_all.txt.idx
+	hg19_ljb_lrt.txt
+	hg19_ljb_lrt.txt.idx
+	hg19_ljb_mt.txt
+	hg19_ljb_mt.txt.idx
+	hg19_ljb_phylop.txt
+	hg19_ljb_phylop.txt.idx
+	hg19_ljb_pp2.txt
+	hg19_ljb_pp2.txt.idx
+	hg19_ljb_sift.txt
+	hg19_ljb_sift.txt.idx
+	hg19_phastConsElements46way.txt
+	hg19_refGeneMrna.fa
+	hg19_refGene.txt
+	hg19_refLink.txt
+	hg19_snp130NonFlagged.txt
+	hg19_snp130NonFlagged.txt.idx
+	hg19_snp130.txt
+	hg19_snp130.txt.idx
+	hg19_snp131NonFlagged.txt
+	hg19_snp131NonFlagged.txt.idx
+	hg19_snp131.txt
+	hg19_snp132NonFlagged.txt
+	hg19_snp132NonFlagged.txt.idx
+	hg19_snp132.txt
+	hg19_snp132.txt.idx
+	hg19_snp135NonFlagged.txt
+	hg19_snp135NonFlagged.txt.idx
+	hg19_snp135.txt
+	hg19_snp137NonFlagged.txt
+	hg19_snp137NonFlagged.txt.idx
+	hg19_snp137.txt
+	hg19_tfbsConsSites.txt
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README~	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,211 @@
+ANNOVAR needs to be installed manually in the following way:
+
+
+1)	If you already have ANNOVAR installed on your system, simply edit the tool-data/annovar.loc file to reflect locations of 
+	the perl scripts (annotate_variation.pl and convert2annovar.pl) and humandb directory (directory containing the annovar database files)
+1b) Restart galaxy instance for changes in .loc file to take effect
+
+
+2)	If you do not have ANNOVAR installed, request annovar download and sign license here: 
+		http://www.openbioinformatics.org/annovar/annovar_download_form.php
+
+	3)	Once downloaded, install annovar per the installation instructions and edit annovar.loc file to reflect location of directory containing perl scripts.
+			tool uses annotate_variation.pl  and  convert2annovar.pl
+	
+	4)	Then download all desired databases for all desired builds as follows:
+			annotate_variation.pl -downdb -buildver <build> [-webfrom annovar] <database> <humandb>
+	
+		where <humandb> is location where all database files should be stored
+		and <database> is the database file to download, e.g. refGene (see bottom of document for all available database files at the time of writing this tool)
+		and <build> can be hg18 or hg19 for humans, also other organisms available.
+	
+		list of all available databases can be found here: http://www.openbioinformatics.org/annovar/annovar_db.html
+	
+	5) edit the tool-data/annovar.loc file to reflect location of humandb folder
+	5b) restart galaxy instance for changes in .loc file to take effect
+
+6) Tool uses cgatools join for combining of files, this should be installed automatically with repository. If not, get a copy from Complete Genomics directly:
+		wget http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz
+		tar xvzf cgatools-1.7.1.5-linux_binary-x86_64.tar.gz		
+
+	and place the "cgatools" binary found in bin/ directory on your $PATH
+	
+	
+list of files in my own humandb folder:
+
+	hg18_ALL.sites.2012_04.txt
+	hg18_ALL.sites.2012_04.txt.idx
+	hg18_avsift.txt
+	hg18_avsift.txt.idx
+	hg18_CEU.sites.2010_07.txt
+	hg18_CEU.sites.2010_07.txt.idx
+	hg18_cg46.txt
+	hg18_cg46.txt.idx
+	hg18_cg69.txt
+	hg18_cg69.txt.idx
+	hg18_cytoBand.txt
+	hg18_dgv.txt
+	hg18_ensGeneMrna.fa
+	hg18_ensGene.txt
+	hg18_esp5400_aa.txt
+	hg18_esp5400_aa.txt.idx
+	hg18_esp5400_all.txt
+	hg18_esp5400_all.txt.idx
+	hg18_esp5400_ea.txt
+	hg18_esp5400_ea.txt.idx
+	hg18_esp6500_aa.txt
+	hg18_esp6500_aa.txt.idx
+	hg18_esp6500_all.txt
+	hg18_esp6500_all.txt.idx
+	hg18_esp6500_ea.txt
+	hg18_esp6500_ea.txt.idx
+	hg18_esp6500si_aa.txt
+	hg18_esp6500si_aa.txt.idx
+	hg18_esp6500si_all.txt
+	hg18_esp6500si_all.txt.idx
+	hg18_esp6500si_ea.txt
+	hg18_esp6500si_ea.txt.idx
+	hg18_example_db_generic.txt
+	hg18_example_db_gff3.txt
+	hg18_genomicSuperDups.txt
+	hg18_gerp++gt2.txt
+	hg18_gerp++gt2.txt.idx
+	hg18_gwasCatalog.txt
+	hg18_JPTCHB.sites.2010_07.txt
+	hg18_JPTCHB.sites.2010_07.txt.idx
+	hg18_keggMapDesc.txt
+	hg18_keggPathway.txt
+	hg18_kgXref.txt
+	hg18_knownGeneMrna.fa
+	hg18_knownGene.txt
+	hg18_ljb_all.txt
+	hg18_ljb_all.txt.idx
+	hg18_ljb_lrt.txt
+	hg18_ljb_lrt.txt.idx
+	hg18_ljb_mt.txt
+	hg18_ljb_mt.txt.idx
+	hg18_ljb_phylop.txt
+	hg18_ljb_phylop.txt.idx
+	hg18_ljb_pp2.txt
+	hg18_ljb_pp2.txt.idx
+	hg18_ljb_sift.txt
+	hg18_ljb_sift.txt.idx
+	hg18_phastConsElements44way.txt
+	hg18_refGeneMrna.fa
+	hg18_refGene.txt
+	hg18_refLink.txt
+	hg18_snp128NonFlagged.txt
+	hg18_snp128NonFlagged.txt.idx
+	hg18_snp128.txt
+	hg18_snp128.txt.idx
+	hg18_snp129NonFlagged.txt
+	hg18_snp129NonFlagged.txt.idx
+	hg18_snp129.txt
+	hg18_snp129.txt.idx
+	hg18_snp130NonFlagged.txt
+	hg18_snp130NonFlagged.txt.idx
+	hg18_snp130.txt
+	hg18_snp130.txt.idx
+	hg18_snp131NonFlagged.txt
+	hg18_snp131NonFlagged.txt.idx
+	hg18_snp131.txt
+	hg18_snp131.txt.idx
+	hg18_snp132NonFlagged.txt
+	hg18_snp132NonFlagged.txt.idx
+	hg18_snp132.txt
+	hg18_snp132.txt.idx
+	hg18_tfbsConsSites.txt
+	hg18_YRI.sites.2010_07.txt
+	hg18_YRI.sites.2010_07.txt.idx
+	hg19_AFR.sites.2012_04.txt
+	hg19_AFR.sites.2012_04.txt.idx
+	hg19_ALL.sites.2010_11.txt
+	hg19_ALL.sites.2010_11.txt.idx
+	hg19_ALL.sites.2012_02.txt
+	hg19_ALL.sites.2012_02.txt.idx
+	hg19_ALL.sites.2012_04.txt
+	hg19_ALL.sites.2012_04.txt.idx
+	hg19_AMR.sites.2012_04.txt
+	hg19_AMR.sites.2012_04.txt.idx
+	hg19_ASN.sites.2012_04.txt
+	hg19_ASN.sites.2012_04.txt.idx
+	hg19_avsift.txt
+	hg19_avsift.txt.idx
+	hg19_cg46.txt
+	hg19_cg46.txt.idx
+	hg19_cg69.txt
+	hg19_cg69.txt.idx
+	hg19_cosmic61.txt
+	hg19_cosmic61.txt.idx
+	hg19_cosmic63.txt
+	hg19_cosmic63.txt.idx
+	hg19_cosmic64.txt
+	hg19_cosmic64.txt.idx
+	hg19_cytoBand.txt
+	hg19_dgv.txt
+	hg19_ensGeneMrna.fa
+	hg19_ensGene.txt
+	hg19_esp5400_aa.txt
+	hg19_esp5400_aa.txt.idx
+	hg19_esp5400_all.txt
+	hg19_esp5400_all.txt.idx
+	hg19_esp5400_ea.txt
+	hg19_esp5400_ea.txt.idx
+	hg19_esp6500_aa.txt
+	hg19_esp6500_aa.txt.idx
+	hg19_esp6500_all.txt
+	hg19_esp6500_all.txt.idx
+	hg19_esp6500_ea.txt
+	hg19_esp6500_ea.txt.idx
+	hg19_esp6500si_aa.txt
+	hg19_esp6500si_aa.txt.idx
+	hg19_esp6500si_all.txt
+	hg19_esp6500si_all.txt.idx
+	hg19_esp6500si_ea.txt
+	hg19_esp6500si_ea.txt.idx
+	hg19_EUR.sites.2012_04.txt
+	hg19_EUR.sites.2012_04.txt.idx
+	hg19_genomicSuperDups.txt
+	hg19_gerp++gt2.txt
+	hg19_gerp++gt2.txt.idx
+	hg19_gwasCatalog.txt
+	hg19_keggMapDesc.txt
+	hg19_keggPathway.txt
+	hg19_kgXref.txt
+	hg19_knownGeneMrna.fa
+	hg19_knownGene.txt
+	hg19_ljb_all.txt
+	hg19_ljb_all.txt.idx
+	hg19_ljb_lrt.txt
+	hg19_ljb_lrt.txt.idx
+	hg19_ljb_mt.txt
+	hg19_ljb_mt.txt.idx
+	hg19_ljb_phylop.txt
+	hg19_ljb_phylop.txt.idx
+	hg19_ljb_pp2.txt
+	hg19_ljb_pp2.txt.idx
+	hg19_ljb_sift.txt
+	hg19_ljb_sift.txt.idx
+	hg19_phastConsElements46way.txt
+	hg19_refGeneMrna.fa
+	hg19_refGene.txt
+	hg19_refLink.txt
+	hg19_snp130NonFlagged.txt
+	hg19_snp130NonFlagged.txt.idx
+	hg19_snp130.txt
+	hg19_snp130.txt.idx
+	hg19_snp131NonFlagged.txt
+	hg19_snp131NonFlagged.txt.idx
+	hg19_snp131.txt
+	hg19_snp132NonFlagged.txt
+	hg19_snp132NonFlagged.txt.idx
+	hg19_snp132.txt
+	hg19_snp132.txt.idx
+	hg19_snp135NonFlagged.txt
+	hg19_snp135NonFlagged.txt.idx
+	hg19_snp135.txt
+	hg19_snp137NonFlagged.txt
+	hg19_snp137NonFlagged.txt.idx
+	hg19_snp137.txt
+	hg19_tfbsConsSites.txt
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/annovar.loc.sample	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,6 @@
+#loc file for annovar tool
+
+# <columns>value, dbkey, name, ANNOVAR_scripts, ANNOVAR_humandb</columns>
+
+hg18	hg18	build 36 (hg18)	/path/to/annovarscripts	/path/to/humandb
+hg19	hg19	build 37 (hg19)	/path/to/annovarscripts	/path/to/humandb	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,5 @@
+<!-- ANNOVAR files -->
+<table name="annovar_loc" comment_char="#">
+<columns>value, dbkey, name, ANNOVAR_scripts, ANNOVAR_humandb</columns>
+<file path="tool-data/annovar.loc" /> 
+</table>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<tool_dependency>
+	<package name="cgatools17" version="1"> 
+        <install version="1.0">
+            <actions>                				
+                <action type="download_by_url">http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz</action>
+				<action type="shell_command"> chmod a+x bin/cgatools</action>
+                <action type="move_file">
+                	<source>bin/cgatools</source>
+                	<destination>$INSTALL_DIR/bin</destination>
+                </action>	    
+				<action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+                </action>            	               			
+            </actions>
+        </install>
+        <readme>
+			Downloads and installs the cgatools binary. 
+        </readme>
+    </package>      
+</tool_dependency>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/annovar/annovar.sh	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,1203 @@
+#!/bin/bash
+
+test="N"
+
+
+function usage(){
+	echo "usage: $0 todo"
+}
+
+function runfilter(){
+	ifile=$1	
+	columnname=$2
+	threshold=$3
+
+	if [[ $threshold == "-1" ]]
+	then
+		echo "not filtering"
+		return
+	fi
+	
+	echo "filtering: $columnname, $threshold"
+	cat $ifile
+
+	#get column number corresponding to column header
+	column=`awk 'BEGIN{
+					FS="\t";
+					col=-1
+				}{
+					if(FNR==1){
+						for(i=1;i<=NF;i++){
+							if($i == "'"${columnname}"'") 
+								col=i 
+						} 
+						print col 
+					}
+				}' $ifile `
+
+	if [ $column == -1 ]
+	then
+		echo "no such column, exiting"
+		return
+	fi	
+
+	#perform filtering using the threshold
+	awk 'BEGIN{
+		FS="\t";
+		OFS="\t";
+	}{
+		if(FNR==1) 
+			print $0; 
+		if(FNR>1){
+			if( $"'"${column}"'" == "" )  # empty column, then print
+				print $0
+			else if ("'"${threshold}"'" == "text"){}  #if set to text dont check threshold
+				
+			else if ($"'"${column}"'" < "'"${threshold}"'")  #else do check it
+				print $0	
+		}
+	}' $ifile > tmpfile
+
+	mv tmpfile $ifile	
+}
+
+# arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols
+function joinresults(){
+	ofile=$1
+	rfile=$2
+	colchr=$3
+	colstart=$4
+	colend=$5
+	colref=$6
+	colobs=$7
+	addcols=$8 #e.g. "B.col1,B.col2"
+	
+	test="N"
+	
+	# echo "joining result with original file"
+	if [ $test == "Y" ]
+	then 	
+		echo "ofile: $ofile"
+		head $ofile 
+		echo "rfile: $rfile"
+		head $rfile
+	fi
+	numlines=`wc $rfile | cut -d" " -f2`
+	
+	# if empty results file, just add header fields
+	if [[ ! -s $rfile ]] 
+	then			
+		dummycol=${addcols:2}
+		outputcol=${dummycol//",B."/"	"}
+		numcommas=`echo "$addcols" | grep -o "," | wc -l`		
+		
+		awk 'BEGIN{FS="\t";OFS="\t"}{
+				if(FNR==1)
+					print $0,"'"$outputcol"'"; 
+				else{
+					printf $0
+					for(i=0;i<="'"$numcommas"'"+1;i++)
+						printf "\t"
+					printf "\n"
+				}
+			}END{}' $ofile > tempofile
+			
+			mv tempofile $ofile		
+		return
+	fi
+	
+
+	#get input file column names for cgatools join
+	col_chr_name=`head -1  $rfile | cut -f${colchr}`
+	col_start_name=`head -1  $rfile | cut -f${colstart}`
+	col_end_name=`head -1  $rfile | cut -f${colend}`
+	col_ref_name=`head -1  $rfile | cut -f${colref}`
+	col_obs_name=`head -1  $rfile | cut -f${colobs}`
+
+	#get annotation file column names for cgatools join
+	chr_name=`head -1  $ofile | cut -f${chrcol}`
+	start_name=`head -1  $ofile | cut -f${startcol}`
+	end_name=`head -1  $ofile | cut -f${endcol}`
+	ref_name=`head -1  $ofile | cut -f${refcol}`
+	obs_name=`head -1  $ofile | cut -f${obscol}`
+
+	if [ $test == "Y" ]	
+	then
+		echo "input file"
+		echo "chr   col: $col_chr_name ($colchr)"	
+		echo "start col: $col_start_name ($colstart)"	
+		echo "end   col: $col_end_name ($colend)"	
+		echo "ref   col: $col_ref_name ($colref)"	
+		echo "obs   col: $col_obs_name ($colobs)"	
+		echo ""
+		echo "annotation file"
+		echo "chr   col: $chr_name ($chrcol)"	
+		echo "start col: $start_name ($startcol)"	
+		echo "end   col: $end_name ($endcol)"	
+		echo "ref   col: $ref_name ($refcol)"	
+		echo "obs   col: $obs_name ($obscol)"	
+	fi
+
+	#perform join
+	cgatools join --beta \
+		--input $ofile $rfile \
+		--output temporiginal \
+		--match ${chr_name}:${col_chr_name} \
+		--match ${start_name}:${col_start_name} \
+		--match ${end_name}:${col_end_name} \
+		--match ${ref_name}:${col_ref_name} \
+		--match ${obs_name}:${col_obs_name} \
+		--select A.*,$addcols \
+		--always-dump \
+		--output-mode compact 
+
+	#replace originalfile
+	sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header
+	mv temporiginal originalfile
+		
+	if [ $test == "Y" ]
+	then
+		echo "joining complete"
+		head originalfile
+		echo ""	
+	fi
+	
+}
+
+
+
+
+set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
+[ $# -eq 0 ] && usage
+
+
+
+while [ $# -gt 0 ]
+do
+    case "$1" in
+       	--inputfile)      			infile=$2;shift;;  # inputfile
+		--buildver)					buildver=$2;shift;; # hg18 or hg19
+		--humandb)					humandb=$2;shift;; # location of humandb database
+	 	--varfile)      			varfile=$2;shift;; # Y or N  
+		--VCF)						vcf=$2;shift;; #Y or N
+		--chrcol)      				chrcol=$2;shift;;  # which column has chr 
+		--startcol)      			startcol=$2;shift;;  # which column has start
+		--endcol)      				endcol=$2;shift;;  # which column has end
+		--refcol)      				refcol=$2;shift;;  # which column has ref
+		--obscol)      				obscol=$2;shift;;  # which column has alt
+		--vartypecol)      			vartypecol=$2;shift;;  # which column has vartype
+		--convertcoords)			convertcoords=$2;shift;;  # Y or N convert coordinate from CG to 1-based?		
+		--geneanno)      			geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene  
+		--verdbsnp)					verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137")"
+		--tfbs)      				tfbs=$2;shift;; 	# Y or N 
+		--mce)      				mce=$2;shift;; 	# Y or N 
+		--cytoband)      			cytoband=$2;shift;; # Y or N  
+		--segdup)      				segdup=$2;shift;; 	# Y or N 				
+        --dgv)      				dgv=$2;shift;; 	# Y or N 
+		--gwas)      				gwas=$2;shift;; 	# Y or N 
+		--ver1000g) 				ver1000g=$2;shift;; 	# Y or N 
+		--cg46)						cg46=$2;shift;;
+		--cg69)						cg69=$2;shift;;		
+		--impactscores)      		impactscores=$2;shift;; # Y or N 
+		--scriptsdir)	      		scriptsdir=$2;shift;; # Y or N 
+		--esp)      				esp=$2;shift;; 	# Y or N 
+		--gerp)      				gerp=$2;shift;; 	# Y or N 
+		--cosmic61)					cosmic61=$2;shift;;  # Y or N
+		--cosmic63)					cosmic63=$2;shift;;  # Y or N
+		--cosmic64)					cosmic64=$2;shift;;  # Y or N
+		--cosmic65)					cosmic65=$2;shift;;  # Y or N
+		--filt_dbsnp)				filt_dbsnp=$2;shift;;
+		--filt1000GALL)				threshold_1000g_ALL=$2;shift;; #threshold value
+		--filt1000GAFR)				threshold_1000g_AFR=$2;shift;; #threshold value
+		--filt1000GAMR)				threshold_1000g_AMR=$2;shift;; #threshold value
+		--filt1000GASN)				threshold_1000g_ASN=$2;shift;; #threshold value
+		--filt1000GEUR)				threshold_1000g_EUR=$2;shift;; #threshold value
+		--filtESP6500ALL)			threshold_ESP6500_ALL=$2;shift;; #threshold value
+		--filtESP6500EA)			threshold_ESP6500_EA=$2;shift;; #threshold value
+		--filtESP6500AA)			threshold_ESP6500_AA=$2;shift;; #threshold value
+		--filtcg46)					threshold_cg46=$2;shift;;
+		--filtcg69)					threshold_cg69=$2;shift;;
+		--outall)      				outfile_all=$2;shift;; # file 
+		--outfilt)      			outfile_filt=$2;shift;; # file
+		--outinvalid)				outfile_invalid=$2;shift;; #file
+		--dorunannovar)				dorunannovar=$2;shift;; 	#Y or N
+		--dofilter)					dofilter=$2;shift;; #Y or N	
+       -h)        	shift;;
+	   --)        	shift;break;;
+       -*)        	usage;;
+       *)         	break;;            
+    esac
+    shift
+done
+
+
+if [ $test == "Y" ]
+then
+	echo "dorunannovar: $dorunannovar"
+	echo "infile: $infile"
+	echo "buildver: $buildver"
+	echo "annovardb: $humandb"
+	echo "verdbnsp: $verdbsnp"
+	echo "geneanno: $geneanno"
+	echo "tfbs: $tfbs"
+	echo "mce: $mce"
+	echo "cytoband: $cytoband"
+	echo "segdup: $segdup"
+	echo "dgv: $dgv"
+	echo "gwas: $gwas"	
+	echo "g1000: ${g1000}"
+	echo "cg46: ${cg46}"	
+	echo "cg69: ${cg69}"
+	echo "impactscores: $impactscores"
+	echo "esp: $esp"
+	echo "gerp: $gerp"
+	echo "cosmic: $cosmic"
+	echo "outfile: $outfile_all"
+	echo "outinvalid: $outfile_invalid"
+	echo "outfiltered: $outfile_filt"
+	echo "varfile: $varfile"
+	echo "vcf" $vcf
+	echo "chrcol: $chrcol"
+	echo "startcol: $startcol"
+	echo "endcol: $endcol"
+	echo "refcol: $refcol"
+	echo "obscol: $obscol"
+	echo "convertcoords: $convertcoords"
+	echo "vartypecol: $vartypecol"
+	echo "dofilter: $dofilter"
+	echo "threshold_1000g_ALL  : $threshold_1000g_ALL"
+	echo "threshold_1000g_AFR  : $threshold_1000g_AFR"
+	echo "threshold_1000g_AMR  : $threshold_1000g_AMR"
+	echo "threshold_1000g_ASN  : $threshold_1000g_ASN"
+	echo "threshold_1000g_EUR  : $threshold_1000g_EUR"
+	echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL"
+	echo "threshold_ESP6500_EA : $threshold_ESP6500_EA"
+	echo "threshold_ESP6500_AA : $threshold_ESP6500_AA"
+
+fi
+
+
+refgene="N"
+knowngene="N"
+ensgene="N"	
+#parse geneanno param
+if [[ $geneanno =~ "refSeq" ]]
+then
+	refgene="Y"
+fi
+if [[ $geneanno =~ "knowngene" ]]
+then
+	knowngene="Y"
+fi
+if [[ $geneanno =~ "ensgene" ]]
+then
+	ensgene="Y"
+fi
+
+
+#parse verdbsnp/1000g/esp strings
+dbsnpstr=${verdbsnp//,/ }
+filt_dbsnpstr=${filt_dbsnp//,/ }
+g1000str=${ver1000g//,/ }
+espstr=${esp//,/ }
+
+if [ $test == "Y" ]
+then
+	echo "annotate dbsnp: $dbsnpstr"
+	echo "annotate esp:   $espstr"
+	echo "filter dbsnp: $filt_dbsnpstr"
+fi
+
+mutationtaster="N"
+avsift="N"
+lrt="N"
+polyphen2="N"
+phylop="N"
+ljbsift="N"
+#parse impactscores param
+if [[ $impactscores =~ "mutationtaster" ]]
+then
+	mutationtaster="Y"
+fi
+if [[ $impactscores =~ "sift" ]]
+then
+	avsift="Y"
+fi
+if [[ $impactscores =~ "lrt" ]]
+then
+	lrt="Y"
+fi
+if [[ $impactscores =~ "ljbsift" ]]
+then
+	ljbsift="Y"
+fi
+if [[ $impactscores =~ "pp2" ]]
+then
+	polyphen2="Y"
+fi
+if [[ $impactscores =~ "phylop" ]]
+then
+	phylop="Y"
+fi
+
+if [[ $varfile == "Y" ]]
+then
+	convertcoords="Y"
+fi
+
+#ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
+
+
+#column header names we will be adding
+# ESP 6500
+esp6500si_colheader_ALL="ESP6500si_ALL"
+esp6500si_colheader_EA="ESP6500si_EA"
+esp6500si_colheader_AA="ESP6500si_AA"
+esp6500_colheader_ALL="ESP6500_ALL"
+esp6500_colheader_EA="ESP6500_EA"
+esp6500_colheader_AA="ESP6500_AA"
+esp5400si_colheader_ALL="ESP5400si_ALL"
+esp5400si_colheader_EA="ESP5400si_EA"
+esp5400si_colheader_AA="ESP5400si_AA"
+esp5400_colheader_ALL="ESP5400_ALL"
+esp5400_colheader_EA="ESP5400_EA"
+esp5400_colheader_AA="ESP5400_AA"
+
+
+# cg46 cg69
+cg46_colheader="CG_46_genomes"
+cg69_colheader="CG_69_genomes"
+
+cp $infile originalfile
+#run annovar or filter only?
+if [ $dorunannovar == "Y" ]
+then
+
+
+	####################################
+	#
+	#       PREPARE INPUT FILE
+	#
+	####################################
+	
+	echo "converting input file"
+	vcfheader=""
+	if [ $vcf == "Y" ]     #if CG varfile, convert
+	then 
+		# convert vcf to annovarinput
+		$scriptsdir/convert2annovar.pl --format vcf4 --includeinfo --outfile annovarinput $infile 2>&1
+		
+		#construct header line from vcf file		
+		cat $infile | grep "#CHROM" > additionalcols
+		sed -i 's/#//g' additionalcols 			
+		vcfheader="\t`cat additionalcols`"
+		echo "vcfheader:$vcfheader"
+		echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile
+		cat annovarinput >> originalfile
+		
+		chrcol=1
+		startcol=2
+		endcol=3
+		refcol=4
+		obscol=5
+
+	
+	elif [ $varfile == "Y" ]     #if CG varfile, convert
+	then 
+		# convert varfile
+		$scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1
+		echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile
+		cat annovarinput | cut -f1-6,8 >> originalfile
+		cat annovarinput | cut -f1-5 >> annovarinput2
+		mv annovarinput2 annovarinput
+
+		chrcol=1
+		startcol=2
+		endcol=3
+		refcol=4
+		obscol=5
+
+	elif [ $convertcoords == "Y" ]    # if CG-coordinates, convert 
+	then
+		#echo "rearranging columns and converting coordinates"
+		awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if(FNR>1) { 
+					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; 
+					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; 
+			
+					printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");				
+				}
+			}	
+			END{
+			}' $infile > annovarinput
+
+			#remove any "chr" prefixes
+			sed -i 's/chr//g' annovarinput
+
+			awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if(FNR>=1) { 
+					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 }; 
+					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
+					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 }; 
+			
+					print $0			
+				}
+			}	
+			END{
+			}' $infile > originalfile
+
+			#remove any "chr" prefixes
+			sed -i 's/chr//g' originalfile
+			sed -i 's/omosome/chromosome/g' originalfile
+
+
+	else #only rearrange columns if already 1-based coordinates	
+		echo "rearranging columns "
+		awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if(FNR>1) { 			
+					printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");				
+				}
+			}	
+			END{
+			}' $infile > annovarinput
+
+			#remove any "chr" prefixes
+			sed -i 's/chr//g' annovarinput
+			sed 's/chr//g' $infile > originalfile
+			sed -i 's/omosome/chromosome/g' originalfile
+	fi
+
+	echo "...finished conversion"
+	
+
+
+
+	####################################
+	#
+	#       RUN ANNOVAR COMMANDS
+	#
+	####################################
+
+	
+
+	######    gene-based annotation   #######
+
+	# RefSeq Gene
+	if [ $refgene == "Y" ]
+	then
+		echo -e "\nrefSeq gene"
+		$scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene annovarinput $humandb 2>&1
+		
+		annovarout=annovarinput.variant_function
+		sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout  3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene
+
+		annovarout=annovarinput.exonic_variant_function 
+		sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout  
+		joinresults originalfile $annovarout  4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange
+	fi
+
+
+	# UCSC KnownGene
+	if [ $knowngene == "Y" ]
+	then
+		echo -e "\nUCSC known gene"
+		$scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.variant_function
+		sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene
+
+		annovarout=annovarinput.exonic_variant_function
+		sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange
+	fi
+
+
+	# Emsembl Gene
+	if [ $ensgene == "Y" ]
+	then
+		echo -e "\nEnsembl gene"
+		$scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.variant_function
+		sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene
+
+		annovarout=annovarinput.exonic_variant_function
+		sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange
+	fi
+
+
+
+	######    region-based annotation   #######
+
+
+	# Transcription Factor Binding Sites Annotation
+	if [ $mce == "Y" ]
+	then
+		echo -e "\nMost Conserved Elements"
+	
+		if [ $buildver == "hg18" ]
+		then
+			$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1
+			annovarout=annovarinput.${buildver}_phastConsElements44way
+			sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+			joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way	
+
+		else #hg19	
+			$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1
+			annovarout=annovarinput.${buildver}_phastConsElements46way	   
+			sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+			joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way	
+		fi
+	
+	fi
+
+
+
+	# Transcription Factor Binding Sites Annotation
+	if [ $tfbs == "Y" ]
+	then
+		echo -e "\nTranscription Factor Binding Site Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1
+	
+		# arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns
+		annovarout=annovarinput.${buildver}_tfbsConsSites
+		sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS	
+	fi
+
+
+
+	# Identify cytogenetic band for genetic variants
+	if [ $cytoband == "Y" ]
+	then
+		echo -e "\nCytogenic band Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_cytoBand
+		sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.Band	
+	fi
+
+
+	# Identify variants located in segmental duplications
+	if [ $segdup == "Y" ]
+	then
+		echo -e "\nSegmental Duplications Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_genomicSuperDups
+		sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup	
+	fi
+
+
+
+	# Identify previously reported structural variants in DGV
+	if [ $dgv == "Y" ]
+	then
+		echo -e "\nDGV Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgv annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_dgv
+		sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.DGV	
+	fi
+
+
+	# Identify variants reported in previously published GWAS studies
+	if [ $gwas == "Y" ]
+	then
+		echo -e "\nGWAS Annotation"
+		$scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_gwasCatalog
+		sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS	
+	fi
+
+
+	
+	
+	######    filter-based annotation   #######
+
+	#dbSNP
+	for version in $dbsnpstr
+	do
+		if [ $version == "None" ] 
+		then
+			break
+		fi
+		echo -e "\ndbSNP region Annotation, version: $version"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_${version}_dropped		
+		sed -i '1i\db\tdb'${version}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.db${version}	
+	
+
+	done
+
+
+
+	#1000 Genomes
+
+	if [ $ver1000g != "None" ] 
+	then
+
+		for version in $g1000str
+		do
+			#column headers
+			g1000_colheader_ALL="${version}_ALL"
+			g1000_colheader_AFR="${version}_AFR"
+			g1000_colheader_AMR="${version}_AMR"
+			g1000_colheader_ASN="${version}_ASN"
+			g1000_colheader_EUR="${version}_EUR"
+			g1000_colheader_CEU="${version}_CEU"
+			g1000_colheader_YRI="${version}_YRI"
+			g1000_colheader_JPTCHB="${version}_JPTCHB"
+			
+			doALL="N"
+			doAMR="N"
+			doAFR="N"
+			doASN="N"
+			doEUR="N"
+			doCEU="N"
+			doYRI="N"
+			doJPTCHB="N"
+
+
+			if [ $version == "1000g2012apr" ]
+			then
+				fileID="2012_04"
+				doALL="Y"
+				if [ $buildver == "hg19" ]
+				then
+					doAMR="Y"
+					doAFR="Y"
+					doASN="Y"
+					doEUR="Y"
+				fi		
+			elif [[ $version == "1000g2012feb" && $buildver == "hg19" ]]
+			then
+				fileID="2012_02"				
+				doALL="Y"	
+			elif [[ $version == "1000g2010nov" && $buildver == "hg19" ]]
+			then
+				fileID="2010_11"
+				doALL="Y"	
+			elif [[ $version == "1000g2010jul" && $buildver == "hg18" ]]
+			then
+				fileID="2010_07"
+				doALL="N"
+				doCEU="Y"
+				doYRI="Y"
+				doJPTCHB="Y"
+			else
+				echo "unrecognized 1000g version, skipping"
+			fi
+
+			#ALL
+			if [ $doALL == "Y"  ]
+				then
+				echo -e "\n1000Genomes ALL"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL	
+			fi
+
+			# AFR
+			if [ $doAFR == "Y"  ]
+			then
+				echo -e "\n1000Genomes AFR"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR	
+			fi
+
+			
+			# AMR
+			if [ $doAMR == "Y"  ]
+			then
+				echo -e "\n1000Genomes AMR"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR	
+			fi
+
+			# ASN
+			if [ $doASN == "Y"  ]
+			then
+				echo -e "\n1000Genomes ASN"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN	
+			fi
+
+			# EUR
+			if [ $doEUR == "Y"  ]
+			then
+				echo -e "\n1000Genomes EUR"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR	
+			fi
+
+			# CEU
+			if [ $doCEU == "Y"  ]
+			then
+				echo -e "\n1000Genomes CEU"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU	
+			fi
+
+			# YRI
+			if [ $doYRI == "Y"  ]
+			then
+				echo -e "\n1000Genomes YRI"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI	
+	
+
+			fi
+
+			#JPTCHB
+			if [ $doJPTCHB == "Y"  ]
+			then
+				echo -e "\n1000Genomes JPTCHB"
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped
+				sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB	
+			fi
+
+		done
+	fi
+
+
+	# SIFT
+	if [ $avsift == "Y" ]
+	then
+		echo -e "\nSIFT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_avsift_dropped
+		sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT	
+	fi
+
+	#ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
+	# SIFT2
+	if [ $ljbsift == "Y" ]
+	then
+		echo -e "\nSIFT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_sift_dropped
+		sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT
+	fi
+
+
+	# PolyPhen2
+	if [ $polyphen2 == "Y" ]
+	then
+		echo -e "\nPolyPhen Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_pp2_dropped
+		sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2
+	fi
+
+
+	# MutationTaster
+	if [ $mutationtaster == "Y" ]
+	then
+		echo -e "\nMutationTaster Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_mt_dropped
+		sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster
+	fi
+
+
+	# LRT
+	if [ $lrt == "Y" ]
+	then
+		echo -e "\nLRT Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_lrt_dropped
+		sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore
+	fi
+
+	# PhyloP
+	if [ $phylop == "Y" ]
+	then
+		echo -e "\nPhyloP Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_ljb_phylop_dropped
+		sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP
+	fi
+
+
+	### ESP  Exome Variant Server
+	if [ $esp != "None" ] 
+	then
+		echo -e "\nESP Annotation"
+		for version in $espstr
+		do
+			echo "version: $version"
+			# 6500si ALL
+			if [ $version == "esp6500si_all" ]
+			then 				
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500si_all_dropped
+				sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL
+			fi
+
+			
+			# 6500si European American
+			if [ $version == "esp6500si_ea" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500si_ea_dropped
+				sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA
+			fi
+
+			# 6500si African Americans
+			if [ $version == "esp6500si_aa" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500si_aa_dropped
+				sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA
+			fi
+
+
+			# 6500 ALL
+			if [ $version == "esp6500_all" ]
+			then 				
+				ls
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1
+				
+				annovarout=annovarinput.${buildver}_esp6500_all_dropped				
+				sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL
+			fi
+
+			
+			# 6500 European American
+			if [ $version == "esp6500_ea" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1	
+				annovarout=annovarinput.${buildver}_esp6500_ea_dropped
+				sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA
+			fi
+
+			# 6500 African Americans
+			if [ $version == "esp6500_aa" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp6500_aa_dropped
+				sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA
+			fi
+
+
+			# 5400 ALL
+			if [ $version == "esp5400_all" ]
+			then 				
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp5400_all_dropped
+				sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL
+			fi
+
+			
+			# 5400 European American
+			if [ $version == "esp5400_ea" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp5400_ea_dropped
+				sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA
+			fi
+
+			# 5400 African Americans
+			if [ $version == "esp5400_aa" ]
+			then
+				$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1
+	
+				annovarout=annovarinput.${buildver}_esp5400_aa_dropped
+				sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+				joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA
+			fi
+
+		done
+	fi	
+
+
+	#ESP6500
+	if [ $esp == "Y" ]
+	then
+		echo -e "\nESP Annotation OLD"
+		# ALL
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_esp6500si_all_dropped
+		sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL
+
+
+		# European American
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_esp6500si_ea_dropped
+		sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA
+
+		# African Americans
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1
+	
+		annovarout=annovarinput.${buildver}_esp6500si_aa_dropped
+		sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA
+	fi
+
+
+
+	#GERP++
+	if [ $gerp == "Y" ]
+	then
+		echo -e "\nGERP++ Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_gerp++gt2_dropped"
+		sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++
+	fi
+
+
+	#COSMIC
+	if [[ $cosmic61 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC61 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic61_dropped"
+		sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61
+
+	fi
+
+	if [[ $cosmic63 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC63 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic63_dropped"
+		sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63
+
+	fi
+
+	if [[ $cosmic64 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC64 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic64_dropped"
+		sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64
+
+	fi
+	
+	if [[ $cosmic65 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC65 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic65_dropped"
+		sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65
+
+	fi
+
+	#cg46
+	if [[ $cg46 == "Y"  ]]
+	then
+		echo -e "\nCG 46 genomes Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cg46_dropped"
+		sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}
+
+	fi
+
+
+	#cg69
+	if [[ $cg69 == "Y"  ]]
+	then
+		echo -e "\nCG 69 genomes Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cg69_dropped"
+		sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}
+
+	fi
+
+
+	
+	if [ $convertcoords == "Y" ]
+	then
+		echo "converting back coordinates"
+		awk 'BEGIN{
+				FS="\t";
+				OFS="\t";
+			}{
+				if (FNR==1)
+					print $0
+				if(FNR>1) { 
+					$"'"${chrcol}"'" = "chr"$"'"${chrcol}"'"
+					if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 }; 	
+					if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" };			
+					if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" };
+					if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 }; 
+					print $0
+								
+				}
+			}	
+			END{
+			}' originalfile > originalfile_coords
+	else
+		mv originalfile originalfile_coords
+	fi
+
+	#restore "chr" prefix?
+
+	#move to outputfile
+	if [ ! -s annovarinput.invalid_input ]
+	then
+		echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input
+	fi
+	
+	cp originalfile_coords $outfile_all
+	cp annovarinput.invalid_input $outfile_invalid 2>&1
+fi #if $dorunannovar
+
+
+
+
+
+############################################
+#
+#       Filter Annotated Variants 
+#
+############################################
+
+
+if [[ $dofilter == "Y" ]]
+then
+	echo "starting filtering"
+	cp originalfile filteredfile
+	
+	### do the filtering
+	# usage: runfilter <column name> <threshold>   (-1=do not filter, 0=filter any value)
+	
+	#1000genomes
+	runfilter filteredfile ${g1000_colheader_ALL} ${threshold_1000g_ALL}
+	runfilter filteredfile ${g1000_colheader_AFR} ${threshold_1000g_AFR}
+	runfilter filteredfile ${g1000_colheader_AMR} ${threshold_1000g_AMR}
+	runfilter filteredfile ${g1000_colheader_ASN} ${threshold_1000g_ASN}
+	runfilter filteredfile ${g1000_colheader_EUR} ${threshold_1000g_EUR}
+
+	#esp
+	runfilter filteredfile ${esp6500_colheader_ALL} ${threshold_ESP6500_ALL}
+	runfilter filteredfile ${esp6500_colheader_EA} ${threshold_ESP6500_EA}
+	runfilter filteredfile ${esp6500_colheader_AA} ${threshold_ESP6500_AA}		
+	
+	#dbsnp
+	for version in $filt_dbsnpstr
+	do
+		if [ $version == "None" ] 
+		then
+			break
+		fi 
+		runfilter filteredfile "db$version" "text"  #-42 will filter any non-empty string in that field
+
+	done
+	
+	#complete genomics
+	runfilter filteredfile ${cg46_colheader} ${threshold_cg46}
+	runfilter filteredfile ${cg69_colheader} ${threshold_cg69}
+
+	#move filtered output file to galaxy output file
+	cp filteredfile $outfile_filt
+	
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/annovar/annovar.xml	Wed Sep 18 10:51:20 2013 -0400
@@ -0,0 +1,211 @@
+<tool id="Annovar" name="ANNOVAR" version="2013aug">
+	<description> Annotate a file using ANNOVAR </description>
+	
+	<requirements>		
+		<requirement type="package" version="1">cgatools17</requirement>
+	</requirements>
+	
+	<command interpreter="bash">
+		annovar.sh		
+		--impactscores ${impactscores}
+		--esp ${esp}
+		--gerp ${gerp}
+		--cosmic61 ${cosmic61}
+		--cosmic63 ${cosmic63}	
+		--cosmic64 ${cosmic64}		
+		--cosmic65 ${cosmic65}		
+		--outall ${annotated}		
+		--outinvalid ${invalid}
+		--dorunannovar ${dorun}
+		--inputfile ${infile}
+		--buildver ${reference.fields.dbkey}
+		--humandb ${reference.fields.ANNOVAR_humandb}
+		--scriptsdir ${reference.fields.ANNOVAR_scripts}	
+		--verdbsnp ${verdbsnp}
+		--geneanno ${geneanno}
+		--tfbs ${tfbs}
+		--mce ${mce}
+		--cytoband ${cytoband}
+		--segdup ${segdup}
+        --dgv ${dgv}
+		--gwas ${gwas}				
+		#if $filetype.type == "other"
+			--varfile N
+			--VCF N
+			--chrcol ${filetype.col_chr}
+			--startcol ${filetype.col_start}
+			--endcol ${filetype.col_end}
+			--obscol ${filetype.col_obs}
+			--refcol ${filetype.col_ref}
+		
+			#if $filetype.convertcoords.convert == "Y"
+				--vartypecol ${filetype.convertcoords.col_vartype}
+				--convertcoords Y
+			#else
+				--convertcoords N
+			#end if
+		#end if
+		#if $filetype.type == "vcf"
+			--varfile N
+			--VCF Y
+			--convertcoords N
+		#end if
+		#if $filetype.type == "varfile"
+			--varfile Y
+			--VCF N			
+		#end if			
+		--cg46 ${cgfortysix}
+		--cg69 ${cgsixtynine}
+		--ver1000g ${ver1000g}
+		
+	</command>
+		
+	<inputs>
+		<param name="dorun" type="hidden" value="Y"/> <!-- will add tool in future to filter on annovar columns, then will call annovar.sh with dorun==N -->
+		<param name="reference" type="select" label="Reference">
+			<options from_data_table="annovar_loc" />				
+		</param>
+				
+		<param name="infile" type="data" label="Select file to annotate" help="Must be CG varfile or a tab-separated file with a 1 line header"/>
+		<conditional name="filetype">
+			<param name="type" type="select" label="Select filetype" >
+				<option value="vcf" selected="false"> VCF4 file </option>
+				<option value="varfile" selected="false"> CG varfile </option>
+				<option value="other" selected="false"> Other </option>
+			</param>
+			<when value="other">
+				<param name="col_chr"     type="data_column"   data_ref="infile" multiple="False" label="Chromosome Column"  /> 
+				<param name="col_start"   type="data_column"   data_ref="infile" multiple="False" label="Start Column"  /> 
+				<param name="col_end"     type="data_column"   data_ref="infile" multiple="False" label="End Column"  /> 
+				<param name="col_ref"     type="data_column"   data_ref="infile" multiple="False" label="Reference Allele Column"  /> 
+				<param name="col_obs"     type="data_column"   data_ref="infile" multiple="False" label="Observed Allele Column"  /> 	
+				<conditional name="convertcoords">
+					<param name="convert" type="select" label="Is this file using Complete Genomics (0-based half-open) cooridinates?" >
+						<option value="Y"> Yes </option>
+						<option value="N" selected="True"> No </option>
+					</param>
+					<when value="Y">
+						<param name="col_vartype" type="data_column"   data_ref="infile" multiple="False" label="varType Column"  /> 
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+
+
+
+		<param name="geneanno" type="select" label="Select Gene Annotation(s)" multiple="true" optional="true" display="checkboxes">			
+			<option value="refSeq" selected="true"  > RefSeq </option>
+			<option value="knowngene"> UCSC KnownGene </option>
+			<option value="ensgene"  > Ensembl </option>			
+		</param>	
+
+
+		<!-- region-based annotation -->
+		<param name="cytoband" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Cytogenic band Annotation?" help="This option identifies Giemsa-stained chromosomes bands, (e.g. 1q21.1-q23.3)."/>
+		<param name="tfbs" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Transcription Factor Binding Site Annotation?"/>
+		<param name="mce" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Most Conserved Elements Annotation?" help="This option phastCons 44-way alignments to annotate variants that fall within conserved genomic regions."/>
+		<param name="segdup" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Segmental Duplication Annotation?" help="Genetic variants that are mapped to segmental duplications are most likely sequence alignment errors and should be treated with extreme caution."/>
+		<param name="dgv" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="DGV (Database of Genomic Variants) Annotation?" help="Identify previously reported structural variants in DGV (Database of Genomic Variants) "/>
+		<param name="gwas" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="GWAS studies Annotation?" help="Identify variants reported in previously published GWAS (Genome-wide association studies) "/>
+
+
+
+		<!-- filter-based annotation -->
+		<param name="verdbsnp" type="select" label="Select dbSNP version(s) to annotate with" multiple="true" display="checkboxes"  optional="true" help="SNPs in dbSNP may be flagged as Clinically Associated, Select the NonFlagged version if you do not wish to annotate with these SNPs ">			
+			<option value="snp128"          > 128            (hg18/hg19) </option>
+			<option value="snp128NonFlagged"> 128 NonFlagged  </option>
+			<option value="snp129"          > 129            (hg18/hg19) </option>
+			<option value="snp129NonFlagged"> 129 NonFlagged  </option>			
+			<option value="snp130"          > 130            (hg18/hg19) </option>
+			<option value="snp130NonFlagged"> 130 NonFlagged  </option>
+			<option value="snp131"          > 131            (hg18/hg19) </option>	
+			<option value="snp131NonFlagged"> 131 NonFlagged  </option>
+			<option value="snp132"          > 132            (hg18/hg19) </option>
+			<option value="snp132NonFlagged"> 132 NonFlagged  </option>
+			<option value="snp135"          > 135            (hg19 only) </option>	
+			<option value="snp135NonFlagged"> 135 NonFlagged  </option>
+			<option value="snp137"          > 137            (hg19 only) </option>				
+			<option value="snp137NonFlagged"> 137 NonFlagged  </option>			
+		</param>	
+
+		<param name="ver1000g" type="select" label="Select 1000Genomes Annotation(s)" multiple="true" display="checkboxes"  optional="true" help="2012april database for ALL populations was converted to hg18 using the UCSC liftover program">			
+			<option value="1000g2012apr"> 2012apr (hg18/hg19) (5 populations: AMR,AFR,ASN,CEU,ALL) </option>
+			<option value="1000g2012feb"> 2012feb (hg19) (1 population: ALL) </option>
+			<option value="1000g2010nov"> 2010nov (hg19) (1 population: ALL) </option>
+			<option value="1000g2010jul"> 2010jul (hg18) (4 populations: YRI,JPT,CHB,CEU)</option>			
+		</param>	
+		<!-- 
+		<param name="g1000" type="boolean" checked="True" truevalue="Y" falsevalue="N" label="Annotate with 1000genomes project? (version 2012april)"/>
+		-->
+
+
+	<param name="esp" type="select" label="Select Exome Variant Server  version(s) to annotate with" multiple="true" display="checkboxes"  optional="true" help="si versions of databases contain indels and chrY calls">			
+			<option value="esp6500si_all"       > ESP6500si ALL  </option>
+			<option value="esp6500si_ea"        > ESP6500si European Americans  </option>
+			<option value="esp6500si_aa"        > ESP6500si African Americans  </option>
+			<option value="esp6500_all"         > ESP6500   ALL </option>
+			<option value="esp6500_ea"          > ESP6500   European Americans  </option>
+			<option value="esp6500_aa"          > ESP6500   African Americans   </option>			
+			<option value="esp5400_all"         > ESP5400   ALL  </option>
+			<option value="esp5400_ea"          > ESP5400   European Americans  </option>
+			<option value="esp5400_aa"          > ESP5400   African Americans  </option>			
+		</param>	
+
+
+		<param name="gerp" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="GERP++ Annotation?" help="GERP identifies constrained elements in multiple alignments by quantifying substitution deficits (see http://mendel.stanford.edu/SidowLab/downloads/gerp/ for details) This option annotates those variants having GERP++>2 in human genome, as this threshold is typically regarded as evolutionarily conserved and potentially functional"/>
+	
+		<param name="cgfortysix" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Complete Genomics 46 Genomes?" help="Diversity Panel; 46 unrelated individuals"/>
+		<param name="cgsixtynine" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Complete Genomics 69 Genomes?" help="Diversity Panel, Pedigree, YRI trio and PUR trio"/>
+		<param name="cosmic61" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC61? (hg19 only)"/>
+		<param name="cosmic63" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC63? (hg19 only)"/>
+		<param name="cosmic64" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC64? (hg19 only)"/>
+		<param name="cosmic65" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC65? (hg19 only)"/>
+		
+<param name="impactscores" type="select" label="Select functional impact scores annotate with" multiple="true" display="checkboxes" optional="true" help="LJB refers to Liu, Jian, Boerwinkle paper in Human Mutation, pubmed ID 21520341.">			
+			<option value="avsift"> AV SIFT </option>
+			<option value="ljbsift"> LJB SIFT (corresponds to 1-SIFT)</option>
+			<option value="pp2"> PolyPhen2 </option>
+			<option value="mutationtaster" > MutationTaster </option>
+			<option value="lrt"> LRT (Likelihood Ratio Test) </option>			
+			<option value="phylop"> PhyloP </option>
+		</param>	
+			
+
+		<!-- prefix for output file so you dont have to manually rename history items -->
+		<param name="fname" type="text" value="" label="Prefix for your output file" help="Optional"/>		
+				
+	</inputs>
+
+	<outputs>
+		<data format="tabular" name="invalid"   label="$fname ANNOVAR Invalid input on ${on_string}"/>	
+		<data format="tabular" name="annotated" label="$fname ANNOVAR Annotated variants on ${on_string}"/>
+	</outputs>
+
+	<help> 
+**What it does**
+
+This tool will annotate a file using ANNOVAR.
+
+**ANNOVAR Website and Documentation**
+
+Website: http://www.openbioinformatics.org/annovar/
+
+Paper: http://nar.oxfordjournals.org/content/38/16/e164
+
+**Input Formats**
+
+Input Formats may be one of the following:
+	
+	VCF file
+	
+	Complete Genomics varfile
+	
+	Custom tab-delimited file (specify chromosome, start, end, reference allele, observed allele columns)		
+
+	Custom tab-delimited CG-derived file (specify chromosome, start, end, reference allele, observed allele, varType columns)
+		
+	</help>
+
+</tool>
+
+