changeset 3:ff5325029a8e draft

Uploaded
author saskia-hiltemann
date Thu, 10 Apr 2014 09:15:32 -0400
parents 565c0e690238
children e423536a0780
files README README~ tool_data_table_conf.xml.sample~ tool_dependencies.xml~ tools/annovar/annovar.sh tools/annovar/annovar.xml
diffstat 6 files changed, 30 insertions(+), 250 deletions(-) [+]
line wrap: on
line diff
--- a/README	Mon Nov 18 10:32:33 2013 -0500
+++ b/README	Thu Apr 10 09:15:32 2014 -0400
@@ -214,6 +214,9 @@
 hg19_snp137.txt
 hg19_snp137NonFlagged.txt
 hg19_snp137NonFlagged.txt.idx
+hg19_snp138.txt
+hg19_snp138NonFlagged.txt
+hg19_snp138NonFlagged.txt.idx
 hg19_tfbsConsSites.txt
 
 
--- a/README~	Mon Nov 18 10:32:33 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,213 +0,0 @@
-ANNOVAR needs to be installed manually in the following way:
-
-
-1)	If you already have ANNOVAR installed on your system, simply edit the tool-data/annovar.loc file to reflect locations of 
-	the perl scripts (annotate_variation.pl and convert2annovar.pl) and humandb directory (directory containing the annovar database files)
-1b) Restart galaxy instance for changes in .loc file to take effect
-
-
-2)	If you do not have ANNOVAR installed, request annovar download and sign license here: 
-		http://www.openbioinformatics.org/annovar/annovar_download_form.php
-
-	3)	Once downloaded, install annovar per the installation instructions and edit annovar.loc file to reflect location of directory containing perl scripts.
-			tool uses annotate_variation.pl  and  convert2annovar.pl
-	
-	4)	Then download all desired databases for all desired builds as follows:
-			annotate_variation.pl -downdb -buildver <build> [-webfrom annovar] <database> <humandb>
-	
-		where <humandb> is the location where all database files should be stored,
-		<database> is the database file to download, e.g. refGene (see the bottom of this document for all database files available at the time of writing this tool),
-		and <build> can be hg18 or hg19 for humans; builds for other organisms are also available.
-	
-		list of all available databases can be found here: http://www.openbioinformatics.org/annovar/annovar_db.html
-	
-	5) edit the tool-data/annovar.loc file to reflect location of humandb folder
-	5b) restart galaxy instance for changes in .loc file to take effect
-
-6) The tool uses cgatools join to combine files; cgatools should be installed automatically with the repository. If not, get a copy from Complete Genomics directly:
-		wget http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz
-		tar xvzf cgatools-1.7.1.5-linux_binary-x86_64.tar.gz		
-
-	and place the "cgatools" binary found in bin/ directory on your $PATH
-	
-	
-list of files in my own humandb folder:
-
-	hg18_ALL.sites.2012_04.txt
-	hg18_ALL.sites.2012_04.txt.idx
-	hg18_avsift.txt
-	hg18_avsift.txt.idx
-	hg18_CEU.sites.2010_07.txt
-	hg18_CEU.sites.2010_07.txt.idx
-	hg18_cg46.txt
-	hg18_cg46.txt.idx
-	hg18_cg69.txt
-	hg18_cg69.txt.idx
-	hg18_cytoBand.txt
-	hg18_dgv.txt
-	hg18_ensGeneMrna.fa
-	hg18_ensGene.txt
-	hg18_esp5400_aa.txt
-	hg18_esp5400_aa.txt.idx
-	hg18_esp5400_all.txt
-	hg18_esp5400_all.txt.idx
-	hg18_esp5400_ea.txt
-	hg18_esp5400_ea.txt.idx
-	hg18_esp6500_aa.txt
-	hg18_esp6500_aa.txt.idx
-	hg18_esp6500_all.txt
-	hg18_esp6500_all.txt.idx
-	hg18_esp6500_ea.txt
-	hg18_esp6500_ea.txt.idx
-	hg18_esp6500si_aa.txt
-	hg18_esp6500si_aa.txt.idx
-	hg18_esp6500si_all.txt
-	hg18_esp6500si_all.txt.idx
-	hg18_esp6500si_ea.txt
-	hg18_esp6500si_ea.txt.idx
-	hg18_example_db_generic.txt
-	hg18_example_db_gff3.txt
-	hg18_genomicSuperDups.txt
-	hg18_gerp++gt2.txt
-	hg18_gerp++gt2.txt.idx
-	hg18_gwasCatalog.txt
-	hg18_JPTCHB.sites.2010_07.txt
-	hg18_JPTCHB.sites.2010_07.txt.idx
-	hg18_keggMapDesc.txt
-	hg18_keggPathway.txt
-	hg18_kgXref.txt
-	hg18_knownGeneMrna.fa
-	hg18_knownGene.txt
-	hg18_ljb_all.txt
-	hg18_ljb_all.txt.idx
-	hg18_ljb_lrt.txt
-	hg18_ljb_lrt.txt.idx
-	hg18_ljb_mt.txt
-	hg18_ljb_mt.txt.idx
-	hg18_ljb_phylop.txt
-	hg18_ljb_phylop.txt.idx
-	hg18_ljb_pp2.txt
-	hg18_ljb_pp2.txt.idx
-	hg18_ljb_sift.txt
-	hg18_ljb_sift.txt.idx
-	hg18_phastConsElements44way.txt
-	hg18_refGeneMrna.fa
-	hg18_refGene.txt
-	hg18_refLink.txt
-	hg18_snp128NonFlagged.txt
-	hg18_snp128NonFlagged.txt.idx
-	hg18_snp128.txt
-	hg18_snp128.txt.idx
-	hg18_snp129NonFlagged.txt
-	hg18_snp129NonFlagged.txt.idx
-	hg18_snp129.txt
-	hg18_snp129.txt.idx
-	hg18_snp130NonFlagged.txt
-	hg18_snp130NonFlagged.txt.idx
-	hg18_snp130.txt
-	hg18_snp130.txt.idx
-	hg18_snp131NonFlagged.txt
-	hg18_snp131NonFlagged.txt.idx
-	hg18_snp131.txt
-	hg18_snp131.txt.idx
-	hg18_snp132NonFlagged.txt
-	hg18_snp132NonFlagged.txt.idx
-	hg18_snp132.txt
-	hg18_snp132.txt.idx
-	hg18_tfbsConsSites.txt
-	hg18_YRI.sites.2010_07.txt
-	hg18_YRI.sites.2010_07.txt.idx
-	hg19_AFR.sites.2012_04.txt
-	hg19_AFR.sites.2012_04.txt.idx
-	hg19_ALL.sites.2010_11.txt
-	hg19_ALL.sites.2010_11.txt.idx
-	hg19_ALL.sites.2012_02.txt
-	hg19_ALL.sites.2012_02.txt.idx
-	hg19_ALL.sites.2012_04.txt
-	hg19_ALL.sites.2012_04.txt.idx
-	hg19_AMR.sites.2012_04.txt
-	hg19_AMR.sites.2012_04.txt.idx
-	hg19_ASN.sites.2012_04.txt
-	hg19_ASN.sites.2012_04.txt.idx
-	hg19_avsift.txt
-	hg19_avsift.txt.idx
-	hg19_cg46.txt
-	hg19_cg46.txt.idx
-	hg19_cg69.txt
-	hg19_cg69.txt.idx
-	hg19_cosmic61.txt
-	hg19_cosmic61.txt.idx
-	hg19_cosmic63.txt
-	hg19_cosmic63.txt.idx
-	hg19_cosmic64.txt
-	hg19_cosmic64.txt.idx
-	hg19_cosmic65.txt
-	hg19_cosmic65.txt.idx
-	hg19_cytoBand.txt
-	hg19_dgv.txt
-	hg19_ensGeneMrna.fa
-	hg19_ensGene.txt
-	hg19_esp5400_aa.txt
-	hg19_esp5400_aa.txt.idx
-	hg19_esp5400_all.txt
-	hg19_esp5400_all.txt.idx
-	hg19_esp5400_ea.txt
-	hg19_esp5400_ea.txt.idx
-	hg19_esp6500_aa.txt
-	hg19_esp6500_aa.txt.idx
-	hg19_esp6500_all.txt
-	hg19_esp6500_all.txt.idx
-	hg19_esp6500_ea.txt
-	hg19_esp6500_ea.txt.idx
-	hg19_esp6500si_aa.txt
-	hg19_esp6500si_aa.txt.idx
-	hg19_esp6500si_all.txt
-	hg19_esp6500si_all.txt.idx
-	hg19_esp6500si_ea.txt
-	hg19_esp6500si_ea.txt.idx
-	hg19_EUR.sites.2012_04.txt
-	hg19_EUR.sites.2012_04.txt.idx
-	hg19_genomicSuperDups.txt
-	hg19_gerp++gt2.txt
-	hg19_gerp++gt2.txt.idx
-	hg19_gwasCatalog.txt
-	hg19_keggMapDesc.txt
-	hg19_keggPathway.txt
-	hg19_kgXref.txt
-	hg19_knownGeneMrna.fa
-	hg19_knownGene.txt
-	hg19_ljb_all.txt
-	hg19_ljb_all.txt.idx
-	hg19_ljb_lrt.txt
-	hg19_ljb_lrt.txt.idx
-	hg19_ljb_mt.txt
-	hg19_ljb_mt.txt.idx
-	hg19_ljb_phylop.txt
-	hg19_ljb_phylop.txt.idx
-	hg19_ljb_pp2.txt
-	hg19_ljb_pp2.txt.idx
-	hg19_ljb_sift.txt
-	hg19_ljb_sift.txt.idx
-	hg19_phastConsElements46way.txt
-	hg19_refGeneMrna.fa
-	hg19_refGene.txt
-	hg19_refLink.txt
-	hg19_snp130NonFlagged.txt
-	hg19_snp130NonFlagged.txt.idx
-	hg19_snp130.txt
-	hg19_snp130.txt.idx
-	hg19_snp131NonFlagged.txt
-	hg19_snp131NonFlagged.txt.idx
-	hg19_snp131.txt
-	hg19_snp132NonFlagged.txt
-	hg19_snp132NonFlagged.txt.idx
-	hg19_snp132.txt
-	hg19_snp132.txt.idx
-	hg19_snp135NonFlagged.txt
-	hg19_snp135NonFlagged.txt.idx
-	hg19_snp135.txt
-	hg19_snp137NonFlagged.txt
-	hg19_snp137NonFlagged.txt.idx
-	hg19_snp137.txt
-	hg19_tfbsConsSites.txt
-
--- a/tool_data_table_conf.xml.sample~	Mon Nov 18 10:32:33 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-<!-- ANNOVAR files -->
-<table name="annovar_loc" comment_char="#">
-<columns>value, dbkey, name, ANNOVAR_scripts, ANNOVAR_humandb</columns>
-<file path="tool-data/annovar.loc" /> 
-</table>
--- a/tool_dependencies.xml~	Mon Nov 18 10:32:33 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-	<package name="cgatools17" version="1"> 
-        <install version="1.0">
-            <actions>                				
-                <action type="download_by_url">http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz</action>
-				<action type="shell_command"> chmod a+x bin/cgatools</action>
-                <action type="move_file">
-                	<source>bin/cgatools</source>
-                	<destination>$INSTALL_DIR/bin</destination>
-                </action>	    
-				<action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                    <environment_variable name="PATH" action="prepend_to">$REPOSITORY_INSTALL_DIR</environment_variable>
-                </action>            	               			
-            </actions>
-        </install>
-        <readme>
-			Downloads and installs the cgatools binary. 
-        </readme>
-    </package>      
-</tool_dependency>
-
--- a/tools/annovar/annovar.sh	Mon Nov 18 10:32:33 2013 -0500
+++ b/tools/annovar/annovar.sh	Thu Apr 10 09:15:32 2014 -0400
@@ -179,7 +179,7 @@
 #################################
 
 
-set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
+set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
 [ $# -eq 0 ] && usage
 
 
@@ -201,7 +201,7 @@
 		--convertcoords)			convertcoords=$2;shift;;  # Y or N convert coordinate from CG to 1-based?		
 		--geneanno)      			geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene  
 		--hgvs)						hgvs=$2;shift;;
-		--verdbsnp)					verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137")"
+		--verdbsnp)					verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")"
 		--tfbs)      				tfbs=$2;shift;; 	# Y or N 
 		--mce)      				mce=$2;shift;; 	# Y or N 
 		--cytoband)      			cytoband=$2;shift;; # Y or N  
@@ -222,6 +222,7 @@
 		--cosmic64)					cosmic64=$2;shift;;  # Y or N
 		--cosmic65)					cosmic65=$2;shift;;  # Y or N
 		--cosmic67)					cosmic67=$2;shift;;  # Y or N
+		--cosmic68)					cosmic68=$2;shift;;  # Y or N
 		--nci60)					nci60=$2;shift;;  # Y or N
 		--clinvar)					clinvar=$2;shift;;  # Y or N
 		--filt_dbsnp)				filt_dbsnp=$2;shift;;
@@ -483,7 +484,7 @@
 	if [ $vcf == "Y" ]     #if CG varfile, convert
 	then 
 		# convert vcf to annovarinput
-		$scriptsdir/convert2annovar.pl --format vcf4 --includeinfo --outfile annovarinput $infile 2>&1
+		$scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1
 		
 		#construct header line from vcf file		
 		cat $infile | grep "#CHROM" > additionalcols
@@ -1259,12 +1260,24 @@
 
 	fi
 	
+	if [[ $cosmic68 == "Y" && $buildver == "hg19" ]]
+	then
+		echo -e "\nCOSMIC68 Annotation"
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1
+	
+		annovarout="annovarinput.${buildver}_cosmic68_dropped"
+		sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
+		joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68
+
+	fi
+	
+
 	if [[ $clinvar == "Y" && $buildver == "hg19" ]]
 	then
 		echo -e "\nCLINVAR Annotation"
-		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20131105 annovarinput $humandb 2>&1
+		$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1
 	
-		annovarout="annovarinput.${buildver}_clinvar_20131105_dropped"
+		annovarout="annovarinput.${buildver}_clinvar_20140211_dropped"
 		sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout 
 		joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR
 
--- a/tools/annovar/annovar.xml	Mon Nov 18 10:32:33 2013 -0500
+++ b/tools/annovar/annovar.xml	Thu Apr 10 09:15:32 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="Annovar" name="ANNOVAR" version="2013aug">
+<tool id="AnnovarShed" name="ANNOVAR" version="2013aug">
 	<description> Annotate a file using ANNOVAR </description>
 	
 	<requirements>		
@@ -13,7 +13,8 @@
 		--cosmic63 ${cosmic63}	
 		--cosmic64 ${cosmic64}		
 		--cosmic65 ${cosmic65}
-		--cosmic67 ${cosmic67}			
+		--cosmic67 ${cosmic67}
+		--cosmic68 ${cosmic68}
 		--outall ${annotated}		
 		--outinvalid ${invalid}
 		--dorunannovar ${dorun}
@@ -130,7 +131,9 @@
 			<option value="snp135"          > 135            (hg19 only) </option>	
 			<option value="snp135NonFlagged"> 135 NonFlagged  </option>
 			<option value="snp137"          > 137            (hg19 only) </option>				
-			<option value="snp137NonFlagged"> 137 NonFlagged  </option>			
+			<option value="snp137NonFlagged"> 137 NonFlagged  </option>
+			<option value="snp138"          > 138            (hg19 only) </option>				
+			<option value="snp138NonFlagged"> 138 NonFlagged  </option>				
 		</param>	
 
 		<param name="ver1000g" type="select" label="Select 1000Genomes Annotation(s)" multiple="true" display="checkboxes"  optional="true" help="2012april database for ALL populations was converted to hg18 using the UCSC liftover program">			
@@ -159,7 +162,7 @@
 
 		<param name="gerp" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="GERP++ Annotation?" help="GERP identifies constrained elements in multiple alignments by quantifying substitution deficits (see http://mendel.stanford.edu/SidowLab/downloads/gerp/ for details) This option annotates those variants having GERP++>2 in human genome, as this threshold is typically regarded as evolutionarily conserved and potentially functional"/>
 	
-		<param name="clinvar" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="CLINVAR Annotation? (hg19 only)" help="version 2013-11-05. Annotations include Variant Clinical Significance (unknown, untested, non-pathogenic, probable-non-pathogenic, probable-pathogenic, pathogenic, drug-response, histocompatibility, other) and Variant disease name."/>
+		<param name="clinvar" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="CLINVAR Annotation? (hg19 only)" help="version 2014-02-11. Annotations include Variant Clinical Significance (unknown, untested, non-pathogenic, probable-non-pathogenic, probable-pathogenic, pathogenic, drug-response, histocompatibility, other) and Variant disease name."/>
 		<param name="nci60" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with NCI60? (hg19 only)" help="NCI-60 exome allele frequency data"/>
 		<param name="cgfortysix" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Complete Genomics 46 Genomes?" help="Diversity Panel; 46 unrelated individuals"/>
 		<param name="cgsixtynine" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Complete Genomics 69 Genomes?" help="Diversity Panel, Pedigree, YRI trio and PUR trio"/>
@@ -168,6 +171,8 @@
 		<param name="cosmic64" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC64? (hg19 only)"/>
 		<param name="cosmic65" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC65? (hg19 only)"/>
 		<param name="cosmic67" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC67? (hg19 only)"/>
+		<param name="cosmic68" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with COSMIC68? (hg19 only)"/>
+
 		
 		<param name="newimpactscores" type="select" label="Select functional impact scores (LJB2)" multiple="true" display="checkboxes" optional="true" help="LJB refers to Liu, Jian, Boerwinkle paper in Human Mutation, pubmed ID 21520341. ">						
 			<option value="ljb2_sift"> SIFT score </option>