comparison tools/tools/annovar/annovar.sh @ 5:4600be69b96f draft

Added databases 1000g2015aug, SPIDEX, avsnp138, avsnp142, exac03
author saskia-hiltemann
date Thu, 01 Oct 2015 04:24:45 -0400
parents
children
comparison
equal deleted inserted replaced
4:e423536a0780 5:4600be69b96f
1 #!/bin/bash
2
3 test="N"
4 dofilter="N"
5
6 #########################
7 # DEFINE SOME
8 # FUNCTIONS
9 #########################
10
11 function usage(){
12 echo "usage: $0 todo"
13 }
14
15 function runfilter(){
16 ifile=$1
17 columnname=$2
18 threshold=$3
19
20 if [[ $threshold == "-1" ]]
21 then
22 echo "not filtering"
23 return
24 fi
25
26 echo "filtering: $columnname, $threshold"
27 cat $ifile
28
29 #get column number corresponding to column header
30 column=`awk 'BEGIN{
31 FS="\t";
32 col=-1
33 }{
34 if(FNR==1){
35 for(i=1;i<=NF;i++){
36 if($i == "'"${columnname}"'")
37 col=i
38 }
39 print col
40 }
41 }' $ifile `
42
43 if [ $column == -1 ]
44 then
45 echo "no such column, exiting"
46 return
47 fi
48
49 #perform filtering using the threshold
50 awk 'BEGIN{
51 FS="\t";
52 OFS="\t";
53 }{
54 if(FNR==1)
55 print $0;
56 if(FNR>1){
57 if( $"'"${column}"'" == "" ) # empty column, then print
58 print $0
59 else if ("'"${threshold}"'" == "text"){} #if set to text dont check threshold
60
61 else if ($"'"${column}"'" < "'"${threshold}"'") #else do check it
62 print $0
63 }
64 }' $ifile > tmpfile
65
66 mv tmpfile $ifile
67 }
68
69 # arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols
70 function joinresults(){
71 ofile=$1
72 rfile=$2
73 colchr=$3
74 colstart=$4
75 colend=$5
76 colref=$6
77 colobs=$7
78 addcols=$8 #e.g. "B.col1,B.col2"
79
80 test="N"
81
82 # echo "joining result with original file"
83 if [ $test == "Y" ]
84 then
85 echo "ofile: $ofile"
86 head $ofile
87 echo "rfile: $rfile"
88 head $rfile
89 fi
90 numlines=`wc $rfile | cut -d" " -f2`
91
92 # if empty results file, just add header fields
93 if [[ ! -s $rfile ]]
94 then
95 dummycol=${addcols:2}
96 outputcol=${dummycol//",B."/" "}
97 numcommas=`echo "$addcols" | grep -o "," | wc -l`
98
99 awk 'BEGIN{FS="\t";OFS="\t"}{
100 if(FNR==1)
101 print $0,"'"$outputcol"'";
102 else{
103 printf $0
104 for(i=0;i<="'"$numcommas"'"+1;i++)
105 printf "\t"
106 printf "\n"
107 }
108 }END{}' $ofile > tempofile
109
110 mv tempofile $ofile
111 return
112 fi
113
114
115 #get input file column names for cgatools join
116 col_chr_name=`head -1 $rfile | cut -f${colchr}`
117 col_start_name=`head -1 $rfile | cut -f${colstart}`
118 col_end_name=`head -1 $rfile | cut -f${colend}`
119 col_ref_name=`head -1 $rfile | cut -f${colref}`
120 col_obs_name=`head -1 $rfile | cut -f${colobs}`
121
122 #get annotation file column names for cgatools join
123 chr_name=`head -1 $ofile | cut -f${chrcol}`
124 start_name=`head -1 $ofile | cut -f${startcol}`
125 end_name=`head -1 $ofile | cut -f${endcol}`
126 ref_name=`head -1 $ofile | cut -f${refcol}`
127 obs_name=`head -1 $ofile | cut -f${obscol}`
128
129 if [ $test == "Y" ]
130 then
131 echo "input file"
132 echo "chr col: $col_chr_name ($colchr)"
133 echo "start col: $col_start_name ($colstart)"
134 echo "end col: $col_end_name ($colend)"
135 echo "ref col: $col_ref_name ($colref)"
136 echo "obs col: $col_obs_name ($colobs)"
137 echo ""
138 echo "annotation file"
139 echo "chr col: $chr_name ($chrcol)"
140 echo "start col: $start_name ($startcol)"
141 echo "end col: $end_name ($endcol)"
142 echo "ref col: $ref_name ($refcol)"
143 echo "obs col: $obs_name ($obscol)"
144 fi
145
146 #perform join
147 cgatools join --beta \
148 --input $ofile $rfile \
149 --output temporiginal \
150 --match ${chr_name}:${col_chr_name} \
151 --match ${start_name}:${col_start_name} \
152 --match ${end_name}:${col_end_name} \
153 --match ${ref_name}:${col_ref_name} \
154 --match ${obs_name}:${col_obs_name} \
155 --select A.*,$addcols \
156 --always-dump \
157 --output-mode compact
158
159 #replace originalfile
160 sed -i 's/^>//g' temporiginal #join sometimes adds a '>' symbol to header
161 mv temporiginal originalfile
162
163 if [ $test == "Y" ]
164 then
165 echo "joining complete"
166 head originalfile
167 echo ""
168 fi
169
170 }
171
172
173
174
175 #################################
176 #
177 # PARSE PARAMETERS
178 #
179 #################################
180
181
182 set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: exac03: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: cosmic68: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage
183 [ $# -eq 0 ] && usage
184
185
186
187 while [ $# -gt 0 ]
188 do
189 case "$1" in
190 --inputfile) infile=$2;shift;; # inputfile
191 --buildver) buildvertmp=$2;shift;; # hg18 or hg19
192 --humandb) humandbtmp=$2;shift;; # location of humandb database
193 --varfile) varfile=$2;shift;; # Y or N
194 --VCF) vcf=$2;shift;; #Y or N
195 --chrcol) chrcol=$2;shift;; # which column has chr
196 --startcol) startcol=$2;shift;; # which column has start
197 --endcol) endcol=$2;shift;; # which column has end
198 --refcol) refcol=$2;shift;; # which column has ref
199 --obscol) obscol=$2;shift;; # which column has alt
200 --vartypecol) vartypecol=$2;shift;; # which column has vartype
201 --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based?
202 --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene
203 --hgvs) hgvs=$2;shift;;
204 --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g. "132,135NonFlagged,137,138")"
205 --tfbs) tfbs=$2;shift;; # Y or N
206 --mce) mce=$2;shift;; # Y or N
207 --cytoband) cytoband=$2;shift;; # Y or N
208 --segdup) segdup=$2;shift;; # Y or N
209 --dgv) dgv=$2;shift;; # Y or N
210 --gwas) gwas=$2;shift;; # Y or N
211 --ver1000g) ver1000g=$2;shift;; # Y or N
212 --cg46) cg46=$2;shift;;
213 --cg69) cg69=$2;shift;;
214 --impactscores) impactscores=$2;shift;; # Y or N
215 --newimpactscores) newimpactscores=$2;shift;; # Y or N
216 --otherinfo) otherinfo=$2;shift;;
217 --scriptsdir) scriptsdirtmp=$2;shift;; # Y or N
218 --esp) esp=$2;shift;; # Y or N
219 --exac03) exac03=$2;shift;;
220 --gerp) gerp=$2;shift;; # Y or N
221 --cosmic61) cosmic61=$2;shift;; # Y or N
222 --cosmic63) cosmic63=$2;shift;; # Y or N
223 --cosmic64) cosmic64=$2;shift;; # Y or N
224 --cosmic65) cosmic65=$2;shift;; # Y or N
225 --cosmic67) cosmic67=$2;shift;; # Y or N
226 --cosmic68) cosmic68=$2;shift;; # Y or N
227 --nci60) nci60=$2;shift;; # Y or N
228 --clinvar) clinvar=$2;shift;; # Y or N
229 --filt_dbsnp) filt_dbsnp=$2;shift;;
230 --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value
231 --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value
232 --filt1000GAMR) threshold_1000g_AMR=$2;shift;; #threshold value
233 --filt1000GASN) threshold_1000g_ASN=$2;shift;; #threshold value
234 --filt1000GEUR) threshold_1000g_EUR=$2;shift;; #threshold value
235 --filtESP6500ALL) threshold_ESP6500_ALL=$2;shift;; #threshold value
236 --filtESP6500EA) threshold_ESP6500_EA=$2;shift;; #threshold value
237 --filtESP6500AA) threshold_ESP6500_AA=$2;shift;; #threshold value
238 --filtcg46) threshold_cg46=$2;shift;;
239 --filtcg69) threshold_cg69=$2;shift;;
240 --outall) outfile_all=$2;shift;; # file
241 --outfilt) outfile_filt=$2;shift;; # file
242 --outinvalid) outfile_invalid=$2;shift;; #file
243 --dorunannovar) dorunannovar=$2;shift;; #Y or N
244 -h) shift;;
245 --) shift;break;;
246 -*) usage;;
247 *) break;;
248 esac
249 shift
250 done
251
252 #sometimes galaxy screws up these variables after updates, if comma-separated list, use only what is before first comma
253 humandb=${humandbtmp%,*}
254 buildver=${buildvertmp%,*}
255 scriptsdir=${scriptsdirtmp%,*}
256
257
258 if [ $test == "Y" ]
259 then
260 echo "dorunannovar: $dorunannovar"
261 echo "infile: $infile"
262 echo "buildver: $buildver"
263 echo "annovardb: $humandb"
264 echo "verdbnsp: $verdbsnp"
265 echo "geneanno: $geneanno"
266 echo "tfbs: $tfbs"
267 echo "mce: $mce"
268 echo "cytoband: $cytoband"
269 echo "segdup: $segdup"
270 echo "dgv: $dgv"
271 echo "gwas: $gwas"
272 echo "g1000: ${g1000}"
273 echo "cg46: ${cg46}"
274 echo "cg69: ${cg69}"
275 echo "impactscores: $impactscores"
276 echo "impactscores: $newimpactscores"
277 echo "esp: $esp"
278 echo "gerp: $gerp"
279 echo "cosmic: $cosmic"
280 echo "outfile: $outfile_all"
281 echo "outinvalid: $outfile_invalid"
282 echo "outfiltered: $outfile_filt"
283 echo "varfile: $varfile"
284 echo "vcf" $vcf
285 echo "chrcol: $chrcol"
286 echo "startcol: $startcol"
287 echo "endcol: $endcol"
288 echo "refcol: $refcol"
289 echo "obscol: $obscol"
290 echo "convertcoords: $convertcoords"
291 echo "vartypecol: $vartypecol"
292 echo "dofilter: $dofilter"
293 echo "threshold_1000g_ALL : $threshold_1000g_ALL"
294 echo "threshold_1000g_AFR : $threshold_1000g_AFR"
295 echo "threshold_1000g_AMR : $threshold_1000g_AMR"
296 echo "threshold_1000g_ASN : $threshold_1000g_ASN"
297 echo "threshold_1000g_EUR : $threshold_1000g_EUR"
298 echo "threshold_ESP6500_ALL: $threshold_ESP6500_ALL"
299 echo "threshold_ESP6500_EA : $threshold_ESP6500_EA"
300 echo "threshold_ESP6500_AA : $threshold_ESP6500_AA"
301
302 fi
303
304
305
306 ############################################
307 #
308 # Annotate Variants
309 #
310 ############################################
311
312 #parse geneanno param
313 refgene="N"
314 knowngene="N"
315 ensgene="N"
316
317 if [[ $geneanno =~ "refSeq" ]]
318 then
319 refgene="Y"
320 fi
321 if [[ $geneanno =~ "knowngene" ]]
322 then
323 knowngene="Y"
324 fi
325 if [[ $geneanno =~ "ensgene" ]]
326 then
327 ensgene="Y"
328 fi
329 if [ $hgvs == "N" ]
330 then
331 hgvs=""
332 fi
333
334 #parse verdbsnp/1000g/esp strings
335 dbsnpstr=${verdbsnp//,/ }
336 filt_dbsnpstr=${filt_dbsnp//,/ }
337 g1000str=${ver1000g//,/ }
338 espstr=${esp//,/ }
339
340 if [ $test == "Y" ]
341 then
342 echo "annotate dbsnp: $dbsnpstr"
343 echo "annotate esp: $espstr"
344 echo "filter dbsnp: $filt_dbsnpstr"
345 fi
346
347 mutationtaster="N"
348 avsift="N"
349 lrt="N"
350 polyphen2="N"
351 phylop="N"
352 ljbsift="N"
353
354 #parse old impactscores param (obsolete)
355 if [[ $impactscores =~ "mutationtaster" ]]
356 then
357 mutationtaster="Y"
358 fi
359 if [[ $impactscores =~ "sift" ]]
360 then
361 avsift="Y"
362 fi
363 if [[ $impactscores =~ "lrt" ]]
364 then
365 lrt="Y"
366 fi
367 if [[ $impactscores =~ "ljbsift" ]]
368 then
369 ljbsift="Y"
370 fi
371 if [[ $impactscores =~ "ljb2sift" ]]
372 then
373 ljb2sift="Y"
374 fi
375 if [[ $impactscores =~ "pp2" ]]
376 then
377 polyphen2="Y"
378 fi
379 if [[ $impactscores =~ "phylop" ]]
380 then
381 phylop="Y"
382 fi
383
384 if [[ $varfile == "Y" ]]
385 then
386 convertcoords="Y"
387 fi
388
389 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
390
391 ljb2_sift="N"
392 ljb2_pp2hdiv="N"
393 ljb2_pp2hvar="N"
394 ljb2_lrt="N"
395 ljb2_mt="N"
396 ljb2_ma="N"
397 ljb2_fathmm="N"
398 ljb2_gerp="N"
399 ljb2_phylop="N"
400 ljb2_siphy="N"
401
402 # parse ljb2 newimpactscores param
403 # ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy
404 if [[ $newimpactscores =~ "ljb2_sift" ]]
405 then
406 ljb2_sift="Y"
407 fi
408 if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]]
409 then
410 ljb2_pp2hdiv="Y"
411 fi
412 if [[ $newimpactscores =~ "ljb2_pp2hvar" ]]
413 then
414 ljb2_pp2hvar="Y"
415 fi
416 if [[ $newimpactscores =~ "ljb2_lrt" ]]
417 then
418 ljb2_lrt="Y"
419 fi
420 if [[ $newimpactscores =~ "ljb2_mt" ]]
421 then
422 ljb2_mt="Y"
423 fi
424 if [[ $newimpactscores =~ "ljb2_ma" ]]
425 then
426 ljb2_ma="Y"
427 fi
428 if [[ $newimpactscores =~ "ljb2_fathmm" ]]
429 then
430 ljb2_fathmm="Y"
431 fi
432 if [[ $newimpactscores =~ "ljb2_gerp" ]]
433 then
434 ljb2_gerp="Y"
435 fi
436 if [[ $newimpactscores =~ "ljb2_phylop" ]]
437 then
438 ljb2_phylop="Y"
439 fi
440 if [[ $newimpactscores =~ "ljb2_siphy" ]]
441 then
442 ljb2_siphy="Y"
443 fi
444
445 if [ $otherinfo == "N" ]
446 then
447 otherinfo=""
448 fi
449
450
451 #column header names we will be adding
452 # ESP 6500
453 esp6500si_colheader_ALL="ESP6500si_ALL"
454 esp6500si_colheader_EA="ESP6500si_EA"
455 esp6500si_colheader_AA="ESP6500si_AA"
456 esp6500_colheader_ALL="ESP6500_ALL"
457 esp6500_colheader_EA="ESP6500_EA"
458 esp6500_colheader_AA="ESP6500_AA"
459 esp5400si_colheader_ALL="ESP5400si_ALL"
460 esp5400si_colheader_EA="ESP5400si_EA"
461 esp5400si_colheader_AA="ESP5400si_AA"
462 esp5400_colheader_ALL="ESP5400_ALL"
463 esp5400_colheader_EA="ESP5400_EA"
464 esp5400_colheader_AA="ESP5400_AA"
465
466
467 # cg46 cg69
468 cg46_colheader="CG_46_genomes"
469 cg69_colheader="CG_69_genomes"
470
471 cp $infile originalfile
472 #run annovar or filter only?
473 if [ $dorunannovar == "Y" ]
474 then
475
476
477 ####################################
478 #
479 # PREPARE INPUT FILE
480 #
481 ####################################
482
483 echo "converting input file"
484 vcfheader=""
485 if [ $vcf == "Y" ] #if CG varfile, convert
486 then
487 # convert vcf to annovarinput
488 $scriptsdir/convert2annovar.pl --format vcf4old --allallele --includeinfo --outfile annovarinput $infile 2>&1
489
490 #construct header line from vcf file
491 cat $infile | grep "#CHROM" > additionalcols
492 sed -i 's/#//g' additionalcols
493 vcfheader="\t`cat additionalcols`"
494 echo "vcfheader:$vcfheader"
495 echo -e "chromosome\tbegin\tend\treference\tobserved\t`cat additionalcols`" > originalfile
496 cat annovarinput >> originalfile
497
498 chrcol=1
499 startcol=2
500 endcol=3
501 refcol=4
502 obscol=5
503
504
505 elif [ $varfile == "Y" ] #if CG varfile, convert
506 then
507 # convert varfile
508 $scriptsdir/convert2annovar.pl --format cg --outfile annovarinput $infile 2>&1
509 echo -e "chromosome\tbegin\tend\treference\talleleSeq\tvarType\thaplotype" > originalfile
510 cat annovarinput | cut -f1-6,8 >> originalfile
511 cat annovarinput | cut -f1-5 >> annovarinput2
512 mv annovarinput2 annovarinput
513
514 chrcol=1
515 startcol=2
516 endcol=3
517 refcol=4
518 obscol=5
519
520 elif [ $convertcoords == "Y" ] # if CG-coordinates, convert
521 then
522 #echo "rearranging columns and converting coordinates"
523 awk 'BEGIN{
524 FS="\t";
525 OFS="\t";
526 }{
527 if(FNR>1) {
528 gsub(/chr/,"",$"'"${chrcol}"'")
529 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
530 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
531 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
532 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };
533
534 printf("%s\t%s\t%s\t%s\t%s\n" ,$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
535 }
536 }
537 END{
538 }' $infile > annovarinput
539
540 #remove any "chr" prefixes
541 #sed -i '2,$s/chr//g' annovarinput
542
543 awk 'BEGIN{
544 FS="\t";
545 OFS="\t";
546 }{
547
548 if(FNR>=1) {
549 gsub(/chr/,"",$"'"${chrcol}"'")
550 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" += 1 };
551 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "-" };
552 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" +=1; $"'"${obscol}"'" = "-" };
553 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" += 1 };
554
555 print $0
556 }
557 }
558 END{
559 }' $infile > originalfile
560
561 #remove any "chr" prefixes
562 #sed -i '2,$s/chr//g' originalfile
563 sed -i 's/omosome/chromosome/g' originalfile
564
565
566 else #only rearrange columns if already 1-based coordinates
567 echo "rearranging columns "
568 awk 'BEGIN{
569 FS="\t";
570 OFS="\t";
571 }{
572 if(FNR>1) {
573 printf("%s\t%s\t%s\t%s\t%s\n",$"'"${chrcol}"'",$"'"${startcol}"'",$"'"${endcol}"'",$"'"${refcol}"'",$"'"${obscol}"'");
574 }
575 }
576 END{
577 }' $infile > annovarinput
578
579 #remove any "chr" prefixes
580 sed -i '2,$s/chr//g' annovarinput
581 sed '2,$s/chr//g' $infile > originalfile
582 sed -i 's/omosome/chromosome/g' originalfile
583 fi
584
585 echo "...finished conversion"
586
587
588
589
590 ####################################
591 #
592 # RUN ANNOVAR COMMANDS
593 #
594 ####################################
595
596
597
598 ###### gene-based annotation #######
599
600 # RefSeq Gene
601 if [ $refgene == "Y" ]
602 then
603 echo -e "\nrefSeq gene"
604 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene ${hgvs} annovarinput $humandb 2>&1
605
606 annovarout=annovarinput.variant_function
607 sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
608 joinresults originalfile $annovarout 3 4 5 6 7 B.RefSeq_Func,B.RefSeq_Gene
609
610 annovarout=annovarinput.exonic_variant_function
611 sed -i '1i\linenum\tRefSeq_ExonicFunc\tRefSeq_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
612 joinresults originalfile $annovarout 4 5 6 7 8 B.RefSeq_ExonicFunc,B.RefSeq_AAChange
613 fi
614
615
616 # UCSC KnownGene
617 if [ $knowngene == "Y" ]
618 then
619 echo -e "\nUCSC known gene"
620 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype knowngene annovarinput $humandb 2>&1
621
622 annovarout=annovarinput.variant_function
623 sed -i '1i\UCSCKnownGene_Func\tUCSCKnownGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
624 joinresults originalfile $annovarout 3 4 5 6 7 B.UCSCKnownGene_Func,B.UCSCKnownGene_Gene
625
626 annovarout=annovarinput.exonic_variant_function
627 sed -i '1i\linenum\tUCSCKnownGene_ExonicFunc\tUCSCKnownGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
628 joinresults originalfile $annovarout 4 5 6 7 8 B.UCSCKnownGene_ExonicFunc,B.UCSCKnownGene_AAChange
629 fi
630
631
632 # Emsembl Gene
633 if [ $ensgene == "Y" ]
634 then
635 echo -e "\nEnsembl gene"
636 $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype ensgene annovarinput $humandb 2>&1
637
638 annovarout=annovarinput.variant_function
639 sed -i '1i\EnsemblGene_Func\tEnsemblGene_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
640 joinresults originalfile $annovarout 3 4 5 6 7 B.EnsemblGene_Func,B.EnsemblGene_Gene
641
642 annovarout=annovarinput.exonic_variant_function
643 sed -i '1i\linenum\tEnsemblGene_ExonicFunc\tEnsemblGene_AAChange\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
644 joinresults originalfile $annovarout 4 5 6 7 8 B.EnsemblGene_ExonicFunc,B.EnsemblGene_AAChange
645 fi
646
647
648
649 ###### region-based annotation #######
650
651
652 # Transcription Factor Binding Sites Annotation
653 if [ $mce == "Y" ]
654 then
655 echo -e "\nMost Conserved Elements"
656
657 if [ $buildver == "hg18" ]
658 then
659 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce44way annovarinput $humandb 2>&1
660 annovarout=annovarinput.${buildver}_phastConsElements44way
661 sed -i '1i\db\tphastConsElements44way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
662 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements44way
663
664 else #hg19
665 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype mce46way annovarinput $humandb 2>&1
666 annovarout=annovarinput.${buildver}_phastConsElements46way
667 sed -i '1i\db\tphastConsElements46way\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
668 joinresults originalfile $annovarout 3 4 5 6 7 B.phastConsElements46way
669 fi
670
671 fi
672
673
674
675 # Transcription Factor Binding Sites Annotation
676 if [ $tfbs == "Y" ]
677 then
678 echo -e "\nTranscription Factor Binding Site Annotation"
679 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype tfbs annovarinput $humandb 2>&1
680
681 # arguments: originalfile, resultfile,chrcol,startcol,endcol,refcol,obscol,selectcolumns
682 annovarout=annovarinput.${buildver}_tfbsConsSites
683 sed -i '1i\db\tTFBS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
684 joinresults originalfile $annovarout 3 4 5 6 7 B.TFBS
685 fi
686
687
688
689 # Identify cytogenetic band for genetic variants
690 if [ $cytoband == "Y" ]
691 then
692 echo -e "\nCytogenic band Annotation"
693 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype band annovarinput $humandb 2>&1
694
695 annovarout=annovarinput.${buildver}_cytoBand
696 sed -i '1i\db\tBand\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
697 joinresults originalfile $annovarout 3 4 5 6 7 B.Band
698 fi
699
700
701 # Identify variants located in segmental duplications
702 if [ $segdup == "Y" ]
703 then
704 echo -e "\nSegmental Duplications Annotation"
705 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype segdup annovarinput $humandb 2>&1
706
707 annovarout=annovarinput.${buildver}_genomicSuperDups
708 sed -i '1i\db\tSegDup\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
709 joinresults originalfile $annovarout 3 4 5 6 7 B.SegDup
710 fi
711
712
713
714 # Identify previously reported structural variants in DGV
715 if [ $dgv == "Y" ]
716 then
717 echo -e "\nDGV Annotation"
718 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1
719
720 annovarout=annovarinput.${buildver}_dgvMerged
721 sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
722 joinresults originalfile $annovarout 3 4 5 6 7 B.DGV
723 fi
724
725
726 # Identify variants reported in previously published GWAS studies
727 if [ $gwas == "Y" ]
728 then
729 echo -e "\nGWAS Annotation"
730 $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype gwascatalog annovarinput $humandb 2>&1
731
732 annovarout=annovarinput.${buildver}_gwasCatalog
733 sed -i '1i\db\tGWAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
734 joinresults originalfile $annovarout 3 4 5 6 7 B.GWAS
735 fi
736
737
738
739
740 ###### filter-based annotation #######
741
742 #dbSNP
743 for version in $dbsnpstr
744 do
745 if [ $version == "None" ]
746 then
747 break
748 fi
749 echo -e "\ndbSNP region Annotation, version: $version"
750 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ${version} annovarinput $humandb 2>&1
751
752 annovarout=annovarinput.${buildver}_${version}_dropped
753 sed -i '1i\db\tdb'${version}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
754 joinresults originalfile $annovarout 3 4 5 6 7 B.db${version}
755
756
757 done
758
759
760
761 #1000 Genomes
762
763 if [ $ver1000g != "None" ]
764 then
765
766 for version in $g1000str
767 do
768 #column headers
769 g1000_colheader_ALL="${version}_ALL"
770 g1000_colheader_AFR="${version}_AFR"
771 g1000_colheader_AMR="${version}_AMR"
772 g1000_colheader_ASN="${version}_ASN"
773 g1000_colheader_EUR="${version}_EUR"
774 g1000_colheader_EAS="${version}_EAS"
775 g1000_colheader_SAS="${version}_SAS"
776 g1000_colheader_CEU="${version}_CEU"
777 g1000_colheader_YRI="${version}_YRI"
778 g1000_colheader_JPTCHB="${version}_JPTCHB"
779
780 doALL="N"
781 doAMR="N"
782 doAFR="N"
783 doASN="N"
784 doEAS="N"
785 doSAS="N"
786 doEUR="N"
787 doCEU="N"
788 doYRI="N"
789 doJPTCHB="N"
790
791
792 if [ $version == "1000g2012apr" ]
793 then
794 fileID="2012_04"
795 doALL="Y"
796 if [ $buildver == "hg19" ]
797 then
798 doAMR="Y"
799 doAFR="Y"
800 doASN="Y"
801 doEUR="Y"
802 fi
803 elif [ $version == "1000g2014oct" ]
804 then
805 fileID="2014_10"
806 doALL="Y"
807 doAMR="Y"
808 doAFR="Y"
809 doEUR="Y"
810 doEAS="Y"
811 if [ $buildver == "hg19" ]
812 then
813 doSAS="Y"
814 fi
815 elif [[ $version == "1000g2012feb" && $buildver == "hg19" ]]
816 then
817 fileID="2012_02"
818 doALL="Y"
819 elif [[ $version == "1000g2010nov" && $buildver == "hg19" ]]
820 then
821 fileID="2010_11"
822 doALL="Y"
823 elif [[ $version == "1000g2010jul" && $buildver == "hg18" ]]
824 then
825 fileID="2010_07"
826 doALL="N"
827 doCEU="Y"
828 doYRI="Y"
829 doJPTCHB="Y"
830 else
831 echo "unrecognized 1000g version, skipping"
832 fi
833
834 #ALL
835 if [ $doALL == "Y" ]
836 then
837 echo -e "\n1000Genomes ALL"
838 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_all" annovarinput $humandb 2>&1
839
840 annovarout=annovarinput.${buildver}_ALL.sites.${fileID}_dropped
841 sed -i '1i\db\t'$g1000_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
842 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ALL
843 fi
844
845 # AFR
846 if [ $doAFR == "Y" ]
847 then
848 echo -e "\n1000Genomes AFR"
849 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_afr" annovarinput $humandb 2>&1
850
851 annovarout=annovarinput.${buildver}_AFR.sites.${fileID}_dropped
852 sed -i '1i\db\t'$g1000_colheader_AFR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
853 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AFR
854 fi
855
856
857 # AMR
858 if [ $doAMR == "Y" ]
859 then
860 echo -e "\n1000Genomes AMR"
861 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_amr" annovarinput $humandb 2>&1
862
863 annovarout=annovarinput.${buildver}_AMR.sites.${fileID}_dropped
864 sed -i '1i\db\t'$g1000_colheader_AMR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
865 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_AMR
866 fi
867
868 # ASN
869 if [ $doASN == "Y" ]
870 then
871 echo -e "\n1000Genomes ASN"
872 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_asn" annovarinput $humandb 2>&1
873
874 annovarout=annovarinput.${buildver}_ASN.sites.${fileID}_dropped
875 sed -i '1i\db\t'$g1000_colheader_ASN'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
876 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_ASN
877 fi
878
879 # EAS
880 if [ $doEAS == "Y" ]
881 then
882 echo -e "\n1000Genomes EAS"
883 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eas" annovarinput $humandb 2>&1
884
885 annovarout=annovarinput.${buildver}_EAS.sites.${fileID}_dropped
886 sed -i '1i\db\t'$g1000_colheader_EAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
887 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EAS
888 fi
889
890 # SAS
891 if [ $doSAS == "Y" ]
892 then
893 echo -e "\n1000Genomes SAS"
894 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_sas" annovarinput $humandb 2>&1
895
896 annovarout=annovarinput.${buildver}_SAS.sites.${fileID}_dropped
897 sed -i '1i\db\t'$g1000_colheader_SAS'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
898 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_SAS
899 fi
900
901 # EUR
902 if [ $doEUR == "Y" ]
903 then
904 echo -e "\n1000Genomes EUR"
905 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_eur" annovarinput $humandb 2>&1
906
907 annovarout=annovarinput.${buildver}_EUR.sites.${fileID}_dropped
908 sed -i '1i\db\t'$g1000_colheader_EUR'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
909 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_EUR
910 fi
911
912 # CEU
913 if [ $doCEU == "Y" ]
914 then
915 echo -e "\n1000Genomes CEU"
916 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_ceu" annovarinput $humandb 2>&1
917
918 annovarout=annovarinput.${buildver}_CEU.sites.${fileID}_dropped
919 sed -i '1i\db\t'$g1000_colheader_CEU'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
920 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_CEU
921 fi
922
923 # YRI
924 if [ $doYRI == "Y" ]
925 then
926 echo -e "\n1000Genomes YRI"
927 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_yri" annovarinput $humandb 2>&1
928
929 annovarout=annovarinput.${buildver}_YRI.sites.${fileID}_dropped
930 sed -i '1i\db\t'$g1000_colheader_YRI'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
931 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_YRI
932
933
934 fi
935
936 #JPTCHB
937 if [ $doJPTCHB == "Y" ]
938 then
939 echo -e "\n1000Genomes JPTCHB"
940 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype "${version}_jptchb" annovarinput $humandb 2>&1
941
942 annovarout=annovarinput.${buildver}_JPTCHB.sites.${fileID}_dropped
943 sed -i '1i\db\t'$g1000_colheader_JPTCHB'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
944 joinresults originalfile $annovarout 3 4 5 6 7 B.$g1000_colheader_JPTCHB
945 fi
946
947 done
948 fi
949
950
951
952
953 #### IMPACT SCORE ANNOTATIONS
954
955
956 if [ $ljb2_sift == "Y" ]
957 then
958 echo -e "\nLJB2 SIFT Annotation"
959 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1
960
961 annovarout=annovarinput.${buildver}_ljb2_sift_dropped
962 sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
963 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT
964 fi
965
966 if [ $ljb2_pp2hdiv == "Y" ]
967 then
968 echo -e "\nLJB2 pp2hdiv Annotation"
969 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1
970
971 annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped
972 sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
973 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV
974 fi
975
976 if [ $ljb2_pp2hvar == "Y" ]
977 then
978 echo -e "\nLJB2 pp2hvar Annotation"
979 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1
980
981 annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped
982
983 head $annovarout
984 sed -i '1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
985 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR
986 fi
987
988 if [ $ljb2_lrt == "Y" ]
989 then
990 echo -e "\nLJB2 LRT Annotation"
991 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1
992
993 annovarout=annovarinput.${buildver}_ljb2_lrt_dropped
994 sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
995 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT
996 fi
997
998 if [ $ljb2_mt == "Y" ]
999 then
1000 echo -e "\nLJB2 mutationtaster Annotation"
1001 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1
1002
1003 annovarout=annovarinput.${buildver}_ljb2_mt_dropped
1004 sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1005 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster
1006 fi
1007
1008 if [ $ljb2_ma == "Y" ]
1009 then
1010 echo -e "\nLJB2 mutationassessor Annotation"
1011 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1
1012
1013 annovarout=annovarinput.${buildver}_ljb2_ma_dropped
1014 sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1015 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor
1016 fi
1017
1018 if [ $ljb2_fathmm == "Y" ]
1019 then
1020 echo -e "\nLJB2 FATHMM Annotation"
1021 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1
1022
1023 annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped
1024 sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1025 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM
1026 fi
1027
1028 if [ $ljb2_gerp == "Y" ]
1029 then
1030 echo -e "\nLJB2 GERP++ Annotation"
1031 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1
1032
1033 annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped
1034 sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1035 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++
1036 fi
1037
1038 if [ $ljb2_phylop == "Y" ]
1039 then
1040 echo -e "\nLJB2 PhyloP Annotation"
1041 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1
1042
1043 annovarout=annovarinput.${buildver}_ljb2_phylop_dropped
1044 sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1045 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP
1046 fi
1047
1048 if [ $ljb2_siphy == "Y" ]
1049 then
1050 echo -e "\nLJB2 SiPhy Annotation"
1051 $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1
1052
1053 annovarout=annovarinput.${buildver}_ljb2_siphy_dropped
1054 sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1055 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy
1056 fi
1057
1058
1059
1060 ### OLD IMPACT SCORE ANNOTATIONS
1061
1062 # SIFT
1063 if [ $avsift == "Y" ]
1064 then
1065 echo -e "\nSIFT Annotation"
1066 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype avsift annovarinput $humandb 2>&1
1067
1068 annovarout=annovarinput.${buildver}_avsift_dropped
1069 sed -i '1i\db\tAVSIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1070 joinresults originalfile $annovarout 3 4 5 6 7 B.AVSIFT
1071 fi
1072
1073 #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores
1074 # SIFT2
1075 if [ $ljbsift == "Y" ]
1076 then
1077 echo -e "\nLJB SIFT Annotation"
1078 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1
1079
1080 annovarout=annovarinput.${buildver}_ljb_sift_dropped
1081 sed -i '1i\db\tLJB_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1082 joinresults originalfile $annovarout 3 4 5 6 7 B.LJB_SIFT
1083 fi
1084
1085
1086 # PolyPhen2
1087 if [ $polyphen2 == "Y" ]
1088 then
1089 echo -e "\nPolyPhen Annotation"
1090 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_pp2 annovarinput $humandb 2>&1
1091
1092 annovarout=annovarinput.${buildver}_ljb_pp2_dropped
1093 sed -i '1i\db\tPolyPhen2\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1094 joinresults originalfile $annovarout 3 4 5 6 7 B.PolyPhen2
1095 fi
1096
1097
1098 # MutationTaster
1099 if [ $mutationtaster == "Y" ]
1100 then
1101 echo -e "\nMutationTaster Annotation"
1102 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_mt annovarinput $humandb 2>&1
1103
1104 annovarout=annovarinput.${buildver}_ljb_mt_dropped
1105 sed -i '1i\db\tMutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1106 joinresults originalfile $annovarout 3 4 5 6 7 B.MutationTaster
1107 fi
1108
1109
1110 # LRT
1111 if [ $lrt == "Y" ]
1112 then
1113 echo -e "\nLRT Annotation"
1114 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_lrt annovarinput $humandb 2>&1
1115
1116 annovarout=annovarinput.${buildver}_ljb_lrt_dropped
1117 sed -i '1i\db\tLikelihoodRatioTestScore\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1118 joinresults originalfile $annovarout 3 4 5 6 7 B.LikelihoodRatioTestScore
1119 fi
1120
1121 # PhyloP
1122 if [ $phylop == "Y" ]
1123 then
1124 echo -e "\nPhyloP Annotation"
1125 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_phylop annovarinput $humandb 2>&1
1126
1127 annovarout=annovarinput.${buildver}_ljb_phylop_dropped
1128 sed -i '1i\db\tPhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1129 joinresults originalfile $annovarout 3 4 5 6 7 B.PhyloP
1130 fi
1131
1132
1133 ### ESP Exome Variant Server
1134 if [ $esp != "None" ]
1135 then
1136 echo -e "\nESP Annotation"
1137 for version in $espstr
1138 do
1139 echo "version: $version"
1140 # 6500si ALL
1141 if [ $version == "esp6500si_all" ]
1142 then
1143 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1
1144
1145 annovarout=annovarinput.${buildver}_esp6500si_all_dropped
1146 sed -i '1i\db\t'$esp6500si_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1147 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_ALL
1148 fi
1149
1150
1151 # 6500si European American
1152 if [ $version == "esp6500si_ea" ]
1153 then
1154 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1
1155
1156 annovarout=annovarinput.${buildver}_esp6500si_ea_dropped
1157 sed -i '1i\db\t'$esp6500si_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
1158 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_EA
1159 fi
1160
1161 # 6500si African Americans
1162 if [ $version == "esp6500si_aa" ]
1163 then
1164 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1
1165
1166 annovarout=annovarinput.${buildver}_esp6500si_aa_dropped
1167 sed -i '1i\db\t'$esp6500si_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
1168 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500si_colheader_AA
1169 fi
1170
1171
1172 # 6500 ALL
1173 if [ $version == "esp6500_all" ]
1174 then
1175 ls
1176 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_all annovarinput $humandb 2>&1
1177
1178 annovarout=annovarinput.${buildver}_esp6500_all_dropped
1179 sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'" ' $annovarout
1180 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL
1181 fi
1182
1183
1184 # 6500 European American
1185 if [ $version == "esp6500_ea" ]
1186 then
1187 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_ea annovarinput $humandb 2>&1
1188 annovarout=annovarinput.${buildver}_esp6500_ea_dropped
1189 sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1190 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA
1191 fi
1192
1193 # 6500 African Americans
1194 if [ $version == "esp6500_aa" ]
1195 then
1196 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500_aa annovarinput $humandb 2>&1
1197
1198 annovarout=annovarinput.${buildver}_esp6500_aa_dropped
1199 sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1200 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA
1201 fi
1202
1203
1204 # 5400 ALL
1205 if [ $version == "esp5400_all" ]
1206 then
1207 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_all annovarinput $humandb 2>&1
1208
1209 annovarout=annovarinput.${buildver}_esp5400_all_dropped
1210 sed -i '1i\db\t'$esp5400_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1211 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_ALL
1212 fi
1213
1214
1215 # 5400 European American
1216 if [ $version == "esp5400_ea" ]
1217 then
1218 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_ea annovarinput $humandb 2>&1
1219
1220 annovarout=annovarinput.${buildver}_esp5400_ea_dropped
1221 sed -i '1i\db\t'$esp5400_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1222 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_EA
1223 fi
1224
1225 # 5400 African Americans
1226 if [ $version == "esp5400_aa" ]
1227 then
1228 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp5400_aa annovarinput $humandb 2>&1
1229
1230 annovarout=annovarinput.${buildver}_esp5400_aa_dropped
1231 sed -i '1i\db\t'$esp5400_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1232 joinresults originalfile $annovarout 3 4 5 6 7 B.$esp5400_colheader_AA
1233 fi
1234
1235 done
1236 fi
1237
1238
1239 #exac03
1240 if [ $exac03 == "Y" ]
1241 then
1242 echo -e "\nexac03 Annotation"
1243 $scriptsdir/annotate_variation.pl --filter -otherinfo --buildver $buildver $otherinfo -dbtype exac03 annovarinput $humandb 2>&1
1244
1245 #annovarout.tmp=annovarinput.${buildver}_exac03_dropped
1246
1247 # split allelefrequency column into several columns, one per population
1248 awk 'BEGIN{FS="\t"
1249 OFS="\t"
1250 }{
1251 gsub(",","\t",$2)
1252 print $0
1253 }END{}' annovarinput.${buildver}_exac03_dropped > $annovarout
1254
1255 head $annovarout
1256 sed -i '1i\db\tExAC_Freq\tExAC_AFR\tExAC_AMR\tExAC_EAS\tExAC_FIN\tExAC_NFE\tExAC_OTH\tExAC_SAS\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1257 joinresults originalfile $annovarout 10 11 12 13 14 B.ExAC_Freq,B.ExAC_AFR,B.ExAC_AMR,B.ExAC_EAS,B.ExAC_FIN,B.ExAC_NFE,B.ExAC_OTH,B.ExAC_SAS
1258 fi
1259
1260 #GERP++
1261 if [ $gerp == "Y" ]
1262 then
1263 echo -e "\nGERP++ Annotation"
1264 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype gerp++gt2 annovarinput $humandb 2>&1
1265
1266 annovarout="annovarinput.${buildver}_gerp++gt2_dropped"
1267 sed -i '1i\db\tGERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1268 joinresults originalfile $annovarout 3 4 5 6 7 B.GERP++
1269 fi
1270
1271
1272 #COSMIC
1273 if [[ $cosmic61 == "Y" && $buildver == "hg19" ]]
1274 then
1275 echo -e "\nCOSMIC61 Annotation"
1276 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic61 annovarinput $humandb 2>&1
1277
1278 annovarout="annovarinput.${buildver}_cosmic61_dropped"
1279 sed -i '1i\db\tCOSMIC61\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1280 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC61
1281
1282 fi
1283
1284 if [[ $cosmic63 == "Y" && $buildver == "hg19" ]]
1285 then
1286 echo -e "\nCOSMIC63 Annotation"
1287 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic63 annovarinput $humandb 2>&1
1288
1289 annovarout="annovarinput.${buildver}_cosmic63_dropped"
1290 sed -i '1i\db\tCOSMIC63\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1291 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC63
1292
1293 fi
1294
1295 if [[ $cosmic64 == "Y" && $buildver == "hg19" ]]
1296 then
1297 echo -e "\nCOSMIC64 Annotation"
1298 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic64 annovarinput $humandb 2>&1
1299
1300 annovarout="annovarinput.${buildver}_cosmic64_dropped"
1301 sed -i '1i\db\tCOSMIC64\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1302 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC64
1303
1304 fi
1305
1306 if [[ $cosmic65 == "Y" && $buildver == "hg19" ]]
1307 then
1308 echo -e "\nCOSMIC65 Annotation"
1309 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic65 annovarinput $humandb 2>&1
1310
1311 annovarout="annovarinput.${buildver}_cosmic65_dropped"
1312 sed -i '1i\db\tCOSMIC65\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1313 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC65
1314
1315 fi
1316
1317 if [[ $cosmic67 == "Y" && $buildver == "hg19" ]]
1318 then
1319 echo -e "\nCOSMIC67 Annotation"
1320 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1
1321
1322 annovarout="annovarinput.${buildver}_cosmic67_dropped"
1323 sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1324 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67
1325
1326 fi
1327
1328 if [[ $cosmic68 == "Y" && $buildver == "hg19" ]]
1329 then
1330 echo -e "\nCOSMIC68 Annotation"
1331 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic68 annovarinput $humandb 2>&1
1332
1333 annovarout="annovarinput.${buildver}_cosmic68_dropped"
1334 sed -i '1i\db\tCOSMIC68\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1335 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC68
1336
1337 fi
1338
1339 if [[ $cosmic70 == "Y" && $buildver == "hg19" ]]
1340 then
1341 echo -e "\nCOSMIC70 Annotation"
1342 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic70 annovarinput $humandb 2>&1
1343
1344 annovarout="annovarinput.${buildver}_cosmic70_dropped"
1345 sed -i '1i\db\tCOSMIC70\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1346 joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC70
1347
1348 fi
1349
1350 if [[ $clinvar == "Y" && $buildver == "hg19" ]]
1351 then
1352 echo -e "\nCLINVAR Annotation"
1353 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20140211 annovarinput $humandb 2>&1
1354
1355 annovarout="annovarinput.${buildver}_clinvar_20140211_dropped"
1356 sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1357 joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR
1358
1359 fi
1360
1361 if [[ $nci60 == "Y" && $buildver == "hg19" ]]
1362 then
1363 echo -e "\nNCI60 Annotation"
1364 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1
1365
1366 annovarout="annovarinput.${buildver}_nci60_dropped"
1367 sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1368 joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60
1369
1370 fi
1371
1372 #cg46
1373 if [[ $cg46 == "Y" ]]
1374 then
1375 echo -e "\nCG 46 genomes Annotation"
1376 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1
1377
1378 annovarout="annovarinput.${buildver}_cg46_dropped"
1379 sed -i '1i\db\t'${cg46_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1380 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}
1381
1382 fi
1383
1384
1385 #cg69
1386 if [[ $cg69 == "Y" ]]
1387 then
1388 echo -e "\nCG 69 genomes Annotation"
1389 $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1
1390
1391 annovarout="annovarinput.${buildver}_cg69_dropped"
1392 sed -i '1i\db\t'${cg69_colheader}'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout
1393 joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}
1394
1395 fi
1396
1397
1398
1399 if [ $convertcoords == "Y" ]
1400 then
1401 echo "converting back coordinates"
1402 awk 'BEGIN{
1403 FS="\t";
1404 OFS="\t";
1405 }{
1406 if (FNR==1)
1407 print $0
1408 if(FNR>1) {
1409 $"'"${chrcol}"'" = "chr"$"'"${chrcol}"'"
1410 if( $"'"${vartypecol}"'" == "snp" ){ $"'"${startcol}"'" -= 1 };
1411 if( $"'"${vartypecol}"'" == "ins" ){ $"'"${refcol}"'" = "" };
1412 if( $"'"${vartypecol}"'" == "del" ){ $"'"${startcol}"'" -=1; $"'"${obscol}"'" = "" };
1413 if( $"'"${vartypecol}"'" == "sub" ){ $"'"${startcol}"'" -= 1 };
1414 print $0
1415
1416 }
1417 }
1418 END{
1419 }' originalfile > originalfile_coords
1420 else
1421 mv originalfile originalfile_coords
1422 fi
1423
1424 #restore "chr" prefix?
1425
1426 #move to outputfile
1427 if [ ! -s annovarinput.invalid_input ]
1428 then
1429 echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input
1430 fi
1431
1432 cp originalfile_coords $outfile_all
1433 cp annovarinput.invalid_input $outfile_invalid 2>&1
1434
1435 sed -i 's/chrchr/chr/g' $outfile_all
1436 sed -i 's/chrchr/chr/g' $outfile_invalid
1437
1438 fi #if $dorunannovar
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461