changeset 1:028f435b6cfb draft default tip

Uploaded
author rdaveau
date Fri, 03 Aug 2012 05:50:41 -0400
parents f753b30013e6
children
files gfapts/README gfapts/gfap_r1.0_allvar_genomic_annotater.xml gfapts/gfap_r1.0_cdsvar_functional_annotater.xml gfapts/gfap_r1.0_known_var_finder.pl gfapts/gfap_r1.0_known_var_finder.pl~ gfapts/gfap_r1.0_known_var_finder.xml gfapts/gfap_r1.0_samvcf_data_parser.pl gfapts/gfap_r1.0_samvcf_data_parser.pl~ gfapts/gfap_r1.0_samvcf_data_parser.xml
diffstat 4 files changed, 522 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/gfapts/gfap_r1.0_known_var_finder.pl	Fri Jun 29 10:20:55 2012 -0400
+++ b/gfapts/gfap_r1.0_known_var_finder.pl	Fri Aug 03 05:50:41 2012 -0400
@@ -1,12 +1,91 @@
 #!/usr/bin/perl
 
 use strict;
-use lib 'inc/perlmod';
-use ngsutil qw[ :DEFAULT &varscan ];
+#use lib 'inc/perlmod';
+#use ngsutil qw[ :DEFAULT &varscan ];
 use warnings FATAL => qw[ numeric uninitialized ];
 use File::Basename;
 use Getopt::Long;
 
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#	TEMP include ngsutil.pm
+sub explode_varcall{	# explode_varcall(POS, REF, ALT): split one call into elementary variants; returns (\@POS, \@VAR) = start/end pairs and the matching ref/alt pairs
+		my $N=0;	# number of mismatching bases in the current alignment mask
+		$_=shift @_ foreach my($POS, $REF, $ALT);	# unpack the three arguments in order
+		$_=$POS foreach my($START, $END);	# both coordinates start at POS
+		my(@length, @range, @idx, @VAR, @POS);
+		@{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS);
+		push @length, length($_) foreach ($REF, $ALT);	# @length = (length of REF, length of ALT)
+		@range=sort{ $a<=>$b } @length;	# @range = (shorter length, longer length)
+		if($range[0]==1){	# at least one allele is a single base
+			if($range[1]!=1){	# the other allele is longer: remove the first base of both alleles
+				foreach ($REF, $ALT){
+						$_=substr($_, 1);
+						$_=~s/^$/-/;	# an emptied allele is written as '-'
+					}
+				if($length[0]!=1){	# REF is the long allele: the range covers the bases left in REF
+						$END+=$length[0]-1;
+						$START++;
+					}
+			}
+			push @POS, $START, $END;
+			push @VAR, $REF, $ALT;
+		}else{	# both alleles have two or more bases
+			my @N=();
+			undef $_ foreach my ($i, $VAR);
+			$_-=2 foreach (@length, @range);	# lengths become the last 0-based index of each allele once its first base is removed
+			$_++ foreach ($START, $END);	# coordinates now point at the second base
+			$_=substr($_, 1) foreach ($REF, $ALT);	# remove the first base of both alleles
+			my $indel='-' x ($range[1]-$range[0]);	# padding as long as the length difference
+			$VAR.=($_>$range[0])?	# left-aligned mask: '-' past the shorter allele, 0 for a mismatch, 1 for a match
+				('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))?
+					0:1) for 0 .. $range[1];
+			$N++ while $VAR =~ /0/g;	# count the mismatches of that mask
+			if($length[0]<$length[1]){	# REF shorter than ALT: also build the right-aligned mask
+				@VAR=($VAR);	# stash the left-aligned mask ...
+				@N=($N);	# ... and its mismatch count
+				$N=0;
+				undef($VAR);
+				$VAR.=($_>$range[0])?
+					('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))?
+						0:1) for reverse 0 .. $range[1];
+				$N++ while $VAR =~ /0/g;
+				if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); }	# right alignment has no fewer mismatches: restore the left-aligned mask
+				else{ $REF=$indel . $REF; }	# right alignment wins: pad REF on the left
+			}else{ $ALT.=$indel; }	# REF is not the shorter allele: pad ALT on the right
+			foreach (qw[ 0 \- ]){	# record [offset, length] of every run of mismatches, then of every run of '-'
+					push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g);
+				}
+			@{$_}=() foreach (\@VAR, \@POS);
+			foreach my $k (@idx){
+					push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT);	# REF then ALT substring of the run; an empty one becomes '-'
+					push @POS, ${$k}[0], ${$k}[0]+${$k}[1]-1;	# first and last offset of the run; spelled out because List::Util::sum is not imported by this script
+				}
+			$_+=$START foreach @POS;	# offsets -> coordinates
+			$_=~s/\-+/\-/ foreach @VAR;	# collapse a run of '-' into a single '-'
+			for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; }	# when the REF side is '-', the end is set equal to the start
+		}
+		return(\@POS, \@VAR);
+	}
+
+sub varscan{	# varscan(KNAME, FPATH, HREF): for each FPATH row whose chr:start:end key exists in %$HREF and whose ref/alt match, store the remaining fields ':'-joined under {KNAME}
+		my($kname, $fpath, $href)=@_;
+		defined $fpath or die "varscan: no file path given for '$kname'";	# a 3-arg open on undef would silently open an empty temporary file
+		open(my $in, '<', $fpath) or die "varscan: cannot open '$fpath': $!";	# lexical handle: the callers' global IN handle is left alone
+		while(my $line=<$in>){	# named variable: the caller's $_ is not clobbered
+				next if $line =~ /^#/;	# skip comment/header rows
+				chomp $line;
+				my @buffer=split /\s+/, $line;
+				next if @buffer<5;	# blank or short row: skipped instead of tripping FATAL 'uninitialized' warnings
+				my $k=join(':', @buffer[0..2]);	# same key format as the hash: first three fields joined by ':'
+				next if !exists $$href{$k};
+				next if $$href{$k}->{ref} !~ $buffer[3] || $$href{$k}->{alt} !~ $buffer[4];	# NOTE(review): the file's alleles are used as unanchored regexes (substring match, metacharacters active) - confirm an exact 'ne' test is not what was meant
+				$$href{$k}->{$kname}=join(':', @buffer[5..$#buffer]);	# everything after the five key/allele fields
+			}
+		close $in;
+	}
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
 my($varfile, $buildver, $outdir, $dir_1000g, $dir_dbsnp, $dir_cosmic, $release_1000g, $release_dbsnp, $release_cosmic, $outfile, $k, @buffer, @varlist, %opts, %varlist);
 
 GetOptions(\%opts, "varfile=s", "buildver=s", "outdir=s", "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s", "release_1000g=s", "release_dbsnp=s", "release_cosmic=s", "outfile=s");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gfapts/gfap_r1.0_known_var_finder.pl~	Fri Aug 03 05:50:41 2012 -0400
@@ -0,0 +1,177 @@
+#!/usr/bin/perl
+
+use strict;
+#use lib 'inc/perlmod';
+#use ngsutil qw[ :DEFAULT &varscan ];
+use warnings FATAL => qw[ numeric uninitialized ];
+use File::Basename;
+use Getopt::Long;
+
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#	ngsutil.pm
+sub explode_varcall{	# explode_varcall(POS, REF, ALT): split one call into elementary variants; returns (\@POS, \@VAR) = start/end pairs and the matching ref/alt pairs
+		my $N=0;	# number of mismatching bases in the current alignment mask
+		$_=shift @_ foreach my($POS, $REF, $ALT);	# unpack the three arguments in order
+		$_=$POS foreach my($START, $END);	# both coordinates start at POS
+		my(@length, @range, @idx, @VAR, @POS);
+		@{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS);
+		push @length, length($_) foreach ($REF, $ALT);	# @length = (length of REF, length of ALT)
+		@range=sort{ $a<=>$b } @length;	# @range = (shorter length, longer length)
+		if($range[0]==1){	# at least one allele is a single base
+			if($range[1]!=1){	# the other allele is longer: remove the first base of both alleles
+				foreach ($REF, $ALT){
+						$_=substr($_, 1);
+						$_=~s/^$/-/;	# an emptied allele is written as '-'
+					}
+				if($length[0]!=1){	# REF is the long allele: the range covers the bases left in REF
+						$END+=$length[0]-1;
+						$START++;
+					}
+			}
+			push @POS, $START, $END;
+			push @VAR, $REF, $ALT;
+		}else{	# both alleles have two or more bases
+			my @N=();
+			undef $_ foreach my ($i, $VAR);
+			$_-=2 foreach (@length, @range);	# lengths become the last 0-based index of each allele once its first base is removed
+			$_++ foreach ($START, $END);	# coordinates now point at the second base
+			$_=substr($_, 1) foreach ($REF, $ALT);	# remove the first base of both alleles
+			my $indel='-' x ($range[1]-$range[0]);	# padding as long as the length difference
+			$VAR.=($_>$range[0])?	# left-aligned mask: '-' past the shorter allele, 0 for a mismatch, 1 for a match
+				('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))?
+					0:1) for 0 .. $range[1];
+			$N++ while $VAR =~ /0/g;	# count the mismatches of that mask
+			if($length[0]<$length[1]){	# REF shorter than ALT: also build the right-aligned mask
+				@VAR=($VAR);	# stash the left-aligned mask ...
+				@N=($N);	# ... and its mismatch count
+				$N=0;
+				undef($VAR);
+				$VAR.=($_>$range[0])?
+					('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))?
+						0:1) for reverse 0 .. $range[1];
+				$N++ while $VAR =~ /0/g;
+				if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); }	# right alignment has no fewer mismatches: restore the left-aligned mask
+				else{ $REF=$indel . $REF; }	# right alignment wins: pad REF on the left
+			}else{ $ALT.=$indel; }	# REF is not the shorter allele: pad ALT on the right
+			foreach (qw[ 0 \- ]){	# record [offset, length] of every run of mismatches, then of every run of '-'
+					push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g);
+				}
+			@{$_}=() foreach (\@VAR, \@POS);
+			foreach my $k (@idx){
+					push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT);	# REF then ALT substring of the run; an empty one becomes '-'
+					push @POS, ${$k}[0], ${$k}[0]+${$k}[1]-1;	# first and last offset of the run; spelled out because List::Util::sum is not imported by this script
+				}
+			$_+=$START foreach @POS;	# offsets -> coordinates
+			$_=~s/\-+/\-/ foreach @VAR;	# collapse a run of '-' into a single '-'
+			for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; }	# when the REF side is '-', the end is set equal to the start
+		}
+		return(\@POS, \@VAR);
+	}
+
+sub varscan{	# varscan(KNAME, FPATH, HREF): for each FPATH row whose chr:start:end key exists in %$HREF and whose ref/alt match, store the remaining fields ':'-joined under {KNAME}
+		my($kname, $fpath, $href)=@_;
+		defined $fpath or die "varscan: no file path given for '$kname'";	# a 3-arg open on undef would silently open an empty temporary file
+		open(my $in, '<', $fpath) or die "varscan: cannot open '$fpath': $!";	# lexical handle: the callers' global IN handle is left alone
+		while(my $line=<$in>){	# named variable: the caller's $_ is not clobbered
+				next if $line =~ /^#/;	# skip comment/header rows
+				chomp $line;
+				my @buffer=split /\s+/, $line;
+				next if @buffer<5;	# blank or short row: skipped instead of tripping FATAL 'uninitialized' warnings
+				my $k=join(':', @buffer[0..2]);	# same key format as the hash: first three fields joined by ':'
+				next if !exists $$href{$k};
+				next if $$href{$k}->{ref} !~ $buffer[3] || $$href{$k}->{alt} !~ $buffer[4];	# NOTE(review): the file's alleles are used as unanchored regexes (substring match, metacharacters active) - confirm an exact 'ne' test is not what was meant
+				$$href{$k}->{$kname}=join(':', @buffer[5..$#buffer]);	# everything after the five key/allele fields
+			}
+		close $in;
+	}
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+my($varfile, $buildver, $outdir, $dir_1000g, $dir_dbsnp, $dir_cosmic, $release_1000g, $release_dbsnp, $release_cosmic, $outfile, $k, @buffer, @varlist, %opts, %varlist);	# script-wide working variables
+
+GetOptions(\%opts, "varfile=s", "buildver=s", "outdir=s", "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s", "release_1000g=s", "release_dbsnp=s", "release_cosmic=s", "outfile=s");	# every option takes a string value, collected in %opts
+$varfile        = $opts{varfile};
+$buildver       = $opts{buildver};
+$outdir         = $opts{outdir};
+$dir_1000g      = $opts{dir_1000g};
+$dir_dbsnp      = $opts{dir_dbsnp};
+$dir_cosmic     = $opts{dir_cosmic};
+$release_1000g  = $opts{release_1000g};
+$release_dbsnp  = $opts{release_dbsnp};
+$release_cosmic = $opts{release_cosmic};
+$outfile        = $opts{outfile};
+
+my $fname = readlink($varfile) || $varfile;	# if $varfile is a symlink use its target, otherwise the path itself
+$fname = basename($fname);	# keep only the file name
+
+my %k=(	# per-database settings: directory, release, default ':'-joined value and ':'-joined column names
+	'1000g' => {
+		'dir' => $dir_1000g, 'release' => $release_1000g, 'value' => join(':', ('0.00000')x5), 'header' => join(':', 'AF_ALL', 'AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR')
+	}, 'dbsnp' => {
+		'dir' => $dir_dbsnp, 'release' => $release_dbsnp, 'value' => join(':', ('na')x2), 'header' => join(':', 'rs', 'dbsnp')
+	}, 'cosmic_var' => {
+		'dir' => $dir_cosmic, 'release' => $release_cosmic, 'value' => join(':', '0.00000', 'na'), 'header' => join(':', 'AF_COS', 'cid')
+	}
+);
+
+my %legend=(	# column name -> description printed in the '#' header lines of the output
+	'chr' => 'chromosome identifier',
+	'start' => "${buildver} 1-based start position",
+	'end' => "${buildver} 1-based end position",
+	'ref' => 'reference allele',
+	'alt' => 'alternate allele',
+	'QC' => 'Phred-scaled call quality',
+	'NRF' => '#reads consistent w/ the reference allele on the F-strand',
+	'NRR' => '#reads consistent w/ the reference allele on the R-strand',
+	'NAF' => '#reads consistent w/ the alternate allele on the F-strand',
+	'NAR' => '#reads consistent w/ the alternate allele on the R-strand',
+	'DP' => 'total #reads in call ie. NRF+NRR+NAF+NAR',
+	'AD' => 'total #reads consistent w/ the alternate allele ie. NAF+NAR',
+	'AF' => 'alternate allele ratio ie. AD/DP',
+	'VCF.FILTER' => 'FILTER field from the input vcf file',
+	'DPT.FILTER' => 'check for heterogeneous depth in substituted blocks',
+	'VAR.FILTER' => 'GFAP default FILTER to discriminate between TP and FP variants',
+	'P.str' => 'NRF+NAF vs. NRR+NAR binomial test P-value ie. total strand bias',
+	'P.ref' => 'NRF vs. NRR binomial test P-value ie. reference allele strand bias',
+	'P.alt' => 'NAF vs. NAR binomial test P-value ie. alternate allele strand bias',
+	'AF_ALL' => "global AF in ${release_1000g} 1000g data",
+	'AF_AFR' => "AF in AFR ${release_1000g} 1000g data",
+	'AF_AMR' => "AF in AMR ${release_1000g} 1000g data",
+	'AF_ASN' => "AF in ASN ${release_1000g} 1000g data",
+	'AF_EUR' => "AF in EUR ${release_1000g} 1000g data",
+	'AF_COS' => "AF in ${release_cosmic} cosmic data",
+	'rs' => "dbsnp rs identifier(s) from ${release_dbsnp} release",
+	'dbsnp' => "dbsnp build version(s) from ${release_dbsnp} release",
+	'cid' => "cosmic mutation identifier from ${release_cosmic} release"
+);
+my @header=('chr', 'start', 'end', 'ref', 'alt', 'DPT.FILTER', 'QC', 'NRF', 'NRR', 'NAF', 'NAR', 'VCF.FILTER', 'P.str', 'P.ref', 'P.alt', 'DP', 'AD', 'AF', 'VAR.FILTER');	# base column names; the database columns are appended further down
+my @k=qw[ 1000g dbsnp cosmic_var ];	# database keys of %k, in processing order
+
+open IN, "<$varfile" or die $!;	# read the variant table named by --varfile
+while(<IN>){
+		chomp;
+		@buffer=split /\s+/, $_;
+		$buffer[0]=~s/^chr(.+)$/$1/;	# strip a leading "chr" from the first field
+		push @varlist, ($k=join(':', @buffer[0..2]));	# key = first three fields joined by ':'; @varlist keeps the input order
+		shift(@buffer) for 0..2;	# drop the three key fields
+		$varlist{$k}->{$_}=shift(@buffer) foreach qw[ ref alt ];	# the next two fields are stored as ref and alt
+		$varlist{$k}->{cov}=join(':', (($buffer[0] eq 'unk')?'SKIP':'PASS'), @buffer[1..$#buffer]);	# first remaining field becomes SKIP when it is 'unk', PASS otherwise; the rest is kept as is
+	}
+close IN;
+
+foreach $k (@k){
+		push @header, split(/:/, $k{$k}->{header});	# append this database's column names
+		varscan($k, $k{$k}->{file}, \%varlist);	# NOTE(review): no 'file' key is ever stored in %k in this file, so the path argument is undefined here - confirm how the database file path should be built from dir/release
+	}
+
+my @idx=(0..4,7..10,15..17,6,12..14,11,5,18..23,26..27,24..25);	# output column order, as indexes into @header and into each row's @buffer
+open(my $out, '>', "${outdir}/${fname}.dbi") or die "cannot write ${outdir}/${fname}.dbi: $!";
+print {$out} '#', join(' = ', $_, $legend{$_}), "\n" foreach @header[@idx];	# one legend line per output column
+print {$out} '#', join("\t", @header[@idx]), "\n";	# column header line
+foreach $k (@varlist){
+		@buffer=(split(/:/, 'chr'.$k), $varlist{$k}->{ref}, $varlist{$k}->{alt});	# the key gives chr/start/end once "chr" is put back
+		push @buffer, split(/:/, ($varlist{$k}->{$_} || $k{$_}->{value})) foreach ('cov', @k);	# a database without a stored annotation contributes its default value
+		print {$out} join("\t", @buffer[@idx]), "\n";
+	}
+close $out or die "cannot close ${outdir}/${fname}.dbi: $!";	# buffered write errors surface at close
+
+unlink($outfile); symlink("${outdir}/${fname}.dbi", $outfile) or die "cannot link $outfile to ${outdir}/${fname}.dbi: $!";	# same effect as "rm; ln -s" without a shell, and $! is meaningful here
\ No newline at end of file
--- a/gfapts/gfap_r1.0_samvcf_data_parser.pl	Fri Jun 29 10:20:55 2012 -0400
+++ b/gfapts/gfap_r1.0_samvcf_data_parser.pl	Fri Aug 03 05:50:41 2012 -0400
@@ -1,8 +1,8 @@
 #!/usr/bin/perl
 
 use strict;
-use lib 'inc/perlmod';
-use ngsutil qw[ :DEFAULT &explode_varcall ];
+# use lib 'inc/perlmod';
+# use ngsutil qw[ :DEFAULT &explode_varcall ];
 use warnings FATAL => qw[ numeric uninitialized ];
 use List::Util qw[ sum min max ];
 use File::Basename;
@@ -13,6 +13,85 @@
 my $rbin = '/usr/bin/R';
 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#	TEMP include ngsutil.pm
+sub explode_varcall{	# explode_varcall(POS, REF, ALT): split one call into elementary variants; returns (\@POS, \@VAR) = start/end pairs and the matching ref/alt pairs
+		my $N=0;	# number of mismatching bases in the current alignment mask
+		$_=shift @_ foreach my($POS, $REF, $ALT);	# unpack the three arguments in order
+		$_=$POS foreach my($START, $END);	# both coordinates start at POS
+		my(@length, @range, @idx, @VAR, @POS);
+		@{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS);
+		push @length, length($_) foreach ($REF, $ALT);	# @length = (length of REF, length of ALT)
+		@range=sort{ $a<=>$b } @length;	# @range = (shorter length, longer length)
+		if($range[0]==1){	# at least one allele is a single base
+			if($range[1]!=1){	# the other allele is longer: remove the first base of both alleles
+				foreach ($REF, $ALT){
+						$_=substr($_, 1);
+						$_=~s/^$/-/;	# an emptied allele is written as '-'
+					}
+				if($length[0]!=1){	# REF is the long allele: the range covers the bases left in REF
+						$END+=$length[0]-1;
+						$START++;
+					}
+			}
+			push @POS, $START, $END;
+			push @VAR, $REF, $ALT;
+		}else{	# both alleles have two or more bases
+			my @N=();
+			undef $_ foreach my ($i, $VAR);
+			$_-=2 foreach (@length, @range);	# lengths become the last 0-based index of each allele once its first base is removed
+			$_++ foreach ($START, $END);	# coordinates now point at the second base
+			$_=substr($_, 1) foreach ($REF, $ALT);	# remove the first base of both alleles
+			my $indel='-' x ($range[1]-$range[0]);	# padding as long as the length difference
+			$VAR.=($_>$range[0])?	# left-aligned mask: '-' past the shorter allele, 0 for a mismatch, 1 for a match
+				('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))?
+					0:1) for 0 .. $range[1];
+			$N++ while $VAR =~ /0/g;	# count the mismatches of that mask
+			if($length[0]<$length[1]){	# REF shorter than ALT: also build the right-aligned mask
+				@VAR=($VAR);	# stash the left-aligned mask ...
+				@N=($N);	# ... and its mismatch count
+				$N=0;
+				undef($VAR);
+				$VAR.=($_>$range[0])?
+					('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))?
+						0:1) for reverse 0 .. $range[1];
+				$N++ while $VAR =~ /0/g;
+				if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); }	# right alignment has no fewer mismatches: restore the left-aligned mask
+				else{ $REF=$indel . $REF; }	# right alignment wins: pad REF on the left
+			}else{ $ALT.=$indel; }	# REF is not the shorter allele: pad ALT on the right
+			foreach (qw[ 0 \- ]){	# record [offset, length] of every run of mismatches, then of every run of '-'
+					push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g);
+				}
+			@{$_}=() foreach (\@VAR, \@POS);
+			foreach my $k (@idx){
+					push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT);	# REF then ALT substring of the run; an empty one becomes '-'
+					push @POS, ${$k}[0], sum(@{$k})-1;	# first and last offset of the run (offset + length - 1)
+				}
+			$_+=$START foreach @POS;	# offsets -> coordinates
+			$_=~s/\-+/\-/ foreach @VAR;	# collapse a run of '-' into a single '-'
+			for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; }	# when the REF side is '-', the end is set equal to the start
+		}
+		return(\@POS, \@VAR);
+	}
+
+sub varscan{	# varscan(KNAME, FPATH, HREF): for each FPATH row whose chr:start:end key exists in %$HREF and whose ref/alt match, store the remaining fields ':'-joined under {KNAME}
+		my($kname, $fpath, $href)=@_;
+		defined $fpath or die "varscan: no file path given for '$kname'";	# a 3-arg open on undef would silently open an empty temporary file
+		open(my $in, '<', $fpath) or die "varscan: cannot open '$fpath': $!";	# lexical handle: the callers' global IN handle is left alone
+		while(my $line=<$in>){	# named variable: the caller's $_ is not clobbered
+				next if $line =~ /^#/;	# skip comment/header rows
+				chomp $line;
+				my @buffer=split /\s+/, $line;
+				next if @buffer<5;	# blank or short row: skipped instead of tripping FATAL 'uninitialized' warnings
+				my $k=join(':', @buffer[0..2]);	# same key format as the hash: first three fields joined by ':'
+				next if !exists $$href{$k};
+				next if $$href{$k}->{ref} !~ $buffer[3] || $$href{$k}->{alt} !~ $buffer[4];	# NOTE(review): the file's alleles are used as unanchored regexes (substring match, metacharacters active) - confirm an exact 'ne' test is not what was meant
+				$$href{$k}->{$kname}=join(':', @buffer[5..$#buffer]);	# everything after the five key/allele fields
+			}
+		close $in;
+	}
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
 my $annovar_dir = 'inc/annovar';
 my $rdep = 'inc/R';
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gfapts/gfap_r1.0_samvcf_data_parser.pl~	Fri Aug 03 05:50:41 2012 -0400
@@ -0,0 +1,183 @@
+#!/usr/bin/perl
+
+use strict;
+# use lib 'inc/perlmod';
+# use ngsutil qw[ :DEFAULT &explode_varcall ];
+use warnings FATAL => qw[ numeric uninitialized ];
+use List::Util qw[ sum min max ];
+use File::Basename;
+use Getopt::Long;
+
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#	PATH TO YOUR R-bin DIRECTORY
+my $rbin = '/usr/bin/R';
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#	ngsutil.pm
+sub explode_varcall{	# explode_varcall(POS, REF, ALT): split one call into elementary variants; returns (\@POS, \@VAR) = start/end pairs and the matching ref/alt pairs
+		my $N=0;	# number of mismatching bases in the current alignment mask
+		$_=shift @_ foreach my($POS, $REF, $ALT);	# unpack the three arguments in order
+		$_=$POS foreach my($START, $END);	# both coordinates start at POS
+		my(@length, @range, @idx, @VAR, @POS);
+		@{$_}=() foreach (\@length, \@range, \@idx, \@VAR, \@POS);
+		push @length, length($_) foreach ($REF, $ALT);	# @length = (length of REF, length of ALT)
+		@range=sort{ $a<=>$b } @length;	# @range = (shorter length, longer length)
+		if($range[0]==1){	# at least one allele is a single base
+			if($range[1]!=1){	# the other allele is longer: remove the first base of both alleles
+				foreach ($REF, $ALT){
+						$_=substr($_, 1);
+						$_=~s/^$/-/;	# an emptied allele is written as '-'
+					}
+				if($length[0]!=1){	# REF is the long allele: the range covers the bases left in REF
+						$END+=$length[0]-1;
+						$START++;
+					}
+			}
+			push @POS, $START, $END;
+			push @VAR, $REF, $ALT;
+		}else{	# both alleles have two or more bases
+			my @N=();
+			undef $_ foreach my ($i, $VAR);
+			$_-=2 foreach (@length, @range);	# lengths become the last 0-based index of each allele once its first base is removed
+			$_++ foreach ($START, $END);	# coordinates now point at the second base
+			$_=substr($_, 1) foreach ($REF, $ALT);	# remove the first base of both alleles
+			my $indel='-' x ($range[1]-$range[0]);	# padding as long as the length difference
+			$VAR.=($_>$range[0])?	# left-aligned mask: '-' past the shorter allele, 0 for a mismatch, 1 for a match
+				('-'):((substr($REF, $_, 1) ne substr($ALT, $_, 1))?
+					0:1) for 0 .. $range[1];
+			$N++ while $VAR =~ /0/g;	# count the mismatches of that mask
+			if($length[0]<$length[1]){	# REF shorter than ALT: also build the right-aligned mask
+				@VAR=($VAR);	# stash the left-aligned mask ...
+				@N=($N);	# ... and its mismatch count
+				$N=0;
+				undef($VAR);
+				$VAR.=($_>$range[0])?
+					('-'):((substr($REF, $length[0]-$_, 1) ne substr($ALT, $length[1]-$_, 1))?
+						0:1) for reverse 0 .. $range[1];
+				$N++ while $VAR =~ /0/g;
+				if($N>=$N[0]){ $N=shift(@N); $VAR=shift(@VAR); }	# right alignment has no fewer mismatches: restore the left-aligned mask
+				else{ $REF=$indel . $REF; }	# right alignment wins: pad REF on the left
+			}else{ $ALT.=$indel; }	# REF is not the shorter allele: pad ALT on the right
+			foreach (qw[ 0 \- ]){	# record [offset, length] of every run of mismatches, then of every run of '-'
+					push @idx, [ $-[0], $+[0]-$-[0] ] while ($VAR =~ /$_+/g);
+				}
+			@{$_}=() foreach (\@VAR, \@POS);
+			foreach my $k (@idx){
+					push @VAR, substr($_, ${$k}[0], ${$k}[1]) || '-' foreach ($REF, $ALT);	# REF then ALT substring of the run; an empty one becomes '-'
+					push @POS, ${$k}[0], sum(@{$k})-1;	# first and last offset of the run (offset + length - 1)
+				}
+			$_+=$START foreach @POS;	# offsets -> coordinates
+			$_=~s/\-+/\-/ foreach @VAR;	# collapse a run of '-' into a single '-'
+			for($i=0; $i<$#POS; $i+=2){ $POS[$i+1]=$POS[$i] if $VAR[$i] eq '-'; }	# when the REF side is '-', the end is set equal to the start
+		}
+		return(\@POS, \@VAR);
+	}
+
+sub varscan{	# varscan(KNAME, FPATH, HREF): for each FPATH row whose chr:start:end key exists in %$HREF and whose ref/alt match, store the remaining fields ':'-joined under {KNAME}
+		my($kname, $fpath, $href)=@_;
+		defined $fpath or die "varscan: no file path given for '$kname'";	# a 3-arg open on undef would silently open an empty temporary file
+		open(my $in, '<', $fpath) or die "varscan: cannot open '$fpath': $!";	# lexical handle: the callers' global IN handle is left alone
+		while(my $line=<$in>){	# named variable: the caller's $_ is not clobbered
+				next if $line =~ /^#/;	# skip comment/header rows
+				chomp $line;
+				my @buffer=split /\s+/, $line;
+				next if @buffer<5;	# blank or short row: skipped instead of tripping FATAL 'uninitialized' warnings
+				my $k=join(':', @buffer[0..2]);	# same key format as the hash: first three fields joined by ':'
+				next if !exists $$href{$k};
+				next if $$href{$k}->{ref} !~ $buffer[3] || $$href{$k}->{alt} !~ $buffer[4];	# NOTE(review): the file's alleles are used as unanchored regexes (substring match, metacharacters active) - confirm an exact 'ne' test is not what was meant
+				$$href{$k}->{$kname}=join(':', @buffer[5..$#buffer]);	# everything after the five key/allele fields
+			}
+		close $in;
+	}
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+my $annovar_dir = 'inc/annovar';	# relative directory from which convert2annovar.pl is run
+my $rdep = 'inc/R';	# relative directory from which samvcf_data_parser.R is read
+
+my($varfile, $outdir, $outfile, $i, @DP4, @buffer, @Temp, @previous, @fnames, %opts, %chr);	# script-wide working variables
+
+GetOptions(\%opts, "varfile=s", "outdir=s", "outfile=s");	# every option takes a string value, collected in %opts
+$varfile = $opts{varfile};
+$outdir  = $opts{outdir};
+$outfile = $opts{outfile};
+
+my $fname = readlink($varfile) || $varfile;	# if $varfile is a symlink use its target, otherwise the path itself
+$fname = basename($fname);	# keep only the file name
+
+my %fh=(	# chromosome name -> typeglob later opened as that chromosome's output handle; names absent from this table are ignored when splitting
+	'chr1' => *chr1,	'chr2' => *chr2,	'chr3' => *chr3,	'chr4' => *chr4,	'chr5' => *chr5,
+	'chr6' => *chr6,	'chr7' => *chr7,	'chr8' => *chr8,	'chr9' => *chr9,	'chr10' => *chr10,
+	'chr11' => *chr11,	'chr12' => *chr12,	'chr13' => *chr13,	'chr14' => *chr14,	'chr15' => *chr15,
+	'chr16' => *chr16,	'chr17' => *chr17,	'chr18' => *chr18,	'chr19' => *chr19,	'chr20' => *chr20,
+	'chr21' => *chr21,	'chr22' => *chr22,	'chrX' => *chrX,	'chrY' => *chrY,	'chrM' => *chrM
+);
+
+system("${annovar_dir}/convert2annovar.pl -format vcf4 $varfile -includeinfo > ${outdir}/${fname}_Temp-00 2> /dev/null") and die "convert2annovar.pl failed on $varfile (exit status $?)";	# system() yields the exit status; backticks only returned the (redirected, hence empty) output so the old check could never fire
+
+open($fh{$_}, ">${outdir}/${fname}_${_}.Temp-00") or die $! foreach keys %fh;	# one output file per known chromosome
+open IN, "<${outdir}/${fname}_Temp-00" or die $!;
+while(<IN>){
+		/^(\S+)\s+(?:\S+\s+){2}(\S+)\s+(\S+)/ or next;	# $1 = field 1, $2/$3 = fields 4 and 5; an unmatched line is skipped instead of reusing stale captures
+		next if !exists $fh{$1};	# first field is not a chromosome listed in %fh
+		if(min(length($2), length($3))!=1){	# both alleles longer than one base: split the call into elementary variants
+				chomp;
+				@buffer=split /\s+/, $_;
+				@Temp=explode_varcall(@buffer[1,3..4]);	# (\@POS, \@VAR) built from fields 2, 4 and 5
+				for($i=0; $i<$#{$Temp[0]}; $i+=2){	# one output row per start/end pair
+						print{ $fh{$buffer[0]} } join("\t", $buffer[0], @{$Temp[0]}[$i..$i+1], @{$Temp[1]}[$i..$i+1], @buffer[6..$#buffer]), "\n";	# NOTE(review): field 6 ($buffer[5]) is not copied here whereas unsplit lines are written whole - confirm the column layout expected downstream
+					}
+				next;	# NOTE(review): this path never increments $chr{...}; a chromosome holding only split calls fails the later exists $chr{$_} test - confirm this is intended
+			}
+		print{ $fh{$1} } $_;
+		$chr{$1}++;	# remember that this chromosome received at least one row
+	}
+close IN;
+foreach (keys %fh){	# per chromosome: sort its rows, merge consecutive rows, write the .Temp-02 file
+		close($fh{$_});
+		next if !exists $chr{$_};	# nothing recorded for this chromosome
+		system("sort -k2,2n -k3,3n ${outdir}/${fname}_${_}.Temp-00 > ${outdir}/${fname}_${_}.Temp-01") and die "sort failed for $_ (exit status $?)";	# exit status check; the former backtick test looked at the always-empty output
+		open IN, "<${outdir}/${fname}_${_}.Temp-01" or die $!;
+		open OUT, ">${outdir}/${fname}_${_}.Temp-02" or die $!;
+		$_=readline(IN);	# from here on $_ holds input rows, no longer the chromosome name
+		/^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/;	# keep fields 1-7 and fields 16-17 of the row
+		@buffer=split /\s+/, $1.$2;
+		($_=pop(@buffer))=~s/.+DP4=([^;]+).+/$1/;	# reduce the last kept field to the value of its DP4= tag
+		@DP4=split /,/, $_;
+		push @buffer, @DP4;	# the comma-separated DP4 values become the trailing columns
+		@previous=@buffer; @Temp=();	# @Temp is reset so the final test below never sees another chromosome's record or explode_varcall's references
+		MAINLOOP: while(<IN>){
+				/^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/;
+				@buffer=split /\s+/, $1.$2;
+				($_=pop(@buffer))=~s/.+DP4=([^;]+).+/$1/;
+				@DP4=split /,/, $_;
+				push @buffer, @DP4;
+				while(($previous[0] eq $buffer[0]) && ($buffer[2]==$previous[2]+1) && (join('', @previous[3..4]) !~ /-/) && (join('', @buffer[3..4]) !~ /-/)){	# same first column, third column one past the pending record's, and no '-' allele on either side
+						$previous[2]=$buffer[2];	# extend the pending record
+						$previous[$_].=$buffer[$_] for 3..4;	# concatenate the alleles
+						$previous[5]='unk' if $previous[5] ne $buffer[5];
+						$previous[7]='SKIP' if $previous[7] ne $buffer[7];
+						for (6,8..11){	# numeric columns are replaced by the mean of the pending value and the new one
+								$previous[$_]+=$buffer[$_];
+								$previous[$_]/=2;
+							}
+						next MAINLOOP;
+					}
+				$previous[7]='NONE' if $previous[7] eq '.';
+				$previous[$_]=sprintf("%.0f", $previous[$_]) for (6,8..11);	# round the numeric columns
+				print OUT join("\t", @previous[0..6,8..11,7]), "\n";	# column 8 is written last
+				@Temp=@previous if eof;	# on the last row, remember the record just printed
+				@previous=@buffer;
+			}
+		$previous[7]='NONE' if $previous[7] eq '.';
+		$previous[$_]=sprintf("%.0f", $previous[$_]) for (6,8..11);
+		print OUT join("\t", @previous[0..6,8..11,7]), "\n" if(!@Temp || join('_', @Temp[1..2]) ne join('_', @previous[1..2]));	# empty @Temp (single-row file or file ending on a merge) no longer trips FATAL 'uninitialized'; NOTE(review): a final record sharing start/end with the one printed just before is still suppressed - confirm
+		close IN;
+		close OUT or die $!;	# buffered write errors surface at close
+	}
+foreach (1..22, 'X', 'Y', 'M'){	# collect the per-chromosome results in a fixed chromosome order
+		push @fnames, "${outdir}/${fname}_chr${_}.Temp-02" if exists $chr{"chr$_"};
+	}
+system(join(' ', 'cat', @fnames, '>', "${outdir}/${fname}.Temp.2R")) and die "cat of per-chromosome files failed (exit status $?)";	# $? carries the child status; $! is not meaningful here
+my @rlog=`${rbin} --vanilla --slave --args ${outdir}/${fname}.Temp.2R < ${rdep}/samvcf_data_parser.R`; die "R failed (exit status $?): @rlog" if $?;	# test the exit status, not whether R printed something; captured output is shown only on failure
+unlink(glob("${outdir}/${fname}*Temp*"), $outfile); symlink("${outdir}/${fname}.var", $outfile) or die "cannot link $outfile to ${outdir}/${fname}.var: $!";	# remove the temporary files, then point $outfile at the .var result, without going through a shell
\ No newline at end of file