view gfapts/gfap_r1.0_samvcf_data_parser.pl @ 1:028f435b6cfb draft default tip

Uploaded
author rdaveau
date Fri, 03 Aug 2012 05:50:41 -0400
parents f753b30013e6
children
line wrap: on
line source

#!/usr/bin/perl

use strict;
# use lib 'inc/perlmod';
# use ngsutil qw[ :DEFAULT &explode_varcall ];
use warnings FATAL => qw[ numeric uninitialized ];
use List::Util qw[ sum min max ];
use File::Basename;
use Getopt::Long;

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#	PATH TO YOUR R EXECUTABLE (the R binary itself, not a directory);
#	it is run further down as "$rbin --vanilla --slave --args ...".
my $rbin = '/usr/bin/R';
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#	TEMP include ngsutil.pm
# explode_varcall(POS, REF, ALT)
#
# Break one variant call into simple events and return two array references:
# a flat list of (start, end) pairs and a flat list of (ref, alt) pairs, one
# pair of each per event.
#
#   * REF and ALT both one base long: returned unchanged at POS..POS.
#   * Exactly one of them one base long: the first base of both alleles is
#     dropped and the allele left empty is written '-'. When REF is the long
#     one the range is POS+1 .. POS+length(REF)-1, otherwise it stays POS..POS.
#   * Both longer than one base: the first base of both is dropped and the
#     remainders are compared position by position. Every run of differing
#     bases becomes one event, and the stretch by which the longer allele
#     exceeds the shorter becomes one more event against '-'. Mismatch events
#     are listed before the length-difference event. When REF is the shorter
#     allele, both a left-aligned and a right-aligned comparison are made and
#     the right-aligned one is used only if it has strictly fewer mismatches.
#     An event whose REF side is '-' gets end == start.
sub explode_varcall{
		my($pos, $ref, $alt)=@_;
		my @len=(length($ref), length($alt));
		my($short, $long)=sort{ $a<=>$b } @len;

		if($short==1){
			my($first, $last)=($pos, $pos);
			if($long!=1){
				# Drop the leading base; an allele left empty becomes '-'.
				foreach my $allele ($ref, $alt){
						$allele=substr($allele, 1);
						$allele=~s/^$/-/;
					}
				# REF is the long allele: the event spans the bases that follow POS.
				if($len[0]!=1){
						$last+=$len[0]-1;
						$first++;
					}
			}
			return([ $first, $last ], [ $ref, $alt ]);
		}

		# From here on both alleles are at least two bases long.
		my($ref_top, $alt_top)=($len[0]-2, $len[1]-2);	# last index once the leading base is gone
		my($lo, $hi)=($short-2, $long-2);
		my $origin=$pos;
		$origin++;
		$ref=substr($ref, 1);
		$alt=substr($alt, 1);
		my $pad='-' x ($hi-$lo);

		# Left-aligned comparison: 1 = same base, 0 = different base,
		# '-' = position that only exists in the longer allele.
		my $left='';
		foreach my $at (0 .. $hi){
				if($at>$lo){ $left.='-'; }
				elsif(substr($ref, $at, 1) ne substr($alt, $at, 1)){ $left.=0; }
				else{ $left.=1; }
			}
		my $left_miss=($left=~tr/0//);

		my $mask=$left;
		if($len[0]<$len[1]){
				# REF is shorter: also try the comparison anchored on the right end.
				my $right='';
				foreach my $at (reverse 0 .. $hi){
						if($at>$lo){ $right.='-'; }
						elsif(substr($ref, $ref_top-$at, 1) ne substr($alt, $alt_top-$at, 1)){ $right.=0; }
						else{ $right.=1; }
					}
				if(($right=~tr/0//)<$left_miss){
						$mask=$right;
						$ref=$pad . $ref;
					}
			}else{
				$alt.=$pad;
			}

		# Locate every run of mismatches, then every run of padding, as [offset, length].
		my @runs;
		foreach my $symbol (qw[ 0 \- ]){
				push @runs, [ $-[0], $+[0]-$-[0] ] while ($mask =~ /$symbol+/g);
			}

		my(@coords, @alleles);
		foreach my $run (@runs){
				my($offset, $span)=@{$run};
				my @piece;
				foreach my $allele ($ref, $alt){
						# A slice that falls past the end of the allele counts as '-'.
						(my $part=substr($allele, $offset, $span) || '-')=~s/\-+/\-/;
						push @piece, $part;
					}
				my $from=$offset+$origin;
				my $to=($piece[0] eq '-')? $from : $from+$span-1;
				push @coords, $from, $to;
				push @alleles, @piece;
			}
		return(\@coords, \@alleles);
	}

# varscan(KNAME, FPATH, HREF)
#
# Read the whitespace-delimited file FPATH and copy extra columns into HREF.
# HREF is a hash reference keyed by the first three columns joined with ':';
# each value is a hash reference holding at least the keys 'ref' and 'alt'.
# For every non-comment line whose key exists in HREF, and whose 4th and 5th
# columns occur literally inside the stored 'ref' and 'alt' strings, the
# remaining columns are joined with ':' and stored under $HREF->{key}{KNAME}.
#
# Lines starting with '#' and lines with fewer than five columns are skipped.
# Dies if FPATH cannot be opened. The caller's $_ and any handle named IN
# are left untouched.
sub varscan{
		my($kname, $fpath, $href)=@_;
		open(my $in, '<', $fpath) or die "Cannot open $fpath: $!";
		while(my $line=<$in>){
				next if $line =~ /^#/;
				chomp $line;
				my @buffer=split /\s+/, $line;
				# Blank or truncated line: nothing to compare against.
				next if @buffer<5;
				my $k=join(':', @buffer[0..2]);
				next if !exists $$href{$k};
				# Literal containment test: the file content must never be
				# compiled as a regular expression ('*' would die, '.' would
				# match any allele).
				next if index($$href{$k}->{ref}, $buffer[3])<0;
				next if index($$href{$k}->{alt}, $buffer[4])<0;
				$$href{$k}->{$kname}=join(':', @buffer[5..$#buffer]);
			}
		close $in;
	}
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# Helper locations, resolved relative to the current working directory.
my $annovar_dir = 'inc/annovar';	# holds convert2annovar.pl
my $rdep = 'inc/R';			# holds samvcf_data_parser.R

my($varfile, $outdir, $outfile, $i, @DP4, @buffer, @Temp, @previous, @fnames, %opts, %chr);

# Command line: --varfile <VCF file> --outdir <working directory> --outfile <result path>
GetOptions(\%opts, "varfile=s", "outdir=s", "outfile=s");
foreach my $required (qw[ varfile outdir outfile ]){
		next if defined $opts{$required} && length $opts{$required};
		# Fail with a readable message instead of a fatal "uninitialized" warning later on.
		die "Missing --$required\nUsage: $0 --varfile <file> --outdir <dir> --outfile <file>\n";
	}
$varfile = $opts{varfile};
$outdir  = $opts{outdir};
$outfile = $opts{outfile};
die "Cannot read variant file '$varfile'\n" if !-r $varfile;
die "Output directory '$outdir' does not exist\n" if !-d $outdir;

# When the input is a symbolic link, the temporary files are named after its target.
my $fname = readlink($varfile) || $varfile;
$fname = basename($fname);

# One entry per accepted chromosome. The keys are the chromosomes that are
# kept; each value becomes a file handle when open() is later called on the
# (still undefined) hash element.
my %fh;
$fh{"chr$_"} = undef foreach (1..22, qw[ X Y M ]);

# Convert the VCF with ANNOVAR's convert2annovar.pl. Its output is redirected
# to a file, so success has to be judged by the exit status of the command.
system("${annovar_dir}/convert2annovar.pl -format vcf4 $varfile -includeinfo > ${outdir}/${fname}_Temp-00 2> /dev/null")==0
	or die "convert2annovar.pl failed on $varfile (wait status $?)\n";

# Distribute the converted rows over one file per accepted chromosome.
foreach my $name (keys %fh){
		open($fh{$name}, '>', "${outdir}/${fname}_${name}.Temp-00")
			or die "Cannot write ${outdir}/${fname}_${name}.Temp-00: $!\n";
	}
open(my $annovar_in, '<', "${outdir}/${fname}_Temp-00") or die "Cannot read ${outdir}/${fname}_Temp-00: $!\n";
while(my $line=<$annovar_in>){
		# Columns 1, 4 and 5: chromosome, reference allele, alternate allele.
		# Rows that do not have this shape are skipped rather than being
		# filed under whatever the previous match captured.
		next if $line !~ /^(\S+)\s+(?:\S+\s+){2}(\S+)\s+(\S+)/;
		my($chrom, $ref, $alt)=($1, $2, $3);
		next if !exists $fh{$chrom};
		if(min(length($ref), length($alt))!=1){
				# Both alleles span several bases: split the call into simple events.
				chomp(my $row=$line);
				my @field=split /\s+/, $row;
				my($coords, $alleles)=explode_varcall(@field[1,3..4]);
				# NOTE(review): the original 6th column is not copied, so these rows
				# carry one column fewer than untouched rows - confirm this is the
				# layout expected by the later steps.
				for(my $j=0; $j<$#{$coords}; $j+=2){
						print{ $fh{$chrom} } join("\t", $chrom, @{$coords}[$j..$j+1], @{$alleles}[$j..$j+1], @field[6..$#field]), "\n";
					}
				# Register the chromosome as soon as at least one row was written for
				# it, so that a chromosome holding only multi-base calls is kept.
				$chr{$chrom}++ if @{$coords};
				next;
			}
		print{ $fh{$chrom} } $line;
		$chr{$chrom}++;
	}
close $annovar_in;
# Reduce one sorted row to the twelve values used below: columns 1-7,
# column 16, and the comma-separated values of the DP4= tag in column 17.
# Dies when the row has too few columns or carries no DP4 tag.
sub _parse_sorted_row{
		my($row)=@_;
		$row =~ /^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/
			or die "Unexpected row layout (fewer than 17 columns): $row";
		my @field=split /\s+/, $1.$2;
		(my $dp4=pop(@field))=~s/.+DP4=([^;]+).+/$1/
			or die "No DP4 tag found in row: $row";
		push @field, split(/,/, $dp4);
		return @field;
	}

# Render a parsed (possibly merged) record as one output line: '.' in
# field 7 becomes 'NONE', the numeric fields are rounded to integers and
# field 7 is moved to the end.
sub _format_merged_row{
		my @rec=@_;
		$rec[7]='NONE' if $rec[7] eq '.';
		$rec[$_]=sprintf("%.0f", $rec[$_]) for (6,8..11);
		return join("\t", @rec[0..6,8..11,7]) . "\n";
	}

# For every chromosome that received rows: sort them by start then end
# (columns 2 and 3) and fuse runs of consecutive records into one record.
foreach my $chrom (keys %fh){
		close($fh{$chrom}) or die "Error while closing the $chrom split file: $!\n";
		next if !exists $chr{$chrom};
		my $stem="${outdir}/${fname}_${chrom}";
		# The output is redirected, so only the exit status tells whether sort worked.
		system("sort -k2,2n -k3,3n ${stem}.Temp-00 > ${stem}.Temp-01")==0
			or die "sort failed on ${stem}.Temp-00 (wait status $?)\n";
		open(my $sorted, '<', "${stem}.Temp-01") or die "Cannot read ${stem}.Temp-01: $!\n";
		open(my $out, '>', "${stem}.Temp-02") or die "Cannot write ${stem}.Temp-02: $!\n";
		my $first=<$sorted>;
		if(!defined $first){
				# Nothing to merge; leave an empty result file behind.
				close $sorted;
				close $out;
				next;
			}
		my @pending=_parse_sorted_row($first);
		# "start_end" of the record written while the last row was being read.
		# Kept per chromosome so a value from another chromosome can never leak in.
		my $written_at_eof;
		ROW: while(my $row=<$sorted>){
				my @current=_parse_sorted_row($row);
				# Same chromosome, end position exactly one past the pending record,
				# and neither record involves '-': fold the row into the pending record.
				if(($pending[0] eq $current[0]) && ($current[2]==$pending[2]+1) && (join('', @pending[3..4]) !~ /-/) && (join('', @current[3..4]) !~ /-/)){
						$pending[2]=$current[2];
						$pending[$_].=$current[$_] for 3..4;
						$pending[5]='unk' if $pending[5] ne $current[5];
						$pending[7]='SKIP' if $pending[7] ne $current[7];
						# Numeric fields become the mean of the pending value and the new one.
						for my $col (6,8..11){
								$pending[$col]=($pending[$col]+$current[$col])/2;
							}
						next ROW;
					}
				print{ $out } _format_merged_row(@pending);
				$written_at_eof=join('_', @pending[1..2]) if eof($sorted);
				@pending=@current;
			}
		# NOTE(review): as before, the final record is withheld when it has the same
		# start and end as the record written just before it at end of file - confirm
		# that this suppression is wanted. In every other case it is written.
		print{ $out } _format_merged_row(@pending)
			if !defined $written_at_eof || $written_at_eof ne join('_', @pending[1..2]);
		close $sorted;
		close($out) or die "Error while closing ${stem}.Temp-02: $!\n";
	}
# Collect the per-chromosome results in the order 1..22, X, Y, M.
foreach (1..22, 'X', 'Y', 'M'){
		push @fnames, "${outdir}/${fname}_chr${_}.Temp-02" if exists $chr{"chr$_"};
	}
my $combined="${outdir}/${fname}.Temp.2R";
if(@fnames){
		system(join(' ', 'cat', @fnames, '>', $combined))==0
			or die "Could not concatenate the per-chromosome files into $combined (wait status $?)\n";
	}else{
		# Without file names cat would read standard input; create the empty file directly.
		open(my $empty, '>', $combined) or die "Cannot create $combined: $!\n";
		close($empty) or die "Cannot create $combined: $!\n";
	}

# Run the R script on the combined table. Backticks keep R's standard output
# off this script's own output; success is judged by the exit status only.
my $r_stdout=`${rbin} --vanilla --slave --args ${outdir}/${fname}.Temp.2R < ${rdep}/samvcf_data_parser.R`;
if($?!=0){
		$r_stdout='' if !defined $r_stdout;
		die "${rdep}/samvcf_data_parser.R failed (wait status $?)\n$r_stdout";
	}

# Remove the temporary files and publish ${fname}.var (presumably written by
# the R script - verify) under the requested output name. -f keeps rm quiet
# when $outfile does not exist yet.
system("rm -f ${outdir}/${fname}*Temp* $outfile; ln -s ${outdir}/${fname}.var $outfile")==0
	or die "Could not link ${outdir}/${fname}.var to $outfile (wait status $?)\n";