Mercurial > repos > geert-vandeweyer > varscan_wrapper
diff varscan/varscan_somatic.pl @ 0:848f3dc54593 draft
Uploaded
author | geert-vandeweyer |
---|---|
date | Fri, 07 Mar 2014 06:17:32 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/varscan/varscan_somatic.pl Fri Mar 07 06:17:32 2014 -0500 @@ -0,0 +1,173 @@ +#!/usr/bin/perl + + +use strict; +use Cwd; + +die qq( +Bad numbr of inputs + +) if(!@ARGV); + +my $options =""; +my $normal=""; +my $command=""; +my $tumor=""; +my $tumorbam = ""; +my $output=""; +my $snp=""; +my $indel=""; +my $working_dir = cwd(); +my $log = ''; + +foreach my $input (@ARGV) +{ + my @tmp = split "::", $input; + if($tmp[0] eq "COMMAND") + { + $command = $tmp[1]; + } + elsif($tmp[0] eq "NORMAL") + { + $normal = $tmp[1]; + } + elsif($tmp[0] eq "TUMOR") + { + $tumor = $tmp[1]; + } + elsif($tmp[0] eq "TUMORBAM") + { + $tumorbam = $tmp[1]; + } + elsif($tmp[0] eq "OPTION") + { + my @p = split(/\s+/,$tmp[1]); + if ($p[0] =~ m/validation|strand-filter|output-vcf/ && $p[1] == 0) { + next; + } + $options = "$options ${tmp[1]}"; + } + elsif($tmp[0] eq "OUTPUT") + { + $output = $tmp[1]; + } + elsif($tmp[0] eq "SNP") + { + $snp = $tmp[1]; + } + elsif($tmp[0] eq "INDEL") + { + $indel = $tmp[1]; + } + + elsif($tmp[0] eq "LOG") + { + $log = $tmp[1]; + } + + else + { + die("Unknown Input: $input\n"); + } +} + +## VCF OUTPUT +if ($output ne '') { + system ("$command $normal $tumor $options --output-snp $working_dir/out.snp --output-indel $working_dir/out.indel 2>$log"); + my $indels = "$working_dir/out.indel.vcf"; + my $snps = "$working_dir/out.snp.vcf"; + + system("grep -v '^\#' $indels | grep -v '^chrom position' >> $snps"); + my @chr_ord = chromosome_order($tumorbam); + + vs2vcf($snps, $output,\@chr_ord); +} +## SNP/INDEL OUTPUT +else { + system ("$command $normal $tumor $options --output-snp $snp --output-indel $indel 2>$log"); +} + +sub vs2vcf +{ + + # + # G l o b a l v a r i a b l e s + # + my $version = '0.1'; + + # + # Read in file + # + my $input = shift; + my $output = shift; + my $chr_ord = shift; + open(IN, $input) or die "Can't open $input': $!\n"; + open(OUT, ">$output") or die "Can't create $output': $!\n"; + my %output; + + while ( <IN> ) + { + if ( /^#/ ) + { + print OUT; + next; + } + chomp; + my $line = $_; + + my @flds = split ( "\t", $line ); + my $ref = $flds[3]; + my $alt = $flds[4]; + # + # Deletion of bases + # + if ( $alt =~ /^\-/ ) + { + ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref); + } + + # + # Insertion of bases + # + if ( $alt =~ /^\+/ ) + { + $flds[4] = $ref.substr($alt,1); + } + ## insert dot for reference positions. + if ($flds[4] eq '') { + $flds[4] = '.'; + } + print OUT join( "\t", @flds),"\n" unless defined $chr_ord; + $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord; + } + close(IN); + # if chromosome order given return in sorted order + if(defined $chr_ord) + { + for my $chrom (@{ $chr_ord }) + { + for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} }) + { + print OUT $output{$chrom}{$pos}; + } + } + } + close(OUT); +} + + +sub chromosome_order +{ + my $input = shift; + # calculate flagstats + my $COMM = "samtools view -H $input | grep '^\@SQ'"; + my @SQ = `$COMM`; + chomp @SQ; + for(my $i = 0; $i <= $#SQ; $i++) + { + $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/; + } + return(@SQ); +} + +