comparison 2.4/script/SoftSearch_Filter.pl @ 18:1163c16cb3c0 draft

Uploaded
author plus91-technologies-pvt-ltd
date Mon, 02 Jun 2014 07:35:53 -0400
parents e3609c8714fb
children
comparison
equal deleted inserted replaced
17:5343ef57827f 18:1163c16cb3c0
1 #!/usr/bin/perl -s
2 open (FILE,"$ARGV[0]")||usage();#die "Not using the right Parameters!\n\n";
3 use Getopt::Long;
4 #Declare variables
5 my ($lsc,$minDist,$skip,$nSC,$nRP,$isize,$answer);
6 GetOptions(
7 'dist:s' => \$minDist, #minimum distance between events
8 'lsc:i' => \$lsc, #minimum somatic score
9 'nsc:i' => \$nsc, #minimum depth of coverage in normal
10 'nRP:i' => \$nRP, #minimum number of times it can be seen in tumor
11 'isize:i' => \$isize,
12 'sv:s' => \$sv, #whether or not to skip small deletions
13 'q:s' => \$answer, #useful for plotting histograms
14 'skip:s' => \$skip
15 );
16 if(defined($lsc)){$lsc=$lsc} else {$lsc=0};
17 if(defined($nsc)){$nsc=$nsc} else {$nsc=0};
18 if(defined($nRP)){$nRP=$nRP} else {$nRP=0};
19 if(defined($minDist)){$minDist=$minDist} else {$minDist=0};
20 if(!$isize){$isize=0};
21 if(!$uRP){$uRP=0};
22
23 if($answer eq "yes"){$answer=$answer} else {$answer="no"};
24
25 if ($answer eq "yes"){
26 open(lsc,">lsc.out")||die;
27 open(nsc,">nsc.out")||die;
28 open(nRP,">nRP.out")||die;
29 }
30
31
32 #Remove hits if they are within $minDist
33 $chr="chr1";$pos=0;
34 while (<FILE>){
35 if ($_=~/^#/){
36 print;
37 next
38 };
39 if ($skip){next if $_=~/$skip/}
40 @_=split(/\t/,$_);
41 #Get ISIZE from INFO field
42 my @info=split(/;/,$_[7]);
43 my $k = 0;
44 my $v = 0;
45 my $infoHash;
46 for (my $i=0;$i<=@info;$i++){
47 my @tmp=split(/=/,$info[$i]);
48 $k=shift(@tmp);
49 $v=shift(@tmp);
50 $infoHash{$k}=$v;
51 }
52
53 #Get the value of TYPE to find out how many reads support the event
54 my $counts = {CTX => 0, DEL => 0, INS => 0, INV => 0, TDUP => 0, NOV_INS => 0, lSC => 0, nSC => 0,uRP =>0,sDEL => 0,levD_local=>0,distl_levD => 0 };
55 #Get Complete Hash
56 #@_[8] is format
57 #@_[9] is values
58 my @format=split(/:/, $_[8]);
59 my @sample=split(/:/,$_[9]);
60 my %hash;
61 @hash{@format}=@sample;
62 #Subset has to get proper type of variants
63 my $max_val = 0;
64 my $max_type = "NA";
65
66 #Get TYPEOF HASH
67 my %type;
68 %type = %hash ;
69 delete $type{'lSC'};
70 delete $type{'nSC'};
71 delete $type{'uRP'};
72 delete $type{'levD_local'};
73 delete $type{'distl_levD'};
74
75 while (my ($key,$val)=each(%type)){
76 if($val > $max_val){$max_val=$val;$max_type=$key}
77 }
78
79
80 #######################################################################################################
81 #Start applying filters
82
83 #Remove hits if they are within $minDist
84 $chrom=$_[0];$position=$_[1];
85
86 #next if chroms are same and distance is less than X
87 $difference=abs($pos-$position);
88 if(($chrom eq $chr)&&($difference < $minDist)){
89 $pos=$position;$chr=$chrom;;
90 next}
91 $pos=$position;$chr=$chrom;
92 $EVENT_SIZE=$infoHash{'ISIZE'};
93 $EVENT_TYPE=$max_type;
94 $EVENT_SUPPORT=$max_val;
95 $length_of_softClips=$hash{'lSC'};
96 $number_of_softclips=$hash{'nSC'};
97 $number_of_unmated=$hash{'uRP'};
98
99 ########################################################################
100 #Print if all fileds are ok
101 next if($EVENT_SIZE < $isize);
102 next if($EVENT_SUPPORT < $nRP);
103 next if($length_of_softClips < $lsc);
104 next if($number_of_softclips < $nsc);
105 next if($number_of_unmated < $uRP);
106 next if (($sv)&&($EVENT_TYPE=~/sDEL/));
107 print;
108
109
110 if ($answer eq "yes"){
111 print lsc $length_of_softClips."\n";
112 print nsc $number_of_softclips."\n";
113 print nRP $EVENT_SUPPORT."\n";
114 }
115 }
116
117
118 sub usage{
119 print "\nUsage: Soft_SearchFilter.pl <VCF>\n
120 -dist #minimum distance between events [0]
121 -lsc #minimum length soft-clip [0]
122 -nsc #minimum number of soft-clip [0]
123 -nRP #minimum number of discordant read pairs [0]
124 -isize #minimum size [0]
125 -sv #skip small deletions [no|yes]
126 -skip #pipe-delimited list of strings to skip (e.g. chrM|chY|chrGL)
127 \n"
128 }
129
130 #R
131 # lsc<-read.table("lsc.out")
132 # nsc<-read.table("nsc.out")
133 # nRP<-read.table("nRP.out")
134 # par(mfrow=c(2,2))
135 # hist(lsc$V1,breaks=100)
136 # hist(nsc$V1,breaks=100)
137 # hist(nRP$V1,breaks=100)