annotate 2.4/script/SoftSearch_Filter.pl @ 18:1163c16cb3c0 draft

Uploaded
author plus91-technologies-pvt-ltd
date Mon, 02 Jun 2014 07:35:53 -0400
parents e3609c8714fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #!/usr/bin/perl -s
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2 open (FILE,"$ARGV[0]")||usage();#die "Not using the right Parameters!\n\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use Getopt::Long;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 #Declare variables
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 my ($lsc,$minDist,$skip,$nSC,$nRP,$isize,$answer);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 GetOptions(
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7 'dist:s' => \$minDist, #minimum distance between events
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 'lsc:i' => \$lsc, #minimum somatic score
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9 'nsc:i' => \$nsc, #minimum depth of coverage in normal
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10 'nRP:i' => \$nRP, #minimum number of times it can be seen in tumor
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11 'isize:i' => \$isize,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12 'sv:s' => \$sv, #whether or not to skip small deletions
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
13 'q:s' => \$answer, #useful for plotting histograms
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
14 'skip:s' => \$skip
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
15 );
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
16 if(defined($lsc)){$lsc=$lsc} else {$lsc=0};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
17 if(defined($nsc)){$nsc=$nsc} else {$nsc=0};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
18 if(defined($nRP)){$nRP=$nRP} else {$nRP=0};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
19 if(defined($minDist)){$minDist=$minDist} else {$minDist=0};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
20 if(!$isize){$isize=0};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
21 if(!$uRP){$uRP=0};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
23 if($answer eq "yes"){$answer=$answer} else {$answer="no"};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
24
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
25 if ($answer eq "yes"){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26 open(lsc,">lsc.out")||die;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
27 open(nsc,">nsc.out")||die;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
28 open(nRP,">nRP.out")||die;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
29 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
30
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
32 #Remove hits if they are within $minDist
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
33 $chr="chr1";$pos=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34 while (<FILE>){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
35 if ($_=~/^#/){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
36 print;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
37 next
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
38 };
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
39 if ($skip){next if $_=~/$skip/}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
40 @_=split(/\t/,$_);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
41 #Get ISIZE from INFO field
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
42 my @info=split(/;/,$_[7]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
43 my $k = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44 my $v = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
45 my $infoHash;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
46 for (my $i=0;$i<=@info;$i++){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
47 my @tmp=split(/=/,$info[$i]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
48 $k=shift(@tmp);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
49 $v=shift(@tmp);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
50 $infoHash{$k}=$v;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
51 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
52
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
53 #Get the value of TYPE to find out how many reads support the event
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
54 my $counts = {CTX => 0, DEL => 0, INS => 0, INV => 0, TDUP => 0, NOV_INS => 0, lSC => 0, nSC => 0,uRP =>0,sDEL => 0,levD_local=>0,distl_levD => 0 };
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
55 #Get Complete Hash
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
56 #@_[8] is format
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
57 #@_[9] is values
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58 my @format=split(/:/, $_[8]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
59 my @sample=split(/:/,$_[9]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
60 my %hash;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61 @hash{@format}=@sample;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62 #Subset has to get proper type of variants
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
63 my $max_val = 0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
64 my $max_type = "NA";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
65
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
66 #Get TYPEOF HASH
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
67 my %type;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
68 %type = %hash ;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
69 delete $type{'lSC'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
70 delete $type{'nSC'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
71 delete $type{'uRP'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
72 delete $type{'levD_local'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
73 delete $type{'distl_levD'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
74
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
75 while (my ($key,$val)=each(%type)){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
76 if($val > $max_val){$max_val=$val;$max_type=$key}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
77 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
78
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
79
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
80 #######################################################################################################
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
81 #Start applying filters
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
82
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
83 #Remove hits if they are within $minDist
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
84 $chrom=$_[0];$position=$_[1];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
85
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
86 #next if chroms are same and distance is less than X
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
87 $difference=abs($pos-$position);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
88 if(($chrom eq $chr)&&($difference < $minDist)){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
89 $pos=$position;$chr=$chrom;;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
90 next}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
91 $pos=$position;$chr=$chrom;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
92 $EVENT_SIZE=$infoHash{'ISIZE'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
93 $EVENT_TYPE=$max_type;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
94 $EVENT_SUPPORT=$max_val;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
95 $length_of_softClips=$hash{'lSC'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
96 $number_of_softclips=$hash{'nSC'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
97 $number_of_unmated=$hash{'uRP'};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
98
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
99 ########################################################################
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
100 #Print if all fileds are ok
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
101 next if($EVENT_SIZE < $isize);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
102 next if($EVENT_SUPPORT < $nRP);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
103 next if($length_of_softClips < $lsc);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
104 next if($number_of_softclips < $nsc);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
105 next if($number_of_unmated < $uRP);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
106 next if (($sv)&&($EVENT_TYPE=~/sDEL/));
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
107 print;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
108
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
109
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
110 if ($answer eq "yes"){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
111 print lsc $length_of_softClips."\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
112 print nsc $number_of_softclips."\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
113 print nRP $EVENT_SUPPORT."\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
114 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
115 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
116
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
117
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
118 sub usage{
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
119 print "\nUsage: Soft_SearchFilter.pl <VCF>\n
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
120 -dist #minimum distance between events [0]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
121 -lsc #minimum length soft-clip [0]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
122 -nsc #minimum number of soft-clip [0]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
123 -nRP #minimum number of discordant read pairs [0]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
124 -isize #minimum size [0]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
125 -sv #skip small deletions [no|yes]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
126 -skip #pipe-delimited list of strings to skip (e.g. chrM|chY|chrGL)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
127 \n"
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
128 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
129
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
130 #R
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
131 # lsc<-read.table("lsc.out")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
132 # nsc<-read.table("nsc.out")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
133 # nRP<-read.table("nRP.out")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
134 # par(mfrow=c(2,2))
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
135 # hist(lsc$V1,breaks=100)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
136 # hist(nsc$V1,breaks=100)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
137 # hist(nRP$V1,breaks=100)