annotate SNV/filter_snvmix.pl @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #!/usr/bin/perl
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2 use strict;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 use Getopt::Std;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 use vars qw($opt_d $opt_p $opt_c $opt_C $opt_i $opt_S $opt_P $opt_I);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5 getopts("d:cC:i:p:SPI:");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7 my $usage = "$0 -p nonref_p-value_threshold [0.99] -c track_sequenced_bases -d minimum_nonref_depth [2] -C minimum_number_of_read_centered_calls [1] -i max_num_indels [0] -S need_dual_strand_coverage [0] -P use_unpaired_reads_too";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 #chr:pos ref nref ref:ref_count,nref:nref_count,pAA,pAB,pBB,maxPr+ r- n+ n- nC nE nD
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 #chr:pos ref nref ref:ref_count,nref:nref_count,pAA,pAB,pBB,maxP ref_fwd ref_rev nref_fwd nref_rev nref_center nref_edges indel_pos ref_clean nref_clean ref_bad_pair nref_bad_pair ref_ins nref_ins ref_del nref_del ref_junc nref_junc nref_in_unique_pos
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 my $nonref_depth_thresh = $opt_d || 2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12 my $min_centered = $opt_C || 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 my $max_indels = $opt_i || 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14 my $dual_strand = $opt_S;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15 my $p_threshold = $opt_p || 0.99;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 my $track_sequenced_bases = $opt_c;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 my $pair_filtering = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 my $max_indel_relative = $opt_I; #filter on proportion of indels at SNV position relative to nonref calls. E.g. 0.5 would allow 1/2 the number of indel reads at that position vs indels.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 if($opt_P){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20 $pair_filtering = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22 while(<STDIN>){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
23 chomp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24 my ($chrpos,$ref,$nonref,$info,$ref_plus,$ref_minus,$nonref_plus,$nonref_minus,$centered,$ended,$indels,$ref_clean,$nonref_clean,$ref_bad_pair,$nonref_bad_pair,$ref_ins,$nonref_ins,$ref_del,$nonref_del,$ref_junc,$nref_junc,$nonref_in_unique_pos) = split;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
25 my $ok;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26 my ($ref_info, $nref_info, $pAA, $pAB, $pBB, $call) = split(/,/, $info);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
27 my $skip;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
28 if($track_sequenced_bases){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
29 $skip = 1 if $call == 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
30 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
31 else{
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
32 next if $call == 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
33 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
34 my $is_sequenced;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
35 my ($foo,$nref_num) = split /:/, $nref_info;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
36 if($pAA < ($pAB + $pBB)) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
37 if( ($pAB + $pBB) >= $p_threshold) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
38 $is_sequenced = $pAB+$pBB;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
39 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
40 else{
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
41 next;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
42 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
43 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
44 else{
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
45 if($track_sequenced_bases){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
46 if($pAA >= $p_threshold){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
47 $is_sequenced = $pAA;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
48 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
49 # print STDERR "skip $skip\n";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
50 $skip = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
51 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
52 else{
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
53 next;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
54 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
55
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
56 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
57
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
58 if($track_sequenced_bases){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
59 if($is_sequenced){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
60 print STDERR "$chrpos\t$is_sequenced\n";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
61 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
62 next if $skip;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
63 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
64 if($indels <= $max_indels && $centered >= $min_centered && $nref_num >= $nonref_depth_thresh){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
65 if($dual_strand){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
66 $ok=1 if $nonref_minus > 0 && $nonref_plus > 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
67 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
68 else{
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
69 $ok= 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
70 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
71 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
72 else{
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
73 #print "$_ FAIL\n$indels <= $max_indels && $centered >= $min_centered && $nref_num >= $nonref_depth_thresh\n";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
74 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
75 # if($nonref_bad_pair > ($nonref_plus+$nonref_minus)/2){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
76 if($nonref_bad_pair > 1 && $pair_filtering){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
77 #Rodrigo's bad pair filter fail
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
78 #print STDERR "skipping $chrpos due to $nonref_bad_pair > ($nonref_plus+$nonref_minus)/2\n";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
79 $ok = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
80
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
81 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
82
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
83 unless($chrpos =~ /^chr/){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
84 $chrpos = "chr$chrpos";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
85 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
86 if($ok){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
87 print "$chrpos\t$ref\t$nonref\t$info\n";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
88 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
89 }