annotate PsiCLASS-1.0.2/samtools-0.1.19/misc/wgsim_eval.pl @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #!/usr/bin/perl -w
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 # Contact: lh3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 # Version: 0.1.5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 use strict;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 use warnings;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 use Getopt::Std;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 &wgsim_eval;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 exit;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 sub wgsim_eval {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 my %opts = (g=>5);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 getopts('pcag:', \%opts);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] <in.sam>\n") if (@ARGV == 0 && -t STDIN);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 my (@c0, @c1, %fnfp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 my ($max_q, $flag) = (0, 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 my $gap = $opts{g};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 $flag |= 1 if (defined $opts{p});
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 $flag |= 2 if (defined $opts{c});
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 while (<>) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 next if (/^\@/);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 my @t = split("\t");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 next if (@t < 11);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 my $line = $_;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 $max_q = $q if ($q > $max_q);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 # right coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 $_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 --$rght;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 # correct for soft clipping
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 my ($left0, $rght0) = ($left, $rght);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 $left -= $1 if (/^(\d+)[SH]/);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 $rght += $1 if (/(\d+)[SH]$/);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 $left0 -= $1 if (/(\d+)[SH]$/);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 $rght0 += $1 if (/^(\d+)[SH]/);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 # skip unmapped reads
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 next if (($t[1]&0x4) || $chr eq '*');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 # parse read name and check
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 if ($1 ne $chr) { # different chr
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 $is_correct = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 if ($flag & 2) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 $is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 } else { # R3, reverse
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 $is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 if ($t[1] & 0x10) { # reverse
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 next;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 ++$c0[$q];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 ++$c1[$q] unless ($is_correct);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 @{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]});
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 ++$fnfp{$t[4]}[0];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 ++$fnfp{$t[4]}[1] unless ($is_correct);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 print STDERR $line if (($flag&1) && !$is_correct && $q > 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 # print
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 my ($cc0, $cc1) = (0, 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 if (!defined($opts{a})) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 for (my $i = $max_q; $i >= 0; --$i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 $c0[$i] = 0 unless (defined $c0[$i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 $c1[$i] = 0 unless (defined $c1[$i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 $cc0 += $c0[$i]; $cc1 += $c1[$i];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 printf("%.2dx %12d / %-12d %12d %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 for (reverse(sort {$a<=>$b} (keys %fnfp))) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 next if ($_ == 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 $cc0 += $fnfp{$_}[0];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 $cc1 += $fnfp{$_}[1];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 print join("\t", $_, $cc0, $cc1), "\n";
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 }