Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/misc/wgsim_eval.pl @ 0:903fc43d6227 draft default tip
Uploaded
| author | lsong10 | 
|---|---|
| date | Fri, 26 Mar 2021 16:52:45 +0000 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:903fc43d6227 | 
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 | |
| 3 # Contact: lh3 | |
| 4 # Version: 0.1.5 | |
| 5 | |
| 6 use strict; | |
| 7 use warnings; | |
| 8 use Getopt::Std; | |
| 9 | |
| 10 &wgsim_eval; | |
| 11 exit; | |
| 12 | |
| 13 sub wgsim_eval { | |
| 14 my %opts = (g=>5); | |
| 15 getopts('pcag:', \%opts); | |
| 16 die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] <in.sam>\n") if (@ARGV == 0 && -t STDIN); | |
| 17 my (@c0, @c1, %fnfp); | |
| 18 my ($max_q, $flag) = (0, 0); | |
| 19 my $gap = $opts{g}; | |
| 20 $flag |= 1 if (defined $opts{p}); | |
| 21 $flag |= 2 if (defined $opts{c}); | |
| 22 while (<>) { | |
| 23 next if (/^\@/); | |
| 24 my @t = split("\t"); | |
| 25 next if (@t < 11); | |
| 26 my $line = $_; | |
| 27 my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]); | |
| 28 $max_q = $q if ($q > $max_q); | |
| 29 # right coordinate | |
| 30 $_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg; | |
| 31 --$rght; | |
| 32 # correct for soft clipping | |
| 33 my ($left0, $rght0) = ($left, $rght); | |
| 34 $left -= $1 if (/^(\d+)[SH]/); | |
| 35 $rght += $1 if (/(\d+)[SH]$/); | |
| 36 $left0 -= $1 if (/(\d+)[SH]$/); | |
| 37 $rght0 += $1 if (/^(\d+)[SH]/); | |
| 38 # skip unmapped reads | |
| 39 next if (($t[1]&0x4) || $chr eq '*'); | |
| 40 # parse read name and check | |
| 41 if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) { | |
| 42 if ($1 ne $chr) { # different chr | |
| 43 $is_correct = 0; | |
| 44 } else { | |
| 45 if ($flag & 2) { | |
| 46 if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward | |
| 47 $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap); | |
| 48 } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse | |
| 49 $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); | |
| 50 } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward | |
| 51 $is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap); | |
| 52 } else { # R3, reverse | |
| 53 $is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap); | |
| 54 } | |
| 55 } else { | |
| 56 if ($t[1] & 0x10) { # reverse | |
| 57 $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads | |
| 58 } else { | |
| 59 $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap); | |
| 60 } | |
| 61 } | |
| 62 } | |
| 63 } else { | |
| 64 warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n"); | |
| 65 next; | |
| 66 } | |
| 67 ++$c0[$q]; | |
| 68 ++$c1[$q] unless ($is_correct); | |
| 69 @{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]}); | |
| 70 ++$fnfp{$t[4]}[0]; | |
| 71 ++$fnfp{$t[4]}[1] unless ($is_correct); | |
| 72 print STDERR $line if (($flag&1) && !$is_correct && $q > 0); | |
| 73 } | |
| 74 # print | |
| 75 my ($cc0, $cc1) = (0, 0); | |
| 76 if (!defined($opts{a})) { | |
| 77 for (my $i = $max_q; $i >= 0; --$i) { | |
| 78 $c0[$i] = 0 unless (defined $c0[$i]); | |
| 79 $c1[$i] = 0 unless (defined $c1[$i]); | |
| 80 $cc0 += $c0[$i]; $cc1 += $c1[$i]; | |
| 81 printf("%.2dx %12d / %-12d %12d %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0); | |
| 82 } | |
| 83 } else { | |
| 84 for (reverse(sort {$a<=>$b} (keys %fnfp))) { | |
| 85 next if ($_ == 0); | |
| 86 $cc0 += $fnfp{$_}[0]; | |
| 87 $cc1 += $fnfp{$_}[1]; | |
| 88 print join("\t", $_, $cc0, $cc1), "\n"; | |
| 89 } | |
| 90 } | |
| 91 } | 
