diff PsiCLASS-1.0.2/samtools-0.1.19/misc/wgsim_eval.pl @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PsiCLASS-1.0.2/samtools-0.1.19/misc/wgsim_eval.pl	Fri Mar 26 16:52:45 2021 +0000
@@ -0,0 +1,91 @@
+#!/usr/bin/perl -w
+
+# Contact: lh3
+# Version: 0.1.5
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+&wgsim_eval;
+exit;
+
+sub wgsim_eval {
+  my %opts = (g=>5);
+  getopts('pcag:', \%opts);
+  die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] <in.sam>\n") if (@ARGV == 0 && -t STDIN);
+  my (@c0, @c1, %fnfp);
+  my ($max_q, $flag) = (0, 0);
+  my $gap = $opts{g};
+  $flag |= 1 if (defined $opts{p});
+  $flag |= 2 if (defined $opts{c});
+  while (<>) {
+	next if (/^\@/);
+	my @t = split("\t");
+	next if (@t < 11);
+	my $line = $_;
+	my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]);
+	$max_q = $q if ($q > $max_q);
+	# right coordinate
+	$_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg;
+	--$rght;
+	# correct for soft clipping
+	my ($left0, $rght0) = ($left, $rght);
+	$left -= $1 if (/^(\d+)[SH]/);
+	$rght += $1 if (/(\d+)[SH]$/);
+	$left0 -= $1 if (/(\d+)[SH]$/);
+	$rght0 += $1 if (/^(\d+)[SH]/);
+	# skip unmapped reads
+	next if (($t[1]&0x4) || $chr eq '*');
+	# parse read name and check
+	if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) {
+	  if ($1 ne $chr) { # different chr
+		$is_correct = 0;
+	  } else {
+		if ($flag & 2) {
+		  if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward
+			$is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
+		  } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse
+			$is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap);
+		  } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward
+			$is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap);
+		  } else { # R3, reverse
+			$is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap);
+		  }
+		} else {
+		  if ($t[1] & 0x10) { # reverse
+			$is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads
+		  } else {
+			$is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
+		  }
+		}
+	  }
+	} else {
+	  warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n");
+	  next;
+	}
+	++$c0[$q];
+	++$c1[$q] unless ($is_correct);
+	@{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]});
+	++$fnfp{$t[4]}[0];
+	++$fnfp{$t[4]}[1] unless ($is_correct);
+	print STDERR $line if (($flag&1) && !$is_correct && $q > 0);
+  }
+  # print
+  my ($cc0, $cc1) = (0, 0);
+  if (!defined($opts{a})) {
+	for (my $i = $max_q; $i >= 0; --$i) {
+	  $c0[$i] = 0 unless (defined $c0[$i]);
+	  $c1[$i] = 0 unless (defined $c1[$i]);
+	  $cc0 += $c0[$i]; $cc1 += $c1[$i];
+	  printf("%.2dx %12d / %-12d  %12d  %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0);
+	}
+  } else {
+	for (reverse(sort {$a<=>$b} (keys %fnfp))) {
+	  next if ($_ == 0);
+	  $cc0 += $fnfp{$_}[0];
+	  $cc1 += $fnfp{$_}[1];
+	  print join("\t", $_, $cc0, $cc1), "\n";
+	}
+  }
+}