diff scripts/refine_breakpoint.ex.pl @ 0:28d1a6f8143f draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author portiahollyoak
date Mon, 25 Apr 2016 13:08:56 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/refine_breakpoint.ex.pl	Mon Apr 25 13:08:56 2016 -0400
@@ -0,0 +1,168 @@
+#! /usr/bin/perl
+
+use strict;
+
+my @files=<*.excision.cluster.*>;
+foreach my $file (@files) {
+    if (($file !~ /sfcp/)&&($file !~ /refsup/)) {
+	my $sfcp=$file.".sfcp";
+	my $title=$file.".refined.bp";
+	
+	open (input, "<$file") or die "Can't open $file since $!\n";
+	open (input1, "<$sfcp") or die "Can't open $sfcp since $!\n";
+	open (output, ">>$title") or die "Can't open $title since $!\n";
+	print output "Chr\tStart\tEnd\tTransposonName\t5\'_Junction\t3\'_Junction\n";
+	while (my $line=<input>) {
+	    chomp($line);
+	    my @a=split(/\s+/, $line);
+	    my $line1=<input1>;
+	    chomp($line1);
+	    my @b=split(/\t/, $line1);
+	    my @pos=split(/\;/, $b[4]);
+	    my $plusnext=""; my $minusnext="";
+	    my $plusover=0; my $minusover=0;
+	    my $lpcoor=""; my $lmcoor=""; my $rpcoor=""; my $rmcoor="";
+	    my $lp=0; my $lm=0; my $rp=0; my $rm=0;
+	    my %plus=(); my %minus=();
+	    foreach my $site (@pos) {
+		my @x=split(/\:/, $site);
+		my @y=split(/\(/, $x[0]);
+		chop($y[1]);
+		if (($y[0] =~ /\-/)&&($y[1] eq "+")&&($x[1] >= $plusover)) {
+		    if ($plusover >= 2) {$plusnext="$lpcoor\-$rpcoor\:$plusover";}
+		    $plusover=$x[1]; 
+		    my @z=split(/\-/, $y[0]);
+		    $lpcoor=$z[0]; $lp=$x[1];
+		    $rpcoor=$z[1]; $rp=$x[1];
+		}
+		elsif (($y[0] =~ /\-/)&&($y[1] eq "-")&&($x[1] >= $minusover)) {
+		    if ($minusover >= 2) {$minusnext="$lmcoor\-$rmcoor\:$minusover";}
+		    $minusover=$x[1];
+		    my @z=split(/\-/, $y[0]);
+		    $lmcoor=$z[0]; $lm=$x[1];
+		    $rmcoor=$z[1]; $rm=$x[1];
+		}
+		elsif (($y[0] !~ /\-/)&&($y[1] eq "+")) {
+		    $plus{$y[0]}=$x[1];
+		}
+		elsif (($y[0] !~ /\-/)&&($y[1] eq "-")) {
+		    $minus{$y[0]}=$x[1];
+		}
+	    }
+	    
+	    if (($plusnext ne "")&&($minusover == 0)) {
+		my @m=split(/\:/, $plusnext);
+		if (($m[1] >= 2)&&($m[1] == $plusover)) {
+		    my $count1=$m[1]; my $count2=$plusover;
+		    foreach my $id (keys %plus) {
+			if ($m[0] =~ /$id/) {$count1 += $plus{$id};}
+			elsif ($rpcoor == $id) {$count2 += $plus{$id};}
+		    }
+		    if ($count1 > $count2) {
+			my @n=split(/\-/, $m[0]);
+			$lpcoor=$n[0]; $lp=$m[1];
+			$rpcoor=$n[1]; $rp=$m[1];
+		    }
+		}
+	    }
+
+	    if (($minusnext ne "")&&($plusover == 0)) {
+		my @m=split(/\:/, $minusnext);
+		if (($m[1] >= 2)&&($m[1] == $minusover)) {
+		    my $count1=$m[1]; my $count2=$minusover;
+		    foreach my $id (keys %minus) {
+			if ($m[0] =~ /$id/) {$count1 += $minus{$id};}
+			elsif ($lmcoor == $id) {$count2 += $minus{$id};}
+		    }
+		    if ($count1 > $count2) {
+			my @n=split(/\-/, $m[0]);
+			$lmcoor=$n[0]; $lm=$m[1];
+			$rmcoor=$n[1]; $rm=$m[1];
+		    }
+		}
+	    }			    
+
+	    if (($plusover >= 2)&&($minusover >= 2)&&(($lpcoor-$rpcoor) != ($lmcoor-$rmcoor))) {
+		if ($plusnext ne "") {
+		    my @m=split(/\:/, $plusnext);
+		    my @n=split(/\-/, $m[0]);
+		    if ((($n[1]-$n[0]) == ($rmcoor-$lmcoor))&&($m[1] >= 2)) {
+			$rpcoor=$n[1];
+			$lpcoor=$n[0];
+			$plusover=$m[1];
+			$lp=$m[1];
+			$rp=$m[1];
+		    }
+		}
+		if ($minusnext ne "") {
+                    my @m=split(/\:/, $minusnext);
+                    my @n=split(/\-/, $m[0]);
+                    if ((($n[1]-$n[0]) == ($rpcoor-$lpcoor))&&($m[1] >= 2)) {
+			$rmcoor=$n[1];
+			$lmcoor=$n[0];
+			$minusover=$m[1];
+			$lm=$m[1];
+			$rm=$m[1];
+                    }
+		}
+	    }
+
+	    my $plusc=0; my $pluscoor="";
+	    my $minusc=0; my $minuscoor="";
+	    foreach my $id (keys %plus) {
+		if ($id eq $rpcoor) {
+		    $rp=$plusover+$plus{$id};
+		}
+		if ($plus{$id} > $plusc) {
+		    $plusc=$plus{$id};
+		    $pluscoor=$id;
+		}
+		elsif (($plus{$id} == $plusc)&&(abs($id-$b[2]) < abs($pluscoor-$b[2]))) {
+                    $plusc=$plus{$id};
+                    $pluscoor=$id;
+		}
+	    }
+	    foreach my $id (keys %minus) {
+		if ($id eq $lmcoor) {
+		    $lm=$minusover+$minus{$id};
+		}
+		if ($minus{$id} > $minusc) {
+		    $minusc=$minus{$id};
+		    $minuscoor=$id;
+		}
+		elsif (($minus{$id} == $minusc)&&(abs($id-$b[1]) < abs($minuscoor-$b[1]))) {
+                    $minusc=$minus{$id};
+                    $minuscoor=$id;
+		}
+	    }
+	    if ($plusover < 2) {
+		$lpcoor="";
+		if ($plusc >= 3) {$rpcoor=$pluscoor; $rp=$plusc;}
+		else {$rpcoor="";}
+	    }
+	    if ($minusover < 2) {
+		$rmcoor="";
+		if ($minusc >= 3) {$lmcoor=$minuscoor; $lm=$minusc;}
+		else {$lmcoor="";}
+	    }	    
+		
+	    my $bp1=""; my $bp2="";
+	    if (($lpcoor ne "")&&($lmcoor ne "")) {
+		$bp1="$lpcoor\(\+\)\:$lp,$lmcoor\(\-\)\:$lm";
+	    }
+	    elsif ($lpcoor ne "") {$bp1="$lpcoor\(\+\)\:$lp";}
+	    elsif ($lmcoor ne "") {$bp1="$lmcoor\(\-\)\:$lm";}
+	    if (($rpcoor ne "")&&($rmcoor ne "")) {
+		$bp2="$rpcoor\(\+\)\:$rp,$rmcoor\(\-\)\:$rm";
+	    }
+	    elsif ($rpcoor ne "") {$bp2="$rpcoor\(\+\)\:$rp";}
+	    elsif ($rmcoor ne "") {$bp2="$rmcoor\(\-\)\:$rm";}
+
+	    print output "$a[2]\t$a[3]\t$a[4]\t$a[5]\t$bp1\t$bp2\n";
+	}
+	
+	close input;
+	close input1;
+	close output;
+    }
+}