diff bin/mark.mt.4eland.pl @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/mark.mt.4eland.pl	Thu Sep 07 15:06:58 2017 -0400
@@ -0,0 +1,56 @@
+# this file is to convert mult mapped reads to nm reads by simply marked it as NM reads. 
+# its for the convience of inclusion ratio computation, if one read can be mapped to mult positions in the genome, then it will be marked as NM
+# later, can be used to add information for dealing with this mult reads, for example, the coverage in the region
+
+use strict;
+my $inputfilename=$ARGV[0];
+my $LongMarker="L";
+my $ShortMarker="S";
+
+
+open(input, $inputfilename);
+while(my $line=<input>)
+{
+	#print "new line\n";
+	chomp($line);
+	my @array = split("\t",$line);
+	my $match=$array[3];
+	if( $array[2] eq "NM" or $match eq "")
+	{
+		print $line,"\n";
+		next;
+	}
+
+	my $marker=$LongMarker.$ShortMarker;
+	my @genome_pos;
+	#while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g)
+	#this array is used to store the mapped position for this read
+	my @chr;
+	my @start;
+	my @end;
+	while($match=~/(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g)
+	{
+		push @chr, $1;
+		push @start, $2;
+		push @end, $3;
+	}
+	@chr=sort(@chr);
+	if (scalar(@chr)<=1)
+	{
+                print $line,"\n";
+                next;
+        }
+
+	@start=sort(@start);
+	@end=sort(@end);
+	if($chr[0] ne $chr[scalar(@chr)-1] or $start[scalar(@chr)-1]-$start[0]>100000)
+	{
+		print $line, "\tMT\n";
+	}
+	else
+	{
+		print $line,"\n";
+
+	}
+}
+close(input);