view bin/mark.mt.4eland.pl @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
line wrap: on
line source

# This script converts multi-mapped reads to NM reads by simply marking them (the code appends an "MT" column to the line).
# It is for the convenience of the inclusion-ratio computation: if one read can be mapped to multiple positions in the genome, then it will be marked.
# Later, it can be extended to add information for dealing with these multi-mapped reads, for example, the coverage in the region.

use strict;
use warnings;

# Usage: perl mark.mt.4eland.pl <input-file>
#
# Reads a tab-separated file line by line and echoes every line to STDOUT.
# A line gets an extra trailing "MT" column when its match field (4th column)
# lists more than one hit of the form "chr...|start|end|" and those hits are
# either on different chromosomes or have start coordinates that differ by
# more than $MAX_START_SPREAD. All other lines are printed unchanged:
#   - lines whose 3rd column is "NM",
#   - lines whose 4th column is missing or empty,
#   - lines with zero or one recognised hit,
#   - lines whose hits are all on one chromosome and close together.
# NOTE(review): the input is presumably ELAND-style output (per the file
# name); only the columns described above are actually inspected here.

# Maximum allowed difference between the largest and smallest hit start
# before a same-chromosome read is flagged as MT.
my $MAX_START_SPREAD = 100000;

my $inputfilename = $ARGV[0];
die "Usage: $0 <input-file>\n" unless defined $inputfilename;

# Three-argument open with a lexical handle; fail loudly instead of silently
# producing empty output when the file cannot be read.
open(my $input, '<', $inputfilename)
	or die "Cannot open '$inputfilename': $!\n";

while (my $line = <$input>)
{
	chomp($line);
	my @fields = split(/\t/, $line);

	# Short lines may lack these columns; treat a missing column as empty.
	my $status = defined $fields[2] ? $fields[2] : "";
	my $match  = defined $fields[3] ? $fields[3] : "";

	if ($status eq "NM" or $match eq "")
	{
		print $line, "\n";
		next;
	}

	# Scan every hit once, tracking how many there are, whether they all
	# share the first hit's chromosome, and the numeric min/max start.
	my $hit_count   = 0;
	my $chr_differs = 0;
	my $first_chr;
	my $min_start;
	my $max_start;
	while ($match =~ /(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g)
	{
		my $chr   = $1;
		# \d* can match nothing; such a start counts as 0 in the arithmetic.
		my $start = $2 eq "" ? 0 : $2;

		$hit_count++;
		if (!defined $first_chr)
		{
			$first_chr = $chr;
			$min_start = $start;
			$max_start = $start;
			next;
		}

		$chr_differs = 1 if $chr ne $first_chr;
		# Numeric comparison: a plain sort() would order starts as strings
		# ("100000000" before "9") and give a wrong spread.
		$min_start = $start if $start < $min_start;
		$max_start = $start if $start > $max_start;
	}

	if ($hit_count <= 1)
	{
		print $line, "\n";
		next;
	}

	if ($chr_differs or $max_start - $min_start > $MAX_START_SPREAD)
	{
		print $line, "\tMT\n";
	}
	else
	{
		print $line, "\n";
	}
}
close($input);