Mercurial > repos > bioitcore > splicetrap
diff bin/mark.mt.4eland.pl @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/mark.mt.4eland.pl Thu Sep 07 15:06:58 2017 -0400 @@ -0,0 +1,56 @@ +# this file is to convert mult mapped reads to nm reads by simply marked it as NM reads. +# its for the convience of inclusion ratio computation, if one read can be mapped to mult positions in the genome, then it will be marked as NM +# later, can be used to add information for dealing with this mult reads, for example, the coverage in the region + +use strict; +my $inputfilename=$ARGV[0]; +my $LongMarker="L"; +my $ShortMarker="S"; + + +open(input, $inputfilename); +while(my $line=<input>) +{ + #print "new line\n"; + chomp($line); + my @array = split("\t",$line); + my $match=$array[3]; + if( $array[2] eq "NM" or $match eq "") + { + print $line,"\n"; + next; + } + + my $marker=$LongMarker.$ShortMarker; + my @genome_pos; + #while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g) + #this array is used to store the mapped position for this read + my @chr; + my @start; + my @end; + while($match=~/(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g) + { + push @chr, $1; + push @start, $2; + push @end, $3; + } + @chr=sort(@chr); + if (scalar(@chr)<=1) + { + print $line,"\n"; + next; + } + + @start=sort(@start); + @end=sort(@end); + if($chr[0] ne $chr[scalar(@chr)-1] or $start[scalar(@chr)-1]-$start[0]>100000) + { + print $line, "\tMT\n"; + } + else + { + print $line,"\n"; + + } +} +close(input);