# This script converts multi-mapped reads to non-mapped (NM) reads by simply
# marking them (the code below appends an "MT" column to the read's line).
# It exists for the convenience of the inclusion-ratio computation: if a read
# can be mapped to multiple positions in the genome, it is marked.
# Later, it can be extended with more information for dealing with these
# multi-mapped reads, for example the coverage in the region.

use strict;
use warnings;

# Largest allowed distance between the smallest and the largest hit start
# coordinate before a read is considered to map to multiple places.
use constant MAX_START_SPREAD => 100000;

# is_multi_mapped($match)
#
# $match is the hit-list field of one input line. Every substring of the form
# "chr<name>|<digits>|<digits>|" is taken as one hit (chromosome, start, end).
#
# Returns 1 when there are at least two hits and either
#   * the hits are not all on the same chromosome, or
#   * the largest start minus the smallest start exceeds MAX_START_SPREAD.
# Returns 0 otherwise (including zero or one hit).
sub is_multi_mapped {
    my ($match) = @_;

    my (@chrs, @starts);
    while ($match =~ /(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g) {
        my ($chr, $start) = ($1, $2);
        push @chrs, $chr;

        # \d* may match nothing; an empty coordinate counts as 0, which is
        # what the arithmetic below would make of it anyway.
        push @starts, length($start) ? $start : 0;
    }
    return 0 if @chrs <= 1;

    # After a string sort, first and last differ iff at least two distinct
    # chromosome names are present.
    my ($first_chr, $last_chr) = (sort @chrs)[0, -1];
    return 1 if $first_chr ne $last_chr;

    # Coordinates must be sorted numerically: a plain sort() compares strings
    # ("200000" lt "9"), which made the spread negative and hid distant hits.
    my ($min_start, $max_start) = (sort { $a <=> $b } @starts)[0, -1];
    return ($max_start - $min_start > MAX_START_SPREAD) ? 1 : 0;
}

# annotate_line($line)
#
# $line is one chomped, tab-separated input line. The third column is compared
# against "NM" and the fourth column is used as the hit list.
#
# Returns the line unchanged when the third column is "NM", when the fourth
# column is missing/empty, or when the hits do not qualify as multi-mapped.
# Otherwise returns the line with "\tMT" appended.
sub annotate_line {
    my ($line) = @_;

    my @fields = split /\t/, $line;
    my $status = defined $fields[2] ? $fields[2] : '';
    my $match  = defined $fields[3] ? $fields[3] : '';

    return $line if $status eq 'NM' or $match eq '';
    return is_multi_mapped($match) ? "$line\tMT" : $line;
}

# main($input_filename)
#
# Reads the given file line by line and prints every line to STDOUT, with
# "\tMT" appended to the lines annotate_line() flags. Dies when the file name
# is missing or the file cannot be opened or closed.
sub main {
    my ($input_filename) = @_;

    die "Usage: $0 <input_file>\n" unless defined $input_filename;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and failures are reported.
    open(my $in, '<', $input_filename)
        or die "Cannot open '$input_filename': $!\n";

    while (my $line = <$in>) {
        chomp $line;
        print annotate_line($line), "\n";
    }

    close($in) or die "Cannot close '$input_filename': $!\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded with
# require/do without processing @ARGV.
main($ARGV[0]) unless caller;