annotate bin/mark.mt.4eland.pl @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
1 # This script flags multi-mapped reads so that they can be treated like NM reads: each such line gets a trailing "MT" column.
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
2 # It exists for the convenience of the inclusion-ratio computation: if one read can be mapped to multiple positions in the genome, it is marked (MT) so that it can be handled as NM.
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
3 # Later, this can be extended with more information for dealing with multi-mapped reads, for example the coverage in the region.
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
4
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
use strict;
use warnings;

# is_multi_target($match)
#
# Decide whether the hit list of one read describes a "multi-target" (MT)
# read.  Hits are extracted from $match with the pattern
# "chr<name>|<start>|<end>|" (any text between hits is ignored).
#
# Returns 1 when the read has at least two hits AND either
#   * the hits are on more than one chromosome name, or
#   * the smallest and largest start coordinate are more than 100000 apart.
# Returns 0 otherwise (including undefined input, no hits, or a single hit).
sub is_multi_target {
    my ($match) = @_;
    return 0 unless defined $match;

    my $max_start_span = 100000;

    my @chrs;
    my @starts;
    while ($match =~ /(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g) {
        push @chrs, $1;
        # The pattern allows an empty coordinate; count it as 0, which is the
        # value an empty string takes in arithmetic anyway, without tripping
        # a "not numeric" warning.
        push @starts, length($2) ? $2 : 0;
    }
    return 0 if @chrs <= 1;

    # After a string sort, first != last means at least two distinct names.
    @chrs = sort @chrs;
    return 1 if $chrs[0] ne $chrs[-1];

    # Coordinates must be sorted numerically: a plain sort() compares them as
    # strings ("200000" lt "9"), which makes last-minus-first meaningless.
    @starts = sort { $a <=> $b } @starts;
    return ($starts[-1] - $starts[0] > $max_start_span) ? 1 : 0;
}

# main($inputfilename)
#
# Read the tab-separated file line by line and echo every line to STDOUT.
# Lines whose third column is "NM", or whose fourth column is missing or
# empty, are echoed unchanged.  Every other line is echoed unchanged unless
# is_multi_target() accepts its fourth column, in which case an extra
# "\tMT" column is appended.
#
# Dies when no file name is given or the file cannot be opened.
sub main {
    my ($inputfilename) = @_;
    die "Usage: $0 <input_file>\n" unless defined $inputfilename;

    open(my $in, '<', $inputfilename)
        or die "Cannot open '$inputfilename' for reading: $!\n";

    while (my $line = <$in>) {
        chomp($line);
        my @fields = split("\t", $line);
        my $status = $fields[2];
        my $match  = $fields[3];

        # split() drops trailing empty fields, so short lines leave these
        # columns undefined; treat that the same as an empty hit list.
        my $unmapped = (defined $status && $status eq "NM")
            || !defined $match
            || $match eq "";

        if (!$unmapped && is_multi_target($match)) {
            print $line, "\tMT\n";
        }
        else {
            print $line, "\n";
        }
    }

    close($in);
    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised on their own.
main(@ARGV) unless caller;