annotate pyPRADA_1.2/tools/samtools-0.1.16/misc/novo2sam.pl @ 3:f17965495ec9 draft default tip

Uploaded
author siyuan
date Tue, 11 Mar 2014 12:14:01 -0400
parents acc2ca1a3ba4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #!/usr/bin/perl -w
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 # Contact: lh3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 # Version: 0.1.3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 #Modified by Zayed Albertyn(zayed.albertyn@gmail.com) & Colin Hercus(colin@novocraft.com)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 #use strict;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 #use warnings;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 use Data::Dumper;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 use Getopt::Std;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 &novo2sam;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 exit;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 sub mating {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 my ($s1, $s2) = @_;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 my $isize = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 if ($s1->[2] ne '*' && $s1->[2] eq $s2->[2]) { # then calculate $isize
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 my $x1 = ($s1->[1] & 0x10)? $s1->[3] + length($s1->[9]) : $s1->[3];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 my $x2 = ($s2->[1] & 0x10)? $s2->[3] + length($s2->[9]) : $s2->[3];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 $isize = $x2 - $x1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 # update mate coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 if ($s2->[2] ne '*') {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 @$s1[6..8] = (($s2->[2] eq $s1->[2])? "=" : $s2->[2], $s2->[3], $isize);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 $s1->[1] |= 0x20 if ($s2->[1] & 0x10);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 $s1->[1] |= 0x8;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 if ($s1->[2] ne '*') {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 @$s2[6..8] = (($s1->[2] eq $s2->[2])? "=" : $s1->[2], $s1->[3], -$isize);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 $s2->[1] |= 0x20 if ($s1->[1] & 0x10);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 $s2->[1] |= 0x8;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 sub novo2sam {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 my %opts = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 getopts("p", \%opts);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 die("Usage: novo2sam.pl [-p] <aln.novo>\n") if (@ARGV == 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 my $is_paired = defined($opts{p});
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 # core loop
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 my @s1 = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 my @s2 = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 my ($s_last, $s_curr) = (\@s1, \@s2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 while (<>) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 next if (/^#/);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 next if (/(QC|NM)\s*$/ || /(R\s+\d+)\s*$/);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 &novo2sam_aux($_, $s_curr, $is_paired);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 &mating($s_last, $s_curr);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 print join("\t", @$s_last), "\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 print join("\t", @$s_curr), "\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 @$s_last = (); @$s_curr = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 print join("\t", @$s_last), "\n" if (@$s_last != 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 my $s = $s_last; $s_last = $s_curr; $s_curr = $s;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 print join("\t", @$s_last), "\n" if (@$s_last != 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 sub novo2sam_aux {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 my ($line, $s, $is_paired) = @_;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 chomp($line);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 my @t = split(/\s+/, $line);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 my @variations = @t[13 .. $#t];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 @$s = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 return if ($t[4] ne 'U');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 my $len = length($t[2]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 # read name
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 $s->[0] = substr($t[0], 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 $s->[0] =~ s/\/[12]$//g;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 # initial flag (will be updated later)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 $s->[1] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 $s->[1] |= 1 | 1<<($t[1] eq 'L'? 6 : 7);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 $s->[1] |= 2 if ($t[10] eq '.');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 # read & quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 if ($t[9] eq 'R') {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 $s->[9] = reverse($t[2]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 $s->[10] = reverse($t[3]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 $s->[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 $s->[9] = $t[2]; $s->[10] = $t[3];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 # cigar
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 my $cigarstring ="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 if (scalar @variations ==0 ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 $s->[5] = $len . "M"; # IMPORTANT: this cigar is not correct for gapped alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 #convert to correct CIGAR
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 my $tmpstr = join" ",@variations ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 if ( $tmpstr=~ /\+|\-/ ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 $cigarstring = cigar_method($line,\@variations,$len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 $s->[5]=$cigarstring;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 $s->[5]=$len. "M";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 # coor
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 $s->[2] = substr($t[7], 1); $s->[3] = $t[8];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 $s->[1] |= 0x10 if ($t[9] eq 'R');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 # mapQ
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 $s->[4] = $t[5] > $t[6]? $t[5] : $t[6];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 # mate coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 $s->[6] = '*'; $s->[7] = $s->[8] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 # aux
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 push(@$s, "NM:i:".(@t-13));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 my $md = '';
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 $md = mdtag($md,$line,\@variations,$len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 push(@$s, "MD:Z:$md");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 sub mdtag {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 my $oldmd = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 my $line = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 my $ref =shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 my $rdlen = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 my @variations = @$ref;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 my $string="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 my $mdtag="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 my $t=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 my $q=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 my $deleteflag=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 my $len =0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 foreach $string (@variations) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 my ($indeltype,$insert) = indeltype($string);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 if ($indeltype eq "+") {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 $len = length ($insert);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 $q+=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 next;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 my $pos = $1 if $string =~ /^(\d+)/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 $len = $pos - $t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 if ($len !=0 || ($deleteflag eq 1 && $indeltype eq ">")) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 $mdtag.=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 $t+=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 $q+=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 if ($indeltype eq ">") {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 $mdtag.=$insert;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 $deleteflag=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 $t+=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 $q+=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 if ($indeltype eq "-") {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 my $deletedbase = $2 if $string =~ /(\d+)\-([A-Za-z]+)/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 if ($deleteflag == 0 ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 $mdtag.="^";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 $mdtag.=$deletedbase;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 $deleteflag=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 $t+=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 $len = $rdlen - $q + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 if ($len > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 $mdtag.="$len";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 # print "In:$line\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 # print "MD: OLD => NEW\nMD: $oldmd => $mdtag\n\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 return $mdtag;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 sub indeltype {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 my $string = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 my $insert="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 my $indeltype;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 if ($string =~ /([A-Za-z]+)\>/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 $indeltype=">";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 $insert=$1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 } elsif ($string =~ /\-/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 $indeltype="-";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 } elsif ($string =~ /\+([A-Za-z]+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 $indeltype="+";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 $insert=$1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 return ($indeltype,$insert);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 sub cigar_method {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 my $line = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 my $ref =shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 my $rdlen = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 my @variations = @$ref;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 my $string="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 my $type="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 my $t =1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 my $q=1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 my $indeltype="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199 my $cigar= "";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 my $insert = "";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 my $len=0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 my @cig=();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 foreach $string (@variations) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 next if $string =~ />/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 my $pos = $1 if $string =~ /^(\d+)/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 if ($string =~ /\+([A-Za-z]+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 $indeltype="+";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 $insert = $1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210 }elsif ($string =~ /\-([A-Za-z]+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 $indeltype="-";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 $insert = $1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 #print "$pos $indeltype $insert $t $q\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 $len = $pos - $t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 if ( $len > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 $cigar.=$len."M";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 push(@cig,$len."M");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 $t+=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 $q+=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223 if ($indeltype eq "-") {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 $cigar.="D";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 push(@cig,"D");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 $t++;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 if ($indeltype eq "+") {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229 $len = length ($insert);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 if ($len == 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231 $cigar.="I";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232 push(@cig,"I");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234 if ($len > 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 $cigar.=$len."I";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236 push(@cig,$len."I")
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238 $q+=$len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 $insert="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 $len= $rdlen - $q + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 if ($len > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244 $cigar.=$len."M";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
245 push(@cig,$len."M");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
246 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
247
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
248 $cigar = newcigar($cigar,'D');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
249 $cigar = newcigar($cigar,'I');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
250
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
251 #print "$line\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
252 #print "c CIGAR:\t$cigar\n\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
253 return $cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
254
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
255 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
256
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
257
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
258
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
259 sub newcigar {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
260 my $cigar = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
261 my $char = shift;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
262 my $new = "";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
263 my $copy = $cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
264 #print "$cigar\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
265 $copy =~ s/^($char+)/$1;/g;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
266 #print "$copy\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
267 $copy =~ s/([^0-9$char])($char+)/$1;$2;/g;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
268 #print "$copy\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
269 my @parts = split(/;/,$copy);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
270 my $el="";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
271 foreach $el (@parts) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
272 #print "$el\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
273 if ($el =~ /^$char+$/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
274 $new.=length($el).$char;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
275 }else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
276 $new.=$el;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
277 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
278
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
279 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
280 return $new;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
281 }