annotate SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/samtools-0.1.6/misc/export2sam.pl @ 7:351b3acadd17 default tip

Uploaded
author ryanmorin
date Tue, 18 Oct 2011 18:33:15 -0400
parents 74f5ea818cea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #!/usr/bin/perl -w
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 # Contact: lh3
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 # Version: 0.1.2 (03JAN2009)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6 use strict;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7 use warnings;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8 use Getopt::Std;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 &export2sam;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 exit;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 sub export2sam {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14 my ($fh1, $fh2, $is_paired);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15 $is_paired = (@ARGV >= 2);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 die("export2sam.pl <read1.export> [<read2.export>]\n") if (@ARGV == 0);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 open($fh1, $ARGV[0]) || die;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 if ($is_paired) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 open($fh2, $ARGV[1]) || die;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 # conversion table
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22 my @conv_table;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
23 for (-64..64) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24 $conv_table[$_+64] = chr(int(33 + 10*log(1+10**($_/10.0))/log(10)+.499));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
25 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26 # core loop
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
27 while (<$fh1>) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
28 my (@s1, @s2);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
29 &export2sam_aux($_, \@s1, \@conv_table, $is_paired);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
30 if ($is_paired) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
31 $_ = <$fh2>;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
32 &export2sam_aux($_, \@s2, \@conv_table, $is_paired);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
33 if (@s1 && @s2) { # then set mate coordinate
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
34 my $isize = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
35 if ($s1[2] ne '*' && $s1[2] eq $s2[2]) { # then calculate $isize
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
36 my $x1 = ($s1[1] & 0x10)? $s1[3] + length($s1[9]) : $s1[3];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
37 my $x2 = ($s2[1] & 0x10)? $s2[3] + length($s2[9]) : $s2[3];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
38 $isize = $x2 - $x1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
39 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
40 # update mate coordinate
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
41 if ($s2[2] ne '*') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
42 @s1[6..8] = (($s2[2] eq $s1[2])? "=" : $s2[2], $s2[3], $isize);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
43 $s1[1] |= 0x20 if ($s2[1] & 0x10);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
44 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
45 $s1[1] |= 0x8;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
46 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
47 if ($s1[2] ne '*') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
48 @s2[6..8] = (($s1[2] eq $s2[2])? "=" : $s1[2], $s1[3], -$isize);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
49 $s2[1] |= 0x20 if ($s1[1] & 0x10);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
50 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
51 $s2[1] |= 0x8;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
52 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
53 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
54 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
55 print join("\t", @s1), "\n" if (@s1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
56 print join("\t", @s2), "\n" if (@s2 && $is_paired);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
57 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
58 close($fh1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
59 close($fh2) if ($is_paired);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
60 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
61
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
62 sub export2sam_aux {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
63 my ($line, $s, $ct, $is_paired) = @_;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
64 chomp($line);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
65 my @t = split("\t", $line);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
66 @$s = ();
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
67 return if ($t[21] ne 'Y');
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
68 # read name
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
69 $s->[0] = $t[1]? "$t[0]_$t[1]:$t[2]:$t[3]:$t[4]:$t[5]" : "$t[0]:$t[2]:$t[3]:$t[4]:$t[5]";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
70 # initial flag (will be updated later)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
71 $s->[1] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
72 $s->[1] |= 1 | 1<<(5 + $t[7]) if ($is_paired);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
73 # read & quality
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
74 $s->[9] = $t[8]; $s->[10] = $t[9];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
75 if ($t[13] eq 'R') { # then reverse the sequence and quality
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
76 $s->[9] = reverse($t[8]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
77 $s->[9] =~ tr/ACGTacgt/TGCAtgca/;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
78 $s->[10] = reverse($t[9]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
79 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
80 $s->[10] =~ s/(.)/$ct->[ord($1)]/eg; # change coding
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
81 # cigar
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
82 $s->[5] = length($s->[9]) . "M";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
83 # coor
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
84 my $has_coor = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
85 $s->[2] = "*";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
86 if ($t[10] eq 'NM' || $t[10] eq 'QC') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
87 $s->[1] |= 0x4; # unmapped
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
88 } elsif ($t[10] =~ /(\d+):(\d+):(\d+)/) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
89 $s->[1] |= 0x4; # TODO: should I set BAM_FUNMAP in this case?
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
90 push(@$s, "H0:i:$1", "H1:i:$2", "H2:i:$3")
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
91 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
92 $s->[2] = $t[10];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
93 $has_coor = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
94 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
95 $s->[3] = $has_coor? $t[12] : 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
96 $s->[1] |= 0x10 if ($has_coor && $t[13] eq 'R');
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
97 # mapQ (TODO: should I choose the larger between $t[15] and $t[16]?)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
98 $s->[4] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
99 $s->[4] = $t[15] if ($t[15] ne '');
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
100 $s->[4] = $t[16] if ($t[16] ne '' && $s->[4] < $t[16]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
101 # mate coordinate
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
102 $s->[6] = '*'; $s->[7] = $s->[8] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
103 # aux
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
104 push(@$s, "BC:Z:$t[6]") if ($t[6]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
105 push(@$s, "MD:Z:$t[14]") if ($has_coor);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
106 push(@$s, "SM:i:$t[15]") if ($is_paired && $has_coor);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
107 }