comparison scripts/pickUniqMate.pl @ 0:28d1a6f8143f draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author portiahollyoak
date Mon, 25 Apr 2016 13:08:56 -0400
parents
children ca36262102d8
comparison
equal deleted inserted replaced
-1:000000000000 0:28d1a6f8143f
1 #!/share/bin/perl
2 use List::Util qw(sum);
3 use Bio::Seq;
4
5 die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1;
6
7 open in,$ARGV[1];
8 my %uniq;
9 while(<in>)
10 {
11 chomp;
12 my @f=split;
13 $uniq{$f[3]}=[@f];
14 }
15 close in;
16
17 open in,$ARGV[0];
18 my (%te,@ref,%ref);
19 while(<in>)
20 {
21 chomp;
22 my @f=split/\t/,$_,12;
23 # headers
24 if(/^\@SQ/)
25 {
26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/;
27 push @ref,[$sn,$ln];
28 $ref{$sn}=$#ref;
29 next;
30 }
31
32 # unmapped
33 next if $f[2] eq "*";
34
35 # alignments
36 if($f[11]=~/XT:A:/)
37 {
38 my ($rnum)=$f[1]=~/(\d)$/;
39 # CIGAR
40 my (@cigar_m)=$f[5]=~/(\d+)M/g;
41 my (@cigar_d)=$f[5]=~/(\d+)D/g;
42 my (@cigar_s)=$f[5]=~/(\d+)S/g;
43 my (@cigar_i)=$f[5]=~/(\d+)I/g;
44 my $aln_ln=sum(@cigar_m,@cigar_d);
45
46 my $strand="+";
47 if($f[1]=~/r/)
48 {
49 my $seq=Bio::Seq->new(-seq=>$f[9]);
50 $f[9]=$seq->revcom->seq;
51 $strand="-";
52 }
53
54 # align to the junctions
55 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1])
56 {
57 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1])
58 {
59 $f[2]=${$ref[$ref{$f[2]}+1]}[0];
60 $f[3]=1;
61 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
62 }
63 else
64 {
65 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
66 }
67 }
68
69 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";";
70
71 # XA tag
72 if($f[11]=~/XA:Z:/)
73 {
74 my ($xa)=$f[11]=~/XA:Z:(.*);$/;
75 my @xa=split(";",$xa);
76 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa;
77 }
78 }
79 }
80 close in;
81
82 foreach my $id (keys %pe)
83 {
84 next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"};
85 foreach my $rid (keys %{$pe{$id}})
86 {
87 my $mate_id=($rid==1)?2:1;
88 if(exists $uniq{$id."/".$mate_id})
89 {
90 ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid};
91 print join("\t",@{$uniq{$id."/".$mate_id}}),"\n";
92 }
93 }
94 }