annotate scripts/pickUniqMate.pl @ 9:20a6e46295d6 draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 2bc118a40da59fc3a2b8ee9ed04152c8effff94f-dirty
author portiahollyoak
date Tue, 26 Apr 2016 06:36:46 -0400
parents 28d1a6f8143f
children ca36262102d8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
1 #!/share/bin/perl
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
2 use List::Util qw(sum);
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
3 use Bio::Seq;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
4
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
5 die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
6
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
7 open in,$ARGV[1];
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
8 my %uniq;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
9 while(<in>)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
10 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
11 chomp;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
12 my @f=split;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
13 $uniq{$f[3]}=[@f];
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
14 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
15 close in;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
16
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
17 open in,$ARGV[0];
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
18 my (%te,@ref,%ref);
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
19 while(<in>)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
20 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
21 chomp;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
22 my @f=split/\t/,$_,12;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
23 # headers
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
24 if(/^\@SQ/)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
25 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
27 push @ref,[$sn,$ln];
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
28 $ref{$sn}=$#ref;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
29 next;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
30 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
31
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
32 # unmapped
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
33 next if $f[2] eq "*";
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
34
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
35 # alignments
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
36 if($f[11]=~/XT:A:/)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
37 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
38 my ($rnum)=$f[1]=~/(\d)$/;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
39 # CIGAR
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
40 my (@cigar_m)=$f[5]=~/(\d+)M/g;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
41 my (@cigar_d)=$f[5]=~/(\d+)D/g;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
42 my (@cigar_s)=$f[5]=~/(\d+)S/g;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
43 my (@cigar_i)=$f[5]=~/(\d+)I/g;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
44 my $aln_ln=sum(@cigar_m,@cigar_d);
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
45
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
46 my $strand="+";
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
47 if($f[1]=~/r/)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
48 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
49 my $seq=Bio::Seq->new(-seq=>$f[9]);
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
50 $f[9]=$seq->revcom->seq;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
51 $strand="-";
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
52 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
53
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
54 # align to the junctions
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
55 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1])
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
56 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
57 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1])
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
58 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
59 $f[2]=${$ref[$ref{$f[2]}+1]}[0];
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
60 $f[3]=1;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
61 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
62 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
63 else
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
64 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
65 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
66 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
67 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
68
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
69 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";";
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
70
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
71 # XA tag
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
72 if($f[11]=~/XA:Z:/)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
73 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
74 my ($xa)=$f[11]=~/XA:Z:(.*);$/;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
75 my @xa=split(";",$xa);
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
76 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
77 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
78 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
79 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
80 close in;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
81
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
82 foreach my $id (keys %pe)
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
83 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
84 next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"};
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
85 foreach my $rid (keys %{$pe{$id}})
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
86 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
87 my $mate_id=($rid==1)?2:1;
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
88 if(exists $uniq{$id."/".$mate_id})
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
89 {
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
90 ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid};
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
91 print join("\t",@{$uniq{$id."/".$mate_id}}),"\n";
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
92 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
93 }
28d1a6f8143f planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
portiahollyoak
parents:
diff changeset
94 }