comparison scripts/pickOverlapPair.ex_MEM.pl @ 12:ca36262102d8 draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 5d021f520b653582862ec98dd812a051b804aa50
author portiahollyoak
date Fri, 29 Apr 2016 05:47:54 -0400
parents
children 9672fe07a232
comparison
equal deleted inserted replaced
11:e19d9742c99b 12:ca36262102d8
1 #!/share/bin/perl
2 use Bio::Seq;
3 use List::Util qw(sum);
4
5 die "perl $0 <*.excision.cluster.rpmk.refined.bp>\n" if @ARGV<0;
6
7 my $title=$ARGV[0];
8 if ($title =~ /annotation/) {
9 $title =~ s/excision.cluster.annotation.refined.bp/sorted.bam/;
10 }
11 else {$title =~ s/excision.cluster.rpmk.refined.bp/sorted.bam/;}
12 #system("samtools index /home/wangj2/scratch/bill/bill_genomic/$title");
13
14 my %chrs=();
15 system("samtools view -H $title > header");
16 open (input, "<header") or die "Can't open header since $!\n";
17 while (my $line=<input>) {
18 if ($line =~ /^\@SQ/) {
19 my @a=split(/\t/, $line);
20 for my $j (0..$#a) {
21 if ($a[$j] =~ /^SN:/) {
22 $a[$j] =~ s/^SN://;
23 $chrs{$a[$j]}=1;
24 }
25 }
26 }
27 }
28 close input;
29 system("rm header");
30
31 open (input, "<$ARGV[0]") or die "Can't open $ARGV[0] since $!\n";
32 my $header=<input>;
33 while (my $line=<input>) {
34 chomp($line);
35 my @a=split(/\t/, $line);
36
37 my $left=0;
38 my $right=0;
39 if ($a[4] eq "") {$left=$a[1];}
40 else {
41 my @t=split(/\,/, $a[4]);
42 my @p=split(/\(/, $t[$#t]);
43 $left=$p[0];
44 }
45 if ($a[5] eq "") {$right=$a[2];}
46 else {
47 my @t=split(/\,/, $a[5]);
48 my @p=split(/\(/, $t[0]);
49 $right=$p[0];
50 }
51
52 my $leftlower=$left-500;
53 my $leftupper=$left+500;
54 my $rightlower=$right-500;
55 my $rightupper=$right+500;
56 my $chr_num=$a[0];
57 $chr_num =~ s/chr//;
58 if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];}
59 system("samtools view -Xf 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam");
60
61 open in,"temp.sam";
62 my %ps=();
63 my %me=();
64 my %uniqp=();
65 my %uniqm=();
66 my $ref_sup=0;
67
68 while(<in>)
69 {
70 chomp;
71 my @f=split/\t/,$_,12;
72 ## read number 1 or 2
73 my ($rnum)=$f[1]=~/(\d)$/;
74
75 ## XT:A:*
76 my $xt="";
77 my @a=split(/\s+/, $_);
78 my $as=0;
79 my $xs=0;
80 for my $i (11..$#a) {
81 if ($a[$i] =~ /^AS:i:/) {
82 $a[$i] =~ s/AS:i://;
83 $as=$a[$i];
84 }
85 elsif ($a[$i] =~ /^XS:i:/) {
86 $a[$i] =~ s/XS:i://;
87 $xs=$a[$i];
88 }
89 if (($xs > 0) && ($as-$xs <= $ARGV[2])) {$xt="R";}
90 elsif ($as > 0) {$xt="U";}
91 }
92
93 ## Coordinate
94 my $coor=$f[3];
95 if ($f[1]=~/r/)
96 {
97 if ($xt eq "U") {$uniqm{$f[0]}=1;}
98 my (@cigar_m)=$f[5]=~/(\d+)M/g;
99 my (@cigar_d)=$f[5]=~/(\d+)D/g;
100 my (@cigar_s)=$f[5]=~/(\d+)S/g;
101 my (@cigar_i)=$f[5]=~/(\d+)I/g;
102 my $aln_ln=sum(@cigar_m,@cigar_d);
103 $me{$f[0]}=$f[3]+$aln_ln-1;
104 }
105 elsif ($f[1]=~/R/) {
106 $ps{$f[0]}=$f[3];
107 if ($xt eq "U") {$uniqp{$f[0]}=1;}
108 }
109
110 # ${$pe{$f[0]}}[$rnum-1]=[$xt,$coor];
111 }
112 close in;
113
114 foreach my $id (keys %ps)
115 {
116 # my @rid=@{$pe{$id}};
117
118 # if(($rid[0][0] eq "U" && $rid[1][0] eq "M") || ($rid[0][0] eq "M" && $rid[1][0] eq "U"))
119 # {
120 # $soft_clip++;
121 # print "$id\n";
122 # }
123
124 if ((defined $me{$id})&&((defined $uniqp{$id})||(defined $uniqm{$id})))
125 {
126 if (((($ps{$id}+5)<=$right)&&($me{$id}>$right)&&($uniqm{$id}==1)) || ((($me{$id}-5)>=$left)&&($ps{$id}<$left)&&($uniqp{$id}==1))) {
127 $ref_sup++;
128 }
129 }
130 }
131
132 print "$a[0]\t$a[1]\t$a[2]\t$a[3]\t$left\t$right\t$ref_sup\n";
133 system("rm temp.sam");
134 }
135