comparison scripts/pickUniqMate.pl @ 12:ca36262102d8 draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 5d021f520b653582862ec98dd812a051b804aa50
author portiahollyoak
date Fri, 29 Apr 2016 05:47:54 -0400
parents 28d1a6f8143f
children 9672fe07a232
comparison
equal deleted inserted replaced
11:e19d9742c99b 12:ca36262102d8
17 open in,$ARGV[0]; 17 open in,$ARGV[0];
18 my (%te,@ref,%ref); 18 my (%te,@ref,%ref);
19 while(<in>) 19 while(<in>)
20 { 20 {
21 chomp; 21 chomp;
22 my @f=split/\t/,$_,12; 22 my @f=split/\s+/,$_;
23 # headers 23 # headers
24 if(/^\@SQ/) 24 if(/^\@SQ/)
25 { 25 {
26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/; 26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/;
27 push @ref,[$sn,$ln]; 27 push @ref,[$sn,$ln];
30 } 30 }
31 31
32 # unmapped 32 # unmapped
33 next if $f[2] eq "*"; 33 next if $f[2] eq "*";
34 34
35 # alignments 35 my $mm=200;
36 if($f[11]=~/XT:A:/) 36 my $xa="";
37 for my $q (11..$#f)
37 { 38 {
38 my ($rnum)=$f[1]=~/(\d)$/; 39 if($f[$q]=~/NM:/)
39 # CIGAR 40 {
40 my (@cigar_m)=$f[5]=~/(\d+)M/g; 41 $mm=$f[$q];
41 my (@cigar_d)=$f[5]=~/(\d+)D/g; 42 $mm =~ s/NM://;
42 my (@cigar_s)=$f[5]=~/(\d+)S/g; 43 }
43 my (@cigar_i)=$f[5]=~/(\d+)I/g;
44 my $aln_ln=sum(@cigar_m,@cigar_d);
45
46 my $strand="+";
47 if($f[1]=~/r/)
48 {
49 my $seq=Bio::Seq->new(-seq=>$f[9]);
50 $f[9]=$seq->revcom->seq;
51 $strand="-";
52 }
53 44
54 # align to the junctions 45 if($f[$q]=~/XA:Z:/)
55 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1]) 46 {
56 { 47 ($xa)=$f[$q]=~/XA:Z:(.*);$/;
57 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1]) 48 last;
58 { 49 }
59 $f[2]=${$ref[$ref{$f[2]}+1]}[0]; 50 }
60 $f[3]=1;
61 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
62 }
63 else
64 {
65 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
66 }
67 }
68 51
69 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";"; 52 if ($mm > 5) {next;}
70 53
71 # XA tag 54 my ($rnum)=$f[1]=~/(\d)$/;
72 if($f[11]=~/XA:Z:/) 55 # CIGAR
73 { 56 my (@cigar_m)=$f[5]=~/(\d+)M/g;
74 my ($xa)=$f[11]=~/XA:Z:(.*);$/; 57 my (@cigar_d)=$f[5]=~/(\d+)D/g;
75 my @xa=split(";",$xa); 58 my (@cigar_s)=$f[5]=~/(\d+)S/g;
76 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa; 59 my (@cigar_i)=$f[5]=~/(\d+)I/g;
77 } 60 my $aln_ln=sum(@cigar_m,@cigar_d);
61
62 my $strand="+";
63 if($f[1]=~/r/)
64 {
65 my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
66 $f[9]=$seq->revcom->seq;
67 $strand="-";
78 } 68 }
69
70 # align to the junctions
71 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1])
72 {
73 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1])
74 {
75 $f[2]=${$ref[$ref{$f[2]}+1]}[0];
76 $f[3]=1;
77 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
78 }
79 else
80 {
81 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
82 }
83 }
84
85 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";";
86
87 # XA tag
88 my @xa=split(";",$xa);
89 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa;
90
79 } 91 }
80 close in; 92 close in;
81 93
82 foreach my $id (keys %pe) 94 foreach my $id (keys %pe)
83 { 95 {