Mercurial > repos > portiahollyoak > temp
comparison scripts/pickUniqMate.pl @ 12:ca36262102d8 draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 5d021f520b653582862ec98dd812a051b804aa50
author | portiahollyoak |
---|---|
date | Fri, 29 Apr 2016 05:47:54 -0400 |
parents | 28d1a6f8143f |
children | 9672fe07a232 |
comparison
equal
deleted
inserted
replaced
11:e19d9742c99b | 12:ca36262102d8 |
---|---|
17 open in,$ARGV[0]; | 17 open in,$ARGV[0]; |
18 my (%te,@ref,%ref); | 18 my (%te,@ref,%ref); |
19 while(<in>) | 19 while(<in>) |
20 { | 20 { |
21 chomp; | 21 chomp; |
22 my @f=split/\t/,$_,12; | 22 my @f=split/\s+/,$_; |
23 # headers | 23 # headers |
24 if(/^\@SQ/) | 24 if(/^\@SQ/) |
25 { | 25 { |
26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/; | 26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/; |
27 push @ref,[$sn,$ln]; | 27 push @ref,[$sn,$ln]; |
30 } | 30 } |
31 | 31 |
32 # unmapped | 32 # unmapped |
33 next if $f[2] eq "*"; | 33 next if $f[2] eq "*"; |
34 | 34 |
35 # alignments | 35 my $mm=200; |
36 if($f[11]=~/XT:A:/) | 36 my $xa=""; |
37 for my $q (11..$#f) | |
37 { | 38 { |
38 my ($rnum)=$f[1]=~/(\d)$/; | 39 if($f[$q]=~/NM:/) |
39 # CIGAR | 40 { |
40 my (@cigar_m)=$f[5]=~/(\d+)M/g; | 41 $mm=$f[$q]; |
41 my (@cigar_d)=$f[5]=~/(\d+)D/g; | 42 $mm =~ s/NM://; |
42 my (@cigar_s)=$f[5]=~/(\d+)S/g; | 43 } |
43 my (@cigar_i)=$f[5]=~/(\d+)I/g; | |
44 my $aln_ln=sum(@cigar_m,@cigar_d); | |
45 | |
46 my $strand="+"; | |
47 if($f[1]=~/r/) | |
48 { | |
49 my $seq=Bio::Seq->new(-seq=>$f[9]); | |
50 $f[9]=$seq->revcom->seq; | |
51 $strand="-"; | |
52 } | |
53 | 44 |
54 # align to the junctions | 45 if($f[$q]=~/XA:Z:/) |
55 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1]) | 46 { |
56 { | 47 ($xa)=$f[$q]=~/XA:Z:(.*);$/; |
57 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1]) | 48 last; |
58 { | 49 } |
59 $f[2]=${$ref[$ref{$f[2]}+1]}[0]; | 50 } |
60 $f[3]=1; | |
61 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1); | |
62 } | |
63 else | |
64 { | |
65 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1; | |
66 } | |
67 } | |
68 | 51 |
69 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";"; | 52 if ($mm > 5) {next;} |
70 | 53 |
71 # XA tag | 54 my ($rnum)=$f[1]=~/(\d)$/; |
72 if($f[11]=~/XA:Z:/) | 55 # CIGAR |
73 { | 56 my (@cigar_m)=$f[5]=~/(\d+)M/g; |
74 my ($xa)=$f[11]=~/XA:Z:(.*);$/; | 57 my (@cigar_d)=$f[5]=~/(\d+)D/g; |
75 my @xa=split(";",$xa); | 58 my (@cigar_s)=$f[5]=~/(\d+)S/g; |
76 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa; | 59 my (@cigar_i)=$f[5]=~/(\d+)I/g; |
77 } | 60 my $aln_ln=sum(@cigar_m,@cigar_d); |
61 | |
62 my $strand="+"; | |
63 if($f[1]=~/r/) | |
64 { | |
65 my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); | |
66 $f[9]=$seq->revcom->seq; | |
67 $strand="-"; | |
78 } | 68 } |
69 | |
70 # align to the junctions | |
71 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1]) | |
72 { | |
73 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1]) | |
74 { | |
75 $f[2]=${$ref[$ref{$f[2]}+1]}[0]; | |
76 $f[3]=1; | |
77 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1); | |
78 } | |
79 else | |
80 { | |
81 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1; | |
82 } | |
83 } | |
84 | |
85 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";"; | |
86 | |
87 # XA tag | |
88 my @xa=split(";",$xa); | |
89 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa; | |
90 | |
79 } | 91 } |
80 close in; | 92 close in; |
81 | 93 |
82 foreach my $id (keys %pe) | 94 foreach my $id (keys %pe) |
83 { | 95 { |