Mercurial > repos > portiahollyoak > temp
view scripts/pickUniqMate.pl @ 21:9672fe07a232 draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
author | mvdbeek |
---|---|
date | Mon, 05 Dec 2016 09:58:47 -0500 |
parents | ca36262102d8 |
children |
line wrap: on
line source
#!/share/bin/perl use List::Util qw(sum); use Bio::Seq; die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1; open in,$ARGV[1]; my %uniq; while(<in>) { chomp; my @f=split; $uniq{$f[3]}=[@f]; } close in; open in,$ARGV[0]; my (%te,@ref,%ref); while(<in>) { chomp; my @f=split/\s+/,$_; # headers if(/^\@SQ/) { my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/; push @ref,[$sn,$ln]; $ref{$sn}=$#ref; next; } # unmapped next if $f[2] eq "*"; my $mm=200; my $xa=""; for my $q (11..$#f) { if($f[$q]=~/NM:/) { $mm=$f[$q]; $mm =~ s/NM://; } if($f[$q]=~/XA:Z:/) { ($xa)=$f[$q]=~/XA:Z:(.*);$/; last; } } if ($mm > 5) {next;} #my ($rnum)=$f[1]=~/(\d)$/; my $rnum=1; if (($f[1] & 128) == 128) {$rnum=2;} # CIGAR my (@cigar_m)=$f[5]=~/(\d+)M/g; my (@cigar_d)=$f[5]=~/(\d+)D/g; my (@cigar_s)=$f[5]=~/(\d+)S/g; my (@cigar_i)=$f[5]=~/(\d+)I/g; my $aln_ln=sum(@cigar_m,@cigar_d); my $strand="+"; if(($f[1] & 16) == 16) { my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); $f[9]=$seq->revcom->seq; $strand="-"; } # align to the junctions if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1]) { if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1]) { $f[2]=${$ref[$ref{$f[2]}+1]}[0]; $f[3]=1; $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1); } else { $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1; } } $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";"; # XA tag my @xa=split(";",$xa); $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa; } close in; foreach my $id (keys %pe) { next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"}; foreach my $rid (keys %{$pe{$id}}) { my $mate_id=($rid==1)?2:1; if(exists $uniq{$id."/".$mate_id}) { ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid}; print join("\t",@{$uniq{$id."/".$mate_id}}),"\n"; } } }