view scripts/pickUniqPos_MEM.pl @ 14:bc39ae53be03 draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit bcb1f256bca5591aa3df390a302c19d52fba14c2
author portiahollyoak
date Mon, 23 May 2016 06:53:10 -0400
parents ca36262102d8
children 9672fe07a232
line wrap: on
line source

#!/share/bin/perl
use Bio::Seq;
use List::Util qw(sum);

use strict;
use warnings;

# Print the uniquely aligned reads of a SAM file as tab-separated intervals.
#
# Usage:  perl pickUniqPos_MEM.pl <sam> [max_AS_minus_XS_for_repeat]
#
#   <sam>   path of the SAM file, or "-" to read standard input
#   [max_AS_minus_XS_for_repeat]
#           a read whose best score (AS) exceeds its second-best score (XS)
#           by no more than this value is treated as a repeat and dropped;
#           defaults to 0 when omitted
#
# A record is reported when AS > 0 and it is not a repeat as defined above.
# Output columns:
#   reference name, POS-1, POS-1 + reference span of the CIGAR,
#   "<read name>/<mate digit>", read sequence, strand
#
# NOTE(review): the FLAG column is matched textually (an "r" means reverse
# strand, a trailing digit is the mate number), so this presumably expects
# string flags such as those written by `samtools view -X`; numeric FLAG
# values are not decoded here -- confirm against the caller.

die "perl $0 <sam> [max_AS_minus_XS_for_repeat]\n" if @ARGV < 1;

my ($sam_path, $max_gap) = @ARGV;
$max_gap = 0 unless defined $max_gap && length $max_gap;
die "perl $0: threshold '$max_gap' is not a number\n"
    unless $max_gap =~ /\A[-+]?\d+(?:\.\d+)?\z/;

# Three-argument open so the file name can never be taken as a mode or a
# command; "-" is kept as an explicit alias for standard input.
my $sam_fh;
if ($sam_path eq '-') {
    $sam_fh = \*STDIN;
}
else {
    open $sam_fh, '<', $sam_path
        or die "perl $0: cannot open '$sam_path': $!\n";
}

RECORD:
while (my $line = <$sam_fh>) {
    chomp $line;

    # Header lines carry no alignment.
    next RECORD if $line =~ /^\@/;

    # Eleven mandatory columns; everything after them (the optional tags)
    # stays together in $f[11].  Without tags there is no AS score, so the
    # record can never qualify as unique.
    my @f = split /\t/, $line, 12;
    next RECORD if @f < 12;

    # Best (AS) and second-best (XS) alignment scores; missing tags count as 0.
    my ($as, $xs) = (0, 0);
    for my $tag (split /\s+/, $f[11]) {
        if ($tag =~ /^AS:i:([-+]?\d+)$/) {
            $as = $1;
        }
        elsif ($tag =~ /^XS:i:([-+]?\d+)$/) {
            $xs = $1;
        }
    }

    # Keep unique hits only: a positive score that is not rivalled by a
    # second-best hit within $max_gap.
    next RECORD if $as <= 0;
    next RECORD if $xs > 0 && $as - $xs <= $max_gap;

    # Mate number is the trailing digit of the flag string, if any.
    my ($mate) = $f[1] =~ /(\d)$/;
    $mate = '' unless defined $mate;

    # Reverse-strand records are reported with the sequence reverse-complemented.
    # Done only for records that are actually printed.
    my ($strand, $seq) = ('+', $f[9]);
    if ($f[1] =~ /r/) {
        $seq = Bio::Seq->new(-seq => $f[9], -alphabet => 'dna')->revcom->seq;
        $strand = '-';
    }

    # Reference span = sum of the CIGAR operations that consume reference
    # bases (M, D, N, =, X).  The leading 0 keeps the sum defined when the
    # CIGAR contains none of them.
    my $ref_span = sum(0, $f[5] =~ /(\d+)[MDN=X]/g);
    my $start    = $f[3] - 1;

    print join("\t", $f[2], $start, $start + $ref_span, "$f[0]/$mate", $seq, $strand), "\n";
}

close $sam_fh;