view annotate_ends/attach.tags.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl -w
use strict;

my $fos_file=shift;
my $fosS=read_fa($fos_file);

my $cont_file=shift;
my $contS=read_fa($cont_file);

my $bfile=shift;
my %best=();

my $sim_coff=shift;
my $len_coff=shift;

my $out_name=shift;
open(OUT,">$out_name");

my $out_table=shift;
open(TABLE,">$out_table");

open(IN,$bfile);
while(<IN>)
{
	next if $_=~/\#/;
	my ($in,$node,$ident,$alnL,$rs,$re,$score)=(split(/\s+/))[0,1,2,3,8,9,11];
	next unless $alnL>=$len_coff;
	next unless $ident>=$sim_coff;
	unless ($best{$in})
	{
		if ($re<$rs)
		{
			my $tm=$re;
			$re=$rs;
			$rs=$tm
		}
		$best{$in}=[$node,$score,$rs,$re,$alnL];
		print TABLE "$in $node $rs $re\n";
	}else{
		next unless $score> $best{$in}[1];
		$best{$in}=[$node,$score,$rs,$re,$alnL];
	}
	
}

my %addT=();

foreach my $best (keys %best)
{
	my $node=$best{$best}[0];
	my $rs=$best{$best}[2];
	my $re=$best{$best}[3];
	my $alnL=$best{$best}[4];
	my $relL=$alnL/length($fosS->{$best});
	my $lseq=length($contS->{$node});
	my $a=$lseq-$re;
	print TABLE "Add $node $best $rs $a $relL\n";
	if (($rs<=1500 || ($a)<=1500)) #&& $relL>0.35)
	{
		#print TABLE "Add $node $best $rs $a $relL\n";
		push(@{$addT{$node}},$best);
	}else{
		#print TABLE "Discard $node $best $rs $a $relL\n";
	}	
	
}
my $unF=0;
foreach my $seq (sort keys %$contS)
{
	my $tag="";
	unless ($addT{$seq})
	{
		$unF++;
		$tag="unf$unF";
	}else{
		my @adds=@{$addT{$seq}};
		if ($#adds==1)
		{	
			my $t=$adds[0];
			$t=(split(/\_/,$t))[0];
			$tag.=$t."_FR";
		}else{
			foreach my $t (@{$addT{$seq}})
                        {
                                $tag.="$t";
                        }
	
		}
	}
	my $SEQ=form($contS->{$seq},80);
	print OUT ">$tag\n$SEQ\n";
	print TABLE "$seq\t$tag\n";
}



sub read_fa
{
	my $file=$_[0];
	my $seqF;
	my $id="";
	open(IN,$file);
	while(<IN>)
	{
		chomp;
		if ($_=~/^>(.*)/)
		{
			$id=$1;
			$id=(split(/\s+/,$id))[0];
		}else{
			$seqF->{$id}.=$_;
		}
	}
	return $seqF;
}

sub form
{
        my $string=$_[0];
        my $len=$_[1];
        my $outS="";
        for (my $i=0;$i<=length($string);$i+=$len)
        {
                $outS.=substr($string,$i,$len)."\n";
        }
        #print "A:$outS";
        #        #$outS=~s/\s+//g;
        return $outS;
}