Mercurial > repos > matteoc > agame_custom_tools
view annotate_ends/attach.tags.pl @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w use strict; my $fos_file=shift; my $fosS=read_fa($fos_file); my $cont_file=shift; my $contS=read_fa($cont_file); my $bfile=shift; my %best=(); my $sim_coff=shift; my $len_coff=shift; my $out_name=shift; open(OUT,">$out_name"); my $out_table=shift; open(TABLE,">$out_table"); open(IN,$bfile); while(<IN>) { next if $_=~/\#/; my ($in,$node,$ident,$alnL,$rs,$re,$score)=(split(/\s+/))[0,1,2,3,8,9,11]; next unless $alnL>=$len_coff; next unless $ident>=$sim_coff; unless ($best{$in}) { if ($re<$rs) { my $tm=$re; $re=$rs; $rs=$tm } $best{$in}=[$node,$score,$rs,$re,$alnL]; print TABLE "$in $node $rs $re\n"; }else{ next unless $score> $best{$in}[1]; $best{$in}=[$node,$score,$rs,$re,$alnL]; } } my %addT=(); foreach my $best (keys %best) { my $node=$best{$best}[0]; my $rs=$best{$best}[2]; my $re=$best{$best}[3]; my $alnL=$best{$best}[4]; my $relL=$alnL/length($fosS->{$best}); my $lseq=length($contS->{$node}); my $a=$lseq-$re; print TABLE "Add $node $best $rs $a $relL\n"; if (($rs<=1500 || ($a)<=1500)) #&& $relL>0.35) { #print TABLE "Add $node $best $rs $a $relL\n"; push(@{$addT{$node}},$best); }else{ #print TABLE "Discard $node $best $rs $a $relL\n"; } } my $unF=0; foreach my $seq (sort keys %$contS) { my $tag=""; unless ($addT{$seq}) { $unF++; $tag="unf$unF"; }else{ my @adds=@{$addT{$seq}}; if ($#adds==1) { my $t=$adds[0]; $t=(split(/\_/,$t))[0]; $tag.=$t."_FR"; }else{ foreach my $t (@{$addT{$seq}}) { $tag.="$t"; } } } my $SEQ=form($contS->{$seq},80); print OUT ">$tag\n$SEQ\n"; print TABLE "$seq\t$tag\n"; } sub read_fa { my $file=$_[0]; my $seqF; my $id=""; open(IN,$file); while(<IN>) { chomp; if ($_=~/^>(.*)/) { $id=$1; $id=(split(/\s+/,$id))[0]; }else{ $seqF->{$id}.=$_; } } return $seqF; } sub form { my $string=$_[0]; my $len=$_[1]; my $outS=""; for (my $i=0;$i<=length($string);$i+=$len) { $outS.=substr($string,$i,$len)."\n"; } #print "A:$outS"; # #$outS=~s/\s+//g; return $outS; }