view fosm_cluster/compute.stats.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl -w
$file=shift;
$ncluster=shift;
$ofile=shift;
$fasfile=shift;
$flist="4mers.list";
open(IN,$flist);
while(<IN>)
{
        chomp;
        push (@let,$_);
}
close(IN);
open(OUT,">$file.stats");
open(IN,$file);
%c=();
$name=(split(/\./,$file))[0];
while(<IN>)
{
	chomp;
	if ($_=~/^>(.*)/)
	{
		$id=$1;
		$M{$id}=$name;
	}else{
		$c{$id}.=$_;
	}
}
close(IN);
foreach $s (keys %c)
{
	($len,$cov)=(split(/\_/,$s))[3,5];
	$seq=$c{$s};
        $at=0;
        $gc=0;
        $le=length($seq);
        $tt=0;
        %DD=();
	@seq=(split('',$c{$s}));

        foreach $l (@seq)
        {
                if ($l eq "A" || $l eq "T")
                {
                        $at++;
                }else{
                        $gc++
                }
        }
        for ($i=0;$i<=length($seq)-4;$i++)
        {
                $subs=substr($seq,$i,4);
                $rc=reverse($subs);
                $rc=~tr/ACGT/TGCA/;
                $tt+=2;
                $DD{$subs}++;
                $DD{$rc}++;
        }
        $gc=$gc/$le;
        next unless $len > 1000;
        print OUT "$s\t$gc\t$cov\t";
	foreach $L (sort @let)
        {
                $val=$DD{$L} ? $DD{$L}/$tt : 0;
                print OUT "$val\t";
        }
        print OUT "$M{$s}\n";
}

system("./kmeans.R $file.stats $ncluster $ofile ")==0||die("no kmeans");

open(OF,$ofile);
$l=<OF>;
open(FAS,">$fasfile");
while(<OF>)
{
	($id,$cluster)=(split())[0,1];
	$NC{$cluster}++;
	$id=~s/\"//g;
	print FAS ">$cluster\_$NC{$cluster}\#$id\n$c{$id}\n";		
}