Mercurial > repos > matteoc > agame_custom_tools
view fosm_cluster/compute.stats.pl @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w $file=shift; $ncluster=shift; $ofile=shift; $fasfile=shift; $flist="4mers.list"; open(IN,$flist); while(<IN>) { chomp; push (@let,$_); } close(IN); open(OUT,">$file.stats"); open(IN,$file); %c=(); $name=(split(/\./,$file))[0]; while(<IN>) { chomp; if ($_=~/^>(.*)/) { $id=$1; $M{$id}=$name; }else{ $c{$id}.=$_; } } close(IN); foreach $s (keys %c) { ($len,$cov)=(split(/\_/,$s))[3,5]; $seq=$c{$s}; $at=0; $gc=0; $le=length($seq); $tt=0; %DD=(); @seq=(split('',$c{$s})); foreach $l (@seq) { if ($l eq "A" || $l eq "T") { $at++; }else{ $gc++ } } for ($i=0;$i<=length($seq)-4;$i++) { $subs=substr($seq,$i,4); $rc=reverse($subs); $rc=~tr/ACGT/TGCA/; $tt+=2; $DD{$subs}++; $DD{$rc}++; } $gc=$gc/$le; next unless $len > 1000; print OUT "$s\t$gc\t$cov\t"; foreach $L (sort @let) { $val=$DD{$L} ? $DD{$L}/$tt : 0; print OUT "$val\t"; } print OUT "$M{$s}\n"; } system("./kmeans.R $file.stats $ncluster $ofile ")==0||die("no kmeans"); open(OF,$ofile); $l=<OF>; open(FAS,">$fasfile"); while(<OF>) { ($id,$cluster)=(split())[0,1]; $NC{$cluster}++; $id=~s/\"//g; print FAS ">$cluster\_$NC{$cluster}\#$id\n$c{$id}\n"; }