Mercurial > repos > dcouvin > countdifferences
view countDifferences.pl @ 0:a9fb5e4abb2d draft default tip
Uploaded
author | dcouvin |
---|---|
date | Mon, 20 Sep 2021 21:51:24 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w use strict; use warnings; #use DateTime; #use Date::Calc qw(:all); use Bio::SeqIO; #use String::Similarity; #use StringOO::Similarity; #use lib qw(./StringOO); #use Similarity::similarity; #use local::lib "$FindBin::Bin/.."; ### points to ~/mydir1 and local::lib finds lib #use lib "$FindBin::StringOO/../lib"; #package Similarity.pm; #use String::Approx 'adistr'; #package String::Similarity; #package String::Similarity qw(similarity); #use String::Similarity; ################################################################## ## Perl program to count variants from aligned multi-FASTA file ################################################################## my $file = $ARGV[0]; my %hSeq = (); my @arrayID = (); my $resultFile = $ARGV[1]; #"resultVariants.xls"; my $resultSimilarity = $ARGV[2]; #"resultSimilarity.xls"; my $chain = ""; my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$file); while (my $seq = $seqIO->next_seq()) { my $seqID = $seq->id; my $seqNuc = $seq->seq; push @arrayID, $seqID; $hSeq{$seqID} = $seqNuc; #my @seqArray = split //, $seqNuc; } open(RES, ">", $resultFile) or die "error open file $!:"; open(SIM, ">", $resultSimilarity) or die "error open file $!:"; print RES "\t"; print SIM "\t"; foreach my $id (@arrayID) { print RES "$id\t"; print SIM "$id\t"; } print RES "\n"; print SIM "\n"; for(my $i = 0; $i <= $#arrayID; $i++){ print RES "$arrayID[$i]\t"; print SIM "$arrayID[$i]\t"; for(my $j = 0; $j <= $#arrayID; $j++){ #for(my $j = $i+1; $j <= $#arrayID; $j++){ my $snpVariants = compare($hSeq{$arrayID[$i]}, $hSeq{$arrayID[$j]}); #my $similarity = adistr($hSeq{$arrayID[$i]}, $hSeq{$arrayID[$j]}); my $similarity = ident($hSeq{$arrayID[$i]}, $hSeq{$arrayID[$j]}); # replace similarity by ident my $roundSim = sprintf('%.3f', $similarity); print RES "$snpVariants\t"; print SIM "$roundSim\t"; #print RES ("SNPs between $arrayID[$i] and $arrayID[$j] = $snpVariants\t"); } print RES "\n"; print SIM "\n"; } close(RES) or die "error when closing file $!:"; close(SIM) or die "error when closing file $!:"; #foreach my $id (@arrayID) { # my @seqArray = split //, $hSeq{$id}; # #} #------------------------------------------------------------------------------ # compare two aligned sequences sub compare { my ($seq1,$seq2) = @_; #$seq1 = uc $seq1; #$seq2 = uc $seq2; my @seqArray1 = split //, $seq1; my @seqArray2 = split //, $seq2; my $variants = 0; for(my $i = 0; $i <= $#seqArray1; $i++){ if( $seqArray1[$i] ne $seqArray2[$i] ){ $variants++; } } return $variants; } # calculate percent identity between two aligned sequences sub ident { my ($seq1,$seq2) = @_; #$seq1 = uc $seq1; #$seq2 = uc $seq2; my @seqArray1 = split //, $seq1; my @seqArray2 = split //, $seq2; my $sim = 0; for(my $i = 0; $i <= $#seqArray1; $i++){ if( $seqArray1[$i] eq $seqArray2[$i] ){ $sim++; } } return ($sim / ($#seqArray1 + 1)); } #------------------------------------------------------------------------------