Mercurial > repos > dcouvin > countdifferences
comparison countDifferences.pl @ 0:a9fb5e4abb2d draft default tip
Uploaded
| author | dcouvin |
|---|---|
| date | Mon, 20 Sep 2021 21:51:24 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a9fb5e4abb2d |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 use strict; | |
| 3 use warnings; | |
| 4 #use DateTime; | |
| 5 #use Date::Calc qw(:all); | |
| 6 use Bio::SeqIO; | |
| 7 #use String::Similarity; | |
| 8 #use StringOO::Similarity; | |
| 9 #use lib qw(./StringOO); | |
| 10 #use Similarity::similarity; | |
| 11 | |
| 12 #use local::lib "$FindBin::Bin/.."; ### points to ~/mydir1 and local::lib finds lib | |
| 13 #use lib "$FindBin::StringOO/../lib"; | |
| 14 #package Similarity.pm; | |
| 15 #use String::Approx 'adistr'; | |
| 16 #package String::Similarity; | |
| 17 #package String::Similarity qw(similarity); | |
| 18 #use String::Similarity; | |
| 19 | |
| 20 ################################################################## | |
| 21 ## Perl program to count variants from aligned multi-FASTA file | |
| 22 ################################################################## | |
| 23 | |
| 24 my $file = $ARGV[0]; | |
| 25 my %hSeq = (); | |
| 26 my @arrayID = (); | |
| 27 my $resultFile = $ARGV[1]; #"resultVariants.xls"; | |
| 28 my $resultSimilarity = $ARGV[2]; #"resultSimilarity.xls"; | |
| 29 my $chain = ""; | |
| 30 | |
| 31 my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$file); | |
| 32 | |
| 33 while (my $seq = $seqIO->next_seq()) { | |
| 34 my $seqID = $seq->id; | |
| 35 my $seqNuc = $seq->seq; | |
| 36 push @arrayID, $seqID; | |
| 37 $hSeq{$seqID} = $seqNuc; | |
| 38 #my @seqArray = split //, $seqNuc; | |
| 39 } | |
| 40 | |
| 41 open(RES, ">", $resultFile) or die "error open file $!:"; | |
| 42 open(SIM, ">", $resultSimilarity) or die "error open file $!:"; | |
| 43 | |
| 44 print RES "\t"; | |
| 45 print SIM "\t"; | |
| 46 foreach my $id (@arrayID) { | |
| 47 print RES "$id\t"; | |
| 48 print SIM "$id\t"; | |
| 49 } | |
| 50 print RES "\n"; | |
| 51 print SIM "\n"; | |
| 52 | |
| 53 for(my $i = 0; $i <= $#arrayID; $i++){ | |
| 54 print RES "$arrayID[$i]\t"; | |
| 55 print SIM "$arrayID[$i]\t"; | |
| 56 for(my $j = 0; $j <= $#arrayID; $j++){ #for(my $j = $i+1; $j <= $#arrayID; $j++){ | |
| 57 my $snpVariants = compare($hSeq{$arrayID[$i]}, $hSeq{$arrayID[$j]}); | |
| 58 #my $similarity = adistr($hSeq{$arrayID[$i]}, $hSeq{$arrayID[$j]}); | |
| 59 my $similarity = ident($hSeq{$arrayID[$i]}, $hSeq{$arrayID[$j]}); # replace similarity by ident | |
| 60 my $roundSim = sprintf('%.3f', $similarity); | |
| 61 print RES "$snpVariants\t"; | |
| 62 print SIM "$roundSim\t"; | |
| 63 #print RES ("SNPs between $arrayID[$i] and $arrayID[$j] = $snpVariants\t"); | |
| 64 } | |
| 65 print RES "\n"; | |
| 66 print SIM "\n"; | |
| 67 } | |
| 68 | |
| 69 close(RES) or die "error when closing file $!:"; | |
| 70 close(SIM) or die "error when closing file $!:"; | |
| 71 | |
| 72 #foreach my $id (@arrayID) { | |
| 73 # my @seqArray = split //, $hSeq{$id}; | |
| 74 # | |
| 75 #} | |
| 76 | |
| 77 | |
| 78 #------------------------------------------------------------------------------ | |
| 79 # compare two aligned sequences | |
| 80 sub compare { | |
| 81 my ($seq1,$seq2) = @_; | |
| 82 #$seq1 = uc $seq1; | |
| 83 #$seq2 = uc $seq2; | |
| 84 my @seqArray1 = split //, $seq1; | |
| 85 my @seqArray2 = split //, $seq2; | |
| 86 my $variants = 0; | |
| 87 | |
| 88 for(my $i = 0; $i <= $#seqArray1; $i++){ | |
| 89 if( $seqArray1[$i] ne $seqArray2[$i] ){ | |
| 90 $variants++; | |
| 91 } | |
| 92 } | |
| 93 | |
| 94 return $variants; | |
| 95 } | |
| 96 | |
| 97 # calculate percent identity between two aligned sequences | |
| 98 sub ident { | |
| 99 my ($seq1,$seq2) = @_; | |
| 100 #$seq1 = uc $seq1; | |
| 101 #$seq2 = uc $seq2; | |
| 102 my @seqArray1 = split //, $seq1; | |
| 103 my @seqArray2 = split //, $seq2; | |
| 104 my $sim = 0; | |
| 105 | |
| 106 for(my $i = 0; $i <= $#seqArray1; $i++){ | |
| 107 if( $seqArray1[$i] eq $seqArray2[$i] ){ | |
| 108 $sim++; | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 return ($sim / ($#seqArray1 + 1)); | |
| 113 } | |
| 114 | |
| 115 #------------------------------------------------------------------------------ |
