Mercurial > repos > cpt > cpt_psm_prep
comparison lib/CPT/Bio/NW_MSA.pm @ 1:d724f34e671d draft default tip
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author | cpt |
|---|---|
| date | Mon, 05 Jun 2023 02:50:07 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:e4de0a0e90c8 | 1:d724f34e671d |
|---|---|
| 1 package CPT::Bio::NW_MSA; | |
| 2 use Moose; | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use autodie; | |
| 6 use List::Util qw(max); | |
| 7 | |
| 8 has 'sequences' => ( is => 'rw', isa => 'ArrayRef'); | |
| 9 # If true, relationships are assumed to be bidirectional | |
| 10 has 'bidi' => (is => 'rw', isa => 'Bool'); | |
| 11 has 'relationships' => ( is => 'rw', isa => 'HashRef', default => sub { {} }); | |
| 12 # | |
| 13 has 'verbose' => ( is => 'rw', isa => 'Num'); | |
| 14 # | |
| 15 has 'gap_penalty' => ( is => 'rw', isa => 'Num'); | |
| 16 has 'match_score' => ( is => 'rw', isa => 'Num'); | |
| 17 has 'mismatch_score' => ( is => 'rw', isa => 'Num'); | |
| 18 | |
| 19 # local stuff | |
| 20 #has 'current_list' => ( is => 'rw', isa => 'ArrayRef'); | |
| 21 has 'merger' => (is => 'rw', isa => 'HashRef'); | |
| 22 has 'number_of_aligned_lists' => ( is => 'rw', isa => 'Num', default => sub { 0 }); | |
| 23 | |
| 24 sub add_relationship { | |
| 25 my ($self, $from, $to) = @_; | |
| 26 ${$self->relationships()}{$from}{$to} = 1; | |
| 27 if($self->bidi()){ | |
| 28 ${$self->relationships()}{$to}{$from} = 1; | |
| 29 } | |
| 30 } | |
| 31 | |
| 32 sub Sij { | |
| 33 my($self, $merger_row, $query) = @_; | |
| 34 # Comparing a query against a merger row | |
| 35 my @check_against = @{$merger_row}; | |
| 36 #print "Checking " . join(",",@check_against) .":$query\t"; | |
| 37 foreach(@check_against){ | |
| 38 if(${$self->relationships()}{$query}{$_} | |
| 39 || ${$self->relationships()}{$_}{$query}){ | |
| 40 #print $self->match_score() . "\n"; | |
| 41 return $self->match_score(); | |
| 42 } | |
| 43 } | |
| 44 #print $self->mismatch_score() . "\n"; | |
| 45 return $self->mismatch_score(); | |
| 46 } | |
| 47 | |
| 48 sub align_list { | |
| 49 my ($self, $list_ref) = @_; | |
| 50 # If we haven't aligned any lists, we do something special | |
| 51 if($self->number_of_aligned_lists() == 0){ | |
| 52 # Pretend we've aligned ONE list already | |
| 53 $self->number_of_aligned_lists(1); | |
| 54 my @list = @{$list_ref}; | |
| 55 # Fake the merger | |
| 56 my %merger; | |
| 57 for(my $i = 0; $i < scalar @list; $i++){ | |
| 58 $merger{$i} = [$list[$i]]; | |
| 59 } | |
| 60 $self->merger(\%merger); | |
| 61 }else{ | |
| 62 $self->find_best_path($self->merger(), $list_ref); | |
| 63 $self->number_of_aligned_lists($self->number_of_aligned_lists() + 1); | |
| 64 } | |
| 65 } | |
| 66 | |
| 67 sub find_best_path { | |
| 68 my ($self, $merger_ref, $list_ref) = @_; | |
| 69 my %merger = %{$merger_ref}; | |
| 70 my @list = @{$list_ref}; | |
| 71 | |
| 72 my $max_i = scalar(keys(%merger)); | |
| 73 my $max_j = scalar(@list); | |
| 74 | |
| 75 my %score_mat; | |
| 76 my %point_mat; | |
| 77 | |
| 78 # Initial zeros for matrices | |
| 79 $point_mat{0}{0} = 'DONE'; | |
| 80 $score_mat{0}{0} = 0; | |
| 81 | |
| 82 for(my $a = 1; $a <= $max_i; $a++){ | |
| 83 $point_mat{$a}{0} = 'U'; | |
| 84 $score_mat{$a}{0} = $self->gap_penalty(); | |
| 85 } | |
| 86 for(my $b = 1; $b <= $max_j; $b++){ | |
| 87 $point_mat{0}{$b} = 'L'; | |
| 88 $score_mat{0}{$b} = $self->gap_penalty(); | |
| 89 } | |
| 90 | |
| 91 # Score | |
| 92 for(my $i = 1 ; $i <= $max_i; $i++){ | |
| 93 my $ci = $merger{$i-1}; | |
| 94 for(my $j = 1; $j <= $max_j; $j++){ | |
| 95 my $cj = $list[$j-1]; | |
| 96 # Scoring | |
| 97 my $diag_score = $score_mat{$i-1}{$j-1} + $self->Sij($ci,$cj); | |
| 98 my $up_score = $score_mat{$i-1}{$j} + $self->gap_penalty(); | |
| 99 my $left_score = $score_mat{$i}{$j-1} + $self->gap_penalty(); | |
| 100 | |
| 101 if($diag_score >= $up_score){ | |
| 102 if($diag_score >= $left_score){ | |
| 103 $score_mat{$i}{$j} = $diag_score; | |
| 104 $point_mat{$i}{$j} = 'D'; | |
| 105 }else{ | |
| 106 $score_mat{$i}{$j} = $left_score; | |
| 107 $point_mat{$i}{$j} = 'L'; | |
| 108 } | |
| 109 }else{ | |
| 110 if($up_score >= $left_score){ | |
| 111 $score_mat{$i}{$j} = $up_score; | |
| 112 $point_mat{$i}{$j} = 'U'; | |
| 113 }else{ | |
| 114 $score_mat{$i}{$j} = $left_score; | |
| 115 $point_mat{$i}{$j} = 'L'; | |
| 116 } | |
| 117 } | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 $self->print2DArray('score_mat', \%score_mat); | |
| 122 $self->print2DArray('point_mat', \%point_mat); | |
| 123 | |
| 124 | |
| 125 # Calculate merger | |
| 126 my @new_row_set; | |
| 127 my $i = $max_i + 0; | |
| 128 my $j = $max_j + 0; | |
| 129 while($i != 0 || $j != 0){ | |
| 130 my $dir = $point_mat{$i}{$j}; | |
| 131 my @new_row; | |
| 132 if($dir eq 'D'){ | |
| 133 push(@new_row, @{$merger{$i-1}}, $list[$j-1]); | |
| 134 $i--; | |
| 135 $j--; | |
| 136 }elsif($dir eq 'L'){ | |
| 137 push(@new_row, split(//, '-' x ($self->number_of_aligned_lists()))); | |
| 138 push(@new_row, $list[$j-1]); | |
| 139 $j--; | |
| 140 }elsif($dir eq 'U'){ | |
| 141 push(@new_row, @{$merger{$i-1}}, '-'); | |
| 142 $i--; | |
| 143 } | |
| 144 if($self->verbose()){ | |
| 145 print join("\t", $i, $j, $dir, @new_row),"\n"; | |
| 146 } | |
| 147 push(@new_row_set,\@new_row); | |
| 148 } | |
| 149 | |
| 150 my %new_merger; | |
| 151 for(my $i = 0; $i < scalar(@new_row_set); $i++){ | |
| 152 $new_merger{$i} = $new_row_set[scalar(@new_row_set) - $i - 1]; | |
| 153 } | |
| 154 $self->merger(\%new_merger); | |
| 155 } | |
| 156 | |
| 157 sub merged_array{ | |
| 158 my ($self) = @_; | |
| 159 my %m = %{$self->merger()}; | |
| 160 my @result; | |
| 161 foreach(sort{$a<=>$b} keys(%m)){ | |
| 162 push(@result, $m{$_}); | |
| 163 } | |
| 164 return @result; | |
| 165 } | |
| 166 | |
| 167 | |
| 168 sub print2DArray{ | |
| 169 my($self,$name, $ref) = @_; | |
| 170 if($self->verbose && defined $ref){ | |
| 171 print '*' x 32,"\n"; | |
| 172 print $name,"\n"; | |
| 173 if(ref $ref eq 'ARRAY'){ | |
| 174 foreach(@{$ref}){ | |
| 175 print join("\t",@{$_}),"\n"; | |
| 176 } | |
| 177 }elsif(ref $ref eq 'HASH'){ | |
| 178 my %h = %{$ref}; | |
| 179 foreach my $a(sort(keys(%h))){ | |
| 180 if(ref $h{$a} eq 'ARRAY'){ | |
| 181 print join("", map { sprintf "%-4s",$_ } @{$h{$a}}); | |
| 182 }else{ | |
| 183 foreach my $b(sort(keys($h{$a}))){ | |
| 184 if(defined($h{$a}{$b})){ | |
| 185 printf "%5s", $h{$a}{$b}; | |
| 186 } | |
| 187 } | |
| 188 } | |
| 189 print "\n"; | |
| 190 } | |
| 191 }else{ | |
| 192 die 'Unsupported'; | |
| 193 } | |
| 194 print '*' x 32,"\n"; | |
| 195 } | |
| 196 } | |
| 197 | |
| 198 | |
| 199 no Moose; | |
| 200 1; | |
| 201 | |
| 202 __END__ | |
| 203 | |
| 204 =pod | |
| 205 | |
| 206 =encoding UTF-8 | |
| 207 | |
| 208 =head1 NAME | |
| 209 | |
| 210 CPT::Bio::NW_MSA | |
| 211 | |
| 212 =head1 VERSION | |
| 213 | |
| 214 version 1.99.4 | |
| 215 | |
| 216 =head1 AUTHOR | |
| 217 | |
| 218 Eric Rasche <rasche.eric@yandex.ru> | |
| 219 | |
| 220 =head1 COPYRIGHT AND LICENSE | |
| 221 | |
| 222 This software is Copyright (c) 2014 by Eric Rasche. | |
| 223 | |
| 224 This is free software, licensed under: | |
| 225 | |
| 226 The GNU General Public License, Version 3, June 2007 | |
| 227 | |
| 228 =cut |
