Mercurial > repos > jjv_bioinformaticians > phylogeny_maximum_parsimony_informative_sites
view phylo-protpars.pl @ 4:549887e3f045 draft default tip
Uploading readme.txt
author | jjv_bioinformaticians |
---|---|
date | Wed, 12 Apr 2017 11:39:43 -0400 |
parents | 1cd1d4171142 |
children |
line wrap: on
line source
#!/usr/bin/perl -w use Bio::AlignIO; use Bio::Root::IO; use Bio::Seq; use Bio::SeqIO; use Bio::SimpleAlign; use Bio::TreeIO; use strict; use warnings; use Bio::Tools::Run::Phylo::Phylip::ProtPars; use Bio::Tools::Run::Alignment::Clustalw; use Bio::Tools::Run::Phylo::Phylip::DrawTree; ## Links # https://metacpan.org/pod/Bio::AlignIO # https://metacpan.org/pod/Bio::SimpleAlign # https://metacpan.org/pod/Bio::Seq ## $ENV{PHYLIPDIR} = '/usr/lib/phylip/bin/'; $ENV{CLUSTALWDIR} = '/usr/local/bin/'; my $inputfilename = ''; my $outputfilename = 'sequences'; my $outHandler; my $phylogeny = 0; my $nValues = 0; my $nFrequency = 0; sub preprocess { my ($file, $nValues, $nFrequency) = @_; my $in = Bio::AlignIO->new(-file => $file , -format => 'phylip'); my $aln = $in->next_aln(); # Get the first alignment # Getting header my $numSeq = $aln->num_sequences; # 5 seqs my $length = $aln->length(); # 376 bases aligning with gaps # Header end my @sequences; while ( $aln->num_sequences ) { # Number of sequences in this aligment. # Extract sequences and check values for the alignment column $pos #my @secuencias=(); foreach my $seq ($aln->each_seq) { my $sequence = $seq->seq();# my @arraySequence = $sequence =~ /./sg; # String to array push(@sequences,\@arraySequence); # Matrix of sequences } # Creates hash that contains the repetitions of each aa in seq my @toDelete = (); for (my $j=0; $j<$length;$j++) { my %amino = (); # AA hash for (my $i=0; $i<$numSeq;$i++) { my $char = $sequences[$i][$j]; if ($char ne "-") { if (exists $amino{$sequences[$i][$j]}) { $amino{$sequences[$i][$j]} += 1; } else { $amino{$sequences[$i][$j]} = 1; } } } # End for i ($numSeq) my $count = 0; foreach my $key (keys %amino) { # $key = AA name my $aminoCount = $amino{$key}; # Number of repeats of current amino acid. if ($aminoCount >= $nFrequency) { $count++; } } if (!($count >= $nValues)) { # Delete column push(@toDelete, $j); } } # End for j ($length) my $new_aln = $aln; # Remove the columns included in "toDelete" hash for (my $i = ((scalar(@toDelete)-1)); $i >= 0 ; $i--) { $new_aln = $new_aln->remove_columns([$toDelete[$i],$toDelete[$i]]); } $outHandler->write_aln($new_aln); $aln = $in->next_aln(); # Take the next aligment, if it was. } # End while } ## # MAIN ## ## Check input parameters my $num_args = $#ARGV + 1; if (!($num_args == 1 || $num_args == 3)) { print "\nUsage: phylo-protpars.pl file.fasta [Num. Values] [Num. Repeats]\n"; exit; } # Read arg2 and arg3 if ($num_args == 3) { $outHandler = Bio::AlignIO->new(-file => ">$outputfilename.aln.phy", -format => 'phylip'); $phylogeny = 1; } # Taking values of parameters from arguments $inputfilename = $ARGV[0]; if (length($inputfilename)>200){ print "Error: File name length msut be <200 characters\n"; exit; } if ($phylogeny) { # Parse values $nValues=$ARGV[1]; if ($nValues < 2) { print "Error: Number of values less than 2\n"; exit; } $nFrequency=$ARGV[2]; if ($nFrequency < 2) { print "Error: Number of repeats less than 2\n"; exit; } } ## End checking # Start MSA my @params = ('ktuple' => 2, 'matrix' => 'BLOSUM','output'=>'phylip', 'outfile'=>$outputfilename.".aln"); my $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params); my $aln = $factory->run($inputfilename); # Need to parse the MSA output file? if ($phylogeny) { preprocess($outputfilename.".aln", $nValues, $nFrequency); } # Process phylogeny my @params2 = (idlength=>30,threshold=>10,jumble=>"17,10",outgroup=>2); my $tree_factory = Bio::Tools::Run::Phylo::Phylip::ProtPars-> new(@params2); my $output = ""; if ($phylogeny) { $output = $outputfilename.".aln.phy"; } else { $output = $outputfilename.".aln"; } # Create the tree my $tree = $tree_factory->run($output); my @params3=('fontfile'=>'fontfile', 'tempdir'=>'images/'); my $treedraw = Bio::Tools::Run::Phylo::Phylip::DrawTree->new(@params3); #If file 'plotfile' exists, it has to be deleted. if (-e 'images/plotfile'){ unlink 'images/plotfile' or warn "Could not unlink images/plotfile: $!"; } open FILE, ">treePars.txt" or die $!; print FILE $tree->as_text('newick'); # Create draw with treePars # my $treeimagefile = $treedraw->run("treePars.txt");