diff tools/regVariation/microsatellite_birthdeath.pl @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/microsatellite_birthdeath.pl	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,3984 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+use Term::ANSIColor;
+use Pod::Checker; 
+use File::Basename;
+use IO::Handle;
+use Cwd;
+use File::Path qw(make_path remove_tree);
+use File::Temp qw/ tempfile tempdir /;
+my $tdir = tempdir( CLEANUP => 0 );
+chdir $tdir;
+my $dir = getcwd;  
+#print "current dit=$dir\n";
+
+use vars qw (%treesToReject %template $printer $interr_poscord $interrcord $no_of_interruptionscord $stringfile @tags 
+$infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species 
+$gapcord %thresholdhash $tree_decipherer @sp_ident %revHash %sameHash %treesToIgnore %alternate @exactspecies @exacttags);
+use FileHandle;
+use IO::Handle;                     # 5.004 or higher
+
+#my @ar = ("/Users/ydk/work/rhesus_microsat/results/galay/chr22_5sp.maf.txt", "/Users/ydk/work/rhesus_microsat/results/galay/dataset_11.dat",
+#"/Users/ydk/work/rhesus_microsat/results/galay/chr22_5spec.maf.summ","hg18,panTro2,ponAbe2,rheMac2,calJac1","((((hg18, panTro2), ponAbe2), rheMac2), calJac1)","9,10,12,12",
+#"10","0.8");
+my @ar = @ARGV;
+my ($maf, $orth, $summout, $species_set, $tree_definition, $thresholds, $FLANK_SUPPORT, $SIMILARITY_THRESH) = @ar;
+$SIMILARITY_THRESH=$SIMILARITY_THRESH/100;
+#########################
+$SIMILARITY_THRESH = $SIMILARITY_THRESH/100;
+my $EDGE_DISTANCE = 10; 
+my $COMPLEXITY_SUPPORT = 20;
+load_thresholds("9_10_12_12");
+#########################
+
+my $complexity=int($COMPLEXITY_SUPPORT * (1/40));
+
+#print "complexity=$complexity\n";
+#<STDIN>;
+
+#$printer = 1;
+
+my $rando = int(rand(1000));
+my $localdate = `date`;
+$localdate =~ /([0-9]+):([0-9]+):([0-9]+)/;
+my $info = $rando.$1.$2.$3;
+
+#---------------------------------------------------------------------------
+# GETTING INPUT INFORMATION AND OPENING INPUT AND OUTPUT FILES
+
+
+my @thresharr = (0, split(/,/,$thresholds));
+my $randno=int(rand(100000));
+my $megamatch = $randno.".megamatch.net.axt"; #"/gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr1.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt";
+my $megamatchlck = $megamatch.".lck";
+unlink $megamatchlck;
+
+#my $selected= $orth;
+#my $eventfile = $orth;
+#$selected = $selected."_SELECTED";
+#$selected = $selected."_".$SIMILARITY_THRESH;
+#my $runtime = $selected.".runtime";
+
+my $inputtags = "H:C:O:R:M";
+$inputtags = $ARGV[3] if exists $ARGV[3] && $ARGV[3] =~ /[A-Z]:[A-Z]/;
+
+my @all_tags = split(/:/, $inputtags);
+my $inputsp = "hg18:panTro2:ponAbe2:rheMac2:calJac1";
+$inputsp = $ARGV[4] if exists $ARGV[4] && $ARGV[3] =~ /[0-9]+:/;
+@sp_ident = split(/:/,$inputsp);
+my $junkfile = $orth."_junk";
+
+my $sh = load_sameHash(1);
+my $rh = load_revHash(1);
+
+#print "inputs are : \n"; foreach(@ARGV){print $_,"\n";} 
+#open (SELECT, ">$selected") or die "Cannot open selected file: $selected: $!";
+open (SUMMARY, ">$summout") or die "Cannot open summout file: $summout: $!";
+#open (RUN, ">$runtime") or die "Cannot open orth file: $runtime: $!";
+#my $ctlfile = "baseml\.ctl"; #$ARGV[4];
+#my $treefile = "/gpfs/home/ydk104/work/rhesus_microsat/codes/lib/"; 	#1 THIS IS THE THE TREE UNDER CONSIDERATION, IN NEWICK 
+my %registeredTrees = ();
+my @removalReasons = 
+("microsatellite is compound", 
+"complex structure", 
+"if no. if micros is more than no. of species", 
+"if more than one micro per species ", 
+"if microsat contains N", 
+"different motif than required ", 
+"more than zero interruptions", 
+"microsat could not form key ", 
+"orthologous microsats of different motif size ",
+"orthologous microsats of different motifs ", 
+"microsats belong to different alignment blocks altogether", 
+"microsat near edge", 
+"microsat in low complexity region", 
+"microsat flanks dont align well", 
+"phylogeny not informative");
+my %allowedhash=();
+#---------------------------------------------------------------------------
+# WORKING ON MAKING THE MEGAMATCH FILE
+my $chromt=int(rand(10000));
+my $p_chr=$chromt;
+
+$tree_definition=~s/,/, /g;
+$tree_definition =~ s/, +/, /g;
+my @exactspeciesset_unarranged = split(/,/,$species_set);
+my $largesttree = "$tree_definition;";
+$tree_definition=~s/[\)\(, ]/\t/g;
+		
+my @treespecies=split(/\t+/,$tree_definition);
+
+foreach my $spec (@treespecies){
+	foreach my $espec (@exactspeciesset_unarranged){
+		push @exactspecies, $spec if $spec eq $espec;
+	}
+}
+#print "exactspecies=@exactspecies\n";
+my $focalspec = $exactspecies[0];
+my $arranged_species_set=join(".",@exactspecies);
+@exacttags=@exactspecies;
+foreach my $extag (@exacttags){
+	$extag =~ s/hg18/H/g;
+	$extag =~ s/panTro2/C/g;
+	$extag =~ s/ponAbe2/O/g;
+	$extag =~ s/rheMac2/R/g;
+	$extag =~ s/calJac1/M/g;
+}
+my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
+#print "sending to maftoAxt_multispecies: $maf, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n"; 
+maftoAxt_multispecies($maf, $tree_definition, $chr_name, $species_set);
+my @filterseqfiles= ($chr_name);
+		$largesttree =~ s/hg18/H/g;
+		$largesttree =~ s/panTro2/C/g;
+		$largesttree =~ s/ponAbe2/O/g;
+		$largesttree =~ s/rheMac2/R/g;
+		$largesttree =~ s/calJac1/M/g;
+#---------------------------------------------------------------------------
+
+my ($lagestnodes, $largestbranches) = get_nodes($largesttree); 
+shift (@$lagestnodes);
+my @extendedtitle=();
+
+my $title = ();
+my $parttitle = ();
+my @titlearr = ();
+my @firsttitle=($focalspec."chrom", $focalspec."start", $focalspec."end", $focalspec."motif", $focalspec."motifsize", $focalspec."threshold");
+
+my @finames= qw(chr	start	end	motif	motifsize	microsat	mutation	mutation.position	mutation.from	mutation.to	insertion.details	deletion.details);
+
+my @fititle=();
+
+foreach my $spec (split(",",$species_set)){
+	push @fititle, $spec;
+	foreach my $name (@finames){
+		push @fititle, $spec.".".$name;
+	}
+}
+
+
+my @othertitle=qw(somechr somestart	somened	event source);
+
+my @fnames = ();
+push @fnames, qw(insertions_num  deletions_num  motinsertions_num  motinsertionsf_num  motdeletions_num  motdeletionsf_num  noninsertions_num  nondeletions_num) ;
+push @fnames, qw(binsertions_num bdeletions_num bmotinsertions_num bmotinsertionsf_num bmotdeletions_num bmotdeletionsf_num bnoninsertions_num bnondeletions_num) ;
+push @fnames, qw(dinsertions_num ddeletions_num dmotinsertions_num dmotinsertionsf_num dmotdeletions_num dmotdeletionsf_num dnoninsertions_num dnondeletions_num) ;
+push @fnames, qw(ninsertions_num ndeletions_num nmotinsertions_num nmotinsertionsf_num nmotdeletions_num nmotdeletionsf_num nnoninsertions_num nnondeletions_num) ;
+push @fnames, qw(substitutions_num bsubstitutions_num dsubstitutions_num nsubstitutions_num indels_num subs_num);
+
+my @fullnames = ();
+
+foreach my $lnode (@$lagestnodes){
+	my @pair = @$lnode;
+	my @nodemutarr = ();
+	for my $p (@pair){
+	#	print "p = $p\n";
+		$p =~ s/[\(\), ]+//g;
+		$p =~ s/H/hg18/g;
+		$p =~ s/C/panTro2/g;
+		$p =~ s/O/ponAbe2/g;
+		$p =~ s/R/rheMac2/g;
+		$p =~ s/M/calJac1/g;
+		foreach my $n (@fnames) {	push @fullnames, $p.".".$n;}
+	}
+}
+print SUMMARY "#",join("\t", @firsttitle, @fititle, @othertitle);
+
+print SUMMARY "\t",join("\t", @fullnames);
+#$title = $title."\t".join("\t", @fullnames);
+
+print SUMMARY "\t",join("\t", @fnames);
+#$title=  $title."\t".join("\t", @fnames);
+
+print SUMMARY "\t","tree","\t", "cleancase", "\n";
+#$title=  $title."\t"."tree"."\t"."cleancase". "\n";
+
+#print $title; #<STDIN>;
+
+#print "all_tags = @all_tags\n";
+
+for my $no (3 ... $#all_tags+1){
+#	print "no=$no\n"; #<STDIN>;
+	@tags = @all_tags[0 ... $no-1];
+	#print "tags = = @tags\n" if $printer == 1;
+	%template=();
+	my @nextcounter = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+	#next if scalar(@tags) < 4;
+	
+	#print "now doing tags = @tags, no = $no\n"; 
+	open (ORTH, "<$orth") or die "Cannot open orth file: $orth: $!";
+	
+#	print SUMMARY join "\t", qw (species chr start end branch motif microsat mutation position from to insertion deletion);
+	
+	
+	##################### T E M P O R A R Y #####################
+	my @finaltitle=();
+	my @singletitle = qw (species chr start end motif motifsize microsat strand microsatsize col10 col11 col12 col13);
+	my $endtitle = ();
+	foreach my $tag (@tags){
+		my @tempsingle = ();
+		
+		foreach my $single (@singletitle){
+			push @tempsingle, $tag.$single;
+		}
+		@finaltitle = (@finaltitle, @tempsingle);	
+	}
+
+#	print SUMMARY join("\t",@finaltitle),"\n";
+	
+	#############################################################
+	
+	#---------------------------------------------------------------------------
+	# GET THE TREE FROM TREE FILE
+	my $tree = ();
+	$tree = "((H, C), O)" if $no == 3;
+	$tree = "(((H, C), O), R)" if $no == 4;
+	$tree = "((((H, C), O), R), M)" if $no == 5;
+#	$tree=~s/;$//g;
+#	print "our tree = $tree\n";
+	#---------------------------------------------------------------------------
+	# LOADING HASH CONTAINING ALL POSSIBLE TREES:
+	$tree_decipherer = "/gpfs/home/ydk104/work/rhesus_microsat/codes/lib/tree_analysis_".join("",@tags).".txt";
+	load_allPossibleTrees($tree_decipherer, \%template, \%alternate);
+	
+	#---------------------------------------------------------------------------
+	# LOADING THE TREES TO REJECT FOR BIRTH ANALYSIS
+	%treesToReject=();
+	%treesToIgnore=();
+	load_treesToReject(@tags); 
+	load_treesToIgnore(@tags);
+	#---------------------------------------------------------------------------
+	# LOADING INPUT DATA INTO HASHES AND ARRAYS
+	
+	
+	#1 THIS IS THE POINT WHERE WE CAN FILTER OUT LARGE MICROSAT CLUSTERS 
+	#2 AS WELL AS MULTIPLE-ALIGNMENT-BLOCKS-SPANNING MICROSATS (KIND OF
+	#3 IMPLICIT IN THE FIRST PART OF THE SENTENCE ITSELF IN MOST CASES).
+	
+	my %orths=();
+	my $counterm = 0;
+	my $loaded = 0;
+	my %seen = ();
+	my @allowedchrs = ();
+#	print "no = $no\n"; #<STDIN>;
+
+	while (my $line = <ORTH>){
+		#print "line=$line\n";
+		$line =~ s/>hg18/>H/g;
+		$line =~ s/>panTro2/>C/g;
+		$line =~ s/>ponAbe2/>O/g;
+		$line =~ s/>rheMac2/>R/g;
+		$line =~ s/>calJac1/>M/g;
+		my @micros = split(/>/,$line); 									# LOADING ALL THE MICROSAT ENTRIES FROM THE CLUSTER INTO @micros
+		#print "micros=",printarr(@micros),"\n"; #<STDIN>;
+		shift @micros; 													# EMPTYING THE FIRST, EMTPY ELEMENT OF THE ARRAY
+		$no_of_species = adjustCoordinates($micros[0]);
+		next if $no_of_species != $no;
+		$counterm++;
+		#------------------------------------------------
+		$nextcounter[0]++  if $line =~ /compound/;
+		next if $line =~ /compound/; 									# GETTING RID OF COMPOUND MICROSATS
+		#------------------------------------------------
+		#next if $line =~ /[A-Za-z]>[a-zA-Z]/;
+		#------------------------------------------------
+		chomp $line;
+		my $match_count = ($line =~ s/>/>/g); 							# COUNTING THE NUMBER OF MICROSAT ENTRIES IN THE CLUSTER
+		#print "number of species = $match_count\n";
+		my $stopper = 0;	
+		foreach my $mic (@micros){
+			my @local = split(/\t/,$mic);
+			if ($local[$typecord] =~ /\./ || exists($local[$no_of_interruptionscord+2])) {$stopper = 1; $nextcounter[1]++;
+			last; } 
+																		# REMOVING CLUSTERS WITH THE CYRPTIC, (UNRESOLVABLY COMPLEX) MICROSAT ENTRIES IN THEM
+		}
+		next if $stopper ==1;
+		#------------------------------------------------
+		$nextcounter[2]++ if (scalar(@micros) >$no_of_species);
+	
+		next if (scalar(@micros) >$no_of_species); 						#1 REMOVING MICROSAT CLUSTERS WITH MORE NUMBER OF MICROSAT ENTRIES THAN THE NUMBER OF SPECIES IN THE DATASET.
+																		#2 THIS IS SO BECAUSE SUCH CLUSTERS IMPLY THAT IN AT LEAST ONE SPECIES, THERE IS MORE THAN ONE MICROSAT ENTRY
+																		#3 IN THE CLUSTER. THUS, HERE WE ARE GETTING RID OF MICROSATS CLUSTERS THAT INCLUDE MULTUPLE, NEIGHBORING
+																		#4 MICROSATS, AND STICK TO CLEAN MICROSATS THAT DO NOT HAVE ANY MICROSATS IN NEIGHBORHOOD.
+																		#5 THIS 'NEIGHBORHOOD-RANGE' HAD BEEN DECIDED PREVIOUSLY IN OUR CODE multiSpecies_orthFinder4.pl
+		my $nexter = 0;
+		foreach my $tag (@tags){
+			my $tagcount = ($line =~ s/>$tag\t/>$tag\t/g);
+			if ($tagcount > 1) { $nexter =1; #print colored ['red'],"multiple entires per species : $tagcount of $tag\n" if $printer == 1; 
+				next; 
+			}
+		}
+		
+		if ($nexter == 1){
+			$nextcounter[3]++;
+			next;
+		}
+		#------------------------------------------------
+		foreach my $mic (@micros){										#1	REMOVING MICROSATELLITES WITH ANY 'N's IN THEM
+			my @local = split(/\t/,$mic);								
+			if ($local[$microsatcord] =~ /N/) {$stopper =1; 		$nextcounter[4]++;
+			last;}			
+		}
+		next if $stopper ==1;
+		#print "till here 1\n"; #<STDIN>;
+		#------------------------------------------------
+		my @micros_copy = @micros;
+		
+		my $tempmicro = shift(@micros_copy);							#1 CURRENTLY OBTAINING INFORMATION FOR THE FIRST 
+																		#2 MICROSAT IN THE CLUSTER.
+		my @tempfields = split(/\t/,$tempmicro);
+		my $prevtype = $tempfields[$typecord];
+		my $tempmotif = $tempfields[$motifcord];
+		
+		my $tempfirstmotif = ();
+		if (scalar(@tempfields) > $microsatcord + 2){
+			if ($tempfields[$no_of_interruptionscord] >= 1) {			#1	DISCARDING MICROSATS WITH MORE THAN ZERO INTERRUPTIONS
+																		#2	IN THE FIRST MICROSAT OF THE CLUSTER
+				$nexter =1; #print colored ['blue'],"more than one interruptions \n" if $printer == 1; 
+			}
+		}
+		if ($nexter == 1){
+			$nextcounter[6]++;
+			next;
+		}															#1	DONE OBTAINING INFORMATION REGARDING 
+																		#2	THE FIRST MICROSAT FROM THE CLUSTER
+		
+		if ($tempmotif =~ /^\[/){
+			$tempmotif =~ s/^\[//g;
+			$tempmotif =~ /([a-zA-Z]+)\].*/;
+			$tempfirstmotif = $1;										#1 OBTAINING THE FIRTS MOTIF OF MICROSAT
+		}
+		else {$tempfirstmotif = $tempmotif;}
+		my $prevmotif = $tempfirstmotif;
+		
+		my $key = ();
+		if ($tempmicro =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
+			$key = join("\t",$1, $2,  $4, $5);
+		}
+		else{
+		#	print "counld not form  a key \n" if $printer == 1;
+			$nextcounter[7]++;
+			next;
+		}
+		#-----------------												#1	NOW, AFTER OBTAINING INFORMATION ABOUT
+																		#2	THE FIRST MICROSAT IN THE CLUSTER, THE
+																		#3	FOLLOWING LOOP GOES THROUGH THE OTHER MICROSATS
+																		#4	TO SEE IF THEY SHARE THE REQUIRED FEATURES (BELOW)
+	
+		foreach my $micro (@micros_copy){
+			my @fields = split(/\t/,$micro);
+			#-----------------	
+			if (scalar(@fields) > $microsatcord + 2){					#1	DISCARDING MICROSATS WITH MORE THAN ONE INTERRUPTIONS
+				if ($fields[$no_of_interruptionscord] >= 1) {$nexter =1; #print colored ['blue'],"more than one interruptions \n" if $printer == 1; 
+				$nextcounter[6]++;
+				last; }											
+			}
+			#-----------------	
+			if (($prevtype ne "0") && ($prevtype ne $fields[$typecord])) { 
+				$nexter =1; #print colored ['yellow'],"microsat of different type \n" if $printer == 1; 
+				$nextcounter[8]++;
+				last; }														#1 DISCARDING MICROSAT CLUSTERS WHERE MICROSATS BELONG
+			#-----------------											#2 TO DIFFERENT TYPES (MONOS, DIS, TRIS ETC.)
+			$prevtype = $fields[$typecord];
+			
+			my $motif = $fields[$motifcord];
+			my $firstmotif = ();
+		
+			if ($motif =~ /^\[/){
+				$motif =~ s/^\[//g;
+				$motif =~ /([a-zA-Z]+)\].*/;
+				$firstmotif = $1;
+			}
+			else {$firstmotif = $motif;}
+			
+			my $motifpattern = $firstmotif.$firstmotif;
+			my $prevmotifpattern = $prevmotif.$prevmotif;
+			
+			if (($prevmotif ne "0")&&(($motifpattern !~ /$prevmotif/i)||($prevmotifpattern !~ /$firstmotif/i)) ) {  
+				$nexter =1; #print colored ['green'],"different motifs used \n$line\n" if $printer == 1;
+				$nextcounter[9]++;
+				last; 
+			}														#1	DISCARDING MICROSAT CLUSTERS WHERE MICROSATS BELONG
+																	#2	TO DIFFERENT MOTIFS
+			my $prevmotif = $firstmotif;
+			#-----------------			
+			
+			for my $t (0 ... $#tags){								#1	DISCARDING MICROSAT CLUSTERS WHERE MICROSAT ENTRIES BELONG
+																	#2	DIFFERENT ALIGNMENT BLOCKS
+				if ($micro =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
+					my $key2 = join("\t",$1, $2,  $4, $5);
+					if ($key2 ne $key){
+#						print "microsats belong to diffferent alignment blocks altogether\n" if $printer == 1;
+						$nextcounter[10]++;
+						$nexter = 1; last;
+					}
+				}
+				else{
+				#	print "counld not form  a key \n" if $printer == 1;
+					$nexter = 1; last;
+				}
+			}
+			
+		}
+		#####################
+		if ($nexter == 1){
+			#	print "nexting\n" if $printer == 1; 
+				next;
+			}
+		else{
+#			print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n$key:\n$line\nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"  if $printer == 1;
+			push (@{$orths{$key}},$line);	
+			$loaded++;
+			if ($line =~ /($focalspec)\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/ ) {
+#				print "$line\n"  if $printer == 1; #if $line =~ /Contig/;
+#				print "################ ################\n" if $printer == 1;
+				push @allowedchrs, $2 if !exists $allowedhash{$2};
+				$allowedhash{$2} = 1; 
+				my $key = join("\t",$1, $2, $3, $4);
+				#print "print the shit: $key\n" if $printer  == 1;
+				$seen{$key} = 1;
+			}
+			else { #print "Key could not be formed in SPUT for ($org) ($title) ([0-9]+) ([0-9]+)\n";
+			}
+		}
+	}
+	close ORTH;
+
+#	print "now studying where we lost microsatellites: @nextcounter\n";
+	for my $reason (0 ... $#nextcounter){
+#		print $removalReasons[$reason]."\t".$nextcounter[$reason],"\n";
+	}
+#	print "\ntotal number of keys formed = ", scalar(keys %orths), " = \n";
+#	print "done filtering .. counterm = $counterm and loaded = $loaded\n"; 
+
+	#----------------------------------------------------------------------------------------------------------------
+	# NOW GENERATING THE ALIGNMENT FILE WITH RELELEVENT ALIGNMENTS STORED ONLY.
+	
+	while (1){
+		if (-e $megamatchlck){
+#			print "waiting to write into $megamatchlck\n";
+			sleep 10;
+		}
+		else{
+			open (MEGAMLCK, ">$megamatchlck") or die "Cannot open megamatchlck file $megamatchlck: $!";	
+			open (MEGAM, ">$megamatch") or die "Cannot open megamatch file $megamatch: $!";
+			last;
+		}
+	}
+	
+	foreach my $seqfile (@filterseqfiles){
+		my $fullpath = $seqfile;
+		
+#		print "opening file: $fullpath\n"; 
+		open (MATCH, "<$fullpath") or die "Cannot open MATCH file $fullpath: $!";
+		my $matchlines = 0;
+
+		while (my $line = <MATCH>)	{
+			if ($line =~ /($focalspec)\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/ ) {
+				my $key = join("\t",$1, $2, $3, $4);
+				if (exists $seen{$key}){
+					while (1){
+						$matchlines++;
+						print MEGAM $line;
+						$line = <MATCH>;
+						print MEGAM "\n" if $line !~/[0-9a-zA-Z]/;
+						last if $line !~/[0-9a-zA-Z]/;
+					}
+				}
+			}
+		}
+#		print "matchlines = $matchlines\n";
+		close MATCH;
+	}
+	close MEGAMLCK;
+	
+	unlink $megamatchlck;
+	close MEGAM;
+	undef %seen;
+	#----------------------------------------------------------------------------------------------------------------
+
+	#---------------------------------------------------------------------------
+	# NOW, AFTER FILTERING MANY MICROSATS, AND LOADING THE FILTERED ONES INTO
+	# THE HASH %orths , WE GO THROUGH THE ALIGNMENT FILE, AND STUDY THE 
+	# FLANKING SEQUENCES OF ALL THESE MICROSATS, TO FILTER THEM FURTHER
+	#$printer = 1;
+	
+	my $microreadcounter=0;
+	my $contigsentered=0;
+	my $contignotrightcounter=0;
+	my $keynotformedcounter=0;
+	my $keynotfoundcounter= 0;
+	my $dotcounter = 0;
+
+	open (BO, "<$megamatch") or die "Cannot open alignment file: $megamatch: $!";
+	
+	while (my $line = <BO>){
+#		print "." if $dotcounter % 100 ==0;
+#		print "\n" if $dotcounter % 5000 ==0;
+#		print "dotcounter = $dotcounter\n " if $printer == 1;
+		next if $line !~ /^[0-9]+/;
+		$dotcounter++;
+#		print colored ['green'], "~" x 60, "\n" if $printer == 1;
+#		print colored ['green'], $line;# if $printer == 1;
+		chomp $line;	
+		my @fields2 = split(/\t/,$line);
+		my $key2 = ();
+		my $alignment_no = ();										#1 TEMPORARY
+		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
+			$key2 = join("\t",$1, $2,  $4, $5);
+			$alignment_no=$1;
+		}
+		else {print "seq line $line incompatible\n"; $keynotformedcounter++; next;}
+		
+		$no_of_species = adjustCoordinates($line);
+		$contignotrightcounter++  if $no_of_species != $no;
+	#	print "contignotrightcounter=$contignotrightcounter\n";
+	#	print "no_of_species=$no_of_species\n";
+	#	print "no=$no\n";
+		
+		next if $no_of_species != $no;
+		
+	#	print "key = $key2\n" if $printer == 1;
+		my @clusters = ();											#1 EXTRACTING MICROSATS CORRESPONDING TO THIS 
+																	#2 ALIGNMENT BLOCK
+		if (exists($orths{$key2})){
+			@clusters = @{$orths{$key2}};
+			$contigsentered++;
+			delete $orths{$key2};
+		}
+		else{
+			#print "orth does not exist\n";
+			$keynotfoundcounter++;
+			next;
+		}
+		
+		my %sequences=();											#1 WILL STORE SEQUENCES IN THE CURRENT ALIGNMENT BLOCK
+		my $humseq = ();
+		foreach my $tag (@tags){									#1 READING THE ALIGNMENT FILE AND CAPTURING SEQUENCES
+			my $seq = <BO>;											#2 OF ALL SPECIES.
+			chomp $seq;
+			$sequences{$tag} =  " ".$seq;
+			#print "sequences = $sequences{$tag}\n" if $printer == 1;
+			$humseq = $seq if $tag =~ /H/;
+		}
+		
+		
+		foreach my $cluster (@clusters){							#1 NOW, GOING THROUGH THE CLUSTER OF MICROSATS
+			#print "x" x 60, "\n" if $printer == 1;
+			#print colored ['red'],"cluster = $cluster\n";
+			$largesttree =~ s/hg18/H/g;
+			$largesttree =~ s/panTro2/C/g;
+			$largesttree =~ s/ponAbe2/O/g;
+			$largesttree =~ s/rheMac2/R/g;
+			$largesttree =~ s/calJac1/M/g;
+
+			$microreadcounter++;
+			my @micros = split(/>/,$cluster);
+			shift @micros;
+	
+			my $edge_microsat=0;									#1 THIS WILL HAVE VALUE "1" IF MICROSAT IS FOUND
+																	#2 TO BE TOO CLOSE TO THE EDGES OF ALIGNMENT BLOCK
+			
+			my @starts= ();	my %start_hash=();						#1 STORES THE START AND END COORDINATES OF MICROSATELLITES
+			my @ends = ();	my %end_hash=();						#2 SO THAT LATER, WE WILL BE ABLE TO FIND THE EXTREME 
+																	#3 COORDINATE VALUES OF THE ORTHOLOGOUS MIROSATELLITES.
+			
+			my %microhash=();
+			my %microsathash=();
+			my %nonmicrosathash=();
+			my $motif=();											#1 BASIC MOTIF OF THE MICROSATELLITE.. THERE'S ONLY 1
+			#print "tags=@tags\n";
+			for my $i (0 ... $#tags){								#1	FINDING THE MICROSAT, AND THE ALIGNMENT SEQUENCE
+																	#2	CORRESPONDING TO THE PARTICULAR SPECIES (AS PER 
+																	#3	THE VARIABLE $TAG;
+				my $tag = $tags[$i];
+			#		print $seq;
+				my $locus="NULL";										#1	THIS WILL STORE THE MICROSAT OF THIS SPECIES.
+																		#2	IF THERE IS NO MICROSAT, IT WILL REMAIN "NULL"
+					
+				foreach my $micro (@micros){	
+				#	print "micro=$micro, tag=$tag\n";
+					if ($micro =~ /^$tag/){								#1	MICROSAT OF THIS SPECIES FOUND..
+						$locus  = $micro;
+						my @fields = split(/\t/,$micro);
+						$motif = $fields[$motifcord];
+						$microsathash{$tag}=$fields[$microsatcord];
+				#		print "fields=@fields, and startcord=$startcord = $fields[$startcord]\n";
+						push(@starts, $fields[$startcord]);
+						push(@ends, $fields[$endcord]);
+						$start_hash{$tag}=$fields[$startcord];
+						$end_hash{$tag}=$fields[$endcord];
+						last;
+					}
+					else{$microsathash{$tag}="NULL"}
+				}
+				$microhash{$tag}=$locus;
+		
+			}	
+			
+			
+			
+			my $extreme_start  = smallest_number(@starts);		#1 THESE TWO ARE THE EXTREME COORDINATES OF THE 
+			my $extreme_end  = largest_number(@ends);			#2 MICROSAT CLUSTER ACCROSS ALL THE SPECIES IN 
+																#3 WHOM IT IS FOUND TO BE ORTHOLOGOUS.
+			
+			#print "starts=@starts... ends=@ends\n";
+				
+			my %up_flanks = ();									#1	CONTAINS UPSTEAM FLANKING REGIONS FOR EACH SPECIES
+			my %down_flanks = ();								#1	CONTAINS DOWNDTREAM FLANKING REGIONS FOR EACH SPECIES
+			
+			my %up_largeflanks = ();
+			my %down_largeflanks = ();
+			
+			my %locusandflanks = ();
+			my %locusandlargeflanks = ();			
+			
+			my %up_internal_flanks=();							#1	CONTAINS SEQUENCE BETWEEN THE $extreme_start and the 
+																#2	ACTUAL START OF MICROSATELLITE IN THE SPECIES
+			my %down_internal_flanks=();						#1	CONTAINS SEQUENCE BETWEEN THE $extreme_end and the 
+																#2	ACTUAL end OF MICROSATELLITE IN THE SPECIES
+			
+			my %alignment=();									#1 CONTAINS ACTUAL ALIGNMENT SEQUENCE BETWEEN THE TWO
+																#2 EXTEME VALUES.
+			
+			my %microsatstarts=();										#1 WITHIN EACH ALIGNMENT, IF THERE EXISTS A MICROSATELLITE
+																#2 THIS HASH CONTAINS THE START SITE OF THE MICROSATELLITE
+																#3 WIHIN THE ALIGNMENT
+			next if !defined $extreme_start;
+			next if !defined $extreme_end;
+			next if $extreme_start > length($sequences{$tags[0]});
+			next if $extreme_start < 0;
+			next if $extreme_end > length($sequences{$tags[0]});
+			
+			for my $i (0 ... $#tags){							#1 NOW THAT WE HAVE GATHERED INFORMATION REGARDING
+																#2 SEQUENCE ALIGNMENT AND MICROSATELLITE COORDINATES
+																#3 AS WELL AS THE EXTREME COORDINATES OF THE 
+																#4 MICROSAT CLUSTER, WE WILL PROCEED TO EXTRACT THE
+																#5 FLANKING SEQUENCE OF ALL ORGS, AND STUDY IT IN 
+																#6 MORE DETAIL.
+				my $tag = $tags[$i];		
+			#	print "tag=$tag.. seqlength = ",length($sequences{$tag})," extreme_start=$extreme_start and extreme_end=$extreme_end\n";
+				my $upstream_gaps = (substr($sequences{$tag}, 0, $extreme_start) =~ s/\-/-/g);		#1	NOW MEASURING THE NUMBER OF GAPS IN THE UPSTEAM 
+																										#2	AND DOWNSTREAM SEQUENCES OF THE MICROSATs IN THIS
+																										#3	CLUSTER.
+	
+				my $downstream_gaps = (substr($sequences{$tag}, $extreme_end) =~ s/\-/-/g);
+				if (($extreme_start - $upstream_gaps )< $EDGE_DISTANCE || (length($sequences{$tag}) - $extreme_end - $downstream_gaps) <  $EDGE_DISTANCE){
+					$edge_microsat=1;
+					
+					last;
+				}
+				else{
+					$up_flanks{$tag} = substr($sequences{$tag}, $extreme_start - $FLANK_SUPPORT, $FLANK_SUPPORT);
+					$down_flanks{$tag} = substr($sequences{$tag}, $extreme_end+1, $FLANK_SUPPORT);
+
+					$up_largeflanks{$tag} = substr($sequences{$tag}, $extreme_start - $COMPLEXITY_SUPPORT, $COMPLEXITY_SUPPORT);
+					$down_largeflanks{$tag} = substr($sequences{$tag}, $extreme_end+1, $COMPLEXITY_SUPPORT);
+
+
+					$alignment{$tag} = substr($sequences{$tag}, $extreme_start, $extreme_end-$extreme_start+1);
+					$locusandflanks{$tag} = $up_flanks{$tag}."[".$alignment{$tag}."]".$down_flanks{$tag};
+					$locusandlargeflanks{$tag} = $up_largeflanks{$tag}."[".$alignment{$tag}."]".$down_largeflanks{$tag};
+					
+					if ($microhash{$tag} ne "NULL"){
+						$up_internal_flanks{$tag} = substr($sequences{$tag}, $extreme_start , $start_hash{$tag}-$extreme_start);
+						$down_internal_flanks{$tag} = substr($sequences{$tag}, $end_hash{$tag} , $extreme_end-$end_hash{$tag});
+						$microsatstarts{$tag}=$start_hash{$tag}-$extreme_start;
+#						print "tag = $tag, internal flanks = $up_internal_flanks{$tag} and $down_internal_flanks{$tag} and start = $microsatstarts{$tag}\n" if $printer == 1;
+					}
+					else{
+						$nonmicrosathash{$tag}=substr($sequences{$tag}, $extreme_start, $extreme_end-$extreme_start+1);
+					
+					}
+			#		print "up flank for species $tag = $up_flanks{$tag} \ndown flank for species $tag = $down_flanks{$tag} \n" if $printer == 1;
+										
+				}
+	
+			}
+			$nextcounter[11]++  if $edge_microsat==1;
+			next if $edge_microsat==1;
+			
+			
+			my $low_complexity = 0; 								#1 VALUE WILL BE 1 IF ANY OF THE FLANKING REGIONS
+																	#2 IS FOUND TO BE OF LOW COMPLEXITY, BY USING THE
+																	#3 FUNCTION sub test_complexity
+			
+			
+			for my $i (0 ... $#tags){
+#				print "i = $tags[$i]\n" if $printer == 1;
+				if (test_complexity($up_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT) eq "LOW" || test_complexity($down_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT) eq "LOW"){
+#					print "i = $i, low complexity regions: $up_largeflanks{$tags[$i]}: ",test_complexity($up_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT), "  and $down_largeflanks{$tags[$i]} = ",test_complexity($down_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT),"\n" if $printer == 1;
+					$low_complexity =1; last;
+				}
+			}
+			
+			$nextcounter[12]++  if $low_complexity==1;
+			next if $low_complexity == 1;
+			
+			
+			my $sequence_dissimilarity = 0;										#1 THIS VALYE WILL BE 1 IF THE SEQUENCE SIMILARITY
+																	#2 BETWEEN ANY OF THE SPECIES AGAINST THE HUMAN
+																	#3 FLANKING SEQUENCES IS BELOW A CERTAIN THRESHOLD
+																	#4 AS DESCRIBED IN FUNCTION sub sequence_similarity
+			my %donepair = ();
+			for my $i (0 ... $#tags){
+			#	print "i = $tags[$i]\n" if $printer == 1;
+#				next if $i == 0;
+			#	print colored ['magenta'],"THIS IS UP\n" if $printer == 1;
+
+				for my $b (0 ... $#tags){
+					next if $b == $i;
+					my $pair = ();
+					$pair = $i."_".$b if $i < $b;
+					$pair = $b."_".$i if $b < $i;
+					next if exists $donepair{$pair};
+					my ($up_similarity,$upnucdiffs, $upindeldiffs) = sequence_similarity($up_flanks{$tags[$i]}, $up_flanks{$tags[$b]}, $SIMILARITY_THRESH, $info);
+					my ($down_similarity,$downnucdiffs, $downindeldiffs) = sequence_similarity($down_flanks{$tags[$i]}, $down_flanks{$tags[$b]}, $SIMILARITY_THRESH, $info);				
+					$donepair{$pair} = $up_similarity."_".$down_similarity;
+					
+#					print RUN "$up_similarity	$upnucdiffs	$upindeldiffs	$down_similarity	$downnucdiffs	$downindeldiffs\n";
+					
+					if ( $up_similarity < $SIMILARITY_THRESH || $down_similarity < $SIMILARITY_THRESH){
+						$sequence_dissimilarity =1; 
+						last;
+					}
+				}
+			}
+			$nextcounter[13]++  if $sequence_dissimilarity==1;
+
+			next if $sequence_dissimilarity == 1;
+			my ($simplified_microsat, $Hchrom, $Hstart, $Hend, $locusmotif, $locusmotifsize) = summarize_microsat($cluster, $humseq);
+		#	print "simplified_microsat=$simplified_microsat\n"; <STDIN>;
+			my ($tree_analysis, $alternative_trees, $conformation) = treeStudy($simplified_microsat);
+			
+			if (exists $treesToReject{$tree_analysis}){
+				$nextcounter[14]++;
+				next;
+			}
+
+# 			my $adjuster=();
+# 			if ($no_of_species == 4){
+# 				my @sields = split(/\t/,$simplified_microsat);
+# 				my $somend = pop(@sields);
+# 				my $somestart = pop(@sields);
+# 				my $somechr = pop(@sields);
+# 				$adjuster = "NA\t" x 13 ;
+# 				$simplified_microsat = join ("\t", @sields, $adjuster).$somechr."\t".$somestart."\t".$somend;
+# 			}
+# 			if ($no_of_species == 3){
+# 				my @sields = split(/\t/,$simplified_microsat);
+# 				my $somend = pop(@sields);
+# 				my $somestart = pop(@sields);
+# 				my $somechr = pop(@sields);
+# 				$adjuster = "NA\t" x 26 ;
+# 				$simplified_microsat = join ("\t", @sields, $adjuster).$somechr."\t".$somestart."\t".$somend;
+# 			}
+# 			
+			$registeredTrees{$tree_analysis} = 1 if !exists $registeredTrees{$tree_analysis};
+			$registeredTrees{$tree_analysis}++ if exists $registeredTrees{$tree_analysis};
+				
+			if (exists $treesToIgnore{$tree_analysis}){
+				my @appendarr = ();
+
+				print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize], "\t", $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
+				#print "SUMMARY ",$Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize], "\t", $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
+#				print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","NOEVENT", "\t\t", $cluster,"\n";
+				
+				foreach my $lnode (@$lagestnodes){
+					my @pair = @$lnode;
+					my @nodemutarr = ();
+					for my $p (@pair){
+						my @mutinfoarray1 = ();
+						for (1 ... 38){
+							push (@mutinfoarray1, "NA")
+						}
+						print SUMMARY join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t"; 			
+					}
+	
+				}
+				for (1 ... 38){
+					push (@appendarr, "NA")
+				}
+				print SUMMARY join ("\t", @appendarr,"NULL", "NULL"),"\n"; 
+		#		print "SUMMARY ",join ("\t", @appendarr,"NULL", "NULL"),"\n"; #<STDIN>;
+				next;
+			}
+			
+			my ($mutations_array, $nodes, $branches_hash, $alivehash, $primaryalignment) = peel_onion($tree, \%sequences, \%alignment, \@tags, \%microsathash, \%nonmicrosathash, $motif, $tree_analysis, $thresholdhash{length($motif)}, \%microsatstarts);
+			
+			if ($mutations_array eq "NULL"){
+				my @appendarr = ();
+
+				print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
+		#		print "SUMMARY ", $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t"; 
+#				print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","EVENT", "\t\t", $cluster,"\n";
+
+				foreach my $lnode (@$lagestnodes){
+					my @pair = @$lnode;
+					my @nodemutarr = ();
+					for my $p (@pair){
+						my @mutinfoarray1 = ();
+						for (1 ... 38){
+							push (@mutinfoarray1, "NA")
+						}
+						print SUMMARY join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t"; 			
+		#				print  join ("\t", "SUMMARY", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t"; 			
+					}
+	
+				}
+				for (1 ... 38){
+					push (@appendarr, "NA")
+				}
+				print SUMMARY join ("\t", @appendarr,"NULL", "NULL"),"\n"; 
+			#	print  join ("\t","SUMMARY", @appendarr,"NULL", "NULL"),"\n"; #<STDIN>;
+				next;
+			}
+			
+			
+#			print "sent: \n" if $printer == 1;
+#			print "nodes = @$nodes,  branches array:\n" if $mutations_array ne "NULL" &&  $printer == 1;
+
+			my ($newstatus, $newmutations_array, $newnodes, $newbranches_hash, $newalivehash, $finalalignment) = fillAlignmentGaps($tree, \%sequences, \%alignment, \@tags, \%microsathash, \%nonmicrosathash, $motif, $tree_analysis, $thresholdhash{length($motif)}, \%microsatstarts);
+#			print "newmutations_array returned = \n",join("\n",@$newmutations_array),"\n" if $newmutations_array ne "NULL" &&  $printer == 1;
+			my @finalmutations_array= ();
+			@finalmutations_array = selectMutationArray($mutations_array, $newmutations_array, \@tags, $alivehash, \%alignment, $motif) if $newmutations_array ne "NULL";
+			@finalmutations_array = selectMutationArray($mutations_array, $mutations_array, \@tags, $alivehash, \%alignment, $motif) if $newmutations_array eq "NULL";
+
+			my ($besttree, $treescore) = selectBetterTree($tree_analysis, $alternate{$tree_analysis}, \@finalmutations_array);
+			my $cleancase = "UNCLEAN";
+			
+			$cleancase = checkCleanCase($besttree, $finalalignment) if $treescore > 0 && $finalalignment ne "NULL" && $finalalignment =~ /\!/;
+			$cleancase = checkCleanCase($besttree, $primaryalignment) if $treescore > 0 && $finalalignment eq "NULL" && $primaryalignment =~ /\!/ && $primaryalignment ne "NULL";
+			$cleancase = "CLEAN" if $finalalignment eq "NULL" && $primaryalignment !~ /\!/ && $primaryalignment ne "NULL";
+			$cleancase = "CLEAN" if $finalalignment ne "NULL" && $finalalignment !~ /\!/ ;
+			$besttree = "NULL" if $treescore <= 0;
+			print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
+		#	print "SUMMARY ", $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize],"\t",$simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
+		
+#			print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","EVENT", "\t\t", $cluster,"\n";
+			
+			my @mutinfoarray =();
+			
+			foreach my $lnode (@$lagestnodes){
+				my @pair = @$lnode;
+				my $joint = "(".join(", ",@pair).")";
+				my @nodemutarr = ();
+
+				for my $p (@pair){
+						foreach my $mut (@finalmutations_array){
+						$mut =~ /node=([A-Z, \(\)]+)/;
+						push @nodemutarr, $mut if $p eq $1;
+					}
+#					print "from pair @pair, p=$p\n";
+					@mutinfoarray = summarizeMutations(\@nodemutarr, $besttree);
+					print SUMMARY join ("\t", @mutinfoarray[0...($#mutinfoarray-1)] ),"\t"; 			
+		#			print "SUMMARY ",join ("\t", @mutinfoarray[0...($#mutinfoarray-1)] ),"\t"; 			
+				}
+
+			}
+			
+			@mutinfoarray = summarizeMutations(\@finalmutations_array, $besttree);
+			print SUMMARY join ("\t", @mutinfoarray ),"\t"; 			
+			print SUMMARY  $cleancase, "\n";
+		#	print "SUMMARY ",join ("\t", @mutinfoarray,$cleancase ),"\n"; #<STDIN>; 			
+		#	print "summarized\n"; <STDIN>;
+	
+			my %indelcatch = ();
+			my %substcatch = ();
+			my %typecatch = ();
+			my %nodescatch = ();
+			my $mutconcat = join("\t", @finalmutations_array)."\n";
+			my %indelposcatch = ();
+			my %subsposcatch = ();
+			
+				foreach my $fmut ( @finalmutations_array){
+#					next if $fmut !~ /indeltype=[a-zA-Z]+/;
+					#print RUN $fmut, "\n";
+					$fmut =~ /node=([a-zA-Z, \(\)]+)/;
+					my $lnode = $1;
+					$nodescatch{$1}=1;
+					
+					if ($fmut =~ /type=substitution/){
+		#				print "fmut=$fmut\n";
+						$fmut =~ /from=([a-zA-Z\-]+)\tto=([a-zA-Z\-]+)/;
+						my $from=$1;
+		#				print "from=$from\n";
+						my $to=$2;
+		#				print "to=$to\n";	
+						push @{$substcatch{$lnode}} , ("from:".$from." to:".$to);
+						$fmut =~ /position=([0-9]+)/;
+						push @{$subsposcatch{$lnode}}, $1;
+					}
+					
+					if ($fmut =~ /insertion=[a-zA-Z\-]+/){
+						$fmut =~ /insertion=([a-zA-Z\-]+)/;
+						push @{$indelcatch{$lnode}} , $1;
+						$fmut =~ /indeltype=([a-zA-Z]+)/;
+						push @{$typecatch{$lnode}}, $1;
+						$fmut =~ /position=([0-9]+)/;
+						push @{$indelposcatch{$lnode}}, $1;
+					}
+					if ($fmut =~ /deletion=[a-zA-Z\-]+/){
+						$fmut =~ /deletion=([a-zA-Z\-]+)/;
+						push @{$indelcatch{$lnode}} , $1;
+						$fmut =~ /indeltype=([a-zA-Z]+)/;
+						push @{$typecatch{$lnode}}, $1;
+						$fmut =~ /position=([0-9]+)/;
+						push @{$indelposcatch{$lnode}}, $1;
+					}
+				}
+							
+		#	print  $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t" if $printer == 1;
+		#	print join ("<\t>", @mutinfoarray),"\n" if $printer == 1; 
+		#	print "where mutinfoarray = @mutinfoarray\n"  if $printer == 1;
+		#	#print RUN ".";
+			
+		#	print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
+		#	print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
+
+		#	print colored ['red'],"finalmutations_array=\n" if $printer == 1;
+			foreach (@finalmutations_array) {
+#				print colored ['red'], "$_\n" if $_ =~ /type=substitution/ && $printer == 1  ;
+#				print colored ['yellow'], "$_\n" if $_ !~ /type=substitution/ && $printer == 1  ;
+				
+			}# if $line =~ /cal/;# && $line =~ /chr4/;
+			
+#			print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
+#			print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
+#			print "tree analysis = $tree_analysis\n" if $printer == 1;
+			
+		#	my $mutations = "@$mutations_array";
+
+
+			next;
+			for my $keys (@$nodes) {foreach my $key (@$keys){
+										#print "key = $key, => $branches_hash->{$key}\n";
+									} 
+								#	print "x" x 50, "\n";
+								}
+			my ($birth_steps, $death_steps) = decipher_history($mutations_array,join("",@tags),$nodes,$branches_hash,$tree_analysis,$conformation, $alivehash, $simplified_microsat);
+		}
+	}
+	close BO;
+#	print "now studying where we lost microsatellites:";
+#	print "x" x 60,"\n";
+	for my $reason (0 ... $#nextcounter){
+#		print $removalReasons[$reason]."\t".$nextcounter[$reason],"\n";
+	}
+#	print "x" x 60,"\n";
+#	print "In total we read $microreadcounter microsatellites after reading through $contigsentered contigs\n";
+#	print " we lost $keynotformedcounter contigs as they did not form the key, \n";
+#	print "$contignotrightcounter contigs as they were not of the right species configuration\n";  
+#	print "$keynotfoundcounter contigs as they did not contain the microsats\n";  
+#	print "... In total we went through a file that had $dotcounter contigs...\n";  
+#	print join ("\n","remaining orth keys = ", (keys %orths),"");
+
+
+#	print "now printing counted trees: \n";
+		if (scalar(keys %registeredTrees) > 0){
+		foreach my $keyb ( sort (keys %registeredTrees) )
+		{
+#			print "$keyb : $registeredTrees{$keyb}\n";
+		}
+	}
+	
+	
+}	
+
+my @summarizarr = ("+C=+C +R.+C -HCOR,+C",
+"+H=+H +R.+H -HCOR,+H",
+"-C=-C -R.-C +HCOR,-C",
+"-H=-H -R.-H +HCOR,-H",
+"+HC=+HC",
+"-HC=-HC",
+"+O=+O -HCOR,+O",
+"-O=-O +HCOR,-O",
+"+HCO=+HCO",
+"-HCO=-HCO",
+"+R=+R +R.+C +R.+H",
+"-R=-R -R.-C -R.-H");
+
+foreach my $line (@summarizarr){
+	next if $line !~ /[A-Za-z0-9]/;
+#	print $line;
+	chomp $line;
+	my @fields = split(/=/,$line);
+#	print "title = $fields[0]\n";
+	my @parts=split(/ +/, $fields[1]);
+	my %partshash = ();
+	 foreach my $part (@parts){$partshash{$part}=1;}
+	my $count=0;	
+	foreach my $key ( sort keys %registeredTrees ){
+		next if !exists $partshash{$key};
+#		print "now adding $registeredTrees{$key} from $key\n";
+		$count+=$registeredTrees{$key};
+	}
+#	print "$fields[0] : $count\n";
+}
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+sub largest_number{
+	my $counter = 0;
+	my($max) = shift(@_);
+    foreach my $temp (@_) {
+    	
+    	#print "finding largest array: $maxcounter \n";
+    	if($temp > $max){
+        	$max = $temp;
+        }
+    }
+    return($max);
+}
+
+sub smallest_number{
+	my $counter = 0;
+	my($min) = shift(@_);
+    foreach my $temp (@_) {
+    	#print "finding largest array: $maxcounter \n";
+    	if($temp < $min){
+        	$min = $temp;
+        }
+    }
+    return($min);
+}
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+sub baseml_parser{
+	my $outputfile = $_[0];
+	open(BOUT,"<$outputfile") or die "Cannot open output of upstream baseml $outputfile: $!";
+	my @info = ();
+	my @branchields = ();
+	my @distanceields = ();
+	my @bout = <BOUT>;
+	#print colored ['red'], @bout ,"\n";
+	for my $b (0 ... $#bout){
+		my $bine=$bout[$b];
+		#print  colored ['yellow'], "sentence = ",$bine;
+		if ($bine =~ /TREE/){
+			$bine=$bout[$b++];
+			$bine=$bout[$b++];
+			$bine=$bout[$b++];
+			#print "FOUND",$bine;
+			chomp $bine;
+			$bine =~ s/^\s+//g;
+			@branchields = split(/\s+/,$bine);
+			$bine=$bout[$b++];
+			chomp $bine;
+			$bine =~ s/^\s+//g;
+			@distanceields = split(/\s+/,$bine);
+			#print "LASTING..............\n";
+			last;
+		}
+		else{		
+		}
+	}
+	
+	close BOUT;
+#			print "branchfields = @branchields and distanceields = @distanceields\n"  if $printer == 1; 
+	my %distance_hash=();
+	for my $d (0 ... $#branchields){
+		$distance_hash{$branchields[$d]} = $distanceields[$d];
+	}
+	
+	$info[0] = $distance_hash{"9..1"} + $distance_hash{"9..2"};
+	$info[1] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+ $distance_hash{"8..3"};
+	$info[2] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+$distance_hash{"7..8"}+$distance_hash{"7..4"};
+	$info[3] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+$distance_hash{"7..8"}+$distance_hash{"6..7"}+$distance_hash{"6..5"};
+	
+#	print "\nsending back: @info\n" if $printer == 1;
+	
+	return join("\t",@info);
+	
+}
+
+
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+sub test_complexity{
+	my $printer = 0;
+	my $sequence = $_[0];
+	my $COMPLEXITY_SUPPORT = $_[1];
+	my $complexity=int($COMPLEXITY_SUPPORT * (1/40));				#1	THIS IS AN ARBITRARY THRESHOLD SET FOR LOW COMPLEXITY.
+																	#2	THE INSPIRATION WAS WEB MILLER'S MAIL SENT ON 
+																	#3	19 Apr 2008 WHERE HE CLASSED AS HIGH COMPLEXITY
+																	#4	REGION, IF 40 BP OF SEQUENCE HAS AT LEAST 3 OF
+																	#5	EACH NUCLEOTIDE. HENCE, I NORMALIZE THIS PARAMETER
+																	#6	FOR THE ACTUAL LENGTH OF $FLANK_SUPPORT SET BY
+																	#7	THE USER.
+																	#8	WEB MILLER SENT THE MAIL TO YDK104@PSU.EDU
+	
+	
+	
+	my $As = ($sequence=~ s/A/A/gi);
+	my $Ts = ($sequence=~ s/T/T/gi);
+	my $Gs = ($sequence=~ s/G/G/gi);
+	my $Cs = ($sequence=~ s/C/C/gi);
+	#print "seq = $sequence, As=$As, Ts=$Ts, Gs=$Gs, Cs=$Cs\n" if $printer == 1;
+	
+	my $ans = ();
+
+	return "HIGH" if $As >= $complexity && $Ts >= $complexity && $Cs >= $complexity && $Gs >= $complexity;
+
+	my @nts = ("A","T","G","C","-");
+	
+	my $lowcomplex = 0;
+	
+	foreach my $nt (@nts){
+		$lowcomplex =1 if $sequence =~ /(($nt\-*){10,})/i;
+#		print "caught with a mono of $nt : $1 in $sequence\n" if $sequence =~ /(($nt\-*){10,})/i;
+		$lowcomplex =1 if $sequence =~ /(($nt[A-Za-z]){10,})/i;
+		$lowcomplex =1 if $sequence =~ /(([A-Za-z]$nt){10,})/i;
+#		print "caught with a di with $nt : $2 in $sequence\n" if $sequence =~ /(($nt[A-Za-z]){10,})/i || $sequence =~ /(([A-Za-z]$nt){10,})/i;
+		my $nont = ($sequence=~ s/$nt/$nt/gi);
+		
+	}
+#	print "leaving for now.. $sequence\n" if $printer == 1 && $lowcomplex == 0;
+	#<STDIN>;
+	return "HIGH" if $lowcomplex == 0;
+	return "LOW" ;
+}
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+sub sequence_similarity{
+	my $printer = 0;
+	my @seq1 = split(/\s*/, $_[0]);
+	my @seq2 = split(/\s*/, $_[1]);
+	my $similarity_thresh = $_[2];
+	my $info = $_[3];
+#	print "input = @_\n" if $printer == 1;
+	my $seq1str = $_[0];
+	my $seq2str = $_[1];
+	$seq1str=~s/\-//g; 	$seq2str=~s/\-//g;
+	my $similarity=0;
+	
+	my $nucdiffs=0;
+	my $nucsims=0;
+	my $indeldiffs=0;
+	
+	for my $i (0...$#seq1){
+		$similarity++ if $seq1[$i] =~ /$seq2[$i]/i  ; #|| $seq1[$i] =~ /\-/i || $seq2[$i] =~ /\-/i ;
+		$nucsims++ if $seq1[$i] =~ /$seq2[$i]/i && ($seq1[$i] =~ /[a-zA-Z]/i && $seq2[$i] =~ /[a-zA-Z]/i);
+		$nucdiffs++ if $seq1[$i] !~ /$seq2[$i]/i && ($seq1[$i] =~ /[a-zA-Z]/i && $seq2[$i] =~ /[a-zA-Z]/i);
+		$indeldiffs++ if $seq1[$i] !~ /$seq2[$i]/i && $seq1[$i] =~ /\-/i || $seq2[$i] =~ /\-/i;
+	}
+	my $sim = $similarity/length($_[0]);
+	return ( $sim, $nucdiffs, $indeldiffs ); #<=  $similarity_thresh;
+}
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+
+sub load_treesToReject{
+	my @rejectlist = ();
+	my $alltags = join("",@_);
+	@rejectlist = qw (-HCOR +HCOR) if $alltags eq "HCORM";
+	@rejectlist = qw ( -HCO|+R +HCO|-R) if $alltags eq "HCOR";
+	@rejectlist = qw ( -HC|+O +HC|-O) if $alltags eq "HCO";
+
+	%treesToReject=();
+	$treesToReject{$_} = $_ foreach (@rejectlist);
+	#print "loaded to reject for $alltags; ", $treesToReject{$_},"\n" foreach (@rejectlist); #<STDIN>;
+}
+#--------------------------------------------------------------------------------------------------------
+sub load_treesToIgnore{
+	my @rejectlist = ();
+	my $alltags = join("",@_);
+	@rejectlist = qw (-HCOR +HCOR +HCORM -HCORM) if $alltags eq "HCORM";
+	@rejectlist = qw ( -HCO|+R +HCO|-R +HCOR -HCOR) if $alltags eq "HCOR";
+	@rejectlist = qw ( -HC|+O +HC|-O +HCO -HCO) if $alltags eq "HCO";
+
+	%treesToIgnore=();
+	$treesToIgnore{$_} = $_ foreach (@rejectlist);
+	#print "loaded ", $treesToIgnore{$_},"\n" foreach (@rejectlist);
+}
+#--------------------------------------------------------------------------------------------------------
+sub load_thresholds{
+	my @threshold_array=split(/[,_]/,$_[0]);
+	unshift @threshold_array, "0";
+	for my $size (1 ... 4){
+		$thresholdhash{$size}=$threshold_array[$size];
+	}
+}
+#--------------------------------------------------------------------------------------------------------
+sub load_allPossibleTrees{
+	#1 THIS FILE STORES ALL POSSIBLE SCENARIOS OF MICROSATELLITE
+	#2 BIRTH AND DEATH EVENTS ON A 5-PRIMATE TREE OF H,C,O,R,M
+	#3 IN FORM OF A TEXT FILE. THIS WILL BE USED AS A TEMPLET
+	#4 TO COMPARE EACH MICROSATELLITE CLUSTER TO UNDERSTAND THE
+	#5 EVOLUTION OF EACH LOCUS. WE WILL THEN DISCARD SOME 
+	#6 MICROSATS ACCRODING TO THEIR EVOLUTIONARY BEHAVIOUR ON 
+	#7 THE TREE. MOST PROBABLY WE WILL REMOVE THOSE MICROSATS
+	#8 THAT ARE NOT SUFFICIENTLY INFORMATIVE, LIKE IN CASE OF
+	#9 AN OUTGROUP MICROSATELLITE BEING DIFFERENT FRON ALL OTHER
+	#10 SPECIES IN THE TREE.
+	my $tree_list = $_[0];	
+#	print "file to be loaded: $tree_list\n";
+
+	my @trarr = ();
+	@trarr = ("#H C O	CONCLUSION	ALTERNATE",
+"+ + +	+HCO	NA",
+"+ _ _	+H	NA",
+"_ + _	+C	NA",
+"_ _ +	-HC|+O	NA",
+"+ _ +	-C	+H",
+"_ + +	-H	+C",
+"+ + _	+HC|-O	NA",
+"_ _ _	-HCO	NA") if $tree_list =~ /_HCO\.txt/;
+	@trarr = ("#H C O R	CONCLUSION	ALTERNATE",
+"_ _ _ _	-HCOR	NA",
+"+ + + +	+HCOR	NA",
+"+ + + _	+HCO|-R	+H.+C.+O",
+"+ + _ _	+HC	+H.+C;-O",
+"+ _ _ _	+H	+HC,-C",
+"_ + _ _	+C	+HC,-H",
+"_ _ + _	+O	-HC|-H.-C",
+"_ _ + +	-HC	-H.-C",
+"+ _ _ +	+H|-C.-O	+HC,-C",
+"_ + _ +	+C	-H.-O",
+"_ + + _	-H	+C.+O",
+"_ _ _ +	-HCO|+R	NA",
+"+ _ + _	+H.+O|-C	NA",
+"_ + + +	-H	-HC,+C",
+"+ _ + +	-C	-HC,+H",
+"+ + _ +	-O	+HC") if $tree_list =~ /_HCOR\.txt/;
+
+		@trarr = ("#H C O R M	CONCLUSION	ALTERNATE",
+"+ + + + _	+HCOR	NA",
+"+ + + _ +	-R	+HCO;+HC.+O;+H.+C.+O",
+"+ + _ + +	-O	-HCO,+HC|-HCO,+HC;-HCO,(+H.+C)",
+"+ _ + + +	-C	-HC,+H;+HCO,(+H.+O)",
+"_ + + + +	-H	-HC,+C;-HCO,(+C.+O)",
+"_ _ _ _ +	-HCOR	NA",
+"_ _ _ + _	+R	-HC.-O;-H.-C.-O",
+"_ _ + _ _	+O	+HCO,-HC;+HCO,(-H.-C)",
+"_ + _ _ _	+C	+HC,-H;+HCO,(-H.-O)",
+"+ _ _ _ _	+H	+HC,-C;+HCO,(-C.-O)",
+"+ + + _ _	+HCO	+H.+C.+O",
+"+ + _ + _	-O	+R.+HC|-HCO,+HC;+H.+C.+R|-HCO,(+H.+C)",
+"+ _ + + _	-C	-HC,+H;+H.+O.+R|-HCO,(+H.+O)",
+"_ + + + _	-H	-HC,+C;+C.+O.+R|-HCO,(+C.+O)",
+"_ _ _ + +	-HCO	-HC.-O;-H.-C.-O",
+"_ _ + _ +	+O	+HCO,-HC;+HCO,(-H.-C)",
+"_ + _ _ +	+C	+HC,-H;+HCO,(-H.-O)",
+"+ _ _ _ +	+H	-HC,+H;+HCO,(-C.-O)",
+"+ + _ _ +	+HC	-R.-O|+HCO,-O|+H.+C;-HCO,+HC;-HCO,(+H.+C)",
+"+ _ + _ +	-R.-C|+HCO,-C|+H.+O	NA",
+"_ + + _ +	-R.-H|+HCO,-H|+C.+O	NA",
+"_ _ + + _	-HC	+R.+O|-HCO,+O|+HCO,-HC",
+"_ + _ + _	+R.+C|-HCO,+C|-HC,+C	+HCO,(-H.-O)",
+"+ _ _ + _	+R.+H|-C.-O	+HCO,(-C.-O)",
+"+ _ _ + +	-O.-C|-HCO,+H	+R.+H;-HCO,(+R.+H)",
+"_ + _ + +	-O.-H|-HCO,+C	+R.+C;-HCO,(+R.+C)",
+"_ + + _ _	+HCO,-H|+O.+C	NA",
+"+ _ + _ _	+HCO,-C|+O.+H	NA",
+"_ _ + + +	-HC	-H.-C|-HCO,+O",
+"+ + _ _ _	+HC	+H.+C|+HCO,-O|-HCO,+HC;-HCO,(+H.+C)",
+"+ + + + +	+HCORM	NA") if $tree_list =~ /_HCORM\.txt/;
+
+	
+	my $template_p = $_[1];
+	my $alternate_p = $_[2];
+																	#1 THIS IS THE HASH IN WHICH INFORMATION FROM THE ABOVE FILE
+																	#2 GETS STORED, USING THE WHILE LOOP BELOW. HERE, THE KEY
+																	#3 OF EACH ROW IS THE EVOLUTIONARY CONFIGURATION OF A LOCUS
+																	#4 ON THE PRIMATE TREE, BASED ON PRESENCE/ABSENCE OF A MICROSAT
+																	#5 AT THAT LOCUS, LIKE SAY "+ + + _ _" .. EACH COLUMN BELONGS 
+																	#6 TO ONE SPECIES; HERE THE COLUMN NAMES ARE "H C O R M".
+																	#7 THE VALUE FOR EACH ENTRY IS THE MEANING OF THE ABOVE 
+																	#8 CONFIGURATION (I.E., CONFIGURAION OF THE KEY. HERE, THE
+																	#9 VALUE WILL BE +HCO, SIGNIFYING A BIRTH IN HUMAN-CHIMP-ORANG
+																	#10 COMMON ANCESTOR. THIS HASH HAS BEEN LOADED HERE TO BE USED
+																	#11 LATER BY THE SUBROUTINE sub treeStudy{} THAT STUDIES
+																	#12 EVOLUTIONARY CONFIGURAION OF EACH MICROSAT LOCUS, AS
+																	#13 MENTIONED ABOVE.	
+	my @keys_array=();
+	foreach my $line (@trarr){
+		next if $line =~ /^#/;
+		chomp $line;
+		my @fields = split("\t", $line);
+		push @keys_array, $fields[0];
+#		print "loading: $fields[0]\n";
+		$template_p->{$fields[0]}[0] = $fields[1];
+		$template_p->{$fields[0]}[1] = 0;
+		$alternate_p->{$fields[1]} = $fields[2];
+		
+	}
+#	print "loaded the trees with keys: @keys_array\n";
+	return $template_p, \@keys_array, $alternate_p;
+}
+
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+sub checkCleanCase{
+	my $printer = 0;
+	my $tree = $_[0];
+	my $finalalignment = $_[1];
+	
+	#print "IN checkCleanCase: @_\n";
+	#<STDIN>;
+	my @indivspecies = $tree =~ /[A-Z]/g;
+	$finalalignment =~ s/\./_/g;
+	my @captured = $finalalignment =~ /[A-Za-z, \(\):]+\![:A-Za-z, \(\)]/g;
+	
+	my $unclean = 0;
+	
+	foreach my $sp (@indivspecies){
+		foreach my $cap (@captured){
+			$cap =~ s/:[A-Za-z\-]+//g;
+			my @sps = $cap =~ /[A-Z]+/g;		
+			my $spsc = join("", @sps);
+#			print "checking whether imp species $sp is present in $cap i.e, in $spsc\n " if $printer == 1;
+			if ($spsc =~ /$sp/){
+#				print "foind : $sp\n";
+				$unclean = 1; last;
+			}
+		}
+		last if $unclean == 1;
+	}
+	#<STDIN>;
+	return "CLEAN" if $unclean == 0;
+	return "UNCLEAN";
+}
+
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+#--------------------------------------------------------------------------------------------------------
+
+
+sub adjustCoordinates{
+	my $line = $_[0];
+	my $no_of_species = $line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)/x/g;
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 2 - 1;
+	$gapcord = $motifcord+1;
+	$startcord = $gapcord+1;
+	$strandcord = $startcord+1;
+	$endcord = $strandcord + 1;
+	$microsatcord = $endcord + 1;
+	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	$interr_poscord = $microsatcord + 3;
+	$no_of_interruptionscord = $microsatcord + 4;
+	$interrcord = $microsatcord + 2;
+	#print "$line\n startcord = $startcord, and endcord = $endcord and no_of_species = $no_of_species\n" if $printer == 1;
+	return $no_of_species;
+}
+
+
+sub printhash{
+	my $alivehash = $_[0];
+	my @tags = @$_[1];
+#	print "print hash\n";
+	foreach my $tag (@tags){
+#		print "$tag=",$alivehash->{$tag},"\n" if exists $alivehash->{$tag};
+	}
+		
+	return "\n"
+}
+sub peel_onion{
+	my $printer = 0;
+#	print "received: @_\n" ; #<STDIN>;
+	$printer = 0;
+	my ($tree, $sequences, $alignment, $tagarray, $microsathash, $nonmicrosathash, $motif, $tree_analysis, $threshold, $microsatstarts) = @_;
+#	print "in peel onion.. tree = $tree \n" if $printer == 1;
+	my %sequence_hash=();
+
+
+#	for my $i (0 ... $#sequences){ $sequence_hash{$species[$i]}=$sequences->[$i]; }
+
+
+	my %node_sequences=();
+	
+	my %node_alignments = ();			#NEW, Nov 28 2008
+	my @tags=();
+	my @locus_sequences=();
+	my %alivehash=();
+	foreach my $tag (@$tagarray) { 
+		#print "adding: $tag\n";
+		push(@tags, $tag);
+		$node_sequences{$tag}=join ".",split(/\s*/,$microsathash->{$tag}) if $microsathash->{$tag} ne "NULL"; 
+		$alivehash{$tag}= $tag if $microsathash->{$tag} ne "NULL";
+		$node_sequences{$tag}=join ".",split(/\s*/,$nonmicrosathash->{$tag}) if $microsathash->{$tag} eq "NULL";
+		$node_alignments{$tag}=join ".",split(/\s*/,$alignment->{$tag}) ;
+		push @locus_sequences, $node_sequences{$tag};
+		#print "adding to node_seq: $tag = ",$node_alignments{$tag},"\n";
+	}
+
+	
+
+	my ($nodes_arr, $branches_hash) = get_nodes($tree);
+	my @nodes=@$nodes_arr;
+#	print "recieved nodes = " if $printer == 1; 
+#	foreach my $key (@nodes) {print "@$key "  if $printer == 1;}
+	
+#	print "\n" if $printer == 1;
+	
+	#POPULATE branches_hash WITH INFORMATION ABOUT LIVESTATUS
+	foreach my $keys (@nodes){
+		my @pair = @$keys;
+		my $joint = "(".join(", ",@pair).")";
+		my $copykey = join "", @pair;
+		$copykey =~ s/[\W ]+//g;
+#		print "for node: $keys, copykey = $copykey and joint = $joint\n" if $printer == 1;
+		my $livestatus = 1;
+		foreach my $copy (split(/\s*/,$copykey)){
+			$livestatus = 0 if !exists $alivehash{$copy};
+		}
+		$alivehash{$joint} = $joint if !exists $alivehash{$joint} && $livestatus == 1;
+#		print "alivehash = $alivehash{$joint}\n" if exists $alivehash{$joint} && $printer == 1;
+	}
+		
+	@nodes = reverse(@nodes); #1 THIS IS IN ORDER TO GO THROUGH THE TREE FROM LEAVES TO ROOT.
+
+	my @mutations_array=();
+
+	my $joint = ();
+	foreach my $node (@nodes){
+		my @pair = @$node; 
+#		print "now in the nodes for loop, pair = @pair\n and sequences=\n" if $printer == 1;
+		$joint = "(".join(", ",@pair).")"; 	
+		my @pair_sequences=();
+
+		foreach my $tag (@pair){
+#			print "$tag:  $node_alignments{$tag}\n" if $printer == 1;
+			print $node_alignments{$tag},"\n" if $printer == 1;
+			push @pair_sequences, $node_alignments{$tag};
+		}
+#		print "ppeel onion joint = $joint , pair_sequences=>@pair_sequences< , pair=>@pair<\n" if $printer == 1; 
+		
+		my ($compared, $substitutions_list) = base_by_base_simple($motif,\@pair_sequences, scalar(@pair_sequences), @pair, $joint);
+		$node_alignments{$joint}=$compared;
+		push(  @mutations_array,split(/:/,$substitutions_list));
+#		print "newly added to node_sequences: $node_alignments{$joint} and list of mutations =\n", join("\n",@mutations_array),"\n" if $printer == 1;
+	}
+	
+#	print "now sending for analyze_mutations: mutation_array=@mutations_array, nodes=@nodes, branches_hash=$branches_hash, alignment=$alignment, tags=@tags, alivehash=\%alivehash, node_sequences=\%node_sequences, microsatstarts=$microsatstarts, motif=$motif\n" if $printer == 1;
+	## <STDIN> if $printer == 1;
+	
+	my $analayzed_mutations = analyze_mutations(\@mutations_array, \@nodes, $branches_hash, $alignment, \@tags, \%alivehash, \%node_sequences, $microsatstarts, $motif);
+
+#	print "returning: ", $analayzed_mutations, \@nodes, $branches_hash,"\n" if scalar @mutations_array > 0 && $printer == 1;
+#	print "returning: NULL, NULL, NULL " if scalar @mutations_array == 0 && $printer == 1;
+#	print "final node alignment = $node_alignments{$joint}\n" if $printer == 1;
+	
+	# <STDIN> if $printer == 1;
+
+
+	return ($analayzed_mutations, \@nodes, $branches_hash, \%alivehash, $node_alignments{$joint}) if scalar @mutations_array > 0;
+	return ("NULL",\@nodes,$branches_hash, \%alivehash, "NULL") if scalar @mutations_array == 0;
+}
+
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+
+sub get_nodes{
+	my $printer = 0;
+
+	my $tree=$_[0];
+	#$tree =~ s/ +//g;
+	$tree =~ s/\t+//g;
+	$tree=~s/;//g;
+	print "tree=$tree\n" if $printer == 1; 
+	my @nodes = ();
+	my @onions=($tree);
+	my %branches=();
+	foreach my $bite (@onions){
+		$bite=~ s/^\(|\)$//g;
+		chomp $bite;
+#		print "tree = $bite \n"; 
+#		<STDIN>;
+		$bite=~ /([ ,\(\)A-Z]+)\,\s*([ ,\(\)A-Z]+)/;
+		#$tree =~ /(\(\(\(H, C\), O\), R\))\, (M)/;
+		my @raw_nodes = ($1, $2);
+		print "raw nodes =  $1 and $2\n" if $printer == 1;
+		push(@nodes, [@raw_nodes]);
+		foreach my $node (@raw_nodes) {push (@onions, $node) if $node =~ /,/;}
+		foreach my $node (@raw_nodes) {$branches{$node}="(".$bite.")"; print "adding to branches: $node = ($bite)\n" if $printer == 1;}
+		print "onions = @onions\n" if $printer == 1;<STDIN> if $printer == 1;
+	}
+	$printer = 0;
+	return \@nodes, \%branches;
+}
+
+
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+sub analyze_mutations{
+	my ($mutations_array, $nodes, $branches_hash, $alignment, $tags, $alivehash, $node_sequences, $microsatstarts, $motif) = @_;
+	my $locuslength = length($alignment->{$tags->[0]});
+	my $printer = 0;
+	
+	
+#	print " IN analyzed_mutations....\n" if $printer == 1; #  \n mutations array = @$mutations_array, \nAND locuslength = $locuslength\n" if $printer == 1;
+	my %mutation_hash=();
+	my %froms_megahash=();
+	my %tos_megahash=();
+	my %position_hash=();
+	my @solutions_array=();
+	foreach  my $mutation (@$mutations_array){
+#		print "loadin mutation: $mutation\n" if $printer == 1;
+		my %localhash= $mutation =~ /([\S ]+)=([\S ]+)/g;
+		$mutation_hash{$localhash{"position"}} = {%localhash};
+		push @{$position_hash{$localhash{"position"}}},$localhash{"node"};
+#		print "feeding position hash with $localhash{position}: $position_hash{$localhash{position}}[0]\n" if $printer == 1;
+		$froms_megahash{$localhash{"position"}}{$localhash{"node"}}=$localhash{"from"};
+		$tos_megahash{$localhash{"position"}}{$localhash{"node"}}=$localhash{"to"};
+#		print "just a trial: $mutation_hash{$localhash{position}}{position}\n" if $printer == 1;
+#		print "loadin in tos_megahash: $localhash{position} {$localhash{node} = $localhash{to}\n" if $printer == 1;
+#		print "loadin in from: $localhash{position} {$localhash{node} = $localhash{from}\n" if $printer == 1;
+	}
+	
+#	print "now going through each position in loculength:\n" if $printer == 1;
+	## <STDIN> if $printer == 1;
+	
+	for my $pos (0 ... $locuslength-1){
+#		print "at position: $pos\n" if $printer == 1;
+
+		if (exists($mutation_hash{$pos})){
+			my @local_nodes=@{$position_hash{$pos}};
+#			print "found mutation: @{$position_hash{$pos}} :  @local_nodes\n" if $printer == 1;
+			
+			foreach my $local_node (@local_nodes){
+#				print "at local node: $local_node ... from state = $froms_megahash{$pos}{$local_node}\n" if $printer == 1;
+				my $open_insertion=();
+				my $open_deletion=();
+				my $open_to_substitution=();
+				my $open_from_substitution=();
+				if ($froms_megahash{$pos}{$local_node} eq "-"){
+				#	print "here exists a microsatellite from $local_node to $branches_hash->{$local_node}\n" if $printer == 1 &&  exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
+				#	print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node},  nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}} && $printer == 1;
+					#next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
+					$open_insertion=$tos_megahash{$pos}{$local_node};
+					for my $posnext ($pos+1 ... $locuslength-1){
+#						print "in first if  .... studying posnext: $posnext\n" if $printer == 1;
+						last if !exists ($froms_megahash{$posnext}{$local_node});
+#						print "for posnext: $posnext, there exists $froms_megahash{$posnext}{$local_node}.. already, open_insertion = $open_insertion.. checking is $froms_megahash{$posnext}{$local_node} matters\n" if $printer == 1;
+						$open_insertion = $open_insertion.$tos_megahash{$posnext}{$local_node} if $froms_megahash{$posnext}{$local_node} eq "-";
+#						print "now open_insertion=$open_insertion\n" if $printer == 1;
+						delete $mutation_hash{$posnext} if $froms_megahash{$posnext}{$local_node} eq "-";
+					}					
+					print "1 Feeding in: ", join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_insertion", "deletion="),"\n" if $printer == 1;
+					push (@solutions_array, join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_insertion", "deletion="));
+				}
+				elsif ($tos_megahash{$pos}{$local_node} eq "-"){
+				#	print "here exists a microsatellite to $local_node from $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
+				#	print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node},  nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
+					#next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
+					$open_deletion=$froms_megahash{$pos}{$local_node};
+					for my $posnext ($pos+1 ... $locuslength-1){
+						print "in 1st elsif studying posnext: $posnext\n" if $printer == 1;
+						print "nexting as nextpos does not exist\n" if !exists ($tos_megahash{$posnext}{$local_node}) && $printer == 1;
+						last if !exists ($tos_megahash{$posnext}{$local_node});
+						print "for posnext: $posnext, there exists $tos_megahash{$posnext}{$local_node}\n" if $printer == 1;
+						$open_deletion = $open_deletion.$froms_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} eq "-";
+						delete $mutation_hash{$posnext} if $tos_megahash{$posnext}{$local_node} eq "-";
+					}					
+					print "2 Feeding in:",  join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_deletion"), "\n" if $printer == 1;
+					push (@solutions_array, join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_deletion"));
+				}
+				elsif ($tos_megahash{$pos}{$local_node} ne "-"){
+				#	print "here exists a microsatellite from $local_node to $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
+				#	print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node},  nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
+					#next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
+				#	print "microsatstart = $microsatstarts->{$local_node}  \n" if exists $microsatstarts->{$local_node} && $pos < $microsatstarts->{$local_node} && $printer == 1;
+					next if exists $microsatstarts->{$local_node} && $pos < $microsatstarts->{$local_node};
+					$open_to_substitution=$tos_megahash{$pos}{$local_node};
+					$open_from_substitution=$froms_megahash{$pos}{$local_node};
+					print "open from substitution: $open_from_substitution \n" if $printer == 1;
+					for my $posnext ($pos+1 ... $locuslength-1){
+						#print "in last elsif studying posnext: $posnext\n";
+						last if !exists ($tos_megahash{$posnext}{$local_node});
+						print "for posnext: $posnext, there exists $tos_megahash{$posnext}{$local_node}\n" if $printer == 1;
+						$open_to_substitution = $open_to_substitution.$tos_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} ne "-";
+						$open_from_substitution = $open_from_substitution.$froms_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} ne "-";
+						delete $mutation_hash{$posnext} if $tos_megahash{$posnext}{$local_node} ne "-" && $froms_megahash{$posnext}{$local_node} ;
+					}	
+					print "open from substitution: $open_from_substitution \n" if $printer == 1;
+					
+					#IS THE STRETCH OF SUBSTITUTION MICROSATELLITE-LIKE?
+					my @motif_parts=split(/\s*/,$motif);
+					#GENERATING THE FLEXIBLE LEFT END
+					my $left_query=();
+					for my $k (1 ... $#motif_parts) {
+						$left_query= $motif_parts[$k]."|)";
+						$left_query="(".$left_query;
+					}
+					$left_query=$left_query."?";
+				
+					print "left_quewry = $left_query\n" if $printer == 1;
+					#GENERATING THE FLEXIBLE RIGHT END	
+					my $right_query=();
+					for my $k (0 ... ($#motif_parts-1)) {
+						$right_query= "(|".$motif_parts[$k];
+						$right_query=$right_query.")";
+					}
+					$right_query=$right_query."?";
+					print "right_query = $right_query\n" if $printer == 1;
+					print "Hence, searching for: ^$left_query($motif)+$right_query\$\n" if $printer == 1;
+					
+					my $motifcomb=$motif x 50;
+					print "motifcomb = $motifcomb\n" if $printer == 1;
+					if ( ($motifcomb =~/$open_to_substitution/i) && (length ($open_to_substitution) >= length($motif)) ){
+						print "sequence microsat-like\n" if $printer == 1;
+						my $all_microsat_like = 0;
+						print "3 feeding in: ", join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_from_substitution"), "\n" if $printer == 1;
+						push (@solutions_array, join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_from_substitution"));
+						print "4 feeding in: ", join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_to_substitution", "deletion="), "\n" if $printer == 1;
+						push (@solutions_array, join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_to_substitution", "deletion="));
+						
+					}
+					else{
+						print "5 feeding in: ", join("\t", "node=$local_node","type=substitution" ,"position=$pos", "from=$open_from_substitution", "to=$open_to_substitution", "insertion=", "deletion="), "\n" if $printer == 1;
+						push (@solutions_array, join("\t", "node=$local_node","type=substitution" ,"position=$pos", "from=$open_from_substitution", "to=$open_to_substitution", "insertion=", "deletion="));
+					}
+					#IS THE FROM-SEQUENCE MICROSATELLITE-LIKE?
+
+				}
+				#<STDIN> if $printer ==1;
+			}
+			#<STDIN> if $printer ==1;
+		}
+	}
+
+	print "\n", "#" x 50, "\n"  if $printer == 1;
+	foreach my $tag (@$tags){
+		print "$tag: $alignment->{$tag}\n" if $printer == 1;	
+	}
+	print "\n", "#" x 50, "\n" if $printer == 1;
+
+	print "returning SOLUTIONS ARRAY : \n",join("\n", @solutions_array),"\n" if $printer == 1; 
+	#print "end\n";
+	#<STDIN> if 
+	return \@solutions_array;
+}
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
+
+sub base_by_base_simple{
+	my $printer = 0;
+	my ($motif, $locus, $no, $pair0, $pair1, $joint) = @_;
+	my @seq_array=();
+	print "IN SUBROUTUNE base_by_base_simple.. information received = @_\n" if $printer == 1;
+	print "pair0 = $pair0 and pair1  = $pair1\n" if $printer == 1;
+
+	my @example=split(/\./,$locus->[0]);
+	print "example, for length = @example\n" if $printer == 1;
+	for my $i (0...$no-1){push(@seq_array, [split(/\./,$locus->[$i])]); print "for $i, from $locus->[$i],  seq_array = >@{$seq_array[$i]}<\n" if $printer == 1;}
+
+	my @compared_sequence=();
+	my @substitutions_list;
+	for my $i (0...scalar(@example)-1){
+
+		#print "i = $i\n" if $printer == 1;
+		#print "comparing $seq_array[0][$i] and  $seq_array[1][$i] \n" ;#if $printer == 1;
+		if ($seq_array[0][$i] =~ /!/ && $seq_array[1][$i] !~ /!/){
+
+			my $resolution= resolve_base($seq_array[0][$i],$seq_array[1][$i], $pair1 ,"keep" );
+		#	print "ancestral = $resolution\n" if $printer == 1;
+
+			if ($resolution =~ /$seq_array[1][$i]/i && $resolution !~ /!/){
+				push @substitutions_list, add_mutation($i, $pair0,  $seq_array[0][$i], $resolution );
+			}
+			elsif ( $resolution !~ /!/){
+				push @substitutions_list, add_mutation($i, $pair1,  $seq_array[1][$i], $resolution);			
+			}
+			push @compared_sequence,$resolution;
+		}
+		elsif ($seq_array[0][$i] !~ /!/ && $seq_array[1][$i] =~ /!/){
+
+			my $resolution=  resolve_base($seq_array[1][$i],$seq_array[0][$i], $pair0, "invert" );		
+		#	print "ancestral = $resolution\n" if $printer == 1;
+
+			if ($resolution =~ /$seq_array[0][$i]/i && $resolution !~ /!/){
+				push @substitutions_list, add_mutation($i, $pair1, $seq_array[1][$i], $resolution);
+			}
+			elsif ( $resolution !~ /!/){
+				push @substitutions_list, add_mutation($i, $pair0,  $seq_array[0][$i], $resolution);			
+			}
+			push @compared_sequence,$resolution;
+		}
+		elsif($seq_array[0][$i] =~ /!/ && $seq_array[1][$i] =~ /!/){
+			push @compared_sequence, add_bases($seq_array[0][$i],$seq_array[1][$i], $pair0, $pair1, $joint );
+		}
+		else{
+			if($seq_array[0][$i] !~ /^$seq_array[1][$i]$/i){
+				push @compared_sequence, $pair0.":".$seq_array[0][$i]."!".$pair1.":".$seq_array[1][$i];
+			}
+			else{
+		#		print "perfect match\n" if $printer == 1;
+				push @compared_sequence, $seq_array[0][$i];
+			}
+		}		
+	}
+	print "returning: comared = @compared_sequence \nand substitutions list =\n", join("\n",@substitutions_list),"\n" if $printer == 1; 
+	return join(".",@compared_sequence), join(":", @substitutions_list) if scalar (@substitutions_list) > 0;
+	return join(".",@compared_sequence), "" if scalar (@substitutions_list) == 0;
+}
+
+
+sub resolve_base{
+	my $printer = 0;
+
+	print "IN SUBROUTUNE resolve_base.. information received = @_\n" if $printer == 1;
+	my ($optional, $single, $singlesp, $arg) = @_;
+	my @options=split(/!/,$optional);
+	foreach my $option(@options) { 
+		$option=~s/[A-Z\(\) ,]+://g;
+		if ($option =~ /$single/i){
+			print "option = $option , returning single: $single\n" if $printer == 1;
+			return $single;
+		}
+	}
+	print "returning ",$optional."!".$singlesp.":".$single. "\n" if $arg eq "keep" && $printer == 1; 
+	print "returning ",$singlesp.":".$single."!".$optional. "\n" if $arg eq "invert" && $printer == 1; 
+	return $optional."!".$singlesp.":".$single if $arg eq "keep";
+	return $singlesp.":".$single."!".$optional if $arg eq "invert";
+
+}
+
+sub same_length{
+	my $printer = 0;
+	my @locus = @_;
+	my $temp = shift @locus;
+	$temp=~s/-|,//g;
+	foreach my $l (@locus){
+		$l=~s/-|,//g;
+		return 0 if length($l) != length($temp);
+		$temp = $l;
+	}
+	return 1;
+}
+sub treeStudy{
+	my $printer = 0;
+#	print "template DEFINED.. received: @_\n" if defined %template; 
+#	print "only received = @_" if !defined %template; 
+	my $stopper = 0;
+	if (!defined %template){
+		$stopper = 1;
+		%template=();
+		print "tree decipherer = $tree_decipherer\n" if $printer == 1;
+		my ( $template_ref, $keys_array)=load_allPossibleTrees($tree_decipherer, \%template);
+		print "return = $template_ref and @{$keys_array}\n" if $printer == 1;
+		foreach my $key (@$keys_array){
+			print "addding : $template_ref->{$key} for $key\n" if $printer == 1;
+			$template{$key} = $template_ref->{$key};
+		}
+	}
+	
+	for my $templet ( keys %template ) {
+	#	print "$templet => @{$template{$templet}}\n";
+	}
+	<STDIN> if !defined %template;
+
+	my $strict = 0;
+
+    my $H = 0;
+    my $Hchr = 1;
+    my $Hstart = 2;
+    my $Hend = 3;    
+	my $Hmotif = 4;
+	my $Hmotiflen = 5;
+	my $Hmicro = 6;
+	my $Hstrand = 7;
+	my $Hmicrolen = 8;
+	my $Hinterpos = 9;
+	my $Hrelativepos = 10;
+	my $Hinter = 11;
+	my $Hinterlen = 12;
+	
+	my $C = 13;
+    my $Cchr = 14;
+    my $Cstart = 15;
+    my $Cend = 16;    
+	my $Cmotif = 17;
+	my $Cmotiflen = 18;
+	my $Cmicro = 19;
+	my $Cstrand = 20;
+	my $Cmicrolen = 21;
+	my $Cinterpos = 22;
+	my $Crelativepos = 23;
+	my $Cinter = 24;
+	my $Cinterlen = 25;
+	
+	my $O = 26;
+    my $Ochr = 27;
+    my $Ostart = 28;
+    my $Oend = 29;    
+	my $Omotif = 30;
+	my $Omotiflen = 31;
+	my $Omicro = 32;
+	my $Ostrand = 33;
+	my $Omicrolen = 34;
+	my $Ointerpos = 35;
+	my $Orelativepos = 36;
+	my $Ointer = 37;
+	my $Ointerlen = 38;
+	
+	my $R = 39;
+    my $Rchr = 40;
+    my $Rstart = 41;
+    my $Rend = 42;    
+	my $Rmotif = 43;
+	my $Rmotiflen = 44;
+	my $Rmicro = 45;
+	my $Rstrand = 46;
+	my $Rmicrolen = 47;
+	my $Rinterpos = 48;
+	my $Rrelativepos = 49;
+	my $Rinter = 50;
+	my $Rinterlen = 51;
+	
+    my $Mchr = 52;
+    my $Mstart = 53;
+    my $Mend = 54;    
+	my $M = 55;
+	my $Mmotif = 56;
+	my $Mmotiflen = 57;
+	my $Mmicro = 58;
+	my $Mstrand = 59;
+	my $Mmicrolen = 60;
+	my $Minterpos = 61;
+	my $Mrelativepos = 62;
+	my $Minter = 63;
+	my $Minterlen = 64;
+	
+	#-------------------------------------------------------------------------------#
+	my @analysis=();
+	
+	
+	my %speciesOrder = ();
+	$speciesOrder{"H"} = 0;
+	$speciesOrder{"C"} = 1;
+	$speciesOrder{"O"} = 2;
+	$speciesOrder{"R"} = 3;
+	$speciesOrder{"M"} = 4;
+	#-------------------------------------------------------------------------------#
+
+	my $line = $_[0];
+	chomp $line;
+	
+	my @f = split(/\t/,$line);
+	print "received array : @f.. recieved tags = @tags\n" if $printer == 1;
+	
+	# collect all motifs
+	my @motifs=();
+	 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif], $f[$Mmotif]) if $tags[$#tags] =~ /M/;
+	 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif]) if $tags[$#tags] =~ /R/;
+	 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif]) if $tags[$#tags] =~ /O/;
+#	print "motifs in the array = $f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif]\n" if $tags[$#tags] =~ /R/;;
+	print "motifs = @motifs\n" if $printer == 1;
+	my @translation = ();
+	foreach my $motif (@motifs){
+		push(@translation, "_") if $motif eq "NA";
+		push(@translation, "+") if $motif ne "NA";
+	}
+	my $translate = join(" ", @translation);
+#	print "translate = >$translate< and analysis = $template{$translate}[0].. on the other hand, ",$template{"- - +"}[0],"\n"; 
+	my @analyses = split(/\|/,$template{$translate}[0]);
+	
+	print "motifs = @motifs, analyses = @analyses\n" if $printer == 1; 
+
+	if (scalar(@analyses) == 1) {
+		#print "analysis = $analyses[0]\n"; 
+		if ($analyses[0] !~ /,|\./ ){
+			if ($analyses[0] =~ /\+/){
+				my $analysis = $analyses[0];
+				$analysis =~ s/\+|\-//g;
+				my @species = split(/\s*/,$analysis);
+				my @currentMotifs = ();
+				foreach my $specie (@species){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); print "pushing into currentMotifs: $speciesOrder{$specie}: $motifs[$speciesOrder{$specie}]\n" if $printer == 1;}
+				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
+				$template{$translate}[1]++ if $strict == 1 && consistency(@currentMotifs) ne "NULL";
+				$template{$translate}[1]++ if $strict == 0;
+				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
+			}
+			else{
+				my $analysis = $analyses[0];
+				$analysis =~ s/\+|\-//g;
+				my @species = split(/\s*/,$analysis);
+				my @currentMotifs = ();
+				my @complementarySpecies = ();
+				my $allSpecies = join("",@tags);
+				foreach my $specie (@species){	$allSpecies =~	s/$specie//g; }
+				foreach my $specie (split(/\s*/,$allSpecies)){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); print "pushing into currentMotifs: $speciesOrder{$specie}: $motifs[$speciesOrder{$specie}]\n" if $printer == 1;;}
+				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
+				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
+				$template{$translate}[1]=$template{$translate}[1]+1  if $strict == 0;
+				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
+			}
+		}
+
+		elsif ($analyses[0] =~ /,/) {
+			my @events = split(/,/,$analyses[0]);	
+			print "events = @events \n " if $printer == 1;
+			if ($events[0] =~ /\+/){
+				my $analysis1 = $events[0];
+				$analysis1 =~ s/\+|\-//g;
+				my $analysis2 = $events[1];
+				$analysis2 =~ s/\+|\-//g;
+				my @nSpecies = split(/\s*/,$analysis2);
+				print "original anslysis = $analysis1 " if $printer == 1;
+				foreach my $specie (@nSpecies){ $analysis1=~ s/$specie//g;}
+				print "processed anslysis = $analysis1 \n" if $printer == 1; 
+				my @currentMotifs = ();
+				foreach my $specie (split(/\s*/,$analysis1)){push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
+				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
+				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
+				$template{$translate}[1]=$template{$translate}[1]+1  if $strict == 0;
+				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
+			}
+			else{
+				my $analysis1 = $events[0];
+				$analysis1 =~ s/\+|\-//g;
+				my $analysis2 = $events[1];
+				$analysis2 =~ s/\+|\-//g;
+				my @pSpecies = split(/\s*/,$analysis2);
+				my @currentMotifs = ();
+				foreach my $specie (@pSpecies){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
+				print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
+				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
+				$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
+				print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
+			
+			}
+			
+		}
+		elsif ($analyses[0] =~ /\./) {
+			my @events = split(/\./,$analyses[0]);	
+			foreach my $event (@events){
+				print "event = $event \n" if $printer == 1;
+				if ($event =~ /\+/){
+					my $analysis = $event;
+					$analysis =~ s/\+|\-//g;
+					my @species = split(/\s*/,$analysis);
+					my @currentMotifs = ();
+					foreach my $specie (@species){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
+					#print consistency(@currentMotifs),"<- \n"; 
+					print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
+					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
+					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
+					print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
+				}
+				else{
+					my $analysis = $event;
+					$analysis =~ s/\+|\-//g;
+					my @species = split(/\s*/,$analysis);
+					my @currentMotifs = ();
+					my @complementarySpecies = ();
+					my $allSpecies = join("",@tags);
+					foreach my $specie (@species){	$allSpecies =~	s/$specie//g; }
+					foreach my $specie (split(/\s*/,$allSpecies)){	push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
+					#print consistency(@currentMotifs),"<- \n"; 
+					print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
+					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
+					$template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
+					print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
+				}
+			}	
+		
+		}
+	}
+	else{		
+		my $finalanalysis = ();
+		$template{$translate}[1]++;
+		foreach my $analysis (@analyses){ ;}
+	}
+	# test if motifs where microsats are present, as indeed of same the motif composition
+	
+	
+	
+	for my $templet ( keys %template ) {
+	#	print "now returning: $templet\n";
+		if (@{ $template{$templet} }[1] > 0){
+			print "returning in the end: $templet and $translate\n" if $printer == 1;
+			$template{$templet}[1] = 0;
+			return 	(@{$template{$templet}}[0], $translate);
+		}
+	}
+	undef %template;
+	print "sending NULL\n" if $printer == 1;
+	return ("NULL", $translate);
+	
+}
+
+
+sub consistency{
+	my @motifs = @_;
+	print "in consistency \n" if $printer == 1;
+	print "motifs sent = >",join("|",@motifs),"< \n" if $printer == 1; 
+	return $motifs[0] if scalar(@motifs) == 1;
+	my $prevmotif = shift(@motifs);
+	my $stopper = 0;
+	for my $i (0 ... $#motifs){
+		next if $motifs[$i] eq "NA";
+		my $templet = $motifs[$i].$motifs[$i];
+		if ($templet !~ /$prevmotif/i){
+			$stopper = 1; last;
+		}
+	}
+	return $prevmotif if $stopper == 0;
+	return "NULL" if $stopper == 1;
+}
+sub summarize_microsat{
+	my $printer = 0;
+	my $line = $_[0];	
+	my $humseq = $_[1];
+
+	my @gaps = $line =~ /[0-9]+\t[0-9]+\t[\+\-]/g;
+	my @starts = $line =~ /[0-9]+\t[\+\-]/g;
+	my @ends = $line =~ /[\+\-]\t[0-9]+/g;
+	print "starts = @starts\tends = @ends\n" if $printer == 1;
+	for my $i (0 ... $#gaps) {$gaps[$i] =~ s/\t[0-9]+\t[\+\-]//g;}
+	for my $i (0 ... $#starts) {$starts[$i] =~ s/\t[\+\-]//g;}
+	for my $i (0 ... $#ends) {$ends[$i] =~ s/[\+\-]\t//g;}
+
+	my $minstart = array_smallest_number(@starts);
+	my $maxend = array_largest_number(@ends);
+
+	my $humupstream_st = substr($humseq, 0, $minstart);
+	my $humupstream_en = substr($humseq, 0, $maxend);
+	my $no_of_gaps_to_start = 0;
+	my $no_of_gaps_to_end = 0;
+	$no_of_gaps_to_start = ($humupstream_st =~ s/\-/x/g) if $humupstream_st=~/\-/;
+	$no_of_gaps_to_end = ($humupstream_en =~ s/\-/x/g) if $humupstream_en=~/\-/;
+
+	my $locusmotif = ();
+	
+	
+	print "IN SUB SUMMARIZE_MICROSAT $line\n" if $printer == 1;
+	#return "NULL" if $line =~ /compound/;
+	my $Hstart = "NA";
+	my $Hend = "NA";
+	chomp $line;
+	my $match_count = ($line =~ s/>/>/g);
+	#print "number of species = $match_count\n";
+	my @micros = split(/>/,$line);
+	shift @micros;
+	my $stopper = 0;
+	
+	
+	foreach my $mic (@micros){
+		my @local = split(/\t/,$mic);
+		if ($local[$microsatcord] =~ /N/) {$stopper =1; last;}
+	}
+	return "NULL" if $stopper ==1;
+	
+	#------------------------------------------------------
+
+	my @arranged = ();
+	for my $arr (0 ... $#exacttags) {$arranged[$arr] = '0';}
+		
+	foreach my $micro (@micros){
+		for my $i (0 ... $#exacttags){
+			if ($micro =~ /^$exacttags[$i]/){
+				$arranged[$i] = $micro;
+				last;
+			}
+		}
+	}
+#	print "arranged = @arranged \n" ; <STDIN>;;
+	
+	my @endstatement = ();
+	my $turn = 0;
+	my $species_counter = 0;
+	#	print scalar(@arranged),"\n";
+	
+	my $species_no=0;
+	
+	my $orthHchr = 0;
+
+	foreach my $micro (@arranged) {
+		$micro =~ s/\t\t/\t \t/g;
+		$micro =~ s/\t,/\t ,/g;
+		$micro =~ s/,\t/, \t/g;
+		print "------------------------------------------------------------------------------------------\n" if $printer == 1;
+		chomp $micro;
+		if ($micro eq '0'){
+			push(@endstatement, join("\t",$exacttags[$species_counter],"NA","NA","NA","NA",0 ,"NA", "NA", 0,"NA","NA","NA", "NA" ));
+			$species_counter++;
+			print join("|","ENDSTATEMENT:",@endstatement),"\n" if $printer == 1;
+			next;
+		}
+	#		print $micro,"\n";
+		print "micro  = $micro \n" if $printer == 1;
+		my @fields  = split(/\t/,$micro);
+		my $microcopy = $fields[$microsatcord];
+		$microcopy =~ s/\[|\]|-//g;
+		my $microsatlength = length($microcopy);
+		print "microsat = $fields[$microsatcord] and microsatlength = $microsatlength\n" if $printer == 1;
+#		print "sp_ident = @sp_ident.. species_no=$species_no\n";
+		$micro =~ /$sp_ident[$species_no]\s(\S+)\s([0-9]+)\s([0-9]+)/;
+		
+	
+		my $sp_chr=$1;
+		my $sp_start=$2 + $fields[$startcord] - $fields[$gapcord];
+		my $sp_end= $sp_start + $microsatlength - 1;
+		
+		$species_no++;
+	
+		$micro =~ /$focalspec\s(\S+)\s([0-9]+)\s([0-9]+)/;
+		$orthHchr=$1;
+		$Hstart=$2+$minstart-$no_of_gaps_to_start;
+		$Hend=$2+$maxend-$no_of_gaps_to_end;
+
+		print "Hstart = $Hstart = $fields[4] + $fields[$startcord] - $fields[$gapcord]\n" if $printer == 1;
+	
+		my $motif = $fields[$motifcord];
+		my $firstmotif = ();
+		my $strand = $fields[$strandcord];
+	#		print "strand = $strand\n";
+		
+	
+		if ($motif =~ /^\[/){
+			$motif =~ s/^\[//g;
+			$motif =~ /([a-zA-Z]+)\].*/;
+			$firstmotif = $1;
+		}
+		
+		else {$firstmotif = $motif;}
+		print "firstmotif =$firstmotif : \n" if $printer == 1;
+		$firstmotif = allCaps($firstmotif);
+
+		if (exists $revHash{$firstmotif} && $turn == 0) {
+			$turn=1 if $species_counter==0;
+			$firstmotif = $revHash{$firstmotif};
+		}
+		
+		elsif (exists $revHash{$firstmotif} && $turn == 1) {$firstmotif = $revHash{$firstmotif}; $turn = 1;}
+		print "changed firstmotif =$firstmotif\n" if $printer == 1;
+	#		<STDIN>;
+		$locusmotif = $firstmotif;
+		
+		if (scalar(@fields) > $microsatcord + 2){
+			print "fields = @fields ... interr_poscord=$interr_poscord=$fields[$interr_poscord] .. interrcord=$interrcord=$fields[$interrcord]\n" if $printer == 1; 
+
+			my @interposes = ();
+			@interposes = split(",",$fields[$interr_poscord]) if $fields[$interr_poscord] =~ /,/; 
+			$interposes[0] = $fields[$interr_poscord] if $fields[$interr_poscord] !~ /,/ ;
+			print "interposes=@interposes\n" if $printer == 1;
+			my @relativeposes = (); 
+			my @interruptions = ();
+			@interruptions = split(",",$fields[$interrcord]) if $fields[$interrcord] =~ /,/; 
+			$interruptions[0] = $fields[$interrcord]  if $fields[$interrcord] !~ /,/;
+			my @interlens = ();
+
+			
+			for my $i (0 ... $#interposes){
+			
+				my $interpos = $interposes[$i];
+				my $nexter = 0;
+				my $interruption = $interruptions[$i];
+				my $interlen = length($interruption);
+				push (@interlens, $interlen);
+			
+				
+				my $relativepos = (100 * $interpos) / $microsatlength;
+				print "relativepos  = $relativepos ,interpos=$interpos, interruption=$interruption, interlen=$interlen \n" if $printer == 1;
+				$relativepos = (100 * ($interpos-$interlen)) / $microsatlength if $relativepos > 50;
+				print "-->  = $relativepos\n" if $printer == 1;
+				$interruption = "IND" if length($interruption) < 1;
+		
+				if ($turn == 1){
+					$fields[$microsatcord] = switch_micro($fields[$microsatcord]);
+					$interruption = switch_nucl($interruption) unless $interruption eq "IND";
+					$interpos = ($microsatlength - $interpos) - $interlen + 2;
+					print "turn interpos = $interpos for $fields[$microsatcord]\n" if $printer == 1;
+					$relativepos = (100 * $interpos) / $microsatlength;
+					$relativepos = (100 * ($interpos-$interlen)) / $microsatlength if $relativepos > 50;
+		
+		
+					$strand = '+' if $strand eq '-';
+					$strand = '-' if $strand eq '+';
+				}
+				print "final relativepos = $relativepos\n" if $printer == 1;
+				push(@relativeposes, $relativepos);
+			}
+			push(@endstatement,join("\t",($exacttags[$species_counter],$sp_chr, $sp_start, $sp_end, $firstmotif,length($firstmotif),$fields[$microsatcord],$strand,$microsatlength,join(",",@interposes),join(",",@relativeposes),join(",",@interruptions), join(",",@interlens))));
+		}
+		
+		else{
+			push(@endstatement, join("\t",$exacttags[$species_counter],$sp_chr, $sp_start, $sp_end, $firstmotif,length($firstmotif),$fields[$microsatcord],$strand,$microsatlength,"NA","NA","NA", "NA"));
+		}
+		
+		$species_counter++;
+	}
+	
+		$locusmotif = $sameHash{$locusmotif} if exists $sameHash{$locusmotif};
+		$locusmotif = $revHash{$locusmotif} if exists $revHash{$locusmotif};
+	
+		my $endst =  join("\t", @endstatement, $orthHchr, $Hstart, $Hend);
+		print join("\t", @endstatement, $orthHchr,  $Hstart, $Hend), "\n" if $printer == 1; 
+	
+	
+	return (join("\t", @endstatement, $orthHchr, $Hstart, $Hend), $orthHchr, $Hstart, $Hend, $locusmotif, length($locusmotif));
+	
+}
+
+sub switch_nucl{
+	my @strand = split(/\s*/,$_[0]);
+	for my $i (0 ... $#strand){
+		if ($strand[$i] =~ /c/i) {$strand[$i] = "G";next;}
+		if ($strand[$i] =~ /a/i) {$strand[$i] = "T";next;}
+		if ($strand[$i] =~ /t/i) { $strand[$i] = "A";next;}
+		if ($strand[$i] =~ /g/i) {$strand[$i] = "C";next;}
+	}
+	return join("",@strand);
+}
+
+
+sub switch_micro{
+	my $micro = reverse($_[0]);
+	my @strand = split(/\s*/,$micro);
+	for my $i (0 ... $#strand){
+		if ($strand[$i] =~ /c/i) {$strand[$i] = "G";next;}
+		if ($strand[$i] =~ /a/i) {$strand[$i] = "T";next;}
+		if ($strand[$i] =~ /t/i) { $strand[$i] = "A";next;}
+		if ($strand[$i] =~ /g/i) {$strand[$i] = "C";next;}
+		if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
+		if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
+	}
+	return join("",@strand);
+}
+sub decipher_history{
+	my $printer = 0;
+	my ($mutations_array, $tags_string, $nodes, $branches_hash, $tree_analysis, $confirmation_string, $alivehash) = @_;
+	my %mutations_hash=();
+	foreach my $mutation (@$mutations_array){
+		print "mutation = $mutation\n" if $printer == 1;
+		my %local = $mutation =~ /([\S ]+)=([\S ]+)/g;
+		push @{$mutations_hash{$local{"node"}}},$mutation; 
+		print "just for confirmation: $local{node} pushed as: $mutation\n" if $printer == 1;
+	}
+	my @nodes;
+	my @birth_steps=();
+	my @death_steps=();
+	
+	my @tags=split(/\s*/,$tags_string);
+	my @confirmation=split(/\s+/,$confirmation_string);
+	my %info=();
+	
+	for my $i (0 ... $#tags){
+		$info{$tags[$i]}=$confirmation[$i];
+		print "feeding info: $tags[$i] = $info{$tags[$i]}\n" if $printer == 1;
+	}
+	
+	for my $keys (@$nodes) {
+		foreach my $key (@$keys){
+#			print "current key  = $key\n";
+			my $copykey = $key;
+			$copykey =~ s/[\W ]+//g;
+			my @copykeys=split(/\s*/,$copykey);
+			my $states=();
+			foreach my $copy (@copykeys){
+				$states=$states.$info{$copy};
+			}
+			
+			print "reduced key = $copykey and state = $states\n" if $printer == 1;
+			
+			if (exists $mutations_hash{$key}) {
+				
+				if ($states=~/\+/){
+					push @birth_steps, @{$mutations_hash{$key}};
+					$birth_steps[$#birth_steps] =~ s/\S+=//g;
+					delete $mutations_hash{$key};
+				}
+				else{
+					push @death_steps, @{$mutations_hash{$key}};	
+					$death_steps[$#death_steps] =~ s/\S+=//g;
+					delete $mutations_hash{$key};
+				}
+			}
+		}
+	}
+	print "conformation = $confirmation_string\n" if $printer == 1;
+	push (@birth_steps, "NULL") if scalar(@birth_steps) == 0;
+	push (@death_steps, "NULL") if scalar(@death_steps) == 0;
+	print "birth steps = ",join("\n",@birth_steps)," and death steps = ",join("\n",@death_steps),"\n" if $printer == 1;
+	return \@birth_steps, \@death_steps;
+}
+
+sub fillAlignmentGaps{
+	my $printer = 0;
+	print "received: @_\n" if $printer == 1;
+	my ($tree, $sequences, $alignment, $tagarray, $microsathash, $nonmicrosathash, $motif, $tree_analysis, $threshold, $microsatstarts) = @_;
+	print "in fillAlignmentGaps.. tree = $tree \n" if $printer == 1;
+	my %sequence_hash=();
+
+	my @phases = ();
+	my $concat = $motif.$motif;
+	my $motifsize = length($motif);
+	
+	for my $i (1 ... $motifsize){
+		push @phases, substr($concat, $i, $motifsize);
+	}
+
+	my $concatalignment = ();
+	foreach my $tag (@tags){
+		$concatalignment = $concatalignment.$alignment->{$tag};
+	}
+#	print "returningg NULL","NULL","NULL", "NULL\n" if $concatalignment !~ /-/;
+	return 0, "NULL","NULL","NULL", "NULL","NULL" if $concatalignment !~ /-/;
+	
+	
+
+	my %node_sequences_temp=();
+	my %node_alignments_temp =();			#NEW, Nov 28 2008
+	
+	my @tags=();
+	my @locus_sequences=();
+	my %alivehash=();
+
+#	print "IN fillAlignmentGaps\n";# <STDIN>;
+	my %fillrecord = ();
+	
+	my $change = 0;
+	foreach my $tag (@$tagarray) { 
+		#print "adding: $tag\n";
+		push(@tags, $tag);
+		if (exists $microsathash->{$tag}){
+			my $micro = $microsathash->{$tag};
+			my $orig_micro = $micro;
+			($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
+			$change = 1 if uc($micro) ne uc($orig_micro);
+			$node_sequences_temp{$tag}=$micro if $microsathash->{$tag} ne "NULL"; 
+		}
+		if (exists $nonmicrosathash->{$tag}){
+			my $micro = $nonmicrosathash->{$tag};
+			my $orig_micro = $micro;
+			($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
+			$change = 1 if uc($micro) ne uc($orig_micro);
+			$node_sequences_temp{$tag}=$micro if $nonmicrosathash->{$tag} ne "NULL"; 
+		}
+		
+		if (exists $alignment->{$tag}){
+			my $micro = $alignment->{$tag};
+			my $orig_micro = $micro;
+			($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
+			$change = 1 if uc($micro) ne uc($orig_micro);
+			$node_alignments_temp{$tag}=$micro if $alignment->{$tag} ne "NULL"; 			
+		}
+		
+		#print "adding to node_sequences: $tag = ",$node_sequences_temp{$tag},"\n" if $printer == 1;
+		#print "adding to node_alignments: $tag = ",$node_alignments_temp{$tag},"\n" if $printer == 1;
+	}
+
+
+	my %node_sequences=();
+	my %node_alignments =();			#NEW, Nov 28 2008
+	foreach my $tag (@$tagarray) { 
+		$node_sequences{$tag} = join ".",split(/\s*/,$node_sequences_temp{$tag});
+		$node_alignments{$tag} = join ".",split(/\s*/,$node_alignments_temp{$tag});		
+	}	
+
+	print "\n", "#" x 50, "\n" if $printer == 1;
+	foreach my $tag (@tags){
+		print "$tag: $alignment->{$tag} = $node_alignments{$tag}\n" if $printer == 1;	
+	}
+	print "\n", "#" x 50, "\n" if $printer == 1;
+#	print "change = $change\n";
+	#<STDIN> if $concatalignment=~/\-/;
+	
+#	<STDIN> if $printer == 1 && $concatalignment =~ /\-/;
+
+	return 0, "NULL","NULL","NULL", "NULL", "NULL" if $change == 0;
+	
+	my ($nodes_arr, $branches_hash) = get_nodes($tree);
+	my @nodes=@$nodes_arr;
+	print "recieved nodes = @nodes\n" if $printer == 1; 
+	
+	
+	#POPULATE branches_hash WITH INFORMATION ABOUT LIVESTATUS
+	foreach my $keys (@nodes){
+		my @pair = @$keys;
+		my $joint = "(".join(", ",@pair).")";
+		my $copykey = join "", @pair;
+		$copykey =~ s/[\W ]+//g;
+		print "for node: $keys, copykey = $copykey and joint = $joint\n" if $printer == 1;
+		my $livestatus = 1;
+		foreach my $copy (split(/\s*/,$copykey)){
+			$livestatus = 0 if !exists $alivehash{$copy};
+		}
+		$alivehash{$joint} = $joint if !exists $alivehash{$joint} && $livestatus == 1;
+		print "alivehash = $alivehash{$joint}\n" if exists $alivehash{$joint} && $printer == 1;
+	}
+	
+
+	
+	@nodes = reverse(@nodes); #1 THIS IS IN ORDER TO GO THROUGH THE TREE FROM LEAVES TO ROOT.
+
+	my @mutations_array=();
+
+	my $joint = ();
+	foreach my $node (@nodes){
+		my @pair = @$node; 
+		print "now in the nodes for loop, pair = @pair\n and sequences=\n" if $printer == 1;
+		$joint = "(".join(", ",@pair).")"; 	
+		print "joint = $joint \n" if $printer == 1; 
+		my @pair_sequences=();
+
+		foreach my $tag (@pair){
+			print "tag = $tag: " if $printer == 1;
+			print $node_alignments{$tag},"\n" if $printer == 1;
+			push @pair_sequences, $node_alignments{$tag};
+		}
+#		print "fillgap\n";
+		my ($compared, $substitutions_list) = base_by_base_simple($motif,\@pair_sequences, scalar(@pair_sequences), @pair, $joint);
+		$node_alignments{$joint}=$compared;
+		push(  @mutations_array,split(/:/,$substitutions_list));
+		print "newly added to node_sequences: $node_alignments{$joint} and list of mutations = @mutations_array\n" if $printer == 1;
+	}
+	print "now sending for analyze_mutations: mutation_array=@mutations_array, nodes=@nodes, branches_hash=$branches_hash, alignment=$alignment, tags=@tags, alivehash=%alivehash, node_sequences=\%node_sequences, microsatstarts=$microsatstarts, motif=$motif\n" if $printer == 1;
+#	<STDIN> if $printer == 1;
+	
+	my $analayzed_mutations = analyze_mutations(\@mutations_array, \@nodes, $branches_hash, $alignment, \@tags, \%alivehash, \%node_sequences, $microsatstarts, $motif);
+
+#	print "returningt: ", $analayzed_mutations, \@nodes,"\n" if scalar @mutations_array > 0;;
+#	print "returningy: NULL, NULL, NULL " if scalar @mutations_array == 0 && $printer == 1;
+	print "final node alignment after filling for $joint= " if $printer == 1;
+	print "$node_alignments{$joint}\n" if $printer == 1;
+	
+
+	return 1, $analayzed_mutations, \@nodes, $branches_hash, \%alivehash, $node_alignments{$joint} if scalar @mutations_array > 0 ;
+	return 1, "NULL","NULL","NULL", "NULL", "NULL" if scalar @mutations_array == 0;
+}
+
+
+
+sub add_mutation{
+	my $printer = 0;
+	print "IN SUBROUTUNE add_mutation.. information received = @_\n" if $printer == 1;
+	my ($i , $bite, $to, $from) = @_;
+	print "bite = $bite.. all received info = ",join("^", @_),"\n" if $printer == 1;
+	print "to=$to\n" if $printer == 1;
+	print "tis split = ",join(" and ",split(/!/,$to)),"\n" if $printer == 1;
+	my @toields = split "!",$to;
+	print "toilds  = @toields\n" if $printer == 1;
+	my @mutations=();
+	
+	foreach my $toield (@toields){
+		my @toinfo=split(":",$toield);
+		print " at toinfo=@toinfo \n" if $printer == 1;
+		next if  $toinfo[1] =~ /$from/i;
+		my @mutation = @toinfo if $toinfo[1] !~ /$from/i;
+		print "adding to mutaton list: ", join(",", "node=$mutation[0]","type=substitution" ,"position=$i", "from=$from", "to=$mutation[1]", "insertion=", "deletion="),"\n" if $printer == 1;
+		push (@mutations, join("\t", "node=$mutation[0]","type=substitution" ,"position=$i", "from=$from", "to=$mutation[1]", "insertion=", "deletion="));
+	}
+	return @mutations;
+}
+
+
+sub add_bases{
+
+	my $printer = 0;
+	print "IN SUBROUTUNE add_bases.. information received = @_\n" if $printer == 1;
+	my ($optional0, $optional1, $pair0, $pair1,$joint) = @_;
+	my $total_list=();
+
+	my @total_list0=split(/!/,$optional0);
+	my @total_list1=split(/!/,$optional1);
+	my @all_list=();
+	my %total_hash0=();
+	foreach my $entry (@total_list0) { 		
+		$entry = uc $entry; 
+		$entry =~ /(\S+):(\S+)/; 
+		$total_hash0{$2}=$1;
+		push @all_list, $2; 
+	}
+
+	my %total_hash1=();
+	foreach my $entry (@total_list1) { 		
+		$entry = uc $entry; 
+		$entry =~ /(\S+):(\S+)/; 
+		$total_hash1{$2}=$1;
+		push @all_list, $2; 
+	}
+
+	my %alphabetical_hash=();
+	my @return_options=();	
+
+	for my $i (0 ... $#all_list){
+		my $alph = $all_list[$i];
+		if (exists $total_hash0{$alph} && exists $total_hash1{$alph}){
+			push(@return_options, $joint.":".$alph);
+			delete $total_hash0{$alph}; delete $total_hash1{$alph};
+		}
+		if (exists $total_hash0{$alph} && !exists $total_hash1{$alph}){
+			push(@return_options, $pair0.":".$alph);
+			delete $total_hash0{$alph};
+		}
+		if (!exists $total_hash0{$alph} && exists $total_hash1{$alph}){
+			push(@return_options, $pair1.":".$alph);
+			delete $total_hash1{$alph};
+		}
+
+	}
+
+	print "returning ",join "!",@return_options,"\n" if $printer == 1; 
+	return join "!",@return_options;
+
+}
+
+
+sub fillgaps{
+#	print "IN fillgaps: @_\n";	
+	my ($micro, $phasesinput) = @_;
+	#print "in microsathash ,,.. micro = $micro\n";
+	return $micro if $micro !~ /\-/;
+	my $orig_micro = $micro;
+	my @phases = @$phasesinput;
+	
+	my %tested_patterns = ();
+	
+	foreach my $phase (@phases){
+	#	print "considering phase: $phase\n";
+		my @phase_prefixes = ();
+		my @prephase_left_contexts = ();
+		my @prephase_right_contexts = ();
+		my @pregapsize = ();
+		my @prepostfilins = ();
+	
+		my @phase_suffixes;
+		my @suffphase_left_contexts;
+		my @suffphase_right_contexts;
+		my @suffgapsize;
+		my @suffpostfilins;
+	
+		my @postfilins = ();
+		my $motifsize = length($phases[0]);
+		
+		my $change = 0;
+	
+		for my $u (0 ... $motifsize-1){
+			my $concat = $phase.$phase.$phase.$phase;
+			my @concatarr = split(/\s*/, $concat);
+			my $l = 0;
+			while ($l < $u){
+				shift @concatarr;
+				$l++;
+			}
+			$concat = join ("", @concatarr);
+			
+			for my $t (0 ... $motifsize-1){
+				for my $k (1 ... $motifsize-1){
+					push @phase_prefixes, substr($concat, $motifsize+$t, $k);
+					push @prephase_left_contexts, substr ($concat, $t, $motifsize);
+					push @prephase_right_contexts, substr ($concat, $motifsize+$t+$k+($motifsize-$k), 1);
+					push @pregapsize, $k;
+					push @prepostfilins, substr($concat,  $motifsize+$t+$k, ($motifsize-$k));
+				#	print "reading: $concat, t=$t, k=$k prefix: $prephase_left_contexts[$#prephase_left_contexts] $phase_prefixes[$#phase_prefixes] -x$pregapsize[$#pregapsize] $prephase_right_contexts[$#prephase_right_contexts]\n";
+				#	print "phase_prefixes = $phase_prefixes[$#phase_prefixes]\n";
+				#	print "prephase_left_contexts = $prephase_left_contexts[$#prephase_left_contexts]\n";
+				#	print "prephase_right_contexts = $prephase_right_contexts[$#prephase_right_contexts]\n";
+				#	print "pregapsize = $pregapsize[$#pregapsize]\n";
+				#	print "prepostfilins = $prepostfilins[$#prepostfilins]\n";
+				}
+			}
+		}
+	
+	#	print "looking if $micro =~ /($phase\-{$motifsize})/i || $micro =~ /^(\-{$motifsize,}$phase)/i\n";
+		if ($micro =~ /($phase\-{$motifsize,})$/i || $micro =~ /^(\-{$motifsize,}$phase)/i){
+	#			print "micro: $micro needs further gap removal: $1\n";
+			while ($micro =~ /$phase(\-{$motifsize,})$/i || $micro =~ /^(\-{$motifsize,})$phase/i){
+			#	print "micro: $micro needs further gap removal: $1\n";
+			
+			#	print "phase being considered = $phase\n";
+				my $num = ();
+				$num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi if $micro =~ /$phase\-{$motifsize,}/i;
+				$num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi if $micro =~ /\-{$motifsize,}$phase/i;
+			#	print "num = $num\n";
+				$change = 1 if $num == 1;
+			}				
+		}
+	
+		elsif ($micro =~ /(($phase)+)\-{$motifsize,}(($phase)+)/i){
+			while ($micro =~ /(($phase)+)\-{$motifsize,}(($phase)+)/i){
+		#		print "checking lengths of $1 and $3 for $micro... \n";
+				my $num = ();
+				if (length($1) >= length($3)){
+		#			print "$micro matches (($phase)+)\-{$motifsize,}(($phase)+) = $1, >= , $3 \n";				
+					$num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi ;				
+				}
+				if (length($1) < length($3)){
+		#			print "$micro matches (($phase)+)\-{$motifsize,}(($phase)+) = $1, < , $3 \n";
+					$num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi ;
+				}		
+	#			print "micro changed to $micro\n";
+			}
+		}
+		elsif ($micro =~ /([A-Z]+)\-{$motifsize,}(($phase)+)/i){
+			while ($micro =~ /([A-Z]+)\-{$motifsize,}(($phase)+)/i){		
+		#			print "$micro matches ([A-Z]+)\-{$motifsize}(($phase)+) = 1=$1, - , 3=$3 \n";
+					my $num = 0;
+					$num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi ;
+			}
+		}
+		elsif ($micro =~ /(($phase)+)\-{$motifsize,}([A-Z]+)/i){
+			while ($micro =~ /(($phase)+)\-{$motifsize,}([A-Z]+)/i){		
+			#		print "$micro matches (($phase)+)\-{$motifsize,}([A-Z]+) = 1=$1, - , 3=$3 \n";
+					my $num = 0;
+					$num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi ;				
+			}	
+		}
+	
+	#	print "$orig_micro to $micro\n";
+		
+	#s	<STDIN>;
+		
+		for my $h (0 ... $#phase_prefixes){
+	#		print "searching using prefix : $prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]\n";
+			my $pattern = $prephase_left_contexts[$h].$phase_prefixes[$h].$pregapsize[$h].$prephase_right_contexts[$h];
+	#		print "returning orig_micro = $orig_micro, micro = $micro \n" if exists $tested_patterns{$pattern};
+			if ($micro =~ /$prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]/i){
+				return $orig_micro if exists $tested_patterns{$pattern};
+				while ($micro =~ /($prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h])/i){
+					$tested_patterns{$pattern} = $pattern;
+	#				print "micro: $micro needs further gap removal: $1\n";
+				
+	#				print "prefix being considered = $phase_prefixes[$h]\n";
+					my $num = ();
+					$num = ($micro =~ s/$prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]/$prephase_left_contexts[$h]$phase_prefixes[$h]$prepostfilins[$h]$prephase_right_contexts[$h]/gi) ;
+	#				print "num = $num, micro = $micro\n";
+					$change = 1 if $num == 1;
+					
+					return $orig_micro if $num > 1;
+				}				
+			}
+		
+		}
+	}
+	return $orig_micro if length($micro) != length($orig_micro);
+	return $micro;
+}
+
+sub selectMutationArray{
+	my $printer =0;
+
+	my $oldmutspt  = $_[0];
+	my $newmutspt  = $_[1];
+	my $tagstringpt = $_[2];
+	my $alivehashpt = $_[3];
+	my $alignmentpt = $_[4];
+	my $motif = $_[5];
+
+	my @alivehasharr=();
+	
+	my @tags = @$tagstringpt;
+	my $alignmentln = length($alignmentpt->{$tags[0]});
+	
+	foreach my $key (keys %$alivehashpt) { push @alivehasharr, $key; print "we have alive: $key\n" if $printer == 1;}
+	
+	my %newside = ();
+	my %oldside = ();
+	my %newmuts = ();
+	
+	my %commons = ();
+	my %olds = ();
+	foreach my $old (@$oldmutspt){
+		$olds{$old} = 1;
+	}
+	foreach my $new (@$newmutspt){
+		$commons{$new} = 1 if exists $olds{$new};;
+	}
+		
+	
+	foreach my $pos ( 0 ... $alignmentln){
+		#print "pos = $pos\n" if $printer == 1;
+		my $newyes = 0;
+		foreach my $mut (@$newmutspt){
+			$newmuts{$mut} = 1;
+			chomp $mut;
+			$newyes++;
+			 $mut =~ s/=\t/= \t/g;
+			 $mut =~ s/=$/= /g;
+
+			 $mut =~ /node=([A-Z\(\), ]+)\stype=([a-zA-Z ]+)\sposition=([0-9 ]+)\sfrom=([a-zA-Z\- ]+)\sto=([a-zA-Z\- ]+)\sinsertion=([a-zA-Z\- ]+)\sdeletion=([a-zA-Z\- ]+)/;
+			my $node = $1;
+			next if $3 != $pos;
+			print "new mut = $mut\n" if $printer == 1;
+			print "node = $node, pos = $3 ... and alivehasharr = >@alivehasharr<\n" if $printer == 1;
+			my $alivenode = 0;
+			foreach my $key (@alivehasharr){
+				$alivenode = 1 if $key =~ /$node/;
+			}
+		#	next if $alivenode == 0;
+			my $indel_type = " ";
+			if ($2 eq "insertion" || $2 eq "deletion"){
+				my $thisindel = ();
+				$thisindel = $6 if $2 eq "insertion";
+				$thisindel = $7 if $2 eq "deletion";
+				
+				$indel_type = "i".checkIndelType($node, $thisindel, $motif,$alignmentpt,$3, $2) if $2 eq "insertion";
+				$indel_type = "d".checkIndelType($node, $thisindel, $motif,$alignmentpt, $3, $2) if $2 eq "deletion";
+				$indel_type = $indel_type."f" if $indel_type =~ /mot/ && length($thisindel) >= length($motif);
+			}
+			print "indeltype = $indel_type\n" if $printer == 1;
+			my $added = 0;
+			
+			if (exists $newside{$pos} && $indel_type =~ /[a-z]+/){
+				print "we have a preexisting one for $pos\n" if $printer == 1;
+				my @preexisting = @{$newside{$pos}};
+				foreach my $pre (@preexisting){
+					print "looking at $pre\n" if $printer == 1;
+					next if $pre !~ /node=$node/;
+					next if $pre !~ /indeltype=([a-z]+)/;
+					my $currtype = $1;
+					
+					if ($currtype =~ /inon/ && $indel_type =~ /dmot/){
+						delete $newside{$pos};
+						push @{$newside{$pos}}, $pre;
+						$added = 1;
+					}
+					if ($currtype =~ /dnon/ && $indel_type =~ /imot/){
+						delete $newside{$pos};
+						push @{$newside{$pos}}, $pre;
+						$added = 1;
+					}
+					if ($currtype =~ /dmot/ && $indel_type =~ /inon/){
+						delete $newside{$pos};
+						push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
+						$added = 1;
+					}
+					if ($currtype =~ /imot/ && $indel_type =~ /dnon/){
+						delete $newside{$pos};
+						push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
+						$added = 1;
+					}					
+				}
+			}
+			
+			print "added = $added\n" if $printer == 1;
+			push @{$newside{$pos}}, $mut."\tindeltype=$indel_type" if $added == 0;
+
+			print "for new pos,: $pos we have: @{$newside{$pos}}\n " if $printer == 1;
+		}
+	}
+	
+	foreach my $pos ( 0 ... $alignmentln){
+		my $oldyes = 0;
+		foreach my $mut (@$oldmutspt){
+			chomp $mut;
+			$oldyes++;
+			 $mut =~ s/=\t/= \t/g;
+			 $mut =~ s/=$/= /g;
+			$mut =~ /node=([A-Z\(\), ]+)\ttype=([a-zA-Z ]+)\tposition=([0-9 ]+)\tfrom=([a-zA-Z\- ]+)\tto=([a-zA-Z\- ]+)\tinsertion=([a-zA-Z\- ]+)\tdeletion=([a-zA-Z\- ]+)/;
+			my $node = $1;
+			next if $3 != $pos;
+			print "old mut = $mut\n" if $printer == 1;
+			my $alivenode = 0;
+			foreach my $key (@alivehasharr){
+				$alivenode = 1 if $key =~ /$node/;
+			}
+			#next if $alivenode == 0;
+			my $indel_type = " ";
+			if ($2 eq "insertion" || $2 eq "deletion"){
+				$indel_type = "i".checkIndelType($node, $6, $motif,$alignmentpt, $3, $2) if $2 eq "insertion";
+				$indel_type = "d".checkIndelType($node, $7, $motif,$alignmentpt, $3, $2) if $2 eq "deletion";
+				next if $indel_type =~/non/;
+			}
+			else{ next;}
+
+			my $imp=0;
+			$imp = 1 if $indel_type =~ /dmot/ && $alivenode == 0;
+			$imp = 1 if $indel_type =~ /imot/ && $alivenode == 1;
+			
+			
+			if (exists $newside{$pos} && $indel_type =~ /[a-z]+/){
+				my @preexisting = @{$newside{$pos}};
+				print "we have a preexisting one for $pos: @preexisting\n" if $printer == 1;
+				next if $imp == 0;
+				
+				if (scalar(@preexisting) == 1){
+					my $foundmut = $preexisting[0];
+					$foundmut=~ /node=([A-Z, \(\)]+)/;
+					next if $1 eq $node;
+					
+					if (exists $oldside{$pos} || exists $commons{$foundmut}){
+						print "not replacing, but just adding\n" if $printer == 1;
+						push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
+						push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
+						next;
+					}
+					
+					delete $newside{$pos};
+					push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
+					push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
+					print "now  new one is : @{$newside{$pos}}\n" if $printer == 1;
+				}
+				
+				print "for pos: $pos: @{$newside{$pos}}\n" if $printer == 1;
+				next;
+			}
+
+			
+			my @news = @{$newside{$pos}} if exists $newside{$pos};
+			
+			print "mut = $mut and news = @news\n" if $printer == 1; 
+			push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
+			push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
+		}
+	}
+
+	print "in the end, our collected mutations = \n" if $printer == 1;
+	my @returnarr = ();
+	foreach my $key (keys %newside) {push @returnarr,@{$newside{$key}};}
+	print join("\n", @returnarr),"\n" if $printer == 1;
+	#<STDIN>;
+	return @returnarr;
+
+}
+
+
+sub checkIndelType{
+	my $printer  = 0;
+	my $node = $_[0];
+	my $indel = $_[1];
+	my $motif = $_[2];
+	my $alignmentpt = $_[3];
+	my $posit = $_[4];
+	my $type = $_[5];
+	my @phases =();
+	my %prephases = ();
+	my %postphases = ();
+	#print "motif = $motif\n";
+	print "IN checkIndelType ... received: @_\n" if $printer == 1;
+	my $concat = $motif.$motif.$motif.$motif;
+	my $motiflength = length($motif);
+	
+	if ($motiflength > length ($indel)){
+		return "non" if $motif !~ /$indel/i;
+		return checkIndelType_ComplexAnalysis($node, $indel, $motif, $alignmentpt, $posit, $type);
+	}
+	
+	my $firstpass = 0;
+	for my $y (0 ... $motiflength-1){
+		my $phase = substr($concat, $motiflength+$y, $motiflength);
+		push @phases, $phase;
+		$firstpass = 1 if $indel =~ /$phase/i;
+		for my $k (0 ... length($motif)-1){
+			print "at: motiflength=$motiflength , y=$y , k=$k.. for pre: $motiflength+$y-$k and post: $motiflength+$y-$k+$motiflength in $concat\n" if $printer == 1;
+			my $pre = substr($concat, $motiflength+$y-$k, $k );
+			my $post = substr($concat, $motiflength+$y+$motiflength, $k);
+			print "adding to phases : $phase - $pre and $post\n" if $printer == 1;
+			push @{$prephases{$phase}} , $pre;
+			push @{$postphases{$phase}} , $post;			
+		}
+		
+	}
+	print "firstpass 1= $firstpass\n" if $printer == 1;
+	return "non" if $firstpass ==0;
+	$firstpass =0;
+	
+	foreach my $phase (@phases){
+		my @pres = @{$prephases{$phase}};
+		my @posts = @{$postphases{$phase}};
+		
+		foreach my $pre (@pres){
+			foreach my $post (@posts){
+				
+				$firstpass = 1 if $indel =~ /($pre)?($phase)+($post)?/i && length($indel) > (3 * length($motif));
+				$firstpass = 1 if $indel =~ /^($pre)?($phase)+($post)?$/i && length($indel) < (3 * length($motif));
+				print "matched here : ($pre)?($phase)+($post)?\n" if $printer == 1;
+				last if $firstpass == 1;
+			}
+			last if $firstpass == 1;
+		}
+		last if $firstpass == 1;
+	}
+	
+	print "firstpass 2= $firstpass\n" if $printer == 1;
+	return "non" if $firstpass ==0;
+	return "mot" if $firstpass ==1;	
+}
+
+
+sub checkIndelType_ComplexAnalysis{
+	my $printer = 0;
+	my $node = $_[0];
+	my $indel = $_[1];
+	my $motif = $_[2];
+	my $alignmentpt = $_[3];
+	my $pos = $_[4];
+	my $type = $_[5];
+	my @speciesinvolved = $node =~ /[A-Z]+/g;
+	
+	my @seqs = ();
+	my $residualseq = length($motif) - length($indel);
+	print "IN COMPLEX ANALYSIS ... received: @_  .... speciesinvolved = @speciesinvolved\n" if $printer == 1;
+	print "we have position = $pos, sseq = $alignmentpt->{$speciesinvolved[0]}\n" if $printer == 1;
+	print "residualseq = $residualseq\n" if $printer == 1;
+	print "pos=$pos... got: @_\n" if $printer == 1;
+	foreach my $sp (@speciesinvolved){
+		my $spseq = $alignmentpt->{$sp};
+		#print "orig spseq = $spseq\n";
+		my $subseq = ();
+		
+		if ($type eq "deletion"){
+			my @indelparts = split(/\s*/,$indel);
+			my @seqparts = split(/\s*/,$spseq);
+			
+			for my $p ($pos ... $pos+length($indel)-1){
+				$seqparts[$p] = shift @indelparts;
+			}
+			$spseq = join("",@seqparts);
+		}
+		#print "mod spseq = $spseq\n";
+	#	$spseq=~ s/\-//g if $type !~ /deletion/;
+		
+		print "substr($spseq, $pos-($residualseq), length($indel)+$residualseq+$residualseq)\n" if $pos > 0 && $pos < (length($spseq) - length($motif))  && $printer == 1;
+		print "substr($spseq, 0, length($indel)+$residualseq)\n" if $pos == 0 && $printer == 1;
+		print "substr($spseq, $pos - $residualseq, length($indel)+$residualseq)\n" if $pos >= (length($spseq) - length($motif))  && $printer == 1;
+		
+		$subseq = substr($spseq, $pos-($residualseq), length($indel)+$residualseq+$residualseq) if $pos > 0 && $pos < (length($spseq) - length($motif))  ;
+		$subseq = substr($spseq, 0, length($indel)+$residualseq) if $pos == 0;
+		$subseq = substr($spseq, $pos - $residualseq, length($indel)+$residualseq) if $pos >= (length($spseq) - length($motif))  ;
+		print "spseq = $spseq . subseq=$subseq . type = $type\n" if $printer == 1;
+		#<STDIN> if $subseq !~ /[a-z\-]/i; 
+		$subseq =~ s/\-/$indel/g if $type =~ /insertion/;
+		push @seqs, $subseq;
+		print "seqs = @seqs\n" if $printer == 1;
+	}
+	return "non" if checkIfSeqsIdentical(@seqs) eq "NO";
+	
+	print "checking for $seqs[0] \n" if $printer == 1;
+	
+	my @phases =();
+	my %prephases = ();
+	my %postphases = ();
+	my $concat = $motif.$motif.$motif.$motif;
+	my $motiflength = length($motif);
+	
+	my $firstpass = 0;
+	
+	for my $y (0 ... $motiflength-1){
+		my $phase = substr($concat, $motiflength+$y, $motiflength);
+		push @phases, $phase;
+		$firstpass = 1 if $seqs[0] =~ /$phase/i;
+		for my $k (0 ... length($motif)-1){
+			my $pre = substr($concat, $motiflength+$y-$k, $k );
+			my $post = substr($concat, $motiflength+$y+$motiflength, $k);
+			print "adding to phases : $phase - $pre and $post\n" if $printer == 1;
+			push @{$prephases{$phase}} , $pre;
+			push @{$postphases{$phase}} , $post;			
+		}
+		
+	}
+	print "firstpass 1= $firstpass.. also, res-d = ",(length($seqs[0]))%(length($motif)),"\n" if $printer == 1;
+	return "non" if $firstpass ==0;
+	$firstpass =0;
+	foreach my $phase (@phases){
+		
+		$firstpass = 1 if $seqs[0] =~ /^($phase)+$/i && ((length($seqs[0]))%(length($motif))) == 0;
+
+		if (((length($seqs[0]))%(length($motif))) != 0){
+			my @pres = @{$prephases{$phase}};
+			my @posts = @{$postphases{$phase}};
+			foreach my $pre (@pres){
+				foreach my $post (@posts){
+					next if $pre !~ /\S/ && $post !~ /\S/;
+					$firstpass = 1 if ($seqs[0] =~ /^($pre)($phase)+($post)$/i || $seqs[0] =~ /^($pre)($phase)+$/i || $seqs[0] =~ /^($phase)+($post)$/i);
+					print "caught with $pre $phase $post\n" if $printer == 1;
+					last if $firstpass == 1;
+				}
+				last if $firstpass == 1;
+			}
+		}
+		
+		last if $firstpass == 1;
+	}
+	
+	#print "indel = $indel.. motif = $motif.. firstpass 2= mot\n" if $firstpass ==1;	
+	#print "indel = $indel.. motif = $motif.. firstpass 2= non\n" if $firstpass ==0;	
+	#<STDIN>;# if $firstpass ==1;
+	return "non" if $firstpass ==0;
+	return "mot" if $firstpass ==1;	
+
+}
+
+sub checkIfSeqsIdentical{
+	my @seqs = @_;
+	my $identical = 1;
+	
+	for my $j (1 ... $#seqs){
+		$identical = 0 if uc($seqs[0]) ne uc($seqs[$j]); 
+	}
+	return "NO" if $identical == 0;
+	return "YES" if $identical == 1;
+
+}
+
+sub summarizeMutations{
+	my $mutspt = $_[0];
+	my @muts = @$mutspt;
+	my $tree = $_[1];
+	
+	my @returnarr = ();
+	
+	for (1 ... 38){
+		push @returnarr, "NA";
+	}
+	push @returnarr, "NULL";
+	return @returnarr if $tree eq "NULL" || scalar(@muts) < 1;
+	
+	
+	my @bspecies = ();
+	my @dspecies = ();
+	my $treecopy = $tree;
+	$treecopy =~ s/[\(\)]//g;
+	my @treeparts  = split(/[\.,]+/, $treecopy);
+	
+	for my $part (@treeparts){
+		if ($part =~ /\+/){
+			$part =~ s/\+//g;
+			#my @sp = split(/\s*/, $part);
+			#foreach my $p (@sp) {push @bspecies, $p;}
+			push @bspecies, $part;
+		}
+		if ($part =~ /\-/){
+			$part =~ s/\-//g;
+			#my @sp = split(/\s*/, $part);
+			#foreach my $p (@sp) {push @dspecies,  $p;}		
+			push @dspecies, $part;			
+		}
+		
+	}
+	#print "-------------------------------------------------------\n";
+	
+	my ($insertions, $deletions, $motinsertions, $motinsertionsf, $motdeletions, $motdeletionsf, $noninsertions, $nondeletions) = (0,0,0,0,0,0,0,0);
+	my ($binsertions, $bdeletions, $bmotinsertions,$bmotinsertionsf, $bmotdeletions, $bmotdeletionsf, $bnoninsertions, $bnondeletions) = (0,0,0,0,0,0,0,0);
+	my ($dinsertions, $ddeletions, $dmotinsertions,$dmotinsertionsf, $dmotdeletions, $dmotdeletionsf, $dnoninsertions, $dnondeletions) = (0,0,0,0,0,0,0,0);
+	my ($ninsertions, $ndeletions, $nmotinsertions,$nmotinsertionsf, $nmotdeletions, $nmotdeletionsf, $nnoninsertions, $nnondeletions) = (0,0,0,0,0,0,0,0);
+	my ($substitutions, $bsubstitutions, $dsubstitutions, $nsubstitutions, $indels, $subs) = (0,0,0,0,"NA","NA");
+
+	my @insertionsarr = (" ");
+	my @deletionsarr = (" ");
+	
+	my @substitutionsarr = (" ");
+	
+	
+	foreach my $mut (@muts){
+	#	print "mut = $mut\n";
+		chomp $mut;
+		$mut =~ s/=\t/= /g;
+		$mut =~ s/=$/= /g;
+		my %mhash = ();
+		my @mields = split(/\t/,$mut);
+		
+		foreach my $m (@mields){
+			my @fields = split(/=/,$m);
+			next if $fields[1] eq " ";
+			$mhash{$fields[0]} = $fields[1];
+		}
+		
+		my $myutype = ();
+		my $decided = 0;
+		
+		my $localnode  = $mhash{"node"};
+		$localnode =~ s/[\(\)\. ,]//g;
+
+		
+		foreach my $s (@bspecies){
+			if ($localnode eq $s)		{
+				$decided = 1; $myutype = "b";
+			}
+		}
+		
+		foreach my $s (@dspecies){
+			if ($localnode eq $s)		{
+				$decided = 1; $myutype = "d";
+			}
+		}
+
+		$myutype = "n" if $decided != 1;
+		
+		
+	#	print "tree=$tree, birth species=@bspecies, death species=@dspecies, node=$mhash{node}  .. myutype=$myutype .. \n";		
+	#	<STDIN> if $mhash{"type"} eq "insertion" && $myutype eq "b";
+		
+		
+		if ($mhash{"type"} eq "substitution"){
+			$substitutions++;
+			$bsubstitutions++ if $myutype eq "b";
+			$dsubstitutions++ if $myutype eq "d";
+			$nsubstitutions++ if $myutype eq "n";
+	#		print "substitution: from= $mhash{from}, to = $mhash{to}, and type = myutype\n";
+			push @substitutionsarr, "b:$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "b";
+			push @substitutionsarr, "d:$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "d";
+			push @substitutionsarr, "n:$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "n";
+	#		print "substitutionsarr = @substitutionsarr\n";
+	#		<STDIN>;
+		}
+		else{
+			#print "tree=$tree, birth species=@bspecies, death species=@dspecies, node=$mhash{node}  .. myutype=$myutype .. indeltype=$mhash{indeltype}\n";		
+			if ($mhash{"type"} eq "deletion"){
+				$deletions++;
+				
+				$motdeletions++ if $mhash{"indeltype"} =~ /dmot/;
+				$motdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/;
+				
+				$nondeletions++ if $mhash{"indeltype"} =~ /dnon/;
+				
+				$bdeletions++ if $myutype eq "b";
+				$ddeletions++ if $myutype eq "d";
+				$ndeletions++ if $myutype eq "n";
+
+				$bmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "b";
+				$bmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "b";
+				$bnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "b";
+
+				$dmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "d";
+				$dmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "d";
+				$dnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "d"; 
+
+				$nmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "n";
+				$nmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "n";
+				$nnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "n";
+				
+				push @deletionsarr, "b:$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "b";
+				push @deletionsarr, "d:$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "d";
+				push @deletionsarr, "n:$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "n";
+			}
+
+			if ($mhash{"type"} eq "insertion"){
+				$insertions++;
+				
+				$motinsertions++ if $mhash{"indeltype"} =~ /imot/;
+				$motinsertionsf++ if $mhash{"indeltype"} =~ /imotf/;
+				$noninsertions++ if $mhash{"indeltype"} =~ /inon/;
+				
+				$binsertions++ if $myutype eq "b";
+				$dinsertions++ if $myutype eq "d";
+				$ninsertions++ if $myutype eq "n";
+
+				$bmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "b";
+				$bmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "b";
+				$bnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "b";
+
+				$dmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "d";
+				$dmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "d";
+				$dnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "d"; 
+
+				$nmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "n";
+				$nmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "n";
+				$nnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "n";
+
+				push @insertionsarr, "b:$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "b";
+				push @insertionsarr, "d:$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "d";
+				push @insertionsarr, "n:$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "n";
+				
+			}
+		}
+	}
+	
+	
+	
+	$indels = "ins=".join(",",@insertionsarr).";dels=".join(",",@deletionsarr) if scalar(@insertionsarr) > 1 || scalar(@deletionsarr) > 1 ;
+	$subs = join(",",@substitutionsarr) if scalar(@substitutionsarr) > 1;
+	$indels =~ s/ //g;
+	$subs =~ s/ //g  ;
+ 	
+ 	#print "indels = $indels, subs=$subs\n";
+ 	##<STDIN> if $indels =~ /[a-zA-Z0-9]/ || $subs =~ /[a-zA-Z0-9]/ ;
+	#print "tree = $tree, indels = $indels, subs = $subs, bspecies = @bspecies, dspecies = @dspecies \n";
+	my @returnarray = ();
+	
+	push (@returnarray, $insertions,  $deletions,  $motinsertions, $motinsertionsf,  $motdeletions,  $motdeletionsf,  $noninsertions,  $nondeletions) ;
+	push (@returnarray, $binsertions, $bdeletions, $bmotinsertions,$bmotinsertionsf, $bmotdeletions, $bmotdeletionsf, $bnoninsertions, $bnondeletions) ;
+	push (@returnarray, $dinsertions, $ddeletions, $dmotinsertions,$dmotinsertionsf, $dmotdeletions, $dmotdeletionsf, $dnoninsertions, $dnondeletions) ;
+	push (@returnarray, $ninsertions, $ndeletions, $nmotinsertions,$nmotinsertionsf, $nmotdeletions, $nmotdeletionsf, $nnoninsertions, $nnondeletions) ;
+	push (@returnarray, $substitutions, $bsubstitutions, $dsubstitutions, $nsubstitutions, $indels, $subs) ;
+		
+	push @returnarray, $tree;
+	
+	my @copy = @returnarray;
+	return (@returnarray);
+	
+}
+
+sub selectBetterTree{
+	my $printer = 0;
+	my $treestudy = $_[0];
+	my $alt = $_[1];
+	my $mutspt = $_[2];
+	my @muts = @$mutspt;
+	my @trees = (); my @alternatetrees=();
+
+	@trees  = split(/\|/,$treestudy) if $treestudy =~ /\|/;
+	@alternatetrees  = split(/[\|;]/,$alt) if $alt =~ /[\|;\(\)]/;
+
+	$trees[0]  = $treestudy if $treestudy !~ /\|/;
+	$alternatetrees[0]  = $alt if $alt !~ /[\|;\(\)]/;
+	
+	my @alltrees = (@trees, @alternatetrees);
+#	push(@alltrees,@alternatetrees);
+	
+	my %mutspecies = ();
+	
+	print "IN selectBetterTree..treestudy=$treestudy. alt=$alt. for: @_. trees=@trees<. alternatetrees=@alternatetrees\n" if $printer == 1;
+	#<STDIN>;
+	foreach my $mut (@muts){
+		print colored ['green'],"mut = $mut\n" if $printer == 1;
+		$mut =~ /node=([A-Z,\(\) ]+)/;
+		my $node  = $1;
+		$node =~s/[,\(\) ]+//g;
+		my @indivspecies = $node =~ /[A-Z]+/g;
+		#print "adding node: $node\n" if $printer == 1;
+		$mutspecies{$node} = $node;
+		
+		#foreach (@indivspecies) { 
+			#$mutspecies{$mut} = $_; #print "for $_ adding $mutspecies{$_}\n";
+		#}
+		
+	}
+	
+	my @treerecords = ();
+	my $treecount = -1;
+	foreach my $tree (@alltrees){
+		print "checking with tree $tree\n" if $printer == 1;
+		$treecount++;
+		$treerecords[$treecount] = 0;
+		my @indivspecies = ($tree =~ /[A-Z]+/g);
+		print "indivspecies=@indivspecies\n" if $printer == 1;
+		foreach my $species (@indivspecies){
+			print "checkin if exists species: $species\n" if $printer == 1;
+			$treerecords[$treecount]+=2 if exists $mutspecies{$species} && $mutspecies{$species} !~ /indeltype=[a-z]mot/;
+			$treerecords[$treecount]+=1.5 if exists $mutspecies{$species} && $mutspecies{$species} =~ /indeltype=[a-z]mot/;
+			$treerecords[$treecount]-- if !exists $mutspecies{$species};
+		}
+		
+		print "for tree $tree, our treecount = $treerecords[$treecount]\n" if $printer == 1;
+	}
+	
+	my @best_tree = array_largest_number_arrayPosition(@treerecords);
+	print "treerecords = @treerecords. hence, best tree = @best_tree\n" if $printer == 1;
+	
+	return ($alltrees[$best_tree[0]], $treerecords[$best_tree[0]]) if scalar(@best_tree) == 1;
+	print "best_tree[0] = $best_tree[0], and treerecords = $treerecords[$best_tree[0]]\n" if $printer == 1;
+	return ("NULL", -1) if $treerecords[$best_tree[0]] < 1;
+	my $rando = int(rand($#trees));
+	return ($alltrees[$rando], $treerecords[$rando]) if scalar(@best_tree) > 1;
+	
+}
+
+
+
+
+sub load_sameHash{
+	#my $g = %$_[0];
+	$sameHash{"CAGT"}="AGTC";
+	$sameHash{"ATGA"}="AATG";
+	$sameHash{"CAAC"}="AACC";
+	$sameHash{"GGAA"}="AAGG";
+	$sameHash{"TAAG"}="AAGT";
+	$sameHash{"CGAG"}="AGCG";
+	$sameHash{"TAGG"}="AGGT";
+	$sameHash{"GCAG"}="AGGC";
+	$sameHash{"TAGA"}="ATAG";
+	$sameHash{"TGA"}="ATG";
+	$sameHash{"CAAG"}="AAGC";
+	$sameHash{"CTAA"}="AACT";
+	$sameHash{"CAAT"}="AATC";
+	$sameHash{"GTAG"}="AGGT";
+	$sameHash{"GAAG"}="AAGG";
+	$sameHash{"CGA"}="ACG";
+	$sameHash{"GTAA"}="AAGT";
+	$sameHash{"ACAA"}="AAAC";
+	$sameHash{"GCGG"}="GGGC";
+	$sameHash{"ATCA"}="AATC";
+	$sameHash{"TAAC"}="AACT";
+	$sameHash{"GGCA"}="AGGC";
+	$sameHash{"TGAG"}="AGTG";
+	$sameHash{"AACA"}="AAAC";
+	$sameHash{"GAGC"}="AGCG";
+	$sameHash{"ACCA"}="AACC";
+	$sameHash{"TGAA"}="AATG";
+	$sameHash{"ACA"}="AAC";
+	$sameHash{"GAAC"}="AACG";
+	$sameHash{"GCA"}="AGC";
+	$sameHash{"CCAC"}="ACCC";
+	$sameHash{"CATA"}="ATAC";
+	$sameHash{"CAC"}="ACC";
+	$sameHash{"TACA"}="ATAC";
+	$sameHash{"GGAC"}="ACGG";
+	$sameHash{"AGA"}="AAG";
+	$sameHash{"ATAA"}="AAAT";
+	$sameHash{"CA"}="AC";
+	$sameHash{"CCCA"}="ACCC";
+	$sameHash{"TCAA"}="AATC";
+	$sameHash{"CAGA"}="AGAC";
+	$sameHash{"AATA"}="AAAT";
+	$sameHash{"CCA"}="ACC";
+	$sameHash{"AGAA"}="AAAG";
+	$sameHash{"AGTA"}="AAGT";
+	$sameHash{"GACG"}="ACGG";
+	$sameHash{"TCAG"}="AGTC";
+	$sameHash{"ACGA"}="AACG";
+	$sameHash{"CGCA"}="ACGC";
+	$sameHash{"GAGT"}="AGTG";
+	$sameHash{"GA"}="AG";
+	$sameHash{"TA"}="AT";
+	$sameHash{"TAA"}="AAT";
+	$sameHash{"CAG"}="AGC";
+	$sameHash{"GATA"}="ATAG";
+	$sameHash{"GTA"}="AGT";
+	$sameHash{"CCAA"}="AACC";
+	$sameHash{"TAG"}="AGT";
+	$sameHash{"CAAA"}="AAAC";
+	$sameHash{"AAGA"}="AAAG";
+	$sameHash{"CACG"}="ACGC";
+	$sameHash{"GTCA"}="AGTC";
+	$sameHash{"GGA"}="AGG";
+	$sameHash{"GGAT"}="ATGG";
+	$sameHash{"CGGG"}="GGGC";
+	$sameHash{"CGGA"}="ACGG";
+	$sameHash{"AGGA"}="AAGG";
+	$sameHash{"TAAA"}="AAAT";
+	$sameHash{"GAGA"}="AGAG";
+	$sameHash{"ACTA"}="AACT";
+	$sameHash{"GCGA"}="AGCG";
+	$sameHash{"CACA"}="ACAC";
+	$sameHash{"AGAT"}="ATAG";
+	$sameHash{"GAGG"}="AGGG";
+	$sameHash{"CGAC"}="ACCG";
+	$sameHash{"GGAG"}="AGGG";
+	$sameHash{"GCCA"}="AGCC";
+	$sameHash{"CCAG"}="AGCC";
+	$sameHash{"GAAA"}="AAAG";
+	$sameHash{"CAGG"}="AGGC";
+	$sameHash{"GAC"}="ACG";
+	$sameHash{"CAA"}="AAC";
+	$sameHash{"GACC"}="ACCG";
+	$sameHash{"GGCG"}="GGGC";
+	$sameHash{"GGTA"}="AGGT";
+	$sameHash{"AGCA"}="AAGC";
+	$sameHash{"GATG"}="ATGG";
+	$sameHash{"GTGA"}="AGTG";
+	$sameHash{"ACAG"}="AGAC";
+	$sameHash{"CGG"}="GGC";
+	$sameHash{"ATA"}="AAT";
+	$sameHash{"GACA"}="AGAC";
+	$sameHash{"GCAA"}="AAGC";
+	$sameHash{"CAGC"}="AGCC";
+	$sameHash{"GGGA"}="AGGG";
+	$sameHash{"GAG"}="AGG";
+	$sameHash{"ACAT"}="ATAC";
+	$sameHash{"GAAT"}="AATG";
+	$sameHash{"CACC"}="ACCC";
+	$sameHash{"GAT"}="ATG";
+	$sameHash{"GCG"}="GGC";
+	$sameHash{"GCAC"}="ACGC";
+	$sameHash{"GAA"}="AAG";
+	$sameHash{"TGGA"}="ATGG";
+	$sameHash{"CCGA"}="ACCG";
+	$sameHash{"CGAA"}="AACG";
+}
+
+
+
+sub load_revHash{
+	$revHash{"CTGA"}="AGTC";
+	$revHash{"TCTT"}="AAAG";
+	$revHash{"CTAG"}="AGCT";
+	$revHash{"GGTG"}="ACCC";
+	$revHash{"GCC"}="GGC";
+	$revHash{"GCTT"}="AAGC";
+	$revHash{"GCGT"}="ACGC";
+	$revHash{"GTTG"}="AACC";
+	$revHash{"CTCC"}="AGGG";
+	$revHash{"ATC"}="ATG";
+	$revHash{"CGAT"}="ATCG";
+	$revHash{"TTAA"}="AATT";
+	$revHash{"GTTC"}="AACG";
+	$revHash{"CTGC"}="AGGC";
+	$revHash{"TCGA"}="ATCG";
+	$revHash{"ATCT"}="ATAG";
+	$revHash{"GGTT"}="AACC";
+	$revHash{"CTTA"}="AAGT";
+	$revHash{"TGGC"}="AGCC";
+	$revHash{"CCG"}="GGC";
+	$revHash{"CGGC"}="GGCC";
+	$revHash{"TTAG"}="AACT";
+	$revHash{"GTG"}="ACC";
+	$revHash{"CTTT"}="AAAG";
+	$revHash{"TGCA"}="ATGC";
+	$revHash{"CGCT"}="AGCG";
+	$revHash{"TTCC"}="AAGG";
+	$revHash{"CT"}="AG";
+	$revHash{"C"}="G";
+	$revHash{"CTCT"}="AGAG";
+	$revHash{"ACTT"}="AAGT";
+	$revHash{"GGTC"}="ACCG";
+	$revHash{"ATTC"}="AATG";
+	$revHash{"GGGT"}="ACCC";
+	$revHash{"CCTA"}="AGGT";
+	$revHash{"CGCG"}="GCGC";
+	$revHash{"GTGT"}="ACAC";
+	$revHash{"GCCC"}="GGGC";
+	$revHash{"GTCG"}="ACCG";
+	$revHash{"TCCC"}="AGGG";
+	$revHash{"TTCA"}="AATG";
+	$revHash{"AGTT"}="AACT";
+	$revHash{"CCCT"}="AGGG";
+	$revHash{"CCGC"}="GGGC";
+	$revHash{"CTT"}="AAG";
+	$revHash{"TTGG"}="AACC";
+	$revHash{"ATT"}="AAT";
+	$revHash{"TAGC"}="AGCT";
+	$revHash{"ACTG"}="AGTC";
+	$revHash{"TCAC"}="AGTG";
+	$revHash{"CTGT"}="AGAC";
+	$revHash{"TGTG"}="ACAC";
+	$revHash{"ATCC"}="ATGG";
+	$revHash{"GTGG"}="ACCC";
+	$revHash{"TGGG"}="ACCC";
+	$revHash{"TCGG"}="ACCG";
+	$revHash{"CGGT"}="ACCG";
+	$revHash{"GCTC"}="AGCG";
+	$revHash{"TACG"}="ACGT";
+	$revHash{"GTTT"}="AAAC";
+	$revHash{"CAT"}="ATG";
+	$revHash{"CATG"}="ATGC";
+	$revHash{"GTTA"}="AACT";
+	$revHash{"CACT"}="AGTG";
+	$revHash{"TCAT"}="AATG";
+	$revHash{"TTA"}="AAT";
+	$revHash{"TGTA"}="ATAC";
+	$revHash{"TTTC"}="AAAG";
+	$revHash{"TACT"}="AAGT";
+	$revHash{"TGTT"}="AAAC";
+	$revHash{"CTA"}="AGT";
+	$revHash{"GACT"}="AGTC";
+	$revHash{"TTGC"}="AAGC";
+	$revHash{"TTC"}="AAG";
+	$revHash{"GCT"}="AGC";
+	$revHash{"GCAT"}="ATGC";
+	$revHash{"TGGT"}="AACC";
+	$revHash{"CCT"}="AGG";
+	$revHash{"CATC"}="ATGG";
+	$revHash{"CCAT"}="ATGG";
+	$revHash{"CCCG"}="GGGC";
+	$revHash{"TGCC"}="AGGC";
+	$revHash{"TG"}="AC";
+	$revHash{"TGCT"}="AAGC";
+	$revHash{"GCCG"}="GGCC";
+	$revHash{"TCTG"}="AGAC";
+	$revHash{"TGT"}="AAC";
+	$revHash{"TTAT"}="AAAT";
+	$revHash{"TAGT"}="AACT";
+	$revHash{"TATG"}="ATAC";
+	$revHash{"TTTA"}="AAAT";
+	$revHash{"CGTA"}="ACGT";
+	$revHash{"TA"}="AT";
+	$revHash{"TGTC"}="AGAC";
+	$revHash{"CTAT"}="ATAG";
+	$revHash{"TATA"}="ATAT";
+	$revHash{"TAC"}="AGT";
+	$revHash{"TC"}="AG";
+	$revHash{"CATT"}="AATG";
+	$revHash{"TCG"}="ACG";
+	$revHash{"ATTT"}="AAAT";
+	$revHash{"CGTG"}="ACGC";
+	$revHash{"CTG"}="AGC";
+	$revHash{"TCGT"}="AACG";
+	$revHash{"TCCG"}="ACGG";
+	$revHash{"GTT"}="AAC";
+	$revHash{"ATGT"}="ATAC";
+	$revHash{"CTTG"}="AAGC";
+	$revHash{"CCTT"}="AAGG";
+	$revHash{"GATC"}="ATCG";
+	$revHash{"CTGG"}="AGCC";
+	$revHash{"TTCT"}="AAAG";
+	$revHash{"CGTC"}="ACGG";
+	$revHash{"CG"}="GC";
+	$revHash{"TATT"}="AAAT";
+	$revHash{"CTCG"}="AGCG";
+	$revHash{"TCTC"}="AGAG";
+	$revHash{"TCCT"}="AAGG";
+	$revHash{"TGG"}="ACC";
+	$revHash{"ACTC"}="AGTG";
+	$revHash{"CTC"}="AGG";
+	$revHash{"CGC"}="GGC";
+	$revHash{"TTG"}="AAC";
+	$revHash{"ACCT"}="AGGT";
+	$revHash{"TCTA"}="ATAG";
+	$revHash{"GTAC"}="ACGT";
+	$revHash{"TTGA"}="AATC";
+	$revHash{"GTCC"}="ACGG";
+	$revHash{"GATT"}="AATC";
+	$revHash{"T"}="A";
+	$revHash{"CGTT"}="AACG";
+	$revHash{"GTC"}="ACG";
+	$revHash{"GCCT"}="AGGC";
+	$revHash{"TGC"}="AGC";
+	$revHash{"TTTG"}="AAAC";
+	$revHash{"GGCT"}="AGCC";
+	$revHash{"TCA"}="ATG";
+	$revHash{"GTGC"}="ACGC";
+	$revHash{"TGAT"}="AATC";
+	$revHash{"TAT"}="AAT";
+	$revHash{"CTAC"}="AGGT";
+	$revHash{"TGCG"}="ACGC";
+	$revHash{"CTCA"}="AGTG";
+	$revHash{"CTTC"}="AAGG";
+	$revHash{"GCTG"}="AGCC";
+	$revHash{"TATC"}="ATAG";
+	$revHash{"TAAT"}="AATT";
+	$revHash{"ACT"}="AGT";
+	$revHash{"TCGC"}="AGCG";
+	$revHash{"GGT"}="ACC";
+	$revHash{"TCC"}="AGG";
+	$revHash{"TTGT"}="AAAC";
+	$revHash{"TGAC"}="AGTC";
+	$revHash{"TTAC"}="AAGT";
+	$revHash{"CGT"}="ACG";
+	$revHash{"ATTA"}="AATT";
+	$revHash{"ATTG"}="AATC";
+	$revHash{"CCTC"}="AGGG";
+	$revHash{"CCGG"}="GGCC";
+	$revHash{"CCGT"}="ACGG";
+	$revHash{"TCCA"}="ATGG";
+	$revHash{"CGCC"}="GGGC";
+	$revHash{"GT"}="AC";
+	$revHash{"TTCG"}="AACG";
+	$revHash{"CCTG"}="AGGC";
+	$revHash{"TCT"}="AAG";
+	$revHash{"GTAT"}="ATAC";
+	$revHash{"GTCT"}="AGAC";
+	$revHash{"GCTA"}="AGCT";
+	$revHash{"TACC"}="AGGT";
+}
+
+
+sub allCaps{
+	my $motif = $_[0];
+	$motif =~ s/a/A/g;
+	$motif =~ s/c/C/g;
+	$motif =~ s/t/T/g;
+	$motif =~ s/g/G/g;
+	return $motif;
+}
+
+
+sub all_caps{
+	my @strand = split(/\s*/,$_[0]);
+	for my $i (0 ... $#strand){
+		if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
+		if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
+		if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
+		if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
+	}
+	return join("",@strand);
+}
+sub array_mean{
+	return "NA" if scalar(@_) == 0;
+	my $sum = 0;
+	foreach my $val (@_){
+		$sum = $sum + $val;
+	}
+	return ($sum/scalar(@_));
+}
+sub array_sum{
+	return "NA" if scalar(@_) == 0;
+	my $sum = 0;
+	foreach my $val (@_){
+		$sum = $sum + $val;
+	}
+	return ($sum);
+}
+
+sub variance{
+	return "NA" if scalar(@_) == 0;
+	return 0 if scalar(@_) == 1;
+	my $mean = 	array_mean(@_);
+	my $num = 0;
+	return 0 if scalar(@_) == 1;
+#	print "mean = $mean .. array = >@_<\n";
+	foreach my $ele (@_){
+	#	print "$num = $num + ($ele-$mean)*($ele-$mean)\n";
+		$num = $num + ($ele-$mean)*($ele-$mean);
+	}
+	my $var = $num / scalar(@_);
+	return $var;
+}
+
+sub array_95confIntervals{
+	return "NA" if scalar(@_) <= 0;
+	my @sorted = sort { $a <=> $b } @_;
+#	print  "@sorted=",scalar(@sorted), "\n";
+	my $aDeechNo = int((scalar(@sorted) * 2.5) / 100);
+	my $saaDeNo = int((scalar(@sorted) * 97.5) / 100);
+	
+	return ($sorted[$aDeechNo], $sorted[$saaDeNo]);
+}
+
+sub array_median{
+	return "NA" if scalar(@_) == 0;
+	return $_[0] if scalar(@_) == 1;
+	my @sorted = sort { $a <=> $b } @_;
+	my $totalno = scalar(@sorted);
+	
+	#print "sorted = @sorted\n";
+	
+	my $pick = ();
+	if ($totalno % 2 == 1){
+		#print "odd set .. totalno = $totalno\n";
+		my $mid = $totalno / 2;
+		my $onehalfno = $mid - $mid % 1;
+		my $secondhalfno = $onehalfno + 1;
+		my $onehalf = $sorted[$onehalfno-1];
+		my $secondhalf = $sorted[$secondhalfno-1];
+		#print "onehalfno = $onehalfno and secondhalfno = $secondhalfno \n onehalf = $onehalf and secondhalf = $secondhalf\n";
+		
+		$pick =  $secondhalf;
+	}
+	else{
+		#print "even set .. totalno = $totalno\n";
+		my $mid = $totalno / 2;
+		my $onehalfno = $mid;
+		my $secondhalfno = $onehalfno + 1;
+		my $onehalf = $sorted[$onehalfno-1];
+		my $secondhalf = $sorted[$secondhalfno-1];
+		#print "onehalfno = $onehalfno and secondhalfno = $secondhalfno \n onehalf = $onehalf and secondhalf = $secondhalf\n";
+		$pick = ($onehalf + $secondhalf )/2;
+		
+	}
+	#print "pick = $pick..\n";
+	return $pick;
+
+}
+
+
+sub array_numerical_sort{
+		return "NA" if scalar(@_) == 0;
+        my @sorted = sort { $a <=> $b } @_;
+        return (@sorted);
+}
+
+sub array_smallest_number{
+	return "NA" if scalar(@_) == 0;
+	return $_[0] if scalar(@_) == 1;
+	my @sorted = sort { $a <=> $b } @_;
+	return $sorted[0];
+}
+
+
+sub array_largest_number{
+	return "NA" if scalar(@_) == 0;
+	return $_[0] if scalar(@_) == 1;
+	my @sorted = sort { $a <=> $b } @_;
+	return $sorted[$#sorted];
+}
+
+
+sub array_largest_number_arrayPosition{
+	return "NA" if scalar(@_) == 0;
+	return 0 if scalar(@_) == 1;
+	my $maxpos = 0;
+	my @maxposes = ();
+	my @maxvals = ();
+	my $maxval = array_smallest_number(@_);
+	for my $i (0 ... $#_){
+		if ($_[$i] > $maxval){
+			$maxval = $_[$i];
+			$maxpos = $i;
+		}
+		if ($_[$i] == $maxval){
+			$maxval = $_[$i];
+			if (scalar(@maxposes) == 0){
+				push @maxposes, $i;
+				push @maxvals, $_[$i];
+				
+			}
+			elsif ($maxvals[0] == $maxval){
+				push @maxposes, $i;
+				push @maxvals, $_[$i];
+			}
+			else{
+				@maxposes = (); @maxvals = ();
+				push @maxposes, $i;
+				push @maxvals, $_[$i];
+			}
+			
+		}
+		
+	}
+	return $maxpos  if scalar(@maxposes) < 2;
+	return (@maxposes);
+}
+
+sub array_smallest_number_arrayPosition{
+	return "NA" if scalar(@_) == 0;
+	return 0 if scalar(@_) == 1;
+	my $minpos = 0;
+	my @minposes = ();
+	my @minvals = ();
+	my $minval = array_largest_number(@_);
+	my $maxval = array_smallest_number(@_);
+	#print "starting with $maxval, ending with $minval\n";
+	for my $i (0 ... $#_){
+		if ($_[$i] < $minval){
+			$minval = $_[$i];
+			$minpos = $i;
+		}
+		if ($_[$i] == $minval){
+			$minval = $_[$i];
+			if (scalar(@minposes) == 0){
+				push @minposes, $i;
+				push @minvals, $_[$i];
+				
+			}
+			elsif ($minvals[0] == $minval){
+				push @minposes, $i;
+				push @minvals, $_[$i];
+			}
+			else{
+				@minposes = (); @minvals = ();
+				push @minposes, $i;
+				push @minvals, $_[$i];
+			}
+			
+		}
+		
+	}
+	#print "minposes=@minposes\n";
+
+	return $minpos  if scalar(@minposes) < 2;
+	return (@minposes);
+}
+
+sub basic_stats{
+	my @arr = @_;
+#	print " array_smallest_number= ", array_smallest_number(@arr)," array_largest_number= ", array_largest_number(@arr), " array_mean= ",array_mean(@arr),"\n";
+	return ":";
+}
+#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
+
+sub maftoAxt_multispecies {
+	my $printer = 0;
+#	print "in maftoAxt_multispecies : got @_\n";
+	my $fname=$_[0];
+	open(IN,"<$_[0]") or die "Cannot open $_[0]: $! \n";
+	my $treedefinition = $_[1];
+	open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
+	my $counter = 0;
+	my $exactspeciesset = $_[3];
+	my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
+	
+	$treedefinition=~s/[\)\(, ]/\t/g;
+	my @species=split(/\t+/,$treedefinition);
+	my @exactspecies=();
+	
+	foreach my $spec (@species){
+		foreach my $espec (@exactspeciesset_unarranged){
+			push @exactspecies, $spec if $spec eq $espec;
+		}
+	}
+#	print "exactspecies=@exactspecies\n";
+	
+	###########
+	my $select = 2;  
+	#select = 1 if all species need sequences to be present for each block otherwise, it is 0
+	#select = 2 only the allowed set make up the alignment. use the removeset
+	# information to detect alignmenets that have other important genomes aligned.
+	###########
+	my @allowedset = ();
+	@allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
+	@allowedset = join("_",0,@species) if $select == 1;
+	#print "species = @species , allowedset =",join("\n", @allowedset) ," \n"; 
+	@allowedset = join("_",0,@exactspecies) if $select == 2;
+	#print "allowedset = @allowedset and exactspecies = @exactspecies\n";
+	
+	my $start = 0;
+	my @sequences = ();
+	my @titles = ();
+	my $species_counter = "0";
+	my $countermatch = 0;
+	my $outsideSpecies=0;
+	
+	while(my $line = <IN>){
+		next if $line =~ /^#/;
+		next if $line =~ /^i/;
+		chomp $line;
+		#print "$line";
+		my @fields = split(/\s+/,$line);
+		chomp $line;
+		if ($line =~ /^a /){
+			$start = 1;
+		}
+		
+		if ($line =~ /^s /){
+		#	print "fields1 = $fields[1] , start = $start\n";
+		
+			foreach my $sp (@species){
+				if ($fields[1] =~ /$sp/){
+					$species_counter = $species_counter."_".$sp;
+					push(@sequences, $fields[6]);
+					my @sp_info = split(/\./,$fields[1]);
+					my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
+					push(@titles, $title);				
+					
+				}
+			}
+		}
+		
+		if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start = 1)){
+		
+			my $arranged = reorderSpecies($species_counter, @species);
+			my $stopper = 1;
+			my $arrno = 0;
+			foreach my $set (@allowedset){
+				if ($arranged eq $set){
+	#				print "$arranged == $set\n";
+					$stopper = 0; last;
+				}
+				$arrno++;
+			}
+	
+			if ($stopper == 0) {
+			#	print "    accepted\n";
+				@titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;
+				
+				@sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;
+				my $filteredseq = filter_gaps(@sequences);
+				
+				if ($filteredseq ne "SHORT"){
+					$counter++;
+					print OUT join (" ",$counter, @titles), "\n";
+					print OUT $filteredseq, "\n";
+					print OUT "\n"; 
+					$countermatch++;
+				}
+			#	my @filtered_seq = split(/\t/,filter_gaps(@sequences) );
+			}
+			else{#print "\n";
+			}
+	
+			@sequences = (); @titles = (); $start = 0;$species_counter = "0";
+			next;		
+			
+		}
+	}
+#	print "countermatch = $countermatch\n";
+}
+
+sub reorderSpecies{
+	my @inarr=@_;
+	my $currSpecies = shift (@inarr);
+	my $ordered_species = 0;
+	my @species=@inarr;
+	foreach my $order (@species){
+		$ordered_species = $ordered_species."_".$order	if	$currSpecies=~ /$order/;
+	}
+	return $ordered_species;
+
+}
+
+sub filter_gaps{
+	my @sequences = @_;
+#	print "sequences sent are @sequences\n";
+	my $seq_length = length($sequences[0]);
+	my $seq_no = scalar(@sequences);
+	my $allgaps = ();
+	for (1 ... $seq_no){
+		$allgaps = $allgaps."-";
+	}
+	
+	my @seq_array = ();
+	my $seq_counter = 0;
+	foreach my $seq (@sequences){
+#		my @sequence = split(/\s*/,$seq);
+		$seq_array[$seq_counter] = [split(/\s*/,$seq)];
+#		push @seq_array, [@sequence];
+		$seq_counter++;
+	}
+	my $g = 0;
+	while ( $g < $seq_length){
+		last if (!exists $seq_array[0][$g]);
+		my $bases = ();
+		for my $u (0 ... $#seq_array){
+			$bases = $bases.$seq_array[$u][$g];
+		}	
+#		print $bases, "\n";
+		if ($bases eq $allgaps){
+#			print "bases are $bases, position is $g \n";
+			for my $seq (@seq_array){
+				splice(@$seq , $g, 1);
+			}
+		}
+		else {
+			$g++;
+		}
+	}
+	
+	my @outs = ();
+	
+	foreach my $seq (@seq_array){
+		push(@outs, join("",@$seq));
+	}
+	return "SHORT" if length($outs[0]) <=100;
+	return (join("\n", @outs));	
+}
+
+
+sub allowedSetOfSpecies{
+	my @allowed_species = split(/_/,$_[0]);
+	unshift @allowed_species, 0;
+#	print "allowed set = @allowed_species \n";
+	my @output = ();
+	for (0 ... scalar(@allowed_species) - 4){
+		push(@output, join("_",@allowed_species));
+		pop @allowed_species;
+	}
+	return join(";",reverse(@output));
+
+}
+
+
+sub orderInfo{
+	my @info = split(/;/,$_[0]);
+#	print "info = @info";
+	my @old = split(/_/,$_[1]);
+	my @new = split(/_/,$_[2]);
+	shift @old; shift @new;
+	my @outinfo = ();
+	foreach my $spe (@new){
+		for my $no (0 ... $#old){
+			if ($spe eq $old[$no]){
+				push(@outinfo, $info[$no]);
+			}
+		}
+	}
+#	print "outinfo = @outinfo \n"; 
+	return join(";", @outinfo);
+}
+
+#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
+
+sub printarr {
+	print ">::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::\n";
+	foreach my $line (@_) {print "$line\n";}
+	print "::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::<\n";
+}
+