view tools/regVariation/multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;
use Term::ANSIColor;
use File::Basename;
use IO::Handle;
use Cwd;
use File::Path;
use File::Temp qw/ tempfile tempdir /;
use vars qw($distance @thresholds @tags $printer $mergestarts $mergeends $mergemicros $interrtypecord $microscanned $interrcord $interr_poscord $no_of_interruptionscord $infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species $gapcord $prinkter);

$ENV{'PATH'} .= ':' . dirname($0);
my $date = `date`;

my ($mafile, $orthfile, $threshold_array,  $species_set, $tree_definition, $separation) = @ARGV;
if (!$mafile or !$orthfile or !$threshold_array or !$separation or !$tree_definition or !$species_set) { die "missing arguments\n"; }

#-------------------------------------------------------------------------------
# WHICH SPUTNIK USED?
my $sputnikpath = ();
$sputnikpath = "sputnik";
#print "sputnik_Mac-PowerPC non-existant\n" if !-e $sputnikpath;
#exit if !-e $sputnikpath;
#$sputnikpath = "bx-sputnik" ;
#print "ARGV input = @ARGV\n";
#print "ARGV input :\n mafile=$mafile\n orthfile=$orthfile\n threshold_array=$threshold_array\n  species_set=$species_set\n tree_definition=$tree_definition\n separation=$separation\n";
#-------------------------------------------------------------------------------
# RUNFILE
#-------------------------------------------------------------------------------
$distance = 1; #bp
$distance++;
#-------------------------------------------------------------------------------
# MICROSATELLITE THRESHOLD SETTINGS (LENGTH, BP)
$threshold_array=~ s/,/_/g;
my @thresharr = split("_",$threshold_array);
my @thresholds=@thresharr;
my $mono_threshold = $thresharr[0];
my $di_threshold = $thresharr[1];
my $tri_threshold = $thresharr[2];
my $tetra_threshold = $thresharr[3];
#my $threshold_array = join("_",($mono_threshold, $di_threshold, $tri_threshold, $tetra_threshold));
my $tdir = tempdir( CLEANUP => 0 );
chdir $tdir;
my $dir = getcwd;  
#print "current dit=$dir\n";
#-------------------------------------------------------------------------------
# CREATE AXT FILES IN FORWARD AND REVERSE ORDERS IF NECESSARY
my @chrfiles=();

#my $mafile =  "/Users/ydk/work/rhesus_microsat/results/galay/align.txt"; #$ARGV[0];
my $chromt=int(rand(10000));
my $p_chr=$chromt;


my @exactspeciesset_unarranged = split(/,/,$species_set);
$tree_definition=~s/[\)\(, ]/\t/g;
my @treespecies=split(/\t+/,$tree_definition);
my @exactspecies=();

foreach my $spec (@treespecies){
	foreach my $espec (@exactspeciesset_unarranged){
		push @exactspecies, $spec if $spec eq $espec;
	}
}
#print "exactspecies=@exactspecies\n";
my $focalspec = $exactspecies[0];
my $arranged_species_set=join(".",@exactspecies);
my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
#print "sending to maftoAxt_multispecies: $mafile, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n"; 
maftoAxt_multispecies($mafile, $tree_definition, $chr_name, $species_set);
#print "done maf to axt conversion\n";
my $reverse_chr_name = join(".",("chr".$p_chr."r"),$arranged_species_set, "net", "axt");
artificial_axdata_inverter ($chr_name, $reverse_chr_name);
#print "reverse_chr_name=$reverse_chr_name\n"; 
#-------------------------------------------------------------------------------
# FIND THE CORRESPONDING CHIMP CHROMOSOME FROM FILE ORTp_chrS.TXT
foreach my $direct ("reverse_direction","forward_direction"){
	$p_chr=$chromt;
	#print "direction = $direct\n";
	$p_chr = $p_chr."r" if $direct eq "reverse_direction";
	$p_chr = $p_chr if $direct eq "forward_direction";
	my $config = $species_set;
	$config=~s/,/./g;
	my @orgs = split(/\./,$arranged_species_set);
	#print "ORGS= @orgs\n";
	my @tag=@orgs;
		
	
	my $tags = join(",", @tag);
	my @tags=@tag;
	chomp $p_chr;
	$tags = join("_", split(/,/, $tags));
	my $pchr = "chr".$p_chr;
	
	my $ptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1])."-".$threshold_array;
	my @sp_tags = ();
	
	#print "orgs=@orgs, pchr=$pchr, hence, ptag = $ptag\n";
	foreach my $sp (@tag){
		push(@sp_tags, ($sp.".".$ptag));
	}

	my $preptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1]);
	my @presp_tags = ();
	
	foreach my $sp (@tag){
		push(@presp_tags, ($sp.".".$preptag));
	}

	my $resultdir = "";
	my $orthdir = "";
	my $filtereddir = "";
	my $pipedir = "";
	
	my @title_queries = ();
	push(@title_queries, "^[0-9]+");
	my $sep="\\s";
	for my $or (0 ... $#orgs){
		my $title =  join($sep, ($orgs[$or],  "[A-Za-z]+[0-9a-zA-Z]+", "[0-9]+", "[0-9]+", "[\\-\\+]"));
		#$title =~ s/chr\\+\\s+\+/chr/g;
		push(@title_queries, $title);
	}
	my $title_query = join($sep, @title_queries);
	#print "title_queries=@title_queries\n";
	#print "query = >$title_query<\n"; 
	#print "orgs = @orgs\n"; 
	#-------------------------------------------------------------------------------
	# GET AXTNET FILES, EDIT THEM AND SPLIT THEM INTO HUMAN AND CHIMP INPUT FILES
	my $t1input = $pchr.".".$arranged_species_set.".net.axt";
	
	my @t1outputs = ();
	
	foreach my $sp (@presp_tags){
		push(@t1outputs, $sp."_gap_op");
	}
	
	multi_species_t1($t1input,$tags,(join(",", @t1outputs)), $title_query); 
	#print "t1outputs=@t1outputs\n";
	#print "done t1\n";
	#-------------------------------------------------------------------------------
	#START T2.PL
	
	my $stag  = (); my $tag1 = (); my $tag2 = ();  my $schrs = ();
	
	for my $t (0 ... scalar(@tags)-1){
		multi_species_t2($t1outputs[$t], $tag[$t]);		
	}
	#-------------------------------------------------------------------------------
	#START T2.2.PL
	
	my @temp_tags = @tag;
	
	foreach my $sp (@presp_tags){
		my $t2input =  $sp."_nogap_op_unrand";
		multi_species_t2_2($t2input, shift(@temp_tags));
	}
	undef (@temp_tags);
	
	#-------------------------------------------------------------------------------
	#START SPUTNIK
	
	my @jobIDs = ();
	@temp_tags = @tag;
	my @sput_filelist = ();
	
	foreach my $sp (@presp_tags){
		#print "sp = $sp\n";
		my $sputnikoutput = $pipedir.$sp."_sput_op0";
		my $sputnikinput = $pipedir.$sp."_nogap_op_unrand";
		push(@sput_filelist, $sputnikinput);
		my $sputnikcommand = $sputnikpath." ".$sputnikinput." > ".$sputnikoutput;
		#print "$sputnikcommand\n";
		my @sputnikcommand_system = $sputnikcommand;
		system(@sputnikcommand_system);
	}

	#-------------------------------------------------------------------------------
	#START SPUTNIK OUTPUT CORRECTOR
	
	foreach my $sp (@presp_tags){
		my $corroutput = $pipedir.$sp."_sput_op1";
		my $corrinput = $pipedir.$sp."_sput_op0";
		sputnikoutput_corrector($corrinput,$corroutput);
		
		my $t4output = $pipedir.$sp."_sput_op2";
		multi_species_t4($corroutput,$t4output);
	
		my $t5output = $pipedir.$sp."_sput_op3";
		multi_species_t5($t4output,$t5output);
		#print "done t5.pl for $sp\n";
	
		my $t6output = $pipedir.$sp."_sput_op4";
		multi_species_t6($t5output,$t6output,scalar(@orgs));
	}
	#-------------------------------------------------------------------------------
	#START T9.PL FOR T10.PL AND FOR INTERRUPTED HUNTING
	
	foreach my $sp (@presp_tags){
		my $t9output = $pipedir.$sp."_gap_op_unrand_match";
		my $t9sequence = $pipedir.$sp."_gap_op_unrand2";
		my $t9micro = $pipedir.$sp."_sput_op4";
		t9($t9micro,$t9sequence,$t9output);
		
		my $t9output2 = $pipedir.$sp."_nogap_op_unrand2_match";
		my $t9sequence2 = $pipedir.$sp."_nogap_op_unrand2";
		t9($t9micro,$t9sequence2,$t9output2);
	}
	#print "done both t9.pl for all orgs\n";

	#-------------------------------------------------------------------------------
	# FIND COMPOUND MICROSATELLITES
	
	@jobIDs = ();
	my $species_counter = 0;
	
	foreach my $sp (@presp_tags){
		my $simple_microsats=$pipedir.$sp."_sput_op4_simple";
		my $compound_microsats=$pipedir.$sp."_sput_op4_compound";
		my $input_micro = $pipedir.$sp."_sput_op4";
		my $input_seq = $pipedir.$sp."_nogap_op_unrand2_match";
		multiSpecies_compound_microsat_hunter3($input_micro,$input_seq,$simple_microsats,$compound_microsats,$orgs[$species_counter], scalar(@sp_tags), $threshold_array );
		$species_counter++;
	}
	
	#-------------------------------------------------------------------------------
	# READING  AND FILTERING SIMPLE MICROSATELLITES
	my $spcounter2=0;
	foreach my $sp (@sp_tags){
		my $presp = $presp_tags[$spcounter2];
		$spcounter2++;
		my $simple_microsats=$pipedir.$presp."_sput_op4_simple";
		my $simple_filterout = $pipedir.$sp."_sput_op4_simple_filtered";
		my $simple_residue = $pipedir.$sp."_sput_op4_simple_residue";
		multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
	}
	
	#-------------------------------------------------------------------------------
	# ANALYZE  COMPOUND MICROSATELLITES FOR BEING INTERRUPTED MICROSATS
	
	$species_counter = 0;
	foreach my $sp (@sp_tags){
		my $presp = $presp_tags[$species_counter];
		my $compound_microsats = $pipedir.$presp."_sput_op4_compound";
		my $analyzed_simple_microsats=$pipedir.$presp."_sput_op4_compound_interrupted";
		my $analyzed_compound_microsats=$pipedir.$presp."_sput_op4_compound_pure";
		my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
		multiSpecies_compound_microsat_analyzer($compound_microsats,$seq_file,$analyzed_simple_microsats,$analyzed_compound_microsats,$orgs[$species_counter], scalar(@sp_tags));
		$species_counter++;
	}
	#-------------------------------------------------------------------------------
	# REANALYZE COMPOUND MICROSATELLITES FOR PRESENCE OF SIMPLE ONES WITHIN THEM.. 
	$species_counter = 0;
	
	foreach my $sp (@sp_tags){
		my $presp = $presp_tags[$species_counter];
		my $compound_microsats = $pipedir.$presp."_sput_op4_compound_pure";
		my $compound_interrupted = $pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
		my $compound_compound = $pipedir.$presp."_sput_op4_compound_compound";
		my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
		multiSpecies_compoundClarifyer($compound_microsats,$seq_file,$compound_interrupted,$compound_compound,$orgs[$species_counter], scalar(@sp_tags), "2_4_6_8", "3_4_6_8", "2_4_6_8");
		$species_counter++;
	}
	#-------------------------------------------------------------------------------
	# READING  AND FILTERING SIMPLE AND COMPOUND MICROSATELLITES
	$species_counter = 0;
	
	foreach my $sp (@sp_tags){
		my $presp = $presp_tags[$species_counter];
	
		my $simple_microsats=$pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
		my $simple_filterout = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
		my $simple_residue = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_residue";
		multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
	
		my $simple_microsats2 = $pipedir.$presp."_sput_op4_compound_interrupted";
		my $simple_filterout2 = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
		my $simple_residue2 = $pipedir.$sp."_sput_op4_compound_interrupted_residue";
		multiSpecies_filtering_interrupted_microsats($simple_microsats2, $simple_filterout2, $simple_residue2,$threshold_array,$threshold_array,scalar(@sp_tags));
	
		my $compound_microsats=$pipedir.$presp."_sput_op4_compound_compound";
		my $compound_filterout = $pipedir.$sp."_sput_op4_compound_compound_filtered";
		my $compound_residue = $pipedir.$sp."_sput_op4_compound_compound_residue";
		multispecies_filtering_compound_microsats($compound_microsats, $compound_filterout, $compound_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
		$species_counter++;
	}
	#print "done filtering both simple and compound microsatellites \n";
	
	#-------------------------------------------------------------------------------
	
	my @combinedarray = ();
	my @combinedarray_indicators = ("mononucleotide", "dinucleotide", "trinucleotide", "tetranucleotide");
	my @combinedarray_tags = ("mono", "di", "tri", "tetra");
	$species_counter = 0;
	
	foreach my $sp (@sp_tags){
		my $simple_interrupted = $pipedir.$sp."_simple_analyzed_simple";
		push @{$combinedarray[$species_counter]}, $pipedir.$sp."_simple_analyzed_simple_mono", $pipedir.$sp."_simple_analyzed_simple_di", $pipedir.$sp."_simple_analyzed_simple_tri", $pipedir.$sp."_simple_analyzed_simple_tetra";
		$species_counter++;
	}
	
	#-------------------------------------------------------------------------------
	# PUT TOGETHER THE INTERRUPTED AND SIMPLE MICROSATELLITES BASED ON THEIR MOTIF SIZE FOR FURTHER EXTENTION	
	my $sp_counter = 0;
	foreach my $sp (@sp_tags){	
		my $analyzed_simple = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
		my $clarifyed_simple = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
		my $simple = $pipedir.$sp."_sput_op4_simple_filtered";
		my $simple_analyzed_simple = $pipedir.$sp."_simple_analyzed_simple";
		`cat $analyzed_simple $clarifyed_simple $simple > $simple_analyzed_simple`;
		for my $i (0 ... 3){
			`grep "$combinedarray_indicators[$i]" $simple_analyzed_simple > $combinedarray[$sp_counter][$i]`;	
		}
		$sp_counter++;		
	}
	#print "\ndone grouping interrupted & simple microsats based on their motif size for further extention\n";
	
	#-------------------------------------------------------------------------------
	# BREAK CHROMOSOME INTO PARTS OF CERTAIN NO. CONTIGS EACH, FOR FUTURE SEARCHING OF INTERRUPTED MICROSATELLITES
	# ESPECIALLY DI, TRI AND TETRANUCLEOTIDE MICROSATELLITES
	@temp_tags = @sp_tags;
	my $increment = 1000000;
	my @splist = ();
	my $targetdir = $pipedir;
	$species_counter=0;
	
	foreach my $sp (@sp_tags){
		my $presp = $presp_tags[$species_counter];
		$species_counter++;
		my $localtag = shift @temp_tags;
		my $locallist = $targetdir.$localtag."_".$p_chr."_list";
		push(@splist, $locallist);
		my $input = $pipedir.$presp."_nogap_op_unrand2_match";
		chromosome_unrand_breaker($input,$targetdir,$locallist,$increment, $localtag, $pchr);
	}
	

	my @unionarray = ();
	#print "splist=@splist\n";
	#-------------------------------------------------------------------------------
	# FIND INTERRUPTED MICROSATELLITES
	
	$species_counter = 0;		
	
	for my $i (0 .. $#combinedarray){
		
		@jobIDs = ();
		open (JLIST1, "$splist[$i]") or die "Cannot open file $splist[$i]: $!";
		
		while (my $sp1  = <JLIST1>){
			#print "$splist[$i]: sp1=$sp1\n";
			chomp $sp1;
			
			for my $j (0 ... $#combinedarray_tags){
				my $interr  = $sp1."_interr_".$combinedarray_tags[$j];
				my $simple  = $sp1."_simple_".$combinedarray_tags[$j];
				push @{$unionarray[$i]}, $interr, $simple;	
				multiSpecies_interruptedMicrosatHunter($combinedarray[$i][$j],$sp1,$interr ,$simple, $orgs[$species_counter], scalar(@sp_tags), "3_4_6_8"); 
			}		
		}
		$species_counter++;
	}
	close JLIST1;	
	#-------------------------------------------------------------------------------
	#	REUNION AND ZIPPING BEFORE T10.PL
	
	my @allarray = ();
	
	for my $i (0 ... $#sp_tags){	
		my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
		unlink $localfile if -e $localfile;
		push(@allarray, $localfile);
	
		my $unfiltered_localfile= $localfile."_unfiltered";
		my $residue_localfile= $localfile."_residue";
		
		unlink $unfiltered_localfile;
		#unlink $unfiltered_localfile;
		for my $j (0 ... $#{$unionarray[$i]}){
			#print "listing files for species $i  and list number $j= \n$unionarray[$i][$j] \n";
			`cat $unionarray[$i][$j] >> $unfiltered_localfile`;
			unlink $unionarray[$i][$j];
		}

		multiSpecies_filtering_interrupted_microsats($unfiltered_localfile, $localfile, $residue_localfile,$threshold_array,$threshold_array,scalar(@sp_tags) );
		my $analyzed_compound = $pipedir.$sp_tags[$i]."_sput_op4_compound_compound_filtered";
		my $simple_residue = $pipedir.$sp_tags[$i]."_sput_op4_simple_residue";
		my $compound_residue = $pipedir.$sp_tags[$i]."_sput_op4_compound_residue";
		
		`cat $analyzed_compound >> $localfile`;
	}
	#-------------------------------------------------------------------------------
	# MERGING MICROSATELLITES THAT ARE VERY CLOSE TO EACH OTHER, INCLUDING THOSE FOUND BY SEARCHING IN 2 OPPOSIT DIRECTIONS
	
	my $toescape=0;
	
	
	for my $i (0 ... $#sp_tags){	
		my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
		$localfile =~ /$focalspec\-(chr[0-9a-zA-Z]+)\./;
		my $direction = $1;
		#print "localfile = $localfile , direction = $direction\n"; 
#		`gzip $reverse_chr_name` if $direction =~ /chr[0-9a-zA-Z]+r/ && $switchboard{"deleting_processFiles"} != 1;
		$toescape =1 if $direction =~ /chr[0-9a-zA-Z]+r/;
		last if $direction =~ /chr[0-9a-zA-Z]+r/;
		my $nogap_sequence = $pipedir.$presp_tags[$i]."_nogap_op_unrand2_match";
		my $gap_sequence = $pipedir.$presp_tags[$i]."_gap_op_unrand_match";
		my $reverselocal = $localfile;
		$reverselocal =~ s/\-chr([0-9a-zA-Z]+)\./-chr$1r./g;
		merge_interruptedMicrosats($nogap_sequence,$localfile, $reverselocal ,scalar(@sp_tags)); 
		#-------------------------------------------------------------------------------
		my $forward_separate = $localfile."_separate";
		my $reverse_separate = $reverselocal."_separate";
		my $diff = $forward_separate."_diff";
		my $miss = $forward_separate."_miss";
		my $common = $forward_separate."_common";
		forward_reverse_sputoutput_comparer($nogap_sequence,$forward_separate, $reverse_separate, $diff, $miss, $common ,scalar(@sp_tags)); 
		#-------------------------------------------------------------------------------
		my $symmetrical_file = $localfile."_symmetrical";
		my $merged_file = $localfile."_merged";
		#print "cating: $merged_file $common  into -> $symmetrical_file \n"; 
		`cat $merged_file $common > $symmetrical_file`;
		#-------------------------------------------------------------------------------
		my $t10output = $symmetrical_file."_fin_hit_all_2";
		new_multispecies_t10($gap_sequence, $symmetrical_file, $t10output, join(".", @orgs));
		#-------------------------------------------------------------------------------		
	}
	next if $toescape == 1;
	#------------------------------------------------------------------------------------------------
	# BRINGING IT ALL TOGETHER: FINDING ORTHOLOGOUS MICROSATELLITES AMONG THE SPECIES
	
	
	my @micros_array = ();
	my $sampletag = ();
	for my $i (0 ... $#sp_tags){
		my $finhitFile = $pipedir.$sp_tags[$i]."_allmicrosats_symmetrical_fin_hit_all_2";
		push(@micros_array, $finhitFile);
		$sampletag = $sp_tags[$i];
	}
	#$sampletag =~ s/^([A-Z]+\.)/ORTH_/;
	#$sampletag = $sampletag."_monoThresh-".$mono_threshold."bp";
	my $orthanswer = multiSpecies_orthFinder4($t1input, join(":",@micros_array), $orthfile, join(":", @orgs), $separation);
}
$date = `date`;
#print "date = $date\n";
#remove_tree($tdir);
#------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------

#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 

sub maftoAxt_multispecies {
	#print "in maftoAxt_multispecies : got @_\n";
	my $fname=$_[0];
	open(IN,"<$_[0]") or die "Cannot open $_[0]: $! \n";
	my $treedefinition = $_[1];
	open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
	my $counter = 0;
	my $exactspeciesset = $_[3];
	my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
	
	$treedefinition=~s/[\)\(, ]/\t/g;
	my @species=split(/\t+/,$treedefinition);
	my @exactspecies=();
	
	foreach my $spec (@species){
		foreach my $espec (@exactspeciesset_unarranged){
			push @exactspecies, $spec if $spec eq $espec;
		}
	}
	#print "exactspecies=@exactspecies\n";
	
	###########
	my $select = 2;  
	#select = 1 if all species need sequences to be present for each block otherwise, it is 0
	#select = 2 only the allowed set make up the alignment. use the removeset
	# information to detect alignmenets that have other important genomes aligned.
	###########
	my @allowedset = ();
	@allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
	@allowedset = join("_",0,@species) if $select == 1;
	#print "species = @species , allowedset =",join("\n", @allowedset) ," \n"; 
	@allowedset = join("_",0,@exactspecies) if $select == 2;
	#print "allowedset = @allowedset and exactspecies = @exactspecies\n";
	
	my $start = 0;
	my @sequences = ();
	my @titles = ();
	my $species_counter = "0";
	my $countermatch = 0;
	my $outsideSpecies=0;
	
	while(my $line = <IN>){
		next if $line =~ /^#/;
		next if $line =~ /^i/;
		chomp $line;
		#print "$line";
		my @fields = split(/\s+/,$line);
		chomp $line;
		if ($line =~ /^a /){
			$start = 1;
		}
		
		if ($line =~ /^s /){
		#	print "fields1 = $fields[1] , start = $start\n";
		
			foreach my $sp (@species){
				if ($fields[1] =~ /$sp/){
					$species_counter = $species_counter."_".$sp;
					push(@sequences, $fields[6]);
					my @sp_info = split(/\./,$fields[1]);
					my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
					push(@titles, $title);				
					
				}
			}
		}
		
		if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start = 1)){
			my $arranged = reorderSpecies($species_counter, @species);
			my $stopper = 1;
			my $arrno = 0;
			foreach my $set (@allowedset){
				if ($arranged eq $set){
	#				print "$arranged == $set\n";
					$stopper = 0; last;
				}
				$arrno++;
			}
	
			if ($stopper == 0) {
			#	print "    accepted\n";
				@titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;				
				@sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;				
				my $filteredseq = filter_gaps(@sequences);
				
				if ($filteredseq ne "SHORT"){
					$counter++;
					print OUT join (" ",$counter, @titles), "\n";
					print OUT $filteredseq, "\n";
					print OUT "\n"; 
					$countermatch++;
				}
			}
			else{#print "\n";
			}
	
			@sequences = (); @titles = (); $start = 0;$species_counter = "0";
			next;		
			
		}
	}
#	print "countermatch = $countermatch\n";
}

sub reorderSpecies{
	my @inarr=@_;
	my $currSpecies = shift (@inarr);
	my $ordered_species = 0;
	my @species=@inarr;
	foreach my $order (@species){
		$ordered_species = $ordered_species."_".$order	if	$currSpecies=~ /$order/;
	}
	return $ordered_species;

}

sub filter_gaps{
	my @sequences = @_;
#	print "sequences sent are @sequences\n";
	my $seq_length = length($sequences[0]);
	my $seq_no = scalar(@sequences);
	my $allgaps = ();
	for (1 ... $seq_no){
		$allgaps = $allgaps."-";
	}
	
	my @seq_array = ();
	my $seq_counter = 0;
	foreach my $seq (@sequences){
#		my @sequence = split(/\s*/,$seq);
		$seq_array[$seq_counter] = [split(/\s*/,$seq)];
#		push @seq_array, [@sequence];
		$seq_counter++;
	}
	my $g = 0;
	while ( $g < $seq_length){
		last if (!exists $seq_array[0][$g]);
		my $bases = ();
		for my $u (0 ... $#seq_array){
			$bases = $bases.$seq_array[$u][$g];
		}	
#		print $bases, "\n";
		if ($bases eq $allgaps){
#			print "bases are $bases, position is $g \n";
			for my $seq (@seq_array){
				splice(@$seq , $g, 1);
			}
		}
		else {
			$g++;
		}
	}
	
	my @outs = ();
	
	foreach my $seq (@seq_array){
		push(@outs, join("",@$seq));
	}
	return "SHORT" if length($outs[0]) <=100;
	return (join("\n", @outs));	
}


sub allowedSetOfSpecies{
	my @allowed_species = split(/_/,$_[0]);
	unshift @allowed_species, 0;
#	print "allowed set = @allowed_species \n";
	my @output = ();
	for (0 ... scalar(@allowed_species) - 4){
		push(@output, join("_",@allowed_species));
		pop @allowed_species;
	}
	return join(";",reverse(@output));

}


sub orderInfo{
	my @info = split(/;/,$_[0]);
#	print "info = @info";
	my @old = split(/_/,$_[1]);
	my @new = split(/_/,$_[2]);
	shift @old; shift @new;
	my @outinfo = ();
	foreach my $spe (@new){
		for my $no (0 ... $#old){
			if ($spe eq $old[$no]){
				push(@outinfo, $info[$no]);
			}
		}
	}
#	print "outinfo = @outinfo \n"; 
	return join(";", @outinfo);
}

#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 

#xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx 
sub artificial_axdata_inverter{
	open(IN,"<$_[0]") or die "Cannot open file $_[0]: $!";
	open(OUT,">$_[1]") or die "Cannot open file $_[1]: $!";
	my $linecounter=0;
	while (my $line = <IN>){
		$linecounter++;
		#print "$linecounter\n";
		chomp $line;	
		my $final_line = $line;	
		my $trycounter = 0;
		if ($line =~ /^[a-zA-Z\-]/){
		#	while ($final_line eq $line){
				my @fields = split(/\s*/,$line);
				
				$final_line = join("",reverse(@fields));
		#		print colored ['red'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
		#		$trycounter++;
		#		print "trying again....$trycounter : $final_line\n" if $final_line eq $line;
		#	}
		}
		
	#	print colored ['yellow'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
		if ($line =~ /^[0-9]/){
			$line =~ s/chr([A-Z0-9a-b]+)/chr$1r/g;
			$final_line = $line;
		}
		print OUT $final_line,"\n";
		#print "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
	}
	close OUT;
}
#xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx 


#xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx 

sub multi_species_t1 {

	my $input1 = $_[0];
#	print "@_\n"; #<STDIN>;
	my @tags = split(/_/, $_[1]);
	my @outputs = split(/,/, $_[2]);
	my $title_query = $_[3];
	my @handles = ();
	
	open(FILEB,"<$input1")or die "Cannot open file: $input1 $!";
	my $i = 0;
	foreach my $path (@outputs){
		$handles[$i] = IO::Handle->new();
		open ($handles[$i], ">$path") or die "Can't open $path : $!";
		$i++;
	}
	
	my $curdef;
	my $start = 0;
	
	while (my $line = <FILEB> ) {
		if ($line =~ /^\d/){
			$line =~ s/ +/\t/g;
			my @fields = split(/\s+/, $line);
			if (($line =~ /$title_query/)){
				my $title = $line;
				my $counter = 0;
				foreach my $tag (@tags){
					$line = <FILEB>;
					print {$handles[$counter]} ">",$tag,"\t",$title, " ",$line;  	
					$counter++;
				}
			}
			else{
					foreach my $tag (@tags){
					my $tine = <FILEB>;
				}		
			}
		
		}
	}
	
	foreach my $hand (@handles){
		$hand->close(); 
	}
	
	close FILEB;
}

#xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx 

#xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx 

sub multi_species_t2{
	
	my $input = $_[0];
	my $species = $_[1];
	my $output1 = $input."_unr";
	
	#------------------------------------------------------------------------------------------
	open (FILEF1, "<$input") or die "Cannot open file $input :$!";
	open (FILEF2, ">$output1") or die "Cannot open file $output1 :$!";
	
	my $line1 = <FILEF1>;
	
	while($line1){
	{
	#    chomp($line);
		if ($line1 =~ (m/^\>$species/)){
		chomp($line1);
		print FILEF2 $line1;
		$line1 = <FILEF1>;
		chomp($line1);
		print FILEF2 "\t", $line1,"\n";
	   }   
	}
	$line1 = <FILEF1>;
	}
	
	close FILEF1;
	close FILEF2;
	#------------------------------------------------------------------------------------------
	
	my $output2 = $output1."and";
	my $output3 = $output1."and2";
	open(IN,"<$output1");
	open (FILEF3, ">$output2");
	open (FILEF4, ">$output3");
	
	
	while (<IN>){
		my $line = $_;
		chomp($line);
		my @fields=split (/\t/, $line);
	#   print $line,"\n";
		if($fields[5] ne "chrUn_random"){
			print FILEF3 join ("\t",@fields[0 ... scalar(@fields)-2]), "\n", $fields[scalar(@fields)-1], "\n";
			print FILEF4 join ("\t",@fields[0 ... scalar(@fields)-2]), "\t", $fields[scalar(@fields)-1], "\n";	
		}
	}
	
	
	close IN;
	close FILEF3;
	close FILEF4;
	unlink $output1;
	
	#------------------------------------------------------------------------------------------
	# OLD T3.PL RUDIMENT
	
	my $t3output = $output2;
	$t3output =~ s/gap_op_unrand/nogap_op_unrand/g;
	
	open(IN,"<$output2");
	open(OUTA,">$t3output");
	
	
	while (<IN>){
		s/-//g unless /^>/;
		print OUTA;
	}
	
	close IN;
	close OUTA;
	#------------------------------------------------------------------------------------------
}
#xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx 


#xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxxmulti_species_t2_2 xxxxxxx 
sub multi_species_t2_2{
	#print "IN multi_species_t2_2 : @_\n";
	my $input = $_[0];
	my $species = $_[1];
	my $output1 = $input."2";
	
	
	open (FILEF1, "<$input");
	open (FILEF2, ">$output1");
	
	my $line1 = <FILEF1>;
	
	while($line1){
	{
	#    chomp($line);
		if ($line1 =~ (m/^\>$species/)){
		chomp($line1);
		print FILEF2 $line1;
		$line1 = <FILEF1>;
		chomp($line1);
		print FILEF2 "\t", $line1,"\n";
	   }   
	}
	$line1 = <FILEF1>;
	}
	
	close FILEF1;
	close FILEF2;
}

#xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx 


#xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx 
sub sputnikoutput_corrector{
	my $input = $_[0];
	my $output = $_[1];
	open(IN,"<$input") or die "Cannot open file $input :$!";
	open(OUT,">$output") or die "Cannot open file $output :$!";
	my $tine;
	while (my $line=<IN>){
		if($line =~/length /){
			$tine = $line;
			$tine =~ s/\s+/\t/g;
			my @fields = split(/\t/,$tine);	
			if ($fields[6] > 60){
				print OUT $line;
				$line = <IN>;
			
				while (($line !~ /nucleotide/) && ($line !~ /^>/)){
					chomp $line;
					print OUT $line;
					$line = <IN>;
				}
				print OUT "\n";
				print OUT $line;
			}
			else{
				print OUT $line;
			}
		}
		else{
			print OUT $line;
		}
	}
	close IN;
	close OUT;
}
#xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx 


#xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx 
sub multi_species_t4{
#	print "multi_species_t4 : @_\n";
	my $input = $_[0];
	my $output = $_[1];
	open (FILEA, "<$input");
	open (FILEB, ">$output");
	
	my $line = <FILEA>;
	
	while ($line) {
	   # chomp $line;
		if ($line =~ />/) {
			chomp $line;
			print FILEB $line, "\n"; 
		}
	
		
		if ($line =~ /^m/ | $line =~ /^d/ | $line =~ /^t/ | $line =~ /^p/){
		chomp $line;
		print FILEB $line, " " ;
		$line = <FILEA>;
		chomp $line;
		print FILEB $line,"\n";
		}
	
		$line = <FILEA>;
	}
	
	
	close FILEA;
	close FILEB;

}

#xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx 


#xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx 
sub multi_species_t5{
	
	my $input = $_[0];
	my $output = $_[1];
	
	open(FILEB,"<$input");
	open(FILEC,">$output");
	
	my $curdef;
	
	while (my $line = <FILEB> ) {
	
		if ($line =~ /^>/){
		chomp $line;
		$curdef = $line;
		next;
	}
	
	if ($line =~ /^m/ | $line =~ /^d/ | $line =~ /^t/ | $line =~ /^p/){
		print  FILEC $curdef," ",$line;
	}
	
	}
	
	
	close FILEB;
	close FILEC;

}
#xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx 


#xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx 
sub multi_species_t6{
	my $input = $_[0];
	my $output = $_[1];	
	my $focalstrand=$_[3];
#	print "inpput = @_\n"; 
	open (FILE, "<$input");
	open (FILE_MICRO, ">$output");
	my $linecounter=0;	
	while (my $line = <FILE>){
		$linecounter++;
		chomp $line;
		#print "line = $line\n";
		#MONO#
		$line =~ /$focalspec\s[a-zA-Z]+[0-9a-zA-Z]+\s[0-9]+\s[0-9]+\s([+\-])/;
		my $strand=$1;
		my $no_of_species = ($line =~ s/\s+[+\-]\s+/ /g);
		#print "line = $line\n";
		my $specfieldsend = 2 + ($no_of_species*4) - 1;
		my @fields = split(/\s+/, $line);
		my @speciesdata = @fields[0 ... $specfieldsend];
		$line =~ /([a-z]+nucleotide)\s([0-9]+)\s:\s([0-9]+)/;
		my ($tide, $start, $end) = ($1, $2, $3);
		#print "no_of_species=$no_of_species.. speciesdata = @speciesdata and ($tide, $start, $end)\n";
		if($line  =~ /mononucleotide/){
			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  mono($fields[$#fields]),),"\n";
		}
		#DI#	
		elsif($line =~ /dinucleotide/){
			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  di($fields[$#fields]),),"\n";
		}
		#TRI#	
		elsif($line =~ /trinucleotide/ ){
			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  tri($fields[$#fields]),),"\n";
		}
		#TETRA#
		elsif($line =~ /tetranucleotide/){
			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  tetra($fields[$#fields]),),"\n";
		}
		#PENTA#
		elsif($line =~ /pentanucleotide/){
			#print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  penta($fields[$#fields]),),"\n";
		}
		else{
		#	print "not: @fields\n";
		}
	}
#	print "linecounter=$linecounter\n"; 
	close FILE;
	close FILE_MICRO;
}

sub mono {
	my $st = $_[0];
	my $tp = unpack "A1"x(length($st)/1),$st;
	my $var1 = substr($tp, 0, 1);
	return join ("\t", $var1);
}
sub di {
	my $st = $_[0];
	my $tp = unpack "A2"x(length($st)/2),$st;
	my $var1 = substr($tp, 0, 2);
	return join ("\t", $var1);
}
sub tri {
	my $st = $_[0];
	my $tp = unpack "A3"x(length($st)/3),$st;
	my $var1 = substr($tp, 0, 3);
	return join ("\t", $var1);
}
sub tetra {
	my $st = $_[0];
	my $tp = unpack "A4"x(length($st)/4),$st;
	my $var1 = substr($tp, 0, 4);
	return join ("\t", $var1);
}
sub penta {
	my $st = $_[0];
	my $tp = unpack "A5"x(length($st)/5),$st;
	my $var1 = substr($tp, 0, 5);
	return join ("\t", $var1);
}

#xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx 


#xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx 
sub t9{
	my $input1 = $_[0];
	my $input2 = $_[1];
	my $output = $_[2];
	
	
	open(IN1,"<$input1") if -e $input1; 
	open(IN2,"<$input2") or die "cannot open file $_[1] : $!";
	open(OUT,">$output") or die "cannot open file $_[2] : $!"; 
	
	
	my %seen = ();
	my $prevkey = 0;
	
	if (-e $input1){
		while (my $line = <IN1>){
			chomp($line);
			my @fields = split(/\t/,$line);
			my $key1 = join ("_",@fields[0,1,3,4,5]);
		#	print "key in t9 = $key1\n";
			$seen{$key1}++	if ($prevkey ne $key1) ;
			$prevkey = $key1;
		}
#		print "done first hash\n";
		close IN1;
	}
	
	while (my $line = <IN2>){
	#	print $line, "**\n";
		if (-e $input1){
			chomp($line);
			my @fields = split(/\t/,$line);
			my $key2 = join ("_",@fields[0,1,3,4,5]);
			if (exists $seen{$key2}){
				print OUT "$line\n"	;
				delete $seen{$key2};
			}
		}
		else {
			print OUT "$line\n"	;		
		}
	}
	
	close IN2;
	close OUT;
}
#xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx 


sub multiSpecies_compound_microsat_hunter3{	
	
	my $input1 = $_[0];  ###### the *_sput_op4_ii file
	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
	my $output1 = $_[2]; ###### plain microsatellite file
	my $output2 = $_[3]; ###### compound microsatellite file
	my $org = $_[4]; ###### 1 or 2
	$no_of_species = $_[5];
	#print "IN multiSpecies_compound_microsat_hunter3: @_\n"; 
	#my @tags = split(/\t/,$info);
	sub compoundify;
	open(IN,"<$input1") or die "Cannot open file $input1 $!";
	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
	open(OUT,">$output1") or die "Cannot open file $output1 $!";
	open(OUT2,">$output2") or die "Cannot open file $output2 $!";
	$infocord = 2 + (4*$no_of_species) - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
	
	my @thresholds = ("0");
	push(@thresholds, split(/_/,$_[6]));
	sub thresholdCheck;
	my %micros = ();
	while (my $line = <IN>){
	#	print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
		next if $line =~ /\t\t/;
		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $3, $4, $5);
		#	print $key, "#-#-#-#-#-#-#-#\n";
			push (@{$micros{$key}},$line);		
		}
		else{
		}
	}
	close IN;
	my @deletedlines = ();
	
	my $linecount = 0;
	
	while(my $sine = <SEQ>){
		my %microstart=();
		my %microend=();
	
		my @sields = split(/\t/,$sine);
	
		my $key = ();
	
		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			$key = join("\t",$1, $2, $3, $4, $5);
		#	print $key, "<-<-<-<-<-<-<-<\n";		
		}
		else{
		}
	
		if (exists $micros{$key}){
			$linecount++;	
			my @microstring = @{$micros{$key}};
			my @tempmicrostring = @{$micros{$key}};
			
			foreach my $line (@tempmicrostring){
				my @fields = split(/\t/,$line);
				my $start = $fields[$startcord];
				my $end = $fields[$endcord];
				push (@{$microstart{$start}},$line);
				push (@{$microend{$end}},$line);	
			}
			my $firstflag = 'down';
			while( my $line =shift(@microstring)){
	#			print "-----------\nline = $line ";
				chomp $line;
				my @fields = split(/\t/,$line);
				my $start = $fields[$startcord];
				my $end = $fields[$endcord];
				my $startmicro = $line;
				my $endmicro = $line;
	
			#	print "fields=@fields, start = $start end=$end, startcord=$startcord, endcord=$endcord\n";
	
				delete ($microstart{$start});
				delete ($microend{$end});
				my $flag = 'down';	
				my $startflag = 'down';
				my $endflag = 'down';
				my $prestart = $start - $distance;
				my $postend = $end + $distance;
				my @compoundlines = ();
				my %compoundhash = ();
				push (@compoundlines, $line);
				push (@{$compoundhash{$line}},$line);
				my $startrank = 1;
				my $endrank = 1;
				
				while( ($startflag eq "down") || ($endflag eq "down") ){
				if ((($prestart < 0) && $firstflag eq "up") || (($postend > length($sields[$sequencepos])) && $firstflag eq "up") ) {
#					print "coming to the end of sequence,prestart = $prestart &  post end = $postend and sequence length =", length($sields[$sequencepos])," so exiting\n";
					last;
				}
					
				$firstflag = "up";
				if ($startflag eq "down"){		
					for my $i ($prestart ... $start){
					
						if(exists $microend{$i}){	
							chomp $microend{$i}[0];
							if(exists $compoundhash{$microend{$i}[0]}) {next;}
	#						print "sending from microend $startmicro, $microend{$i}[0] |||\n";
							if (identityMatch_thresholdCheck($startmicro, $microend{$i}[0], $startrank) eq "proceed"){
								push(@compoundlines, $microend{$i}[0]);
	#							print "accepted\n";
								my @tields = split(/\t/,$microend{$i}[0]);
								$startmicro = $microend{$i}[0];
								chomp $startmicro;
								$start = $tields[$startcord];
								$flag = 'down';
								$startrank++;
	#							print "startcompund = $microend{$i}[0]\n";
								delete $microend{$i};
								delete $microstart{$start};
								$startflag = 'down';
								$prestart = $start - $distance;
								last;
							}	
							else{
								$flag = 'up';
								$startflag = 'up';							
							}
						}
						else{
							$flag = 'up';
							$startflag = 'up';
						}
					}
				}
					
				$endrank = $startrank;
				
				if ($endflag eq "down"){				
					for my $i ($end ... $postend){
					
						if(exists $microstart{$i} ){
							chomp $microstart{$i}[0];
							if(exists $compoundhash{$microstart{$i}[0]}) {next;}	
	#						print "sending from microstart $endmicro, $microstart{$i}[0] |||\n";
	
							if(identityMatch_thresholdCheck($endmicro,$microstart{$i}[0], $endrank) eq "proceed"){
								push(@compoundlines, $microstart{$i}[0]);
	#								print "accepted\n";
								my @tields = split(/\t/,$microstart{$i}[0]);
								$end = $tields[$endcord]-0;
								$endmicro = $microstart{$i}[0];
								$endrank++;
								chomp $endmicro;
								$flag = 'down';
	#							print "endcompund = $microstart{$i}[0]\n";
								delete $microstart{$i};
								delete $microend{$end};
								shift @microstring;
								$postend = $end + $distance;
								$endflag = 'down';
								last;								
							}
							else{
								$flag = 'up';
								$endflag = 'up';							
							}
						}
						else{
							$flag = 'up';
							$endflag = 'up';
						}
					}
				}
	#			print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
			} 														#end while( $flag eq "down")
	#			print "compoundlines = @compoundlines \n";
			if (scalar (@compoundlines) == 1){
				print OUT $line,"\n";			
			}
			if (scalar (@compoundlines) > 1){
				my $compoundline = compoundify(\@compoundlines, $sields[$sequencepos]);
	#				print $compoundline,"\n";
				print OUT2 $compoundline,"\n";
			}
			} #end foreach my $line (@microstring){
		}	#if (exists $micros{$key}){
	
	
	}
	
	close OUT;
	close OUT2;
}


#------------------------------------------------------------------------------------------------
sub compoundify{
	my ($compoundlines, $sequence)  = @_;
#	print "\nfound to compound : @$compoundlines and$sequence \n";
	my $noOfComps = @$compoundlines;
#	print "Number of elements in hash is $noOfComps\n";
	my @starts;
	my @ends;
	foreach my $line (@$compoundlines){
#		print "compoundify.. line = $line \n";
		chomp $line;
		my @fields = split(/\t/,$line);
		my $start = $fields[$startcord];
		my $end = $fields[$endcord];
	#	print "start = $start, end = $end \n";
		push(@starts, $start);
		push(@ends,$end);		
	}
	my @temp = @$compoundlines;
	my $startline=$temp[0];
	my @mields  = split(/\t/,$startline);
	my $startcoord = $mields[$startcord];
	my $startgapsign=$mields[$endcord];
	my @startsorted = sort { $a <=> $b } @starts;
	my @endsorted = sort { $a <=> $b } @ends;
	my @intervals;
	for my $end (0 ... (scalar(@endsorted)-2)){
		my $interval = substr($sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1));
		push(@intervals,$interval);
	#	print "interval = $interval =\n";
	#	print "substr(sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1))\n";
	}
	push(@intervals,"");
	my $compoundmicrosat=();
	my $multiunit="";
	foreach my $line (@$compoundlines){
		my @fields = split(/\t/,$line);
		my $component="[".$fields[$microsatcord]."]".shift(@intervals);
		$compoundmicrosat=$compoundmicrosat.$component;
		$multiunit=$multiunit."[".$fields[$motifcord]."]";
#		print "multiunit = $multiunit\n";
	}
	my $compoundcopy = $compoundmicrosat;
	$compoundcopy =~ s/\[|\]//g;
	my $compoundlength = $mields[$startcord] + length($compoundcopy) - 1;
	
	
	my $compoundline = join("\t",(@mields[0 ... $infocord], "compound",@mields[$startcord ... $startcord+1],$compoundlength,$compoundmicrosat, $multiunit));
	return $compoundline;
}	

#------------------------------------------------------------------------------------------------

sub identityMatch_thresholdCheck{
	my $line1 = $_[0];
	my $line2 = $_[1];
	my $rank = $_[2];
	my @lields1 = split(/\t/,$line1);
	my @lields2 = split(/\t/,$line2);
#	print "recieved $line1 && $line2\n motif comparison: ", length($lields1[$motifcord])," : ",length($lields2[$motifcord]),"\n";
	
	if (length($lields1[$motifcord]) == length($lields2[$motifcord])){
		my $probe = $lields1[$motifcord].$lields1[$motifcord];
			#print "$probe :: $lields2[$motifcord]\n";
		return "proceed" if $probe =~ /$lields2[$motifcord]/;
			#print "line recieved\n";
		if ($rank ==1){
			return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
		}
		else {
			return "proceed" if thresholdCheck($line2) eq "proceed";
			return "stop";
		}
	}
	else{
		if ($rank ==1){
			return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
		}
		else {
			return "proceed" if thresholdCheck($line2) eq "proceed";
			return "stop";
		}
	}
	return "stop";
}
#------------------------------------------------------------------------------------------------

sub thresholdCheck{
	my @checkthresholds=(0,@thresholds);
	#print "IN thresholdCheck: @_\n";
	my $line = $_[0];
	my @lields = split(/\t/,$line);
	return "proceed" if length($lields[$microsatcord]) >= $checkthresholds[length($lields[$motifcord])];
	return "stop";
}
#xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx 

sub multiSpecies_filtering_interrupted_microsats{
#	print "IN multiSpecies_filtering_interrupted_microsats: @_\n";
	my $unfiltered = $_[0];
	my $filtered = $_[1];
	my $residue = $_[2];
	my $no_of_species = $_[5];
	open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
	open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
	open(RES,">$residue") or die "Cannot open file $residue: $!";
		
	$infocord = 2 + (4*$no_of_species) - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	
	
	my @sub_thresholds = (0);
	
	push(@sub_thresholds, split(/_/,$_[3]));
	my @thresholds = (0);
	
	push(@thresholds, split(/_/,$_[4]));
	
	while (my $line = <UNF>) {
		next if $line !~ /[a-z]/;
		#print $line;
		chomp $line;
		my @fields = split(/\t/,$line);
		my $motif = $fields[$motifcord];
		my $realmotif = $motif;
		#print "motif = $motif\n";
		if ($motif =~ /^\[/){
			$motif =~ s/^\[//g;
			my @motifs = split(/\]/,$motif);
			$realmotif = $motifs[0];
		}
#		print "realmotif = $realmotif";
		my $motif_size = length($realmotif);
		
		my $microsat = $fields[$microsatcord];
#		print "microsat = $microsat\n";
		$microsat =~ s/^\[|\]$//sg;
		my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);
		
		$microsat = join("",@microsats);
		if (length($microsat) < $thresholds[$motif_size]) {
		#	print length($microsat)," < ",$thresholds[$motif_size],"\n"; 
			print RES $line,"\n"; next;
		}
		my @lengths = ();
		foreach my $mic (@microsats){
			push(@lengths, length($mic));
		}
		if (largest_microsat(@lengths) < $sub_thresholds[$motif_size]) {
	#		print largest_microsat(@lengths)," < ",$sub_thresholds[$motif_size],"\n"; 
			print RES $line,"\n"; next;}
			else {print FIL $line,"\n"; next;
		}
	}
	close FIL;
	close RES;

}

sub largest_microsat{
	my $counter = 0;
	my($max) = shift(@_);
    foreach my $temp (@_) {
    	#print "finding largest array: $maxcounter \n";
    	if($temp > $max){
        	$max = $temp;
        }
    }
    return($max);
}

#xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx 
sub multiSpecies_compound_microsat_analyzer{
	####### PARAMETER ########
	##########################
	
	my $input1 = $_[0];  ###### the *_sput_op4_ii file
	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
	my $output2 = $_[3]; ###### the pure compound microsatellites
	my $org = $_[4];
	my $no_of_species = $_[5];
#	print "IN multiSpecies_compound_microsat_analyzer: $input1\n $input2\n $output1\n $output2\n $org\n $no_of_species\n";
	$infocord = 2 + (4*$no_of_species) - 1;
	$typecord = 2 + (4*$no_of_species) + 1 - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	
	open(IN,"<$input1") or die "Cannot open file $input1 $!";
	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
	
	open(OUT,">$output1") or die "Cannot open file $output1 $!";
	open(OUT2,">$output2") or die "Cannot open file $output2 $!";
	
	
#	print "opened files \n";
	my %micros = ();
	my $keycounter=0;
	my $linecounter=0;
	while (my $line = <IN>){
		$linecounter++;
		if ($line =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
			push (@{$micros{$key}},$line);		
			$keycounter++;
		}
		else{
	#		print "no key\n";
		}
	}
	close IN;
	my @deletedlines = ();
#	print "done hash . linecounter=$linecounter, keycounter=$keycounter\n";
	#---------------------------------------------------------------------------------------------------
	# NOW READING THE SEQUENCE FILE
	my $keyfound=0;
	my $keyexists=0;
	my $inter=0;
	my $pure=0;
	
	while(my $sine = <SEQ>){
		my %microstart=();
		my %microend=();
		my @sields = split(/\t/,$sine);
		my $key = ();
		if ($sine =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			$key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
			$keyfound++;
		}
		else{
		
		}
		if (exists $micros{$key}){
			$keyexists++;
			my @microstring = @{$micros{$key}};
	
			my @filteredmicrostring;
			
			foreach my $line (@microstring){
				chomp $line;
				my $copy_line = $line;
				my @fields = split(/\t/,$line);
				my $start = $fields[$startcord];
				my $end = $fields[$endcord];				
				# FOR COMPOUND MICROSATELLITES	
				if ($fields[$typecord] eq "compound"){					
					$line = compound_microsat_analyser($line);	
					if ($line eq "NULL") {
						print OUT2 "$copy_line\n";
						$pure++;
						next;
					}
					else{
						print OUT "$line\n";
						$inter++;
						next;
					}
				}
			}
			
		}	#if (exists $micros{$key}){
	}
	close OUT;
	close OUT2;
#	print "keyfound=$keyfound, keyexists=$keyexists, pure=$pure, inter=$inter\n";
}
	
sub compound_microsat_analyser{
	my $line = $_[0];
	my @fields = split(/\t/,$line);
	my $motifline = $fields[$motifcord];
	my $microsat = $fields[$microsatcord];
	$motifline =~ s/^\[|\]$//g;
	$microsat =~ s/^\[|\]$//g;
	$microsat =~ s/-//g;
	my @interruptions = ();
	my @motields = split(/\]\[/,$motifline);
	my @microields = split(/\][a-zA-Z|-]*\[/,$microsat);
	my @inields = split(/[.*]/,$microsat);
	shift @inields;
	my @motifcount = scalar(@motields);
	my $prevmotif = $motields[0];
	my $prevmicro = $microields[0];
	my $prevphase = substr($microields[0],-(length($motields[0])),length($motields[0]));
	my $localflag = 'down';
	my @infoarray = ();
	
	for my $l (1 ... (scalar(@motields)-1)){
		my $probe = $prevmotif.$prevmotif;
		if (length $prevmotif != length $motields[$l]) {$localflag = "up"; last;}
		
		if ($probe =~ /$motields[$l]/i){ 
			my $curr_endphase = substr($microields[$l],-length($motields[$l]),length($motields[$l]));
			my $curr_startphase = substr($microields[$l],0,length($motields[$l]));
			if ($curr_startphase =~ /$prevphase/i) {
				$infoarray[$l-1] = "insertion";
			}
			else {
				$infoarray[$l-1] = "indel/substitution";
			}
		
			$prevmotif = $motields[$l]; $prevmicro = $microields[$l]; $prevphase = $curr_endphase;
			next;
		}
		else {$localflag = "up"; last;}
	}
	if ($localflag eq 'up') {return "NULL";}
	
	if (length($prevmotif) == 1) {$fields[$typecord] = "mononucleotide";}
	if (length($prevmotif) == 2) {$fields[$typecord] = "dinucleotide";}
	if (length($prevmotif) == 3) {$fields[$typecord] = "trinucleotide";}
	if (length($prevmotif) == 4) {$fields[$typecord] = "tetranucleotide";}
	if (length($prevmotif) == 5) {$fields[$typecord] = "pentanucleotide";}

	@microields = split(/[\[|\]]/,$microsat);
	my @microsats = ();
	my @positions = ();
	my $lengthtracker = 0;

	for my $i (0 ... (scalar(@microields ) - 1)){
		if ($i%2 == 0){
			push(@microsats,$microields[$i]);
			$lengthtracker = $lengthtracker + length($microields[$i]);

		}
		else{			
			push(@interruptions,$microields[$i]);
			push(@positions, $lengthtracker+1);
			$lengthtracker = $lengthtracker + length($microields[$i]);	
		}				
		
	}
	my $returnline = join("\t",(join("\t",@fields),join(",",(@infoarray)),join(",",(@interruptions)),join(",",(@positions)),scalar(@interruptions)));
	return($returnline);
}

#xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx 

sub multiSpecies_compoundClarifyer{	
#	print "IN multiSpecies_compoundClarifyer: @_\n";
	my $input1 = $_[0];  ###### the *_sput_compound
	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
	my $output2 = $_[3]; ###### compound file
	my $org = $_[4];
	my $no_of_species = $_[5];
	@thresholds = "0";
	push(@thresholds, split(/_/,$_[6]));
	
	
	$infocord = 2 + (4*$no_of_species) - 1;
	$typecord = 2 + (4*$no_of_species) + 1 - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
	
	$interr_poscord = $motifcord + 3;
	$no_of_interruptionscord = $motifcord + 4;
	$interrcord = $motifcord + 2;
	$interrtypecord = $motifcord + 1;
	
	
	open(IN,"<$input1") or die "Cannot open file $input1 $!";
	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
	
	open(INT,">$output1") or die "Cannot open file $output2 $!";
	open(COMP,">$output2") or die "Cannot open file $output2 $!";
	#open(CH,">changed") or die "Cannot open file changed $!";
		
#	print "opened files \n";
	my $linecounter = 0;
	my $microcounter = 0;
	
	my %micros = ();
	while (my $line = <IN>){
	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
		$linecounter++;
		if ($line =~ /([a-zA-Z0-9]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([a-zA-Z0-9]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([a-zA-Z0-9]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
	#		print $key, "#-#-#-#-#-#-#-#\n";
			push (@{$micros{$key}},$line);	
			$microcounter++;
		}
		else {print $line;}
	}
#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
	close IN;
	my @deletedlines = ();
#	print "done hash \n";
	$linecounter = 0;
	#---------------------------------------------------------------------------------------------------
	# NOW READING THE SEQUENCE FILE
	my @microsat_types = qw(_ mononucleotide dinucleotide trinucleotide tetranucleotide);
	 $printer = 0;
	
	while(my $sine = <SEQ>){
		my %microstart=();
		my %microend=();
		my @sields = split(/\t/,$sine);
		my $key = ();
		if ($sine =~ /([a-z0-9A-Z]+)\s+(chr[0-9a-zA-Z]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+(chr[0-9a-zA-Z]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+(chr[0-9a-zA-Z]+)\s+([0-9]+)\s+([0-9]+)\s/ ) {
			$key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
		}
		else{
#			print "no key in $sine\nfor pattern ([a-z0-9A-Z]+) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+)   / \n"; 
		}
	
		if (exists $micros{$key}){
			my @microstring = @{$micros{$key}};
			delete $micros{$key};
			
			foreach my $line (@microstring){
#				print "#---------#---------#---------#---------#---------#---------#---------#---------\n" if $printer == 1; 
#				print "microsat = $line" if $printer == 1; 
				$linecounter++;
				my $copy_line = $line;
				my @mields = split(/\t/,$line);
				my @fields = @mields;
				my $start = $fields[$startcord];
				my $end = $fields[$endcord];
				my $microsat = $fields[$microsatcord];
				my $motifline = $fields[$motifcord];
				my $microsatcopy = $microsat;
				my $positioner = $microsat;
				$positioner =~ s/[a-zA-Z|-]/_/g;
				$microsatcopy =~ s/^\[|\]$//gs;
				chomp $microsatcopy;
				my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
				my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
				my $absolutstart = 1; my $absolutend = $absolutstart + ($end-$start);
#				print "absolut: start = $absolutstart, end = $absolutend\n" if $printer == 1;
				shift @inields;
				#print "inields =@inields<\n";
				$motifline =~ s/^\[|\]$//gs;
				chomp $motifline;
				#print "microsat = $microsat, its copy = $microsatcopy motifline = $motifline<\n";
				my @motields = split(/\]\[/,$motifline);
				my $seq = $microsatcopy;
				$seq =~ s/\[|\]//g;
				my $seqlen = length($seq);
				$seq = " ".$seq;
	
				my $longestmotif_no = longest_array_element(@motields);
				my $shortestmotif_no = shortest_array_element(@motields);
				#print "shortest motif = $motields[$shortestmotif_no], longest motif = $motields[$longestmotif_no] \n";
			
				my $search = $motields[$longestmotif_no].$motields[$longestmotif_no];
				if ((length($motields[$longestmotif_no]) == length($motields[$shortestmotif_no])) && ($search !~ /$motields[$shortestmotif_no]/) ){
					print COMP $line;
					next;
				}
			
				my @shortestmotif_nos = ();
				for my $m (0 ... $#motields){
					push(@shortestmotif_nos, $m) if (length($motields[$m]) == length($motields[$shortestmotif_no]) );
				}
				## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
				my $newleft =();
				my $leftstopper = 0; my $rightstopper = 0;
				foreach my $shortmotif_no (@shortestmotif_nos){
					next if $shortmotif_no == 0;
					my $last_left =  $shortmotif_no; #$#motields;
					my $last_hitter = 0;
					for (my $i =($shortmotif_no-1); $i>=0; $i--){	
						my $search  = $motields[$shortmotif_no];
						if (length($motields[$shortmotif_no]) == 1){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
						if( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
							$last_hitter = 1;
							$last_left = $i+1; last;				
						}
						my $probe = $motields[$i];
						if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
	
						if ($probe !~ /$search/){
							$last_hitter = 1;
							$last_left = $i+1; 
	#						print "hit the last match: before $microields[$i]..last left = $last_left.. exiting.\n";
							last;
						}
						$last_left--;$last_hitter = 1;
	#					print "passed tests, last left = $last_left\n";
					}
	#				print "comparing whether $last_left < $shortmotif_no, lasthit = $last_hitter\n";
					if (($last_left) < $shortmotif_no && $last_hitter == 1) {$leftstopper=0; last;}
					else {$leftstopper = 1;
	#					print "leftstopper = 1\n";
					}
				}
				
				## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
				my $newright =();
				foreach my $shortmotif_no (@shortestmotif_nos){
					next if $shortmotif_no == $#motields;
					my $last_right =  $shortmotif_no;# -1;
					for my $i ($shortmotif_no+1 ... $#motields){
						my $search  = $motields[$shortmotif_no];
						if (length($motields[$shortmotif_no]) == 1 ){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
						if ( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
							$last_right = $i-1; last;			
						}
						my $probe = $motields[$i];
						if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
						if (  $probe !~ /$search/){
							$last_right = $i-1; last;
						}
						$last_right++; 
					}
					if (($last_right) > $shortmotif_no) {$rightstopper=0; last;# print "rightstopper = 0\n";
					}
					else {$rightstopper = 1;
					}
				}
				
				
				if ($rightstopper == 1 && $leftstopper == 1){
					print COMP $line; 
#					print "rightstopper == 1 && leftstopper == 1\n" if $printer == 1; 
					next;
				}
	
#				print "pased initial testing phase \n" if $printer == 1; 
				my @outputs = ();
				my @orig_starts = ();
				my @orig_ends = ();
				for my $mic (0 ... $#microields){
					my $miclen = length($microields[$mic]);
					my $microleftlen = 0;
					#print "\nmic = $mic\n";
					if($mic > 0){
						for my $submin (0 ... $mic-1){
							my $interval = ();
							if (!exists $inields[$submin]) {$interval = "";}
							else {$interval = $inields[$submin];}
							#print "inield =$interval< and microield =$microields[$submin]<\n  ";
							$microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
						}
					}
					push(@orig_starts,($microleftlen+1));
					push(@orig_ends, ($microleftlen+1 + $miclen -1));
				}				
	
	#############  F I N A L L Y   S T U D Y I N G   S E Q U E N C E S  #########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
	
	
				for my $mic (0 ... $#microields){
					my $miclen = length($microields[$mic]);
					my $microleftlen = 0;
					if($mic > 0){
						for my $submin (0 ... $mic-1){
						#	if(!exists $inields[$submin]) {$inields[$submin] = "";}
							my $interval = ();
							if (!exists $inields[$submin]) {$interval = "";}
							else {$interval = $inields[$submin];}
							#print "inield =$interval< and microield =$microields[$submin]<\n  ";
							$microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
						}
					}
					$fields[$startcord] = $microleftlen+1;
					$fields[$endcord] = $fields[$startcord] + $miclen -1;
					$fields[$typecord] = $microsat_types[length($motields[$mic])];
					$fields[$microsatcord] = $microields[$mic];
					$fields[$motifcord] = $motields[$mic];		
					my $templine = join("\t", (@fields[0 .. $motifcord]) );
					my $orig_templine = join("\t", (@fields[0 .. $motifcord]) );
					my $newline;
					my $lefter = 1; my $righter = 1;
					if ( $fields[$startcord] < 2){$lefter = 0;}
					if ($fields[$endcord] == $seqlen){$righter = 0;}
	
					while($lefter == 1){
						$newline = left_extender($templine, $seq,$org);
#						print "returned line from left extender= $newline \n" if $printer == 1; 
						if ($newline eq $templine){$templine = $newline; last;}
						else {$templine = $newline;}
						
						if (left_extention_permission_giver($templine) eq "no") {last;}
					}
					while($righter == 1){
						$newline = right_extender($templine, $seq,$org);
#						print "returned line from right extender= $newline \n" if $printer == 1; 
						if ($newline eq $templine){$templine = $newline; last;}
						else {$templine = $newline;}
						if (right_extention_permission_giver($templine) eq "no") {last;}
					}
					my @tempfields = split(/\t/,$templine);
					$tempfields[$microsatcord] =~ s/\]|\[//g;
					$tempfields[$motifcord] =~ s/^\[|\]$//gs;
					my @tempmotields = split(/\]\[/,$tempfields[$motifcord]);			
					
					if (scalar(@tempmotields) == 1 && $templine eq $orig_templine) { 
#						print "scalar ( tempmotields) = 1\n" if $printer == 1; 
						next;
					} 
					my $prevmotif = shift(@tempmotields);
					my $stopper = 0;
					
					foreach my $tempmot (@tempmotields){
						if (length($tempmot) != length($prevmotif)) {$stopper = 1; last;}
						my $search = $prevmotif.$prevmotif;
						if ($search !~ /$tempmot/) {$stopper = 1; last;}
						$prevmotif = $tempmot;
					}
					if ( $stopper == 1) { 
#						print "length tempmot  != length prevmotif\n" if $printer == 1; 
						next; 
					} 
					my $lastend  = 0;
					#----------------------------------------------------------
					my $left_captured = (); my $right_captured = ();
					my $left_bp = (); my $right_bp = ();
	#				print "new startcord = $tempfields[$startcord] , new endcord  = $tempfields[$endcord].. orig strts = @orig_starts and orig ends = @orig_ends\n";
					for my $o (0 ... $#orig_starts){
#						print "we are talking abut tempstart:$tempfields[$startcord] >= origstart:$lastend && tempstart:$tempfields[$startcord] <= origend: $orig_ends[$o] \n" if $printer == 1; 
#						print "we are talking abut tempend:$tempfields[$endcord] >= origstart:$lastend && tempstart:$tempfields[$endcord] >= origend: $orig_ends[$o] \n" if $printer == 1; 
	
						if (($tempfields[$startcord] > $lastend)  && ($tempfields[$startcord] <= $orig_ends[$o])){ # && ($tempfields[$startcord] != $fields[$startcord])
#							print "motif captured on left is $microields[$o] from $microsat\n" if $printer == 1; 
							$left_captured  = $o;
							$left_bp =  $orig_ends[$o] - $tempfields[$startcord] + 1;
						}
						elsif ($tempfields[$endcord] > $lastend  && $tempfields[$endcord] <= $orig_ends[$o]){ #&& $tempfields[$endcord] != $fields[$endcord])
#							print "motif captured on right is $microields[$o] from $microsat\n" if $printer == 1; 
							$right_captured  = $o;
							$right_bp = $tempfields[$endcord]  - $orig_starts[$o] + 1;
						}
						$lastend = $orig_ends[$o]
					}
#					print "leftcaptured = $left_captured, right = $right_captured\n" if $printer==1;
					my $leftmotif = (); my $left_trashed = ();
					if ($tempfields[$startcord] != $fields[$startcord]) {
						$leftmotif = $motields[$left_captured]; 
#						print "$left_captured in @microields: $motields[$left_captured]\n" if $printer == 1; 
						if ( $left_captured !~ /[0-9]+/) {print $line,"\n", $templine,"\n"; }
						 $left_trashed = length($microields[$left_captured]) - $left_bp;
					}
					my $rightmotif = (); my $right_trashed = ();
					if ($tempfields[$endcord] != $fields[$endcord]) {
#						print "$right_captured in @microields: $motields[$right_captured]\n" if $printer == 1; 
						$rightmotif = $motields[$right_captured];
						$right_trashed = length($microields[$right_captured]) - $right_bp;
					} 
					
					########## P A R A M S #####################@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
					$stopper = 0;
					my $deletioner = 0;
					#if($tempfields[$startcord] != $fields[$startcord]){
#						print "enter left: tempfields,startcord  : $tempfields[$startcord] != $absolutstart && left_captured: $left_captured != 0 \n" if $printer==1;
						if ($left_captured != 0){ 
#							print "at line 370, going: 0 ... $left_captured-1 \n" if $printer == 1;
							for my $e (0 ... $left_captured-1){
								if( length($motields[$e]) > 2 && length($microields[$e]) > (3* length($motields[$e]) )){
#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;  
									$deletioner++; last;
								}
								if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;  
									$deletioner++; last;
								}
								if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1; 
									$deletioner++; last;
								}
							}				
						}
					#}
#					print "after left search, deletioner = $deletioner\n" if $printer == 1;
					if ($deletioner >= 1) {  
#						print "deletioner = $deletioner\n" if $printer == 1;
						next; 
					} 
					
					$deletioner = 0;
	
					#if($tempfields[$endcord] != $fields[$endcord]){				
#						print "if tempfields endcord: $tempfields[$endcord] != absolutend: $absolutend\n and $right_captured != $#microields\n" if $printer==1;
						if ($right_captured != $#microields){ 
#							print "at line 394, going: $right_captured+1 ... $#microields \n" if $printer == 1;
							for my $e ($right_captured+1 ... $#microields){
								if( length($motields[$e]) > 2 &&  length($microields[$e]) > (3* length($motields[$e])) ){
#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1; 
									$deletioner++; last;
								}
								if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
									$deletioner++; last;
								}
								if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
									$deletioner++; last;
								}
							}				
						}
					#}
#					print "deletioner = $deletioner\n" if $printer == 1;
					if ($deletioner >= 1) {  
						next; 
					} 
					my $leftMotifs_notCaptured = ();
					my $rightMotifs_notCaptured = ();
									
					if ($tempfields[$startcord] != $fields[$startcord] ){
						#print "in left params: (length($leftmotif) == 1 && $tempfields[$startcord] != $fields[$startcord]) ... and .... $left_trashed > (1.5* length($leftmotif]) && ($tempfields[$startcord] != $fields[$startcord])\n";
						if (length($leftmotif) == 1 && $left_trashed > 3){
#							print "invaded left motif is long mononucleotide" if $printer == 1;
							 next;
	
						}
						elsif ((length($leftmotif) != 1 && $left_trashed > ( thrashallow($leftmotif)) && ($tempfields[$startcord] != $fields[$startcord]) ) ){
#							print "invaded left motif too long" if $printer == 1; 						
							 next; 
						}
					}
					if ($tempfields[$endcord] != $fields[$endcord] ){
						#print "in right params: after $tempfields[$endcord] != $fields[$endcord]  .....   (length($rightmotif)==1 && $tempfields[$endcord] != $fields[$endcord]) ... and ... $right_trashed > (1.5* length($rightmotif))\n";
						if (length($rightmotif)==1 && $right_trashed){
#							print "invaded right motif is long mononucleotide" if $printer == 1; 
							 next; 
	
						}
						elsif (length($rightmotif) !=1 && ($right_trashed > ( thrashallow($rightmotif))  && $tempfields[$endcord] != $fields[$endcord])){
#							print "invaded right motif too long" if $printer == 1;
							 next; 
	
						}
					}
					push @outputs, $templine;
				}
				if (scalar(@outputs) == 0){ print COMP $line; next;}
	#			print "outputs are:", join("\n",@outputs),"\n";
				if (scalar(@outputs) == 1){ 
					my @oields = split(/\t/,$outputs[0]);
					my $start = $oields[$startcord]+$mields[$startcord]-1;
					my $end = $start+($oields[$endcord]-$oields[$startcord]);
					$oields[$startcord] = $start; $oields[$endcord] = $end;
					print INT join("\t",@oields), "\n"; 
				#	print CH $line,;
				}
				if (scalar(@outputs) > 1){ 
					my $motif_min = 10;
					my $chosen_one = $outputs[0];
					foreach my $micro (@outputs){
						my @oields = split(/\t/,$micro);
						my $tempmotif = $oields[$motifcord];
						$tempmotif =~ s/^\[|\]$//gs;
						my @omots = split(/\]\[/, $tempmotif);
			#			print "motif_min  = $motif_min, current motif  = $tempmotif\n";
						my $start = $oields[$startcord]+$mields[$startcord]-1;
						my $end = $start+($oields[$endcord]-$oields[$startcord]);
						$oields[$startcord] = $start; $oields[$endcord] = $end;
						if(length($omots[0]) < $motif_min) {
							$chosen_one = join("\t",@oields); 
							$motif_min = length($omots[0]);
						}
					}
					print INT $chosen_one, "\n";
				#	print "chosen one is ".$chosen_one, "\n";
				#	print CH $line;
				
				
				}
				
			}
			
		}	#if (exists $micros{$key}){
		else{
		}
	}
	close INT;
	close COMP;
}
sub left_extender{
	#print "left extender\n";
	my ($line, $seq, $org) = @_;	
#	print "in left extender... line passed = $line and sequence is $seq\n";
	chomp $line;
	my @fields = split(/\t/,$line);
	my $rstart = $fields[$startcord];
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/\[|\]//g;
	my $rend = $rstart + length($microsat)-1;
	$microsat =~ s/-//g;
	my $motif = $fields[$motifcord];
	my $firstmotif = ();

	if ($motif =~ /^\[/){
		$motif =~ s/^\[//g;
		$motif =~ /([a-zA-Z]+)\].*/;
		$firstmotif = $1;
	}
	else {$firstmotif = $motif;}
	
	#print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n";
	my $leftphase = substr($microsat, 0,length($firstmotif));
	my $phaser = $leftphase.$leftphase;
	my @phase = split(/\s*/,$leftphase);
	my @phases;
	my @copy_phases = @phases;
	my $crawler=0;
	for (0 ... (length($leftphase)-1)){
		push(@phases, substr($phaser, $crawler, length($leftphase)));
		$crawler++;
	}

	my $start = $rstart;
	my $end = $rend;
	
	my $leftseq = substr($seq, 0, $start);
#	print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n";	
	my @extentions = ();
	my @trappeds = ();
	my @intervalposs = ();
	my @trappedposs = ();
	my @trappedphases = ();
	my @intervals = ();
	my $firstmotif_length = length($firstmotif);
	foreach my $phase (@phases){
#		print "left phase\t",substr($leftseq, -10),"\t$phase\n";
#		print "search patter = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n";
		if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
#			print "in left pattern\n";
			my $trapped = $1;
			my $trappedpos = length($leftseq)-length($trapped);
			my $interval = $3;
			my $intervalpos = index($trapped, $interval) + 1;
#			print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n";

			my $extention = substr($trapped, 0, length($trapped)-length($interval));
			my $leftpeep = substr($seq, 0, ($start-length($trapped)));
			my @passed_overhangs;
			
			for my $i (1 ... length($phase)-1){
				my $overhang = substr($phase, -length($phase)+$i);
#				print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n";
				#TEMPORARY... BETTER METHOD NEEDED
				$leftpeep =~ s/-//g;
				if ($leftpeep =~ /$overhang$/i){
					push(@passed_overhangs,$overhang);
#					print "l overhang\n";
				}
			}
			
			if(scalar(@passed_overhangs)>0){
				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
				$extention = $overhang.$extention;
				$trapped = $overhang.$trapped;
				#print "trapped extended to $trapped \n";
				$trappedpos = length($leftseq)-length($trapped);
			}
			
			push(@extentions,$extention);
#			print "extentions = @extentions \n";

			push(@trappeds,$trapped );
			push(@intervalposs,length($extention)+1);
			push(@trappedposs, $trappedpos);
#			print "trappeds = @trappeds\n";
			push(@trappedphases, substr($extention,0,length($phase)));
			push(@intervals, $interval);
		}
	}
	if (scalar(@trappeds == 0)) {return $line;}
	
	my $nikaal = shortest_array_element(@intervals);
	
	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
	$fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
	##print "new fields 9 = $fields[9]\n";
	$fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);

	if($fields[$microsatcord] !~ /^\[/i){
		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
	}
	
	$fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
	
	if (exists ($fields[$motifcord+1])){
		$fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
	}
	else{$fields[$motifcord+1] = "indel/deletion";}
	##print "new fields 14 = $fields[14]\n";
	
	if (exists ($fields[$motifcord+2])){		
		$fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
	}
	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
	my @seventeen=();	
	if (exists ($fields[$motifcord+3])){			
		@seventeen = split(/,/,$fields[$motifcord+3]);	
	#	#print "scalarseventeen =@seventeen<-\n";
		for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
		$fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
	}
	
	else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
	
	##print "new fields 16 = $fields[16]\n";
	##print "new fields 17 = $fields[17]\n";
	
	
	my $returnline =  join("\t",@fields);
	my $pastline  = $returnline;
	if ($fields[$microsatcord] =~ /\[/){
		$returnline = multiSpecies_compoundClarifyer_merge($returnline);
	}
	return $returnline;
}
sub right_extender{
	my ($line, $seq, $org) = @_;	
	chomp $line;
	my @fields = split(/\t/,$line);
	my $rstart = $fields[$startcord];
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/\[|\]//g;
	my $rend = $rstart + length($microsat)-1;
	$microsat =~ s/-//g;
	my $motif = $fields[$motifcord];
	my $temp_lastmotif = ();

	if ($motif =~ /\]$/s){
		$motif =~ s/\]$//sg;
		$motif =~ /.*\[([a-zA-Z]+)/;
		$temp_lastmotif = $1;
	}
	else {$temp_lastmotif = $motif;}
	my $lastmotif = substr($microsat,-length($temp_lastmotif));
	##print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n";
	my $rightphase = substr($microsat, -length($lastmotif));
	my $phaser = $rightphase.$rightphase;
	my @phase = split(/\s*/,$rightphase);
	my @phases;
	my @copy_phases = @phases;
	my $crawler=0;
	for (0 ... (length($rightphase)-1)){
		push(@phases, substr($phaser, $crawler, length($rightphase)));
		$crawler++;
	}

	my $start = $rstart;
	my $end = $rend;
	
	my $rightseq = substr($seq, $end+1);
	my @extentions = ();
	my @trappeds = ();
	my @intervalposs = ();
	my @trappedposs = ();
	my @trappedphases = ();
	my @intervals = ();
	my $lastmotif_length = length($lastmotif);
	foreach my $phase (@phases){
		if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
			my $trapped = $1;
			my $trappedpos = $end+1;
			my $interval = $2;
			my $intervalpos = index($trapped, $interval) + 1;

			my $extention = substr($trapped, length($interval));
			my $rightpeep = substr($seq, ($end+length($trapped))+1);
			my @passed_overhangs = "";
			
			#TEMPORARY... BETTER METHOD NEEDED
			$rightpeep =~ s/-//g;

			for my $i (1 ... length($phase)-1){
				my $overhang = substr($phase,0, $i);
#				#print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n";
				if ($rightpeep =~ /^$overhang/i){
					push(@passed_overhangs, $overhang);
#					#print "r overhang\n";
				}
			}
			if (scalar(@passed_overhangs) > 0){			
				my $overhang = @passed_overhangs[longest_array_element(@passed_overhangs)];
				$extention = $extention.$overhang;
				$trapped = $trapped.$overhang;
#				#print "trapped extended to $trapped \n";
			}
		
			push(@extentions,$extention);
			##print "extentions = @extentions \n";

			push(@trappeds,$trapped );
			push(@intervalposs,$intervalpos);
			push(@trappedposs, $trappedpos);
#			#print "trappeds = @trappeds\n";
			push(@trappedphases, substr($extention,0,length($phase)));
			push(@intervals, $interval);
		}
	}
	if (scalar(@trappeds == 0)) {return $line;}
	
#	my $nikaal = longest_array_element(@trappeds);
	my $nikaal = shortest_array_element(@intervals);
	
#	#print "longest element found = $nikaal \n";
	
	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
	$fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
	##print "new fields 9 = $fields[9]";
	$fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);

	##print "new fields 11 = $fields[11]\n";

	if($fields[$microsatcord] !~ /^\[/i){
		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
	}
	
	$fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
	##print "new fields 12 = $fields[12]\n";
	
	##print "scalar of fields = ",scalar(@fields),"\n";
	if (exists ($fields[$motifcord+1])){
#		print " print fields = @fields.. scalar=", scalar(@fields),".. motifcord+1 = $motifcord + 1 \n " if !exists $fields[$motifcord+1];
#		<STDIN> if !exists $fields[$motifcord+1];
		$fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
	}
	else{$fields[$motifcord+1] = "indel/deletion";}
	##print "new fields 14 = $fields[14]\n";
	
	if (exists ($fields[$motifcord+2])){
		$fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
	}
	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
	##print "new fields 15 = $fields[15]\n";

	my @seventeen=();
	if (exists ($fields[$motifcord+3])){
		##print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n";
#		print " print fields = @fields\n " if !exists $fields[$motifcord+3];
		<STDIN> if !exists $fields[$motifcord+3];
		my $currpos = length($microsat)+$intervalposs[$nikaal];
		$fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
		$fields[$motifcord+4] = $fields[$motifcord+4]+1;

	}
	
	else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
	
	##print "new fields 16 = $fields[16]\n";
	
	##print "new fields 17 = $fields[17]\n";
	my $returnline = join("\t",@fields);
	my $pastline  = $returnline;
	if ($fields[$microsatcord] =~ /\[/){
		$returnline = multiSpecies_compoundClarifyer_merge($returnline);
	}
	#print "finally right-extended line = ",$returnline,"\n";
	return $returnline;
}
sub longest_array_element{
	my $counter = 0;
	my($max) = shift(@_);
	my $maxcounter = 0;
    foreach my $temp (@_) {
    	$counter++;
    	#print "finding largest array: $maxcounter \n" if $prinkter == 1;
    	if(length($temp) > length($max)){
        	$max = $temp;
        	$maxcounter = $counter;
        }
    }
    return($maxcounter);
}
sub shortest_array_element{
	my $counter = 0;
	my($min) = shift(@_);
	my $mincounter = 0;
    foreach my $temp (@_) {
    	$counter++;
    	#print "finding largest array: $mincounter \n" if $prinkter == 1;
    	if(length($temp) < length($min)){
        	$min = $temp;
        	$mincounter = $counter;
        }
    }
    return($mincounter);
}


sub left_extention_permission_giver{
	my @fields = split(/\t/,$_[0]);
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/(^\[)|-//g;
	my $motif = $fields[$motifcord];
	my $firstmotif = ();
	my $firststretch = ();
	my @stretches=();
	if ($motif =~ /^\[/){
		$motif =~ s/^\[//g;
		$motif =~ /([a-zA-Z]+)\].*/;
		$firstmotif = $1;
		@stretches = split(/\]/,$microsat);
		$firststretch = $stretches[0];
		##print "firststretch = $firststretch\n";
	}
	else {$firstmotif = $motif;$firststretch = $microsat;}
	
	if (length($firststretch) < $thresholds[length($firstmotif)]){
		return "no";
	}
	else {return "yes";}

}
sub right_extention_permission_giver{
	my @fields = split(/\t/,$_[0]);
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/-|(\]$)//sg;
	my $motif = $fields[$motifcord];
	my $temp_lastmotif = ();
	my $laststretch = ();
	my @stretches=();


	if ($motif =~ /\]/){
		$motif =~ s/\]$//gs;
		$motif =~ /.*\[([a-zA-Z]+)$/;
		$temp_lastmotif = $1;
		@stretches = split(/\[/,$microsat);
		$laststretch = pop(@stretches);
		##print "last stretch = $laststretch\n";
	}
	else {$temp_lastmotif = $motif; $laststretch = $microsat;}

	if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
		return "no";
	}
	else { return "yes";}


}
sub multiSpecies_compoundClarifyer_merge{
	my $line = $_[0];
	#print "sent for mering: $line \n";
	my @mields = split(/\t/,$line);
	my @fields = @mields;
	my $microsat = $fields[$microsatcord];
	my $motifline = $fields[$motifcord];
	my $microsatcopy = $microsat;
	$microsatcopy =~ s/^\[|\]$//sg;
	my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
	my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
	shift @inields;
	#print "inields =@inields<\n";
	$motifline =~ s/^\[|\]$//sg;
	my @motields = split(/\]\[/,$motifline);
	my @firstmotifs = ();
	my @lastmotifs = ();
	for my $i  (0 ... $#microields){
		$firstmotifs[$i] =  substr($microields[$i],0,length($motields[$i]));
		$lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
	}
	#print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n";
	my @mergelist = ();
	my @inter_poses = split(/,/,$fields[$interr_poscord]);
	my $no_of_interruptions = $fields[$no_of_interruptionscord];
	my @interruptions = split(/,/,$fields[$interrcord]);
	my @interrtypes = split(/,/,$fields[$interrtypecord]);
	my $stopper = 0;
	for my $i (0 ... $#motields-1){
		#print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n";
		if (($lastmotifs[$i] eq $firstmotifs[$i+1]) && !exists $inields[$i]){
			$stopper = 1;
			push(@mergelist, ($i)."_".($i+1));
		}
	}
	
	return $line if scalar(@mergelist) == 0;
	
	foreach my $merging (@mergelist){
		my @sets = split(/_/, $merging);
		my @tempmicro = ();
		my @tempmot = ();
		for my $i (0 ... $sets[0]-1){
			push(@tempmicro, "[".$microields[$i]."]");
			push(@tempmicro, $inields[$i]);
			push(@tempmot, "[".$motields[$i]."]");
			#print "adding pre-motifs number $i\n";
		}
		my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
		push (@tempmicro, $pusher);
		push(@tempmot, "[".$motields[$sets[0]]."]");
		my $outcoming = -2;
		for my $i ($sets[1]+1 ... $#microields-1){
			push(@tempmicro, "[".$microields[$i]."]");
			push(@tempmicro, $inields[$i]);	
			push(@tempmot, "[".$motields[$i]."]");
			#print "adding post-motifs number $i\n";
			$outcoming  = $i;
		}
		if ($outcoming != -2){
			#print "outcoming = $outcoming \n";
			push(@tempmicro, "[".$microields[$outcoming+1 ]."]");
			push(@tempmot,"[". $motields[$outcoming+1]."]");
		}
		$fields[$microsatcord] = join("",@tempmicro);
		$fields[$motifcord] = join("",@tempmot);
		
		splice(@interrtypes, $sets[0], 1);
		$fields[$interrtypecord] = join(",",@interrtypes);
		splice(@interruptions, $sets[0], 1);
		$fields[$interrcord] = join(",",@interruptions);
		splice(@inter_poses, $sets[0], 1);
		$fields[$interr_poscord] = join(",",@inter_poses);
		$no_of_interruptions = $no_of_interruptions - 1;
	}	

	if ($no_of_interruptions == 0){
		$fields[$microsatcord] =~ s/^\[|\]$//sg;
		$fields[$motifcord] =~ s/^\[|\]$//sg;
		$line = join("\t", @fields[0 ... $motifcord]);		
	}
	else{
		$line = join("\t", @fields);
	}
	return $line;
}

sub thrashallow{
	my $motif = $_[0];
	return 4 if length($motif) == 2;
	return 6 if length($motif) == 3;
	return 8 if length($motif) == 4;
	
}

#xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx 
sub multispecies_filtering_compound_microsats{
	my $unfiltered = $_[0];
	my $filtered = $_[1];
	my $residue = $_[2];
	my $no_of_species = $_[5];
	open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
	open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
	open(RES,">$residue") or die "Cannot open file $residue: $!";
	
	$infocord = 2 + (4*$no_of_species) - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	
	my @sub_thresholds = ("0");
	push(@sub_thresholds, split(/_/,$_[3]));
	my @thresholds = ("0");
	push(@thresholds, split(/_/,$_[4]));
	
	while (my $line = <UNF>) {
		if ($line !~ /compound/){
			print FIL $line,"\n"; next;
		}
		chomp $line;
		my @fields = split(/\t/,$line);
		my $motifline = $fields[$motifcord];
		$motifline =~ s/^\[|\]$//g;
		my @motifs = split(/\]\[/,$motifline);
		my $microsat = $fields[$microsatcord];
		$microsat =~ s/^\[|\]$|-//g;
		my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);		
		
		my $stopper = 0;
		for my $i (0 ... $#motifs){
			my @common = ();
			my $probe = $motifs[$i].$motifs[$i];
			my $motif_size = length($motifs[$i]);
		
			for my $j (0 ... $#motifs){	
				next if length($motifs[$i]) != length($motifs[$j]);
				push(@common, length($microsats[$j])) if $probe =~ /$motifs[$j]/i;
			}
			
			if (largest_microsat(@common) < $sub_thresholds[$motif_size]) {$stopper = 1; last;}
			else {next;}
		}
		
		if ($stopper  == 1){
			print RES $line,"\n";
		}
		else { print FIL $line,"\n"; }
	}
	close FIL;
	close RES;
}

#xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx 

sub chromosome_unrand_breaker{
#	print "IN chromosome_unrand_breaker: @_\n ";
	my $input1 = $_[0];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
	my $dir = $_[1]; ###### directory where subsets are put
	my $output2 = $_[2]; ###### list of subset files
	my $increment = $_[3];
	my $info = $_[4];
	my $chr = $_[5];
	open(SEQ,"<$input1") or die "Cannot open file $input1 $!";
	
	open(OUT,">$output2") or die "Cannot open file $output2 $!";
	
	#---------------------------------------------------------------------------------------------------
	# NOW READING THE SEQUENCE FILE
	
	my $seed = 0;
	my $subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
	print OUT $subset,"\n";
	open(SUB,">$subset");
	
	while(my $sine = <SEQ>){
		$seed++;
		print SUB $sine;
		
		if ($seed%$increment == 0 ){
			close SUB;
			$subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
			open(SUB,">$subset");
			print SUB $sine;
			print OUT $subset,"\n";
	#		print $subset,"\n";
		}
	}
	close OUT;
	close SUB;
}
#xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx 
sub multiSpecies_interruptedMicrosatHunter{
#	print "IN multiSpecies_interruptedMicrosatHunter: @_\n";
	my $input1 = $_[0];  ###### the *_sput_op4_ii file
	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
	my $output2 = $_[3]; ###### uninterrupted microsatellite file
	my $org = $_[4];
	my $no_of_species = $_[5];
	
	my @thresholds = "0";
	push(@thresholds, split(/_/,$_[6]));
	
#	print "thresholds = @thresholds \n";
	$infocord = 2 + (4*$no_of_species) - 1;
	$typecord = 2 + (4*$no_of_species) + 1 - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
	
	$interr_poscord = $motifcord + 3;
	$no_of_interruptionscord = $motifcord + 4;
	$interrcord = $motifcord + 2;
	$interrtypecord = $motifcord + 1;
	
	
	$prinkter = 0;
#	print "prionkytet = $prinkter\n";
	
	open(IN,"<$input1") or die "Cannot open file $input1 $!";
	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
	
	open(INT,">$output1") or die "Cannot open file $output2 $!";
	open(UNINT,">$output2") or die "Cannot open file $output2 $!";
	
#	print "opened files !!\n";
	my $linecounter = 0;
	my $microcounter = 0;
	
	my %micros = ();
	while (my $line = <IN>){
	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
		$linecounter++;
		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s+(chr[0-9a-zA-Z]+)\s([0-9]+)\s+([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $3, $4, $5);
		#	print $key, "#-#-#-#-#-#-#-#\n" if $prinkter == 1;
			push (@{$micros{$key}},$line);	
			$microcounter++;
		}
		else {#print $line if $prinkter == 1;
		}
	}
#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
	close IN;
	my @deletedlines = ();
#	print "done hash \n";
	$linecounter = 0;
	#---------------------------------------------------------------------------------------------------
	# NOW READING THE SEQUENCE FILE
	while(my $sine = <SEQ>){
		#print $linecounter,"\n" if $linecounter % 1000 == 0;
		my %microstart=();
		my %microend=();
		my @sields = split(/\t/,$sine);
		my $key = ();
		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			$key = join("\t",$1, $2, $3, $4, $5);
	#		print $key, "<-<-<-<-<-<-<-<\n";		
		}
	
	#	$prinkter = 1 if $sine =~ /^>H\t499\t/;
	
		if (exists $micros{$key}){
			my @microstring = @{$micros{$key}};
			delete $micros{$key};
			my @filteredmicrostring;
#			print "sequence = $sields[$sequencepos]" if $prinkter == 1;
			foreach my $line (@microstring){
				$linecounter++;
				my $copy_line = $line;
				my @fields = split(/\t/,$line);
				my $start = $fields[$startcord];
				my $end = $fields[$endcord];
				
#				print $line if $prinkter == 1;
				#LOOKING FOR LEFTWARD EXTENTION OF MICROSATELLITE 
				my $newline;
				while(1){
				#	print "\n before left sequence = $sields[$sequencepos]\n" if $prinkter == 1;
					if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
	
					$newline = multiSpecies_interruptedMicrosatHunter_left_extender($line, $sields[$sequencepos],$org);
					if ($newline eq $line){$line = $newline; last;}
					else {$line = $newline;}
					
					if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
#					print "returned line from left extender= $line \n" if $prinkter == 1;
				}
				while(1){
				#	print "sequence = $sields[$sequencepos]\n" if $prinkter == 1;
					if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
					
					$newline = multiSpecies_interruptedMicrosatHunter_right_extender($line, $sields[$sequencepos],$org);
					if ($newline eq $line){$line = $newline; last;}
					else {$line = $newline;}
					
					if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
#					print "returned line from right extender= $line \n" if $prinkter == 1;
				}
#				print "\n>>>>>>>>>>>>>>>>\n In the end, the line is: \n$line\n<<<<<<<<<<<<<<<<\n" if $prinkter == 1;
	
				my @tempfields = split(/\t/,$line);
				if ($tempfields[$microsatcord] =~ /\[/){
					print INT $line,"\n";
				}
				else{
					print UNINT $line,"\n";
				}
				
				if ($line =~ /NULL/){ next; }
				push(@filteredmicrostring, $line);
				push (@{$microstart{$start}},$line);
				push (@{$microend{$end}},$line);
			}
			
			my $firstflag = 'down';
	
		}	#if (exists $micros{$key}){
	}
	close INT;
	close UNINT;
#	print "final number of lines = $linecounter\n";
}

sub multiSpecies_interruptedMicrosatHunter_left_extender{
	my ($line, $seq, $org) = @_;	
#	print "left extender, like passed = $line\n" if $prinkter == 1;
#	print "in left extender... line passed = $line and sequence is $seq\n" if $prinkter == 1;
	chomp $line;
	my @fields = split(/\t/,$line);
	my $rstart = $fields[$startcord];
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/\[|\]//g;
	my $rend = $rstart + length($microsat)-1;
	$microsat =~ s/-//g;
	my $motif = $fields[$motifcord];
	my $firstmotif = ();

	if ($motif =~ /^\[/){
		$motif =~ s/^\[//g;
		$motif =~ /([a-zA-Z]+)\].*/;
		$firstmotif = $1;
	}
	else {$firstmotif = $motif;}
	
#	print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n" if $prinkter == 1;
	my $leftphase = substr($microsat, 0,length($firstmotif));
	my $phaser = $leftphase.$leftphase;
	my @phase = split(/\s*/,$leftphase);
	my @phases;
	my @copy_phases = @phases;
	my $crawler=0;
	for (0 ... (length($leftphase)-1)){
		push(@phases, substr($phaser, $crawler, length($leftphase)));
		$crawler++;
	}

	my $start = $rstart;
	my $end = $rend;
	
	my $leftseq = substr($seq, 0, $start);
#	print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n" if $prinkter == 1;	
	my @extentions = ();
	my @trappeds = ();
	my @intervalposs = ();
	my @trappedposs = ();
	my @trappedphases = ();
	my @intervals = ();
	my $firstmotif_length = length($firstmotif);
	foreach my $phase (@phases){
#		print "left phase\t",substr($leftseq, -10),"\t$phase\n" if $prinkter == 1;
#		print "search patter = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n" if $prinkter == 1;
		if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
#			print "in left pattern\n" if $prinkter == 1;
			my $trapped = $1;
			my $trappedpos = length($leftseq)-length($trapped);
			my $interval = $3;
			my $intervalpos = index($trapped, $interval) + 1;
#			print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n" if $prinkter == 1;

			my $extention = substr($trapped, 0, length($trapped)-length($interval));
			my $leftpeep = substr($seq, 0, ($start-length($trapped)));
			my @passed_overhangs;
			
			for my $i (1 ... length($phase)-1){
				my $overhang = substr($phase, -length($phase)+$i);
#				print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n" if $prinkter == 1;
				#TEMPORARY... BETTER METHOD NEEDED
				$leftpeep =~ s/-//g;
				if ($leftpeep =~ /$overhang$/i){
					push(@passed_overhangs,$overhang);
#					print "l overhang\n" if $prinkter == 1;
				}
			}
			
			if(scalar(@passed_overhangs)>0){
				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
				$extention = $overhang.$extention;
				$trapped = $overhang.$trapped;
#				print "trapped extended to $trapped \n" if $prinkter == 1;
				$trappedpos = length($leftseq)-length($trapped);
			}
			
			push(@extentions,$extention);
#			print "extentions = @extentions \n" if $prinkter == 1;

			push(@trappeds,$trapped );
			push(@intervalposs,length($extention)+1);
			push(@trappedposs, $trappedpos);
#			print "trappeds = @trappeds\n" if $prinkter == 1;
			push(@trappedphases, substr($extention,0,length($phase)));
			push(@intervals, $interval);
		}
	}
	if (scalar(@trappeds == 0)) {return $line;}
	
############################	my $nikaal = longest_array_element(@trappeds);
	my $nikaal = shortest_array_element(@intervals);
	
#	print "longest element found = $nikaal \n" if $prinkter == 1;
	
	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
	$fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
	#print "new fields 9 = $fields[9]\n" if $prinkter == 1;
	$fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);

	#print "new fields 9 = $fields[9]\n" if $prinkter == 1;

	if($fields[$microsatcord] !~ /^\[/i){
		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
	}
	
	$fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
	#print "new fields 14 = $fields[12]\n" if $prinkter == 1;
	
	#print "scalar of fields = ",scalar(@fields),"\n" if $prinkter == 1;

	
	if (scalar(@fields) > $motifcord+1){
		$fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
	}
	else{$fields[$motifcord+1] = "indel/deletion";}
	#print "new fields 14 = $fields[14]\n" if $prinkter == 1;
	
	if (scalar(@fields)>$motifcord+2){		
		$fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
	}
	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
	#print "new fields 15 = $fields[15]\n" if $prinkter == 1;

	my @seventeen=();
	
	if (scalar(@fields)>$motifcord+3){			
		@seventeen = split(/,/,$fields[$motifcord+3]);	
	#	print "scalarseventeen =@seventeen<-\n" if $prinkter == 1;
		for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
		$fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
	}
	
	else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
	
	#print "new fields 16 = $fields[16]\n" if $prinkter == 1;
	#print "new fields 17 = $fields[17]\n" if $prinkter == 1;
	
#	return join("\t",@fields);	
	my $returnline = join("\t",@fields);
	my $pastline  = $returnline;
	if ($fields[$microsatcord] =~ /\[/){
		$returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
	}
#	print "finally left-extended line = ",$returnline,"\n" if $prinkter == 1;
	return $returnline;
}

sub multiSpecies_interruptedMicrosatHunter_right_extender{
#	print "right extender\n" if $prinkter == 1;
	my ($line, $seq, $org) = @_;	
#	print "in right extender... line passed = $line\n" if $prinkter == 1;
#	print "line = $line, sequence = ",$seq, "\n" if $prinkter == 1;
	chomp $line;
	my @fields = split(/\t/,$line);
	my $rstart = $fields[$startcord];
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/\[|\]//g;
	my $rend = $rstart + length($microsat)-1;
	$microsat =~ s/-//g;
	my $motif = $fields[$motifcord];
	my $temp_lastmotif = ();

	if ($motif =~ /\]$/){
		$motif =~ s/\]$//g;
		$motif =~ /.*\[([a-zA-Z]+)/;
		$temp_lastmotif = $1;
	}
	else {$temp_lastmotif = $motif;}
	my $lastmotif = substr($microsat,-length($temp_lastmotif));
#	print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n" if $prinkter == 1;
	my $rightphase = substr($microsat, -length($lastmotif));
	my $phaser = $rightphase.$rightphase;
	my @phase = split(/\s*/,$rightphase);
	my @phases;
	my @copy_phases = @phases;
	my $crawler=0;
	for (0 ... (length($rightphase)-1)){
		push(@phases, substr($phaser, $crawler, length($rightphase)));
		$crawler++;
	}

	my $start = $rstart;
	my $end = $rend;
	
	my $rightseq = substr($seq, $end+1);
#	print "length of sequence  = " ,length($seq), "the coordinate to start from = ", $end+1, "\n" if $prinkter == 1;
#	print "right phases are @phases , end = $end right sequence = ",substr($rightseq,0,10),"\n" if $prinkter == 1;	
	my @extentions = ();
	my @trappeds = ();
	my @intervalposs = ();
	my @trappedposs = ();
	my @trappedphases = ();
	my @intervals = ();
	my $lastmotif_length = length($lastmotif);
	foreach my $phase (@phases){
#		print "right phase\t$phase\t",substr($rightseq,0,10),"\n" if $prinkter == 1;
#		print "search patter = (([a-zA-Z|-]{0,$lastmotif_length})($phase)+) \n" if $prinkter == 1;
		if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
#			print "in right pattern\n" if $prinkter == 1;
			my $trapped = $1;
			my $trappedpos = $end+1;
			my $interval = $2;
			my $intervalpos = index($trapped, $interval) + 1;
#			print "trapped = $trapped, interval = $interval\n" if $prinkter == 1;

			my $extention = substr($trapped, length($interval));
			my $rightpeep = substr($seq, ($end+length($trapped))+1);
			my @passed_overhangs = "";
			
			#TEMPORARY... BETTER METHOD NEEDED
			$rightpeep =~ s/-//g;

			for my $i (1 ... length($phase)-1){
				my $overhang = substr($phase,0, $i);
#				print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n" if $prinkter == 1;
				if ($rightpeep =~ /^$overhang/i){
					push(@passed_overhangs, $overhang);
#					print "r overhang\n" if $prinkter == 1;
				}
			}
			if (scalar(@passed_overhangs) > 0){			
				my $overhang = @passed_overhangs[longest_array_element(@passed_overhangs)];
				$extention = $extention.$overhang;
				$trapped = $trapped.$overhang;
#				print "trapped extended to $trapped \n" if $prinkter == 1;
			}
		
			push(@extentions,$extention);
			#print "extentions = @extentions \n" if $prinkter == 1;

			push(@trappeds,$trapped );
			push(@intervalposs,$intervalpos);
			push(@trappedposs, $trappedpos);
#			print "trappeds = @trappeds\n" if $prinkter == 1;
			push(@trappedphases, substr($extention,0,length($phase)));
			push(@intervals, $interval);
		}
	}
	if (scalar(@trappeds == 0)) {return $line;}
	
###################################	my $nikaal = longest_array_element(@trappeds);
	my $nikaal = shortest_array_element(@intervals);
	
#	print "longest element found = $nikaal \n" if $prinkter == 1;
	
	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
	$fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
	$fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);


	if($fields[$microsatcord] !~ /^\[/i){
		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
	}
	
	$fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
	
	
	if (scalar(@fields) > $motifcord+1){
		$fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
	}
	else{$fields[$motifcord+1] = "indel/deletion";}
	
	if (scalar(@fields)>$motifcord+2){		
		$fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
	}
	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	

	my @seventeen=();
	if (scalar(@fields)>$motifcord+3){
		#print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n" if $prinkter == 1;
		my $currpos = length($microsat)+$intervalposs[$nikaal];
		$fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
		$fields[$motifcord+4] = $fields[$motifcord+4]+1;

	}
	
	else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
	
#	print "finally right-extended line = ",join("\t",@fields),"\n" if $prinkter == 1;
#	return join("\t",@fields);

	my $returnline = join("\t",@fields);
	my $pastline  = $returnline;
	if ($fields[$microsatcord] =~ /\[/){
		$returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
	}
#	print "finally right-extended line = ",$returnline,"\n" if $prinkter == 1;
	return $returnline;

}

sub multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver{
	my @fields = split(/\t/,$_[0]);
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/(^\[)|-//sg;
	my $motif = $fields[$motifcord];
	chomp $motif;
#	print $motif, "\n" if $motif !~ /^\[/;
	my $firstmotif = ();
	my $firststretch = ();
	my @stretches=();
	
#	print "motif = $motif, microsat = $microsat\n" if $prinkter == 1;
	if ($motif =~ /^\[/){
		$motif =~ s/^\[//sg;
		$motif =~ /([a-zA-Z]+)\].*/;
		$firstmotif = $1;
		@stretches = split(/\]/,$microsat);
		$firststretch = $stretches[0];
		#print "firststretch = $firststretch\n" if $prinkter == 1;
	}
	else {$firstmotif = $motif;$firststretch = $microsat;}
#	print "if length:firststretch - length($firststretch) < threshes length :firstmotif ($firstmotif) - $thresholds[length($firstmotif)]\n" if $prinkter == 1; 
	if (length($firststretch) < $thresholds[length($firstmotif)]){
		return "no";
	}
	else {return "yes";}

}
sub multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver{
	my @fields = split(/\t/,$_[0]);
	my $microsat = $fields[$microsatcord];
	$microsat =~ s/-|(\]$)//sg;
	my $motif = $fields[$motifcord];
	chomp $motif;
	my $temp_lastmotif = ();
	my $laststretch = ();
	my @stretches=();


	if ($motif =~ /\]/){
		$motif =~ s/\]$//sg;
		$motif =~ /.*\[([a-zA-Z]+)$/;
		$temp_lastmotif = $1;
		@stretches = split(/\[/,$microsat);
		$laststretch = pop(@stretches);
		#print "last stretch = $laststretch\n" if $prinkter == 1;
	}
	else {$temp_lastmotif = $motif; $laststretch = $microsat;}

	if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
		return "no";
	}
	else { return "yes";}


}
sub checking_substitutions{
	
	my ($line, $seq, $startprobes, $endprobes) = @_;
	#print "sequence = $seq \n" if $prinkter == 1;
	#print "COMMAND  = \n $line, \n $seq, \n $startprobes \n, $endprobes\n";
		#		<STDIN>;
	my @seqarray = split(/\s*/,$seq);
	my @startsubst_probes = split(/\|/,$startprobes); 
	my @endsubst_probes = split(/\|/,$endprobes);
	chomp $line;
	my @fields = split(/\t/,$line);
	my $start = $fields[11] - $fields[10];
	my $end = $fields[13] - $fields[10];
	my $motif = $fields[9]; #IN FUTURE, USE THIS AS A PROBE, LIKE MOTIF = $FIELDS[9].$FIELDS[9]
	$motif =~ s/\[|\]//g;
	my $microsat = $fields[14];
	$microsat =~ s/\[|\]//g;
	#------------------------------------------------------------------------
	# GETTING START AND END PHASES
	my $startphase = substr($microsat,0, length($motif));
	my $endphase = substr($microsat,-length($motif), length($motif));
	#print "start and end phases are  - $startphase and $endphase\n";
	my $startflag = 'down';
	my $endflag = 'down';
	my $substitution_distance = length($motif);
	my $prestart = $start - $substitution_distance;
	my $postend = $end + $substitution_distance;
	my @endadds = ();
	my @startadds = ();
		if (($prestart < 0) || ($postend > scalar(@seqarray))) {
			last;
		}
	#------------------------------------------------------------------------#------------------------------------------------------------------------
	# CHECKING FOR SUBSTITUTION PROBES NOW			
		
	if ($fields[8] ne "mononucleotide"){				
		while ($startflag eq "down"){		
			my $search = join("",@seqarray[$prestart...($start-1)]);
			#print "search is from $prestart...($start-1) = $search\n";
			foreach my $probe (@startsubst_probes){
				#print "\t\tprobe = $probe\n";				
				if ($search =~ /^$probe/){
					#print "\tfound addition to the left - $search \n";
					my $copyprobe = $probe;
					my $type;
					my $subspos = 0;
					my $interruption = "";
					if ($search eq $startphase) { $type = "NONE";}
					else{
						$copyprobe =~ s/\[a-zA-Z\]/^/g;
						$subspos = index($copyprobe,"^") + 1;
						$type = "substitution";
						$interruption  = substr($search, $subspos,1);
					}
					my $addinfo = join("\t",$prestart, $start, $search, $type, $interruption, $subspos);
					#print "adding information: $addinfo \n";
					push(@startadds, $addinfo);
					$prestart = $prestart - $substitution_distance;
					$start = $start-$substitution_distance;
					$startflag = 'down';
					
					last;
				}
				else{
					$startflag = 'up';
				}
			}
		}
		#<STDIN>;		
		while ($endflag eq "down"){									
			my $search = join("",@seqarray[($end+1)...$postend]);
			#print "search is from ($end+1)...$postend] = $search\n";

			foreach my $probe (@endsubst_probes){
				#print "\t\tprobe = $probe\n";
				if ($search =~ /$probe$/){
					my $copyprobe = $probe;
					my $type;
					my $subspos = 0;
					my $interruption = "";
					if ($search eq $endphase) { $type = "NONE";}
					else{
						$copyprobe =~ s/\[a-zA-Z\]/^/g;
						$subspos = index($copyprobe,"^") + 1;
						$type = "substitution";
						$interruption  = substr($search, $subspos,1);
					}
					my $addinfo = join("\t",$end, $postend, $search, $type, $interruption, $subspos);
					#print "adding information: $addinfo \n";
					push(@endadds, $addinfo);
					$postend = $postend + $substitution_distance;
					$end = $end+$substitution_distance;
					push(@endadds, $search);
					$endflag = 'down';
					last;
				}
				else{
					$endflag = 'up';
				}
			}
		}
		#print "startadds = @startadds, endadds  = @endadds \n";
	
	}
}
sub microsat_packer{
	my $microsat = $_[0];
	my $addition = $_[1];



}
sub multiSpecies_interruptedMicrosatHunter_merge{
		$prinkter = 0;
#	print "~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~\n";
	my $line = $_[0];
#	print "sent for mering: $line \n" if $prinkter ==1;
	my @mields = split(/\t/,$line);
	my @fields = @mields;
	my $microsat = allCaps($fields[$microsatcord]);
	my $motifline = allCaps($fields[$motifcord]);
	my $microsatcopy = $microsat;
#	print "microsat = $microsat|\n" if $prinkter ==1;
	$microsatcopy =~ s/^\[|\]$//sg;
	chomp $microsatcopy;
	my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
	my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
	shift @inields;
#	print "inields =",join("|",@inields)," microields = ",join("|",@microields)," and count of microields = ", $#microields,"\n" if $prinkter ==1;
	$motifline =~ s/^\[|\]$//sg;
	my @motields = split(/\]\[/,$motifline);
	my @firstmotifs = ();
	my @lastmotifs = ();
	for my $i  (0 ... $#microields){
		$firstmotifs[$i] =  substr($microields[$i],0,length($motields[$i]));
		$lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
	}
#	print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n" if $prinkter ==1;
	my @mergelist = ();
	my @inter_poses = split(/,/,$fields[$interr_poscord]);
	my $no_of_interruptions = $fields[$no_of_interruptionscord];
	my @interruptions = split(/,/,$fields[$interrcord]);
	my @interrtypes = split(/,/,$fields[$interrtypecord]);
	my $stopper = 0;
	for my $i (0 ... $#motields-1){
#		print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n:$lastmotifs[$i] eq $firstmotifs[$i+1]?\n" if $prinkter ==1;
		if ((allCaps($lastmotifs[$i]) eq allCaps($firstmotifs[$i+1])) && (!exists $inields[$i] || $inields[$i] !~ /[a-zA-Z]/)){
			$stopper = 1;
			push(@mergelist, ($i)."_".($i+1)); #<STDIN> if $prinkter ==1;
		}
	}
	
#	print "mergelist = @mergelist\n" if $prinkter ==1;
	return $line if scalar(@mergelist) == 0;
#	print "merging @mergelist\n" if $prinkter ==1;
#	<STDIN> if $prinkter ==1;
	
	foreach my $merging (@mergelist){
		my @sets = split(/_/, $merging);
#		print "sets = @sets\n" if $prinkter ==1;
		my @tempmicro = ();
		my @tempmot = ();
#		print "for loop going from 0 ... ", $sets[0]-1, "\n" if $prinkter ==1;
		for my $i (0 ... $sets[0]-1){
#			print " adding pre- i = $i adding: microields= $microields[$i]. motields = $motields[$i], inields = |$inields[$i]|\n" if $prinkter ==1;
			push(@tempmicro, "[".$microields[$i]."]");
			push(@tempmicro, $inields[$i]);
			push(@tempmot, "[".$motields[$i]."]");
#			print "adding pre-motifs number $i\n" if $prinkter ==1;
#			print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
		}
#		print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
#		print "now pushing ", "[",$microields[$sets[0]]," and ",$microields[$sets[1]],"]\n" if $prinkter ==1;
		my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
#		print "middle is, from @motields -   @sets, number 0 which is  is\n";
#		print ": $motields[$sets[0]]\n";
		push (@tempmicro, $pusher);
		push(@tempmot, "[".$motields[$sets[0]]."]");
		push (@tempmicro, $inields[$sets[1]]) if $sets[1] != $#microields && exists $sets[1] && exists $inields[$sets[1]];
		my $outcoming = -2;
#		print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
#		print "for loop going from ",$sets[1]+1, " ... ", $#microields, "\n" if $prinkter ==1;
		for my $i ($sets[1]+1 ... $#microields){
#			print " adding post- i = $i adding: microields= $microields[$i]. motields = $motields[$i]\n" if $prinkter ==1;
			push(@tempmicro, "[".$microields[$i]."]") if exists $microields[$i];
			push(@tempmicro, $inields[$i]) unless $i == $#microields || !exists $inields[$i];
			push(@tempmot, "[".$motields[$i]."]");
#			print "adding post-motifs number $i\n" if $prinkter ==1;
			$outcoming  = $i;
		}
#		print "____________________________________________________________________________\n";
		$prinkter = 0;
		$fields[$microsatcord] = join("",@tempmicro);
		$fields[$motifcord] = join("",@tempmot);
#		print "tempmot = @tempmot, tempmicro = @tempmicro . microsat = $fields[$microsatcord] and motif = $fields[$motifcord] \n" if $prinkter ==1;
		
		splice(@interrtypes, $sets[0], 1);
		$fields[$interrtypecord] = join(",",@interrtypes);
		splice(@interruptions, $sets[0], 1);
		$fields[$interrcord] = join(",",@interruptions);
		splice(@inter_poses, $sets[0], 1);
		$fields[$interr_poscord] = join(",",@inter_poses);
		$no_of_interruptions = $no_of_interruptions - 1;
	}	

	if ($no_of_interruptions == 0 && $line !~ /compound/){
		$fields[$microsatcord] =~ s/^\[|\]$//sg;
		$fields[$motifcord] =~ s/^\[|\]$//sg;
		$line = join("\t", @fields[0 ... $motifcord]);		
	}
	else{
		$line = join("\t", @fields);
	}
#	print "post merging, the line is $line\n" if $prinkter ==1;
	#<STDIN> if $stopper ==1;
	return $line;
}
sub interval_asseser{
	my $pre_phase = $_[0]; my $post_phase = $_[1]; my $inter = $_[3];
}
#---------------------------------------------------------------------------------------------------
sub allCaps{
	my $motif = $_[0];
	$motif =~ s/a/A/g;
	$motif =~ s/c/C/g;
	$motif =~ s/t/T/g;
	$motif =~ s/g/G/g;
	return $motif;
}


#xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  chromosome_unrand_breamultiSpecies_interruptedMicrosatHunterker xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx 
sub merge_interruptedMicrosats{
#	print "IN merge_interruptedMicrosats: @_\n";
	my $input0 = $_[0];  ######looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
	my $input1 = $_[1];  ###### the *_sput_op4_ii file
	my $input2 = $_[2];  ###### the *_sput_op4_ii file
	$no_of_species = $_[3];
	
	my $output1 = $_[1]."_separate";    #$_[3]; ###### plain microsatellite file forward
	my $output2 = $_[2]."_separate";    ##$_[4]; ###### plain microsatellite file reverse
	my $output3 = $_[1]."_merged";    ##$_[5]; ###### plain microsatellite file forward
	#my $output4 = $_[2]."_merged";    ##$_[6]; ###### plain microsatellite file reverse
	#my $info = $_[4];
	#my @tags = split(/\t/,$info);
	
	open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
	open(INF,"<$input1") or die "Cannot open file $input1 $!";
	open(INR,"<$input2") or die "Cannot open file $input2 $!";
	open(OUTF,">$output1") or die "Cannot open file $output1 $!";
	open(OUTR,">$output2") or die "Cannot open file $output2 $!";
	open(MER,">$output3") or die "Cannot open file $output3 $!";
	#open(MERR,">$output4") or die "Cannot open file $output4 $!";
		
	
		
	 $printer = 0;
	
#	print "files opened \n";
	$infocord = 2 + (4*$no_of_species) - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	$typecord = $infocord + 1;
	my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
	
	$interrtypecord = $motifcord + 1;
	$interrcord = $motifcord + 2;
	$interr_poscord = $motifcord + 3;
	$no_of_interruptionscord = $motifcord + 4;
	$mergestarts  = $no_of_interruptionscord+ 1;
	$mergeends = $no_of_interruptionscord+ 2;
	$mergemicros = $no_of_interruptionscord+ 3;
	
	# NOW ADDING FORWARD MICROSATELLITES TO HASH
	my %fmicros = ();
	my $microcounter=0;
	my $linecounter = 0;
	while (my $line = <INF>){
	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
		$linecounter++;
		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $4, $5);
		#	print $key, "#-#-#-#-#-#-#-#\n";
			push (@{$fmicros{$key}},$line);	
			$microcounter++;
		}
		else {print $line;}
	}
#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
	close INF;
	my @deletedlines = ();
#	print "done forward hash \n";
	$linecounter = 0;
	#---------------------------------------------------------------------------------------------------
	# NOW ADDING REVERSE MICROSATELLITES TO HASH
	my %rmicros = ();
	$microcounter=0;
	while (my $line = <INR>){
	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
		$linecounter++;
		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $4, $5);
	#		print $key, "#-#-#-#-#-#-#-#\n";
			push (@{$rmicros{$key}},$line);	
			$microcounter++;
		}
		else {print "cant make key\n";}
	}
#	print "number of reverse microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
	close INR;
#	print "done reverse hash \n";
	$linecounter = 0;
	
	#------------------------------------------------------------------------------------------------
	
	while(my $sine = <SEQ>){
		#<STDIN> if $sine =~ /16349128/;
		next if $sine !~ /[a-zA-Z0-9]/;
#		print "-" x 150, "\n"  if $printer == 1;
		my @sields = split(/\t/,$sine);
		my @merged = ();
	
		my $key = ();
	
		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			$key = join("\t",$1, $2, $4, $5);
		#	print $key, "<-<-<-<-<-<-<-<\n";		
		}
	#	print "key = $key\n";
		
		my @sets1;
		my @sets2;
		chomp $sields[$sequencepos];
		my $rev_sequence = reverse($sields[$sequencepos]); 
		$rev_sequence =~ s/ //g;
		$rev_sequence = " ".$rev_sequence;
		next if (!exists $fmicros{$key} && !exists $rmicros{$key});
		
		if (exists $fmicros{$key}){
		#	print "line no : $linecount\n";
			my @raw_microstring = @{$fmicros{$key}};
			my %starts = (); my %ends = ();
#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
			my @microstring=();
			for my $u (0 ... $#raw_microstring){
				my @tields = split(/\t/,$raw_microstring[$u]);
				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
				push(@microstring, $raw_microstring[$u]);
				$starts{$tields[$startcord]} = $tields[$startcord];
				$ends{$tields[$endcord]} = $tields[$endcord];
			}
			
	#		print "founf microstring in forward\n: @microstring\n"; 
			chomp @microstring;
			my $clusterresult = (find_clusters(@microstring, $sields[$sequencepos])); 	
			@sets1 = split("\=", $clusterresult);
			my @temp = split(/_/,$sets1[0]) ; $microscanned+= scalar(@temp);
		#	print "sets = ", join("<all\nmerged>", @sets1), "\n<<-sets1\n"; <STDIN>;
		}	#if (exists $micros{$key}){
	
		if (exists $rmicros{$key}){
		#	print "line no : $linecount\n";
			my @raw_microstring = @{$rmicros{$key}};
			my %starts = (); my %ends = ();
#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
			my @microstring=();
			for my $u (0 ... $#raw_microstring){
				my @tields = split(/\t/,$raw_microstring[$u]);
				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
				push(@microstring, $raw_microstring[$u]);
				$starts{$tields[$startcord]} = $tields[$startcord];
				$ends{$tields[$endcord]} = $tields[$endcord];
			}
	#		print "founf microstring in reverse\n: @microstring\n"; <STDIN>;
			chomp @microstring;
	#		print "sending reversed sequence\n";
			my $clusterresult = (find_clusters(@microstring, $rev_sequence ) ); 	
			@sets2 = split("\=", $clusterresult);
			my @temp = split(/_/,$sets2[0]) ; $microscanned+= scalar(@temp);
		}	#if (exists $micros{$key}){
	
		my @popout1 = ();
		my @popout2 = ();
		my @forwardset = ();
		if (exists $sets2[1] ){
			if(exists $sets1[0]) {
				push (@popout1, $sets1[0],$sets2[1]);
				my @forwardset = split("=", popOuter(@popout1, $rev_sequence ));#		
				print OUTF join("\n",split("_", $forwardset[0])), "\n";
				my @localmerged = split("_", $forwardset[1]);
				my $sequence = $sields[$sequencepos];
				$sequence =~ s/ //g;
				for my $j (0 ... $#localmerged){
					$localmerged[$j] = 	invert_justCoordinates ($localmerged[$j], length($sequence));
				}
				
				push (@merged, @localmerged);
				
			}
			else{
				my @localmerged = split("_", $sets2[1]);
				my $sequence = $sields[$sequencepos];
				$sequence =~ s/ //g;
				for my $j (0 ... $#localmerged){
					$localmerged[$j] = 	invert_justCoordinates ($localmerged[$j], length($sequence));
				}
				
				push (@merged, @localmerged);
			}
		}
		elsif (exists $sets1[0]){
			print OUTF join("\n",split("_", $sets1[0])), "\n";
		}
		
		my @reverseset= ();
		if (exists $sets1[1]){
			if (exists $sets2[0]){
				push (@popout2, $sets2[0],$sets1[1]);
			#	print "popout2 = @popout2\n";
				my @reverseset = split("=", popOuter(@popout2, $sields[$sequencepos]));
				#print "reverseset = $reverseset[1] < --- reverseset1\n"; 
				print OUTR join("\n",split("_", $reverseset[0])), "\n";
				push(@merged,  (split("_", $reverseset[1])));
			}
			else{
				push(@merged,  (split("_", $sets1[1])));
			}
		}
		elsif (exists $sets2[0]){
			print OUTR join("\n",split("_", $sets2[0])), "\n";
		
		}
		
		if (scalar @merged > 0){
			my @filtered_merged = split("__",(filterDuplicates_merged(@merged)));
			print MER join("\n", @filtered_merged),"\n"; 
		}	
	#		<STDIN> if $sine =~ /16349128/;
	
	}
	close(SEQ);
	close(INF);
	close(INR);
	close(OUTF);
	close(OUTR);
	close(MER);

}
sub find_clusters{
	my @input = @_;
	my $sequence = pop(@input);
	$sequence =~ s/ //g;
	my @microstring0 = @input;
#	print "IN: find_clusters:\n";
	my %microstart=();
	my %microend=();
	my @nonmerged = ();
	my @mergedSet = ();
#		print "set of microsats = @microstring \n";
	my @microstring = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @microstring0; 
#	print "microstring = ", join("\n",@microstring0) ," \n---->\n", join("\n", @microstring),"\n ,,+." if $printer == 1; 
	#<STDIN> if $printer == 1; 
	my @tempmicrostring = @microstring;
	foreach my $line (@tempmicrostring){
		my @fields = split(/\t/,$line);
		my $start = $fields[$startcord];
		my $end = $fields[$endcord];
		next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/;
	#		print " starts >>> start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
		push (@{$microstart{$start}},$line);
		push (@{$microend{$end}},$line);	
	}
	my $firstflag = 'down';
	while( my $line =shift(@microstring)){
#		print "-----------\nline = $line \n" if $printer == 1;
		chomp $line;
		my @fields = split(/\t/,$line);
		my $start = $fields[$startcord];
		my $end = $fields[$endcord];
		next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/ || $distance !~ /[0-9]+/ ;
		my $startmicro = $line;
		my $endmicro = $line;
#		print "start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";

		delete ($microstart{$start});
		delete ($microend{$end});
		my $flag = 'down';	
		my $startflag = 'down';
		my $endflag = 'down';
		my $prestart = $start - $distance;
		my $postend = $end + $distance;
		my @compoundlines = ();
		my %compoundhash = ();
		push (@compoundlines, $line);
		push (@{$compoundhash{$line}},$line);
		my $startrank = 1;
		my $endrank = 1;

		while( ($startflag eq "down") || ($endflag eq "down") ){
#			print "prestart=$prestart, post end =$postend.. seqlen =", length($sequence)," firstflag = $firstflag \n" if $printer == 1;
			if ( (($prestart < 0) && $firstflag eq "up") || (($postend > length($sequence) && $firstflag eq "up")) ){
#				print "coming to the end of sequence,post end = $postend and sequence length =", length($sequence)," so exiting\n" if $printer == 1;
				last;
			}
			
			$firstflag = "up";
			if ($startflag eq "down"){		
				for my $i ($prestart ... $end){
					if(exists $microend{$i}){	
						chomp $microend{$i}[0];
						if(exists $compoundhash{$microend{$i}[0]}) {next;}
							chomp $microend{$i}[0];
							push(@compoundlines, $microend{$i}[0]);
							my @tields = split(/\t/,$microend{$i}[0]);
							$startmicro = $microend{$i}[0];
							chomp $startmicro;
							$flag = 'down';
							$startrank++;
#							print "deleting $microend{$i}[0] and $microstart{$tields[$startcord]}[0]\n" if $printer == 1;
							delete $microend{$i};
							delete $microstart{$tields[$startcord]};
							$end = $tields[$endcord];
							$startflag = 'down';
							$prestart = $tields[$startcord] - $distance;
							last;
					}
					else{
						$flag = 'up';
						$startflag = 'up';
					}
				}
			}
	
			if ($endflag eq "down"){	

				for my $i ($start ... $postend){
#					print "$start ----> $i -----> $postend\n" if $printer == 1;
					if(exists $microstart{$i} ){
						chomp $microstart{$i}[0];
						if(exists $compoundhash{$microstart{$i}[0]}) {next;}	
							chomp $microstart{$i}[0];
							push(@compoundlines, $microstart{$i}[0]);
							my @tields = split(/\t/,$microstart{$i}[0]);
							$endmicro = $microstart{$i}[0];
							$endrank++;
							chomp $endmicro;
							$flag = 'down';
#							print "deleting $microend{$tields[$endcord]}[0]\n" if $printer == 1;
							
							delete $microstart{$i} if exists $microstart{$i} ;
							delete $microend{$tields[$endcord]} if exists $microend{$tields[$endcord]};
#							print "done\n" if $printer == 1;
							
							shift @microstring;
							$end = $tields[$endcord];
							$postend = $tields[$endcord] + $distance;
							$endflag = 'down';
							last;								
					}
					else{
						$flag = 'up';
						$endflag = 'up';
					}
#					print "out of the if\n" if $printer == 1;
				}
#				print "out of the for\n" if $printer == 1;

			}
#			print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
		} 														#end while( $flag eq "down")
#			print "compoundlines = @compoundlines \n" if $printer == 1;

		if (scalar (@compoundlines) == 1){
			push(@nonmerged, $line);		

		}
		if (scalar (@compoundlines) > 1){
#			print "FROM CLUSTERER\n"  if $printer == 1;
			push(@mergedSet,merge_microsats(@compoundlines, $sequence) );
		}
	} #end foreach my $line (@microstring){
#	print join("\n",@mergedSet),"<-----mergedSet\n"  if $printer == 1;
#<STDIN> if scalar(@mergedSet) > 0;
#	print "EXIT: find_clusters\n";
return (join("_",@nonmerged). "=".join("_",@mergedSet));
}

sub custom {
	$a->[$startcord+1] <=> $b->[$startcord+1];
}

sub popOuter {
#	print "\nIN: popOuter @_\n";
	my @all = split ("_",$_[0]);
#	<STDIN> if !defined $_[0];
	my @merged = split ("_",$_[1]);
	my $sequence = $_[2];
	my $seqlen = length($sequence);
#	print "\nIN: popOuter @_\n" if scalar(@_) != 3;
#	<STDIN> if scalar(@_) != 3;
	my %microstart=();
	my %microend=();
	my @mergedSet = ();
	my @nonmerged = ();
#	print "\n\n\n all = @all\n<--all\n";
	
	foreach my $line (@all){
		my @fields = split(/\t/,$line);
		my $start = $seqlen - $fields[$startcord]+ 1;
		my $end = $seqlen - $fields[$endcord] + 1;
		push (@{$microstart{$start}},$line);
		push (@{$microend{$end}},$line);	
	}
	my $firstflag = 'down';

	my %forPopouting = ();

	while( my $line =shift(@merged)){
#		print "\n MErgedline: $line \n" if $printer == 1;
		chomp $line;
		my @fields = split(/\t/,$line);
		my $start = $fields[$startcord];
		my $end = $fields[$endcord];
		my $startmicro = $line;
		my $endmicro = $line;

		delete ($microstart{$start});
		delete ($microend{$end});
		my $flag = 'down';	
		my $startflag = 'down';
		my $endflag = 'down';
		my $prestart = $start - $distance;
		my $postend = $end + $distance;
		my @compoundlines = ();
		my %compoundhash = ();
		push (@compoundlines, $line);
		my $startrank = 1;
		my $endrank = 1;
		
	#	print "\nstart = $start, end = $end\n";
	#	<STDIN>;
		for my $i ($start ... $end){
			if(exists $microend{$i}){	
		#		print "\nmicrosat exists: $microend{$i}[0] microsat exists\n"; 
				chomp $microend{$i}[0];
				my @fields = split(/\t/,$microend{$i}[0]);
				delete $microstart{$seqlen - $fields[$startcord] + 1};
				my $invertseq = $sequence;
				$invertseq =~ s/ //g;
				push(@compoundlines, invert_microsat($microend{$i}[0] , $invertseq ));
				delete $microend{$i};
				
			}

			if(exists $microstart{$i} ){
		#		print "\nmicrosat exists: $microstart{$i}[0] microsat exists\n"; 

				chomp $microstart{$i}[0];
				my @fields = split(/\t/,$microstart{$i}[0]);
				delete $microend{$seqlen - $fields[$endcord] + 1};
				my $invertseq = $sequence;
				$invertseq =~ s/ //g;
				push(@compoundlines, invert_microsat($microstart{$i}[0], $invertseq) );						
				delete $microstart{$i};
			}
		}			
	
		if (scalar (@compoundlines) == 1){
			push(@mergedSet,join("\t",@compoundlines) );
		}
		else {
#			print "FROM POPOUTER\n" if $printer == 1;
			push(@mergedSet, merge_microsats(@compoundlines, $sequence) );
		}
	} 
	
	foreach my $key (sort keys %microstart) {
    	push(@nonmerged,$microstart{$key}[0]);
	}
	
	return (join("_",@nonmerged). "=".join("_",@mergedSet) );
}



sub invert_justCoordinates{
	my $microsat = $_[0];
#	print "IN invert_justCoordinates\n" if $printer == 1;
	chomp $microsat;
	my $seqLength = $_[1];
	my @fields = split(/\t/,$microsat);
	my $start = $seqLength - $fields[$endcord] + 1;
	my $end = $seqLength - $fields[$startcord] + 1;
	$fields[$startcord] = $start;
	$fields[$endcord] = $end;
	$fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
#	print "RETURNIG: ", join("\t",@fields), "\n" if $printer == 1;
	return join("\t",@fields); 	
}

sub largest_number{
	my $counter = 0;
	my($max) = shift(@_);
    foreach my $temp (@_) {
    	#print "finding largest array: $maxcounter \n";
    	if($temp > $max){
        	$max = $temp;
        }
    }
    return($max);
}
sub smallest_number{
	my $counter = 0;
	my($min) = shift(@_);
    foreach my $temp (@_) {
    	#print "finding largest array: $maxcounter \n";
    	if($temp < $min){
        	$min = $temp;
        }
    }
    return($min);
}


sub filterDuplicates_merged{
	my @merged = @_;
	my %revmerged = ();
	my @fmerged = ();
	foreach my $micro (@merged) {
		my @fields = split(/\t/,$micro);
		if ($fields[3] =~ /chr[A-Z0-9a-z]+r/){
			my $key = join("_",$fields[1], $fields[$startcord], $fields[$endcord]);
	#		print "adding  ... \n$key\n$micro\n";
			push(@{$revmerged{$key}}, $micro);
		}
		else{
		#	print "pushing.. $micro\n";
			push(@fmerged, $micro);
		}
	}
#	print "\n";
	foreach my $micro (@fmerged) {
			my @fields = split(/\t/,$micro);
			my $key = join("_",$fields[1], $fields[$startcord], $fields[$endcord]);
	#		print "searching for key $key\n";
			if (exists $revmerged{$key}){
		#		print "deleting $revmerged{$key}[0]\n";
				delete $revmerged{$key};
			}
	}
	foreach my $key (sort keys %revmerged) {
    	push(@fmerged,$revmerged{$key}[0]);
	}
#	print "returning ", join("\n", @fmerged),"\n" ;
	return join("__", @fmerged);
}

sub invert_microsat{
	my $micro = $_[0];
	chomp $micro;
	if ($micro =~ /chr[A-Z0-9a-z]+r/) { $micro =~  s/chr([0-9a-b]+)r/chr$1/g ;}
	else {  $micro =~  s/chr([0-9a-b]+)/chr$1r/g ; }
	my $sequence = $_[1];
	$sequence =~ s/ //g;
	my $seqlen = length($sequence);
	my @fields = split(/\t/,$micro);
	my $start = $seqlen - $fields[$endcord] +1;
	my $end = $seqlen - $fields[$startcord] +1;
	$fields[$startcord]  = $start;
	$fields[$endcord] = $end;
	$fields[$motifcord] = reverse_micro($fields[$motifcord]);
	$fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
	if ($fields[$typecord] ne "compound" && exists $fields[$no_of_interruptionscord] ){
		my @intertypes = split(/,/,$fields[$interrtypecord]);
		my @inters = split(/,/,$fields[$interrcord]);
		my @interposes = split(/,/,$fields[$interr_poscord]);
		$fields[$interrtypecord] = join(",",reverse(@intertypes));
		$fields[$no_of_interruptionscord] = scalar(@interposes);
		for my $i (0 ... $fields[$no_of_interruptionscord]-1){
			if (exists $inters[$i] && $inters[$i] =~ /[a-zA-Z]/){
				$inters[$i] = reverse($inters[$i]);
				$interposes[$i] = $interposes[$i] + length($inters[$i]) - 1;
			}	
			else{
				$inters[$i] = "";
				$interposes[$i] = $interposes[$i] - 1;
			}
			$interposes[$i] = ($end - $start + 1) - $interposes[$i] + 1;
		}
		
		$fields[$interrcord] = join(",",reverse(@inters));
		$fields[$interr_poscord] = join(",",reverse(@interposes));
	}
	
	my $finalmicrosat = join("\t", @fields);
	return $finalmicrosat;			

}
sub reverse_micro{
	my $micro = reverse($_[0]);
	my @strand = split(/\s*/,$micro);
	for my $i (0 ... $#strand){
		if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
		if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
	}
	return join("",@strand);
}

#xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx 

sub forward_reverse_sputoutput_comparer	{
#	print "IN forward_reverse_sputoutput_comparer: @_\n";
	my $input0 = $_[0];  ###### the *nogap_unrand_match file
	my $input1 = $_[1];  ###### the real file, *sput* data
	my $input2 = $_[2];  ###### the reverse file, *sput* data
	my $output1 = $_[3]; ###### microsats different in real file
	my $output2 = $_[4]; ###### microsats missing in real file
	my $output3 = $_[5]; ###### microsats common among real and reverse file
	my $no_of_species = $_[6];
	
	$infocord = 2 + (4*$no_of_species) - 1;
	$typecord = 2 + (4*$no_of_species) + 1 - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
	$interrtypecord = $motifcord + 1;
	$interrcord = $motifcord + 2;
	$interr_poscord = $motifcord + 3;
	$no_of_interruptionscord = $motifcord + 4;
	$mergestarts  = $no_of_interruptionscord+ 1;
	$mergeends = $no_of_interruptionscord+ 2;
	$mergemicros = $no_of_interruptionscord+ 3;
	
	
	open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
	open(INF,"<$input1") or die "Cannot open file $input1 $!";
	open(INR,"<$input2") or die "Cannot open file $input2 $!";
	
	open(DIFF,">$output1") or die "Cannot open file $output1 $!";
	#open(MISS,">$output2") or die "Cannot open file $output2 $!";
	open(SAME,">$output3") or die "Cannot open file $output3 $!";
		
		
#	print "opened files \n";
	my $linecounter = 0;
	my $fcounter = 0;
	my $rcounter = 0;
	
	$printer = 0;
	#---------------------------------------------------------------------------------------------------
	# NOW ADDING FORWARD MICROSATELLITES TO HASH
	my %fmicros = ();
	my $microcounter=0;
	while (my $line = <INF>){
		$linecounter++;
		if ($line =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
		#	print $key, "#-#-#-#-#-#-#-#\n";
			push (@{$fmicros{$key}},$line);	
			$microcounter++;
		}
		else {
		#print $line;
		}
	}
#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
	close INF;
	my @deletedlines = ();
#	print "done forward hash \n";
	$linecounter = 0;
	#---------------------------------------------------------------------------------------------------
	# NOW ADDING REVERSE MICROSATELLITES TO HASH
	my %rmicros = ();
	$microcounter=0;
	while (my $line = <INR>){
		$linecounter++;
		if ($line =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
		#	print $key, "#-#-#-#-#-#-#-#\n";
			push (@{$rmicros{$key}},$line);	
			$microcounter++;
		}
		else {}
	}
#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
	close INR;
#	print "done reverse hash \n";
	$linecounter = 0;
	#---------------------------------------------------------------------------------------------------
	#---------------------------------------------------------------------------------------------------
	# NOW READING THE SEQUENCE FILE
	while(my $sine = <SEQ>){
		my %microstart=();
		my %microend=();
		my @sields = split(/\t/,$sine);
		my $key = ();
		if ($sine =~ /([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			$key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
		}
		else {
			next;
		}
		$printer = 0;
		my $sequence = $sields[$sequencepos];
		chomp $sequence;
		$sequence =~ s/ //g;
		my @localfs = ();
		my @localrs = ();
		
		if (exists $fmicros{$key}){
			@localfs = @{$fmicros{$key}};
			delete $fmicros{$key};		
		}
	
		my %forwardstarts = ();
		my %forwardends = ();
	
		foreach my $f (@localfs){
			my @fields = split(/\t/,$f);
			push (@{$forwardstarts{$fields[$startcord]}},$f);
			push (@{$forwardends{$fields[$endcord]}},$fields[$startcord]);
		}
	
		if (exists $rmicros{$key}){
			@localrs = @{$rmicros{$key}};
			delete $rmicros{$key};		
		}
		else{
		}
	
		foreach my $r (@localrs){
			chomp $r;
			my @rields = split(/\t/,$r);
#			print "rields = @rields\n" if $printer == 1;
			my $reciprocalstart = length($sequence) - $rields[$endcord] + 1;
			my $reciprocalend = length($sequence) - $rields[$startcord] + 1;
#			print "reciprocal start = $reciprocalstart = ",length($sequence)," - $rields[$endcord] + 1\n" if $printer == 1;
			my $microsat = reverse_micro(all_caps($rields[$microsatcord]));
			my @localcollection=();
			for my $i ($reciprocalstart+1 ... $reciprocalend-1){
				if (exists $forwardstarts{$i}){
					push(@localcollection, $forwardstarts{$i}[0] );
					delete $forwardstarts{$i}; 
				}
				if (exists $forwardends{$i}){
					next if !exists $forwardstarts{$forwardends{$i}[0]};
					push(@localcollection, $forwardstarts{$forwardends{$i}[0]}[0] );
				}
			}
			if (exists $forwardstarts{$reciprocalstart} && exists $forwardends{$reciprocalend}) {push(@localcollection,$forwardstarts{$reciprocalstart}[0]);}
			
			if (scalar(@localcollection) == 0){
				print SAME invert_microsat($r,($sequence) ), "\n";
			}
	
			elsif (scalar(@localcollection) == 1){
#				print "f microsat = $localcollection[0]\n"  if $printer == 1;
				my @lields = split(/\t/,$localcollection[0]);
				$lields[$microsatcord]=all_caps($lields[$microsatcord]);
#				print "comparing: $microsat and $lields[$microsatcord]\n" if $printer == 1;
#				print "coordinates are: $lields[$startcord]-$lields[$endcord] and $reciprocalstart-$reciprocalend\n" if $printer == 1;
				if ($microsat eq $lields[$microsatcord]){
					chomp $localcollection[0];
					print SAME $localcollection[0], "\n";
				}
				if ($microsat ne $lields[$microsatcord]){
					chomp $localcollection[0];
					my $newmicro = microsatChooser(join("\t",@lields), join("\t",@rields), $sequence);
#					print "newmicro = $newmicro\n"  if $printer == 1;
					if ($newmicro =~ /[a-zA-Z]/){
						print SAME $newmicro,"\n";
					}
					else{
					print DIFF join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n";
#					print join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n" if $printer == 1;
#					print "@rields\n@lields\n" if $printer == 1;
					}
				}			
			}
			else{
#				print "multiple found for $r --> ", join("\t",@localcollection),"\n" if $printer == 1;
			}
		}
	}	
		
	close(SEQ);
	close(INF);
	close(INR);
	close(DIFF);
	close(SAME);
	
}
sub all_caps{
	my @strand = split(/\s*/,$_[0]);
	for my $i (0 ... $#strand){
		if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
		if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
		if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
		if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
	}
	return join("",@strand);
}
sub microsatChooser{
	my $forward = $_[0];
	my $reverse = $_[1];
	my $sequence = $_[2];
	my $seqLength = length($sequence);
	$sequence =~ s/ //g;
	my @fields = split(/\t/,$forward);
	my @rields = split(/\t/,$reverse);	
	my $r_start = $seqLength - $rields[$endcord] + 1;
	my $r_end = $seqLength - $rields[$startcord] + 1;
	
	
	my $f_microsat = $fields[$microsatcord];
	my $r_microsat = $rields[$microsatcord];
	
	if ($fields[$typecord] =~ /\./ && $rields[$typecord] =~ /\./) {
		return $forward if length($f_microsat) >= length($r_microsat);
		return invert_microsat($reverse, $sequence) if length($f_microsat) < length($r_microsat);
	}
	return $forward if all_caps($fields[$motifcord]) eq all_caps($rields[$motifcord]) && $fields[$startcord] == $rields[$startcord] && $fields[$endcord] == $rields[$endcord];	

	my $f_microsat_copy = $f_microsat;
	my $r_microsat_copy = $r_microsat;
	$f_microsat_copy =~ s/^\[|\]$//g;
	$r_microsat_copy =~ s/^\[|\]$//g;

	my @f_microields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
	my @r_microields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
	my @f_intields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
	my @r_intields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
	
	my $f_motif = $fields[$motifcord];
	my $r_motif = $rields[$motifcord];
	my $f_motif_copy = $f_motif;
	my $r_motif_copy = $r_motif;
	$f_motif_copy =~ s/^\[|\]$//g;
	$r_motif_copy =~ s/^\[|\]$//g;

	my @f_motields = split(/\]\[/,$f_motif_copy);
	my @r_motields = split(/\]\[/,$r_motif_copy);

	my $f_purestretch = join("",@f_microields);
	my $r_purestretch = join("",@r_microields);

	if ($fields[$typecord]=~/nucleotide/ && $rields[$typecord]=~/nucleotide/){
#		print "now.. studying $forward\n$reverse\n" if $printer == 1;
		if ($fields[$typecord] eq $rields[$typecord]){
#			print "comparing motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;

			if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
				my $subset_answer = isSubset($forward, $reverse, $seqLength);
#				print "subset answer = $subset_answer\n" if $printer == 1;
				return $forward if $subset_answer == 1; 
				return invert_microsat($reverse, $sequence) if $subset_answer == 2; 
				return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
				return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
				return $forward if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) >= length($r_purestretch);
				return invert_microsat($reverse, $sequence) if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) < length($r_purestretch);
				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence) if $subset_answer == 3 ;			
			}
			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
			}
			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
				return $forward;
			}
			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
				return invert_microsat($reverse, $sequence);
			}
		}
		else{
			my $fmotlen = ();
			my $rmotlen = ();
	 		$fmotlen =1 if $fields[$typecord] eq "mononucleotide";
	 		$fmotlen =2 if $fields[$typecord] eq "dinucleotide";
	 		$fmotlen =3 if $fields[$typecord] eq "trinucleotide";
	 		$fmotlen =4 if $fields[$typecord] eq "tetranucleotide";
	 		$rmotlen =1 if $rields[$typecord] eq "mononucleotide";
	 		$rmotlen =2 if $rields[$typecord] eq "dinucleotide";
	 		$rmotlen =3 if $rields[$typecord] eq "trinucleotide";
	 		$rmotlen =4 if $rields[$typecord] eq "tetranucleotide";
			
			if ($fmotlen < $rmotlen){
				if (abs($fields[$startcord] -  $r_start) <= $fmotlen || abs($fields[$endcord] -  $r_end) <= $fmotlen ){
					return $forward;
				}
				else{
					return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
				}
			}
			if ($fmotlen > $rmotlen){
				if (abs($fields[$startcord] -  $r_start) <= $rmotlen || abs($fields[$endcord] -  $r_end) <= $rmotlen){
					return invert_microsat($reverse, $sequence);
				}
				else{
					return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
				}
			}
		}
	}
	if ($fields[$typecord] eq "compound" && $rields[$typecord] eq "compound"){
#			print "comparing compound motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
			if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
				my $subset_answer = isSubset($forward, $reverse, $seqLength);
#				print "subset answer = $subset_answer\n" if $printer == 1;
				return $forward if $subset_answer == 1; 
				return invert_microsat($reverse, $sequence) if $subset_answer == 2; 
#				print length($f_purestretch) ,">", length($r_purestretch)," \n" if $printer == 1;
				return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
				return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
				if ($subset_answer == 3){
					if ($fields[$startcord] < $r_start || $fields[$endcord] > $r_end){
						if (abs($fields[$startcord] -  $r_start) < length($f_motields[0]) || abs($fields[$endcord] -  $r_end)  < length($f_motields[$#f_motields]) ){
							return $forward;
						}
						else{
							return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
						}
					}
					if ($fields[$startcord] > $r_start || $fields[$endcord] < $r_end){
						if (abs($fields[$startcord] -  $r_start) < length($r_motields[0]) || abs($fields[$endcord] -  $r_end) < length($r_motields[$#r_motields]) ){
							return invert_microsat($reverse, $sequence);
						}
						else{
							return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
						}
					}
				}
			}
			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
			}
			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
				return $forward;
			}
			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
				return invert_microsat($reverse, $sequence);
			}
		
	}
	
	if ($fields[$typecord] eq "compound" && $rields[$typecord] =~ /nucleotide/){
#		print "one compound, one nucleotide\n" if $printer == 1; 
		return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
	}
	if ($fields[$typecord] =~ /nucleotide/ && $rields[$typecord]eq "compound"){
#		print "one compound, one nucleotide\n" if $printer == 1; 
		return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
	}
}

sub isSubset{
	my $forward = $_[0]; 	my @fields = split(/\t/,$forward);
	my $reverse = $_[1];	my @rields = split(/\t/,$reverse);
	my $seqLength = $_[2];
	my $r_start = $seqLength - $rields[$endcord] + 1;
	my $r_end = $seqLength - $rields[$startcord] + 1;
#	print "we have $fields[$startcord] -> $fields[$endcord] && $r_start -> $r_end\n" if $printer == 1;
	return "0" if $fields[$startcord] == $r_start && $fields[$endcord] == $r_end;
	return "1" if $fields[$startcord] <= $r_start && $fields[$endcord] >= $r_end;
	return "2" if $r_start <= $fields[$startcord] && $r_end >= $fields[$endcord];
	return "3";
}

sub motifBYmotif_match{
	my $forward = $_[0];
	my $reverse = $_[1];
	$forward =~ s/^\[|\]$//g;
	$reverse =~ s/^\[|\]$//g;
	my @f_motields=split(/\]\[/, $forward);
	my @r_motields=split(/\]\[/, $reverse);
	my $finalresult = 0;

	if (scalar(@f_motields) != scalar(@r_motields)){
		my $subresult = 0;
		my @mega = (); my @sub = ();
		@mega = @f_motields if scalar(@f_motields) > scalar(@r_motields);
		@sub = @f_motields if scalar(@f_motields) > scalar(@r_motields);
		@mega = @r_motields if scalar(@f_motields) < scalar(@r_motields);
		@sub = @r_motields if scalar(@f_motields) < scalar(@r_motields);
		
		for my $i (0 ... $#sub){
			my $probe = $sub[$i].$sub[$i];
#			print "probing $probe and $mega[$i]\n" if $printer == 1;
			if ($probe =~ /$mega[$i]/) {$subresult = 1; }
			else {$subresult = 0; last; }
		}
		
		return 0 if $subresult == 0;
		return 2 if $subresult == 1 && scalar(@f_motields) > scalar(@r_motields); # r is subset of f
		return 3 if $subresult == 1 && scalar(@f_motields) < scalar(@r_motields);  # ^reverse
		
	}
	else{
		for my $i (0 ... $#f_motields){
			my $probe = $f_motields[$i].$f_motields[$i];
			if ($probe =~ /$r_motields[$i]/) {$finalresult = 1 ;}
			else {$finalresult = 0 ;last;}
		}
	}
#	print "finalresult = $finalresult\n" if $printer == 1;
	return $finalresult;
}

sub merge_microsats{
	my @input = @_;
	my $sequence = pop(@input);
	$sequence =~ s/ //g;
	my @seq_string = @input;
#	print "IN: merge_microsats\n";
#	print "recieved for merging: ", join("\n", @seq_string), "\nsequence = $sequence\n"; 
	my $start;
	my $end;
	my @micros = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @seq_string; 
#	print "\nrearranged into @micros \n";
	my (@motifs, @microsats, @interruptiontypes, @interruptions, @interrposes, @no_of_interruptions, @types, @starts, @ends, @mergestart, @mergeend, @mergemicro) = ();
	my @fields = ();
	for my $i (0 ... $#micros){
		chomp $micros[$i];
		@fields = split(/\t/,$micros[$i]);
		push(@types, $fields[$typecord]);
		push(@motifs, $fields[$motifcord]);		

		if (exists $fields[$interrtypecord]){ push(@interruptiontypes, $fields[$interrtypecord]);}
			else { push(@interruptiontypes, "NA"); }
		if (exists $fields[$interrcord]) {push(@interruptions, $fields[$interrcord]);}
			else { push(@interruptions, "NA"); }
		if (exists $fields[$interr_poscord]) { push(@interrposes, $fields[$interr_poscord]);}	
			else { push(@interrposes, "NA"); }
		if (exists $fields[$no_of_interruptionscord]) {push(@no_of_interruptions, $fields[$no_of_interruptionscord]);}
			else { push(@no_of_interruptions, "NA"); }
		if(exists $fields[$mergestarts]) { @mergestart = (@mergestart, split(/\./,$fields[$mergestarts]));}
			else { push(@mergestart, $fields[$startcord]); }
		if(exists $fields[$mergeends]) { @mergeend = (@mergeend, split(/\./,$fields[$mergeends]));}
			else { push(@mergeend, $fields[$endcord]); }
		if(exists $fields[$mergemicros]) { push(@mergemicro, $fields[$mergemicros]);}
			else { push(@mergemicro, $fields[$microsatcord]); }
			

	}
	$start = smallest_number(@mergestart);
	$end = largest_number(@mergeend);
	my $microsat_entry = "[".substr( $sequence, $start-1, ($end - $start + 1) )."]";
	my $microsat = join("\t", @fields[0 ... $infocord], join(".", @types), $start, $fields[$strandcord], $end, $microsat_entry , join(".", @motifs), join(".", @interruptiontypes),join(".", @interruptions),join(".", @interrposes),join(".", @no_of_interruptions), join(".", @mergestart), join(".", @mergeend) , join(".", @mergemicro));
	return $microsat;
}

sub slided_microsat{
	my $forward = $_[0]; 	my @fields = split(/\t/,$forward);
	my $reverse = $_[1];	my @rields = split(/\t/,$reverse);
	my $seqLength = $_[2];
	my $r_start = $seqLength - $rields[$endcord] + 1;
	my $r_end = $seqLength - $rields[$startcord] + 1;
	my $motlen =();
	 $motlen =1 if $fields[$typecord] eq "mononucleotide";
	 $motlen =2 if $fields[$typecord] eq "dinucleotide";
	 $motlen =3 if $fields[$typecord] eq "trinucleotide";
	 $motlen =4 if $fields[$typecord] eq "tetranucleotide";
	
	if (abs($fields[$startcord] - $r_start) < $motlen || abs($fields[$endcord] - $r_end) < $motlen ) {
		return 0;
	}
	else{
		return 1;
	}
	
}

#xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx 



#xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx 
sub new_multispecies_t10{
	my $input1 = $_[0];  #gap_op_unrand_match
	my $input2 = $_[1];  #sput
	my $output = $_[2];  #output
	my $bin = $output."_bin";
	my $orgs = join("|",split(/\./,$_[3]));
	my @organisms = split(/\./,$_[3]);
	my $no_of_species = scalar(@organisms); #3
	my $t10info = $output."_info";
	$prinkter = 0;
	
	open (MATCH, "<$input1");
	open (SPUT, "<$input2");
	open (OUT, ">$output");
	open (INFO, ">$t10info");


	sub microsat_bracketer;
	sub custom;
	my %seen = ();
	$infocord = 2 + (4*$no_of_species) - 1;
	$typecord = 2 + (4*$no_of_species) + 1 - 1;
	$startcord = 2 + (4*$no_of_species) + 2 - 1;
	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
	$endcord = 2 + (4*$no_of_species) + 4 - 1;
	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
	#---------------------------------------------------------------------------------------------------------------#
	#	MAKING A HASH FROM SPUT, WITH HASH KEYS GENERATED BELOW AND SEQUENCES STORED AS VALUES	#
	#---------------------------------------------------------------------------------------------------------------#
	my $linecounter = 0;
	my $microcounter = 0;
	while (my $line = <SPUT>){
		chomp $line;
	#	print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
		next if $line !~ /[0-9a-z]+/;
		$linecounter++;
	#		my $key = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
	#		print $key, "#-#-#-#-#-#-#-#\n";
		if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			my $key = join("\t",$1, $2, $3, $4, $5);
#			print "key = $key\n" if $prinkter == 1;
			push (@{$seen{$key}},$line);	
			$microcounter++;
		}
		else {		print "could not make ker in SPUT : \n$line \n"; 
		}
	}
#	print "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
#	print INFO  "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
	close SPUT;
	
	#----------------------------------------------------------------------------------------------------------------
	
	#-------------------------------------------------------------------------------------------------------#
	#	THE ENTIRE CODE BELOW IS DEVOTED TO GENERATING HASH KEYS FROM MATCH FOLLOWED BY			#
	#	USING THESE HASH KEYS TO CORRESPOND EACH SEQUENCE IN FIRST FILE TO ITS MICROSAT REPEATS IN			#
	#   SECOND FILE FOLLOWED BY																				#
	#	FINDING THE EXACT LOCATION OF EACH MICROSAT REPEAT WITHIN EACH SEQUENCE USING THE 'index' FUNCTION	#
	#-------------------------------------------------------------------------------------------------------#		
	my $ref = 0;
	my $ref2 = 0;
	my $ref3 = 0;
	my $ref4 = 0;
	my $deletes= 0;
	my $duplicates = 0;
	my $neighbors = 0;
	my $tooshort = 0;
	my $prevmicrol=();
	my $startnotfound = 0;
	my $matchkeysformed = 0;
	my $keysused = 0;
	
	while (my $line = <MATCH>)	{
#		print    colored ['magenta'], $line  if $prinkter == 1;
		next if $line !~ /[a-zA-Z0-9]/;
		chomp $line;	
		my @fields2 = split(/\t/,$line);
		my $key2 = ();
	#		$key2 = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
		if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
			$matchkeysformed++;
			$key2 = join("\t",$1, $2, $3, $4, $5);
#			print "key = $key2 \n" if $prinkter == 1; 
		}
		else{
#			print "could not make ker in SEQ : $line\n";
			next;
		}
		my $sequence = $fields2[$sequencepos];
		$sequence =~ s/\*/-/g;
		my $count = 0;
		if (exists $seen{$key2}){
			$keysused++;
			my @unsorted_raw = @{$seen{$key2}};
			delete $seen{$key2};
			my @sequencearr = split(/\s*/, $sequence);
			
#			print "sequencearr = @sequencearr\n" if $prinkter == 1;
			
			my $counter;
	
			my %start_database = ();
			my %end_database = ();
			foreach my $uns (@unsorted_raw){
				my @uields = split(/\t/,$uns);
				$start_database{$uields[$startcord]} = $uns;
				$end_database{$uields[$endcord]} = $uns;
			}
			
			my @unsorted = ();
			my %starts = (); my %ends = ();
#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $prinkter == 1; foreach (@unsorted_raw) {print colored ['blue'],$_,"\n" if $prinkter == 1;}
			for my $u (0 ... $#unsorted_raw){
				my @tields = split(/\t/,$unsorted_raw[$u]);
				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
				push(@unsorted, $unsorted_raw[$u]);
				$starts{$tields[$startcord]} = $unsorted_raw[$u];
#				print "in starts : $tields[$startcord] -> $unsorted_raw[$u]\n" if $prinkter == 1;
			}
			
			my $basecounter= 0;
			my $gapcounter = 0;
			my $poscounter = 0;
			
			for my $s (@sequencearr){
				
				$poscounter++;
				if ($s eq "-"){
					$gapcounter++; next;
				}
				else{
					$basecounter++;
				}
				
				
				#print "s = $s, poscounter = $poscounter, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
				#print "s = $s, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
				#print "s = $s, gapcpunter = $gapcounter\n" if $prinkter == 1;
				
				if (exists $starts{$basecounter}){
					my $locus = $starts{$basecounter};
#					print "locus identified = $locus\n" if $prinkter == 1;
					my @fields3 = split(/\t/,$locus);
					my $start = $fields3[$startcord];
					my $end = $fields3[$endcord];
					my $motif = $fields3[$motifcord];
					my $microsat = $fields3[$microsatcord];				
					my @leftbracketpos = ();			
					my @rightbracketpos = (); 
					my $bracket_picker = 'no';
					my $leftbrackets=();
					my $rightbrackets = ();
					my $micro_cpy = $microsat;
#					print "microsat = $microsat\n" if $prinkter == 1;
					while($microsat =~ m/\[/g) {push(@leftbracketpos, (pos($microsat)));  $leftbrackets = join("__",@leftbracketpos);$bracket_picker='yes';}
					while($microsat =~ m/\]/g) {push(@rightbracketpos, (pos($microsat))); $rightbrackets = join("__",@rightbracketpos);}
					$microsat =~ s/[\[\]\-\*]//g;
#					print "microsat = $microsat\n" if $prinkter == 1;
					my $human_search = join '-*', split //, $microsat;
					my $temp = substr($sequence, $poscounter-1);
#					print "with poscounter = $poscounter\n" if $prinkter == 1;
					my $search_result = ();
					my $posnow  = ();
					while ($temp =~ /($human_search)/gi){
						$search_result = $1;
					#	$posnow  = pos($temp);
						last;
					}
					
					my @gapspos = ();
					while($search_result =~ m/-/g) {push(@gapspos, (pos($search_result))); }
					my $gaps  = join("__",@gapspos);
	
					my $final_microsat = $search_result;
					if ($bracket_picker eq "yes"){
						$final_microsat = microsat_bracketer($search_result, $gaps,$leftbrackets,$rightbrackets);
					}
					
					my $outsentence = join("\t",join ("\t",@fields3[0 ... $infocord]),$fields3[$typecord],$fields3[$motifcord],$gapcounter,$poscounter,$fields3[$strandcord],$poscounter + length($search_result) -1 ,$final_microsat);
					
					if ($bracket_picker eq "yes") {
						$outsentence = $outsentence."\t".join("\t",@fields3[($motifcord+1) ... $#fields3]);
					}
					print OUT $outsentence,"\n";	
				}			
			}
		}
	}
	my $unusedkeys = scalar(keys %seen);
	print INFO "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
	print INFO "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined  in +-10 bp neighboring region: $neighbors \n";
	print INFO "microsatellites too short = $tooshort\n";
	print INFO "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
	
	#print  "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
	#print  "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined  in +-10 bp neighboring region: $neighbors \n";
	#print  "microsatellites too short = $tooshort\n";
	#print  "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
	#print "unused keys = \n",join("\n", (keys %seen)),"\n";
	close (MATCH);
	close (SPUT);
	close (OUT);
	close (INFO);
}	

sub microsat_bracketer{
#	print "in bracketer: @_\n";
	my ($microsat, $gapspos, $leftbracketpos, $rightbracketpos) = @_;
	my @gaps = split(/__/,$gapspos);
	my @lefts = split(/__/,$leftbracketpos);
	my @rights = split(/__/,$rightbracketpos);
	my @new=();
	my $pure = $microsat;
	$pure =~ s/-//g;
	my $off = 0;
	my $finallength  = length($microsat) + scalar(@lefts)+scalar(@rights);
	push(@gaps, 0);
	push(@lefts,0);
	push(@rights,0);
	
	for my $i (1 ... $finallength){
#		print "1 current i = >$i<>, right = >$rights[0]<  gap = $gaps[0] left = >$lefts[0]< and $rights[0] == $i\n";
		if($rights[0] == $i){
	#		print "pushed a ]\n";
			push(@new, "]");
			shift(@rights);
			push(@rights,0);
			for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}	
			next;
		}
		if($gaps[0] == $i){
	#		print "pushed a -\n";
			push(@new, "-");
			shift(@gaps);
			push(@gaps, 0);
			for my $j (0 ... scalar(@rights)-1) {$rights[$j]++;}	
			for my $j (0 ... scalar(@lefts)-1) {$lefts[$j]++;}	

			next;
		}
		if($lefts[0] == $i){
#			print "pushed a [\n";
			push(@new, "[");
			shift(@lefts);
			push(@lefts,0);
			for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}	
			next;
		}
		else{
			my $pushed = substr($pure,$off,1);
			$off++;
			push(@new,$pushed );
#			print "pushed an alphabet, now new = @new, pushed = $pushed\n";
			next;
		}
	}
	my $returnmicrosat = join("",@new);
#	print "final microsat = $returnmicrosat \n";
	return($returnmicrosat);
}

#xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx 


#xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx 
sub multiSpecies_orthFinder4{
	#print "IN multiSpecies_orthFinder4: @_\n";
	my @handles = ();
	#1 SEPT 30TH 2008
	#2 THIS CODE (multiSpecies_orthFinder4.pl)  IS BEING MADE SO THAT IN THE REMOVAL OF MICROSATELLITES THAT ARE CLOSER TO EACH OTHER
	#3 THAN 50 BP (HE 50BP RADIUS OF EXCLUSION), WE ARE LOOKING ACCROSS ALIGNMENT BLOCKS.. AND NOT JUST LOOKING WITHIN THE ALIGNMENT BLOCKS. THIS WILL
	#4 POTENTIALLY REMOVE EVEN MORE MICROSATELLITES THAN BEFORE, BUT THIS WILL RESCUE THOSE MICROSATELLITES THAT WERE LOST
	#5 DUE TO OUR PREVIOUS REQUIREMENT FROM VERSION 3, THAT MICROSATELLITES THAT ARE CLOSER TO THE BOUNDARY THAN 25 BP NEED TO BE REMOVED
	#6 SUCH A REQUIREMENT WAS A CRUDE WAY TO IMPOSE THE ABOVE 50 BP RADIUS OF EXCLUSION ACCROSS THE ALIGNMENT BLOCKS WITHOUT ACTUALLY
	#7 CHECKING COORDINATES OF THE EXCLUDED MICROSATELLITES.
	#8 IN ORDER TO TAKE CARE OF THE CASES WHERE MICROSATELLITES ARE PRELIOUSLY CLOSE TO ENDS OF THE ALIGNMENT BLOCKS, WE IMPOSE HERE
	#9 A NEW REQUIREMENT THAT FOR A MICROSATELLITE TO BE CONSIDERED, ALL THE SPECIES NEED TO HAVE AT LEAST 10 BP OF NON-MICROSATELLITE SEQUENCE
	#10 ON EITHER SIDE OF IT.. GAPLESS. THIS INFORMATION IS STORED IN THE VARIABLE: $FLANK_SUPPORT. THIS PART, INSTEAD OF BEING INCLUDED IN
	#11 THIS CODE, WILL BE INCLUDED IN A NEW CODE THAT WE WILL BE WRITING AS PART OF THE PIPELINE: multiSpecies_microsatSetSelector.pl
	
	#1 trial run:
	#2 perl ../../../codes/multiSpecies_orthFinder4.pl /gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr22.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt H.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:C.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:O.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:R.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:M.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2 orth22 hg18:panTro2:ponAbe2:rheMac2:calJac1 50
	
	$prinkter=0;
	
	#############
	my $CLUSTER_DIST = $_[4];
	#############
	
	
	my $aligns = $_[0];
	my @micros = split(/:/, $_[1]);
	my $orth = $_[2];
	#my $not_orth = "notorth";
	@tags = split(/:/, $_[3]);
	
	$no_of_species=scalar(@tags);
	my $junkfile = $orth."_junk";
	#open(JUNK,">$junkfile");
	
	#my $info = $output1."_info";
	#print "inputs are : \n"; foreach(@micros){print $_,"\n";} 
	#print "info = @_\n";
	
	
	open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
	open (ORTH, ">$orth");
	my $output=$orth."_out";
	open (OUTP, ">$output");
	
	
	#open (NORTH, ">$not_orth");
	#open (INF, ">$info");
	my $i = 0;
	foreach my $path (@micros){
		$handles[$i] = IO::Handle->new();
		open ($handles[$i], "<$path") or die "Can't open microsat file $path : $!";
		$i++;
	}
	
	#print "Opened files\n";
	
	
	$infocord = 2 + (4*$no_of_species) - 1;
	$typecord = 2 + (4*$no_of_species) + 1 - 1;
	$motifcord = $typecord + 1;
	$gapcord = $motifcord+1;
	$startcord = $gapcord + 1;
	$strandcord = $startcord + 1;
	$endcord = $strandcord + 1;
	$microsatcord = $endcord + 1;
	$sequencepos = 2 + (4*$no_of_species) + 1 -1 ; 
	#$sequencepos = 17;
	#	GENERATING HASHES CONTAINING CHIMP AND HUMAN DATA FROM ABOVE FILES
	#----------------------------------------------------------------------------------------------------------------
	my @hasharr = ();
	foreach my $path (@micros){
		open(READ, "<$path") or die "Cannot open file $path :$!";
		my %single_hash = ();
		my $key = ();
		my $counter = 0;
		while (my $line = <READ>){
			$counter++;
		#	print $line;
			chomp $line;
			my @fields1 = split(/\t/,$line);
			if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
				$key = join("\t",$1, $2,  $4, $5);
	
#					print "key =  : $key\n" if $prinkter == 1;
				
#				print $line if $prinkter == 1;
				push (@{$single_hash{$key}},$line);
			}
			else{
			#	print "microsat line incompatible\n";
			}
		}
		push @hasharr, {%single_hash};
	#	print "@{$single_hash{$key}} \n";
#		print "done $path: counter = $counter\n" if $prinkter == 1;
		close READ;
	}	
#	print "Done hashes\n";
	#----------------------------------------------------------------------------------------------------------------
	my $question=();
	#----------------------------------------------------------------------------------------------------------------
	my @contigstarts = ();
	my @contigends = ();
	
	my %contigclusters = ();
	my %contigclustersFirstStartOnly=();
	my %contigclustersLastEndOnly=();
	my %contigclustersLastEndLengthOnly=();
	my %contigclustersFirstStartLengthOnly=();
	my %contigpath=();
	my $dotcounter = 0;
	while (my $line = <BO>){
#		print "x" x 60, "\n" if $prinkter == 1;
		$dotcounter++;
		
	
		
#		print "." if $dotcounter % 100 ==0;
#		print "\n" if $dotcounter % 5000 ==0;
		next if $line !~ /^[0-9]+/;
#		print $line if $prinkter == 1;
		chomp $line;	
		my @fields2 = split(/\t/,$line);
		my $key2 = ();
		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
			$key2 = join("\t",$1, $2,  $4, $5);
		}
		else {
#			print "seq line $line incompatible\n" if $prinkter == 1; 
			next;}
		
		
		
		
		
		
		my @sequences = ();
		for (0 ... $#tags){
			my $seq = <BO>;
	#		print $seq;
			chomp $seq;
			push(@sequences , " ".$seq);
		}
		my @origsequences = @sequences;
		my $seqcopy = $sequences[0];
		my @strings = ();
		$seqcopy =~ s/[a-zA-Z]|-/x/g;
		my @string = split(/\s*/,$seqcopy);
	
		for my $s (0 ... $#tags){
			$sequences[$s] =~ s/-//g;
			$sequences[$s] =~ s/[a-zA-Z]/x/g;		
	#		print "length  of sequence = ",length($sequences[$s]),"\n";
			my @tempstring = split(/\s*/,$sequences[$s]);
			push(@strings, [@tempstring])
			
		}
	
		my @species_list = ();
		my @micro_count = 0;
		my @starthash = ();
		my $stopper = 1;
		my @endhash = ();
		
		my @currentcontigstarts=();
		my @currentcontigends=();
		my @currentcontigchrs=();
		
		for my $i (0 ... $#tags){
#			print "searching for : if exists  hasharr: $i : $tags[$i] : $key2 \n" if $prinkter == 1;
			my @temparr = (); 
	
			if (exists $hasharr[$i]{$key2}){
				@temparr =  @{$hasharr[$i]{$key2}};
				
#				print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
				$line =~ /$tags[$i]\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/;
#				print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1; 
				my $startkey = $1."_".$2; print "adding start key for this alignmebt block: $startkey to species $tags[$i]\n" if $prinkter == 1;
				my $endkey = $1."_".$3; print "adding end key for this alignmebt block: $endkey to species $tags[$i]\n" if $prinkter == 1;
				$contigstarts[$i]{$startkey}= $key2;
				$contigends[$i]{$endkey}= $key2;
#				print "confirming existance: \n" if $prinkter == 1;
#				print "present \n" if exists $contigends[$i]{$endkey} && $prinkter == 1;
#				print "absent \n" if !exists $contigends[$i]{$endkey} && $prinkter == 1;			
				$currentcontigchrs[$i]=$1;
				$currentcontigstarts[$i]=$2;
				$currentcontigends[$i]=$3;
				
			} # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
			else {
				push (@starthash, {0 => "0"});
				push (@endhash, {0 => "0"});
				$currentcontigchrs[$i] = 0;
				next;
			}
			$stopper = 0;
	#		print "exists: @temparr\n" if $prinkter == 1;
			push(@micro_count, scalar(@temparr));
			push(@species_list, [@temparr]);	
			my @tempstart = (); my @tempend = ();
			my %localends = ();
			my %localhash = ();
	#		print "---------------------------\n";
	
			foreach my $templine (@temparr){
#				print "templine = $templine\n" if $prinkter == 1;
				my @tields = split(/\t/,$templine);
				my $start = $tields[$startcord]; # - $tields[$gapcord];
				my $end = $tields[$endcord]; #- $tields[$gapcord];
				my $realstart = $tields[$startcord]- $tields[$gapcord];
				my $gapsinmicrosat = ($tields[$microsatcord] =~ s/-/-/g);
				$gapsinmicrosat = 0 if $gapsinmicrosat !~ /[0-9]+/;
	#			print "infocord = $infocord  typecord = $typecord  motifcord = $motifcord  gapcord = $gapcord  startcord = $startcord strandcord = $strandcord endcord = $endcord   microsatcord = $microsatcord  sequencepos = $sequencepos\n";
				my $realend = $tields[$endcord]- $tields[$gapcord]- $gapsinmicrosat;
		#		print "real start = $realstart, realend = $realend \n";
				for my $pos ($realstart ... $realend){  $strings[$i][$pos] = $strings[$i][$pos].",".$i.":".$start."-".$end;}
				push(@tempstart, $start);
				push(@tempend, $end);
				$localhash{$start."-".$end} = $templine;
				}
			push @starthash, {%localhash};
			my $foundclusters  =findClusters(join("!",@{$strings[$i]}), $CLUSTER_DIST);
	
			my @clusters = split(/_/,$foundclusters);
			
			my $clustno = 0;
			
			foreach my $cluster (@clusters) {
				my @constituenst = split(/,/,$cluster);
#				print "clusters returned: @constituenst\n" if $prinkter == 1;
			}
	
			@string = split("_",stringPainter(join("_",@string),$foundclusters));
			
			
		}
		next if $stopper == 1;
	
#		print colored ['blue'],"FINAL:\n" if $prinkter == 1;
		my $finalclusters  =findClusters(join("!",@string), 1);
#		print colored ['blue'],"----------------------\n" if $prinkter == 1;
		my @clusters = split(/,/,$finalclusters);
#			print "@string\n" if $prinkter == 1;
#			print "@clusters\n" if $prinkter == 1;
#			print "------------------------------------------------------------------\n" if $prinkter == 1;
		
		my $clustno = 0;
		
	#	foreach my $cluster (@clusters) {
	#		my @constituenst = split(/,/,$cluster);
	#		print "clusters returned: @constituenst\n";
	#	}
	
		next if (scalar @clusters == 0);
	
		my @contigcluster=();
		my $clusterno=0;
		my @contigClusterstarts=();
		my @contigClusterends = ();
		
		foreach my $clust (@clusters){
	 #		print "cluster: $clust\n";   		
			$clusterno++;
			my @localclust = split(/\./, $clust);
			my @result = ();
			my @starts = ();
			my @ends = ();
	 
			for my $i (0 ... $#localclust){
	 #			print "localclust[$i]: $localclust[$i]\n";   		
				my @pattern = split(/:/, $localclust[$i]);
				my @cords = split(/-/, $pattern[1]);
				push (@starts, $cords[0]);
				push (@ends, $cords[1]);
			}
	
			my $extremestart = smallest_number(@starts);
			my $extremeend = largest_number(@ends);
			push(@contigClusterstarts, $extremestart);
			push(@contigClusterends, $extremeend);
#			print "cluster starts from $extremestart and ends at $extremeend \n" if $prinkter == 1 ;
					
			foreach my $clustparts (@localclust){
				my @pattern = split(/:/, $clustparts);
	# 			print "printing from pattern: $pattern[1]: $starthash[$pattern[0]]{$pattern[1]}\n";
				push (@result, $starthash[$pattern[0]]{$pattern[1]});
			}
			push(@contigcluster, join("\t", @result));
#			print join("\t", @result),"<-result \n" if $prinkter == 1 ;
		}	
		
	
		my $firstclusterstart = smallest_number(@contigClusterstarts);
		my $lastclusterend = largest_number(@contigClusterends);
		
		
		$contigclustersFirstStartOnly{$key2}=$firstclusterstart;	
		$contigclustersLastEndOnly{$key2} = $lastclusterend;
		$contigclusters{$key2}=[ @contigcluster ];
#		print "currentcontigchr are @currentcontigchrs , firstclusterstart = $firstclusterstart, lastclusterend = $lastclusterend\n " if $prinkter == 1;
		for my $i (0 ... $#tags){
			#1 check if there exists adjacent alignment block wrt coordinates of this species.
			next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block.. 
												 #2	no need to worry about proximity of anything in adjacent block!
			
			#1 BELOW, the following is really to calclate the distance between the end coordinate of the 
			#2 cluster and the end of the gap-free sequence of each species. this is so that if an
			#3 adjacent alignment block is found lateron, the exact distance between the potentially
			#4 adjacent microsat clusters can be found here. the exact start coordinate will be used
			#5 immediately below.
	#		print "full sequence = $origsequences[$i] and its length = ",length($origsequences[$i])," \n" if $prinkter == 1;		
			
			my $species_startsubstring = substr($origsequences[$i], 0, $firstclusterstart);
			my $species_endsubstring = ();
			
			if (length ($origsequences[$i]) <= $lastclusterend+1){ $species_endsubstring = "";}
			else{  $species_endsubstring = substr($origsequences[$i], $lastclusterend+1);}
			
#			print "\nnot defined species_endsubstring...\n"  if !defined $species_endsubstring && $prinkter == 1;
#			print "for species: $tags[$i]: \n" if $prinkter == 1;
			
			$species_startsubstring =~ s/-| //g;
			$species_endsubstring =~ s/-| //g;
			$contigclustersLastEndLengthOnly{$key2}[$i]=length($species_endsubstring);
			$contigclustersFirstStartLengthOnly{$key2}[$i]=length($species_startsubstring);
	
	
	
#			print "species_startsubstring = $species_startsubstring, and its length =",length($species_startsubstring)," \n" if $prinkter == 1;
#			print "species_endsubstring = $species_endsubstring, and its length =",length($species_endsubstring)," \n" if $prinkter == 1;
#			print "attaching to contigclustersLastEndOnly: $key2: $i\n" if $prinkter == 1;	
			
#			print "just confirming: $contigclustersLastEndLengthOnly{$key2}[$i] \n" if $prinkter == 1;
	
		}
	
	
	}
#	print "\ndone the job of filling... \n";
	#///////////////////////////////////////////////////////////////////////////////////////
	#///////////////////////////////////////////////////////////////////////////////////////
	#///////////////////////////////////////////////////////////////////////////////////////
	#///////////////////////////////////////////////////////////////////////////////////////
	$prinkter=0;
	open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
	
	my %clusteringpaths=();
	my %clustersholder=();
	my %foundkeys=();
	my %clusteringpathsRev=();
	
	
	my $totalcount=();
	my $founkeys_enteredcount=();
	my $transfered=0;
	my $complete_transfered=0;
	my $plain_transfered=0;
	my $existing_removed=0;
	
	while (my $line = <BO>){
#		print "x" x 60, "\n" if $prinkter == 1;
		next if $line !~ /^[0-9]+/;
		#print $line;
		chomp $line;	
		my @fields2 = split(/\t/,$line);
		my $key2 = ();
		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
			$key2 = join("\t",$1, $2,  $4, $5);
		}
		
		else {print "seq line $line incompatible\n"; next;}
#		print "KEY =  : $key2\n" if $prinkter == 1;
	
	
		my @currentcontigstarts=();
		my @currentcontigends=();
		my @currentcontigchrs=();
		my @clusters = ();
		my @clusterscopy=();
		if (exists $contigclusters{$key2}){
			@clusters =  @{$contigclusters{$key2}};
			@clusterscopy=@clusters;
			for my $i (0 ... $#tags){
	#			print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
				if ($line =~ /$tags[$i]\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/){
	#				print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1; 
					my $startkey = $1."_".$2; #print "adding start key for this alignmebt block: $startkey to species $tags[$i]\n" if $prinkter == 1;
					my $endkey = $1."_".$3; #print "adding end key for this alignmebt block: $endkey to species $tags[$i]\n" if $prinkter == 1;
					$currentcontigchrs[$i]=$1;
					$currentcontigstarts[$i]=$2;
					$currentcontigends[$i]=$3;			
				}
				else {
					$currentcontigchrs[$i] = 0;
	#				print "no microsat clusters for $key2\n" if $prinkter == 1; next;	
				}
			}
		} # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
		
		my @sequences = ();
		for (0 ... $#tags){
			my $seq = <BO>;
	#		print $seq;
			chomp $seq;
			push(@sequences , " ".$seq);
		}
		
		next if scalar @currentcontigchrs == 0;
		
	#	print "contigchrs= @currentcontigchrs \n" if $prinkter == 1;
		my %visitedcontigs=();
	
		for my $i (0 ... $#tags){
			#1 check if there exists adjacent alignment block wrt coordinates of this species.
			next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block.. 
													#2	no need to worry about proximity of anything in adjacent block!
			@clusters=@clusterscopy;
			#1 BELOW, the following is really to calclate the distance between the end coordinate of the 
			#2 cluster and the end of the gap-free sequence of each species. this is so that if an
			#3 adjacent alignment block is found lateron, the exact distance between the potentially
			#4 adjacent microsat clusters can be found here. the exact start coordinate will be used
			#5 immediately below.
			my $firstclusterstart = $contigclustersFirstStartOnly{$key2};
			my $lastclusterend = $contigclustersLastEndOnly{$key2};
					
			my $key3 = $currentcontigchrs[$i]."_".($currentcontigstarts[$i]);
#			print  "check if exists $key3 in  contigends for $i\n" if $prinkter == 1; 
			
			if (exists($contigends[$i]{$key3}) && !exists $visitedcontigs{$contigends[$i]{$key3}}){
				$visitedcontigs{$contigends[$i]{$key3}} = $contigends[$i]{$key3}; #1 this array keeps track of adjacent contigs that we have already visited, thus saving computational time and potential redundancies#
	#			print "just checking the hash visitedcontigs: ",$visitedcontigs{$contigends[$i]{$key3}} ,"\n" if $prinkter == 1;
	
				#1 extract coordinates of the last cluster of this found alignment block
#				print "key of the found alignment block = ", $contigends[$i]{$key3},"\n" if $prinkter == 1;
	#			print "we are trying to mine: contigclustersAllLastEndLengthOnly_raw: $contigends[$i]{$key3}: $i \n" if $prinkter == 1;
	#			print "EXISTS\n" if exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
	#			print "does NOT EXIST\n" if !exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
				my @contigclustersAllFirstStartLengthOnly_raw=@{$contigclustersFirstStartLengthOnly{$key2}};
				my @contigclustersAllLastEndLengthOnly_raw=@{$contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}};
				my @contigclustersAllFirstStartLengthOnly=(); my @contigclustersAllLastEndLengthOnly=();
				
				for my $val (0 ... $#contigclustersAllFirstStartLengthOnly_raw){
	#				print "val = $val\n" if $prinkter == 1;
					if (defined $contigclustersAllFirstStartLengthOnly_raw[$val]){
						push(@contigclustersAllFirstStartLengthOnly, $contigclustersAllFirstStartLengthOnly_raw[$val]) if $contigclustersAllFirstStartLengthOnly_raw[$val] =~ /[0-9]+/;
					}
				}
	#			print "-----\n" if $prinkter == 1;
				for my $val (0 ... $#contigclustersAllLastEndLengthOnly_raw){
	#				print "val = $val\n" if $prinkter == 1;
					if (defined $contigclustersAllLastEndLengthOnly_raw[$val]){
						push(@contigclustersAllLastEndLengthOnly, $contigclustersAllLastEndLengthOnly_raw[$val]) if  $contigclustersAllLastEndLengthOnly_raw[$val] =~ /[0-9]+/;
					}
				}
				
				
	#			print "our two arrays are: starts = <@contigclustersAllFirstStartLengthOnly> ......... and ends = <@contigclustersAllLastEndLengthOnly>\n" if $prinkter == 1;
	#			print "the last cluster's end in that one is: ",smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly)," = ", smallest_number(@contigclustersAllFirstStartLengthOnly)," + ",smallest_number(@contigclustersAllLastEndLengthOnly),"\n" if $prinkter == 1; 
				
	#			if ($contigclustersFirstStartLengthOnly{$key2}[$i] + $contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}[$i] < 50){
				if (smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly) < $CLUSTER_DIST){
					my @regurgitate = @{$contigclusters{$contigends[$i]{$key3}}}; 
					$regurgitate[$#regurgitate]=~s/\n//g;
					$regurgitate[$#regurgitate] = $regurgitate[$#regurgitate]."\t".shift(@clusters);
					delete $contigclusters{$contigends[$i]{$key3}};
					$contigclusters{$contigends[$i]{$key3}}=[ @regurgitate ];
					delete $contigclusters{$key2};
					$contigclusters{$key2}= [ @clusters ] if scalar(@clusters) >0;
					$contigclusters{$key2}= [ "" ] if scalar(@clusters) ==0;
					
					if (scalar(@clusters) < 1){
				#		print "$key2-> $clusteringpaths{$key2} in the loners\n" if exists $foundkeys{$key2};
						$clusteringpaths{$key2}=$contigends[$i]{$key3};	
						$clusteringpathsRev{$contigends[$i]{$key3}}=$key2;				
						print OUTP "$contigends[$i]{$key3} -> $clusteringpathsRev{$contigends[$i]{$key3}}\n";
	#					print " clusteringpaths $key2 -> $contigends[$i]{$key3}\n";
						$founkeys_enteredcount-- if exists $foundkeys{$key2};
						$existing_removed++  if exists $foundkeys{$key2};
#						print "$key2->",@{$contigclusters{$key2}},"->>$foundkeys{$key2}\n" if exists $foundkeys{$key2} && $prinkter == 1;
						delete $foundkeys{$key2} if exists $foundkeys{$key2};
						$complete_transfered++;
					}
					else{
						print OUTP "$key2-> 0 not so lonely\n"  if !exists $clusteringpathsRev{$key2};
						$clusteringpaths{$key2}=$key2  if !exists $clusteringpaths{$key2};
						$clusteringpathsRev{$key2}=0 if !exists $clusteringpathsRev{$key2};
						
						$founkeys_enteredcount++ if !exists $foundkeys{$key2};
						$foundkeys{$key2} = $key2 if !exists $foundkeys{$key2};
	#					print "adding foundkeys entry $foundkeys{$key2}\n";
						$transfered++;				
					}
					#$contigclusters{$key2}=[ @contigcluster ];
				}
			}
			else{
	#					print "adjacent block with species $tags[$i] does not exist\n" if $prinkter == 1; 
						$plain_transfered++;
						print OUTP "$key2-> 0 , going straight\n"  if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
						$clusteringpaths{$key2}=$key2 if exists $contigclusters{$key2} && !exists $clusteringpaths{$key2};
						$clusteringpathsRev{$key2}=0  if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
						$founkeys_enteredcount++ if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
						$foundkeys{$key2} = $key2 if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
	#					print "adding foundkeys entry $foundkeys{$key2}\n";
				
			}
			$totalcount++;
	
		}
		
		
	}
	close BO;
	#close (NORTH);
	#///////////////////////////////////////////////////////////////////////////////////////
	#///////////////////////////////////////////////////////////////////////////////////////
	#///////////////////////////////////////////////////////////////////////////////////////
	#///////////////////////////////////////////////////////////////////////////////////////
	
	my $founkeys_count=();
	my $nopath_count=();
	my $pathed_count=0; 
	foreach my $key2 (keys %foundkeys){
		#print "x" x 60, "\n";
#		print "x" if $dotcounter % 100 ==0;
#		print "\n" if $dotcounter % 5000 ==0;
		$founkeys_count++;
		my $key = $key2;
#		print "$key2 -> $clusteringpaths{$key2}\n" if $prinkter == 1;
		if ($clusteringpaths{$key} eq $key){
#			print "printing hit the alignment block immediately... no path needed\n" if $prinkter == 1;
			$nopath_count++;
			delete $foundkeys{$key2};
			print ORTH join ("\n",@{$contigclusters{$key2}}),"\n";
		}
		else{
			my @pool=();
			my $key3=();
			$pathed_count++;
#			print "going reverse... clusteringpathsRev, $key = $clusteringpathsRev{$key}\n" if exists $clusteringpathsRev{$key} && $prinkter == 1;
#			print "going reverse... clusteringpathsRev  $key does not exist\n" if !exists $clusteringpathsRev{$key} && $prinkter == 1;
			if ($clusteringpathsRev{$key} eq "0") {
 				next;
			}
			else{
				my $yek3 = $clusteringpathsRev{$key};
				my $yek = $key;
#				print "caught in the middle of a path, now goin down from $yek to $yek3, which is $clusteringpathsRev{$key} \n" if $prinkter == 1;
				while ($yek3 ne "0"){
#					print "$yek->$yek3," if $prinkter == 1;
					$yek = $yek3;
					$yek3 = $clusteringpathsRev{$yek};
				}
#				print "\nfinally reached the end of path: $yek3, and the next in line is $yek, and its up-route is  $clusteringpaths{$yek}\n" if $prinkter == 1; 
				$key3 = $clusteringpaths{$yek};
				$key = $yek;
			}
		
#			print "now that we are at bottom of the path, lets start climbing up again\n" if $prinkter == 1;
			
			while($key ne $key3){
#				print "KEEY $key->$key3\n" if $prinkter == 1;
#				print "our contigcluster = @{$contigclusters{$key}}\n----------\n" if $prinkter == 1;
				
				if (scalar(@{$contigclusters{$key}}) > 0) {push @pool, @{$contigclusters{$key}}; print "now pool = @pool\n" if $prinkter == 1;}
				delete $foundkeys{$key3};
				$key=$key3;
				$key3=$clusteringpaths{$key};
			}
#			print "\nfinally, adding the first element of path: @{$contigclusters{$key}}\n AND printing the contents:\n" if $prinkter == 1;
			my @firstcontig= @{$contigclusters{$key}};
			delete $foundkeys{$key2} if exists $foundkeys{$key2} ;
			delete $foundkeys{$key} if exists $foundkeys{$key};
	
			unshift @pool, pop @firstcontig;
#			print join("\t",@pool),"\n" if $prinkter == 1;
			print ORTH join ("\n",@firstcontig),"\n" if scalar(@firstcontig) > 0;
			print ORTH join ("\t",@pool),"\n";		
		#	join();
		}
	
	}
	#close (NORTH);
#	print "founkeys_entered =$founkeys_enteredcount, plain_transfered=$plain_transfered,existing_removed=$existing_removed,founkeys_count =$founkeys_count, nopath_count =$nopath_count, transfered = $transfered, complete_transfered = $complete_transfered, totalcount = $totalcount, pathed=$pathed_count\n" if $prinkter == 1;
	close (BO);
	close (ORTH);
	close (OUTP);
	return 1;
	
}
sub stringPainter{
	my @string  = split(/_/,$_[0]);
#	print $_[0], " <- in stringPainter\n";
#	print $_[1], " <- in clusters\n";
	
	my @clusters = split(/,/, $_[1]);
	for my $i (0 ... $#clusters){
		my $cluster = $clusters[$i];
#		print "cluster = $cluster\n";
		my @parts = split(/\./,$cluster);
		my @cord = split(/:|-/,shift(@parts));
		my $minstart = $cord[1];
		my $maxend = $cord[2];
#		print "minstart = $minstart , maxend = $maxend\n";
		
		for my $j (0 ... $#parts){
#			print "oing thri $parts[$j]\n";
			my @cord = split(/:|-/,$parts[$j]);
			$minstart = $cord[1] if $cord[1] < $minstart;
			$maxend = $cord[2] if $cord[2] > $maxend;
		}
#		print "minstart = $minstart , maxend = $maxend\n";
		for my $pos ($minstart ... $maxend){ $string[$pos] = $string[$pos].",".$cluster;}
				
		
	}
#	print "@string <-done from function stringPainter\n";
	return join("_",@string);
}

sub findClusters{
	my $continue = 0;
	my @mapped_clusters = ();	
	my $clusterdist = $_[1];
	my $previous = 'x';
	my @localcluster = ();
	my $cluster_starts = ();
	my $cluster_ends = ();
	my $localcluster_start = ();
	my $localcluster_end = ();
	my @record_cluster = ();
	my @string = split(/\!/, $_[0]);
	my $zerolength=0;
	
	for my $pos_pos (1 ... $#string){
			my $pos = $string[$pos_pos];
#			print $pos, "\n";
			if ($continue == 0 && $pos eq "x") {next;}
			
			if ($continue == 1 && $pos eq "x" && $zerolength <= $clusterdist){ 
				if ($zerolength == 0) {$localcluster_end = $pos_pos-1};
				$zerolength++; 
				$continue = 1; 
			}

			if ($continue == 1 && $pos eq "x" && $zerolength > $clusterdist) { 
				$zerolength = 0; 
				$continue = 0; 
				my %seen;
				my @uniqed = grep !$seen{$_}++, @localcluster;
#				print "caught cluster : @uniqed \n";
				push(@mapped_clusters, [@uniqed]);
#				print "clustered:\n@uniqed\n";
				@localcluster = ();
				@record_cluster = ();
				
			}
			
			if ($pos ne "x"){
				$zerolength = 0;
				$continue = 1;
				$pos =~ s/x,//g;
				my @entries = split(/,/,$pos);
				$localcluster_end = 0;
				$localcluster_start = 0;
				push(@record_cluster,$pos);
			
				if ($continue == 0){
					@localcluster = ();
					@localcluster = (@localcluster, @entries);
					$localcluster_start = $pos_pos;
				}
			
				if ($continue == 1 ) {
					@localcluster = (@localcluster, @entries);
				}
			}
	}
	
	if (scalar(@localcluster) > 0){
		my %seen;
		my @uniqed = grep !$seen{$_}++, @localcluster;
	#	print "caught cluster : @uniqed \n";
		push(@mapped_clusters, [@uniqed]);
	#	print "clustered:\n@uniqed\n";
		@localcluster = ();
		@record_cluster = ();
	}

	my @returner = ();
	
	foreach my $clust (@mapped_clusters){
		my @localclust = @$clust;
		my @result = ();
		foreach my $clustparts (@localclust){
			push(@result,$clustparts);
		}
		push(@returner , join(".",@result));
	}	
#	print "returnig: ", join(",",@returner), "\n";
	return join(",",@returner);
}
#xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx