changeset 0:4616cc3476d4 draft

Uploaded
author bitlab
date Sat, 15 Dec 2018 18:06:48 -0500
parents
children fc3f2aefe244
files chromeister/README.md chromeister/bin/CHROMEISTER chromeister/bin/allVsAll.sh chromeister/bin/allVsAll_incremental.sh chromeister/bin/compute_score.R chromeister/bin/generate-one-score.sh chromeister/bin/index_chromeister.sh chromeister/bin/index_chromeister_solo.sh chromeister/bin/make-cluster.R chromeister/bin/make-mean.sh chromeister/bin/plot.R chromeister/bin/plot_diags.R chromeister/bin/recompute_scores.sh chromeister/bin/run_and_plot_chromeister.sh chromeister/chromeister.xml chromeister/src/CHROMEISTER.c chromeister/src/Makefile chromeister/src/alignmentFunctions.c chromeister/src/alignmentFunctions.h chromeister/src/combine_reads.c chromeister/src/commonFunctions.c chromeister/src/commonFunctions.h chromeister/src/reverseComplement.c chromeister/src/structs.h
diffstat 24 files changed, 2940 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/README.md	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,36 @@
+# chromeister
+An ultra fast, heuristic approach to detect conserved signals in extremely large pairwise genome comparisons.
+
+## Requirements
+
+GCC compiler (any version that is not completely outdated should do) and the R-base package (default R installation should do) with no extra packages.
+Simply download the .zip and unzip it, or clone the repository (currently the active branch is "histo-kmer-ultra" DO NOT USE ANY OTHER).
+Then issue the following commands:
+
+cd chromeister/src && make all
+
+You are ready to go!
+
+## Use
+
+There are several ways in which CHROMEISTER can be used. The simplest one is to run a 1-vs-1 comparison and then compute the score and the plot.
+To do so, use the binaries at the bin folder:
+
+CHROMEISTER -query seqX -db seqY -out dotplot.mat && Rscript compute_score.R dotplot.mat
+
+This will generate the comparison matrix, the plot of the comparison with the automatic scoring and the guides to be used in an exhaustive GECKO comparison.
+
+More to be added soon!
+
+
+
+
+
+
+
+
+
+
+
+
+
Binary file chromeister/bin/CHROMEISTER has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/allVsAll.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+DIR=$1
+EXT=$2
+DIM=$3
+KMER=$4
+
+array=()
+x=0
+
+if [ $# != 4 ]; then
+	echo "***ERROR*** Use: $0 genomesDirectory extension dim kmer"
+	exit -1
+fi
+
+indexnameA=$(basename "$DIR")
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
+do
+	array[$x]=$elem
+	x=`expr $x + 1`
+	#echo "X: $elem"
+done
+
+for ((i=0 ; i < ${#array[@]} ; i++))
+do
+	for ((j=i ; j < ${#array[@]} ; j++))
+	do
+		if [ $i != $j ]; then
+				seqX=${array[$i]}
+				seqY=${array[$j]}
+				echo "----------${seqX}-${seqY}-----------"
+
+				
+				if [[ ! -f ${seqX}.$EXT-${seqY}.$EXT.mat ]]; then
+					
+					$BINDIR/run_and_plot_chromeister.sh $DIR/${seqX}.$EXT $DIR/${seqY}.$EXT $KMER $DIM
+					#Rscript $BINDIR/compute_score.R $seqX.$EXT-$seqY.$EXT.mat $DIM > $seqX.$EXT-$seqY.$EXT.scr.txt
+				fi
+			
+		fi
+	done
+done
+
+# generate index
+if [[ ! -f index.csv.temp ]] && [ ! -f index-$indexnameA.csv  ]; then
+	
+	echo "Launching... $BINDIR/index_chromeister_solo.sh index-$indexnameA.csv $DIR $DIR"
+	$BINDIR/index_chromeister_solo.sh . index-$indexnameA.csv $DIR $DIR
+fi
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/allVsAll_incremental.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+DIR=$1
+DIR2=$2
+EXT=$3
+DIM=$4
+KMER=$5
+
+
+
+if [ $# != 5 ]; then
+	echo "***ERROR*** Use: $0 genomesDirectory1 genomesDirectory2 extension dim kmer"
+	exit -1
+fi
+
+indexnameA=$(basename "$DIR")
+indexnameB=$(basename "$DIR2")
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+
+array=()
+x=0
+array2=()
+
+for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
+do
+	array[$x]=$elem
+	x=`expr $x + 1`
+	#echo "X: $elem"
+done
+
+x=0
+
+for elem in $(ls -d $DIR2/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
+do
+	array2[$x]=$elem
+	x=`expr $x + 1`
+	#echo "X: $elem"
+done
+
+for ((i=0 ; i < ${#array[@]} ; i++))
+do
+	for ((j=0 ; j < ${#array2[@]} ; j++))
+	do
+				seqX=${array[$i]}
+				seqY=${array2[$j]}
+				echo "----------${seqX}-${seqY}-----------"
+
+				
+				#echo "$BINDIR/run_and_plot_chromeister.sh $DIR/${seqX}.$EXT $DIR/${seqY}.$EXT 30 10000"
+				if [[ ! -f ${seqX}.$EXT-${seqY}.$EXT.mat ]]; then
+					$BINDIR/run_and_plot_chromeister.sh $DIR/${seqX}.$EXT $DIR2/${seqY}.$EXT $KMER $DIM
+				fi
+			
+	done
+done
+
+
+# generate index
+if [[ ! -f index.csv.temp ]] && [ ! -f index-$indexnameA-$indexnameB.csv  ]; then
+	
+	echo "Launching... $BINDIR/index_chromeister_solo.sh . index-$indexnameA-$indexnameB.csv $DIR $DIR2"
+	$BINDIR/index_chromeister_solo.sh . index-$indexnameA-$indexnameB.csv $DIR $DIR2
+fi
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/compute_score.R	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,478 @@
+
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+
+if(length(args) < 2){
+  stop("USE: Rscript --vanilla plot.R <matrix> <matsize>")
+}
+
+
+
+
+
+
+growing_regions <- function(mat, reward = 6, penalty = 5, sidePenalty = 3, MAXHSPS = 500, TH = 10, WSIZE = 7){
+
+  #write(mat, file ="data.txt", ncolumns = 200)
+  
+  len <- min(length(mat[1,]), length(mat[,1]))
+  HSPS <- matrix(0, nrow=MAXHSPS, ncol=5)
+  idx <- 1
+  lH <- round(WSIZE/2) - 1
+  rH <- round(WSIZE/2) + 1
+  
+  
+  #print(paste("lH: ", lH, "rH: ", rH))
+  
+  if(WSIZE %% 2 == 0){
+    print("WSIZE MUST BE ODD")
+    stop()
+  }
+  
+  
+  
+  i <- 1
+  #readline(prompt="Press [enter] to continue")
+  while(i < len){
+
+    value <- max(mat[i,]) * reward
+    if(value == 0) i <- i + 1
+    pos <- which.max(mat[i,])
+    # these two hold ending frag
+    endfrag <- pos
+    j <- i
+    count_penalties <- 1
+
+    #print("-----------------")
+
+    while(value > 0 && j < len){
+
+      #print(paste("took values ", j, endfrag, "which have score of ", mat[j, endfrag], "current value is: ", value))
+      
+      
+      
+
+      # Reset position used
+      
+      mat[max(1,j-1), max(1,endfrag-2)] <- 0
+      mat[max(1,j-1), max(1,endfrag-1)] <- 0
+      mat[max(1,j-1), endfrag] <- 0
+      mat[max(1,j-1), min(len, endfrag+1)] <- 0
+      mat[max(1,j-1), min(len, endfrag+2)] <- 0
+      
+      mat[j, max(1,endfrag-2)] <- 0
+      mat[j, max(1,endfrag-1)] <- 0
+      mat[j, endfrag] <- 0
+      mat[j, min(len, endfrag+1)] <- 0
+      mat[j, min(len, endfrag+2)] <- 0
+      
+      #print(paste("Erasing: (",max(1,j-1), max(1,endfrag-2),")(",max(1,j-1), max(1,endfrag-1),")(",max(1,j-1), endfrag,
+      #            ")(",max(1,j-1), min(len, endfrag+1),")(",max(1,j-1), min(len, endfrag+2),")(",j, max(1,endfrag-2),
+      #            ")(",j, max(1,endfrag-1),")(",j, endfrag,")(",j, min(len, endfrag+1),")(",j, min(len, endfrag+2),")"))
+      
+      
+      # Go for next
+      j <- j + 1
+      # Check next, could be reverse or forward
+      mleft <- max(1, endfrag-lH)
+      mright <- min(len, endfrag+lH)
+      window <- mat[j, mleft:mright]
+      
+
+      #print(paste("mleft: ", mleft, "mright", mright, "j is: ", j))
+      #print(window)
+
+      v <- max(window)
+      selected <- which.max(window)
+      # Make it rather go diagonally
+      #print(paste("WIndow len: ", length(window)))
+      chose_diagonal <- FALSE
+      if(length(window) == WSIZE && v == window[lH]){
+        selected <- lH
+        chose_diagonal <- TRUE
+      }
+      
+      if(length(window) == WSIZE && v == window[rH]){
+        selected <- rH
+        chose_diagonal <- TRUE
+      }
+      
+      #print(paste("Selected value be like ", selected))
+      
+
+      if(v != 0){
+        
+        
+        
+        endfrag <- (mleft + selected ) # To make the indexing
+        if(length(window) == WSIZE) endfrag <- endfrag - 1
+        endfrag <- max(1, min(len, endfrag))
+        #print(paste("\t", " endfragnew = ", endfrag, "max of window: ", max(window), "on position", which.max(window)))
+      }
+
+      #print(paste("Chose diagonal is ", chose_diagonal))
+      
+      # If no similarity is found
+      if(v == 0){
+        value <- value - count_penalties * penalty
+        count_penalties <- count_penalties + 1
+        #print("Got penalty #########")
+        
+      }else{
+        
+        # Similarity is found
+        
+        if(!chose_diagonal){
+          value <- value + count_penalties * (-sidePenalty)
+          count_penalties <- count_penalties + 1
+          #print("Got SIDE penalty @@@ #########")
+        }else{
+          count_penalties <- 1
+          value <- value + reward
+          #print("Got reward thou #########")
+        }
+        
+      }
+      #readline(prompt="Press [enter] to continue")
+
+    }
+    # Check len of frag
+    if(j-i > TH){
+      # HSPS[idx, 1] <- i
+      # HSPS[idx, 2] <- pos
+      # HSPS[idx, 3] <- j
+      # HSPS[idx, 4] <- endfrag
+      # HSPS[idx, 5] <- abs(i-j)
+      
+      HSPS[idx, 1] <- pos
+      HSPS[idx, 2] <- i
+      HSPS[idx, 3] <- endfrag
+      HSPS[idx, 4] <- j
+      HSPS[idx, 5] <- abs(i-j)
+      
+      
+      
+      #print(paste("ACCEPT", i, pos, j, endfrag, "x-y", j-i, abs(endfrag-pos), sep = " "))
+      idx <- idx + 1
+    }else{
+      #print("REJECT")
+    }
+    # This will prevent overlappign lines, I think
+    #i <- j
+    
+    if(idx == MAXHSPS) break
+  }
+  
+  
+  
+  
+  return (HSPS)
+}
+
+detect_events <- function(HSPS, sampling){
+  
+  DIAG_SEPARATION <- 10
+  # same as HSPS but adding the event
+  output <- matrix(0, nrow=length(HSPS[,1]), ncol=1+length(HSPS[1,]))
+  colnames(output) <- c("x1", "y1", "x2", "y2", "len", "event")
+  j <- 0
+  for(i in 1:(length(HSPS[,1]))){
+    
+    if(sum(HSPS[i,]) > 0){
+      j <- j + 1
+      is_inverted = FALSE
+      is_diagonal = TRUE
+      
+      if(HSPS[i,1] > HSPS[i,3]) is_inverted = TRUE
+      if(abs(HSPS[i,1] - HSPS[i,2]) > DIAG_SEPARATION && abs(HSPS[i,3] - HSPS[i,4]) > DIAG_SEPARATION) is_diagonal = FALSE
+      
+      output[i,1] <- HSPS[i,1] * sampling
+      output[i,2] <- HSPS[i,2] * sampling
+      output[i,3] <- HSPS[i,3] * sampling
+      output[i,4] <- HSPS[i,4] * sampling
+      output[i,5] <- HSPS[i,5] * sampling
+      
+      if(is_diagonal) output[i,6] <- "synteny block"
+      if(is_diagonal && is_inverted) output[i,6] <- "inversion"
+      if(!is_diagonal && !is_inverted) output[i,6] <- "transposition"
+      if(!is_diagonal && is_inverted) output[i,6] <-"inverted transposition"
+    }
+    
+    
+  }
+  
+  return (output[1:j,])
+}
+
+
+
+paint_frags <- function(HSPS, l, sampling){
+  
+  plot(c(HSPS[1,1]*sampling, HSPS[1,3]*sampling), c(HSPS[1,2]*sampling, HSPS[1,4]*sampling), xlim = c(1,l*sampling), ylim = c(1,l*sampling), type="l", xlab="X-genome",ylab="Y-genome")
+  for(i in 2:length(HSPS[,1])){
+    if(sum(HSPS[i,]) > 0){
+      lines(c(HSPS[i,1]*sampling, HSPS[i,3]*sampling), c(HSPS[i,2]*sampling, HSPS[i,4]*sampling))
+    }
+  }
+}
+
+supersample <- function(mat, upscale){
+  l <- min(length(mat[1,]), length(mat[,1]))
+  size <- round(l*upscale)
+  m <- matrix(0, nrow=size, ncol=size)
+  
+  for(i in 1:size){
+    for(j in 1:size){
+      mleft <- max(1, i-1)
+      mright <- min(l, i+1)
+      mup <- max(1, j-1)
+      mdown <- min(l, j+1)
+      
+      ri <- max(1, i/upscale)
+      rj <- max(1, j/upscale)
+      
+      if(mat[ri, rj] > 0){
+        m[i, j] <- 1
+      }
+    }
+  }
+  return (m)
+}
+
+
+downsample <- function(mat, downscale){
+  l <- min(length(mat[1,]), length(mat[,1]))
+  size <- round(l/downscale)
+  m <- matrix(0, nrow=size, ncol=size)
+  
+  for(i in 1:l){
+    for(j in 1:l){
+      mup <- max(1, i-1)
+      mdown <- min(l, i+1)
+      mleft <- max(1, j-1)
+      mright <- min(l, j+1)
+      
+      #print(paste("Matrix at ", i, j))
+      #print(mat[mup:mdown, mleft:mright])
+      
+      if(sum(mat[mup:mdown, mleft:mright]) > 0){
+        #print(paste("goes to ", round(i/downscale), round(j/downscale)))
+        m[round(i/downscale), round(j/downscale)] <- 1
+      }
+    }
+  }
+  
+  return (m)
+}
+
+
+
+
+
+
+
+path_mat = args[1]
+
+
+fancy_name <- strsplit(path_mat, "/")
+fancy_name <- (fancy_name[[1]])[length(fancy_name[[1]])]
+
+# Read sequence lenghts
+con <- file(path_mat,"r")
+seq_lengths <- readLines(con, n=2)
+seq_x_len <- as.numeric(seq_lengths[1])
+seq_y_len <- as.numeric(seq_lengths[2])
+close(con)
+
+
+data <- as.matrix(read.csv(path_mat, sep = " ", skip=2))
+
+# Get sequence names
+name_x <- strsplit(fancy_name, "-")[[1]][1]
+name_y <- strsplit(fancy_name, "-")[[1]][2]
+
+# Max of columns
+
+len_i <- as.numeric(args[2])
+len_j <- as.numeric(args[2])
+
+
+score_density <- data
+aux_density <- data
+
+
+pmax_pos <- which.max(aux_density[,1])
+for(i in 1:len_i){
+  
+  cmax_pos <- which.max(aux_density[i,]) # get max of row
+  
+  if((aux_density[i,cmax_pos]) > 0){ # if it has value
+    #row_from_col <- which.max(aux_density[,cmax_pos]) # get max of the column pointed by maximum of row
+    score_density[i,] <- 0 # put all others in row to 0 i.e. only use this max, EXCEPT for the maximum in the column
+    #score_density[row_from_col, cmax_pos] <- 1
+    score_density[i,cmax_pos] <- 1
+    pmax_pos <- cmax_pos
+  }
+}
+
+
+pmax_pos <- which.max(aux_density[1,])
+
+
+for(i in 1:len_i){
+
+  cmax_pos <- which.max(aux_density[,i]) # get max of column
+
+  if((aux_density[cmax_pos,i]) > 0){
+    score_density[,i] <- 0
+    score_density[cmax_pos,i] <- 1
+    pmax_pos <- cmax_pos
+  }
+}
+
+
+
+
+
+
+
+
+
+score_copy <- score_density
+diag_len <- 4
+
+for(i in 6:(len_i-5)){
+  for(j in 6:(len_j-5)){
+    
+    value <- 0
+    for(w in (-diag_len/2):(diag_len/2)){
+      if(score_density[i+w,j+w] > 0){
+        value <- value + 1
+      }
+    }
+    
+    if(value >= diag_len){
+      for(k in 1:5){
+        score_copy[i+k,j+k] <- 1
+      }
+      for(k in 1:5){
+        score_copy[i-k,j-k] <- 1
+      }
+    }else if(score_copy[i,j]==0){
+      score_copy[i,j] <- 0
+    }
+  }
+}
+
+for(i in 6:(len_i-5)){
+  for(j in 6:(len_j-5)){
+    
+    value <- 0
+    for(w in (-diag_len/2):(diag_len/2)){
+      if(score_density[i-w,j+w] > 0){
+        value <- value + 1
+      }
+    }
+    
+    if(value >= diag_len){
+      for(k in 1:5){
+        score_copy[i-k,j+k] <- 1
+      }
+      for(k in 1:5){
+        score_copy[i+k,j-k] <- 1
+      }
+    }else if(score_copy[i,j]==0){
+      score_copy[i,j] <- 0
+    }
+  }
+}
+
+
+# Kernel to remove single points
+
+for(i in 1:(length(score_copy[,1]))){
+  for(j in 1:(length(score_copy[1,]))){
+    
+    value <- 0
+    
+    min_i <- max(1, i-1)
+    max_i <- min(len_i, i+1)
+    min_j <- max(1, j-1)
+    max_j <- min(len_j, j+1)
+    
+    value <- sum(score_copy[min_i:max_i, min_j:max_j])
+    
+    if(value < 2) score_copy[i,j] <- 0
+  }
+}
+
+# To compute the score
+score <- 0
+pmax_pos <- which.max(score_copy[,1])
+dist_th <- 1.5
+besti <- 1
+bestj <- pmax_pos
+dvec1 <- abs(which.max(score_copy[,2]) - which.max(score_copy[,1]))
+dvec2 <- abs(which.max(score_copy[,3]) - which.max(score_copy[,2]))
+dvec3 <- abs(which.max(score_copy[,4]) - which.max(score_copy[,3]))
+for(i in 5:len_i){
+  
+  #print(paste(paste(paste(dvec1, dvec2), dvec3), mean(c(dvec1, dvec2, dvec3))))
+  distance <- mean(c(dvec1, dvec2, dvec3))
+
+  
+  
+  dvec1 <- dvec2
+  dvec2 <- dvec3
+  dvec3 <- abs(which.max(score_copy[,i]) - which.max(score_copy[,i-1]))
+  
+  # If there is a 0 or we are too far away just add max distance!
+  if(distance > dist_th || distance == 0){
+    score <- score + len_i
+  }
+  
+}
+
+
+score <- (score/(len_i^2))
+print(score)
+
+
+sampling_value <- 5
+submat <- downsample(score_copy, sampling_value)
+m <- growing_regions((submat), WSIZE = 7, TH = 5, penalty = 15)
+events <- detect_events(m, sampling_value)
+events <- rbind(events, c(0,0,0,0,0,"Null event"))
+write(as.character(c(seq_x_len, seq_y_len)), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=2)
+write(as.character(c("x1", "y1", "x2", "y2", "len", "event")), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=6)
+for(i in 1:length(events[,1])){
+  write(as.character(events[i,]), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=6)  
+}
+
+
+
+coords1 <- round(seq(from=0, to=1, by=0.2)*seq_x_len)
+coords2 <- round(seq(from=0, to=1, by=0.2)*seq_y_len)
+
+final_image <- apply((t(score_copy)), 2, rev)
+
+png(paste(path_mat, ".filt.png", sep=""), width = length(data[,1]), height = length(data[,1]))
+image(t(final_image), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n', main = paste(fancy_name, paste("filt. score=", score)), xlab = name_x, ylab = name_y, axes = FALSE)
+axis(1, tick = TRUE, labels = (coords1), at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1))
+axis(2, tick = TRUE, labels = rev(coords2), at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1))
+dev.off()
+
+
+# Output pixel coordinates of highly conserved signals
+
+# To clear it in case it existed
+write(c(paste("X", "Y")), file = paste("hits-XY-", paste(fancy_name, ".hits", sep=""), sep=""), append = FALSE, sep = "\n")
+
+
+for(i in 1:(length(score_copy[,1]))){
+  for(j in 1:(length(score_copy[1,]))){
+    if(score_copy[i,j] != 0){
+      write(c(paste(i, j)), file = paste("hits-XY-", paste(fancy_name, ".hits", sep=""), sep=""), append = TRUE, sep = "\n")
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/generate-one-score.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+CSV=$1
+TH=$2
+
+if [ $# -ne 2 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 <index.csv> <threshold>"
+   echo ""
+   exit -1
+fi
+
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# get first genome in list (they are sorted)
+currgenome=$(tail -n +2 "$CSV" | head -1 | awk -F "," '{print $6}')
+
+# fill array of chromosomes similarity
+
+array=()
+arraytosort=()
+names=()
+homologies=()
+condition=0
+othergencounter=0
+# for problems with chromo X and Y
+highest=1 
+# For all lines
+
+cat $CSV | tail -n +2 > $1.temp
+
+while IFS= read -r i
+do
+
+	othergenome=$(echo "$i" | awk -F "," '{print $6}')
+	if [ "$condition" -eq 0 ]; then
+		currgenome=$othergenome
+		condition=1
+	fi
+	
+	if [ "$othergenome" != "$currgenome" ]; then
+	
+		# Sort the array with temporal values
+		#printf '%s\n' "${arraytosort[@]}"
+		#echo "name is $currgenome"
+		
+		sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
+		
+		#echo "For chroomo $currgenome we have "
+		#echo $(printf '%s,' "${sorted[@]}")
+		# accumulate sum until threshold is reached
+		usedValues=1
+		usedValuesNext=2
+		first=${sorted[0]}
+		next=${sorted[${usedValues}]}
+		nextofnext=${sorted[${usedValuesNext}]}
+		finalvalue=$first
+		divisor=0
+		currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
+		TH=$(printf '%4.6f' $TH)
+		#echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
+		condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
+		#echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
+		while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
+		do
+			usedValues=`expr $usedValues + 1`
+			usedValuesNext=`expr $usedValuesNext + 1`
+			finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
+			next=${sorted[${usedValues}]}
+			nextofnext=${sorted[${usedValuesNext}]}
+			
+			currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
+			condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
+			divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
+			
+		done
+
+		#echo "so this is what we got $finalvalue, when divided using $divisor"
+		
+		# array holds the results
+		#array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
+		finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
+		array[$highest]=$finalvalue
+		homologies[$highest]=$usedValues
+		
+		highest=`expr $highest + 1`
+		condition=0
+		names+=($currgenome)
+		othergencounter=0
+		unset arraytosort
+		
+		
+		
+		
+		getvalue=$(echo "$i" | awk -F "," '{print $8}')
+		# Copy value to array 
+		arraytosort[$othergencounter]=$getvalue
+		#currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
+		othergencounter=`expr $othergencounter + 1`
+	else
+		getvalue=$(echo "$i" | awk -F "," '{print $8}')
+		# Copy value to array 
+		arraytosort[$othergencounter]=$getvalue
+		#currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
+		othergencounter=`expr $othergencounter + 1`
+
+	fi
+
+done < "$1.temp"
+
+# do the last!!!
+#if [ "$lastprint" == "$currgenome" ]; then
+# Sort the array with temporal values
+sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
+
+usedValues=1
+usedValuesNext=2
+first=${sorted[0]}
+next=${sorted[${usedValues}]}
+nextofnext=${sorted[${usedValuesNext}]}
+finalvalue=$first
+divisor=0
+currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
+TH=$(printf '%4.6f' $TH)
+#echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
+condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
+#echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
+while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
+do
+        usedValues=`expr $usedValues + 1`
+        usedValuesNext=`expr $usedValuesNext + 1`
+        finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
+        next=${sorted[${usedValues}]}
+        nextofnext=${sorted[${usedValuesNext}]}
+
+        currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
+        condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
+        divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
+
+done
+
+#echo "so this is what we got $finalvalue, when divided using $divisor"
+
+# array holds the results
+#array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
+finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
+array[$highest]=$finalvalue
+homologies[$highest]=$usedValues
+
+
+highest=`expr $highest + 1`
+currsum=0
+names+=($currgenome)	
+currgenome=$othergenome
+othergencounter=0
+#fi
+
+
+highest=`expr $highest - 1`
+rm $1.temp
+
+tsum=0
+echo "deleteme" > $1.inter
+rm $1.inter
+aux=0
+for ((i = 1; i <= highest; i++)); do
+	echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" >> $1.inter
+	#echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}"
+	aux=`expr $aux + 1`
+	#val=${array[${i}]}
+	#tsum=$(awk -v a="$tsum" -v b="$val" '{print a=a+b}')
+done
+
+#awk -v a="$tsum" b="$highest" '{print a/b}'
+
+#sumfirst=$(awk -F "," 'BEGIN{suma=0}{suma = suma + $8}END{print suma}' "$CSV")
+#echo "$sumfirst"
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/index_chromeister.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+DIR=$1
+FASTAS1=$3
+FASTAS2=$4
+OUT=$2
+
+echo "Computing index CSV..." > index.csv.temp
+
+while [ "$(find . -size 0 | wc -l)" -ne 0 ]; do
+        sleep 10s
+done
+
+
+EXT="mat"
+EXTSCORE="scr.txt"
+EXTGENERAL=".fa.fasta"
+
+
+
+if [ $# != 4 ]; then
+	echo "***ERROR*** Use: $0 <directory> <out> <fastas_directory_1> <fastas_directory_2>"
+	exit -1
+fi
+
+rm $DIR/*.log
+
+for i in $DIR/*.scr; do mv $i $i.txt; done
+
+rm $OUT
+
+for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
+do
+	IFS='-', read -a splits <<< "$elem"
+	IFS='.', read -a getnum <<< "$elem"
+
+	scorepath=$(basename $elem .mat).$EXTSCORE
+
+	sed -i "/X.*/d" $DIR/$scorepath
+	sed -i "s/\[1\]//g" $DIR/$scorepath
+
+	score=$( head -1  $DIR/$scorepath)
+
+	file1=${splits[0]}
+	file2=$(basename ${splits[1]} .mat)
+
+	ID1=$(head -1 $FASTAS1/$file1)
+	ID2=$(head -1 $FASTAS2/$file2)
+
+	
+	#scorepath="$(basename $elem .mat).$EXTSCORE"
+	#score="$(head -1 $DIR/$scorepath)"
+
+	counter=0
+	numX=0
+	numY=0
+	for i in "${getnum[@]}" 
+	do
+		counter=`expr $counter + 1`
+		if [ "$numX" -eq 0 ] && [ "$i" == "chromosome" ]; then
+			numX=$counter
+			continue
+		fi
+		if [ "$numX" -ne 0 ] && [ "$i" == "chromosome" ]; then
+			numY=$counter
+		fi
+
+	done
+
+
+	echo "$(basename ${splits[0]} $EXTGENERAL),$(basename ${splits[1]} ${EXTGENERAL}.mat),$ID1,$ID2,$elem.$EXT.filt.png,${getnum[${numX}]},${getnum[${numY}]},$score" >> $OUT
+
+done
+
+sort -k5,5n -k6,6n -o $OUT $OUT 
+
+
+sed -i '1iSpX, SpY, IDX, IDY, IMG, CHNumberX, CHNumberY, Score' $OUT
+
+rm index.csv.temp
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/index_chromeister_solo.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+DIR=$1
+FASTAS1=$3
+FASTAS2=$4
+OUT=$2
+
+echo "Computing index CSV..." > index.csv.temp
+
+while [ "$(find . -size 0 | wc -l)" -ne 0 ]; do
+        sleep 10s
+done
+
+
+EXT="mat"
+EXTSCORE="scr.txt"
+EXTGENERAL=".fasta"
+
+# data is like
+# HOMSA.Chr.10.fasta
+
+
+
+if [ $# != 4 ]; then
+	echo "***ERROR*** Use: $0 <directory> <out> <fastas_directory_1> <fastas_directory_2>"
+	exit -1
+fi
+
+
+rm $OUT
+
+for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
+do
+
+	IFS='-', read -a splits <<< "$elem"   # yields MUSMU.Chr.8.fasta and  MUSMU.Chr.Y.fasta
+	IFS='.', read -a getnum <<< "$elem"   # yields MUSMU  Chr  8  fasta  and MUSMU Chr Y fasta
+
+	scorepath=$(basename $elem .mat).$EXTSCORE
+
+	sed -i "/X.*/d" $DIR/$scorepath
+	sed -i "s/\[1\]//g" $DIR/$scorepath
+
+	score=$( head -1  $DIR/$scorepath)
+	len1=$( head -1 $DIR/$elem.mat.events.txt | awk -F="," '{print $1}')
+	len2=$( head -1 $DIR/$elem.mat.events.txt | awk -F="," '{print $2}')
+
+	
+
+	file1=${splits[0]}
+	file2=$(basename ${splits[1]} .mat)
+
+	ID1=$(head -1 $FASTAS1/$file1)
+	ID2=$(head -1 $FASTAS2/$file2)
+
+	
+	#scorepath="$(basename $elem .mat).$EXTSCORE"
+	#score="$(head -1 $DIR/$scorepath)"
+
+	counter=0
+	numX=${getnum[2]}
+	numY=${getnum[5]}
+
+
+
+
+	echo "$(basename ${splits[0]} $EXTGENERAL),$(basename ${splits[1]} ${EXTGENERAL}),$ID1,$ID2,$elem.$EXT.filt.png,$numX,$numY,$score,$len1 $len2" >> $OUT
+
+done
+
+sort -k5,5n -k6,6n -o $OUT $OUT 
+
+
+sed -i '1iSpX, SpY, IDX, IDY, IMG, CHNumberX, CHNumberY, Score, LengthX, LengthY' $OUT
+
+rm index.csv.temp
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/make-cluster.R	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,54 @@
+#!/usr/bin/env Rscript
+library(ape)
+
+args = commandArgs(trailingOnly=TRUE)
+
+if(length(args) < 1){
+  stop("USE: Rscript --vanilla make-cluster.R <csv>")
+}
+
+
+path = args[1]
+
+inputcsv <- read.csv(path, sep ="-", header=FALSE)
+
+# Make indexing table
+
+indexing_table <- table(inputcsv[,1])
+n_species <- length(indexing_table)
+
+for(i in 1:n_species){
+  indexing_table[i] <- i
+}
+
+
+distmat <- matrix(NA, nrow=n_species, ncol=n_species)
+rownames(distmat) <- rownames(indexing_table)
+colnames(distmat) <- rownames(indexing_table)
+
+for(i in 1:n_species){
+  distmat[i,i] <- 0
+}
+
+
+for(i in 1:length(inputcsv[,1])){
+  redirX <- (as.character(inputcsv[i,1]))
+  redirY <- (as.character(inputcsv[i,2]))
+  
+  distmat[indexing_table[redirX],indexing_table[redirY]] <- as.numeric(inputcsv[i,3])
+  distmat[indexing_table[redirY],indexing_table[redirX]] <- as.numeric(inputcsv[i,3])
+}
+
+cluster <- hclust(dist(distmat), method = "average")
+
+# Rooted hierarchical cluster
+#plot(cluster, main = "Clustering based on automatic scoring", xlab = "Organisms")
+
+# Unrooted
+plot(as.phylo(cluster), type = "unrooted", cex = 0.6, main = "Unrooted clustering based on automatic")
+
+# Fan cluster
+#plot(as.phylo(cluster), type = "fan")
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/make-mean.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+# Use in folder prior to all folders!
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+for i in *; do
+
+	
+	$BINDIR/generate-one-score.sh $i/index-$i.csv 0.2
+        value=$(awk '{sum+=$2} END { print sum/NR}' $i/index-$i.csv.inter)
+        echo "$i-$value"
+
+
+done
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/plot.R	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,17 @@
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+
+if(length(args) < 1){
+  stop("USE: Rscript --vanilla plot.R <matrix>")
+}
+
+
+path_mat = args[1]
+
+
+data <- read.csv(path_mat, header = FALSE, sep = " ", skip=1)
+
+
+png(paste(path_mat, ".png", sep=""), width = length(data[,1]), height = length(data[,1]))
+image(t(as.matrix(data)), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n')
+dev.off()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/plot_diags.R	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,179 @@
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+
+if(length(args) < 1){
+  stop("USE: Rscript --vanilla plot.R <matrix>")
+}
+
+
+path_mat = args[1]
+
+
+
+
+data <- as.matrix(read.csv(path_mat, sep = " "))
+percentage <- 0.1
+
+
+
+
+d <- col(data) - row(data)
+groups <- split(data, d)
+acum <- c()
+for(i in 1:length(groups)){
+  acum <- c(acum, sum(unlist(groups[i])))
+}
+
+
+# reverse
+data_r <- data
+for(i in 1:length(data_r[,1])){
+  data_r[i,] <- rev(data_r[i,])
+}
+d <- col(data_r) - row(data_r)
+groups <- split(data_r, d)
+acum_r <- c()
+for(i in 1:length(groups)){
+  acum_r <- c(acum, sum(unlist(groups[i]))/length(unlist(groups[i])) )
+}
+
+indexes <- matrix(0, nrow=length(acum), ncol = 3)
+indexes[,1] <- c(1:length(acum))
+indexes[,2] <- acum
+
+#sort
+indexes[order(indexes[,2], decreasing=TRUE),]
+
+#crop by percentage
+n_percent <- percentage * length(acum)
+
+for(i in 1:n_percent){
+  indexes[i,3] <- 1
+}
+
+# Now resort based on first column to have them again in order
+indexes[order(indexes[,1], decreasing=FALSE),]
+
+
+# same in reverse
+#put into indexes to sort
+indexes_r <- matrix(0, nrow=length(acum_r), ncol = 3)
+indexes_r[,1] <- c(1:length(acum_r))
+indexes_r[,2] <- acum_r
+
+#sort
+indexes_r[order(indexes_r[,2], decreasing=TRUE),]
+
+#crop by percentage
+n_percent <- percentage * length(acum_r)
+
+for(i in 1:n_percent){
+  indexes_r[i,3] <- 1
+}
+
+# Now resort based on first column to have them again in order
+indexes_r[order(indexes_r[,1], decreasing=FALSE),]
+
+
+
+
+finalmat <- matrix(0, nrow=length(data[,1]), ncol=length(data[1,]))
+
+
+for(i in length(data[1,]):1){
+  x <- i
+  y <- 1
+
+  if(indexes[i,3] == 1){
+    while(x < length(data[1,]) && y <= length(data[,1])){
+      
+      
+      if(x < length(data[1,]) && y <= length(data[,1])){
+        finalmat[x,y] <- 1  
+      }
+      
+      y <- y + 1
+      x <- x + 1
+      
+      
+    }  
+  }
+  
+}
+
+for(i in 2:length(data[,1])){
+  x <- 1
+  y <- i
+  
+  if(indexes[i,3] == 1){
+    
+    while(x < length(data[1,]) && y <= length(data[,1])){
+      
+      if(x < length(data[1,]) && y <= length(data[,1])){
+        finalmat[x,y] <- 1
+      }
+      y <- y + 1
+      x <- x + 1
+      
+    }  
+  }
+}
+
+
+#same for reverse
+for(i in length(data_r[1,]):1){
+  x <- i
+  y <- 1
+  
+  if(indexes[i,3] == 1){
+    while(x < length(data_r[1,]) && y <= length(data_r[,1])){
+      
+      
+      if(x < length(data_r[1,]) && y <= length(data_r[,1])){
+        finalmat[x,length(data_r[,1])-y] <- 1  
+      }
+      
+      y <- y + 1
+      x <- x + 1
+      
+      
+    }  
+  }
+  
+}
+
+for(i in 2:length(data_r[,1])){
+  x <- 1
+  y <- i
+  
+  if(indexes[i,3] == 1){
+    
+    while(x < length(data_r[1,]) && y <= length(data_r[,1])){
+      
+      if(x < length(data_r[1,]) && y <= length(data_r[,1])){
+        finalmat[x,length(data_r[,1])-y] <- 1
+      }
+      y <- y + 1
+      x <- x + 1
+      
+    }  
+  }
+}
+
+# fully write last
+
+for(i in 1:length(data[,1])){
+  for(j in 1:length(data[1,])){
+    if(finalmat[i,j] == 0){
+      
+      data[i,j] <- 0
+    }
+  }
+}
+
+png(paste(path_mat, ".png", sep=""), width = length(data[,1]), height = length(data[,1]))
+image((data), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n')
+dev.off()
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/recompute_scores.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+
+if [ $# -ne 1 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 <#threads>"
+   echo ""
+   exit -1
+fi
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+THREADS=$1
+
+
+for p in */;
+do
+
+	echo "Entering $p"
+	cd $p
+
+
+
+	thepaths=()
+	n=0
+	# Grab the routes
+	for i in *.mat ;
+	do
+
+	        thepaths[$n]=$i
+        	n=`expr $n + 1`
+
+	done
+
+	i=0
+	aux=0
+	for ((i=0; i < $n ; i+=$THREADS))
+	do
+		aux=$i
+		for ((j=0; j<$THREADS; j++))
+		do
+			if [ "$aux" -lt "$n" ]; then
+				echo "Recomputing ${thepaths[$aux]}"
+				goodpath=${thepaths[$aux]%.mat}
+				Rscript $BINDIR/compute_score.R ${thepaths[$aux]} > ${goodpath}.scr.txt &
+				aux=`expr $aux + 1`
+			fi
+		done
+
+		for job in `jobs -p`
+		do
+		    #echo $job
+		    wait $job
+		done
+		
+	done
+
+
+	echo "Exiting $p"
+	cd ..
+
+
+done
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/bin/run_and_plot_chromeister.sh	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+G1=$1
+G2=$2
+KMER=$3
+DIM=$4
+
+
+FILE1=$(basename $G1)
+FILE2=$(basename $G2)
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+
+if [ $# -lt 4 ]; then
+        echo "***ERROR*** Use: $0 <G1> <G2> <KMER> <DIMENSION>"
+        exit -1
+fi
+
+
+
+(time $BINDIR/CHROMEISTER -query $G1 -db $G2 -kmer $KMER -out $FILE1-$FILE2.mat -dimension $DIM) &> $FILE1-$FILE2.log
+(Rscript $BINDIR/compute_score.R $FILE1-$FILE2.mat $DIM) &> $FILE1-$FILE2.scr.txt
+
+#$BINDIR/plot.R $FILE1-$FILE2.mat
+
+
+#rm $FILE1-$FILE2.mat
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/chromeister.xml	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,14 @@
+<tool id="chromeister" name="Chromeister">
+   <description>A heuristic approach for ultra fast previsualization of pairwise genome comparisons</description>
+   <inputs>
+      <param name="query" type="data" format="fasta" label="Query sequence" help="Query sequence file in fasta format" />
+      <param name="db" type="data" format="fasta" label="Reference sequence" help="Reference sequence file in fasta format" />
+      <param name="dimension" type="integer" value="1000" label="Dimension" help="Compression size" />
+   </inputs>
+   <command>echo "\$PWD"; cp $query ${query.name}; cp $db ${db.name}; (/home/galaxy-bitlab/galaxy/tools/chromeister/bin/CHROMEISTER -query ${query.name} -db ${db.name} -dimension $dimension -out ${query.name}-${db.name}.mat) &amp;>/dev/null ; rm ${query.name}; rm ${db.name}; Rscript /home/galaxy-bitlab/galaxy/tools/chromeister/bin/compute_score.R  ${query.name}-${db.name}.mat $dimension; mv ${query.name}-${db.name}.mat $output; mv ${query.name}-${db.name}.mat.filt.png $outputIMAGEN ; mv ${query.name}-${db.name}.mat.events.txt $outputEVENTS; rm hits-XY-${query.name}-${db.name}.mat.hits</command>
+  <outputs>
+      <data name="output" format="txt" label="Comparison matrix"/>
+      <data name="outputIMAGEN" format="png" label="Comparison dotplot"/>
+      <data name="outputEVENTS" format="txt" label="Detected events"/>
+  </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/CHROMEISTER.c	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,651 @@
+/*********
+
+File        CHROMEISTER.c
+Author      EPW <estebanpw@uma.es>
+Description Computes hits and generates a dotplot
+
+USAGE       Usage is described by calling ./CHROMEISTER --help
+
+
+
+**********/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <ctype.h>
+#include "structs.h"
+#include "alignmentFunctions.h"
+#include "commonFunctions.h"
+#define STARTING_SEQS 1000
+#define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes
+#define RANGE 2
+
+uint64_t custom_kmer = 32; // Defined as external in structs.h
+uint64_t diffuse_z = 4; // Defined as external in structs.h
+
+uint64_t get_seq_len(FILE * f);
+
+
+void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z);
+
+int main(int argc, char ** av){
+    
+
+    /*
+    //Store positions of kmers
+    uint64_t n_pools_used = 0;
+    //Mempool_l * mp = (Mempool_l *) malloc(MAX_MEM_POOLS*sizeof(Mempool_l));
+    //if(mp == NULL) terror("Could not allocate vectors for memory pools");
+    Mempool_l mp[MAX_MEM_POOLS];
+    init_mem_pool_llpos(&mp[n_pools_used]);
+    //llpos * aux;
+
+    uint64_t n_pools_used_AVL = 0;
+    Mempool_AVL mp_AVL[MAX_MEM_POOLS];
+    init_mem_pool_AVL(&mp_AVL[n_pools_used_AVL]);
+    */
+    Tuple_hits * thit;
+    
+    /*
+    AVLTree * root = NULL;
+    root = insert_AVLTree(root, 10, mp_AVL, &n_pools_used_AVL, 0, mp, &n_pools_used);
+    
+    llpos * some = find_AVLTree(root, 25);
+    while(some != NULL){
+        printf("#%"PRIu64", ", some->pos); some = some->next;
+    }
+    */
+
+    uint64_t i, j;
+
+    //query to read kmers from, database to find seeds
+    FILE * query = NULL, * database = NULL, * out_database = NULL;
+    uint64_t dimension = 1000; // Default 1000 * 1000
+    
+    
+    init_args(argc, av, &query, &database, &out_database, &custom_kmer, &dimension, &diffuse_z);
+
+
+
+    unsigned char char_converter[91];
+    char_converter[(unsigned char)'A'] = 0;
+    char_converter[(unsigned char)'C'] = 1;
+    char_converter[(unsigned char)'G'] = 2;
+    char_converter[(unsigned char)'T'] = 3;
+
+    
+    // Variables to account for positions
+    // Print info
+    fprintf(stdout, "[INFO] Loading database\n");
+    // Variables to read kmers
+    char c = 'N'; //Char to read character
+    // Current length of array and variables for the buffer
+    uint64_t idx = 0, r = 0;
+    
+    //Vector to read in batches
+    char * temp_seq_buffer = NULL;
+    if ((temp_seq_buffer = calloc(READBUF, sizeof(char))) == NULL) {
+        terror("Could not allocate memory for read buffer");
+    }
+
+    //Dimensional matrix
+    uint64_t ** representation = (uint64_t **) calloc(dimension+1, sizeof(uint64_t *));
+    if(representation == NULL) terror("Could not allocate representation");
+    for(i=0; i<dimension+1; i++){
+        representation[i] = (uint64_t *) calloc(dimension+1, sizeof(uint64_t));
+        if(representation[i] == NULL) terror("Could not allocate second loop representation");
+    }
+
+    /*
+    fseek(database, 0, SEEK_END);
+    uint64_t aprox_len_query = ftell(database);
+    uint64_t aprox_len_db = aprox_len_query;
+    rewind(database);
+    */
+    uint64_t aprox_len_query = get_seq_len(database);
+    uint64_t aprox_len_db = aprox_len_query;
+
+    uint64_t a_hundreth = (aprox_len_query/100);
+
+    unsigned char curr_kmer[custom_kmer], reverse_kmer[custom_kmer];
+    curr_kmer[0] = reverse_kmer[0] = '\0';
+    uint64_t word_size = 0, word_size_rev = 0;
+
+    //To hold all information related to database
+    uint64_t current_len = 0;
+    
+    //To force reading from the buffer
+    idx = READBUF + 1;
+
+    //unsigned char aux_kmer[custom_kmer+1];
+    
+    //Vector to store query seq
+    unsigned char * seq_vector_query = (unsigned char *) malloc(READBUF*sizeof(unsigned char));
+    if(seq_vector_query == NULL) terror("Could not allocate memory for query vector");
+
+    /*
+    Container * ct = (Container *) calloc(1, sizeof(Container));
+    if(ct == NULL) terror("Could not allocate container");    
+    */
+
+
+
+    Index * ctidx = (Index *) calloc(1, sizeof(Index));
+    if(ctidx == NULL) terror("Could not allocate container");
+    
+
+    //begin = clock();
+    
+
+    c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
+    while((!feof(database) || (feof(database) && idx < r))){
+
+        if(c == '>'){
+            
+
+
+            while(c != '\n') c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);  //Skip ID
+                
+
+            while(c != '>' && (!feof(database) || (feof(database) && idx < r))){ //Until next id
+                c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
+                c = toupper(c);
+                if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
+                    curr_kmer[word_size] = (unsigned char) c;
+                    if(word_size < custom_kmer) ++word_size;
+                    ++current_len;
+                    if(current_len % a_hundreth == 0){ 
+                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query); 
+                        //printf("%"PRIu64"%%..wasted: (%e) (%e)", 1+100*pos_in_query/aprox_len_query, (double)(wasted_cycles_forward)/CLOCKS_PER_SEC, (double)(wasted_cycles_reverse)/CLOCKS_PER_SEC); 
+                        fflush(stdout);
+                    }
+                    
+
+
+                }else{ //It can be anything (including N, Y, X ...)
+
+                    if(c != '\n' && c != '>'){
+                        word_size = 0;
+                        // data_database.sequences[pos_in_database++] = (unsigned char) 'N'; //Convert to N
+                        ++current_len;
+
+                    } 
+                }
+                //if(current_len % 1000000 == 0) printf(" curr len %" PRIu64"\n", current_len);
+                if(word_size == custom_kmer){
+                    //write to hash table
+                    
+
+                    thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
+                    [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
+                    [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
+                    [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
+                    
+                    /*
+                    typedef struct tuple_hits{
+                        int repetition;
+                        int hit_count;
+                        uint64_t key;
+                        uint64_t pos;
+                    } Tuple_hits;
+                    */
+
+                    if(thit->repetition == FALSE){
+                        // Then we can insert
+                        thit->hit_count = 0;
+                        thit->key = collisioned_hash(&curr_kmer[0], custom_kmer);
+                        thit->pos = current_len;
+                    }else{
+                        // Otherwise we break it
+                        thit->repetition = TRUE;
+                    }
+
+                    //thit->root = insert_AVLTree(thit->root, hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
+                    //thit->root = insert_AVLTree(thit->root, collisioned_hash(&curr_kmer[0], custom_kmer), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
+                    
+                    
+
+                    // Non overlapping
+                    word_size = 0;
+                    
+		
+		    // Overlapping
+                    //memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
+                    //--word_size;
+                }
+            }
+            word_size = 0;
+            
+        }else{
+            c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);    
+        }
+        
+    }
+
+
+    //end = clock();
+
+    // data_database.total_len = pos_in_database;
+
+    //fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64". Hash table building took %e seconds\n", data_database.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
+    fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64".\n", current_len);
+    //close database
+    fclose(database);
+
+    
+    
+    //begin = clock();
+    
+
+    
+    
+    
+    double pixel_size_db = (double) dimension / (double) current_len;
+    double ratio_db = (double) current_len / dimension;
+
+
+    // Get file length
+    
+    //fseek(query, 0, SEEK_END);
+    //aprox_len_query = ftell(query);
+    //rewind(query);
+    aprox_len_query = get_seq_len(query);
+
+    //uint64_t reallocs_hash_holder_table = 1;
+    //uint64_t n_items_hash_holder_table = aprox_len_query / 5;
+
+    //Hash_holder * hash_holder_table = (Hash_holder *) calloc(n_items_hash_holder_table, sizeof(Hash_holder));
+    //if(hash_holder_table == NULL) terror("Could not allocate hash holding table");
+
+    a_hundreth = (aprox_len_query/100);
+    double pixel_size_query = (double) dimension / (double) aprox_len_query;
+    double ratio_query = (double) aprox_len_query / dimension;
+    
+
+    double i_r_fix = MAX(1.0, custom_kmer * pixel_size_query);
+    double j_r_fix = MAX(1.0, custom_kmer * pixel_size_db);
+
+    
+    
+    fprintf(stdout, "[INFO] Ratios: Q [%e] D [%e]. Lenghts: Q [%"PRIu64"] D [%"PRIu64"]\n", ratio_query, ratio_db, aprox_len_query, current_len);
+    fprintf(stdout, "[INFO] Pixel size: Q [%e] D [%e].\n", pixel_size_query, pixel_size_db);
+
+
+    fprintf(stdout, "[INFO] Computing absolute hit numbers.\n");
+
+
+    current_len = 0;
+
+    //llpos * the_original_hit;
+
+    //To force reading from the buffer
+    idx = READBUF + 1;
+    c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);    
+    //uint64_t c_hash_holder = 0;
+    
+    while((!feof(query) || (feof(query) && idx < r))){
+
+        if(c == '>'){
+            word_size = 0;
+            word_size_rev = custom_kmer-1;
+            
+
+
+
+            while(c != '\n'){ c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); } //Skip ID
+                
+
+            while(c != '>' && (!feof(query) || (feof(query) && idx < r))){ //Until next id
+                c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);
+                c = toupper(c);
+                if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
+                    
+                    ++current_len;
+                    if(current_len % a_hundreth == 0){ 
+                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query); 
+                        fflush(stdout);
+                    }
+                    curr_kmer[word_size] = (unsigned char) c;
+                    ++word_size;
+
+                    switch(c){
+                        case ('A'): reverse_kmer[word_size_rev] = (unsigned)'T';
+                        break;
+                        case ('C'): reverse_kmer[word_size_rev] = (unsigned)'G';
+                        break;
+                        case ('G'): reverse_kmer[word_size_rev] = (unsigned)'C';
+                        break;
+                        case ('T'): reverse_kmer[word_size_rev] = (unsigned)'A';
+                        break;
+                    }
+                    if(word_size_rev != 0) --word_size_rev;
+
+
+
+                    
+                    if(word_size == custom_kmer){
+
+
+                        //hash_forward = hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K);
+                        //hash_reverse = hashOfWord(&reverse_kmer[0], custom_kmer, FIXED_K);
+                        uint64_t hash_forward, hash_reverse;
+                        hash_forward = collisioned_hash(&curr_kmer[0], custom_kmer);
+                        hash_reverse = collisioned_hash(&reverse_kmer[0], custom_kmer);
+                        
+
+                        thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
+                        [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
+                        [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
+                        [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
+
+                        //AVLTree * search = find_AVLTree(thit->root, hash_forward);
+
+                        if(thit->repetition == FALSE && hash_forward == thit->key){
+                            // Attention ::::: you were not removing the ones with count==1 earlier 
+                            thit->pos_in_y = current_len;
+                            thit->hit_count++;
+                        }
+
+                        thit = &ctidx->table[char_converter[reverse_kmer[0]]][char_converter[reverse_kmer[1]]][char_converter[reverse_kmer[2]]]
+                        [char_converter[reverse_kmer[3]]][char_converter[reverse_kmer[4]]][char_converter[reverse_kmer[5]]]
+                        [char_converter[reverse_kmer[6]]][char_converter[reverse_kmer[7]]][char_converter[reverse_kmer[8]]]
+                        [char_converter[reverse_kmer[9]]][char_converter[reverse_kmer[10]]][char_converter[reverse_kmer[11]]];
+
+                        if(thit->repetition == FALSE && hash_reverse == thit->key){
+                            // Attention ::::: you were not removing the ones with count==1 earlier 
+                            thit->pos_in_y = current_len;
+                            thit->hit_count++;
+                        }
+
+                        /*
+                        if(search != NULL && search->count == 1){ //If count is two, then it is a rep
+                            thit->hit_count += search->count;
+                            
+                            hash_holder_table[c_hash_holder].pos = current_len;
+                            hash_holder_table[c_hash_holder].node = search;
+                            hash_holder_table[c_hash_holder].th = thit;
+                            ++c_hash_holder;
+                            if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
+                                ++reallocs_hash_holder_table;
+                                hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
+                                if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
+                            }
+                        }
+                        */
+
+                        
+
+                        
+
+                        //search = find_AVLTree(thit->root, hash_reverse);
+                        /*
+                        if(search != NULL && search->count == 1){ //If count is two, then it is a rep
+                            
+                            thit->hit_count += search->count;
+                            hash_holder_table[c_hash_holder].pos = current_len;
+                            hash_holder_table[c_hash_holder].node = search;
+                            hash_holder_table[c_hash_holder].th = thit;
+                            ++c_hash_holder;
+                            if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
+                                ++reallocs_hash_holder_table;
+                                hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
+                                if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
+                            }
+                        }
+                        */
+
+                        // Overlapping
+                        
+                        memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
+                        memmove(&reverse_kmer[1], &reverse_kmer[0], custom_kmer-1);
+                        --word_size;
+
+                        // Non overlapping
+                        //word_size = 0;
+                        //word_size_rev = custom_kmer-1;
+                    }
+                }else{
+                    if(c != '\n' && c != '>'){
+                        word_size = 0;
+                        word_size_rev = custom_kmer-1;
+                        ++current_len;
+                    }
+                }
+            }
+        }else{
+            c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);    
+        }
+        
+    } 
+
+    /// Out
+
+    fprintf(stdout, "Scanning hits table.\n");
+
+    a_hundreth = MAX(1, TOTAL_ENTRIES/100);
+    uint64_t t_computed = 0;
+    uint64_t w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11;
+    for(w0=0;w0<4;w0++){
+        for(w1=0;w1<4;w1++){
+            for(w2=0;w2<4;w2++){
+                for(w3=0;w3<4;w3++){
+                    for(w4=0;w4<4;w4++){
+                        for(w5=0;w5<4;w5++){
+                            for(w6=0;w6<4;w6++){
+                                for(w7=0;w7<4;w7++){
+                                    for(w8=0;w8<4;w8++){
+                                        for(w9=0;w9<4;w9++){
+                                            for(w10=0;w10<4;w10++){
+                                                for(w11=0;w11<4;w11++){
+
+                                                    if(t_computed % a_hundreth == 0){ 
+                                                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*t_computed/TOTAL_ENTRIES); 
+                                                        fflush(stdout);
+                                                    }
+                                                    ++t_computed;
+                                                    Tuple_hits * taux = &ctidx->table[w0][w1][w2][w3][w4][w5][w6][w7][w8][w9][w10][w11];
+                                                    if(taux->hit_count == 1){
+                                                        // We plot it   
+                                                        // Convert scale to representation
+                                                        uint64_t redir_db = (uint64_t) (taux->pos / (ratio_db));
+                                                        uint64_t redir_query = (uint64_t) (taux->pos_in_y / (ratio_query));
+                                                        double i_r = i_r_fix; double j_r = j_r_fix;
+                                                        while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
+                                                            if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
+                                                                representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
+                                                            }else{
+                                                                representation[redir_query][redir_db]++;
+                                                                break;
+                                                            }
+                                                            i_r -= MIN(1.0, pixel_size_query);
+                                                            j_r -= MIN(1.0, pixel_size_db);
+                                                        }                                                     
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+
+    //double average_hit = ((double) total_hits / (double) table_size);
+    //average_hit = 2.2;
+
+    /*
+    //fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" Avg = %e.\n", total_hits, table_size, average_hit);
+    fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" E' = %e.\n", total_hits, table_size, Eprime);
+
+    
+
+    
+
+    a_hundreth = MAX(1,c_hash_holder/100);
+    
+    for(current_len = 0; current_len < c_hash_holder; current_len++){
+
+        if(current_len % a_hundreth == 0){ 
+            fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/c_hash_holder); 
+            fflush(stdout);
+        }
+
+        aux = hash_holder_table[current_len].node->next;
+
+        //if(hash_holder_table[current_len].th->hit_count < (uint64_t) average_hit){
+        if(hash_holder_table[current_len].th->hit_count < (uint64_t) Eprime){
+            while(aux != NULL){
+                // Convert scale to representation
+                uint64_t redir_db = (uint64_t) (aux->pos / (ratio_db));
+                uint64_t redir_query = (uint64_t) (hash_holder_table[current_len].pos / (ratio_query));
+                double i_r = i_r_fix; double j_r = j_r_fix;
+                while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
+                    if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
+                        representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
+                    }else{
+                        representation[redir_query][redir_db]++;
+                        break;
+                    }
+                    i_r -= MIN(1.0, pixel_size_query);
+                    j_r -= MIN(1.0, pixel_size_db);
+                }
+                aux = aux->next;
+            }
+        }
+    }
+    */
+
+
+    //end = clock();
+
+    
+    
+
+    //fprintf(stdout, "\n[INFO] Query length %"PRIu64". Hits completed. Took %e seconds\n", data_query.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
+    fprintf(stdout, "\n[INFO] Query length %"PRIu64".\n", current_len);
+
+    //begin = clock();
+
+    //reads_per_thread = (uint64_t) (floorl((long double) data_query.n_seqs / (long double) n_threads));
+    
+    fprintf(stdout, "[INFO] Writing matrix.\n");
+
+
+    uint64_t unique_diffuse = 0;
+	fprintf(out_database, "%"PRIu64"\n", aprox_len_query);
+    fprintf(out_database, "%"PRIu64"\n", aprox_len_db);
+     // And replace 2's with 1's 
+	
+    for(i=0; i<dimension+1; i++){
+        for(j=0; j<dimension; j++){
+            fprintf(out_database, "%"PRIu64" ", representation[i][j]);
+	    unique_diffuse += representation[i][j];
+        }
+        fprintf(out_database, "%"PRIu64"\n",  representation[i][dimension]);
+	unique_diffuse += representation[i][dimension];
+    }
+
+    fprintf(stdout, "[INFO] Found %"PRIu64" unique hits for z = %"PRIu64".\n", unique_diffuse, diffuse_z);
+    
+
+    
+    //free(ct->table);
+    //free(hash_holder_table);
+    /*
+    for(i=0;i<=n_pools_used_AVL;i++){
+        free(mp_AVL[i].base);
+    }
+
+    for(i=0;i<=n_pools_used;i++){
+        free(mp[i].base);
+    }
+    */
+    for(i=0;i<dimension;i++){
+        free(representation[i]);
+    }
+    free(representation);
+    if(out_database != NULL) fclose(out_database);
+    
+    return 0;
+}
+
+void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z){
+
+    int pNum = 0;
+    while(pNum < argc){
+        if(strcmp(av[pNum], "--help") == 0){
+            fprintf(stdout, "USAGE:\n");
+            fprintf(stdout, "           CHROMEISTER -query [query] -db [database] -out [outfile]\n");
+            fprintf(stdout, "OPTIONAL:\n");
+            fprintf(stdout, "           -kmer       [Integer:   k>1 (default 32)]\n");
+	    fprintf(stdout, "		-diffuse    [Integer:   z>0 (default 4)]\n");
+            fprintf(stdout, "           -dimension  Size of the output [Integer:   d>0 (default 1000)]\n");
+            fprintf(stdout, "           -out        [File path]\n");
+            fprintf(stdout, "           --help      Shows help for program usage\n");
+            fprintf(stdout, "\n");
+            fprintf(stdout, "PLEASE NOTICE: The reverse complementary is calculated for the QUERY.\n");
+            exit(1);
+        }
+        if(strcmp(av[pNum], "-query") == 0){
+            *query = fopen64(av[pNum+1], "rt");
+            if(query==NULL) terror("Could not open query file");
+        }
+        if(strcmp(av[pNum], "-db") == 0){
+            *database = fopen64(av[pNum+1], "rt");
+            if(database==NULL) terror("Could not open database file");
+        }
+        if(strcmp(av[pNum], "-out") == 0){
+            *out_database = fopen(av[pNum+1], "wt");
+            if(out_database==NULL) terror("Could not open output database file");
+        }
+        if(strcmp(av[pNum], "-kmer") == 0){
+            *custom_kmer = (uint64_t) atoi(av[pNum+1]);
+            if(*custom_kmer < BYTES_IN_MER) terror("K-mer size must be larger than 4");
+            if(*custom_kmer % BYTES_IN_MER != 0) terror("K-mer size must be a multiple of 4");
+
+        }
+        if(strcmp(av[pNum], "-diffuse") == 0){
+            *diffuse_z = (uint64_t) atoi(av[pNum+1]);
+            if(*diffuse_z == 0 || *diffuse_z > 32) terror("Z must satisfy 0<z<=32");
+
+        }
+
+        if(strcmp(av[pNum], "-dimension") == 0){
+            *dimension = (uint64_t) atoi(av[pNum+1]);
+            if(*dimension < 1) terror("Dimension must be a positive integer");
+        }
+        
+        pNum++;
+    }
+    
+    if(*query==NULL || *database==NULL || *out_database==NULL) terror("A query, database and output file is required");
+}
+
+uint64_t get_seq_len(FILE * f) {
+    char c = '\0';
+    uint64_t l = 0;
+
+    while(!feof(f)){
+        c = getc(f);
+
+        if(c == '>'){
+            while(c != '\n') c = getc(f);
+        }
+        c = toupper(c);
+        if(c >= 'A' && c <= 'Z'){
+            ++l;
+        }
+    }
+
+
+    rewind(f);
+    return l;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/Makefile	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,14 @@
+CC=gcc
+CXX=g++
+CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall #-DVERBOSE
+BIN=../bin
+
+all: CHROMEISTER
+
+
+
+CHROMEISTER: CHROMEISTER.c
+	$(CC) $(CFLAGS) alignmentFunctions.c -lm commonFunctions.c -lm CHROMEISTER.c -lm -o $(BIN)/CHROMEISTER
+
+clean:
+	rm -rf $(BIN)/CHROMEISTER
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/alignmentFunctions.c	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,266 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include <math.h>
+#include <float.h>
+#include "structs.h"
+#include "alignmentFunctions.h"
+#include "commonFunctions.h"
+
+
+
+int64_t compare_letters(unsigned char a, unsigned char b){
+    if(a != (unsigned char) 'N' && a != (unsigned char) '>') return (a == b) ? POINT : -POINT;
+    return -POINT;
+}
+
+llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used){
+
+    if(mp[*n_pools_used].current == POOL_SIZE){
+        *n_pools_used += 1;
+        if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools");
+        init_mem_pool_llpos(&mp[*n_pools_used]);
+        
+    }
+
+    llpos * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current;
+    mp[*n_pools_used].current++;
+
+    
+    return new_pos;
+}
+
+void init_mem_pool_llpos(Mempool_l * mp){
+    mp->base = (llpos *) calloc(POOL_SIZE, sizeof(llpos));
+    if(mp->base == NULL) terror("Could not request memory pool");
+    mp->current = 0;
+}
+
+AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key){
+
+    if(mp[*n_pools_used].current == POOL_SIZE){
+        *n_pools_used += 1;
+        if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools");
+        init_mem_pool_AVL(&mp[*n_pools_used]);
+        
+    }
+
+    AVLTree * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current;
+    mp[*n_pools_used].current++;
+
+    new_pos->key = key;
+    new_pos->count = 1;
+    new_pos->height = 1;
+
+    return new_pos;
+}
+
+void init_mem_pool_AVL(Mempool_AVL * mp){
+    mp->base = (AVLTree *) calloc(POOL_SIZE, sizeof(AVLTree));
+    if(mp->base == NULL) terror("Could not request memory pool");
+    mp->current = 0;
+}
+
+
+
+/*
+// An AVL tree node
+typedef struct AVL_Node{
+    uint64_t key;
+    struct AVL_Node * left;
+    struct AVL_Node * right;
+    uint64_t height;
+    llpos * next;
+} AVLTree;
+*/
+ 
+// A utility function to get height of the tree
+
+uint64_t height(AVLTree * N){
+    if (N == NULL)
+        return 0;
+    return N->height;
+}
+
+/* Substituted by (x == NULL) ? (0) : (x->height) */
+ 
+/* Helper function that allocates a new node with the given key and
+    NULL left and right pointers. */
+
+/* This one is substituted by AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key) */
+ 
+// A utility function to right rotate subtree rooted with y
+// See the diagram given above.
+AVLTree * right_rotate(AVLTree * y){
+    AVLTree * x = y->left;
+    AVLTree * T2 = x->right;
+ 
+    // Perform rotation
+    x->right = y;
+    y->left = T2;
+ 
+    // Update heights
+    //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1;
+    //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1;
+    // Update heights
+    y->height = MAX(height(y->left), height(y->right))+1;
+    x->height = MAX(height(x->left), height(x->right))+1;
+ 
+    // Return new root
+    return x;
+}
+ 
+// A utility function to left rotate subtree rooted with x
+// See the diagram given above.
+AVLTree * left_rotate(AVLTree * x){
+    AVLTree * y = x->right;
+    AVLTree * T2 = y->left;
+ 
+    // Perform rotation
+    y->left = x;
+    x->right = T2;
+ 
+    //  Update heights
+    //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1;
+    //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1;
+    x->height = MAX(height(x->left), height(x->right))+1;
+    y->height = MAX(height(y->left), height(y->right))+1;
+ 
+    // Return new root
+    return y;
+}
+ 
+// Get Balance factor of node N
+
+int64_t get_balance(AVLTree * N){
+    if (N == NULL)
+        return 0;
+    return height(N->left) - height(N->right);
+}
+
+/* Substituted by (node == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height) */
+
+AVLTree * find_AVLTree(AVLTree * node, uint64_t key){
+    AVLTree * found = NULL;
+    if(node == NULL) return NULL;
+
+    if (key < node->key) {
+        found = find_AVLTree(node->left, key);
+    } else if (key > node->key) {
+        found = find_AVLTree(node->right, key);
+    } else { 
+        return node;
+    }
+    return found;
+} 
+
+llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key){
+    llpos * aux = NULL;
+    if(node == NULL) return NULL;
+
+    if (key < node->key) {
+        aux = find_AVLTree_llpos(node->left, key);
+    } else if (key > node->key) {
+        aux = find_AVLTree_llpos(node->right, key);
+    } else { 
+        return node->next;
+    }
+    return aux;
+}
+
+// Recursive function to insert key in subtree rooted
+// with node and returns new root of subtree.
+AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l){
+    /* 1.  Perform the normal BST insertion */
+    if (node == NULL){
+        
+        AVLTree * n_node = getNewLocationAVLTree(mp, n_pools_used, key);
+        llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l);
+        aux->pos = pos;
+        n_node->next = aux;
+        return n_node;
+    }
+ 
+    if (key < node->key) {
+        node->left  = insert_AVLTree(node->left, key, mp, n_pools_used, pos, mp_l, n_pools_used_l);
+    } else if (key > node->key) {
+        node->right = insert_AVLTree(node->right, key, mp, n_pools_used, pos, mp_l, n_pools_used_l);
+    } else { 
+        // Equal keys are inserted as a linked list
+        if(node->count == 1){ // DO NOT INSERT MORE THAN 2. 1 is good 2 is repetition
+            llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l);
+            aux->pos = pos;
+            aux->next = node->next;
+            node->next = aux;
+            ++(node->count);
+        }
+        
+        return node;
+    }
+ 
+    /* 2. Update height of this ancestor node */
+    //node->height = 1 + MAX((node->left == NULL) ? (0) : (node->left->height), (node->right == NULL) ? (0) : (node->right->height));
+    node->height = 1 + MAX(height(node->left), height(node->right));
+ 
+    /* 3. Get the balance factor of this ancestor
+          node to check whether this node became
+          unbalanced */
+    //int64_t balance = (node->left == NULL || node->right == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height);
+    int64_t balance = get_balance(node);
+ 
+    // If this node becomes unbalanced, then
+    // there are 4 cases
+ 
+    // Left Left Case
+    if (balance > 1 && key < node->left->key)
+        return right_rotate(node);
+ 
+    // Right Right Case
+    if (balance < -1 && key > node->right->key)
+        return left_rotate(node);
+ 
+    // Left Right Case
+    if (balance > 1 && key > node->left->key)
+    {
+        node->left =  left_rotate(node->left);
+        return right_rotate(node);
+    }
+ 
+    // Right Left Case
+    if (balance < -1 && key < node->right->key)
+    {
+        node->right = right_rotate(node->right);
+        return left_rotate(node);
+    }
+ 
+    /* return the (unchanged) node pointer */
+    return node;
+}
+ 
+// A utility function to print preorder traversal
+// of the tree.
+// The function also prints height of every node
+
+void pre_order(AVLTree * root){
+    if(root != NULL){
+        printf("%"PRIu64" ", root->key);
+        llpos * aux = root->next;
+        while(aux != NULL){ printf("#%"PRIu64", ", aux->pos); aux = aux->next; }
+        pre_order(root->left);
+        pre_order(root->right);
+    }
+}
+
+
+uint64_t sum_of_all_tree(AVLTree * root){
+    uint64_t mysum = 0;
+    if(root != NULL){
+        
+        mysum = root->count;
+        mysum += sum_of_all_tree(root->left);
+        mysum += sum_of_all_tree(root->right);
+    }
+    return mysum;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/alignmentFunctions.h	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,49 @@
+#define QF_LAMBDA 0.275
+#define QF_KARLIN 0.333
+
+typedef struct container{
+    llpos * table[4][4][4][4][4][4][4][4][4][4][4][4];
+} Container;
+
+typedef struct index{
+    Tuple_hits table[4][4][4][4][4][4][4][4][4][4][4][4];
+} Index;
+
+/*
+    Nucleotides matching function
+*/
+int64_t compare_letters(unsigned char a, unsigned char b);
+
+/**
+ * Initialize the memory pool to later retrieve individual memory addresses for llpos
+ * 
+ */
+void init_mem_pool_llpos(Mempool_l * mp);
+
+/**
+ * Get a new memory address from the pool mp for a type llpos
+ * 
+ */
+llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used);
+
+
+
+AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key);
+
+
+void init_mem_pool_AVL(Mempool_AVL * mp);
+
+
+AVLTree * right_rotate(AVLTree * y);
+
+AVLTree * left_rotate(AVLTree * x);
+
+AVLTree * find_AVLTree(AVLTree * node, uint64_t key);
+
+llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key);
+
+AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l);
+
+void pre_order(AVLTree * root);
+
+uint64_t sum_of_all_tree(AVLTree * root);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/combine_reads.c	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "structs.h"
+#include "commonFunctions.h"
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) <= (y)) ? (x) : (y))
+#define STARTING_SEQS 1000
+#define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes
+#define MATVAL 101
+
+int main(int argc, char ** av){
+    
+    if(argc != 2) terror("USE:: combine_reads <file>");
+
+    FILE * results, * data;
+    data = fopen(av[1], "rt");
+    if(data == NULL) terror("Could not open input file");
+
+    uint64_t * mat = (uint64_t *) calloc(MATVAL, sizeof(uint64_t));
+    if(mat == NULL) terror("Could not allocate matrix array");
+
+    char buffer[MAXLID];
+    if ((results = fopen("accu.log", "r")) == NULL){
+        results = fopen("accu.log", "wt");
+    }else{
+        // Load the matrix
+        uint64_t i;
+        
+        for(i=0;i<100;i++){
+            if(0 == fgets(buffer, MAXLID, results)) terror("Missing number on load");
+            
+            //fprintf(stdout, "Have %s\n", buffer);
+            buffer[strlen(buffer)-1] = '\0';
+            mat[i] = asciiToUint64(buffer);
+            //fprintf(stdout, "%"PRIu64"\n", mat[i]);
+            //getchar();
+        }
+        fclose(results);
+        results = fopen("accu.log", "wt"); // Re open
+    }
+
+    // Read file 
+    uint64_t read_id_1, read_id_2, coverage, identity, length, current, currmax, j;
+    while(!feof(data)){
+        if(0 == fgets(buffer, MAXLID, data) && !feof(data)) terror("Missing values");
+        // 2 77277 89 64 213
+        sscanf(buffer, "%"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64, &read_id_1, &read_id_2, &coverage, &identity, &length);
+        //fprintf(stdout, "%"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64"\n", read_id_1, read_id_2, coverage, identity, length);
+        currmax = MIN(identity, coverage);
+        //fprintf(stdout, "%"PRIu64"\n", currmax);
+        current = read_id_1;
+        /*
+        for(j=currmax; j > 1; j--){
+            if(current != lasts[j]){
+                mat[j]++;
+                lasts[j] = current;
+            }
+        }
+        */
+        mat[currmax] += 1;
+
+    }
+
+    for(j=99; j>0; j--){
+        mat[j] += mat[j+1];
+    }
+    mat[0] = mat[1];
+
+    for(j=0; j<100; j++){
+        fprintf(stdout, "%"PRIu64"\n", mat[j]);
+        fprintf(results, "%"PRIu64"\n", mat[j]);
+    }
+
+
+    fclose(results);
+    fclose(data);
+    free(mat);
+    return 0;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/commonFunctions.c	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,218 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+#include <pthread.h>
+#include <math.h>
+#include "structs.h"
+
+void terror(char *s) {
+    printf("ERR**** %s ****\n", s);
+    exit(-1);
+}
+
+char buffered_fgetc(char *buffer, uint64_t *pos, uint64_t *read, FILE *f) {
+    if (*pos >= READBUF) {
+        *pos = 0;
+        memset(buffer, 0, READBUF);
+        *read = fread(buffer, 1, READBUF, f);
+    }
+    *pos = *pos + 1;
+    return buffer[*pos-1];
+}
+
+Queue * generate_queue(Head * queue_head, uint64_t t_reads, uint64_t n_threads, uint64_t levels){
+    uint64_t i, j;
+    uint64_t reads_per_thread;
+    if(levels > t_reads) levels = t_reads;
+    uint64_t pieces = t_reads/levels;
+    uint64_t from, to, t_queues = 0, current_queue = 0;
+    for(i=0;i<levels;i++) t_queues += ((i+1)*n_threads);
+    Queue * queues = (Queue *) malloc(t_queues * sizeof(Queue));
+    if(queues == NULL) terror("Could not allocate queue tasks");
+    queue_head->head = &queues[0];
+    
+    for(i=0;i<levels;i++){
+
+        //reads_per_thread = (uint64_t) (floorl((long double) pieces / (long double) ((i+1)*n_threads)));
+        reads_per_thread = (uint64_t) (ceill((long double) pieces / (long double) ((i+1)*n_threads)));
+        
+
+        for(j=0;j<(i+1)*n_threads;j++){
+            from = j * reads_per_thread + (pieces*i);
+            to = (j + 1) * reads_per_thread + (pieces*i);
+            
+            if(j == (i+1)*n_threads - 1) to = pieces*(i+1);
+
+
+            if(i == levels - 1 && j == (i+1)*n_threads - 1){
+                //If its the last 
+                queues[current_queue].next = NULL;
+            }else{
+                //Else add it to the queue
+                queues[current_queue].next = &queues[current_queue+1];
+            }
+            
+
+            queues[current_queue].r1 = from;
+            queues[current_queue].r2 = to;
+            current_queue++;
+            //printf("current_piece: %"PRIu64"-%"PRIu64" diff: %"PRIu64"\n", from, to, to - from);
+
+        }
+
+    }
+    //printf("TREADS was %"PRIu64"\n", t_reads);    
+    return &queues[0];
+}
+
+void print_queue(Queue * q){
+    fprintf(stdout, "Task: %"PRIu64"-%"PRIu64"\n", q->r1, q->r2);
+}
+
+Queue * get_task_from_queue(Head * queue_head, pthread_mutex_t * lock){
+    pthread_mutex_lock(lock);
+
+    Queue * ptr = queue_head->head;
+    if(queue_head->head != NULL) queue_head->head = queue_head->head->next;
+    //if(ptr != NULL){ printf("Taking "); /*print_queue(ptr);*/ }
+
+    pthread_mutex_unlock(lock);
+
+
+    return ptr;
+}
+
+uint64_t quick_pow4(uint64_t n){
+    static uint64_t pow4[33]={1L, 4L, 16L, 64L, 256L, 1024L, 4096L, 16384L, 65536L, 
+    262144L, 1048576L, 4194304L, 16777216L, 67108864L, 268435456L, 1073741824L, 4294967296L, 
+    17179869184L, 68719476736L, 274877906944L, 1099511627776L, 4398046511104L, 17592186044416L, 
+    70368744177664L, 281474976710656L, 1125899906842624L, 4503599627370496L, 18014398509481984L, 
+    72057594037927936L, 288230376151711744L, 1152921504606846976L, 4611686018427387904L};
+    return pow4[n];
+}
+
+uint64_t quick_pow4byLetter(uint64_t n, const char c){
+    static uint64_t pow4_G[33]={2*1L, 2*4L, 2*16L, 2*64L, 2*256L, 2*1024L, 2*4096L, 2*16384L, 2*65536L, 
+    (uint64_t)2*262144L, (uint64_t)2*1048576L,(uint64_t)2*4194304L, (uint64_t)2*16777216L, (uint64_t)2*67108864L, (uint64_t)2*268435456L, (uint64_t)2*1073741824L, (uint64_t)2*4294967296L, 
+    (uint64_t)2*17179869184L, (uint64_t)2*68719476736L, (uint64_t)2*274877906944L, (uint64_t)2*1099511627776L, (uint64_t)2*4398046511104L, (uint64_t)2*17592186044416L, 
+    (uint64_t)2*70368744177664L, (uint64_t)2*281474976710656L, (uint64_t)2*1125899906842624L, (uint64_t)2*4503599627370496L, (uint64_t)2*18014398509481984L, 
+    (uint64_t)2*72057594037927936L, (uint64_t) 2*288230376151711744L, (uint64_t) 2*1152921504606846976L, (uint64_t) 2*4611686018427387904L};
+    
+    static uint64_t pow4_T[33]={3*1L, 3*4L, 3*16L, 3*64L, 3*256L, 3*1024L, 3*4096L, 3*16384L, 3*65536L, 
+    (uint64_t)3*262144L, (uint64_t) 3*1048576L, (uint64_t)3*4194304L, (uint64_t)3*16777216L, (uint64_t)3*67108864L, (uint64_t)3*268435456L, (uint64_t)3*1073741824L, (uint64_t)3*4294967296L, 
+    (uint64_t)3*17179869184L, (uint64_t)3*68719476736L, (uint64_t)3*274877906944L, (uint64_t)3*1099511627776L, (uint64_t)3*4398046511104L, (uint64_t)3*17592186044416L, 
+    (uint64_t)3*70368744177664L, (uint64_t)3*281474976710656L, (uint64_t)3*1125899906842624L, (uint64_t)3*4503599627370496L, (uint64_t)3*18014398509481984L, 
+    (uint64_t)3*72057594037927936L, (uint64_t) 3*288230376151711744L, (uint64_t) 3*1152921504606846976L, (uint64_t) 3*4611686018427387904L};
+    
+    if(c == 'A') return 0;
+    if(c == 'C') return quick_pow4(n);
+    if(c == 'G') return pow4_G[n];
+    if(c == 'T') return pow4_T[n];
+    return 0;
+}
+
+uint64_t hashOfWord(const unsigned char * word, uint32_t k, uint64_t offset){
+    
+    uint64_t value = 0, jIdx;
+    for(jIdx=0;jIdx<k-offset;jIdx++){
+        value += quick_pow4byLetter(k-(jIdx+1), (char) word[jIdx]);
+    }
+    return value;
+}
+
+void perfect_hash_to_word(unsigned char * word, uint64_t hash, uint32_t k){
+    uint64_t jIdx = (uint64_t) (k-1), upIdx = 0;
+    uint64_t v;
+    while(jIdx >= 0){
+        v = (uint64_t) floor(hash / (pow(4, jIdx)));
+        if(v == 0){ word[upIdx++] = (unsigned char) 'A'; hash -= ((uint64_t) pow(4, jIdx) * 0); }
+        if(v == 1){ word[upIdx++] = (unsigned char) 'C'; hash -= ((uint64_t) pow(4, jIdx) * 1); }
+        if(v == 2){ word[upIdx++] = (unsigned char) 'G'; hash -= ((uint64_t) pow(4, jIdx) * 2); }
+        if(v == 3){ word[upIdx++] = (unsigned char) 'T'; hash -= ((uint64_t) pow(4, jIdx) * 3); }
+        
+        if(jIdx == 0) break;
+        --jIdx;
+    }
+}
+
+uint64_t collisioned_hash(const unsigned char * word, uint32_t k){
+    uint64_t jIdx, value = 0;
+    for(jIdx=0;jIdx<k;jIdx+=diffuse_z){
+        value += quick_pow4byLetter(k - (jIdx+1), (char) word[jIdx]);
+    }
+    return value;
+}
+
+void decomposed_hash_of_word(const unsigned char * word, unsigned char * vector, uint32_t k){
+    uint64_t jIdx;
+    for(jIdx=0;jIdx<k/BYTES_IN_MER;jIdx++){
+        vector[jIdx] = (unsigned char) quick_pow4byLetter(0, (char) word[jIdx*BYTES_IN_MER]);
+        vector[jIdx] += (unsigned char) quick_pow4byLetter(1, (char) word[jIdx*BYTES_IN_MER+1]);
+        vector[jIdx] += (unsigned char) quick_pow4byLetter(2, (char) word[jIdx*BYTES_IN_MER+2]);
+        vector[jIdx] += (unsigned char) quick_pow4byLetter(3, (char) word[jIdx*BYTES_IN_MER+3]);
+    }
+}
+
+uint64_t xor_decomposed_hash(unsigned char * vector1, unsigned char * vector2, uint32_t k){
+    uint64_t jIdx;
+    uint64_t diffs = 0;
+    for(jIdx=0;jIdx<k/BYTES_IN_MER;jIdx++){
+        diffs += (uint64_t) ((vector1[jIdx] ^ vector2[jIdx]) & 0x1);
+    }
+    return diffs;
+}
+
+uint64_t asciiToUint64(const char *text){
+	uint64_t number=0;
+
+	for(;*text;text++){
+		char digit=*text-'0';           
+		number=(number*10)+digit;
+	}
+	return number;
+}
+
+unsigned char complement(unsigned char c){
+    
+    switch(c){
+    case ((unsigned)'A'): return (unsigned)'T';
+    break;
+    case ((unsigned)'C'): return (unsigned)'G';
+    break;
+    case ((unsigned)'G'): return (unsigned)'C';
+    break;
+    case ((unsigned)'T'): return (unsigned)'A';
+    break;
+    case ((unsigned)'N'): return (unsigned)'N';
+    break;
+    case ((unsigned)'-'): return (unsigned)'-'; // Gaps
+    break;
+    case ((unsigned)'\0'): return (unsigned)'\0';
+    
+    }
+    
+    printf("T B: (%c) ____\n", c);
+    terror("Unrecognized nucleotide in hit");
+    return 0; 
+}
+
+void inplace_reverse_and_complement(unsigned char *d, uint64_t l){
+    uint64_t i;
+    char c;
+    for(i=0;i<l/2;i++){
+        c = d[i];
+        d[i] = d[l-i-1];
+        d[l-i-1] = c;
+        //printf("%c%c", d[i], d[l-i-1]);
+    }
+    
+    //printf("\n");
+    for(i=0;i<l;i++){
+        //printf("eeee: %d", (int)d[i]);
+        d[i] = complement(d[i]);
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/commonFunctions.h	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,51 @@
+#ifndef COMMON_FUNCTIONS_H
+#define COMMON_FUNCTIONS_H
+#include "structs.h"
+/**
+ * Print the error message 's' and exit(-1)
+ */
+void terror(char *s);
+
+
+/**
+ * Function to read char by char buffered from a FILE
+ */
+char buffered_fgetc(char *buffer, uint64_t *pos, uint64_t *read, FILE *f);
+
+/*
+    Generates a queue of tasks for threads
+*/
+Queue * generate_queue(Head * queue_head, uint64_t t_reads, uint64_t n_threads, uint64_t levels);
+
+/*
+    Prints a queue task
+*/
+void print_queue(Queue * q);
+
+/*
+    Gets the next task to do when a pthread is free
+*/
+Queue * get_task_from_queue(Head * queue_head, pthread_mutex_t * lock);
+
+uint64_t quick_pow4(uint64_t n);
+
+uint64_t quick_pow4byLetter(uint64_t n, const char c);
+
+uint64_t hashOfWord(const unsigned char * word, uint32_t k, uint64_t offset);
+
+void perfect_hash_to_word(unsigned char * word, uint64_t hash, uint32_t k);
+
+uint64_t collisioned_hash(const unsigned char * word, uint32_t k);
+
+void decomposed_hash_of_word(const unsigned char * word, unsigned char * vector, uint32_t k);
+
+uint64_t xor_decomposed_hash(unsigned char * vector1, unsigned char * vector2, uint32_t k);
+
+uint64_t asciiToUint64(const char *text);
+
+unsigned char complement(unsigned char c);
+
+void inplace_reverse_and_complement(unsigned char *d, uint64_t l);
+
+
+#endif /* COMMON_FUNCTIONS_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/reverseComplement.c	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,124 @@
+/* reverseComplement.c
+   Program reading a FASTA file as input (one or multiple sequence)
+   and then returning the reverse complementary sequence in the output file.
+   It is important to note that for multiple sequence file the first input
+   sequence is the last one at the output file and the last one in the input
+   is the first one in the output.
+   oscart@uma.es
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include "commonFunctions.h"
+
+#define SEQSIZE 2000000000
+#define READINPUT 10000
+#define WSIZE 32
+#define NREADS 1000000
+
+int main(int ac, char** av) {
+	FILE *fIn, *fOut;
+	int64_t i, j, nR, seqLen = 0;
+	char *seq, c, toW;
+	long *offset = NULL;
+
+	if (ac != 3)
+		terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT");
+
+	/**
+	 * Allocating memory for the sequence
+	 * Fixed amount of memory, change it to loadSeqDB?
+	 */
+	if ((seq = (char*) malloc(SEQSIZE)) == NULL)
+		terror("memory for Seq");
+
+	if ((fIn = fopen(av[1], "rt")) == NULL)
+		terror("opening IN sequence FASTA file");
+
+	if ((fOut = fopen(av[2], "wt")) == NULL)
+		terror("opening OUT sequence Words file");
+	
+	if ((offset = (long *) malloc(sizeof(long)*NREADS)) == NULL)
+		terror("memory for offsets");
+
+	for(i=0;i<NREADS;i++){
+		offset[i]=0;
+	}
+
+	nR = 0;
+	c = fgetc(fIn);
+	while(!feof(fIn)){
+		if(c == '>'){
+			offset[nR++] = ftell(fIn)-1;
+		}
+		c = fgetc(fIn);
+	}
+
+	for(i=nR-1; i>=0; i--){
+		fseek(fIn, offset[i], SEEK_SET);
+		//READ and write header
+		if(fgets(seq, READINPUT, fIn)==NULL){
+			terror("Empty file");
+		}
+		fprintf(fOut, "%s", seq);
+		//READ and write sequence
+		c = fgetc(fIn);
+		while(c != '>' && !feof(fIn)){
+			if(isupper(c) || islower(c)){
+				seq[seqLen++]=c;
+			}
+			c = fgetc(fIn);
+		}
+		for(j=seqLen-1; j >= 0; j--){
+			switch(seq[j]){
+				case 'A':
+					toW = 'T';
+					break;
+				case 'C':
+					toW = 'G';
+					break;
+				case 'G':
+					toW = 'C';
+					break;
+				case 'T':
+					toW = 'A';
+					break;
+				case 'U':
+					toW = 'A';
+					break;
+				case 'a':
+					toW = 't';
+					break;
+				case 'c':
+					toW = 'g';
+					break;
+				case 'g':
+					toW = 'c';
+					break;
+				case 't':
+					toW = 'a';
+					break;
+				case 'u':
+					toW = 'a';
+					break;
+				default:
+					toW = seq[j];
+					break;
+			}
+			fwrite(&toW, sizeof(char), 1, fOut);
+		}
+		toW='\n';
+		fwrite(&toW, sizeof(char), 1, fOut);
+		seqLen=0;
+	}
+
+	fclose(fIn);
+	fclose(fOut);
+
+	return 0;
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromeister/src/structs.h	Sat Dec 15 18:06:48 2018 -0500
@@ -0,0 +1,143 @@
+#ifndef STRUCTS_H
+#define STRUCTS_H
+
+#include <inttypes.h>
+
+//Structs required for the dotplot workflow
+#define MAXLID 200
+//#define READBUF 2000000000 //2 GB
+#define READBUF 500000000 //50MB
+#define INITSEQS 3000 //Number of starting sequences (in database)
+#define POINT 4
+
+#define FIXED_K 12
+#define TOTAL_ENTRIES 16777216
+
+#define MAXLID 200
+#define ALIGN_LEN 60 //For NW alignment
+#define MAX_READ_SIZE 20000 //Maximum length of read to have a portion of the table allocated
+#define MAX_WINDOW_SIZE 500 //Maximum window length to explore NW table
+//#define POOL_SIZE 2500000000 // by 16 bytes it is 40 GB
+#define POOL_SIZE 12500000 // 1 GB if 16 bytes
+#define MAX_MEM_POOLS 256 
+
+#define BYTES_IN_MER 4
+#define MAX_DECOMP_HASH 10
+#define FALSE 0
+#define TRUE 1
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) <= (y)) ? (x) : (y))
+
+extern uint64_t custom_kmer;
+extern uint64_t diffuse_z;
+
+//Struct for linked list of positions
+typedef struct linked_list_pos{
+    uint64_t pos;
+    //uint64_t extended_hash;
+    //unsigned char decomp_hash[MAX_DECOMP_HASH]; // Fits up to MAX_DECOMP_HASH*4 letters = 256 length kmer
+    //uint64_t hits_count;
+    struct linked_list_pos * next;
+} llpos;
+
+
+// An AVL tree node
+typedef struct AVL_Node{
+    uint64_t key;
+    struct AVL_Node * left;
+    struct AVL_Node * right;
+    uint64_t height;
+    uint64_t count;
+    llpos * next;
+} AVLTree;
+
+
+// Tuple of data 
+typedef struct tuple_hits{
+    int repetition;
+    int hit_count;
+    uint64_t key;
+    uint64_t pos;
+    uint64_t pos_in_y;
+} Tuple_hits;
+
+typedef struct hash_holder{
+    uint64_t key;
+    uint64_t pos;
+} Hash_holder;
+
+
+
+//Struct for memory pool por lists
+typedef struct mempool_l{
+    llpos * base;
+    uint64_t current;
+} Mempool_l;
+
+//Struct for memory pool por AVLtree
+typedef struct mempool_AVL{
+    AVLTree * base;
+    uint64_t current;
+} Mempool_AVL;
+
+//Struct for a whole sequence(s) data
+typedef struct seqinfo{
+    unsigned char * sequences;
+    uint64_t * start_pos;
+    uint64_t total_len;
+    uint64_t n_seqs;
+} SeqInfo;
+
+//Struct for the alignment of a quick hit
+typedef struct quickfrag{
+    uint64_t x_start;
+    uint64_t y_start;
+    uint64_t t_len;
+    long double coverage;
+    long double e_value;
+} Quickfrag;
+
+typedef struct point{
+    uint64_t x;
+    uint64_t y;
+} Point;
+
+
+
+struct cell{
+    int64_t score;
+    uint64_t xfrom;
+    uint64_t yfrom;
+};
+
+struct positioned_cell{
+    int64_t score;
+    uint64_t xpos;
+    uint64_t ypos;
+};
+
+struct best_cell{
+    struct positioned_cell c;
+    uint64_t j_prime;
+};
+
+typedef struct{
+    uint64_t identities;
+    uint64_t length;
+    uint64_t igaps;
+    uint64_t egaps;
+} BasicAlignment;
+
+typedef struct queue{
+    uint64_t r1; //reads region
+    uint64_t r2;
+    struct queue * next;
+} Queue;
+
+typedef struct{
+    Queue * head;
+} Head;
+
+
+#endif