changeset 1:3d1fbde7e0cc draft default tip

Deleted selected files
author alvarofaure
date Thu, 13 Dec 2018 03:41:58 -0500
parents 7fdf47a0bae8
children
files chromeister/README.md chromeister/bin/CHROMEISTER chromeister/bin/allVsAll.sh chromeister/bin/allVsAll_incremental.sh chromeister/bin/compute_score.R chromeister/bin/generate-one-score.sh chromeister/bin/index_chromeister.sh chromeister/bin/index_chromeister_solo.sh chromeister/bin/make-cluster.R chromeister/bin/make-mean.sh chromeister/bin/plot.R chromeister/bin/plot_diags.R chromeister/bin/recompute_scores.sh chromeister/bin/run_and_plot_chromeister.sh chromeister/chromeister.xml chromeister/src/CHROMEISTER.c chromeister/src/Makefile chromeister/src/alignmentFunctions.c chromeister/src/alignmentFunctions.h chromeister/src/combine_reads.c chromeister/src/commonFunctions.c chromeister/src/commonFunctions.h chromeister/src/reverseComplement.c chromeister/src/structs.h
diffstat 24 files changed, 0 insertions(+), 2940 deletions(-) [+]
line wrap: on
line diff
--- a/chromeister/README.md	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-# chromeister
-An ultra fast, heuristic approach to detect conserved signals in extremely large pairwise genome comparisons.
-
-## Requirements
-
-GCC compiler (any version that is not completely outdated should do) and the R-base package (default R installation should do) with no extra packages.
-Simply download the .zip and unzip it, or clone the repository (currently the active branch is "histo-kmer-ultra" DO NOT USE ANY OTHER).
-Then issue the following commands:
-
-cd chromeister/src && make all
-
-You are ready to go!
-
-## Use
-
-There are several ways in which CHROMEISTER can be used. The simplest one is to run a 1-vs-1 comparison and then compute the score and the plot.
-To do so, use the binaries at the bin folder:
-
-CHROMEISTER -query seqX -db seqY -out dotplot.mat && Rscript compute_score.R dotplot.mat
-
-This will generate the comparison matrix, the plot of the comparison with the automatic scoring and the guides to be used in an exhaustive GECKO comparison.
-
-More to be added soon!
-
-
-
-
-
-
-
-
-
-
-
-
-
Binary file chromeister/bin/CHROMEISTER has changed
--- a/chromeister/bin/allVsAll.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#!/usr/bin/env bash
-DIR=$1
-EXT=$2
-DIM=$3
-KMER=$4
-
-array=()
-x=0
-
-if [ $# != 4 ]; then
-	echo "***ERROR*** Use: $0 genomesDirectory extension dim kmer"
-	exit -1
-fi
-
-indexnameA=$(basename "$DIR")
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
-do
-	array[$x]=$elem
-	x=`expr $x + 1`
-	#echo "X: $elem"
-done
-
-for ((i=0 ; i < ${#array[@]} ; i++))
-do
-	for ((j=i ; j < ${#array[@]} ; j++))
-	do
-		if [ $i != $j ]; then
-				seqX=${array[$i]}
-				seqY=${array[$j]}
-				echo "----------${seqX}-${seqY}-----------"
-
-				
-				if [[ ! -f ${seqX}.$EXT-${seqY}.$EXT.mat ]]; then
-					
-					$BINDIR/run_and_plot_chromeister.sh $DIR/${seqX}.$EXT $DIR/${seqY}.$EXT $KMER $DIM
-					#Rscript $BINDIR/compute_score.R $seqX.$EXT-$seqY.$EXT.mat $DIM > $seqX.$EXT-$seqY.$EXT.scr.txt
-				fi
-			
-		fi
-	done
-done
-
-# generate index
-if [[ ! -f index.csv.temp ]] && [ ! -f index-$indexnameA.csv  ]; then
-	
-	echo "Launching... $BINDIR/index_chromeister_solo.sh index-$indexnameA.csv $DIR $DIR"
-	$BINDIR/index_chromeister_solo.sh . index-$indexnameA.csv $DIR $DIR
-fi
-
-
-
-
--- a/chromeister/bin/allVsAll_incremental.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-#!/usr/bin/env bash
-DIR=$1
-DIR2=$2
-EXT=$3
-DIM=$4
-KMER=$5
-
-
-
-if [ $# != 5 ]; then
-	echo "***ERROR*** Use: $0 genomesDirectory1 genomesDirectory2 extension dim kmer"
-	exit -1
-fi
-
-indexnameA=$(basename "$DIR")
-indexnameB=$(basename "$DIR2")
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-
-array=()
-x=0
-array2=()
-
-for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
-do
-	array[$x]=$elem
-	x=`expr $x + 1`
-	#echo "X: $elem"
-done
-
-x=0
-
-for elem in $(ls -d $DIR2/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
-do
-	array2[$x]=$elem
-	x=`expr $x + 1`
-	#echo "X: $elem"
-done
-
-for ((i=0 ; i < ${#array[@]} ; i++))
-do
-	for ((j=0 ; j < ${#array2[@]} ; j++))
-	do
-				seqX=${array[$i]}
-				seqY=${array2[$j]}
-				echo "----------${seqX}-${seqY}-----------"
-
-				
-				#echo "$BINDIR/run_and_plot_chromeister.sh $DIR/${seqX}.$EXT $DIR/${seqY}.$EXT 30 10000"
-				if [[ ! -f ${seqX}.$EXT-${seqY}.$EXT.mat ]]; then
-					$BINDIR/run_and_plot_chromeister.sh $DIR/${seqX}.$EXT $DIR2/${seqY}.$EXT $KMER $DIM
-				fi
-			
-	done
-done
-
-
-# generate index
-if [[ ! -f index.csv.temp ]] && [ ! -f index-$indexnameA-$indexnameB.csv  ]; then
-	
-	echo "Launching... $BINDIR/index_chromeister_solo.sh . index-$indexnameA-$indexnameB.csv $DIR $DIR2"
-	$BINDIR/index_chromeister_solo.sh . index-$indexnameA-$indexnameB.csv $DIR $DIR2
-fi
-
-
-
--- a/chromeister/bin/compute_score.R	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,478 +0,0 @@
-
-#!/usr/bin/env Rscript
-args = commandArgs(trailingOnly=TRUE)
-
-if(length(args) < 2){
-  stop("USE: Rscript --vanilla plot.R <matrix> <matsize>")
-}
-
-
-
-
-
-
-growing_regions <- function(mat, reward = 6, penalty = 5, sidePenalty = 3, MAXHSPS = 500, TH = 10, WSIZE = 7){
-
-  #write(mat, file ="data.txt", ncolumns = 200)
-  
-  len <- min(length(mat[1,]), length(mat[,1]))
-  HSPS <- matrix(0, nrow=MAXHSPS, ncol=5)
-  idx <- 1
-  lH <- round(WSIZE/2) - 1
-  rH <- round(WSIZE/2) + 1
-  
-  
-  #print(paste("lH: ", lH, "rH: ", rH))
-  
-  if(WSIZE %% 2 == 0){
-    print("WSIZE MUST BE ODD")
-    stop()
-  }
-  
-  
-  
-  i <- 1
-  #readline(prompt="Press [enter] to continue")
-  while(i < len){
-
-    value <- max(mat[i,]) * reward
-    if(value == 0) i <- i + 1
-    pos <- which.max(mat[i,])
-    # these two hold ending frag
-    endfrag <- pos
-    j <- i
-    count_penalties <- 1
-
-    #print("-----------------")
-
-    while(value > 0 && j < len){
-
-      #print(paste("took values ", j, endfrag, "which have score of ", mat[j, endfrag], "current value is: ", value))
-      
-      
-      
-
-      # Reset position used
-      
-      mat[max(1,j-1), max(1,endfrag-2)] <- 0
-      mat[max(1,j-1), max(1,endfrag-1)] <- 0
-      mat[max(1,j-1), endfrag] <- 0
-      mat[max(1,j-1), min(len, endfrag+1)] <- 0
-      mat[max(1,j-1), min(len, endfrag+2)] <- 0
-      
-      mat[j, max(1,endfrag-2)] <- 0
-      mat[j, max(1,endfrag-1)] <- 0
-      mat[j, endfrag] <- 0
-      mat[j, min(len, endfrag+1)] <- 0
-      mat[j, min(len, endfrag+2)] <- 0
-      
-      #print(paste("Erasing: (",max(1,j-1), max(1,endfrag-2),")(",max(1,j-1), max(1,endfrag-1),")(",max(1,j-1), endfrag,
-      #            ")(",max(1,j-1), min(len, endfrag+1),")(",max(1,j-1), min(len, endfrag+2),")(",j, max(1,endfrag-2),
-      #            ")(",j, max(1,endfrag-1),")(",j, endfrag,")(",j, min(len, endfrag+1),")(",j, min(len, endfrag+2),")"))
-      
-      
-      # Go for next
-      j <- j + 1
-      # Check next, could be reverse or forward
-      mleft <- max(1, endfrag-lH)
-      mright <- min(len, endfrag+lH)
-      window <- mat[j, mleft:mright]
-      
-
-      #print(paste("mleft: ", mleft, "mright", mright, "j is: ", j))
-      #print(window)
-
-      v <- max(window)
-      selected <- which.max(window)
-      # Make it rather go diagonally
-      #print(paste("WIndow len: ", length(window)))
-      chose_diagonal <- FALSE
-      if(length(window) == WSIZE && v == window[lH]){
-        selected <- lH
-        chose_diagonal <- TRUE
-      }
-      
-      if(length(window) == WSIZE && v == window[rH]){
-        selected <- rH
-        chose_diagonal <- TRUE
-      }
-      
-      #print(paste("Selected value be like ", selected))
-      
-
-      if(v != 0){
-        
-        
-        
-        endfrag <- (mleft + selected ) # To make the indexing
-        if(length(window) == WSIZE) endfrag <- endfrag - 1
-        endfrag <- max(1, min(len, endfrag))
-        #print(paste("\t", " endfragnew = ", endfrag, "max of window: ", max(window), "on position", which.max(window)))
-      }
-
-      #print(paste("Chose diagonal is ", chose_diagonal))
-      
-      # If no similarity is found
-      if(v == 0){
-        value <- value - count_penalties * penalty
-        count_penalties <- count_penalties + 1
-        #print("Got penalty #########")
-        
-      }else{
-        
-        # Similarity is found
-        
-        if(!chose_diagonal){
-          value <- value + count_penalties * (-sidePenalty)
-          count_penalties <- count_penalties + 1
-          #print("Got SIDE penalty @@@ #########")
-        }else{
-          count_penalties <- 1
-          value <- value + reward
-          #print("Got reward thou #########")
-        }
-        
-      }
-      #readline(prompt="Press [enter] to continue")
-
-    }
-    # Check len of frag
-    if(j-i > TH){
-      # HSPS[idx, 1] <- i
-      # HSPS[idx, 2] <- pos
-      # HSPS[idx, 3] <- j
-      # HSPS[idx, 4] <- endfrag
-      # HSPS[idx, 5] <- abs(i-j)
-      
-      HSPS[idx, 1] <- pos
-      HSPS[idx, 2] <- i
-      HSPS[idx, 3] <- endfrag
-      HSPS[idx, 4] <- j
-      HSPS[idx, 5] <- abs(i-j)
-      
-      
-      
-      #print(paste("ACCEPT", i, pos, j, endfrag, "x-y", j-i, abs(endfrag-pos), sep = " "))
-      idx <- idx + 1
-    }else{
-      #print("REJECT")
-    }
-    # This will prevent overlappign lines, I think
-    #i <- j
-    
-    if(idx == MAXHSPS) break
-  }
-  
-  
-  
-  
-  return (HSPS)
-}
-
-detect_events <- function(HSPS, sampling){
-  
-  DIAG_SEPARATION <- 10
-  # same as HSPS but adding the event
-  output <- matrix(0, nrow=length(HSPS[,1]), ncol=1+length(HSPS[1,]))
-  colnames(output) <- c("x1", "y1", "x2", "y2", "len", "event")
-  j <- 0
-  for(i in 1:(length(HSPS[,1]))){
-    
-    if(sum(HSPS[i,]) > 0){
-      j <- j + 1
-      is_inverted = FALSE
-      is_diagonal = TRUE
-      
-      if(HSPS[i,1] > HSPS[i,3]) is_inverted = TRUE
-      if(abs(HSPS[i,1] - HSPS[i,2]) > DIAG_SEPARATION && abs(HSPS[i,3] - HSPS[i,4]) > DIAG_SEPARATION) is_diagonal = FALSE
-      
-      output[i,1] <- HSPS[i,1] * sampling
-      output[i,2] <- HSPS[i,2] * sampling
-      output[i,3] <- HSPS[i,3] * sampling
-      output[i,4] <- HSPS[i,4] * sampling
-      output[i,5] <- HSPS[i,5] * sampling
-      
-      if(is_diagonal) output[i,6] <- "synteny block"
-      if(is_diagonal && is_inverted) output[i,6] <- "inversion"
-      if(!is_diagonal && !is_inverted) output[i,6] <- "transposition"
-      if(!is_diagonal && is_inverted) output[i,6] <-"inverted transposition"
-    }
-    
-    
-  }
-  
-  return (output[1:j,])
-}
-
-
-
-paint_frags <- function(HSPS, l, sampling){
-  
-  plot(c(HSPS[1,1]*sampling, HSPS[1,3]*sampling), c(HSPS[1,2]*sampling, HSPS[1,4]*sampling), xlim = c(1,l*sampling), ylim = c(1,l*sampling), type="l", xlab="X-genome",ylab="Y-genome")
-  for(i in 2:length(HSPS[,1])){
-    if(sum(HSPS[i,]) > 0){
-      lines(c(HSPS[i,1]*sampling, HSPS[i,3]*sampling), c(HSPS[i,2]*sampling, HSPS[i,4]*sampling))
-    }
-  }
-}
-
-supersample <- function(mat, upscale){
-  l <- min(length(mat[1,]), length(mat[,1]))
-  size <- round(l*upscale)
-  m <- matrix(0, nrow=size, ncol=size)
-  
-  for(i in 1:size){
-    for(j in 1:size){
-      mleft <- max(1, i-1)
-      mright <- min(l, i+1)
-      mup <- max(1, j-1)
-      mdown <- min(l, j+1)
-      
-      ri <- max(1, i/upscale)
-      rj <- max(1, j/upscale)
-      
-      if(mat[ri, rj] > 0){
-        m[i, j] <- 1
-      }
-    }
-  }
-  return (m)
-}
-
-
-downsample <- function(mat, downscale){
-  l <- min(length(mat[1,]), length(mat[,1]))
-  size <- round(l/downscale)
-  m <- matrix(0, nrow=size, ncol=size)
-  
-  for(i in 1:l){
-    for(j in 1:l){
-      mup <- max(1, i-1)
-      mdown <- min(l, i+1)
-      mleft <- max(1, j-1)
-      mright <- min(l, j+1)
-      
-      #print(paste("Matrix at ", i, j))
-      #print(mat[mup:mdown, mleft:mright])
-      
-      if(sum(mat[mup:mdown, mleft:mright]) > 0){
-        #print(paste("goes to ", round(i/downscale), round(j/downscale)))
-        m[round(i/downscale), round(j/downscale)] <- 1
-      }
-    }
-  }
-  
-  return (m)
-}
-
-
-
-
-
-
-
-path_mat = args[1]
-
-
-fancy_name <- strsplit(path_mat, "/")
-fancy_name <- (fancy_name[[1]])[length(fancy_name[[1]])]
-
-# Read sequence lenghts
-con <- file(path_mat,"r")
-seq_lengths <- readLines(con, n=2)
-seq_x_len <- as.numeric(seq_lengths[1])
-seq_y_len <- as.numeric(seq_lengths[2])
-close(con)
-
-
-data <- as.matrix(read.csv(path_mat, sep = " ", skip=2))
-
-# Get sequence names
-name_x <- strsplit(fancy_name, "-")[[1]][1]
-name_y <- strsplit(fancy_name, "-")[[1]][2]
-
-# Max of columns
-
-len_i <- as.numeric(args[2])
-len_j <- as.numeric(args[2])
-
-
-score_density <- data
-aux_density <- data
-
-
-pmax_pos <- which.max(aux_density[,1])
-for(i in 1:len_i){
-  
-  cmax_pos <- which.max(aux_density[i,]) # get max of row
-  
-  if((aux_density[i,cmax_pos]) > 0){ # if it has value
-    #row_from_col <- which.max(aux_density[,cmax_pos]) # get max of the column pointed by maximum of row
-    score_density[i,] <- 0 # put all others in row to 0 i.e. only use this max, EXCEPT for the maximum in the column
-    #score_density[row_from_col, cmax_pos] <- 1
-    score_density[i,cmax_pos] <- 1
-    pmax_pos <- cmax_pos
-  }
-}
-
-
-pmax_pos <- which.max(aux_density[1,])
-
-
-for(i in 1:len_i){
-
-  cmax_pos <- which.max(aux_density[,i]) # get max of column
-
-  if((aux_density[cmax_pos,i]) > 0){
-    score_density[,i] <- 0
-    score_density[cmax_pos,i] <- 1
-    pmax_pos <- cmax_pos
-  }
-}
-
-
-
-
-
-
-
-
-
-score_copy <- score_density
-diag_len <- 4
-
-for(i in 6:(len_i-5)){
-  for(j in 6:(len_j-5)){
-    
-    value <- 0
-    for(w in (-diag_len/2):(diag_len/2)){
-      if(score_density[i+w,j+w] > 0){
-        value <- value + 1
-      }
-    }
-    
-    if(value >= diag_len){
-      for(k in 1:5){
-        score_copy[i+k,j+k] <- 1
-      }
-      for(k in 1:5){
-        score_copy[i-k,j-k] <- 1
-      }
-    }else if(score_copy[i,j]==0){
-      score_copy[i,j] <- 0
-    }
-  }
-}
-
-for(i in 6:(len_i-5)){
-  for(j in 6:(len_j-5)){
-    
-    value <- 0
-    for(w in (-diag_len/2):(diag_len/2)){
-      if(score_density[i-w,j+w] > 0){
-        value <- value + 1
-      }
-    }
-    
-    if(value >= diag_len){
-      for(k in 1:5){
-        score_copy[i-k,j+k] <- 1
-      }
-      for(k in 1:5){
-        score_copy[i+k,j-k] <- 1
-      }
-    }else if(score_copy[i,j]==0){
-      score_copy[i,j] <- 0
-    }
-  }
-}
-
-
-# Kernel to remove single points
-
-for(i in 1:(length(score_copy[,1]))){
-  for(j in 1:(length(score_copy[1,]))){
-    
-    value <- 0
-    
-    min_i <- max(1, i-1)
-    max_i <- min(len_i, i+1)
-    min_j <- max(1, j-1)
-    max_j <- min(len_j, j+1)
-    
-    value <- sum(score_copy[min_i:max_i, min_j:max_j])
-    
-    if(value < 2) score_copy[i,j] <- 0
-  }
-}
-
-# To compute the score
-score <- 0
-pmax_pos <- which.max(score_copy[,1])
-dist_th <- 1.5
-besti <- 1
-bestj <- pmax_pos
-dvec1 <- abs(which.max(score_copy[,2]) - which.max(score_copy[,1]))
-dvec2 <- abs(which.max(score_copy[,3]) - which.max(score_copy[,2]))
-dvec3 <- abs(which.max(score_copy[,4]) - which.max(score_copy[,3]))
-for(i in 5:len_i){
-  
-  #print(paste(paste(paste(dvec1, dvec2), dvec3), mean(c(dvec1, dvec2, dvec3))))
-  distance <- mean(c(dvec1, dvec2, dvec3))
-
-  
-  
-  dvec1 <- dvec2
-  dvec2 <- dvec3
-  dvec3 <- abs(which.max(score_copy[,i]) - which.max(score_copy[,i-1]))
-  
-  # If there is a 0 or we are too far away just add max distance!
-  if(distance > dist_th || distance == 0){
-    score <- score + len_i
-  }
-  
-}
-
-
-score <- (score/(len_i^2))
-print(score)
-
-
-sampling_value <- 5
-submat <- downsample(score_copy, sampling_value)
-m <- growing_regions((submat), WSIZE = 7, TH = 5, penalty = 15)
-events <- detect_events(m, sampling_value)
-events <- rbind(events, c(0,0,0,0,0,"Null event"))
-write(as.character(c(seq_x_len, seq_y_len)), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=2)
-write(as.character(c("x1", "y1", "x2", "y2", "len", "event")), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=6)
-for(i in 1:length(events[,1])){
-  write(as.character(events[i,]), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=6)  
-}
-
-
-
-coords1 <- round(seq(from=0, to=1, by=0.2)*seq_x_len)
-coords2 <- round(seq(from=0, to=1, by=0.2)*seq_y_len)
-
-final_image <- apply((t(score_copy)), 2, rev)
-
-png(paste(path_mat, ".filt.png", sep=""), width = length(data[,1]), height = length(data[,1]))
-image(t(final_image), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n', main = paste(fancy_name, paste("filt. score=", score)), xlab = name_x, ylab = name_y, axes = FALSE)
-axis(1, tick = TRUE, labels = (coords1), at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1))
-axis(2, tick = TRUE, labels = rev(coords2), at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1))
-dev.off()
-
-
-# Output pixel coordinates of highly conserved signals
-
-# To clear it in case it existed
-write(c(paste("X", "Y")), file = paste("hits-XY-", paste(fancy_name, ".hits", sep=""), sep=""), append = FALSE, sep = "\n")
-
-
-for(i in 1:(length(score_copy[,1]))){
-  for(j in 1:(length(score_copy[1,]))){
-    if(score_copy[i,j] != 0){
-      write(c(paste(i, j)), file = paste("hits-XY-", paste(fancy_name, ".hits", sep=""), sep=""), append = TRUE, sep = "\n")
-    }
-  }
-}
--- a/chromeister/bin/generate-one-score.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-#!/usr/bin/env bash
-CSV=$1
-TH=$2
-
-if [ $# -ne 2 ]; then
-   echo " ==== ERROR ... you called this script inappropriately."
-   echo ""
-   echo "   usage:  $0 <index.csv> <threshold>"
-   echo ""
-   exit -1
-fi
-
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-# get first genome in list (they are sorted)
-currgenome=$(tail -n +2 "$CSV" | head -1 | awk -F "," '{print $6}')
-
-# fill array of chromosomes similarity
-
-array=()
-arraytosort=()
-names=()
-homologies=()
-condition=0
-othergencounter=0
-# for problems with chromo X and Y
-highest=1 
-# For all lines
-
-cat $CSV | tail -n +2 > $1.temp
-
-while IFS= read -r i
-do
-
-	othergenome=$(echo "$i" | awk -F "," '{print $6}')
-	if [ "$condition" -eq 0 ]; then
-		currgenome=$othergenome
-		condition=1
-	fi
-	
-	if [ "$othergenome" != "$currgenome" ]; then
-	
-		# Sort the array with temporal values
-		#printf '%s\n' "${arraytosort[@]}"
-		#echo "name is $currgenome"
-		
-		sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
-		
-		#echo "For chroomo $currgenome we have "
-		#echo $(printf '%s,' "${sorted[@]}")
-		# accumulate sum until threshold is reached
-		usedValues=1
-		usedValuesNext=2
-		first=${sorted[0]}
-		next=${sorted[${usedValues}]}
-		nextofnext=${sorted[${usedValuesNext}]}
-		finalvalue=$first
-		divisor=0
-		currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
-		TH=$(printf '%4.6f' $TH)
-		#echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
-		condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
-		#echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
-		while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
-		do
-			usedValues=`expr $usedValues + 1`
-			usedValuesNext=`expr $usedValuesNext + 1`
-			finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
-			next=${sorted[${usedValues}]}
-			nextofnext=${sorted[${usedValuesNext}]}
-			
-			currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
-			condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
-			divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
-			
-		done
-
-		#echo "so this is what we got $finalvalue, when divided using $divisor"
-		
-		# array holds the results
-		#array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
-		finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
-		array[$highest]=$finalvalue
-		homologies[$highest]=$usedValues
-		
-		highest=`expr $highest + 1`
-		condition=0
-		names+=($currgenome)
-		othergencounter=0
-		unset arraytosort
-		
-		
-		
-		
-		getvalue=$(echo "$i" | awk -F "," '{print $8}')
-		# Copy value to array 
-		arraytosort[$othergencounter]=$getvalue
-		#currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
-		othergencounter=`expr $othergencounter + 1`
-	else
-		getvalue=$(echo "$i" | awk -F "," '{print $8}')
-		# Copy value to array 
-		arraytosort[$othergencounter]=$getvalue
-		#currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
-		othergencounter=`expr $othergencounter + 1`
-
-	fi
-
-done < "$1.temp"
-
-# do the last!!!
-#if [ "$lastprint" == "$currgenome" ]; then
-# Sort the array with temporal values
-sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
-
-usedValues=1
-usedValuesNext=2
-first=${sorted[0]}
-next=${sorted[${usedValues}]}
-nextofnext=${sorted[${usedValuesNext}]}
-finalvalue=$first
-divisor=0
-currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
-TH=$(printf '%4.6f' $TH)
-#echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
-condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
-#echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
-while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
-do
-        usedValues=`expr $usedValues + 1`
-        usedValuesNext=`expr $usedValuesNext + 1`
-        finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
-        next=${sorted[${usedValues}]}
-        nextofnext=${sorted[${usedValuesNext}]}
-
-        currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
-        condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
-        divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
-
-done
-
-#echo "so this is what we got $finalvalue, when divided using $divisor"
-
-# array holds the results
-#array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
-finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
-array[$highest]=$finalvalue
-homologies[$highest]=$usedValues
-
-
-highest=`expr $highest + 1`
-currsum=0
-names+=($currgenome)	
-currgenome=$othergenome
-othergencounter=0
-#fi
-
-
-highest=`expr $highest - 1`
-rm $1.temp
-
-tsum=0
-echo "deleteme" > $1.inter
-rm $1.inter
-aux=0
-for ((i = 1; i <= highest; i++)); do
-	echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" >> $1.inter
-	#echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}"
-	aux=`expr $aux + 1`
-	#val=${array[${i}]}
-	#tsum=$(awk -v a="$tsum" -v b="$val" '{print a=a+b}')
-done
-
-#awk -v a="$tsum" b="$highest" '{print a/b}'
-
-#sumfirst=$(awk -F "," 'BEGIN{suma=0}{suma = suma + $8}END{print suma}' "$CSV")
-#echo "$sumfirst"
-
-
-
--- a/chromeister/bin/index_chromeister.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/env bash
-DIR=$1
-FASTAS1=$3
-FASTAS2=$4
-OUT=$2
-
-echo "Computing index CSV..." > index.csv.temp
-
-while [ "$(find . -size 0 | wc -l)" -ne 0 ]; do
-        sleep 10s
-done
-
-
-EXT="mat"
-EXTSCORE="scr.txt"
-EXTGENERAL=".fa.fasta"
-
-
-
-if [ $# != 4 ]; then
-	echo "***ERROR*** Use: $0 <directory> <out> <fastas_directory_1> <fastas_directory_2>"
-	exit -1
-fi
-
-rm $DIR/*.log
-
-for i in $DIR/*.scr; do mv $i $i.txt; done
-
-rm $OUT
-
-for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
-do
-	IFS='-', read -a splits <<< "$elem"
-	IFS='.', read -a getnum <<< "$elem"
-
-	scorepath=$(basename $elem .mat).$EXTSCORE
-
-	sed -i "/X.*/d" $DIR/$scorepath
-	sed -i "s/\[1\]//g" $DIR/$scorepath
-
-	score=$( head -1  $DIR/$scorepath)
-
-	file1=${splits[0]}
-	file2=$(basename ${splits[1]} .mat)
-
-	ID1=$(head -1 $FASTAS1/$file1)
-	ID2=$(head -1 $FASTAS2/$file2)
-
-	
-	#scorepath="$(basename $elem .mat).$EXTSCORE"
-	#score="$(head -1 $DIR/$scorepath)"
-
-	counter=0
-	numX=0
-	numY=0
-	for i in "${getnum[@]}" 
-	do
-		counter=`expr $counter + 1`
-		if [ "$numX" -eq 0 ] && [ "$i" == "chromosome" ]; then
-			numX=$counter
-			continue
-		fi
-		if [ "$numX" -ne 0 ] && [ "$i" == "chromosome" ]; then
-			numY=$counter
-		fi
-
-	done
-
-
-	echo "$(basename ${splits[0]} $EXTGENERAL),$(basename ${splits[1]} ${EXTGENERAL}.mat),$ID1,$ID2,$elem.$EXT.filt.png,${getnum[${numX}]},${getnum[${numY}]},$score" >> $OUT
-
-done
-
-sort -k5,5n -k6,6n -o $OUT $OUT 
-
-
-sed -i '1iSpX, SpY, IDX, IDY, IMG, CHNumberX, CHNumberY, Score' $OUT
-
-rm index.csv.temp
-
--- a/chromeister/bin/index_chromeister_solo.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-DIR=$1
-FASTAS1=$3
-FASTAS2=$4
-OUT=$2
-
-echo "Computing index CSV..." > index.csv.temp
-
-while [ "$(find . -size 0 | wc -l)" -ne 0 ]; do
-        sleep 10s
-done
-
-
-EXT="mat"
-EXTSCORE="scr.txt"
-EXTGENERAL=".fasta"
-
-# data is like
-# HOMSA.Chr.10.fasta
-
-
-
-if [ $# != 4 ]; then
-	echo "***ERROR*** Use: $0 <directory> <out> <fastas_directory_1> <fastas_directory_2>"
-	exit -1
-fi
-
-
-rm $OUT
-
-for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
-do
-
-	IFS='-', read -a splits <<< "$elem"   # yields MUSMU.Chr.8.fasta and  MUSMU.Chr.Y.fasta
-	IFS='.', read -a getnum <<< "$elem"   # yields MUSMU  Chr  8  fasta  and MUSMU Chr Y fasta
-
-	scorepath=$(basename $elem .mat).$EXTSCORE
-
-	sed -i "/X.*/d" $DIR/$scorepath
-	sed -i "s/\[1\]//g" $DIR/$scorepath
-
-	score=$( head -1  $DIR/$scorepath)
-	len1=$( head -1 $DIR/$elem.mat.events.txt | awk -F="," '{print $1}')
-	len2=$( head -1 $DIR/$elem.mat.events.txt | awk -F="," '{print $2}')
-
-	
-
-	file1=${splits[0]}
-	file2=$(basename ${splits[1]} .mat)
-
-	ID1=$(head -1 $FASTAS1/$file1)
-	ID2=$(head -1 $FASTAS2/$file2)
-
-	
-	#scorepath="$(basename $elem .mat).$EXTSCORE"
-	#score="$(head -1 $DIR/$scorepath)"
-
-	counter=0
-	numX=${getnum[2]}
-	numY=${getnum[5]}
-
-
-
-
-	echo "$(basename ${splits[0]} $EXTGENERAL),$(basename ${splits[1]} ${EXTGENERAL}),$ID1,$ID2,$elem.$EXT.filt.png,$numX,$numY,$score,$len1 $len2" >> $OUT
-
-done
-
-sort -k5,5n -k6,6n -o $OUT $OUT 
-
-
-sed -i '1iSpX, SpY, IDX, IDY, IMG, CHNumberX, CHNumberY, Score, LengthX, LengthY' $OUT
-
-rm index.csv.temp
-
--- a/chromeister/bin/make-cluster.R	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#!/usr/bin/env Rscript
-library(ape)
-
-args = commandArgs(trailingOnly=TRUE)
-
-if(length(args) < 1){
-  stop("USE: Rscript --vanilla make-cluster.R <csv>")
-}
-
-
-path = args[1]
-
-inputcsv <- read.csv(path, sep ="-", header=FALSE)
-
-# Make indexing table
-
-indexing_table <- table(inputcsv[,1])
-n_species <- length(indexing_table)
-
-for(i in 1:n_species){
-  indexing_table[i] <- i
-}
-
-
-distmat <- matrix(NA, nrow=n_species, ncol=n_species)
-rownames(distmat) <- rownames(indexing_table)
-colnames(distmat) <- rownames(indexing_table)
-
-for(i in 1:n_species){
-  distmat[i,i] <- 0
-}
-
-
-for(i in 1:length(inputcsv[,1])){
-  redirX <- (as.character(inputcsv[i,1]))
-  redirY <- (as.character(inputcsv[i,2]))
-  
-  distmat[indexing_table[redirX],indexing_table[redirY]] <- as.numeric(inputcsv[i,3])
-  distmat[indexing_table[redirY],indexing_table[redirX]] <- as.numeric(inputcsv[i,3])
-}
-
-cluster <- hclust(dist(distmat), method = "average")
-
-# Rooted hierarchical cluster
-#plot(cluster, main = "Clustering based on automatic scoring", xlab = "Organisms")
-
-# Unrooted
-plot(as.phylo(cluster), type = "unrooted", cex = 0.6, main = "Unrooted clustering based on automatic")
-
-# Fan cluster
-#plot(as.phylo(cluster), type = "fan")
-
-
-
--- a/chromeister/bin/make-mean.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-# Use in folder prior to all folders!
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-for i in *; do
-
-	
-	$BINDIR/generate-one-score.sh $i/index-$i.csv 0.2
-        value=$(awk '{sum+=$2} END { print sum/NR}' $i/index-$i.csv.inter)
-        echo "$i-$value"
-
-
-done
-
--- a/chromeister/bin/plot.R	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-#!/usr/bin/env Rscript
-args = commandArgs(trailingOnly=TRUE)
-
-if(length(args) < 1){
-  stop("USE: Rscript --vanilla plot.R <matrix>")
-}
-
-
-path_mat = args[1]
-
-
-data <- read.csv(path_mat, header = FALSE, sep = " ", skip=1)
-
-
-png(paste(path_mat, ".png", sep=""), width = length(data[,1]), height = length(data[,1]))
-image(t(as.matrix(data)), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n')
-dev.off()
--- a/chromeister/bin/plot_diags.R	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,179 +0,0 @@
-#!/usr/bin/env Rscript
-args = commandArgs(trailingOnly=TRUE)
-
-if(length(args) < 1){
-  stop("USE: Rscript --vanilla plot.R <matrix>")
-}
-
-
-path_mat = args[1]
-
-
-
-
-data <- as.matrix(read.csv(path_mat, sep = " "))
-percentage <- 0.1
-
-
-
-
-d <- col(data) - row(data)
-groups <- split(data, d)
-acum <- c()
-for(i in 1:length(groups)){
-  acum <- c(acum, sum(unlist(groups[i])))
-}
-
-
-# reverse
-data_r <- data
-for(i in 1:length(data_r[,1])){
-  data_r[i,] <- rev(data_r[i,])
-}
-d <- col(data_r) - row(data_r)
-groups <- split(data_r, d)
-acum_r <- c()
-for(i in 1:length(groups)){
-  acum_r <- c(acum, sum(unlist(groups[i]))/length(unlist(groups[i])) )
-}
-
-indexes <- matrix(0, nrow=length(acum), ncol = 3)
-indexes[,1] <- c(1:length(acum))
-indexes[,2] <- acum
-
-#sort
-indexes[order(indexes[,2], decreasing=TRUE),]
-
-#crop by percentage
-n_percent <- percentage * length(acum)
-
-for(i in 1:n_percent){
-  indexes[i,3] <- 1
-}
-
-# Now resort based on first column to have them again in order
-indexes[order(indexes[,1], decreasing=FALSE),]
-
-
-# same in reverse
-#put into indexes to sort
-indexes_r <- matrix(0, nrow=length(acum_r), ncol = 3)
-indexes_r[,1] <- c(1:length(acum_r))
-indexes_r[,2] <- acum_r
-
-#sort
-indexes_r[order(indexes_r[,2], decreasing=TRUE),]
-
-#crop by percentage
-n_percent <- percentage * length(acum_r)
-
-for(i in 1:n_percent){
-  indexes_r[i,3] <- 1
-}
-
-# Now resort based on first column to have them again in order
-indexes_r[order(indexes_r[,1], decreasing=FALSE),]
-
-
-
-
-finalmat <- matrix(0, nrow=length(data[,1]), ncol=length(data[1,]))
-
-
-for(i in length(data[1,]):1){
-  x <- i
-  y <- 1
-
-  if(indexes[i,3] == 1){
-    while(x < length(data[1,]) && y <= length(data[,1])){
-      
-      
-      if(x < length(data[1,]) && y <= length(data[,1])){
-        finalmat[x,y] <- 1  
-      }
-      
-      y <- y + 1
-      x <- x + 1
-      
-      
-    }  
-  }
-  
-}
-
-for(i in 2:length(data[,1])){
-  x <- 1
-  y <- i
-  
-  if(indexes[i,3] == 1){
-    
-    while(x < length(data[1,]) && y <= length(data[,1])){
-      
-      if(x < length(data[1,]) && y <= length(data[,1])){
-        finalmat[x,y] <- 1
-      }
-      y <- y + 1
-      x <- x + 1
-      
-    }  
-  }
-}
-
-
-#same for reverse
-for(i in length(data_r[1,]):1){
-  x <- i
-  y <- 1
-  
-  if(indexes[i,3] == 1){
-    while(x < length(data_r[1,]) && y <= length(data_r[,1])){
-      
-      
-      if(x < length(data_r[1,]) && y <= length(data_r[,1])){
-        finalmat[x,length(data_r[,1])-y] <- 1  
-      }
-      
-      y <- y + 1
-      x <- x + 1
-      
-      
-    }  
-  }
-  
-}
-
-for(i in 2:length(data_r[,1])){
-  x <- 1
-  y <- i
-  
-  if(indexes[i,3] == 1){
-    
-    while(x < length(data_r[1,]) && y <= length(data_r[,1])){
-      
-      if(x < length(data_r[1,]) && y <= length(data_r[,1])){
-        finalmat[x,length(data_r[,1])-y] <- 1
-      }
-      y <- y + 1
-      x <- x + 1
-      
-    }  
-  }
-}
-
-# fully write last
-
-for(i in 1:length(data[,1])){
-  for(j in 1:length(data[1,])){
-    if(finalmat[i,j] == 0){
-      
-      data[i,j] <- 0
-    }
-  }
-}
-
-png(paste(path_mat, ".png", sep=""), width = length(data[,1]), height = length(data[,1]))
-image((data), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n')
-dev.off()
-
-
-
--- a/chromeister/bin/recompute_scores.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-#!/usr/bin/env bash
-
-if [ $# -ne 1 ]; then
-   echo " ==== ERROR ... you called this script inappropriately."
-   echo ""
-   echo "   usage:  $0 <#threads>"
-   echo ""
-   exit -1
-fi
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-THREADS=$1
-
-
-for p in */;
-do
-
-	echo "Entering $p"
-	cd $p
-
-
-
-	thepaths=()
-	n=0
-	# Grab the routes
-	for i in *.mat ;
-	do
-
-	        thepaths[$n]=$i
-        	n=`expr $n + 1`
-
-	done
-
-	i=0
-	aux=0
-	for ((i=0; i < $n ; i+=$THREADS))
-	do
-		aux=$i
-		for ((j=0; j<$THREADS; j++))
-		do
-			if [ "$aux" -lt "$n" ]; then
-				echo "Recomputing ${thepaths[$aux]}"
-				goodpath=${thepaths[$aux]%.mat}
-				Rscript $BINDIR/compute_score.R ${thepaths[$aux]} > ${goodpath}.scr.txt &
-				aux=`expr $aux + 1`
-			fi
-		done
-
-		for job in `jobs -p`
-		do
-		    #echo $job
-		    wait $job
-		done
-		
-	done
-
-
-	echo "Exiting $p"
-	cd ..
-
-
-done
-
-
--- a/chromeister/bin/run_and_plot_chromeister.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-G1=$1
-G2=$2
-KMER=$3
-DIM=$4
-
-
-FILE1=$(basename $G1)
-FILE2=$(basename $G2)
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-
-if [ $# -lt 4 ]; then
-        echo "***ERROR*** Use: $0 <G1> <G2> <KMER> <DIMENSION>"
-        exit -1
-fi
-
-
-
-(time $BINDIR/CHROMEISTER -query $G1 -db $G2 -kmer $KMER -out $FILE1-$FILE2.mat -dimension $DIM) &> $FILE1-$FILE2.log
-(Rscript $BINDIR/compute_score.R $FILE1-$FILE2.mat $DIM) &> $FILE1-$FILE2.scr.txt
-
-#$BINDIR/plot.R $FILE1-$FILE2.mat
-
-
-#rm $FILE1-$FILE2.mat
--- a/chromeister/chromeister.xml	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-<tool id="chromeister" name="Chromeister">
-   <description>A heuristic approach for ultra fast previsualization of pairwise genome comparisons</description>
-   <inputs>
-      <param name="query" type="data" format="fasta" label="Query sequence" help="Query sequence file in fasta format" />
-      <param name="db" type="data" format="fasta" label="Reference sequence" help="Reference sequence file in fasta format" />
-      <param name="dimension" type="integer" value="1000" label="Dimension" help="Compression size" />
-   </inputs>
-   <command>echo "\$PWD"; cp $query ${query.name}; cp $db ${db.name}; (/home/galaxy-bitlab/galaxy/tools/chromeister/bin/CHROMEISTER -query ${query.name} -db ${db.name} -dimension $dimension -out ${query.name}-${db.name}.mat) &amp;>/dev/null ; rm ${query.name}; rm ${db.name}; Rscript /home/galaxy-bitlab/galaxy/tools/chromeister/bin/compute_score.R  ${query.name}-${db.name}.mat $dimension; mv ${query.name}-${db.name}.mat $output; mv ${query.name}-${db.name}.mat.filt.png $outputIMAGEN ; mv ${query.name}-${db.name}.mat.events.txt $outputEVENTS; rm hits-XY-${query.name}-${db.name}.mat.hits</command>
-  <outputs>
-      <data name="output" format="txt" label="Comparison matrix"/>
-      <data name="outputIMAGEN" format="png" label="Comparison dotplot"/>
-      <data name="outputEVENTS" format="txt" label="Detected events"/>
-  </outputs>
-</tool>
--- a/chromeister/src/CHROMEISTER.c	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,651 +0,0 @@
-/*********
-
-File        CHROMEISTER.c
-Author      EPW <estebanpw@uma.es>
-Description Computes hits and generates a dotplot
-
-USAGE       Usage is described by calling ./CHROMEISTER --help
-
-
-
-**********/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-#include <ctype.h>
-#include "structs.h"
-#include "alignmentFunctions.h"
-#include "commonFunctions.h"
-#define STARTING_SEQS 1000
-#define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes
-#define RANGE 2
-
-uint64_t custom_kmer = 32; // Defined as external in structs.h
-uint64_t diffuse_z = 4; // Defined as external in structs.h
-
-uint64_t get_seq_len(FILE * f);
-
-
-void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z);
-
-int main(int argc, char ** av){
-    
-
-    /*
-    //Store positions of kmers
-    uint64_t n_pools_used = 0;
-    //Mempool_l * mp = (Mempool_l *) malloc(MAX_MEM_POOLS*sizeof(Mempool_l));
-    //if(mp == NULL) terror("Could not allocate vectors for memory pools");
-    Mempool_l mp[MAX_MEM_POOLS];
-    init_mem_pool_llpos(&mp[n_pools_used]);
-    //llpos * aux;
-
-    uint64_t n_pools_used_AVL = 0;
-    Mempool_AVL mp_AVL[MAX_MEM_POOLS];
-    init_mem_pool_AVL(&mp_AVL[n_pools_used_AVL]);
-    */
-    Tuple_hits * thit;
-    
-    /*
-    AVLTree * root = NULL;
-    root = insert_AVLTree(root, 10, mp_AVL, &n_pools_used_AVL, 0, mp, &n_pools_used);
-    
-    llpos * some = find_AVLTree(root, 25);
-    while(some != NULL){
-        printf("#%"PRIu64", ", some->pos); some = some->next;
-    }
-    */
-
-    uint64_t i, j;
-
-    //query to read kmers from, database to find seeds
-    FILE * query = NULL, * database = NULL, * out_database = NULL;
-    uint64_t dimension = 1000; // Default 1000 * 1000
-    
-    
-    init_args(argc, av, &query, &database, &out_database, &custom_kmer, &dimension, &diffuse_z);
-
-
-
-    unsigned char char_converter[91];
-    char_converter[(unsigned char)'A'] = 0;
-    char_converter[(unsigned char)'C'] = 1;
-    char_converter[(unsigned char)'G'] = 2;
-    char_converter[(unsigned char)'T'] = 3;
-
-    
-    // Variables to account for positions
-    // Print info
-    fprintf(stdout, "[INFO] Loading database\n");
-    // Variables to read kmers
-    char c = 'N'; //Char to read character
-    // Current length of array and variables for the buffer
-    uint64_t idx = 0, r = 0;
-    
-    //Vector to read in batches
-    char * temp_seq_buffer = NULL;
-    if ((temp_seq_buffer = calloc(READBUF, sizeof(char))) == NULL) {
-        terror("Could not allocate memory for read buffer");
-    }
-
-    //Dimensional matrix
-    uint64_t ** representation = (uint64_t **) calloc(dimension+1, sizeof(uint64_t *));
-    if(representation == NULL) terror("Could not allocate representation");
-    for(i=0; i<dimension+1; i++){
-        representation[i] = (uint64_t *) calloc(dimension+1, sizeof(uint64_t));
-        if(representation[i] == NULL) terror("Could not allocate second loop representation");
-    }
-
-    /*
-    fseek(database, 0, SEEK_END);
-    uint64_t aprox_len_query = ftell(database);
-    uint64_t aprox_len_db = aprox_len_query;
-    rewind(database);
-    */
-    uint64_t aprox_len_query = get_seq_len(database);
-    uint64_t aprox_len_db = aprox_len_query;
-
-    uint64_t a_hundreth = (aprox_len_query/100);
-
-    unsigned char curr_kmer[custom_kmer], reverse_kmer[custom_kmer];
-    curr_kmer[0] = reverse_kmer[0] = '\0';
-    uint64_t word_size = 0, word_size_rev = 0;
-
-    //To hold all information related to database
-    uint64_t current_len = 0;
-    
-    //To force reading from the buffer
-    idx = READBUF + 1;
-
-    //unsigned char aux_kmer[custom_kmer+1];
-    
-    //Vector to store query seq
-    unsigned char * seq_vector_query = (unsigned char *) malloc(READBUF*sizeof(unsigned char));
-    if(seq_vector_query == NULL) terror("Could not allocate memory for query vector");
-
-    /*
-    Container * ct = (Container *) calloc(1, sizeof(Container));
-    if(ct == NULL) terror("Could not allocate container");    
-    */
-
-
-
-    Index * ctidx = (Index *) calloc(1, sizeof(Index));
-    if(ctidx == NULL) terror("Could not allocate container");
-    
-
-    //begin = clock();
-    
-
-    c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
-    while((!feof(database) || (feof(database) && idx < r))){
-
-        if(c == '>'){
-            
-
-
-            while(c != '\n') c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);  //Skip ID
-                
-
-            while(c != '>' && (!feof(database) || (feof(database) && idx < r))){ //Until next id
-                c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
-                c = toupper(c);
-                if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
-                    curr_kmer[word_size] = (unsigned char) c;
-                    if(word_size < custom_kmer) ++word_size;
-                    ++current_len;
-                    if(current_len % a_hundreth == 0){ 
-                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query); 
-                        //printf("%"PRIu64"%%..wasted: (%e) (%e)", 1+100*pos_in_query/aprox_len_query, (double)(wasted_cycles_forward)/CLOCKS_PER_SEC, (double)(wasted_cycles_reverse)/CLOCKS_PER_SEC); 
-                        fflush(stdout);
-                    }
-                    
-
-
-                }else{ //It can be anything (including N, Y, X ...)
-
-                    if(c != '\n' && c != '>'){
-                        word_size = 0;
-                        // data_database.sequences[pos_in_database++] = (unsigned char) 'N'; //Convert to N
-                        ++current_len;
-
-                    } 
-                }
-                //if(current_len % 1000000 == 0) printf(" curr len %" PRIu64"\n", current_len);
-                if(word_size == custom_kmer){
-                    //write to hash table
-                    
-
-                    thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
-                    [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
-                    [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
-                    [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
-                    
-                    /*
-                    typedef struct tuple_hits{
-                        int repetition;
-                        int hit_count;
-                        uint64_t key;
-                        uint64_t pos;
-                    } Tuple_hits;
-                    */
-
-                    if(thit->repetition == FALSE){
-                        // Then we can insert
-                        thit->hit_count = 0;
-                        thit->key = collisioned_hash(&curr_kmer[0], custom_kmer);
-                        thit->pos = current_len;
-                    }else{
-                        // Otherwise we break it
-                        thit->repetition = TRUE;
-                    }
-
-                    //thit->root = insert_AVLTree(thit->root, hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
-                    //thit->root = insert_AVLTree(thit->root, collisioned_hash(&curr_kmer[0], custom_kmer), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
-                    
-                    
-
-                    // Non overlapping
-                    word_size = 0;
-                    
-		
-		    // Overlapping
-                    //memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
-                    //--word_size;
-                }
-            }
-            word_size = 0;
-            
-        }else{
-            c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);    
-        }
-        
-    }
-
-
-    //end = clock();
-
-    // data_database.total_len = pos_in_database;
-
-    //fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64". Hash table building took %e seconds\n", data_database.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
-    fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64".\n", current_len);
-    //close database
-    fclose(database);
-
-    
-    
-    //begin = clock();
-    
-
-    
-    
-    
-    double pixel_size_db = (double) dimension / (double) current_len;
-    double ratio_db = (double) current_len / dimension;
-
-
-    // Get file length
-    
-    //fseek(query, 0, SEEK_END);
-    //aprox_len_query = ftell(query);
-    //rewind(query);
-    aprox_len_query = get_seq_len(query);
-
-    //uint64_t reallocs_hash_holder_table = 1;
-    //uint64_t n_items_hash_holder_table = aprox_len_query / 5;
-
-    //Hash_holder * hash_holder_table = (Hash_holder *) calloc(n_items_hash_holder_table, sizeof(Hash_holder));
-    //if(hash_holder_table == NULL) terror("Could not allocate hash holding table");
-
-    a_hundreth = (aprox_len_query/100);
-    double pixel_size_query = (double) dimension / (double) aprox_len_query;
-    double ratio_query = (double) aprox_len_query / dimension;
-    
-
-    double i_r_fix = MAX(1.0, custom_kmer * pixel_size_query);
-    double j_r_fix = MAX(1.0, custom_kmer * pixel_size_db);
-
-    
-    
-    fprintf(stdout, "[INFO] Ratios: Q [%e] D [%e]. Lenghts: Q [%"PRIu64"] D [%"PRIu64"]\n", ratio_query, ratio_db, aprox_len_query, current_len);
-    fprintf(stdout, "[INFO] Pixel size: Q [%e] D [%e].\n", pixel_size_query, pixel_size_db);
-
-
-    fprintf(stdout, "[INFO] Computing absolute hit numbers.\n");
-
-
-    current_len = 0;
-
-    //llpos * the_original_hit;
-
-    //To force reading from the buffer
-    idx = READBUF + 1;
-    c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);    
-    //uint64_t c_hash_holder = 0;
-    
-    while((!feof(query) || (feof(query) && idx < r))){
-
-        if(c == '>'){
-            word_size = 0;
-            word_size_rev = custom_kmer-1;
-            
-
-
-
-            while(c != '\n'){ c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); } //Skip ID
-                
-
-            while(c != '>' && (!feof(query) || (feof(query) && idx < r))){ //Until next id
-                c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);
-                c = toupper(c);
-                if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
-                    
-                    ++current_len;
-                    if(current_len % a_hundreth == 0){ 
-                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query); 
-                        fflush(stdout);
-                    }
-                    curr_kmer[word_size] = (unsigned char) c;
-                    ++word_size;
-
-                    switch(c){
-                        case ('A'): reverse_kmer[word_size_rev] = (unsigned)'T';
-                        break;
-                        case ('C'): reverse_kmer[word_size_rev] = (unsigned)'G';
-                        break;
-                        case ('G'): reverse_kmer[word_size_rev] = (unsigned)'C';
-                        break;
-                        case ('T'): reverse_kmer[word_size_rev] = (unsigned)'A';
-                        break;
-                    }
-                    if(word_size_rev != 0) --word_size_rev;
-
-
-
-                    
-                    if(word_size == custom_kmer){
-
-
-                        //hash_forward = hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K);
-                        //hash_reverse = hashOfWord(&reverse_kmer[0], custom_kmer, FIXED_K);
-                        uint64_t hash_forward, hash_reverse;
-                        hash_forward = collisioned_hash(&curr_kmer[0], custom_kmer);
-                        hash_reverse = collisioned_hash(&reverse_kmer[0], custom_kmer);
-                        
-
-                        thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
-                        [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
-                        [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
-                        [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
-
-                        //AVLTree * search = find_AVLTree(thit->root, hash_forward);
-
-                        if(thit->repetition == FALSE && hash_forward == thit->key){
-                            // Attention ::::: you were not removing the ones with count==1 earlier 
-                            thit->pos_in_y = current_len;
-                            thit->hit_count++;
-                        }
-
-                        thit = &ctidx->table[char_converter[reverse_kmer[0]]][char_converter[reverse_kmer[1]]][char_converter[reverse_kmer[2]]]
-                        [char_converter[reverse_kmer[3]]][char_converter[reverse_kmer[4]]][char_converter[reverse_kmer[5]]]
-                        [char_converter[reverse_kmer[6]]][char_converter[reverse_kmer[7]]][char_converter[reverse_kmer[8]]]
-                        [char_converter[reverse_kmer[9]]][char_converter[reverse_kmer[10]]][char_converter[reverse_kmer[11]]];
-
-                        if(thit->repetition == FALSE && hash_reverse == thit->key){
-                            // Attention ::::: you were not removing the ones with count==1 earlier 
-                            thit->pos_in_y = current_len;
-                            thit->hit_count++;
-                        }
-
-                        /*
-                        if(search != NULL && search->count == 1){ //If count is two, then it is a rep
-                            thit->hit_count += search->count;
-                            
-                            hash_holder_table[c_hash_holder].pos = current_len;
-                            hash_holder_table[c_hash_holder].node = search;
-                            hash_holder_table[c_hash_holder].th = thit;
-                            ++c_hash_holder;
-                            if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
-                                ++reallocs_hash_holder_table;
-                                hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
-                                if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
-                            }
-                        }
-                        */
-
-                        
-
-                        
-
-                        //search = find_AVLTree(thit->root, hash_reverse);
-                        /*
-                        if(search != NULL && search->count == 1){ //If count is two, then it is a rep
-                            
-                            thit->hit_count += search->count;
-                            hash_holder_table[c_hash_holder].pos = current_len;
-                            hash_holder_table[c_hash_holder].node = search;
-                            hash_holder_table[c_hash_holder].th = thit;
-                            ++c_hash_holder;
-                            if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
-                                ++reallocs_hash_holder_table;
-                                hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
-                                if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
-                            }
-                        }
-                        */
-
-                        // Overlapping
-                        
-                        memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
-                        memmove(&reverse_kmer[1], &reverse_kmer[0], custom_kmer-1);
-                        --word_size;
-
-                        // Non overlapping
-                        //word_size = 0;
-                        //word_size_rev = custom_kmer-1;
-                    }
-                }else{
-                    if(c != '\n' && c != '>'){
-                        word_size = 0;
-                        word_size_rev = custom_kmer-1;
-                        ++current_len;
-                    }
-                }
-            }
-        }else{
-            c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);    
-        }
-        
-    } 
-
-    /// Out
-
-    fprintf(stdout, "Scanning hits table.\n");
-
-    a_hundreth = MAX(1, TOTAL_ENTRIES/100);
-    uint64_t t_computed = 0;
-    uint64_t w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11;
-    for(w0=0;w0<4;w0++){
-        for(w1=0;w1<4;w1++){
-            for(w2=0;w2<4;w2++){
-                for(w3=0;w3<4;w3++){
-                    for(w4=0;w4<4;w4++){
-                        for(w5=0;w5<4;w5++){
-                            for(w6=0;w6<4;w6++){
-                                for(w7=0;w7<4;w7++){
-                                    for(w8=0;w8<4;w8++){
-                                        for(w9=0;w9<4;w9++){
-                                            for(w10=0;w10<4;w10++){
-                                                for(w11=0;w11<4;w11++){
-
-                                                    if(t_computed % a_hundreth == 0){ 
-                                                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*t_computed/TOTAL_ENTRIES); 
-                                                        fflush(stdout);
-                                                    }
-                                                    ++t_computed;
-                                                    Tuple_hits * taux = &ctidx->table[w0][w1][w2][w3][w4][w5][w6][w7][w8][w9][w10][w11];
-                                                    if(taux->hit_count == 1){
-                                                        // We plot it   
-                                                        // Convert scale to representation
-                                                        uint64_t redir_db = (uint64_t) (taux->pos / (ratio_db));
-                                                        uint64_t redir_query = (uint64_t) (taux->pos_in_y / (ratio_query));
-                                                        double i_r = i_r_fix; double j_r = j_r_fix;
-                                                        while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
-                                                            if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
-                                                                representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
-                                                            }else{
-                                                                representation[redir_query][redir_db]++;
-                                                                break;
-                                                            }
-                                                            i_r -= MIN(1.0, pixel_size_query);
-                                                            j_r -= MIN(1.0, pixel_size_db);
-                                                        }                                                     
-                                                    }
-                                                }
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    
-
-    //double average_hit = ((double) total_hits / (double) table_size);
-    //average_hit = 2.2;
-
-    /*
-    //fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" Avg = %e.\n", total_hits, table_size, average_hit);
-    fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" E' = %e.\n", total_hits, table_size, Eprime);
-
-    
-
-    
-
-    a_hundreth = MAX(1,c_hash_holder/100);
-    
-    for(current_len = 0; current_len < c_hash_holder; current_len++){
-
-        if(current_len % a_hundreth == 0){ 
-            fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/c_hash_holder); 
-            fflush(stdout);
-        }
-
-        aux = hash_holder_table[current_len].node->next;
-
-        //if(hash_holder_table[current_len].th->hit_count < (uint64_t) average_hit){
-        if(hash_holder_table[current_len].th->hit_count < (uint64_t) Eprime){
-            while(aux != NULL){
-                // Convert scale to representation
-                uint64_t redir_db = (uint64_t) (aux->pos / (ratio_db));
-                uint64_t redir_query = (uint64_t) (hash_holder_table[current_len].pos / (ratio_query));
-                double i_r = i_r_fix; double j_r = j_r_fix;
-                while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
-                    if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
-                        representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
-                    }else{
-                        representation[redir_query][redir_db]++;
-                        break;
-                    }
-                    i_r -= MIN(1.0, pixel_size_query);
-                    j_r -= MIN(1.0, pixel_size_db);
-                }
-                aux = aux->next;
-            }
-        }
-    }
-    */
-
-
-    //end = clock();
-
-    
-    
-
-    //fprintf(stdout, "\n[INFO] Query length %"PRIu64". Hits completed. Took %e seconds\n", data_query.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
-    fprintf(stdout, "\n[INFO] Query length %"PRIu64".\n", current_len);
-
-    //begin = clock();
-
-    //reads_per_thread = (uint64_t) (floorl((long double) data_query.n_seqs / (long double) n_threads));
-    
-    fprintf(stdout, "[INFO] Writing matrix.\n");
-
-
-    uint64_t unique_diffuse = 0;
-	fprintf(out_database, "%"PRIu64"\n", aprox_len_query);
-    fprintf(out_database, "%"PRIu64"\n", aprox_len_db);
-     // And replace 2's with 1's 
-	
-    for(i=0; i<dimension+1; i++){
-        for(j=0; j<dimension; j++){
-            fprintf(out_database, "%"PRIu64" ", representation[i][j]);
-	    unique_diffuse += representation[i][j];
-        }
-        fprintf(out_database, "%"PRIu64"\n",  representation[i][dimension]);
-	unique_diffuse += representation[i][dimension];
-    }
-
-    fprintf(stdout, "[INFO] Found %"PRIu64" unique hits for z = %"PRIu64".\n", unique_diffuse, diffuse_z);
-    
-
-    
-    //free(ct->table);
-    //free(hash_holder_table);
-    /*
-    for(i=0;i<=n_pools_used_AVL;i++){
-        free(mp_AVL[i].base);
-    }
-
-    for(i=0;i<=n_pools_used;i++){
-        free(mp[i].base);
-    }
-    */
-    for(i=0;i<dimension;i++){
-        free(representation[i]);
-    }
-    free(representation);
-    if(out_database != NULL) fclose(out_database);
-    
-    return 0;
-}
-
-void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z){
-
-    int pNum = 0;
-    while(pNum < argc){
-        if(strcmp(av[pNum], "--help") == 0){
-            fprintf(stdout, "USAGE:\n");
-            fprintf(stdout, "           CHROMEISTER -query [query] -db [database] -out [outfile]\n");
-            fprintf(stdout, "OPTIONAL:\n");
-            fprintf(stdout, "           -kmer       [Integer:   k>1 (default 32)]\n");
-	    fprintf(stdout, "		-diffuse    [Integer:   z>0 (default 4)]\n");
-            fprintf(stdout, "           -dimension  Size of the output [Integer:   d>0 (default 1000)]\n");
-            fprintf(stdout, "           -out        [File path]\n");
-            fprintf(stdout, "           --help      Shows help for program usage\n");
-            fprintf(stdout, "\n");
-            fprintf(stdout, "PLEASE NOTICE: The reverse complementary is calculated for the QUERY.\n");
-            exit(1);
-        }
-        if(strcmp(av[pNum], "-query") == 0){
-            *query = fopen64(av[pNum+1], "rt");
-            if(query==NULL) terror("Could not open query file");
-        }
-        if(strcmp(av[pNum], "-db") == 0){
-            *database = fopen64(av[pNum+1], "rt");
-            if(database==NULL) terror("Could not open database file");
-        }
-        if(strcmp(av[pNum], "-out") == 0){
-            *out_database = fopen(av[pNum+1], "wt");
-            if(out_database==NULL) terror("Could not open output database file");
-        }
-        if(strcmp(av[pNum], "-kmer") == 0){
-            *custom_kmer = (uint64_t) atoi(av[pNum+1]);
-            if(*custom_kmer < BYTES_IN_MER) terror("K-mer size must be larger than 4");
-            if(*custom_kmer % BYTES_IN_MER != 0) terror("K-mer size must be a multiple of 4");
-
-        }
-        if(strcmp(av[pNum], "-diffuse") == 0){
-            *diffuse_z = (uint64_t) atoi(av[pNum+1]);
-            if(*diffuse_z == 0 || *diffuse_z > 32) terror("Z must satisfy 0<z<=32");
-
-        }
-
-        if(strcmp(av[pNum], "-dimension") == 0){
-            *dimension = (uint64_t) atoi(av[pNum+1]);
-            if(*dimension < 1) terror("Dimension must be a positive integer");
-        }
-        
-        pNum++;
-    }
-    
-    if(*query==NULL || *database==NULL || *out_database==NULL) terror("A query, database and output file is required");
-}
-
-uint64_t get_seq_len(FILE * f) {
-    char c = '\0';
-    uint64_t l = 0;
-
-    while(!feof(f)){
-        c = getc(f);
-
-        if(c == '>'){
-            while(c != '\n') c = getc(f);
-        }
-        c = toupper(c);
-        if(c >= 'A' && c <= 'Z'){
-            ++l;
-        }
-    }
-
-
-    rewind(f);
-    return l;
-}
--- a/chromeister/src/Makefile	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-CC=gcc
-CXX=g++
-CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall #-DVERBOSE
-BIN=../bin
-
-all: CHROMEISTER
-
-
-
-CHROMEISTER: CHROMEISTER.c
-	$(CC) $(CFLAGS) alignmentFunctions.c -lm commonFunctions.c -lm CHROMEISTER.c -lm -o $(BIN)/CHROMEISTER
-
-clean:
-	rm -rf $(BIN)/CHROMEISTER
--- a/chromeister/src/alignmentFunctions.c	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,266 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <inttypes.h>
-#include <math.h>
-#include <float.h>
-#include "structs.h"
-#include "alignmentFunctions.h"
-#include "commonFunctions.h"
-
-
-
-int64_t compare_letters(unsigned char a, unsigned char b){
-    if(a != (unsigned char) 'N' && a != (unsigned char) '>') return (a == b) ? POINT : -POINT;
-    return -POINT;
-}
-
-llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used){
-
-    if(mp[*n_pools_used].current == POOL_SIZE){
-        *n_pools_used += 1;
-        if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools");
-        init_mem_pool_llpos(&mp[*n_pools_used]);
-        
-    }
-
-    llpos * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current;
-    mp[*n_pools_used].current++;
-
-    
-    return new_pos;
-}
-
-void init_mem_pool_llpos(Mempool_l * mp){
-    mp->base = (llpos *) calloc(POOL_SIZE, sizeof(llpos));
-    if(mp->base == NULL) terror("Could not request memory pool");
-    mp->current = 0;
-}
-
-AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key){
-
-    if(mp[*n_pools_used].current == POOL_SIZE){
-        *n_pools_used += 1;
-        if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools");
-        init_mem_pool_AVL(&mp[*n_pools_used]);
-        
-    }
-
-    AVLTree * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current;
-    mp[*n_pools_used].current++;
-
-    new_pos->key = key;
-    new_pos->count = 1;
-    new_pos->height = 1;
-
-    return new_pos;
-}
-
-void init_mem_pool_AVL(Mempool_AVL * mp){
-    mp->base = (AVLTree *) calloc(POOL_SIZE, sizeof(AVLTree));
-    if(mp->base == NULL) terror("Could not request memory pool");
-    mp->current = 0;
-}
-
-
-
-/*
-// An AVL tree node
-typedef struct AVL_Node{
-    uint64_t key;
-    struct AVL_Node * left;
-    struct AVL_Node * right;
-    uint64_t height;
-    llpos * next;
-} AVLTree;
-*/
- 
-// A utility function to get height of the tree
-
-uint64_t height(AVLTree * N){
-    if (N == NULL)
-        return 0;
-    return N->height;
-}
-
-/* Substituted by (x == NULL) ? (0) : (x->height) */
- 
-/* Helper function that allocates a new node with the given key and
-    NULL left and right pointers. */
-
-/* This one is substituted by AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key) */
- 
-// A utility function to right rotate subtree rooted with y
-// See the diagram given above.
-AVLTree * right_rotate(AVLTree * y){
-    AVLTree * x = y->left;
-    AVLTree * T2 = x->right;
- 
-    // Perform rotation
-    x->right = y;
-    y->left = T2;
- 
-    // Update heights
-    //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1;
-    //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1;
-    // Update heights
-    y->height = MAX(height(y->left), height(y->right))+1;
-    x->height = MAX(height(x->left), height(x->right))+1;
- 
-    // Return new root
-    return x;
-}
- 
-// A utility function to left rotate subtree rooted with x
-// See the diagram given above.
-AVLTree * left_rotate(AVLTree * x){
-    AVLTree * y = x->right;
-    AVLTree * T2 = y->left;
- 
-    // Perform rotation
-    y->left = x;
-    x->right = T2;
- 
-    //  Update heights
-    //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1;
-    //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1;
-    x->height = MAX(height(x->left), height(x->right))+1;
-    y->height = MAX(height(y->left), height(y->right))+1;
- 
-    // Return new root
-    return y;
-}
- 
-// Get Balance factor of node N
-
-int64_t get_balance(AVLTree * N){
-    if (N == NULL)
-        return 0;
-    return height(N->left) - height(N->right);
-}
-
-/* Substituted by (node == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height) */
-
-AVLTree * find_AVLTree(AVLTree * node, uint64_t key){
-    AVLTree * found = NULL;
-    if(node == NULL) return NULL;
-
-    if (key < node->key) {
-        found = find_AVLTree(node->left, key);
-    } else if (key > node->key) {
-        found = find_AVLTree(node->right, key);
-    } else { 
-        return node;
-    }
-    return found;
-} 
-
-llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key){
-    llpos * aux = NULL;
-    if(node == NULL) return NULL;
-
-    if (key < node->key) {
-        aux = find_AVLTree_llpos(node->left, key);
-    } else if (key > node->key) {
-        aux = find_AVLTree_llpos(node->right, key);
-    } else { 
-        return node->next;
-    }
-    return aux;
-}
-
-// Recursive function to insert key in subtree rooted
-// with node and returns new root of subtree.
-AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l){
-    /* 1.  Perform the normal BST insertion */
-    if (node == NULL){
-        
-        AVLTree * n_node = getNewLocationAVLTree(mp, n_pools_used, key);
-        llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l);
-        aux->pos = pos;
-        n_node->next = aux;
-        return n_node;
-    }
- 
-    if (key < node->key) {
-        node->left  = insert_AVLTree(node->left, key, mp, n_pools_used, pos, mp_l, n_pools_used_l);
-    } else if (key > node->key) {
-        node->right = insert_AVLTree(node->right, key, mp, n_pools_used, pos, mp_l, n_pools_used_l);
-    } else { 
-        // Equal keys are inserted as a linked list
-        if(node->count == 1){ // DO NOT INSERT MORE THAN 2. 1 is good 2 is repetition
-            llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l);
-            aux->pos = pos;
-            aux->next = node->next;
-            node->next = aux;
-            ++(node->count);
-        }
-        
-        return node;
-    }
- 
-    /* 2. Update height of this ancestor node */
-    //node->height = 1 + MAX((node->left == NULL) ? (0) : (node->left->height), (node->right == NULL) ? (0) : (node->right->height));
-    node->height = 1 + MAX(height(node->left), height(node->right));
- 
-    /* 3. Get the balance factor of this ancestor
-          node to check whether this node became
-          unbalanced */
-    //int64_t balance = (node->left == NULL || node->right == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height);
-    int64_t balance = get_balance(node);
- 
-    // If this node becomes unbalanced, then
-    // there are 4 cases
- 
-    // Left Left Case
-    if (balance > 1 && key < node->left->key)
-        return right_rotate(node);
- 
-    // Right Right Case
-    if (balance < -1 && key > node->right->key)
-        return left_rotate(node);
- 
-    // Left Right Case
-    if (balance > 1 && key > node->left->key)
-    {
-        node->left =  left_rotate(node->left);
-        return right_rotate(node);
-    }
- 
-    // Right Left Case
-    if (balance < -1 && key < node->right->key)
-    {
-        node->right = right_rotate(node->right);
-        return left_rotate(node);
-    }
- 
-    /* return the (unchanged) node pointer */
-    return node;
-}
- 
-// A utility function to print preorder traversal
-// of the tree.
-// The function also prints height of every node
-
-void pre_order(AVLTree * root){
-    if(root != NULL){
-        printf("%"PRIu64" ", root->key);
-        llpos * aux = root->next;
-        while(aux != NULL){ printf("#%"PRIu64", ", aux->pos); aux = aux->next; }
-        pre_order(root->left);
-        pre_order(root->right);
-    }
-}
-
-
-uint64_t sum_of_all_tree(AVLTree * root){
-    uint64_t mysum = 0;
-    if(root != NULL){
-        
-        mysum = root->count;
-        mysum += sum_of_all_tree(root->left);
-        mysum += sum_of_all_tree(root->right);
-    }
-    return mysum;
-}
\ No newline at end of file
--- a/chromeister/src/alignmentFunctions.h	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-#define QF_LAMBDA 0.275
-#define QF_KARLIN 0.333
-
-typedef struct container{
-    llpos * table[4][4][4][4][4][4][4][4][4][4][4][4];
-} Container;
-
-typedef struct index{
-    Tuple_hits table[4][4][4][4][4][4][4][4][4][4][4][4];
-} Index;
-
-/*
-    Nucleotides matching function
-*/
-int64_t compare_letters(unsigned char a, unsigned char b);
-
-/**
- * Initialize the memory pool to later retrieve individual memory addresses for llpos
- * 
- */
-void init_mem_pool_llpos(Mempool_l * mp);
-
-/**
- * Get a new memory address from the pool mp for a type llpos
- * 
- */
-llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used);
-
-
-
-AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key);
-
-
-void init_mem_pool_AVL(Mempool_AVL * mp);
-
-
-AVLTree * right_rotate(AVLTree * y);
-
-AVLTree * left_rotate(AVLTree * x);
-
-AVLTree * find_AVLTree(AVLTree * node, uint64_t key);
-
-llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key);
-
-AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l);
-
-void pre_order(AVLTree * root);
-
-uint64_t sum_of_all_tree(AVLTree * root);
--- a/chromeister/src/combine_reads.c	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include "structs.h"
-#include "commonFunctions.h"
-
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) <= (y)) ? (x) : (y))
-#define STARTING_SEQS 1000
-#define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes
-#define MATVAL 101
-
-int main(int argc, char ** av){
-    
-    if(argc != 2) terror("USE:: combine_reads <file>");
-
-    FILE * results, * data;
-    data = fopen(av[1], "rt");
-    if(data == NULL) terror("Could not open input file");
-
-    uint64_t * mat = (uint64_t *) calloc(MATVAL, sizeof(uint64_t));
-    if(mat == NULL) terror("Could not allocate matrix array");
-
-    char buffer[MAXLID];
-    if ((results = fopen("accu.log", "r")) == NULL){
-        results = fopen("accu.log", "wt");
-    }else{
-        // Load the matrix
-        uint64_t i;
-        
-        for(i=0;i<100;i++){
-            if(0 == fgets(buffer, MAXLID, results)) terror("Missing number on load");
-            
-            //fprintf(stdout, "Have %s\n", buffer);
-            buffer[strlen(buffer)-1] = '\0';
-            mat[i] = asciiToUint64(buffer);
-            //fprintf(stdout, "%"PRIu64"\n", mat[i]);
-            //getchar();
-        }
-        fclose(results);
-        results = fopen("accu.log", "wt"); // Re open
-    }
-
-    // Read file 
-    uint64_t read_id_1, read_id_2, coverage, identity, length, current, currmax, j;
-    while(!feof(data)){
-        if(0 == fgets(buffer, MAXLID, data) && !feof(data)) terror("Missing values");
-        // 2 77277 89 64 213
-        sscanf(buffer, "%"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64, &read_id_1, &read_id_2, &coverage, &identity, &length);
-        //fprintf(stdout, "%"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64"\n", read_id_1, read_id_2, coverage, identity, length);
-        currmax = MIN(identity, coverage);
-        //fprintf(stdout, "%"PRIu64"\n", currmax);
-        current = read_id_1;
-        /*
-        for(j=currmax; j > 1; j--){
-            if(current != lasts[j]){
-                mat[j]++;
-                lasts[j] = current;
-            }
-        }
-        */
-        mat[currmax] += 1;
-
-    }
-
-    for(j=99; j>0; j--){
-        mat[j] += mat[j+1];
-    }
-    mat[0] = mat[1];
-
-    for(j=0; j<100; j++){
-        fprintf(stdout, "%"PRIu64"\n", mat[j]);
-        fprintf(results, "%"PRIu64"\n", mat[j]);
-    }
-
-
-    fclose(results);
-    fclose(data);
-    free(mat);
-    return 0;
-}
\ No newline at end of file
--- a/chromeister/src/commonFunctions.c	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,218 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include <inttypes.h>
-#include <ctype.h>
-#include <string.h>
-#include <pthread.h>
-#include <math.h>
-#include "structs.h"
-
-void terror(char *s) {
-    printf("ERR**** %s ****\n", s);
-    exit(-1);
-}
-
-char buffered_fgetc(char *buffer, uint64_t *pos, uint64_t *read, FILE *f) {
-    if (*pos >= READBUF) {
-        *pos = 0;
-        memset(buffer, 0, READBUF);
-        *read = fread(buffer, 1, READBUF, f);
-    }
-    *pos = *pos + 1;
-    return buffer[*pos-1];
-}
-
-Queue * generate_queue(Head * queue_head, uint64_t t_reads, uint64_t n_threads, uint64_t levels){
-    uint64_t i, j;
-    uint64_t reads_per_thread;
-    if(levels > t_reads) levels = t_reads;
-    uint64_t pieces = t_reads/levels;
-    uint64_t from, to, t_queues = 0, current_queue = 0;
-    for(i=0;i<levels;i++) t_queues += ((i+1)*n_threads);
-    Queue * queues = (Queue *) malloc(t_queues * sizeof(Queue));
-    if(queues == NULL) terror("Could not allocate queue tasks");
-    queue_head->head = &queues[0];
-    
-    for(i=0;i<levels;i++){
-
-        //reads_per_thread = (uint64_t) (floorl((long double) pieces / (long double) ((i+1)*n_threads)));
-        reads_per_thread = (uint64_t) (ceill((long double) pieces / (long double) ((i+1)*n_threads)));
-        
-
-        for(j=0;j<(i+1)*n_threads;j++){
-            from = j * reads_per_thread + (pieces*i);
-            to = (j + 1) * reads_per_thread + (pieces*i);
-            
-            if(j == (i+1)*n_threads - 1) to = pieces*(i+1);
-
-
-            if(i == levels - 1 && j == (i+1)*n_threads - 1){
-                //If its the last 
-                queues[current_queue].next = NULL;
-            }else{
-                //Else add it to the queue
-                queues[current_queue].next = &queues[current_queue+1];
-            }
-            
-
-            queues[current_queue].r1 = from;
-            queues[current_queue].r2 = to;
-            current_queue++;
-            //printf("current_piece: %"PRIu64"-%"PRIu64" diff: %"PRIu64"\n", from, to, to - from);
-
-        }
-
-    }
-    //printf("TREADS was %"PRIu64"\n", t_reads);    
-    return &queues[0];
-}
-
-void print_queue(Queue * q){
-    fprintf(stdout, "Task: %"PRIu64"-%"PRIu64"\n", q->r1, q->r2);
-}
-
-Queue * get_task_from_queue(Head * queue_head, pthread_mutex_t * lock){
-    pthread_mutex_lock(lock);
-
-    Queue * ptr = queue_head->head;
-    if(queue_head->head != NULL) queue_head->head = queue_head->head->next;
-    //if(ptr != NULL){ printf("Taking "); /*print_queue(ptr);*/ }
-
-    pthread_mutex_unlock(lock);
-
-
-    return ptr;
-}
-
-uint64_t quick_pow4(uint64_t n){
-    static uint64_t pow4[33]={1L, 4L, 16L, 64L, 256L, 1024L, 4096L, 16384L, 65536L, 
-    262144L, 1048576L, 4194304L, 16777216L, 67108864L, 268435456L, 1073741824L, 4294967296L, 
-    17179869184L, 68719476736L, 274877906944L, 1099511627776L, 4398046511104L, 17592186044416L, 
-    70368744177664L, 281474976710656L, 1125899906842624L, 4503599627370496L, 18014398509481984L, 
-    72057594037927936L, 288230376151711744L, 1152921504606846976L, 4611686018427387904L};
-    return pow4[n];
-}
-
-uint64_t quick_pow4byLetter(uint64_t n, const char c){
-    static uint64_t pow4_G[33]={2*1L, 2*4L, 2*16L, 2*64L, 2*256L, 2*1024L, 2*4096L, 2*16384L, 2*65536L, 
-    (uint64_t)2*262144L, (uint64_t)2*1048576L,(uint64_t)2*4194304L, (uint64_t)2*16777216L, (uint64_t)2*67108864L, (uint64_t)2*268435456L, (uint64_t)2*1073741824L, (uint64_t)2*4294967296L, 
-    (uint64_t)2*17179869184L, (uint64_t)2*68719476736L, (uint64_t)2*274877906944L, (uint64_t)2*1099511627776L, (uint64_t)2*4398046511104L, (uint64_t)2*17592186044416L, 
-    (uint64_t)2*70368744177664L, (uint64_t)2*281474976710656L, (uint64_t)2*1125899906842624L, (uint64_t)2*4503599627370496L, (uint64_t)2*18014398509481984L, 
-    (uint64_t)2*72057594037927936L, (uint64_t) 2*288230376151711744L, (uint64_t) 2*1152921504606846976L, (uint64_t) 2*4611686018427387904L};
-    
-    static uint64_t pow4_T[33]={3*1L, 3*4L, 3*16L, 3*64L, 3*256L, 3*1024L, 3*4096L, 3*16384L, 3*65536L, 
-    (uint64_t)3*262144L, (uint64_t) 3*1048576L, (uint64_t)3*4194304L, (uint64_t)3*16777216L, (uint64_t)3*67108864L, (uint64_t)3*268435456L, (uint64_t)3*1073741824L, (uint64_t)3*4294967296L, 
-    (uint64_t)3*17179869184L, (uint64_t)3*68719476736L, (uint64_t)3*274877906944L, (uint64_t)3*1099511627776L, (uint64_t)3*4398046511104L, (uint64_t)3*17592186044416L, 
-    (uint64_t)3*70368744177664L, (uint64_t)3*281474976710656L, (uint64_t)3*1125899906842624L, (uint64_t)3*4503599627370496L, (uint64_t)3*18014398509481984L, 
-    (uint64_t)3*72057594037927936L, (uint64_t) 3*288230376151711744L, (uint64_t) 3*1152921504606846976L, (uint64_t) 3*4611686018427387904L};
-    
-    if(c == 'A') return 0;
-    if(c == 'C') return quick_pow4(n);
-    if(c == 'G') return pow4_G[n];
-    if(c == 'T') return pow4_T[n];
-    return 0;
-}
-
-uint64_t hashOfWord(const unsigned char * word, uint32_t k, uint64_t offset){
-    
-    uint64_t value = 0, jIdx;
-    for(jIdx=0;jIdx<k-offset;jIdx++){
-        value += quick_pow4byLetter(k-(jIdx+1), (char) word[jIdx]);
-    }
-    return value;
-}
-
-void perfect_hash_to_word(unsigned char * word, uint64_t hash, uint32_t k){
-    uint64_t jIdx = (uint64_t) (k-1), upIdx = 0;
-    uint64_t v;
-    while(jIdx >= 0){
-        v = (uint64_t) floor(hash / (pow(4, jIdx)));
-        if(v == 0){ word[upIdx++] = (unsigned char) 'A'; hash -= ((uint64_t) pow(4, jIdx) * 0); }
-        if(v == 1){ word[upIdx++] = (unsigned char) 'C'; hash -= ((uint64_t) pow(4, jIdx) * 1); }
-        if(v == 2){ word[upIdx++] = (unsigned char) 'G'; hash -= ((uint64_t) pow(4, jIdx) * 2); }
-        if(v == 3){ word[upIdx++] = (unsigned char) 'T'; hash -= ((uint64_t) pow(4, jIdx) * 3); }
-        
-        if(jIdx == 0) break;
-        --jIdx;
-    }
-}
-
-uint64_t collisioned_hash(const unsigned char * word, uint32_t k){
-    uint64_t jIdx, value = 0;
-    for(jIdx=0;jIdx<k;jIdx+=diffuse_z){
-        value += quick_pow4byLetter(k - (jIdx+1), (char) word[jIdx]);
-    }
-    return value;
-}
-
-void decomposed_hash_of_word(const unsigned char * word, unsigned char * vector, uint32_t k){
-    uint64_t jIdx;
-    for(jIdx=0;jIdx<k/BYTES_IN_MER;jIdx++){
-        vector[jIdx] = (unsigned char) quick_pow4byLetter(0, (char) word[jIdx*BYTES_IN_MER]);
-        vector[jIdx] += (unsigned char) quick_pow4byLetter(1, (char) word[jIdx*BYTES_IN_MER+1]);
-        vector[jIdx] += (unsigned char) quick_pow4byLetter(2, (char) word[jIdx*BYTES_IN_MER+2]);
-        vector[jIdx] += (unsigned char) quick_pow4byLetter(3, (char) word[jIdx*BYTES_IN_MER+3]);
-    }
-}
-
-uint64_t xor_decomposed_hash(unsigned char * vector1, unsigned char * vector2, uint32_t k){
-    uint64_t jIdx;
-    uint64_t diffs = 0;
-    for(jIdx=0;jIdx<k/BYTES_IN_MER;jIdx++){
-        diffs += (uint64_t) ((vector1[jIdx] ^ vector2[jIdx]) & 0x1);
-    }
-    return diffs;
-}
-
-uint64_t asciiToUint64(const char *text){
-	uint64_t number=0;
-
-	for(;*text;text++){
-		char digit=*text-'0';           
-		number=(number*10)+digit;
-	}
-	return number;
-}
-
-unsigned char complement(unsigned char c){
-    
-    switch(c){
-    case ((unsigned)'A'): return (unsigned)'T';
-    break;
-    case ((unsigned)'C'): return (unsigned)'G';
-    break;
-    case ((unsigned)'G'): return (unsigned)'C';
-    break;
-    case ((unsigned)'T'): return (unsigned)'A';
-    break;
-    case ((unsigned)'N'): return (unsigned)'N';
-    break;
-    case ((unsigned)'-'): return (unsigned)'-'; // Gaps
-    break;
-    case ((unsigned)'\0'): return (unsigned)'\0';
-    
-    }
-    
-    printf("T B: (%c) ____\n", c);
-    terror("Unrecognized nucleotide in hit");
-    return 0; 
-}
-
-void inplace_reverse_and_complement(unsigned char *d, uint64_t l){
-    uint64_t i;
-    char c;
-    for(i=0;i<l/2;i++){
-        c = d[i];
-        d[i] = d[l-i-1];
-        d[l-i-1] = c;
-        //printf("%c%c", d[i], d[l-i-1]);
-    }
-    
-    //printf("\n");
-    for(i=0;i<l;i++){
-        //printf("eeee: %d", (int)d[i]);
-        d[i] = complement(d[i]);
-    }
-}
-
--- a/chromeister/src/commonFunctions.h	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#ifndef COMMON_FUNCTIONS_H
-#define COMMON_FUNCTIONS_H
-#include "structs.h"
-/**
- * Print the error message 's' and exit(-1)
- */
-void terror(char *s);
-
-
-/**
- * Function to read char by char buffered from a FILE
- */
-char buffered_fgetc(char *buffer, uint64_t *pos, uint64_t *read, FILE *f);
-
-/*
-    Generates a queue of tasks for threads
-*/
-Queue * generate_queue(Head * queue_head, uint64_t t_reads, uint64_t n_threads, uint64_t levels);
-
-/*
-    Prints a queue task
-*/
-void print_queue(Queue * q);
-
-/*
-    Gets the next task to do when a pthread is free
-*/
-Queue * get_task_from_queue(Head * queue_head, pthread_mutex_t * lock);
-
-uint64_t quick_pow4(uint64_t n);
-
-uint64_t quick_pow4byLetter(uint64_t n, const char c);
-
-uint64_t hashOfWord(const unsigned char * word, uint32_t k, uint64_t offset);
-
-void perfect_hash_to_word(unsigned char * word, uint64_t hash, uint32_t k);
-
-uint64_t collisioned_hash(const unsigned char * word, uint32_t k);
-
-void decomposed_hash_of_word(const unsigned char * word, unsigned char * vector, uint32_t k);
-
-uint64_t xor_decomposed_hash(unsigned char * vector1, unsigned char * vector2, uint32_t k);
-
-uint64_t asciiToUint64(const char *text);
-
-unsigned char complement(unsigned char c);
-
-void inplace_reverse_and_complement(unsigned char *d, uint64_t l);
-
-
-#endif /* COMMON_FUNCTIONS_H */
--- a/chromeister/src/reverseComplement.c	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-/* reverseComplement.c
-   Program reading a FASTA file as input (one or multiple sequence)
-   and then returning the reverse complementary sequence in the output file.
-   It is important to note that for multiple sequence file the first input
-   sequence is the last one at the output file and the last one in the input
-   is the first one in the output.
-   oscart@uma.es
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <inttypes.h>
-#include "commonFunctions.h"
-
-#define SEQSIZE 2000000000
-#define READINPUT 10000
-#define WSIZE 32
-#define NREADS 1000000
-
-int main(int ac, char** av) {
-	FILE *fIn, *fOut;
-	int64_t i, j, nR, seqLen = 0;
-	char *seq, c, toW;
-	long *offset = NULL;
-
-	if (ac != 3)
-		terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT");
-
-	/**
-	 * Allocating memory for the sequence
-	 * Fixed amount of memory, change it to loadSeqDB?
-	 */
-	if ((seq = (char*) malloc(SEQSIZE)) == NULL)
-		terror("memory for Seq");
-
-	if ((fIn = fopen(av[1], "rt")) == NULL)
-		terror("opening IN sequence FASTA file");
-
-	if ((fOut = fopen(av[2], "wt")) == NULL)
-		terror("opening OUT sequence Words file");
-	
-	if ((offset = (long *) malloc(sizeof(long)*NREADS)) == NULL)
-		terror("memory for offsets");
-
-	for(i=0;i<NREADS;i++){
-		offset[i]=0;
-	}
-
-	nR = 0;
-	c = fgetc(fIn);
-	while(!feof(fIn)){
-		if(c == '>'){
-			offset[nR++] = ftell(fIn)-1;
-		}
-		c = fgetc(fIn);
-	}
-
-	for(i=nR-1; i>=0; i--){
-		fseek(fIn, offset[i], SEEK_SET);
-		//READ and write header
-		if(fgets(seq, READINPUT, fIn)==NULL){
-			terror("Empty file");
-		}
-		fprintf(fOut, "%s", seq);
-		//READ and write sequence
-		c = fgetc(fIn);
-		while(c != '>' && !feof(fIn)){
-			if(isupper(c) || islower(c)){
-				seq[seqLen++]=c;
-			}
-			c = fgetc(fIn);
-		}
-		for(j=seqLen-1; j >= 0; j--){
-			switch(seq[j]){
-				case 'A':
-					toW = 'T';
-					break;
-				case 'C':
-					toW = 'G';
-					break;
-				case 'G':
-					toW = 'C';
-					break;
-				case 'T':
-					toW = 'A';
-					break;
-				case 'U':
-					toW = 'A';
-					break;
-				case 'a':
-					toW = 't';
-					break;
-				case 'c':
-					toW = 'g';
-					break;
-				case 'g':
-					toW = 'c';
-					break;
-				case 't':
-					toW = 'a';
-					break;
-				case 'u':
-					toW = 'a';
-					break;
-				default:
-					toW = seq[j];
-					break;
-			}
-			fwrite(&toW, sizeof(char), 1, fOut);
-		}
-		toW='\n';
-		fwrite(&toW, sizeof(char), 1, fOut);
-		seqLen=0;
-	}
-
-	fclose(fIn);
-	fclose(fOut);
-
-	return 0;
-}
-
-
--- a/chromeister/src/structs.h	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-#ifndef STRUCTS_H
-#define STRUCTS_H
-
-#include <inttypes.h>
-
-//Structs required for the dotplot workflow
-#define MAXLID 200
-//#define READBUF 2000000000 //2 GB
-#define READBUF 500000000 //50MB
-#define INITSEQS 3000 //Number of starting sequences (in database)
-#define POINT 4
-
-#define FIXED_K 12
-#define TOTAL_ENTRIES 16777216
-
-#define MAXLID 200
-#define ALIGN_LEN 60 //For NW alignment
-#define MAX_READ_SIZE 20000 //Maximum length of read to have a portion of the table allocated
-#define MAX_WINDOW_SIZE 500 //Maximum window length to explore NW table
-//#define POOL_SIZE 2500000000 // by 16 bytes it is 40 GB
-#define POOL_SIZE 12500000 // 1 GB if 16 bytes
-#define MAX_MEM_POOLS 256 
-
-#define BYTES_IN_MER 4
-#define MAX_DECOMP_HASH 10
-#define FALSE 0
-#define TRUE 1
-
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) <= (y)) ? (x) : (y))
-
-extern uint64_t custom_kmer;
-extern uint64_t diffuse_z;
-
-//Struct for linked list of positions
-typedef struct linked_list_pos{
-    uint64_t pos;
-    //uint64_t extended_hash;
-    //unsigned char decomp_hash[MAX_DECOMP_HASH]; // Fits up to MAX_DECOMP_HASH*4 letters = 256 length kmer
-    //uint64_t hits_count;
-    struct linked_list_pos * next;
-} llpos;
-
-
-// An AVL tree node
-typedef struct AVL_Node{
-    uint64_t key;
-    struct AVL_Node * left;
-    struct AVL_Node * right;
-    uint64_t height;
-    uint64_t count;
-    llpos * next;
-} AVLTree;
-
-
-// Tuple of data 
-typedef struct tuple_hits{
-    int repetition;
-    int hit_count;
-    uint64_t key;
-    uint64_t pos;
-    uint64_t pos_in_y;
-} Tuple_hits;
-
-typedef struct hash_holder{
-    uint64_t key;
-    uint64_t pos;
-} Hash_holder;
-
-
-
-//Struct for memory pool por lists
-typedef struct mempool_l{
-    llpos * base;
-    uint64_t current;
-} Mempool_l;
-
-//Struct for memory pool por AVLtree
-typedef struct mempool_AVL{
-    AVLTree * base;
-    uint64_t current;
-} Mempool_AVL;
-
-//Struct for a whole sequence(s) data
-typedef struct seqinfo{
-    unsigned char * sequences;
-    uint64_t * start_pos;
-    uint64_t total_len;
-    uint64_t n_seqs;
-} SeqInfo;
-
-//Struct for the alignment of a quick hit
-typedef struct quickfrag{
-    uint64_t x_start;
-    uint64_t y_start;
-    uint64_t t_len;
-    long double coverage;
-    long double e_value;
-} Quickfrag;
-
-typedef struct point{
-    uint64_t x;
-    uint64_t y;
-} Point;
-
-
-
-struct cell{
-    int64_t score;
-    uint64_t xfrom;
-    uint64_t yfrom;
-};
-
-struct positioned_cell{
-    int64_t score;
-    uint64_t xpos;
-    uint64_t ypos;
-};
-
-struct best_cell{
-    struct positioned_cell c;
-    uint64_t j_prime;
-};
-
-typedef struct{
-    uint64_t identities;
-    uint64_t length;
-    uint64_t igaps;
-    uint64_t egaps;
-} BasicAlignment;
-
-typedef struct queue{
-    uint64_t r1; //reads region
-    uint64_t r2;
-    struct queue * next;
-} Queue;
-
-typedef struct{
-    Queue * head;
-} Head;
-
-
-#endif