changeset 9:aec70bb1ae27 draft

Uploaded
author bitlab
date Wed, 18 Nov 2020 08:08:25 +0000
parents dbdb10e4c8b5
children 12f377f170c2
files gecko/bin/frags2align.sh gecko/bin/frags2borders.sh gecko/bin/workflow.sh gecko/src/Makefile gecko/src/csvExtractBorders.c gecko/src/csvFrags2text.c gecko/src/reverseComplement.c
diffstat 7 files changed, 897 insertions(+), 115 deletions(-) [+]
line wrap: on
line diff
--- a/gecko/bin/frags2align.sh	Tue Nov 17 13:04:23 2020 +0000
+++ b/gecko/bin/frags2align.sh	Wed Nov 18 08:08:25 2020 +0000
@@ -3,50 +3,46 @@
 if [ $# != 4 ]; then
    echo " ==== ERROR ... you called this script inappropriately."
    echo ""
-   echo "   usage:  $0 fragsFILE.frags fastaX fastaY alignments.txt"
+   echo "   usage:  $0 fragsFILE.frags/.csv fastaX fastaY alignments.txt"
    echo ""
    exit -1
 fi
 
-
-
-MYRAND=$((( RANDOM % 10000000) +1))
-MGDIR=${PWD}/${MYRAND}
-
-mkdir $MGDIR
-
 FRAGS=$1
 FASTAX=$2
 FASTAY=$3
 ALIGN=$4
 
-fragsName=$(basename "$1")
-fastaXname=$(basename "$2")
-fastaYname=$(basename "$3")
-alignName=$(basename "$4")
-fragsName="${fragsName%.*}"
-fastaXname="${fastaXname%.*}"
-fastaYname="${fastaYname%.*}"
-alignName="${alignName%.*}"
 
-cp $1 $MGDIR/${fragsName}.frags
-cp $2 $MGDIR/${fastaXname}.fasta
-cp $3 $MGDIR/${fastaYname}.fasta
 
 BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
 # generate indices
 
-$BINDIR/indexmaker $MGDIR/$fastaXname.fasta $MGDIR/$fastaXname.idx
-$BINDIR/indexmaker $MGDIR/$fastaYname.fasta $MGDIR/$fastaYname.idx
+$BINDIR/indexmaker $FASTAX $FASTAX.idx
+$BINDIR/indexmaker $FASTAY $FASTAY.idx
 
-$BINDIR/reverseComplement $MGDIR/$fastaYname.fasta $MGDIR/$fastaYname.rev
-$BINDIR/indexmaker $MGDIR/$fastaYname.rev $MGDIR/$fastaYname.rev.idx
+$BINDIR/reverseComplement $FASTAY $FASTAY.rev
+$BINDIR/indexmaker $FASTAY.rev $FASTAY.rev.idx
 
 # extract frags
 
-$BINDIR/frags2text $MGDIR/$fragsName.frags $MGDIR/$fastaXname.fasta $MGDIR/$fastaYname.fasta $MGDIR/$fastaYname.rev $MGDIR/$fastaXname.idx $MGDIR/$fastaYname.idx $MGDIR/$fastaYname.rev.idx $MGDIR/$alignName.txt
+if [ ${FRAGS: -4} == ".csv" ]
+then
+
+	echo "....using csv"
+	sed 's/,/ /g' $FRAGS > $FRAGS.fix
+	$BINDIR/csvFrags2text $FRAGS.fix $FASTAX $FASTAY $FASTAY.rev $FASTAX.idx $FASTAY.idx $FASTAY.rev.idx $ALIGN
+	rm $FRAGS.fix
 
-mv $MGDIR/$alignName.txt $ALIGN
+else
+	echo "....using frags"
+	$BINDIR/frags2text $FRAGS $FASTAX $FASTAY $FASTAY.rev $FASTAX.idx $FASTAY.idx $FASTAY.rev.idx $ALIGN
+fi
+
 
-rm -rf $MGDIR
+rm $FASTAX.idx
+rm $FASTAY.idx
+rm $FASTAY.rev
+rm $FASTAY.rev.idx
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/frags2borders.sh	Wed Nov 18 08:08:25 2020 +0000
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+if [ "$#" -lt 5 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 fragsFILE.csv fastaX fastaY alignments.txt borderSize [query]"
+   echo "	include "query" as ending parameter to extract also the query sequence as fasta file "
+   echo ""
+   exit -1
+fi
+
+FRAGS=$1
+FASTAX=$2
+FASTAY=$3
+ALIGN=$4
+BORDER=$5
+ONLYQUERYSEQ=$6
+
+
+
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# generate indices
+
+$BINDIR/indexmaker $FASTAX $FASTAX.idx
+$BINDIR/indexmaker $FASTAY $FASTAY.idx
+
+$BINDIR/reverseComplement $FASTAY $FASTAY.rev
+$BINDIR/indexmaker $FASTAY.rev $FASTAY.rev.idx
+
+# extract frags
+
+sed 's/,/ /g' $FRAGS > $FRAGS.fix
+
+$BINDIR/csvExtractBorders $FRAGS.fix $FASTAX $FASTAY $FASTAY.rev $FASTAX.idx $FASTAY.idx $FASTAY.rev.idx $ALIGN $BORDER
+
+if [[ $ONLYQUERYSEQ = "query" ]]
+then
+
+	grep '>\|X:' $ALIGN | awk 'BEGIN{n=0;} {if (substr($0,1,2) ~ "X:" ) { print substr($0,8);} else { printf(">S%d-%s\n", n, $0); n = n + 1; }}' > query.fasta
+	sed 's/>/#/g' query.fasta | sed 's/^#/>/' > query-$BORDER.fasta
+	rm query.fasta
+
+fi
+
+
+rm $FRAGS.fix
+rm $FASTAX.idx
+rm $FASTAY.idx
+rm $FASTAY.rev
+rm $FASTAY.rev.idx
+
--- a/gecko/bin/workflow.sh	Tue Nov 17 13:04:23 2020 +0000
+++ b/gecko/bin/workflow.sh	Wed Nov 18 08:08:25 2020 +0000
@@ -2,38 +2,28 @@
 
 FL=1000   # frequency limit
 
-if [ $# != 8 ]; then
+if [ $# != 6 ]; then
    echo " ==== ERROR ... you called this script inappropriately."
    echo ""
-   echo "   usage:  $0 seqXName seqYName lenght similarity WL fixedL output.frags output.csv"
+   echo "   usage:  $0 seqXName seqYName lenght similarity WL fixedL"
    echo ""
    exit -1
 fi
 
-
 {
 
-
-MYRAND=$((( RANDOM % 10000000) +1))
-MGDIR=${PWD}/${MYRAND}
-echo "MGDIR $MGDIR"
-mkdir -p ${MGDIR}
-
-genoXname=$(basename "$1")
-genoYname=$(basename "$2")
-genoXname="${genoXname%.*}"
-genoYname="${genoYname%.*}"
+dirNameX=$(readlink -f $1 | xargs dirname)
+seqXName=$(basename "$1")
+extensionX="${seqXName##*.}"
+seqXName="${seqXName%.*}"
 
+dirNameY=$(readlink -f $2 | xargs dirname)
+seqYName=$(basename "$2")
+extensionY="${seqYName##*.}"
+seqYName="${seqYName%.*}"
 
-cp $1 $MGDIR/${genoXname}.fasta
-cp $2 $MGDIR/${genoYname}.fasta
-mkdir -p ${MGDIR}/dictionaries
-mkdir -p ${MGDIR}/fragments
-
-genoXExt="fasta"
-genoYExt="fasta"
-
-
+#seqXName=`basename $1 .fasta`
+#seqYName=`basename $2 .fasta`
 
 BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
@@ -41,42 +31,40 @@
 similarity=${4}
 WL=${5} # wordSize
 fixedL=${6}
-output=${7}
-csv=${8}
+strand=${7}
 
-mkdir ${MGDIR}/intermediateFiles
+mkdir intermediateFiles
 
-mkdir ${MGDIR}/intermediateFiles/${genoXname}-${genoYname}
-mkdir ${MGDIR}/results
-mkdir ${MGDIR}/intermediateFiles/dictionaries
-mkdir ${MGDIR}/intermediateFiles/hits
+mkdir intermediateFiles/${seqXName}-${seqYName}
+mkdir results
+mkdir intermediateFiles/dictionaries
+mkdir intermediateFiles/hits
 
 # Copiamos los fastas
-ln -s ${MGDIR}/${genoXname}.${genoXname} ${MGDIR}/intermediateFiles/${genoXname}-${genoYname}
-ln -s ${MGDIR}/${genoYname}.${genoYname} ${MGDIR}/intermediateFiles/${genoYname}-${genoXname}
+ln -s ${dirNameX}/${seqXName}.${extensionX} intermediateFiles/${seqXName}-${seqYName}
+ln -s ${dirNameY}/${seqYName}.${extensionY} intermediateFiles/${seqXName}-${seqYName}
 
-cd ${MGDIR}/intermediateFiles/${genoXname}-${genoYname}
+cd intermediateFiles/${seqXName}-${seqYName}
 
 ###############
 
 
-
-echo "${BINDIR}/reverseComplement ${MGDIR}/${genoYname}.${genoXExt} ${genoYname}-revercomp.${genoYExt}"
-${BINDIR}/reverseComplement ${MGDIR}/${genoYname}.${genoYExt} ${MGDIR}/${genoYname}-revercomp.${genoYExt}
+echo "${BINDIR}/reverseComplement ${seqYName}.${extensionX} ${seqYName}-revercomp.${extensionY}"
+${BINDIR}/reverseComplement ${seqYName}.${extensionX} ${seqYName}-revercomp.${extensionY}
 
-if [[ ! -f ../dictionaries/${genoXname}.d2hP ]];	then
-	echo "${BINDIR}/dictionary.sh ${MGDIR}/${genoXname}.${genoXExt} &"
-	${BINDIR}/dictionary.sh ${MGDIR}/${genoXname}.${genoXExt} &		
-fi
-
-if [[ ! -f ../dictionaries/${seqYName}.d2hP ]];	then
-	echo "${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}.${genoYExt} &"
-	${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}.${genoYExt} &
+if [[ ! -f ../dictionaries/${seqXName}.d2hP ]];	then
+	echo "${BINDIR}/dictionary.sh ${seqXName}.${extensionX} &"
+	${BINDIR}/dictionary.sh ${seqXName}.${extensionX} &		
 fi
 		
-if [[ ! -f ../dictionaries/${genoYname}-revercomp.d2hP ]];	then
-	echo "${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}-revercomp.${genoYExt} &"
-	${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}-revercomp.${genoYExt} &
+if [[ ! -f ../dictionaries/${seqYName}.d2hP ]];	then
+	echo "${BINDIR}/dictionary.sh ${seqYName}.${extensionY} &"
+	${BINDIR}/dictionary.sh ${seqYName}.${extensionY} &
+fi
+		
+if [[ ! -f ../dictionaries/${seqYName}-revercomp.d2hP ]];	then
+	echo "${BINDIR}/dictionary.sh ${seqYName}-revercomp.${extensionY} &"
+	${BINDIR}/dictionary.sh ${seqYName}-revercomp.${extensionY} &
 fi		
 
 echo "Waiting for the calculation of the dictionaries"
@@ -88,31 +76,28 @@
 done
 
 
-
-mv ${genoXname}.d2hP ../dictionaries/
-mv ${genoXname}.d2hW ../dictionaries/
-mv ${genoYname}.d2hP ../dictionaries/
-mv ${genoYname}.d2hW ../dictionaries/
-mv ${genoYname}-revercomp.d2hP ../dictionaries/
-mv ${genoYname}-revercomp.d2hW ../dictionaries/
-
-
+mv ${seqXName}.d2hP ../dictionaries/
+mv ${seqXName}.d2hW ../dictionaries/
+mv ${seqYName}.d2hP ../dictionaries/
+mv ${seqYName}.d2hW ../dictionaries/
+mv ${seqYName}-revercomp.d2hP ../dictionaries/
+mv ${seqYName}-revercomp.d2hW ../dictionaries/
 		
 # Hacemos enlace simbolico
-ln -s ../dictionaries/${genoXname}.d2hP .
-ln -s ../dictionaries/${genoXname}.d2hW .
+ln -s ../dictionaries/${seqXName}.d2hP .
+ln -s ../dictionaries/${seqXName}.d2hW .
 
-ln -s ../dictionaries/${genoYname}.d2hP .
-ln -s ../dictionaries/${genoYname}.d2hW .
+ln -s ../dictionaries/${seqYName}.d2hP .
+ln -s ../dictionaries/${seqYName}.d2hW .
 
-ln -s ../dictionaries/${genoYname}-revercomp.d2hP .
-ln -s ../dictionaries/${genoYname}-revercomp.d2hW .
+ln -s ../dictionaries/${seqYName}-revercomp.d2hP .
+ln -s ../dictionaries/${seqYName}-revercomp.d2hW .
 
-echo "${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} f &"
-${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} f &
+echo "${BINDIR}/comparison.sh ${seqXName}.${extensionX} ${seqYName}.${extensionY} ${length} ${similarity} ${WL} ${fixedL} f &"
+${BINDIR}/comparison.sh ${seqXName}.${extensionX} ${seqYName}.${extensionY} ${length} ${similarity} ${WL} ${fixedL} f &
 
-echo "${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}-revercomp.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} r &"
-${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}-revercomp.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} r &
+echo "${BINDIR}/comparison.sh ${seqXName}.${extensionX} ${seqYName}-revercomp.${extensionY} ${length} ${similarity} ${WL} ${fixedL} r &"
+${BINDIR}/comparison.sh ${seqXName}.${extensionX} ${seqYName}-revercomp.${extensionY} ${length} ${similarity} ${WL} ${fixedL} r &
 
 echo "Waiting for the comparisons"
 
@@ -122,47 +107,45 @@
     wait $job
 done
 
-
 #echo "rm ${seqYName}-revercomp.${extensionY}"
 #rm ${seqYName}-revercomp.${extensionY}
 
+echo "${BINDIR}/combineFrags ${seqXName}-${seqYName}-sf.frags ${seqXName}-${seqYName}-revercomp-sr.frags ${seqXName}-${seqYName}.frags"
+${BINDIR}/combineFrags ${seqXName}-${seqYName}-sf.frags ${seqXName}-${seqYName}-revercomp-sr.frags ${seqXName}-${seqYName}.frags
 
-echo "${BINDIR}/combineFrags ${genoXname}-${genoYname}-sf.frags ${genoXname}-${genoYname}-revercomp-sr.frags ${genoXname}-${genoYname}.frags"
-${BINDIR}/combineFrags ${genoXname}-${genoYname}-sf.frags ${genoXname}-${genoYname}-revercomp-sr.frags ${genoXname}-${genoYname}.frags
+#echo "${BINDIR}/newFragToBalazsVersion ${seqXName}-${seqYName}.frags ${seqXName}-${seqYName}.old.frags"
+#${BINDIR}/newFragToBalazsVersion ${seqXName}-${seqYName}.frags ${seqXName}-${seqYName}.old.frags
+
+#echo "${BINDIR}/af2pngrev ${seqXName}-${seqYName}.frags ${seqXName}-${seqYName}.png ${seqXName} ${seqYName}"
+#${BINDIR}/af2pngrev ${seqXName}-${seqYName}.frags ${seqXName}-${seqYName}.png ${seqXName} ${seqYName}
 
 #Borramos todo menos los frags y los diccionarios
 
 # Get Info from frags 
-echo "${BINDIR}/getInfo ${genoXname}-${genoYname}.frags > ${genoXname}-${genoYname}.csv"
-${BINDIR}/getInfo ${genoXname}-${genoYname}.frags > ${genoXname}-${genoYname}.csv.tmp
-cat ${genoXname}-${genoYname}.frags.INF ${genoXname}-${genoYname}.csv.tmp > ${genoXname}-${genoYname}.csv
-rm -rf ${genoXname}-${genoYname}.csv.tmp
+echo "${BINDIR}/getInfo ${seqXName}-${seqYName}.frags > ${seqXName}-${seqYName}.csv"
+${BINDIR}/getInfo ${seqXName}-${seqYName}.frags > ${seqXName}-${seqYName}.csv.tmp
+cat ${seqXName}-${seqYName}.frags.INF ${seqXName}-${seqYName}.csv.tmp > ${seqXName}-${seqYName}.csv
+rm -rf ${seqXName}-${seqYName}.csv.tmp
 	
-if [[ -L "../../${genoXname}.fasta" ]]
+if [[ -L "../../${seqXName}.fasta" ]]
 then
-	rm ../../${genoYname}.fasta
+	rm ../../${seqXName}.fasta
 fi
 
-if [[ -L "../../${genoXname}.fasta" ]]
+if [[ -L "../../${seqYName}.fasta" ]]
 then
-	rm ../../${genoYname}.fasta
+	rm ../../${seqYName}.fasta
 fi
 
 #Movemos los frags y los info
-mv ${genoXname}-${genoYname}.frags $output
-mv ${genoXname}-${genoYname}.csv $csv
-
-
+mv ${seqXName}-${seqYName}.frags ../../results
+mv ${seqXName}-${seqYName}.frags.INF ../../results
+mv ${seqXName}-${seqYName}.frags.MAT ../../results
+#mv ${seqXName}-${seqYName}.old.frags ../../results
+mv ${seqXName}-${seqYName}.csv ../../results
 
 #echo "Borrando ${seqXName}-${seqYName}"
 cd ..
-
 #rm -rf ${seqXName}-${seqYName}
 
-cd ..
-
-
-rm -r ${MGDIR}
-
 } &> /dev/null
-
--- a/gecko/src/Makefile	Tue Nov 17 13:04:23 2020 +0000
+++ b/gecko/src/Makefile	Wed Nov 18 08:08:25 2020 +0000
@@ -3,7 +3,7 @@
 CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -Wall
 BIN=../bin
 
-all: reverseComplement words sortWords w2hd wordsStat hits sortHits filterHits FragHits combineFrags hitsStat hdStat fragStat getInfoCSB filterFrags indexmaker frags2text
+all: reverseComplement words sortWords w2hd wordsStat hits sortHits filterHits FragHits combineFrags hitsStat hdStat fragStat getInfoCSB filterFrags indexmaker frags2text csvFrags2text csvExtractBorders
 
 hits: hits.c
 	$(CC) $(CFLAGS) hits.c comparisonFunctions.c dictionaryFunctions.c commonFunctions.c -o $(BIN)/hits
@@ -56,6 +56,12 @@
 frags2text:
 	$(CC) $(CFLAGS) frags2text.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/frags2text
 
+csvFrags2text:
+	$(CC) $(CFLAGS) csvFrags2text.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/csvFrags2text
+
+csvExtractBorders:
+	$(CC) $(CFLAGS) csvExtractBorders.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/csvExtractBorders
+
 clean:
 	rm -rf $(BIN)/getInfo
 	rm -rf $(BIN)/filterFrags
@@ -63,3 +69,4 @@
 	rm -rf $(BIN)/frags2text
 	rm -rf $(BIN)/reverseComplement $(BIN)/words $(BIN)/sortWords $(BIN)/w2hd $(BIN)/wordsStat
 	rm -rf $(BIN)/hits $(BIN)/sortHits $(BIN)/filterHits $(BIN)/FragHits $(BIN)/combineFrags $(BIN)/hitsStat $(BIN)/fragStat $(BIN)/hdStat
+	rm -rf $(BIN)/csvExtractBorders $(BIN)/csvFrags2text.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/csvExtractBorders.c	Wed Nov 18 08:08:25 2020 +0000
@@ -0,0 +1,396 @@
+/* 
+ *
+ * Sintax: ./frags2text fragsFILE.frags fastaX fastaY fragsFILE.txt
+ 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+
+#define TAB_INSERT 70
+#define READING_FRAG_BUFFER 10000
+
+#define MAX(a,b) \
+   ({ __typeof__ (a) _a = (a); \
+       __typeof__ (b) _b = (b); \
+     _a > _b ? _a : _b; })
+
+#define MIN(a,b) \
+   ({ __typeof__ (a) _a = (a); \
+       __typeof__ (b) _b = (b); \
+     _a < _b ? _a : _b; })
+
+void csv_frag_to_struct_frag(char * l, struct FragFile * f){
+
+	//0		1		2		3		4		5			6		7		8		9		10			11		12		13
+	//Type	xStart	yStart	xEnd	yEnd	strand(f/r)	block	length	score	ident	similarity	%ident	SeqX	SeqY
+	//Frag	3147493	3006054	3154663	2998884	f	0	7171	20868	6194	72.75	0.86	0	0
+
+	
+	float bin;
+
+	sscanf(l, "%*s %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %c %"PRId64" %"PRIu64" %"PRIu64" %"PRIu64" %f %f %"PRIu64" %"PRIu64, &f->xStart, &f->yStart, &f->xEnd, &f->yEnd, &f->strand, &f->block, &f->length, &f->score, &f->ident, &f->similarity, &bin, &f->seqX, &f->seqY);
+
+	//printf("Read %d items\n", items);
+
+}
+
+struct rIndex2 * loadReadsIndex(char * filename, uint64_t * nReads){
+	struct rIndex2 * RR;
+	uint64_t nR=0,i;
+	FILE *f;
+	uint64_t fsize;
+
+    if ((f=fopen(filename,"rb"))==NULL) terror("Could not open index input file");
+
+	fseeko(f, 0, SEEK_END);
+	fsize = ftello(f);
+	rewind(f);
+	nR = fsize/sizeof(struct rIndex2);
+
+    if ((RR =(struct rIndex2*) calloc(nR,sizeof(struct rIndex2)))==NULL) terror("Could not allocate index");
+
+	for (i=0; i<nR; i++){
+		if(0 == fread(&RR[i],sizeof(struct rIndex2),1,f)) break;
+	}
+	fclose(f);
+	(*nReads) = nR;
+	return RR;
+}
+
+void write_headers(FILE * f1, FILE * f2, FILE * fO, uint64_t pos1, uint64_t pos2, struct FragFile * f){
+	fseek(f1, pos1, SEEK_SET);
+	char c = fgetc(f1);
+	while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f1); }
+	fprintf(fO, " ALIGNED WITH ");
+	fseek(f2, pos2, SEEK_SET);
+	c = fgetc(f2);
+	while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f2); }
+	fprintf(fO, " LENGTH: %"PRIu64" IDENT: %"PRIu64" STRAND: %c @(%"PRIu64", %"PRIu64")\n", f->length, f->ident, f->strand, f->xStart, f->yStart);
+}
+
+void get_both_seqs(char * fastaX, char * fastaY, uint64_t iniX, uint64_t finX, uint64_t iniY, uint64_t finY, uint64_t posX, uint64_t posY, uint64_t LacX, uint64_t LacY, uint64_t lX, uint64_t lY, FILE * fO){
+	char copyX[TAB_INSERT+1];
+	char copyY[TAB_INSERT+1];
+	memset(copyX, 0x0, TAB_INSERT+1);
+	memset(copyY, 0x0, TAB_INSERT+1);
+	uint64_t atcopyX = 0;
+	uint64_t atcopyY = 0;
+	uint64_t i;
+
+	uint64_t pos_used_x, pos_used_y;
+	
+	// The X one
+	//fseek(fastaX, posX, SEEK_SET);
+	pos_used_x = posX;
+	char cX = fastaX[pos_used_x++];
+	while(cX != '\n'){ cX = fastaX[pos_used_x++]; }
+	uint64_t gpX = iniX - LacX;
+	uint64_t currX = 0, tab_counter;
+	while(currX < gpX){
+		cX = fastaX[pos_used_x++];
+		if(cX != '\n') ++currX;
+	}
+
+	// the other Y
+	//fseek(fastaY, posY, SEEK_SET);
+	pos_used_y = posY;
+	char cY = fastaY[pos_used_y++];
+	while(cY != '\n'){ cY = fastaY[pos_used_y++]; }
+	uint64_t gpY = iniY - LacY;
+	uint64_t currY = 0;
+	while(currY < gpY){
+		cY = fastaY[pos_used_y++];
+		if(cY != '\n') ++currY;
+	}
+
+
+	
+
+	// Reached the region to show
+	currX = 0;
+	currY = 0;
+	cX = fastaX[pos_used_x++];
+	cY = fastaY[pos_used_y++];
+	//fprintf(fO, "\t");
+	tab_counter = 0;
+	while(currX < lX && currY < lY){
+		
+		if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
+		cX = fastaX[pos_used_x++];
+		while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
+
+
+		if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
+		cY = fastaY[pos_used_y++];
+		while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
+
+		while(currX > currY){
+			if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
+			cY = fastaY[pos_used_y++];
+			while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
+		}
+		while(currX < currY){
+			if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
+			cX = fastaX[pos_used_x++];
+			while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
+		}
+
+		++tab_counter;
+		
+
+
+		if(tab_counter >= TAB_INSERT && currX == currY){ 
+			
+			copyX[TAB_INSERT] = '\0';
+			copyY[TAB_INSERT] = '\0';
+			fprintf(fO, "X:\t%.*s\n\t", TAB_INSERT, copyX);
+			for(i=0; i<TAB_INSERT; i++){
+				if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
+			}
+			fprintf(fO, "\nY:\t%.*s\n\n", TAB_INSERT, copyY);
+			tab_counter = 0;
+			atcopyX = 0; atcopyY = 0;
+		}
+	}
+	if(atcopyX > 0){
+		copyX[atcopyX] = '\0';
+		copyY[atcopyY] = '\0';
+		fprintf(fO, "X:\t%.*s\n\t", (int)atcopyX, copyX);
+		for(i=0; i<atcopyX; i++){
+			if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
+		}
+		fprintf(fO, "\nY:\t%.*s\n\n", (int)atcopyY, copyY);
+		atcopyX = 0; atcopyY = 0;
+	} 
+	fprintf(fO, "\n");
+}
+
+
+void get_seq_from_to(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
+	fseek(fasta, pos, SEEK_SET);
+	char c = fgetc(fasta);
+	while(c != '\n'){ c = fgetc(fasta); }
+	uint64_t gp = ini - Lac;
+	uint64_t curr = 0, tab_counter;
+	while(curr < gp){
+		c = fgetc(fasta);
+		if(c != '\n') ++curr;
+	}
+	// Reached the region to show
+	curr = 0;
+	c = fgetc(fasta);
+	fprintf(fO, "\t");
+	tab_counter = 0;
+	while(curr < l){
+		if(c != '\n') fprintf(fO, "%c", c);
+		c = fgetc(fasta);
+		if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
+		if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
+	}
+	fprintf(fO, "\n");
+}
+
+
+void get_seq_from_to_rev(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
+	fseek(fasta, pos, SEEK_SET);
+	char c = fgetc(fasta);
+	while(c != '\n'){ c = fgetc(fasta); }
+	uint64_t gp = ini - Lac;
+	uint64_t curr = 0, tab_counter;
+	while(curr < gp){
+		c = fgetc(fasta);
+		if(c != '\n') ++curr;
+	}
+	// Reached the region to show
+	curr = 0;
+	c = fgetc(fasta);
+	fprintf(fO, "\t");
+	tab_counter = 0;
+	while(curr < l){
+		if(c != '\n') fprintf(fO, "%c", c);
+		c = fgetc(fasta);
+		if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
+		if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
+	}
+	fprintf(fO, "\n");
+}
+
+
+
+int main(int ac, char** av) {
+	FILE* fFrags;
+	struct FragFile frag;
+
+	//fprintf(stdout, "[WARNING] - Remember that if using a CSV make sure that the reverse y coordinates are transformed\n");
+
+	if (ac != 10)
+		terror("USE: ./frags2text fragsFILE.csv fastaX fastaY fastaYrev indexX indexY indexYrev fragsFILE.txt borderSize");
+
+	// prepared for multiple files
+	if ((fFrags = fopen(av[1], "rt")) == NULL)
+		terror("Opening Frags csv file");
+
+	// Open fastas
+
+	FILE * fX = NULL, * fY = NULL, * fYrev = NULL, * fO = NULL;
+	fX = fopen(av[2], "rt");
+	if(fX == NULL) terror("Could not open fasta X file");
+	fY = fopen(av[3], "rt");
+	if(fY == NULL) terror("Could not open fasta Y file");
+	fYrev = fopen(av[4], "rt");
+	if(fYrev == NULL) terror("Could not open fasta Y-rev file");
+
+
+	// Get file lengths
+    fseek(fX, 0, SEEK_END);
+    uint64_t aprox_lenX = ftell(fX);
+    rewind(fX);
+	char * strfastaX = (char *) malloc(aprox_lenX*sizeof(char));
+	fseek(fY, 0, SEEK_END);
+    uint64_t aprox_lenY = ftell(fY);
+    rewind(fY);
+	char * strfastaY = (char *) malloc(aprox_lenY*sizeof(char));
+	fseek(fYrev, 0, SEEK_END);
+    uint64_t aprox_lenYrev = ftell(fYrev);
+    rewind(fYrev);
+	char * strfastaYrev = (char *) malloc(aprox_lenYrev*sizeof(char));
+
+	if(strfastaX == NULL || strfastaY == NULL || strfastaYrev == NULL) terror("Could not allocate string sequences");
+
+	if(aprox_lenX != fread(strfastaX, sizeof(char), aprox_lenX, fX)) terror("Read wrong number of chars at X sequence");
+	if(aprox_lenY != fread(strfastaY, sizeof(char), aprox_lenY, fY)) terror("Read wrong number of chars at Y sequence");
+	if(aprox_lenYrev != fread(strfastaYrev, sizeof(char), aprox_lenYrev, fYrev)) terror("Read wrong number of chars at Y reversed sequence");
+	
+
+	struct rIndex2 * RI_X, * RI_Y, * RI_Yrev;
+
+	
+	uint64_t nReads_X, nReads_Y;
+	RI_X = loadReadsIndex(av[5], &nReads_X);
+	RI_Y = loadReadsIndex(av[6], &nReads_Y);
+	RI_Yrev = loadReadsIndex(av[7], &nReads_Y);
+
+	fO = fopen(av[8], "wt");
+	if(fO == NULL) terror("Could not open output alignments file");
+
+	uint64_t border_size = (uint64_t) atoi(av[9]);
+
+
+	//readSequenceLength(&n1, fFrags);
+	//readSequenceLength(&n2, fFrags);
+	// Skip first lines
+	char buffer[READING_FRAG_BUFFER];
+	// Lengths are in lines 6 and 7
+	//SeqX length          : 249250621
+	int64_t xlen, ylen;
+	int skipper = 0; while(skipper < 18){ 
+		fgets(buffer, READING_FRAG_BUFFER, fFrags); 
+
+		if(skipper == 6){
+			sscanf(buffer, "%*s %*s : %"PRId64, &xlen);
+		}
+		
+		if(skipper == 7){
+			sscanf(buffer, "%*s %*s : %"PRId64, &ylen);
+		}
+		skipper++; 
+	}
+
+	fprintf(stdout, "Sequence lengths: (%"PRId64", %"PRId64")\n", xlen, ylen);
+	
+
+	//readFragment(&frag, fFrags);
+	
+	
+	csv_frag_to_struct_frag(buffer, &frag);
+
+	// RI_X 	is the forward index for fasta file X
+	// RI_Y 	is the forward index for fasta file Y
+	// RI_Yrev 	is the reverse index for fasta file Y
+
+	int exit = 0;
+
+	while (!feof(fFrags) && exit == 0) {
+		
+		//RI[id].pos is position in file of >
+		//RI[id].Lac is the sum of the reads length prior
+
+		if(frag.strand == 'f'){
+			write_headers(fX, fY, fO, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, &frag);
+		}else{
+			write_headers(fX, fYrev, fO, RI_X[frag.seqX].pos, RI_Yrev[(nReads_Y - frag.seqY) - 1].pos, &frag);
+		}
+		
+
+		//get_seq_from_to(fX, fO, frag.xStart, frag.xEnd, RI_X[frag.seqX].pos, RI_X[frag.seqX].Lac, frag.seqX, frag.length, fO);
+
+
+
+		if(frag.strand == 'f'){
+
+			int64_t leftx = MAX(0, (int64_t)frag.xStart - (int64_t)border_size);
+			int64_t rightx = MIN(xlen, (int64_t)frag.xEnd + (int64_t)border_size);
+			int64_t lefty = MAX(0, (int64_t)frag.yStart - (int64_t)border_size);
+			int64_t righty = MIN(ylen, (int64_t)frag.yEnd + (int64_t) border_size);
+
+			int64_t border_left_size = MIN(frag.xStart - leftx, frag.yStart - lefty);
+			int64_t border_right_size = MIN(rightx - frag.xEnd, righty - frag.yEnd);
+
+			get_both_seqs(strfastaX, strfastaY, frag.xStart - border_left_size, frag.xEnd + border_right_size, frag.yStart - border_left_size, frag.yEnd + border_right_size, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, RI_X[frag.seqX].Lac, RI_Y[frag.seqY].Lac, frag.length + border_left_size + border_right_size, frag.length + border_left_size + border_right_size, fO);
+			//get_seq_from_to(fY, fO, frag.yStart, frag.yEnd, RI_Y[frag.seqY].pos, RI_Y[frag.seqY].Lac, frag.seqY, frag.length, fO);
+		}else{
+			uint64_t seqYnew;
+			seqYnew = (nReads_Y - frag.seqY) - 1;
+
+			frag.yStart = ylen - frag.yStart - 1;
+			frag.yEnd = ylen - frag.yEnd - 1;
+
+			int64_t leftx = MAX(0, (int64_t)frag.xStart - (int64_t)border_size);
+            int64_t rightx = MIN(xlen, (int64_t)frag.xEnd + (int64_t)border_size);
+            int64_t lefty = MAX(0, (int64_t)frag.yStart - (int64_t)border_size);
+            int64_t righty = MIN(ylen, (int64_t)frag.yEnd + (int64_t) border_size);
+
+            int64_t border_left_size = MIN(frag.xStart - leftx, frag.yStart - lefty);
+            int64_t border_right_size = MIN(rightx - frag.xEnd, righty - frag.yEnd);
+
+
+
+			get_both_seqs(strfastaX, strfastaYrev,  frag.xStart - border_left_size, frag.xEnd + border_right_size, frag.yStart - border_left_size, frag.yEnd + border_right_size, RI_X[frag.seqX].pos, RI_Yrev[seqYnew].pos, RI_X[frag.seqX].Lac, RI_Yrev[seqYnew].Lac, frag.length + border_left_size + border_right_size, frag.length + border_left_size + border_right_size, fO);
+			//get_seq_from_to_rev(fYrev, fO, frag.yStart, frag.yEnd, RI_Yrev[seqYnew].pos, RI_Yrev[seqYnew].Lac, seqYnew, frag.length, fO);
+		}
+
+		//readFragment(&frag, fFrags);
+		if(!feof(fFrags)){
+			fgets(buffer, READING_FRAG_BUFFER, fFrags);
+			csv_frag_to_struct_frag(buffer, &frag);
+
+		
+		}else{
+			exit = 1;
+		}
+		//printf("line read: %s\n", buffer);
+		//printf("HI im a frag: %"PRIu64", %"PRIu64" - %"PRIu64", %"PRIu64"\n", frag.xStart, frag.xEnd, frag.yStart, frag.yEnd);
+		//getchar();
+	}
+
+	fclose(fFrags);
+	fclose(fX);
+	fclose(fY);
+	fclose(fO);
+
+	free(RI_X);
+	free(RI_Y);
+	free(RI_Yrev);
+
+	free(strfastaX);
+	free(strfastaY);
+	free(strfastaYrev);
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/csvFrags2text.c	Wed Nov 18 08:08:25 2020 +0000
@@ -0,0 +1,347 @@
+/* 
+ *
+ * Sintax: ./frags2text fragsFILE.frags fastaX fastaY fragsFILE.txt
+ 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+
+#define TAB_INSERT 70
+#define READING_FRAG_BUFFER 10000
+
+void csv_frag_to_struct_frag(char * l, struct FragFile * f){
+
+	//0		1		2		3		4		5			6		7		8		9		10			11		12		13
+	//Type	xStart	yStart	xEnd	yEnd	strand(f/r)	block	length	score	ident	similarity	%ident	SeqX	SeqY
+	//Frag	3147493	3006054	3154663	2998884	f	0	7171	20868	6194	72.75	0.86	0	0
+
+	
+	float bin;
+
+	sscanf(l, "%*s %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %c %"PRId64" %"PRIu64" %"PRIu64" %"PRIu64" %f %f %"PRIu64" %"PRIu64, &f->xStart, &f->yStart, &f->xEnd, &f->yEnd, &f->strand, &f->block, &f->length, &f->score, &f->ident, &f->similarity, &bin, &f->seqX, &f->seqY);
+
+	//printf("Read %d items\n", items);
+
+}
+
+struct rIndex2 * loadReadsIndex(char * filename, uint64_t * nReads){
+	struct rIndex2 * RR;
+	uint64_t nR=0,i;
+	FILE *f;
+	uint64_t fsize;
+
+    if ((f=fopen(filename,"rb"))==NULL) terror("Could not open index input file");
+
+	fseeko(f, 0, SEEK_END);
+	fsize = ftello(f);
+	rewind(f);
+	nR = fsize/sizeof(struct rIndex2);
+
+    if ((RR =(struct rIndex2*) calloc(nR,sizeof(struct rIndex2)))==NULL) terror("Could not allocate index");
+
+	for (i=0; i<nR; i++){
+		if(0 == fread(&RR[i],sizeof(struct rIndex2),1,f)) break;
+	}
+	fclose(f);
+	(*nReads) = nR;
+	return RR;
+}
+
+void write_headers(FILE * f1, FILE * f2, FILE * fO, uint64_t pos1, uint64_t pos2, struct FragFile * f){
+	fseek(f1, pos1, SEEK_SET);
+	char c = fgetc(f1);
+	while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f1); }
+	fprintf(fO, " ALIGNED WITH ");
+	fseek(f2, pos2, SEEK_SET);
+	c = fgetc(f2);
+	while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f2); }
+	fprintf(fO, " LENGTH: %"PRIu64" IDENT: %"PRIu64" STRAND: %c @(%"PRIu64", %"PRIu64")\n", f->length, f->ident, f->strand, f->xStart, f->yStart);
+}
+
+void get_both_seqs(char * fastaX, char * fastaY, uint64_t iniX, uint64_t finX, uint64_t iniY, uint64_t finY, uint64_t posX, uint64_t posY, uint64_t LacX, uint64_t LacY, uint64_t lX, uint64_t lY, FILE * fO){
+	char copyX[TAB_INSERT+1];
+	char copyY[TAB_INSERT+1];
+	memset(copyX, 0x0, TAB_INSERT+1);
+	memset(copyY, 0x0, TAB_INSERT+1);
+	uint64_t atcopyX = 0;
+	uint64_t atcopyY = 0;
+	uint64_t i;
+
+	uint64_t pos_used_x, pos_used_y;
+	
+	// The X one
+	//fseek(fastaX, posX, SEEK_SET);
+	pos_used_x = posX;
+	char cX = fastaX[pos_used_x++];
+	while(cX != '\n'){ cX = fastaX[pos_used_x++]; }
+	uint64_t gpX = iniX - LacX;
+	uint64_t currX = 0, tab_counter;
+	while(currX < gpX){
+		cX = fastaX[pos_used_x++];
+		if(cX != '\n') ++currX;
+	}
+
+	// the other Y
+	//fseek(fastaY, posY, SEEK_SET);
+	pos_used_y = posY;
+	char cY = fastaY[pos_used_y++];
+	while(cY != '\n'){ cY = fastaY[pos_used_y++]; }
+	uint64_t gpY = iniY - LacY;
+	uint64_t currY = 0;
+	while(currY < gpY){
+		cY = fastaY[pos_used_y++];
+		if(cY != '\n') ++currY;
+	}
+
+
+	
+
+	// Reached the region to show
+	currX = 0;
+	currY = 0;
+	cX = fastaX[pos_used_x++];
+	cY = fastaY[pos_used_y++];
+	//fprintf(fO, "\t");
+	tab_counter = 0;
+	while(currX < lX && currY < lY){
+		
+		if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
+		cX = fastaX[pos_used_x++];
+		while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
+
+
+		if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
+		cY = fastaY[pos_used_y++];
+		while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
+
+		while(currX > currY){
+			if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
+			cY = fastaY[pos_used_y++];
+			while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
+		}
+		while(currX < currY){
+			if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
+			cX = fastaX[pos_used_x++];
+			while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
+		}
+
+		++tab_counter;
+		
+
+
+		if(tab_counter >= TAB_INSERT && currX == currY){ 
+			
+			copyX[TAB_INSERT] = '\0';
+			copyY[TAB_INSERT] = '\0';
+			fprintf(fO, "X:\t%.*s\n\t", TAB_INSERT, copyX);
+			for(i=0; i<TAB_INSERT; i++){
+				if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
+			}
+			fprintf(fO, "\nY:\t%.*s\n\n", TAB_INSERT, copyY);
+			tab_counter = 0;
+			atcopyX = 0; atcopyY = 0;
+		}
+	}
+	if(atcopyX > 0){
+		copyX[atcopyX] = '\0';
+		copyY[atcopyY] = '\0';
+		fprintf(fO, "X:\t%.*s\n\t", (int)atcopyX, copyX);
+		for(i=0; i<atcopyX; i++){
+			if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
+		}
+		fprintf(fO, "\nY:\t%.*s\n\n", (int)atcopyY, copyY);
+		atcopyX = 0; atcopyY = 0;
+	} 
+	fprintf(fO, "\n");
+}
+
+
+void get_seq_from_to(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
+	fseek(fasta, pos, SEEK_SET);
+	char c = fgetc(fasta);
+	while(c != '\n'){ c = fgetc(fasta); }
+	uint64_t gp = ini - Lac;
+	uint64_t curr = 0, tab_counter;
+	while(curr < gp){
+		c = fgetc(fasta);
+		if(c != '\n') ++curr;
+	}
+	// Reached the region to show
+	curr = 0;
+	c = fgetc(fasta);
+	fprintf(fO, "\t");
+	tab_counter = 0;
+	while(curr < l){
+		if(c != '\n') fprintf(fO, "%c", c);
+		c = fgetc(fasta);
+		if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
+		if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
+	}
+	fprintf(fO, "\n");
+}
+
+
+void get_seq_from_to_rev(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
+	fseek(fasta, pos, SEEK_SET);
+	char c = fgetc(fasta);
+	while(c != '\n'){ c = fgetc(fasta); }
+	uint64_t gp = ini - Lac;
+	uint64_t curr = 0, tab_counter;
+	while(curr < gp){
+		c = fgetc(fasta);
+		if(c != '\n') ++curr;
+	}
+	// Reached the region to show
+	curr = 0;
+	c = fgetc(fasta);
+	fprintf(fO, "\t");
+	tab_counter = 0;
+	while(curr < l){
+		if(c != '\n') fprintf(fO, "%c", c);
+		c = fgetc(fasta);
+		if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
+		if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
+	}
+	fprintf(fO, "\n");
+}
+
+
+
+int main(int ac, char** av) {
+	FILE* fFrags;
+	struct FragFile frag;
+
+	fprintf(stdout, "[WARNING] - Remember that if using a CSV make sure that the reverse y coordinates are transformed\n");
+
+	if (ac != 9)
+		terror("USE: ./frags2text fragsFILE.csv fastaX fastaY fastaYrev indexX indexY indexYrev fragsFILE.txt");
+
+	// prepared for multiple files
+	if ((fFrags = fopen(av[1], "rt")) == NULL)
+		terror("Opening Frags csv file");
+
+	// Open fastas
+
+	FILE * fX = NULL, * fY = NULL, * fYrev = NULL, * fO = NULL;
+	fX = fopen(av[2], "rt");
+	if(fX == NULL) terror("Could not open fasta X file");
+	fY = fopen(av[3], "rt");
+	if(fY == NULL) terror("Could not open fasta Y file");
+	fYrev = fopen(av[4], "rt");
+	if(fYrev == NULL) terror("Could not open fasta Y-rev file");
+
+
+	// Get file lengths
+    fseek(fX, 0, SEEK_END);
+    uint64_t aprox_lenX = ftell(fX);
+    rewind(fX);
+	char * strfastaX = (char *) malloc(aprox_lenX*sizeof(char));
+	fseek(fY, 0, SEEK_END);
+    uint64_t aprox_lenY = ftell(fY);
+    rewind(fY);
+	char * strfastaY = (char *) malloc(aprox_lenY*sizeof(char));
+	fseek(fYrev, 0, SEEK_END);
+    uint64_t aprox_lenYrev = ftell(fYrev);
+    rewind(fYrev);
+	char * strfastaYrev = (char *) malloc(aprox_lenYrev*sizeof(char));
+
+	if(strfastaX == NULL || strfastaY == NULL || strfastaYrev == NULL) terror("Could not allocate string sequences");
+
+	if(aprox_lenX != fread(strfastaX, sizeof(char), aprox_lenX, fX)) terror("Read wrong number of chars at X sequence");
+	if(aprox_lenY != fread(strfastaY, sizeof(char), aprox_lenY, fY)) terror("Read wrong number of chars at Y sequence");
+	if(aprox_lenYrev != fread(strfastaYrev, sizeof(char), aprox_lenYrev, fYrev)) terror("Read wrong number of chars at Y reversed sequence");
+	
+
+	struct rIndex2 * RI_X, * RI_Y, * RI_Yrev;
+
+	
+	uint64_t nReads_X, nReads_Y;
+	RI_X = loadReadsIndex(av[5], &nReads_X);
+	RI_Y = loadReadsIndex(av[6], &nReads_Y);
+	RI_Yrev = loadReadsIndex(av[7], &nReads_Y);
+
+	fO = fopen(av[8], "wt");
+	if(fO == NULL) terror("Could not open output alignments file");
+
+
+	//readSequenceLength(&n1, fFrags);
+	//readSequenceLength(&n2, fFrags);
+	// Skip first lines
+	char buffer[READING_FRAG_BUFFER];
+	int skipper = 0; while(skipper < 18){ fgets(buffer, READING_FRAG_BUFFER, fFrags); skipper++; }
+
+	
+	
+
+	//readFragment(&frag, fFrags);
+	
+	
+	csv_frag_to_struct_frag(buffer, &frag);
+
+	// RI_X 	is the forward index for fasta file X
+	// RI_Y 	is the forward index for fasta file Y
+	// RI_Yrev 	is the reverse index for fasta file Y
+
+	int exit = 0;
+
+	while (!feof(fFrags) && exit == 0) {
+		
+		//RI[id].pos is position in file of >
+		//RI[id].Lac is the sum of the reads length prior
+
+		if(frag.strand == 'f'){
+			write_headers(fX, fY, fO, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, &frag);
+		}else{
+			write_headers(fX, fYrev, fO, RI_X[frag.seqX].pos, RI_Yrev[(nReads_Y - frag.seqY) - 1].pos, &frag);
+		}
+		
+
+		//get_seq_from_to(fX, fO, frag.xStart, frag.xEnd, RI_X[frag.seqX].pos, RI_X[frag.seqX].Lac, frag.seqX, frag.length, fO);
+
+
+
+		if(frag.strand == 'f'){
+			get_both_seqs(strfastaX, strfastaY, frag.xStart, frag.xEnd, frag.yStart, frag.yEnd, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, RI_X[frag.seqX].Lac, RI_Y[frag.seqY].Lac, frag.length, frag.length, fO);
+			//get_seq_from_to(fY, fO, frag.yStart, frag.yEnd, RI_Y[frag.seqY].pos, RI_Y[frag.seqY].Lac, frag.seqY, frag.length, fO);
+		}else{
+			uint64_t seqYnew;
+			seqYnew = (nReads_Y - frag.seqY) - 1;
+			get_both_seqs(strfastaX, strfastaYrev, frag.xStart, frag.xEnd, frag.yStart, frag.yEnd, RI_X[frag.seqX].pos, RI_Yrev[seqYnew].pos, RI_X[frag.seqX].Lac, RI_Yrev[seqYnew].Lac, frag.length, frag.length, fO);
+			//get_seq_from_to_rev(fYrev, fO, frag.yStart, frag.yEnd, RI_Yrev[seqYnew].pos, RI_Yrev[seqYnew].Lac, seqYnew, frag.length, fO);
+		}
+
+		//readFragment(&frag, fFrags);
+		if(!feof(fFrags)){
+			fgets(buffer, READING_FRAG_BUFFER, fFrags);
+			csv_frag_to_struct_frag(buffer, &frag);
+
+		
+		}else{
+			exit = 1;
+		}
+		//printf("line read: %s\n", buffer);
+		//printf("HI im a frag: %"PRIu64", %"PRIu64" - %"PRIu64", %"PRIu64"\n", frag.xStart, frag.xEnd, frag.yStart, frag.yEnd);
+		//getchar();
+	}
+
+	fclose(fFrags);
+	fclose(fX);
+	fclose(fY);
+	fclose(fO);
+
+	free(RI_X);
+	free(RI_Y);
+	free(RI_Yrev);
+
+	free(strfastaX);
+	free(strfastaY);
+	free(strfastaYrev);
+
+	return 0;
+}
--- a/gecko/src/reverseComplement.c	Tue Nov 17 13:04:23 2020 +0000
+++ b/gecko/src/reverseComplement.c	Wed Nov 18 08:08:25 2020 +0000
@@ -15,7 +15,7 @@
 #include <inttypes.h>
 #include "commonFunctions.h"
 
-#define SEQSIZE 500000000
+#define SEQSIZE 2000000000
 #define WSIZE 32
 #define NREADS 1000000