annotate gecko/src/csvExtractBorders.c @ 9:aec70bb1ae27 draft

Uploaded
author bitlab
date Wed, 18 Nov 2020 08:08:25 +0000
parents
children cf4c0c822ca9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
1 /*
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
2 *
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
3 * Syntax: ./frags2text fragsFILE.csv fastaX fastaY fastaYrev indexX indexY indexYrev fragsFILE.txt borderSize
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
4
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
5 */
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
6
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
7 #include <stdio.h>
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
8 #include <stdlib.h>
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
9 #include <string.h>
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
10 #include <inttypes.h>
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
11 #include "structs.h"
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
12 #include "commonFunctions.h"
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
13 #include "comparisonFunctions.h"
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
14
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
15 #define TAB_INSERT 70
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
16 #define READING_FRAG_BUFFER 10000
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
17
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
/*
 * MAX(a,b): the larger of the two arguments (b when they compare equal).
 * Built on a GNU statement expression so that each argument is evaluated
 * exactly once, which makes it safe for arguments with side effects.
 */
#define MAX(a,b) \
    ({ __typeof__ (a) _max_l = (a); \
       __typeof__ (b) _max_r = (b); \
       (_max_l > _max_r) ? _max_l : _max_r; })
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
22
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
/*
 * MIN(a,b): the smaller of the two arguments (b when they compare equal).
 * Built on a GNU statement expression so that each argument is evaluated
 * exactly once, which makes it safe for arguments with side effects.
 */
#define MIN(a,b) \
    ({ __typeof__ (a) _min_l = (a); \
       __typeof__ (b) _min_r = (b); \
       (_min_l < _min_r) ? _min_l : _min_r; })
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
27
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
28 void csv_frag_to_struct_frag(char * l, struct FragFile * f){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
29
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
30 //0 1 2 3 4 5 6 7 8 9 10 11 12 13
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
31 //Type xStart yStart xEnd yEnd strand(f/r) block length score ident similarity %ident SeqX SeqY
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
32 //Frag 3147493 3006054 3154663 2998884 f 0 7171 20868 6194 72.75 0.86 0 0
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
33
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
34
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
35 float bin;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
36
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
37 sscanf(l, "%*s %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" %c %"PRId64" %"PRIu64" %"PRIu64" %"PRIu64" %f %f %"PRIu64" %"PRIu64, &f->xStart, &f->yStart, &f->xEnd, &f->yEnd, &f->strand, &f->block, &f->length, &f->score, &f->ident, &f->similarity, &bin, &f->seqX, &f->seqY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
38
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
39 //printf("Read %d items\n", items);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
40
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
41 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
42
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
43 struct rIndex2 * loadReadsIndex(char * filename, uint64_t * nReads){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
44 struct rIndex2 * RR;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
45 uint64_t nR=0,i;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
46 FILE *f;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
47 uint64_t fsize;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
48
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
49 if ((f=fopen(filename,"rb"))==NULL) terror("Could not open index input file");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
50
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
51 fseeko(f, 0, SEEK_END);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
52 fsize = ftello(f);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
53 rewind(f);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
54 nR = fsize/sizeof(struct rIndex2);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
55
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
56 if ((RR =(struct rIndex2*) calloc(nR,sizeof(struct rIndex2)))==NULL) terror("Could not allocate index");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
57
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
58 for (i=0; i<nR; i++){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
59 if(0 == fread(&RR[i],sizeof(struct rIndex2),1,f)) break;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
60 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
61 fclose(f);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
62 (*nReads) = nR;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
63 return RR;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
64 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
65
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
66 void write_headers(FILE * f1, FILE * f2, FILE * fO, uint64_t pos1, uint64_t pos2, struct FragFile * f){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
67 fseek(f1, pos1, SEEK_SET);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
68 char c = fgetc(f1);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
69 while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f1); }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
70 fprintf(fO, " ALIGNED WITH ");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
71 fseek(f2, pos2, SEEK_SET);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
72 c = fgetc(f2);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
73 while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f2); }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
74 fprintf(fO, " LENGTH: %"PRIu64" IDENT: %"PRIu64" STRAND: %c @(%"PRIu64", %"PRIu64")\n", f->length, f->ident, f->strand, f->xStart, f->yStart);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
75 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
76
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
77 void get_both_seqs(char * fastaX, char * fastaY, uint64_t iniX, uint64_t finX, uint64_t iniY, uint64_t finY, uint64_t posX, uint64_t posY, uint64_t LacX, uint64_t LacY, uint64_t lX, uint64_t lY, FILE * fO){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
78 char copyX[TAB_INSERT+1];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
79 char copyY[TAB_INSERT+1];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
80 memset(copyX, 0x0, TAB_INSERT+1);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
81 memset(copyY, 0x0, TAB_INSERT+1);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
82 uint64_t atcopyX = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
83 uint64_t atcopyY = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
84 uint64_t i;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
85
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
86 uint64_t pos_used_x, pos_used_y;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
87
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
88 // The X one
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
89 //fseek(fastaX, posX, SEEK_SET);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
90 pos_used_x = posX;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
91 char cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
92 while(cX != '\n'){ cX = fastaX[pos_used_x++]; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
93 uint64_t gpX = iniX - LacX;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
94 uint64_t currX = 0, tab_counter;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
95 while(currX < gpX){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
96 cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
97 if(cX != '\n') ++currX;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
98 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
99
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
100 // the other Y
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
101 //fseek(fastaY, posY, SEEK_SET);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
102 pos_used_y = posY;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
103 char cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
104 while(cY != '\n'){ cY = fastaY[pos_used_y++]; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
105 uint64_t gpY = iniY - LacY;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
106 uint64_t currY = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
107 while(currY < gpY){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
108 cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
109 if(cY != '\n') ++currY;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
110 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
111
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
112
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
113
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
114
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
115 // Reached the region to show
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
116 currX = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
117 currY = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
118 cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
119 cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
120 //fprintf(fO, "\t");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
121 tab_counter = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
122 while(currX < lX && currY < lY){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
123
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
124 if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
125 cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
126 while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
127
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
128
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
129 if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
130 cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
131 while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
132
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
133 while(currX > currY){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
134 if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
135 cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
136 while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
137 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
138 while(currX < currY){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
139 if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
140 cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
141 while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
142 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
143
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
144 ++tab_counter;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
145
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
146
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
147
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
148 if(tab_counter >= TAB_INSERT && currX == currY){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
149
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
150 copyX[TAB_INSERT] = '\0';
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
151 copyY[TAB_INSERT] = '\0';
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
152 fprintf(fO, "X:\t%.*s\n\t", TAB_INSERT, copyX);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
153 for(i=0; i<TAB_INSERT; i++){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
154 if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
155 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
156 fprintf(fO, "\nY:\t%.*s\n\n", TAB_INSERT, copyY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
157 tab_counter = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
158 atcopyX = 0; atcopyY = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
159 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
160 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
161 if(atcopyX > 0){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
162 copyX[atcopyX] = '\0';
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
163 copyY[atcopyY] = '\0';
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
164 fprintf(fO, "X:\t%.*s\n\t", (int)atcopyX, copyX);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
165 for(i=0; i<atcopyX; i++){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
166 if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
167 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
168 fprintf(fO, "\nY:\t%.*s\n\n", (int)atcopyY, copyY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
169 atcopyX = 0; atcopyY = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
170 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
171 fprintf(fO, "\n");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
172 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
173
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
174
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
175 void get_seq_from_to(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
176 fseek(fasta, pos, SEEK_SET);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
177 char c = fgetc(fasta);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
178 while(c != '\n'){ c = fgetc(fasta); }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
179 uint64_t gp = ini - Lac;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
180 uint64_t curr = 0, tab_counter;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
181 while(curr < gp){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
182 c = fgetc(fasta);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
183 if(c != '\n') ++curr;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
184 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
185 // Reached the region to show
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
186 curr = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
187 c = fgetc(fasta);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
188 fprintf(fO, "\t");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
189 tab_counter = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
190 while(curr < l){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
191 if(c != '\n') fprintf(fO, "%c", c);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
192 c = fgetc(fasta);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
193 if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
194 if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
195 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
196 fprintf(fO, "\n");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
197 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
198
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
199
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
/*
 * Print l characters of a region of the reverse fasta file.
 *
 * The work is exactly the one done by get_seq_from_to(); the caller selects
 * the reverse strand by passing the reverse fasta file and its own index
 * values, so this function simply forwards its arguments instead of keeping
 * a second copy of the same loop.
 *
 * Parameters have the same meaning as in get_seq_from_to().
 */
void get_seq_from_to_rev(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
    get_seq_from_to(fasta, output, ini, fin, pos, Lac, seqNum, l, fO);
}
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
223
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
224
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
225
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
226 int main(int ac, char** av) {
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
227 FILE* fFrags;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
228 struct FragFile frag;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
229
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
230 //fprintf(stdout, "[WARNING] - Remember that if using a CSV make sure that the reverse y coordinates are transformed\n");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
231
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
232 if (ac != 10)
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
233 terror("USE: ./frags2text fragsFILE.csv fastaX fastaY fastaYrev indexX indexY indexYrev fragsFILE.txt borderSize");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
234
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
235 // prepared for multiple files
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
236 if ((fFrags = fopen(av[1], "rt")) == NULL)
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
237 terror("Opening Frags csv file");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
238
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
239 // Open fastas
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
240
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
241 FILE * fX = NULL, * fY = NULL, * fYrev = NULL, * fO = NULL;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
242 fX = fopen(av[2], "rt");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
243 if(fX == NULL) terror("Could not open fasta X file");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
244 fY = fopen(av[3], "rt");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
245 if(fY == NULL) terror("Could not open fasta Y file");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
246 fYrev = fopen(av[4], "rt");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
247 if(fYrev == NULL) terror("Could not open fasta Y-rev file");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
248
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
249
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
250 // Get file lengths
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
251 fseek(fX, 0, SEEK_END);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
252 uint64_t aprox_lenX = ftell(fX);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
253 rewind(fX);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
254 char * strfastaX = (char *) malloc(aprox_lenX*sizeof(char));
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
255 fseek(fY, 0, SEEK_END);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
256 uint64_t aprox_lenY = ftell(fY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
257 rewind(fY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
258 char * strfastaY = (char *) malloc(aprox_lenY*sizeof(char));
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
259 fseek(fYrev, 0, SEEK_END);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
260 uint64_t aprox_lenYrev = ftell(fYrev);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
261 rewind(fYrev);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
262 char * strfastaYrev = (char *) malloc(aprox_lenYrev*sizeof(char));
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
263
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
264 if(strfastaX == NULL || strfastaY == NULL || strfastaYrev == NULL) terror("Could not allocate string sequences");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
265
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
266 if(aprox_lenX != fread(strfastaX, sizeof(char), aprox_lenX, fX)) terror("Read wrong number of chars at X sequence");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
267 if(aprox_lenY != fread(strfastaY, sizeof(char), aprox_lenY, fY)) terror("Read wrong number of chars at Y sequence");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
268 if(aprox_lenYrev != fread(strfastaYrev, sizeof(char), aprox_lenYrev, fYrev)) terror("Read wrong number of chars at Y reversed sequence");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
269
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
270
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
271 struct rIndex2 * RI_X, * RI_Y, * RI_Yrev;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
272
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
273
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
274 uint64_t nReads_X, nReads_Y;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
275 RI_X = loadReadsIndex(av[5], &nReads_X);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
276 RI_Y = loadReadsIndex(av[6], &nReads_Y);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
277 RI_Yrev = loadReadsIndex(av[7], &nReads_Y);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
278
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
279 fO = fopen(av[8], "wt");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
280 if(fO == NULL) terror("Could not open output alignments file");
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
281
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
282 uint64_t border_size = (uint64_t) atoi(av[9]);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
283
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
284
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
285 //readSequenceLength(&n1, fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
286 //readSequenceLength(&n2, fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
287 // Skip first lines
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
288 char buffer[READING_FRAG_BUFFER];
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
289 // Lengths are in lines 6 and 7
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
290 //SeqX length : 249250621
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
291 int64_t xlen, ylen;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
292 int skipper = 0; while(skipper < 18){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
293 fgets(buffer, READING_FRAG_BUFFER, fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
294
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
295 if(skipper == 6){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
296 sscanf(buffer, "%*s %*s : %"PRId64, &xlen);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
297 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
298
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
299 if(skipper == 7){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
300 sscanf(buffer, "%*s %*s : %"PRId64, &ylen);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
301 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
302 skipper++;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
303 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
304
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
305 fprintf(stdout, "Sequence lengths: (%"PRId64", %"PRId64")\n", xlen, ylen);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
306
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
307
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
308 //readFragment(&frag, fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
309
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
310
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
311 csv_frag_to_struct_frag(buffer, &frag);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
312
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
313 // RI_X is the forward index for fasta file X
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
314 // RI_Y is the forward index for fasta file Y
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
315 // RI_Yrev is the reverse index for fasta file Y
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
316
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
317 int exit = 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
318
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
319 while (!feof(fFrags) && exit == 0) {
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
320
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
321 //RI[id].pos is position in file of >
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
322 //RI[id].Lac is the sum of the reads length prior
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
323
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
324 if(frag.strand == 'f'){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
325 write_headers(fX, fY, fO, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, &frag);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
326 }else{
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
327 write_headers(fX, fYrev, fO, RI_X[frag.seqX].pos, RI_Yrev[(nReads_Y - frag.seqY) - 1].pos, &frag);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
328 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
329
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
330
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
331 //get_seq_from_to(fX, fO, frag.xStart, frag.xEnd, RI_X[frag.seqX].pos, RI_X[frag.seqX].Lac, frag.seqX, frag.length, fO);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
332
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
333
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
334
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
335 if(frag.strand == 'f'){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
336
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
337 int64_t leftx = MAX(0, (int64_t)frag.xStart - (int64_t)border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
338 int64_t rightx = MIN(xlen, (int64_t)frag.xEnd + (int64_t)border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
339 int64_t lefty = MAX(0, (int64_t)frag.yStart - (int64_t)border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
340 int64_t righty = MIN(ylen, (int64_t)frag.yEnd + (int64_t) border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
341
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
342 int64_t border_left_size = MIN(frag.xStart - leftx, frag.yStart - lefty);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
343 int64_t border_right_size = MIN(rightx - frag.xEnd, righty - frag.yEnd);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
344
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
345 get_both_seqs(strfastaX, strfastaY, frag.xStart - border_left_size, frag.xEnd + border_right_size, frag.yStart - border_left_size, frag.yEnd + border_right_size, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, RI_X[frag.seqX].Lac, RI_Y[frag.seqY].Lac, frag.length + border_left_size + border_right_size, frag.length + border_left_size + border_right_size, fO);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
346 //get_seq_from_to(fY, fO, frag.yStart, frag.yEnd, RI_Y[frag.seqY].pos, RI_Y[frag.seqY].Lac, frag.seqY, frag.length, fO);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
347 }else{
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
348 uint64_t seqYnew;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
349 seqYnew = (nReads_Y - frag.seqY) - 1;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
350
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
351 frag.yStart = ylen - frag.yStart - 1;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
352 frag.yEnd = ylen - frag.yEnd - 1;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
353
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
354 int64_t leftx = MAX(0, (int64_t)frag.xStart - (int64_t)border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
355 int64_t rightx = MIN(xlen, (int64_t)frag.xEnd + (int64_t)border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
356 int64_t lefty = MAX(0, (int64_t)frag.yStart - (int64_t)border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
357 int64_t righty = MIN(ylen, (int64_t)frag.yEnd + (int64_t) border_size);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
358
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
359 int64_t border_left_size = MIN(frag.xStart - leftx, frag.yStart - lefty);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
360 int64_t border_right_size = MIN(rightx - frag.xEnd, righty - frag.yEnd);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
361
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
362
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
363
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
364 get_both_seqs(strfastaX, strfastaYrev, frag.xStart - border_left_size, frag.xEnd + border_right_size, frag.yStart - border_left_size, frag.yEnd + border_right_size, RI_X[frag.seqX].pos, RI_Yrev[seqYnew].pos, RI_X[frag.seqX].Lac, RI_Yrev[seqYnew].Lac, frag.length + border_left_size + border_right_size, frag.length + border_left_size + border_right_size, fO);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
365 //get_seq_from_to_rev(fYrev, fO, frag.yStart, frag.yEnd, RI_Yrev[seqYnew].pos, RI_Yrev[seqYnew].Lac, seqYnew, frag.length, fO);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
366 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
367
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
368 //readFragment(&frag, fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
369 if(!feof(fFrags)){
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
370 fgets(buffer, READING_FRAG_BUFFER, fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
371 csv_frag_to_struct_frag(buffer, &frag);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
372
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
373
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
374 }else{
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
375 exit = 1;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
376 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
377 //printf("line read: %s\n", buffer);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
378 //printf("HI im a frag: %"PRIu64", %"PRIu64" - %"PRIu64", %"PRIu64"\n", frag.xStart, frag.xEnd, frag.yStart, frag.yEnd);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
379 //getchar();
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
380 }
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
381
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
382 fclose(fFrags);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
383 fclose(fX);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
384 fclose(fY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
385 fclose(fO);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
386
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
387 free(RI_X);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
388 free(RI_Y);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
389 free(RI_Yrev);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
390
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
391 free(strfastaX);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
392 free(strfastaY);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
393 free(strfastaYrev);
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
394
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
395 return 0;
aec70bb1ae27 Uploaded
bitlab
parents:
diff changeset
396 }