Mercurial > repos > portiahollyoak > fastuniq
comparison source/fastq_pair_array.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author | portiahollyoak |
---|---|
date | Thu, 02 Jun 2016 11:34:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:816cb55b5a2d |
---|---|
1 /**************************************************************************** | |
2 * The 'FASTQ_PAIR_ARRAY' structure group is used to store an array of | |
3 * paired FASTQ reads, together with the basic operations on that array. | |
4 * | |
5 * This file was written by Haibin Xu, December 2011. | |
6 ****************************************************************************/ | |
7 | |
8 #include "fastq_pair_array.h" | |
9 | |
10 FASTQ_PAIR_ARRAY *fastq_pair_array_create() | |
11 { | |
12 /* create a FASTQ pair array. If successful, return the point to it, | |
13 * otherwise, return NULL. | |
14 */ | |
15 FASTQ_PAIR_ARRAY *fq_pair_array; | |
16 | |
17 if((fq_pair_array=(FASTQ_PAIR_ARRAY *)malloc(sizeof(FASTQ_PAIR_ARRAY)))==NULL) | |
18 return NULL; | |
19 | |
20 if((fq_pair_array->array= | |
21 (FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK)))==NULL) | |
22 { | |
23 free(fq_pair_array); | |
24 return NULL; | |
25 } | |
26 | |
27 fq_pair_array->last=fq_pair_array->array; | |
28 fq_pair_array->block_num=1; | |
29 fq_pair_array->fastq_pair_num=0; | |
30 | |
31 fq_pair_array->array->previous=NULL; | |
32 fq_pair_array->array->next=NULL; | |
33 fq_pair_array->array->num=0; | |
34 | |
35 fq_pair_array->index=NULL; | |
36 | |
37 return fq_pair_array; | |
38 } | |
39 | |
40 int fastq_pair_array_remove(FASTQ_PAIR_ARRAY *fq_pair_array) | |
41 { | |
42 /* free the FASTQ pair array. If successful, return 0, otherwise | |
43 * return 1. | |
44 */ | |
45 long i; | |
46 FASTQ_PAIR_ARRAY_BLOCK *fq_pair_array_block; | |
47 | |
48 if(fq_pair_array==NULL) | |
49 return 1; | |
50 | |
51 fq_pair_array_block=fq_pair_array->last; | |
52 for(;fq_pair_array_block!=NULL;) | |
53 { | |
54 for(i=0;i<fq_pair_array_block->num;i++) | |
55 fastq_pair_remove(fq_pair_array_block->block[i]); | |
56 | |
57 fq_pair_array_block=fq_pair_array_block->previous; | |
58 } | |
59 | |
60 if(fq_pair_array->index!=NULL) | |
61 free(fq_pair_array->index); | |
62 | |
63 return 0; | |
64 } | |
65 | |
66 int fastq_pair_array_append(FASTQ_PAIR *fq_pair, FASTQ_PAIR_ARRAY *fq_pair_array) | |
67 { | |
68 /* append a new FASTQ pair to the array. if successful, return 0, otherwise | |
69 * return 1. | |
70 */ | |
71 FASTQ_PAIR_ARRAY_BLOCK *block_temp; | |
72 | |
73 if(fq_pair_array==NULL || fq_pair==NULL) | |
74 return 1; | |
75 | |
76 if(fq_pair_array->last->num<FASTQ_PAIR_ARRAY_BLOCK_SIZE) | |
77 { | |
78 /* append to the last array_block */ | |
79 fq_pair_array->last->block[fq_pair_array->last->num++]=fq_pair; | |
80 fq_pair_array->fastq_pair_num++; | |
81 } | |
82 else | |
83 { | |
84 /* add a new array_block, amd append to it */ | |
85 if((block_temp= | |
86 (FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK)))==NULL) | |
87 return 0; | |
88 | |
89 fq_pair_array->last->next=block_temp; | |
90 block_temp->previous=fq_pair_array->last; | |
91 fq_pair_array->last=block_temp; | |
92 fq_pair_array->block_num++; | |
93 | |
94 block_temp->num=0; | |
95 block_temp->block[block_temp->num++]=fq_pair; | |
96 fq_pair_array->fastq_pair_num++; | |
97 } | |
98 | |
99 return 0; | |
100 } | |
101 | |
102 int fastq_pair_array_generate_index(FASTQ_PAIR_ARRAY *fq_pair_array) | |
103 { | |
104 /* generate the index for given FASTQ_PAIR, if successful, return 0, otherwise | |
105 * return 1. | |
106 */ | |
107 FASTQ_PAIR_ARRAY_BLOCK **temp_index; | |
108 FASTQ_PAIR_ARRAY_BLOCK *fq_array_block; | |
109 long i; | |
110 | |
111 if(fq_pair_array==NULL) | |
112 return 1; | |
113 | |
114 if(fq_pair_array->index!=NULL) | |
115 { | |
116 free(fq_pair_array->index); | |
117 fq_pair_array->index=NULL; | |
118 } | |
119 | |
120 if((temp_index=(FASTQ_PAIR_ARRAY_BLOCK **)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK *)*(fq_pair_array->block_num)))==NULL) | |
121 return 1; | |
122 | |
123 fq_array_block=fq_pair_array->array; | |
124 for(i=0;i<fq_pair_array->block_num;i++) | |
125 { | |
126 temp_index[i]=fq_array_block; | |
127 fq_array_block=fq_array_block->next; | |
128 } | |
129 | |
130 fq_pair_array->index=temp_index; | |
131 | |
132 return 0; | |
133 | |
134 } | |
135 | |
136 FASTQ_PAIR **fastq_pair_array_get_pointer(FASTQ_PAIR_ARRAY *fq_pair_array, long position) | |
137 { | |
138 /* get double pointer to individual fastq_pair member at specific position | |
139 * in the array, if successful, return the double pointer, otherwise | |
140 * return NULL | |
141 */ | |
142 FASTQ_PAIR_ARRAY_BLOCK *fq_array_block; | |
143 long block_num, num; | |
144 long i; | |
145 | |
146 if(fq_pair_array==NULL || position<=0 || position>fq_pair_array->fastq_pair_num) | |
147 return NULL; | |
148 | |
149 block_num=position/FASTQ_PAIR_ARRAY_BLOCK_SIZE; | |
150 num=position%FASTQ_PAIR_ARRAY_BLOCK_SIZE; | |
151 | |
152 if(num==0) | |
153 num=FASTQ_PAIR_ARRAY_BLOCK_SIZE; | |
154 else | |
155 block_num++; | |
156 | |
157 if(fq_pair_array->index==NULL) | |
158 { | |
159 fq_array_block=fq_pair_array->array; | |
160 for(i=1;i<block_num;i++) | |
161 fq_array_block=fq_array_block->next; | |
162 | |
163 return &fq_array_block->block[num-1]; | |
164 } | |
165 else | |
166 return &fq_pair_array->index[block_num-1]->block[num-1]; | |
167 | |
168 return NULL; | |
169 } | |
170 | |
171 int fastq_pair_array_merge(FASTQ_PAIR_ARRAY *fq_pair_array, | |
172 FASTQ_PAIR_ARRAY *temp_fq_pair_array, | |
173 long low, long middle, long high) | |
174 { | |
175 /* merge the two sorted part in array, low-middle and middle-high, into a | |
176 * single sorted order. If successful, return 0, otherwise return 1. | |
177 */ | |
178 long i, begin1, end1, begin2, end2; | |
179 FASTQ_PAIR **fq_pair_current1, **fq_pair_current2; | |
180 FASTQ_PAIR **temp_fq_pair_current; | |
181 | |
182 if(fq_pair_array==NULL || temp_fq_pair_array==NULL || | |
183 low > middle || middle > high || | |
184 fq_pair_array->fastq_pair_num!=temp_fq_pair_array->fastq_pair_num) | |
185 return 1; | |
186 | |
187 begin1=low; | |
188 end1=middle; | |
189 begin2=middle+1; | |
190 end2=high; | |
191 | |
192 /* merge processing */ | |
193 for(i = low; begin1 <= end1 && begin2 <= end2;i++) | |
194 { | |
195 fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, begin1); | |
196 fq_pair_current2=fastq_pair_array_get_pointer(fq_pair_array, begin2); | |
197 | |
198 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i); | |
199 | |
200 if(fastq_pair_compare_tight(*fq_pair_current1, *fq_pair_current2)<=0) | |
201 { | |
202 *temp_fq_pair_current=*fq_pair_current1; | |
203 begin1++; | |
204 } | |
205 else | |
206 { | |
207 *temp_fq_pair_current=*fq_pair_current2; | |
208 begin2++; | |
209 } | |
210 } | |
211 | |
212 /* moving the remaining data to temp_fq_pair_array */ | |
213 if(begin1<=end1) | |
214 { | |
215 for(;begin1<=end1;) | |
216 { | |
217 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i++); | |
218 fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, begin1++); | |
219 *temp_fq_pair_current=*fq_pair_current1; | |
220 } | |
221 } | |
222 if(begin2<=end2) | |
223 { | |
224 for(;begin2<=end2;) | |
225 { | |
226 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i++); | |
227 fq_pair_current2=fastq_pair_array_get_pointer(fq_pair_array, begin2++); | |
228 *temp_fq_pair_current=*fq_pair_current2; | |
229 } | |
230 } | |
231 | |
232 /* moving the merged data to original position 'fq_pair_array' */ | |
233 for(i=low;i<=high;i++) | |
234 { | |
235 fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, i); | |
236 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i); | |
237 *fq_pair_current1=*temp_fq_pair_current; | |
238 } | |
239 | |
240 return 0; | |
241 } | |
242 | |
243 int fastq_pair_array_sort(FASTQ_PAIR_ARRAY *fq_pair_array, FASTQ_PAIR_ARRAY *temp_fq_pair_array, | |
244 long first, long last) | |
245 { | |
246 /* sort the FASTQ pair array. If successful, return 0, otherwise | |
247 * return 1 | |
248 */ | |
249 long mid; | |
250 | |
251 if(first<last) | |
252 { | |
253 mid=(first+last)/2; | |
254 fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, first, mid); | |
255 fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, mid+1, last); | |
256 fastq_pair_array_merge(fq_pair_array, temp_fq_pair_array, first, mid, last); | |
257 } | |
258 | |
259 return 0; | |
260 } | |
261 | |
262 int fastq_pair_array_printf(FASTQ_PAIR_ARRAY *fq_pair_array, FILE *fp_out1, FILE *fp_out2, | |
263 char *format, int serial_flag, int flag_uniq) | |
264 { | |
265 /* write the pair-end reads in the array in FASTA or FASTQ format into two | |
266 * output files(format='fa' or 'fq') or in FASTA format into a single output | |
267 * file(format="fa" and fp_out2==NULL) using the original description | |
268 * (serial_flag=0) or a new serial number(serial_flag=1). Output all sequences | |
269 * (flag_uniq==0), or unique ones(flag_uniq==1). If successful, return 0, | |
270 * otherwise return 1. | |
271 */ | |
272 long i, k; | |
273 FASTQ_PAIR **temp_fq_pair, **temp_fq_pair_old; | |
274 | |
275 if(flag_uniq==0) | |
276 { | |
277 for(i=1;i<=fq_pair_array->fastq_pair_num;i++) | |
278 { | |
279 temp_fq_pair=fastq_pair_array_get_pointer(fq_pair_array, i); | |
280 | |
281 if(serial_flag==0) | |
282 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, format, -1); | |
283 else | |
284 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, format, i); | |
285 } | |
286 } | |
287 else | |
288 { | |
289 temp_fq_pair_old=fastq_pair_array_get_pointer(fq_pair_array, 1); | |
290 | |
291 /* the fastq_pair_array contain only one read-pair, output it */ | |
292 if(fq_pair_array->fastq_pair_num==1) | |
293 { | |
294 if(serial_flag==0) | |
295 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
296 format, -1); | |
297 else | |
298 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
299 format, k++); | |
300 } | |
301 | |
302 /* compare and output */ | |
303 for(i=2, k=1;i<=fq_pair_array->fastq_pair_num;i++) | |
304 { | |
305 temp_fq_pair=fastq_pair_array_get_pointer(fq_pair_array, i); | |
306 if(fastq_pair_compare_loose(*temp_fq_pair_old, *temp_fq_pair)!=0) | |
307 { | |
308 if(serial_flag==0) | |
309 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
310 format, -1); | |
311 else | |
312 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
313 format, k++); | |
314 | |
315 temp_fq_pair_old=temp_fq_pair; | |
316 | |
317 if(i==fq_pair_array->fastq_pair_num) | |
318 { | |
319 if(serial_flag==0) | |
320 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
321 format, -1); | |
322 else | |
323 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
324 format, k++); | |
325 } | |
326 } | |
327 else | |
328 { | |
329 if(fastq_pair_get_left_length(*temp_fq_pair_old) <= fastq_pair_get_left_length(*temp_fq_pair) && | |
330 fastq_pair_get_right_length(*temp_fq_pair_old) <= fastq_pair_get_right_length(*temp_fq_pair)) | |
331 { | |
332 temp_fq_pair_old=temp_fq_pair; | |
333 | |
334 if(i==fq_pair_array->fastq_pair_num) | |
335 { | |
336 if(serial_flag==0) | |
337 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
338 format, -1); | |
339 else | |
340 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
341 format, k++); | |
342 } | |
343 } | |
344 else | |
345 { | |
346 if(serial_flag==0) | |
347 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
348 format, -1); | |
349 else | |
350 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
351 format, k++); | |
352 | |
353 temp_fq_pair_old=temp_fq_pair; | |
354 } | |
355 } | |
356 } | |
357 } | |
358 return 0; | |
359 } | |
360 | |
361 | |
362 | |
363 | |
364 | |
365 | |
366 | |
367 | |
368 | |
369 | |
370 | |
371 | |
372 | |
373 | |
374 | |
375 | |
376 | |
377 | |
378 |