Mercurial > repos > portiahollyoak > fastuniq
diff source/fastq_pair_array.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author | portiahollyoak |
---|---|
date | Thu, 02 Jun 2016 11:34:51 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/fastq_pair_array.c Thu Jun 02 11:34:51 2016 -0400 @@ -0,0 +1,378 @@ +/**************************************************************************** + * The 'FASTQ_PAIR_ARRAY' structure group was used to store a array of + * paired FASTQ reads, including basic operation function as well. + * + * This file was written by Haibin Xu, December 2011. + ****************************************************************************/ + +#include "fastq_pair_array.h" + +FASTQ_PAIR_ARRAY *fastq_pair_array_create() +{ + /* create a FASTQ pair array. If successful, return the point to it, + * otherwise, return NULL. + */ + FASTQ_PAIR_ARRAY *fq_pair_array; + + if((fq_pair_array=(FASTQ_PAIR_ARRAY *)malloc(sizeof(FASTQ_PAIR_ARRAY)))==NULL) + return NULL; + + if((fq_pair_array->array= + (FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK)))==NULL) + { + free(fq_pair_array); + return NULL; + } + + fq_pair_array->last=fq_pair_array->array; + fq_pair_array->block_num=1; + fq_pair_array->fastq_pair_num=0; + + fq_pair_array->array->previous=NULL; + fq_pair_array->array->next=NULL; + fq_pair_array->array->num=0; + + fq_pair_array->index=NULL; + + return fq_pair_array; +} + +int fastq_pair_array_remove(FASTQ_PAIR_ARRAY *fq_pair_array) +{ + /* free the FASTQ pair array. If successful, return 0, otherwise + * return 1. + */ + long i; + FASTQ_PAIR_ARRAY_BLOCK *fq_pair_array_block; + + if(fq_pair_array==NULL) + return 1; + + fq_pair_array_block=fq_pair_array->last; + for(;fq_pair_array_block!=NULL;) + { + for(i=0;i<fq_pair_array_block->num;i++) + fastq_pair_remove(fq_pair_array_block->block[i]); + + fq_pair_array_block=fq_pair_array_block->previous; + } + + if(fq_pair_array->index!=NULL) + free(fq_pair_array->index); + + return 0; +} + +int fastq_pair_array_append(FASTQ_PAIR *fq_pair, FASTQ_PAIR_ARRAY *fq_pair_array) +{ + /* append a new FASTQ pair to the array. if successful, return 0, otherwise + * return 1. + */ + FASTQ_PAIR_ARRAY_BLOCK *block_temp; + + if(fq_pair_array==NULL || fq_pair==NULL) + return 1; + + if(fq_pair_array->last->num<FASTQ_PAIR_ARRAY_BLOCK_SIZE) + { + /* append to the last array_block */ + fq_pair_array->last->block[fq_pair_array->last->num++]=fq_pair; + fq_pair_array->fastq_pair_num++; + } + else + { + /* add a new array_block, amd append to it */ + if((block_temp= + (FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK)))==NULL) + return 0; + + fq_pair_array->last->next=block_temp; + block_temp->previous=fq_pair_array->last; + fq_pair_array->last=block_temp; + fq_pair_array->block_num++; + + block_temp->num=0; + block_temp->block[block_temp->num++]=fq_pair; + fq_pair_array->fastq_pair_num++; + } + + return 0; +} + +int fastq_pair_array_generate_index(FASTQ_PAIR_ARRAY *fq_pair_array) +{ + /* generate the index for given FASTQ_PAIR, if successful, return 0, otherwise + * return 1. + */ + FASTQ_PAIR_ARRAY_BLOCK **temp_index; + FASTQ_PAIR_ARRAY_BLOCK *fq_array_block; + long i; + + if(fq_pair_array==NULL) + return 1; + + if(fq_pair_array->index!=NULL) + { + free(fq_pair_array->index); + fq_pair_array->index=NULL; + } + + if((temp_index=(FASTQ_PAIR_ARRAY_BLOCK **)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK *)*(fq_pair_array->block_num)))==NULL) + return 1; + + fq_array_block=fq_pair_array->array; + for(i=0;i<fq_pair_array->block_num;i++) + { + temp_index[i]=fq_array_block; + fq_array_block=fq_array_block->next; + } + + fq_pair_array->index=temp_index; + + return 0; + +} + +FASTQ_PAIR **fastq_pair_array_get_pointer(FASTQ_PAIR_ARRAY *fq_pair_array, long position) +{ + /* get double pointer to individual fastq_pair member at specific position + * in the array, if successful, return the double pointer, otherwise + * return NULL + */ + FASTQ_PAIR_ARRAY_BLOCK *fq_array_block; + long block_num, num; + long i; + + if(fq_pair_array==NULL || position<=0 || position>fq_pair_array->fastq_pair_num) + return NULL; + + block_num=position/FASTQ_PAIR_ARRAY_BLOCK_SIZE; + num=position%FASTQ_PAIR_ARRAY_BLOCK_SIZE; + + if(num==0) + num=FASTQ_PAIR_ARRAY_BLOCK_SIZE; + else + block_num++; + + if(fq_pair_array->index==NULL) + { + fq_array_block=fq_pair_array->array; + for(i=1;i<block_num;i++) + fq_array_block=fq_array_block->next; + + return &fq_array_block->block[num-1]; + } + else + return &fq_pair_array->index[block_num-1]->block[num-1]; + + return NULL; +} + +int fastq_pair_array_merge(FASTQ_PAIR_ARRAY *fq_pair_array, + FASTQ_PAIR_ARRAY *temp_fq_pair_array, + long low, long middle, long high) +{ + /* merge the two sorted part in array, low-middle and middle-high, into a + * single sorted order. If successful, return 0, otherwise return 1. + */ + long i, begin1, end1, begin2, end2; + FASTQ_PAIR **fq_pair_current1, **fq_pair_current2; + FASTQ_PAIR **temp_fq_pair_current; + + if(fq_pair_array==NULL || temp_fq_pair_array==NULL || + low > middle || middle > high || + fq_pair_array->fastq_pair_num!=temp_fq_pair_array->fastq_pair_num) + return 1; + + begin1=low; + end1=middle; + begin2=middle+1; + end2=high; + + /* merge processing */ + for(i = low; begin1 <= end1 && begin2 <= end2;i++) + { + fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, begin1); + fq_pair_current2=fastq_pair_array_get_pointer(fq_pair_array, begin2); + + temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i); + + if(fastq_pair_compare_tight(*fq_pair_current1, *fq_pair_current2)<=0) + { + *temp_fq_pair_current=*fq_pair_current1; + begin1++; + } + else + { + *temp_fq_pair_current=*fq_pair_current2; + begin2++; + } + } + + /* moving the remaining data to temp_fq_pair_array */ + if(begin1<=end1) + { + for(;begin1<=end1;) + { + temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i++); + fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, begin1++); + *temp_fq_pair_current=*fq_pair_current1; + } + } + if(begin2<=end2) + { + for(;begin2<=end2;) + { + temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i++); + fq_pair_current2=fastq_pair_array_get_pointer(fq_pair_array, begin2++); + *temp_fq_pair_current=*fq_pair_current2; + } + } + + /* moving the merged data to original position 'fq_pair_array' */ + for(i=low;i<=high;i++) + { + fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, i); + temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i); + *fq_pair_current1=*temp_fq_pair_current; + } + + return 0; +} + +int fastq_pair_array_sort(FASTQ_PAIR_ARRAY *fq_pair_array, FASTQ_PAIR_ARRAY *temp_fq_pair_array, + long first, long last) +{ + /* sort the FASTQ pair array. If successful, return 0, otherwise + * return 1 + */ + long mid; + + if(first<last) + { + mid=(first+last)/2; + fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, first, mid); + fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, mid+1, last); + fastq_pair_array_merge(fq_pair_array, temp_fq_pair_array, first, mid, last); + } + + return 0; +} + +int fastq_pair_array_printf(FASTQ_PAIR_ARRAY *fq_pair_array, FILE *fp_out1, FILE *fp_out2, + char *format, int serial_flag, int flag_uniq) +{ + /* write the pair-end reads in the array in FASTA or FASTQ format into two + * output files(format='fa' or 'fq') or in FASTA format into a single output + * file(format="fa" and fp_out2==NULL) using the original description + * (serial_flag=0) or a new serial number(serial_flag=1). Output all sequences + * (flag_uniq==0), or unique ones(flag_uniq==1). If successful, return 0, + * otherwise return 1. + */ + long i, k; + FASTQ_PAIR **temp_fq_pair, **temp_fq_pair_old; + + if(flag_uniq==0) + { + for(i=1;i<=fq_pair_array->fastq_pair_num;i++) + { + temp_fq_pair=fastq_pair_array_get_pointer(fq_pair_array, i); + + if(serial_flag==0) + fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, format, -1); + else + fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, format, i); + } + } + else + { + temp_fq_pair_old=fastq_pair_array_get_pointer(fq_pair_array, 1); + + /* the fastq_pair_array contain only one read-pair, output it */ + if(fq_pair_array->fastq_pair_num==1) + { + if(serial_flag==0) + fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, + format, -1); + else + fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, + format, k++); + } + + /* compare and output */ + for(i=2, k=1;i<=fq_pair_array->fastq_pair_num;i++) + { + temp_fq_pair=fastq_pair_array_get_pointer(fq_pair_array, i); + if(fastq_pair_compare_loose(*temp_fq_pair_old, *temp_fq_pair)!=0) + { + if(serial_flag==0) + fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, + format, -1); + else + fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, + format, k++); + + temp_fq_pair_old=temp_fq_pair; + + if(i==fq_pair_array->fastq_pair_num) + { + if(serial_flag==0) + fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, + format, -1); + else + fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, + format, k++); + } + } + else + { + if(fastq_pair_get_left_length(*temp_fq_pair_old) <= fastq_pair_get_left_length(*temp_fq_pair) && + fastq_pair_get_right_length(*temp_fq_pair_old) <= fastq_pair_get_right_length(*temp_fq_pair)) + { + temp_fq_pair_old=temp_fq_pair; + + if(i==fq_pair_array->fastq_pair_num) + { + if(serial_flag==0) + fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, + format, -1); + else + fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, + format, k++); + } + } + else + { + if(serial_flag==0) + fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, + format, -1); + else + fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, + format, k++); + + temp_fq_pair_old=temp_fq_pair; + } + } + } + } + return 0; +} + + + + + + + + + + + + + + + + + + +