Mercurial > repos > portiahollyoak > fastuniq
view source/fastq_pair_array.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author | portiahollyoak |
---|---|
date | Thu, 02 Jun 2016 11:34:51 -0400 |
parents | |
children |
line wrap: on
line source
/****************************************************************************
 * The 'FASTQ_PAIR_ARRAY' structure group is used to store an array of
 * paired FASTQ reads, together with its basic operation functions.
 *
 * The array is kept as a doubly linked chain of fixed-size blocks
 * (FASTQ_PAIR_ARRAY_BLOCK), each holding up to FASTQ_PAIR_ARRAY_BLOCK_SIZE
 * FASTQ_PAIR pointers. Positions used by the accessors below are 1-based.
 *
 * This file was written by Haibin Xu, December 2011.
 ****************************************************************************/

#include "fastq_pair_array.h"

FASTQ_PAIR_ARRAY *fastq_pair_array_create()
{
    /* create an empty FASTQ pair array holding a single empty block.
     * If successful, return the pointer to it, otherwise return NULL.
     */
    FASTQ_PAIR_ARRAY *fq_pair_array;
    FASTQ_PAIR_ARRAY_BLOCK *first_block;

    if ((fq_pair_array = malloc(sizeof *fq_pair_array)) == NULL)
        return NULL;

    if ((first_block = malloc(sizeof *first_block)) == NULL) {
        free(fq_pair_array);
        return NULL;
    }

    first_block->previous = NULL;
    first_block->next = NULL;
    first_block->num = 0;

    fq_pair_array->array = first_block;
    fq_pair_array->last = first_block;
    fq_pair_array->block_num = 1;
    fq_pair_array->fastq_pair_num = 0;
    fq_pair_array->index = NULL;

    return fq_pair_array;
}

int fastq_pair_array_remove(FASTQ_PAIR_ARRAY *fq_pair_array)
{
    /* release everything owned by the FASTQ pair array: every stored pair
     * (through fastq_pair_remove), every block of the chain, and the block
     * index. The descriptor is left in an empty state (no blocks, zero
     * pairs). If successful, return 0, otherwise (NULL array) return 1.
     *
     * NOTE(review): the FASTQ_PAIR_ARRAY structure itself is not freed here.
     * The previous implementation did not free it either, and the callers
     * are not visible from this file, so freeing it here could turn an
     * existing free() in a caller into a double free. Confirm against the
     * callers before changing this.
     */
    long i;
    FASTQ_PAIR_ARRAY_BLOCK *block, *previous_block;

    if (fq_pair_array == NULL)
        return 1;

    /* walk the chain backwards; remember 'previous' before freeing */
    block = fq_pair_array->last;
    while (block != NULL) {
        for (i = 0; i < block->num; i++)
            fastq_pair_remove(block->block[i]);

        previous_block = block->previous;
        free(block);
        block = previous_block;
    }

    free(fq_pair_array->index);

    /* leave no dangling pointers behind */
    fq_pair_array->array = NULL;
    fq_pair_array->last = NULL;
    fq_pair_array->index = NULL;
    fq_pair_array->block_num = 0;
    fq_pair_array->fastq_pair_num = 0;

    return 0;
}

int fastq_pair_array_append(FASTQ_PAIR *fq_pair, FASTQ_PAIR_ARRAY *fq_pair_array)
{
    /* append a new FASTQ pair to the end of the array, adding a new block
     * when the last one is full. If successful, return 0, otherwise (NULL
     * argument, array without blocks, or out of memory) return 1.
     */
    FASTQ_PAIR_ARRAY_BLOCK *new_block;

    if (fq_pair_array == NULL || fq_pair == NULL || fq_pair_array->last == NULL)
        return 1;

    if (fq_pair_array->last->num >= FASTQ_PAIR_ARRAY_BLOCK_SIZE) {
        /* the last block is full: chain a new, empty block after it */
        if ((new_block = malloc(sizeof *new_block)) == NULL)
            return 1; /* report the failure instead of silently dropping the pair */

        new_block->num = 0;
        new_block->next = NULL;
        new_block->previous = fq_pair_array->last;

        fq_pair_array->last->next = new_block;
        fq_pair_array->last = new_block;
        fq_pair_array->block_num++;

        /* an index built earlier does not cover the new block; drop it so
         * that lookups fall back to walking the chain until the index is
         * generated again */
        free(fq_pair_array->index);
        fq_pair_array->index = NULL;
    }

    fq_pair_array->last->block[fq_pair_array->last->num++] = fq_pair;
    fq_pair_array->fastq_pair_num++;

    return 0;
}

int fastq_pair_array_generate_index(FASTQ_PAIR_ARRAY *fq_pair_array)
{
    /* (re)build the block index of the array: a table with one pointer per
     * block, in chain order, used by fastq_pair_array_get_pointer to reach a
     * block without walking the chain. Any previous index is discarded
     * first. If successful, return 0, otherwise return 1.
     */
    FASTQ_PAIR_ARRAY_BLOCK **temp_index;
    FASTQ_PAIR_ARRAY_BLOCK *fq_array_block;
    long i;

    if (fq_pair_array == NULL)
        return 1;

    free(fq_pair_array->index);
    fq_pair_array->index = NULL;

    if (fq_pair_array->array == NULL || fq_pair_array->block_num < 1)
        return 1;

    temp_index = malloc(sizeof *temp_index * (size_t)fq_pair_array->block_num);
    if (temp_index == NULL)
        return 1;

    fq_array_block = fq_pair_array->array;
    for (i = 0; i < fq_pair_array->block_num; i++) {
        if (fq_array_block == NULL) {
            /* chain shorter than block_num: refuse to publish a bad index */
            free(temp_index);
            return 1;
        }
        temp_index[i] = fq_array_block;
        fq_array_block = fq_array_block->next;
    }

    fq_pair_array->index = temp_index;

    return 0;
}

FASTQ_PAIR **fastq_pair_array_get_pointer(FASTQ_PAIR_ARRAY *fq_pair_array, long position)
{
    /* get the double pointer to the individual fastq_pair member at the
     * given 1-based position in the array. The block index is used when
     * present, otherwise the block chain is walked from the first block.
     * If successful, return the double pointer, otherwise (NULL array or
     * position outside 1..fastq_pair_num) return NULL.
     */
    FASTQ_PAIR_ARRAY_BLOCK *fq_array_block;
    long block_offset, slot;
    long i;

    if (fq_pair_array == NULL || position <= 0 ||
        position > fq_pair_array->fastq_pair_num)
        return NULL;

    /* zero-based block number and slot inside that block */
    block_offset = (position - 1) / FASTQ_PAIR_ARRAY_BLOCK_SIZE;
    slot = (position - 1) % FASTQ_PAIR_ARRAY_BLOCK_SIZE;

    if (fq_pair_array->index != NULL)
        return &fq_pair_array->index[block_offset]->block[slot];

    fq_array_block = fq_pair_array->array;
    for (i = 0; i < block_offset && fq_array_block != NULL; i++)
        fq_array_block = fq_array_block->next;

    if (fq_array_block == NULL)
        return NULL;

    return &fq_array_block->block[slot];
}

int fastq_pair_array_merge(FASTQ_PAIR_ARRAY *fq_pair_array, FASTQ_PAIR_ARRAY *temp_fq_pair_array,
                           long low, long middle, long high)
{
    /* merge the two sorted parts of the array, low..middle and
     * middle+1..high (1-based, inclusive), into a single sorted range,
     * ordered by fastq_pair_compare_tight. 'temp_fq_pair_array' is scratch
     * space and must hold the same number of pairs as 'fq_pair_array'; its
     * slots low..high are overwritten. If successful, return 0, otherwise
     * (NULL array, size mismatch, or invalid range) return 1.
     */
    long i, begin1, end1, begin2, end2;
    FASTQ_PAIR **fq_pair_current1, **fq_pair_current2;
    FASTQ_PAIR **temp_fq_pair_current;

    if (fq_pair_array == NULL || temp_fq_pair_array == NULL ||
        low > middle || middle > high ||
        fq_pair_array->fastq_pair_num != temp_fq_pair_array->fastq_pair_num)
        return 1;

    /* every position touched below lies in low..high; reject ranges for
     * which fastq_pair_array_get_pointer would hand back NULL */
    if (low < 1 || high > fq_pair_array->fastq_pair_num)
        return 1;

    begin1 = low;
    end1 = middle;
    begin2 = middle + 1;
    end2 = high;

    /* merge processing: on ties the element of the first part goes first */
    for (i = low; begin1 <= end1 && begin2 <= end2; i++) {
        fq_pair_current1 = fastq_pair_array_get_pointer(fq_pair_array, begin1);
        fq_pair_current2 = fastq_pair_array_get_pointer(fq_pair_array, begin2);
        temp_fq_pair_current = fastq_pair_array_get_pointer(temp_fq_pair_array, i);

        if (fastq_pair_compare_tight(*fq_pair_current1, *fq_pair_current2) <= 0) {
            *temp_fq_pair_current = *fq_pair_current1;
            begin1++;
        } else {
            *temp_fq_pair_current = *fq_pair_current2;
            begin2++;
        }
    }

    /* move whatever is left of either part to temp_fq_pair_array */
    while (begin1 <= end1) {
        temp_fq_pair_current = fastq_pair_array_get_pointer(temp_fq_pair_array, i++);
        fq_pair_current1 = fastq_pair_array_get_pointer(fq_pair_array, begin1++);
        *temp_fq_pair_current = *fq_pair_current1;
    }
    while (begin2 <= end2) {
        temp_fq_pair_current = fastq_pair_array_get_pointer(temp_fq_pair_array, i++);
        fq_pair_current2 = fastq_pair_array_get_pointer(fq_pair_array, begin2++);
        *temp_fq_pair_current = *fq_pair_current2;
    }

    /* copy the merged range back to its original place in 'fq_pair_array' */
    for (i = low; i <= high; i++) {
        fq_pair_current1 = fastq_pair_array_get_pointer(fq_pair_array, i);
        temp_fq_pair_current = fastq_pair_array_get_pointer(temp_fq_pair_array, i);
        *fq_pair_current1 = *temp_fq_pair_current;
    }

    return 0;
}

int fastq_pair_array_sort(FASTQ_PAIR_ARRAY *fq_pair_array, FASTQ_PAIR_ARRAY *temp_fq_pair_array,
                          long first, long last)
{
    /* sort positions first..last (1-based, inclusive) of the FASTQ pair
     * array with a recursive merge sort, using 'temp_fq_pair_array' as
     * scratch space. A range with fewer than two positions is already
     * sorted. If successful, return 0; if any merge step fails, return 1.
     */
    long mid;

    if (first >= last)
        return 0;

    /* written this way so that first+last cannot overflow */
    mid = first + (last - first) / 2;

    if (fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, first, mid) != 0)
        return 1;
    if (fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, mid + 1, last) != 0)
        return 1;

    return fastq_pair_array_merge(fq_pair_array, temp_fq_pair_array, first, mid, last);
}

static void fastq_pair_array_emit(FASTQ_PAIR *fq_pair, FILE *fp_out1, FILE *fp_out2,
                                  char *format, int serial_flag, long *serial)
{
    /* write one pair through fastq_pair_printf: with -1 as serial when
     * serial_flag==0, otherwise with the current value of '*serial', which
     * is then incremented.
     */
    if (serial_flag == 0)
        fastq_pair_printf(fq_pair, fp_out1, fp_out2, format, -1);
    else
        fastq_pair_printf(fq_pair, fp_out1, fp_out2, format, (*serial)++);
}

int fastq_pair_array_printf(FASTQ_PAIR_ARRAY *fq_pair_array, FILE *fp_out1, FILE *fp_out2,
                            char *format, int serial_flag, int flag_uniq)
{
    /* write the pair-end reads in the array in FASTA or FASTQ format into two
     * output files(format='fa' or 'fq') or in FASTA format into a single output
     * file(format="fa" and fp_out2==NULL) using the original description
     * (serial_flag=0) or a new serial number(serial_flag=1). Output all sequences
     * (flag_uniq==0), or unique ones(flag_uniq==1). If successful, return 0,
     * otherwise (NULL array) return 1.
     *
     * In unique mode neighbouring pairs are compared with
     * fastq_pair_compare_loose, so the array is presumably expected to be
     * sorted beforehand (see fastq_pair_array_sort) - confirm with the caller.
     */
    long i;
    long k = 1; /* next serial number; must be set before any output */
    FASTQ_PAIR **temp_fq_pair, **temp_fq_pair_old;

    if (fq_pair_array == NULL)
        return 1;

    if (flag_uniq == 0) {
        /* plain dump: the serial number equals the 1-based position */
        for (i = 1; i <= fq_pair_array->fastq_pair_num; i++) {
            temp_fq_pair = fastq_pair_array_get_pointer(fq_pair_array, i);
            fastq_pair_array_emit(*temp_fq_pair, fp_out1, fp_out2, format, serial_flag, &k);
        }
        return 0;
    }

    if (fq_pair_array->fastq_pair_num < 1)
        return 0; /* nothing to write */

    temp_fq_pair_old = fastq_pair_array_get_pointer(fq_pair_array, 1);

    /* the fastq_pair_array contains only one read-pair, output it */
    if (fq_pair_array->fastq_pair_num == 1)
        fastq_pair_array_emit(*temp_fq_pair_old, fp_out1, fp_out2, format, serial_flag, &k);

    /* compare each pair with the retained candidate and output */
    for (i = 2; i <= fq_pair_array->fastq_pair_num; i++) {
        temp_fq_pair = fastq_pair_array_get_pointer(fq_pair_array, i);

        if (fastq_pair_compare_loose(*temp_fq_pair_old, *temp_fq_pair) != 0) {
            /* different from the candidate: flush it and start a new one */
            fastq_pair_array_emit(*temp_fq_pair_old, fp_out1, fp_out2, format, serial_flag, &k);
            temp_fq_pair_old = temp_fq_pair;
            if (i == fq_pair_array->fastq_pair_num)
                fastq_pair_array_emit(*temp_fq_pair, fp_out1, fp_out2, format, serial_flag, &k);
        } else if (fastq_pair_get_left_length(*temp_fq_pair_old) <= fastq_pair_get_left_length(*temp_fq_pair) &&
                   fastq_pair_get_right_length(*temp_fq_pair_old) <= fastq_pair_get_right_length(*temp_fq_pair)) {
            /* loosely equal and the new pair is at least as long on both
             * sides: it replaces the candidate without any output */
            temp_fq_pair_old = temp_fq_pair;
            if (i == fq_pair_array->fastq_pair_num)
                fastq_pair_array_emit(*temp_fq_pair, fp_out1, fp_out2, format, serial_flag, &k);
        } else {
            /* loosely equal but the candidate is longer on at least one side.
             * NOTE(review): when this branch is taken for the final position
             * the final pair is never written, and otherwise it becomes the
             * new candidate; kept exactly as in the original - confirm this
             * is the intended de-duplication rule. */
            fastq_pair_array_emit(*temp_fq_pair_old, fp_out1, fp_out2, format, serial_flag, &k);
            temp_fq_pair_old = temp_fq_pair;
        }
    }

    return 0;
}