diff source/fastq_pair_array.c @ 0:816cb55b5a2d draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author portiahollyoak
date Thu, 02 Jun 2016 11:34:51 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/fastq_pair_array.c	Thu Jun 02 11:34:51 2016 -0400
@@ -0,0 +1,378 @@
+/****************************************************************************
+ * The 'FASTQ_PAIR_ARRAY' structure group is used to store an array of
+ * paired FASTQ reads, together with the basic operations on that array.
+ *
+ * This file was written by Haibin Xu, December 2011.
+ ****************************************************************************/
+
+#include "fastq_pair_array.h"
+
+FASTQ_PAIR_ARRAY *fastq_pair_array_create()
+{
+    /* Allocate an empty FASTQ pair array that owns one empty block.
+     * Returns a pointer to the new array, or NULL if either allocation
+     * fails.
+     */
+    FASTQ_PAIR_ARRAY *new_array;
+    FASTQ_PAIR_ARRAY_BLOCK *first_block;
+
+    new_array=(FASTQ_PAIR_ARRAY *)malloc(sizeof(FASTQ_PAIR_ARRAY));
+    if(new_array==NULL)
+        return NULL;
+
+    first_block=(FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK));
+    if(first_block==NULL)
+    {
+        /* do not leak the descriptor when the block cannot be allocated */
+        free(new_array);
+        return NULL;
+    }
+
+    /* the first block starts out empty and has no neighbours */
+    first_block->num=0;
+    first_block->previous=NULL;
+    first_block->next=NULL;
+
+    /* a fresh array holds exactly one block, no read pairs and no block
+     * index yet */
+    new_array->array=first_block;
+    new_array->last=first_block;
+    new_array->block_num=1;
+    new_array->fastq_pair_num=0;
+    new_array->index=NULL;
+
+    return new_array;
+}
+
+int fastq_pair_array_remove(FASTQ_PAIR_ARRAY *fq_pair_array)
+{
+    /* Release everything the FASTQ pair array owns: every stored read pair
+     * (through fastq_pair_remove), every block, and the block index.
+     *
+     * The descriptor itself is NOT freed here; it is reset to an empty
+     * state with no blocks, so a caller that free()s it afterwards stays
+     * valid and a repeated call is harmless.
+     *
+     * Returns 0 on success, or 1 if fq_pair_array is NULL.
+     */
+    long i;
+    FASTQ_PAIR_ARRAY_BLOCK *block;
+    FASTQ_PAIR_ARRAY_BLOCK *previous_block;
+
+    if(fq_pair_array==NULL)
+        return 1;
+
+    /* walk the block list backwards, from the last block to the first */
+    block=fq_pair_array->last;
+    while(block!=NULL)
+    {
+        for(i=0;i<block->num;i++)
+            fastq_pair_remove(block->block[i]);
+
+        /* remember the link before the block is released */
+        previous_block=block->previous;
+        free(block);
+        block=previous_block;
+    }
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(fq_pair_array->index);
+
+    /* leave no dangling pointers behind */
+    fq_pair_array->index=NULL;
+    fq_pair_array->array=NULL;
+    fq_pair_array->last=NULL;
+    fq_pair_array->block_num=0;
+    fq_pair_array->fastq_pair_num=0;
+
+    return 0;
+}
+
+int fastq_pair_array_append(FASTQ_PAIR *fq_pair, FASTQ_PAIR_ARRAY *fq_pair_array)
+{
+    /* Append a FASTQ pair to the end of the array. The pointer itself is
+     * stored; the pair is not copied. When the last block is full, a new
+     * block is allocated and linked behind it first.
+     *
+     * Returns 0 on success. Returns 1 if either argument is NULL, if the
+     * array has no last block, or if a new block cannot be allocated; in
+     * all of these cases the array is left unchanged.
+     */
+    FASTQ_PAIR_ARRAY_BLOCK *new_block;
+
+    if(fq_pair_array==NULL || fq_pair==NULL || fq_pair_array->last==NULL)
+        return 1;
+
+    if(fq_pair_array->last->num>=FASTQ_PAIR_ARRAY_BLOCK_SIZE)
+    {
+        /* the last block is full: add a new block behind it */
+        new_block=(FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK));
+        if(new_block==NULL)
+            return 1;   /* report the failure instead of dropping the pair */
+
+        new_block->num=0;
+        new_block->next=NULL;
+        new_block->previous=fq_pair_array->last;
+
+        fq_pair_array->last->next=new_block;
+        fq_pair_array->last=new_block;
+        fq_pair_array->block_num++;
+
+        /* an existing index has no entry for the new block; discard it so
+         * that lookups walk the block list until the index is rebuilt */
+        if(fq_pair_array->index!=NULL)
+        {
+            free(fq_pair_array->index);
+            fq_pair_array->index=NULL;
+        }
+    }
+
+    /* the last block is now guaranteed to have a free slot */
+    fq_pair_array->last->block[fq_pair_array->last->num++]=fq_pair;
+    fq_pair_array->fastq_pair_num++;
+
+    return 0;
+}
+
+int fastq_pair_array_generate_index(FASTQ_PAIR_ARRAY *fq_pair_array)
+{
+    /* (Re)build the block index: a table with one entry per block, in list
+     * order, pointing at that block. Any previous index is released first,
+     * so on allocation failure the array is left without an index.
+     * Returns 0 on success, 1 if fq_pair_array is NULL or the table cannot
+     * be allocated.
+     */
+    FASTQ_PAIR_ARRAY_BLOCK **table;
+    FASTQ_PAIR_ARRAY_BLOCK *cursor;
+    long slot;
+
+    if(fq_pair_array==NULL)
+        return 1;
+
+    /* drop a previously generated index */
+    if(fq_pair_array->index!=NULL)
+    {
+        free(fq_pair_array->index);
+        fq_pair_array->index=NULL;
+    }
+
+    table=(FASTQ_PAIR_ARRAY_BLOCK **)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK *)*(fq_pair_array->block_num));
+    if(table==NULL)
+        return 1;
+
+    /* record the blocks in list order, starting from the first one */
+    slot=0;
+    cursor=fq_pair_array->array;
+    while(slot<fq_pair_array->block_num)
+    {
+        table[slot]=cursor;
+        cursor=cursor->next;
+        slot++;
+    }
+
+    fq_pair_array->index=table;
+
+    return 0;
+}
+
+FASTQ_PAIR **fastq_pair_array_get_pointer(FASTQ_PAIR_ARRAY *fq_pair_array, long position)
+{
+    /* Return the address of the slot holding the FASTQ pair at the given
+     * 1-based position. The block index is used when one exists; otherwise
+     * the block list is walked from the first block. Returns NULL if
+     * fq_pair_array is NULL or position lies outside 1..fastq_pair_num.
+     */
+    FASTQ_PAIR_ARRAY_BLOCK *cursor;
+    long block_offset, slot_offset;
+    long steps;
+
+    if(fq_pair_array==NULL)
+        return NULL;
+    if(position<=0 || position>fq_pair_array->fastq_pair_num)
+        return NULL;
+
+    /* translate the 1-based position into zero-based block and slot offsets */
+    block_offset=(position-1)/FASTQ_PAIR_ARRAY_BLOCK_SIZE;
+    slot_offset=(position-1)%FASTQ_PAIR_ARRAY_BLOCK_SIZE;
+
+    if(fq_pair_array->index!=NULL)
+        return &fq_pair_array->index[block_offset]->block[slot_offset];
+
+    /* no index available: follow the list to the requested block */
+    cursor=fq_pair_array->array;
+    for(steps=block_offset;steps>0;steps--)
+        cursor=cursor->next;
+
+    return &cursor->block[slot_offset];
+}
+
+int fastq_pair_array_merge(FASTQ_PAIR_ARRAY *fq_pair_array,
+                           FASTQ_PAIR_ARRAY *temp_fq_pair_array,
+                           long low, long middle, long high)
+{
+    /* Merge the two adjacent sorted runs low..middle and middle+1..high
+     * (1-based, inclusive) of fq_pair_array into one sorted run, ordered by
+     * fastq_pair_compare_tight. temp_fq_pair_array is scratch space and
+     * must hold the same number of pairs as fq_pair_array; its slots
+     * low..high are overwritten. Only pair pointers are moved.
+     *
+     * Returns 0 on success. Returns 1 if an array is NULL, the sizes
+     * differ, the bounds are not ordered, or the range falls outside
+     * 1..fastq_pair_num.
+     */
+    long out, left, right;
+    FASTQ_PAIR **left_slot, **right_slot;
+    FASTQ_PAIR **out_slot;
+
+    if(fq_pair_array==NULL || temp_fq_pair_array==NULL ||
+       low > middle || middle > high ||
+       fq_pair_array->fastq_pair_num!=temp_fq_pair_array->fastq_pair_num)
+        return 1;
+
+    /* positions are 1-based; rejecting an out-of-range request here keeps
+     * fastq_pair_array_get_pointer from returning NULL below */
+    if(low<1 || high>fq_pair_array->fastq_pair_num)
+        return 1;
+
+    left=low;
+    right=middle+1;
+    out=low;
+
+    /* take the smaller head of the two runs; ties go to the left run so
+     * that equal pairs keep their relative order */
+    while(left<=middle && right<=high)
+    {
+        left_slot=fastq_pair_array_get_pointer(fq_pair_array, left);
+        right_slot=fastq_pair_array_get_pointer(fq_pair_array, right);
+        out_slot=fastq_pair_array_get_pointer(temp_fq_pair_array, out);
+
+        if(fastq_pair_compare_tight(*left_slot, *right_slot)<=0)
+        {
+            *out_slot=*left_slot;
+            left++;
+        }
+        else
+        {
+            *out_slot=*right_slot;
+            right++;
+        }
+        out++;
+    }
+
+    /* copy whatever is left of the left run ... */
+    while(left<=middle)
+    {
+        out_slot=fastq_pair_array_get_pointer(temp_fq_pair_array, out++);
+        left_slot=fastq_pair_array_get_pointer(fq_pair_array, left++);
+        *out_slot=*left_slot;
+    }
+
+    /* ... or of the right run (at most one of the two is non-empty) */
+    while(right<=high)
+    {
+        out_slot=fastq_pair_array_get_pointer(temp_fq_pair_array, out++);
+        right_slot=fastq_pair_array_get_pointer(fq_pair_array, right++);
+        *out_slot=*right_slot;
+    }
+
+    /* move the merged run back to its place in fq_pair_array */
+    for(out=low;out<=high;out++)
+    {
+        left_slot=fastq_pair_array_get_pointer(fq_pair_array, out);
+        out_slot=fastq_pair_array_get_pointer(temp_fq_pair_array, out);
+        *left_slot=*out_slot;
+    }
+
+    return 0;
+}
+
+int fastq_pair_array_sort(FASTQ_PAIR_ARRAY *fq_pair_array, FASTQ_PAIR_ARRAY *temp_fq_pair_array,
+                                long first, long last)
+{
+    /* Sort positions first..last (1-based, inclusive) of fq_pair_array with
+     * a recursive merge sort, using temp_fq_pair_array as scratch space for
+     * fastq_pair_array_merge. A range of fewer than two elements is already
+     * sorted. Returns 0 on success, or 1 as soon as sorting a half or
+     * merging the halves fails.
+     */
+    long mid;
+
+    if(first>=last)
+        return 0;
+
+    /* midpoint computed without overflowing first+last */
+    mid=first+(last-first)/2;
+
+    if(fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, first, mid)!=0)
+        return 1;
+    if(fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, mid+1, last)!=0)
+        return 1;
+    if(fastq_pair_array_merge(fq_pair_array, temp_fq_pair_array, first, mid, last)!=0)
+        return 1;
+
+    return 0;
+}
+
+static void fastq_pair_array_emit(FASTQ_PAIR *fq_pair, FILE *fp_out1, FILE *fp_out2,
+                                  char *format, int serial_flag, long serial)
+{
+    /* Print one read pair through fastq_pair_printf, passing -1 as the
+     * serial argument when serial_flag is 0 and the given serial number
+     * otherwise.
+     */
+    if(serial_flag==0)
+        fastq_pair_printf(fq_pair, fp_out1, fp_out2, format, -1);
+    else
+        fastq_pair_printf(fq_pair, fp_out1, fp_out2, format, serial);
+}
+
+int fastq_pair_array_printf(FASTQ_PAIR_ARRAY *fq_pair_array, FILE *fp_out1, FILE *fp_out2,
+                            char *format, int serial_flag, int flag_uniq)
+{
+    /* Write the pair-end reads in the array in FASTA or FASTQ format into
+     * two output files (format='fa' or 'fq'), or in FASTA format into a
+     * single output file (format="fa" and fp_out2==NULL), using the
+     * original description (serial_flag=0) or a new serial number
+     * (serial_flag=1). Output all sequences (flag_uniq==0) or only the
+     * unique ones (flag_uniq==1).
+     *
+     * In unique mode, neighbouring pairs are compared with
+     * fastq_pair_compare_loose. While the next pair compares equal and is
+     * at least as long on both the left and the right read, it replaces the
+     * current candidate; otherwise the candidate is printed and the next
+     * pair becomes the new candidate. The final candidate is always
+     * printed.
+     *
+     * Returns 0 on success, or 1 if fq_pair_array is NULL.
+     */
+    long i;
+    long serial;
+    FASTQ_PAIR **current, **candidate;
+
+    if(fq_pair_array==NULL)
+        return 1;
+
+    if(flag_uniq==0)
+    {
+        /* print every pair; its position doubles as the serial number */
+        for(i=1;i<=fq_pair_array->fastq_pair_num;i++)
+        {
+            current=fastq_pair_array_get_pointer(fq_pair_array, i);
+            fastq_pair_array_emit(*current, fp_out1, fp_out2, format, serial_flag, i);
+        }
+        return 0;
+    }
+
+    /* nothing to print for an empty array */
+    if(fq_pair_array->fastq_pair_num<1)
+        return 0;
+
+    /* serial numbers of the printed pairs start at 1; initialised here so
+     * that the single-pair case never reads an indeterminate value */
+    serial=1;
+
+    candidate=fastq_pair_array_get_pointer(fq_pair_array, 1);
+    for(i=2;i<=fq_pair_array->fastq_pair_num;i++)
+    {
+        current=fastq_pair_array_get_pointer(fq_pair_array, i);
+
+        /* the candidate is superseded silently only when the next pair
+         * compares equal and is at least as long on both reads; in every
+         * other case the candidate must be printed before moving on */
+        if(fastq_pair_compare_loose(*candidate, *current)!=0 ||
+           !(fastq_pair_get_left_length(*candidate) <= fastq_pair_get_left_length(*current) &&
+             fastq_pair_get_right_length(*candidate) <= fastq_pair_get_right_length(*current)))
+        {
+            fastq_pair_array_emit(*candidate, fp_out1, fp_out2, format,
+                                  serial_flag, serial++);
+        }
+
+        candidate=current;
+    }
+
+    /* flush the last candidate; doing this unconditionally after the loop
+     * ensures the final pair is never dropped */
+    fastq_pair_array_emit(*candidate, fp_out1, fp_out2, format, serial_flag, serial);
+
+    return 0;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+