Mercurial > repos > portiahollyoak > fastuniq
diff source/fastq.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author | portiahollyoak |
---|---|
date | Thu, 02 Jun 2016 11:34:51 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/fastq.c Thu Jun 02 11:34:51 2016 -0400 @@ -0,0 +1,273 @@ +/**************************************************************************** + * The 'FASTQ_ALL' structure group was used to store nucleotide sequence in + * fastq format, including basic operation function as well. + * + * This file was written by Haibin Xu, December 2011. + ****************************************************************************/ + +#include "fastq.h" + +FASTQ_ALL *fastq_create() +{ + /* create a FASTQ_ALL sequence. If successful, return the point to it, + * otherwise, return NULL/. + */ + FASTQ_ALL *fq; + + if((fq=(FASTQ_ALL *)malloc(sizeof(FASTQ_ALL)))==NULL) + return NULL; + + fq->description_1=NULL; + fq->sequence=NULL; + fq->description_2=NULL; + fq->quality=NULL; + + return fq; +} + +int fastq_remove(FASTQ_ALL *fq) +{ + /* free the FASTQ sequence. If successful, return 0, otherwise return 1. + */ + if(fq==NULL) + return 1; + + if(fq->description_1!=NULL) + free(fq->description_1); + if(fq->sequence!=NULL) + free(fq->sequence); + if(fq->description_2!=NULL) + free(fq->description_2); + if(fq->quality!=NULL) + free(fq->quality); + + free(fq); + + return 0; +} + +int fastq_clear(FASTQ_ALL *fq) +{ + /* clear the FASTQ sequence. If successful, return 0, otherwise return 1. + */ + if(fq==NULL) + return 1; + + if(fq->description_1!=NULL) + { + free(fq->description_1); + fq->description_1=NULL; + } + if(fq->sequence!=NULL) + { + free(fq->sequence); + fq->sequence=NULL; + } + if(fq->description_2!=NULL) + { + free(fq->description_2); + fq->description_2=NULL; + } + if(fq->quality!=NULL) + { + free(fq->quality); + fq->quality=NULL; + } + + return 0; +} + +long fastq_get_serial(FASTQ_ALL *fq) +{ + /* get sequence serial from FASTQ description in format '@serial_number'. + * If successful return the serial, otherwise return -1. + */ + long serial; + + if(fq==NULL || fq->description_1==NULL || fq->description_1[0]=='\0') + return -1; + + if((sscanf(fq->description_1, "@%ld", &serial))!=1) + return -1; + + return serial; +} + +int fastq_scanf(FASTQ_ALL *fq, FILE *fp_in, + int whether_append_description, int whether_append_quality) +{ + /* read a FASTQ sequence from input file, including description (whether_append_description=1) + * or not (whether_append_description=0), including quality (whether_append_quality=1) or not + * (whether_append_quality=0). If successful, return 0, otherwise, clear fq + * and return 1. + */ + char description_1[FASTQ_DESCRIPTION_MAX_LENGTH], sequence[FASTQ_SEQUENCE_MAX_LENGTH]; + char description_2[FASTQ_DESCRIPTION_MAX_LENGTH], quality[FASTQ_SEQUENCE_MAX_LENGTH]; + + char *p_description_1, *p_sequence, *p_description_2, *p_quality; + + if(fp_in==NULL || fq==NULL) + return 1; + + fastq_clear(fq); + + /* read the FASTQ sequence */ + fgets(description_1, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in); + fgets(sequence, FASTQ_SEQUENCE_MAX_LENGTH, fp_in); + fgets(description_2, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in); + fgets(quality, FASTQ_SEQUENCE_MAX_LENGTH, fp_in); + + /* check whether integrity of the FASTQ sequence */ + if(description_1[0]=='\0' || sequence[0]=='\0' || description_2[0]=='\0' || + quality[0]=='\0' || description_1[0]!='@'|| description_2[0]!='+' || + description_1[strlen(description_1)-1]!='\n' || + sequence[strlen(sequence)-1]!='\n' || + description_2[strlen(description_2)-1]!='\n') + return 1; + + /* remove return character at the end */ + if(description_1[strlen(description_1)-1]=='\n') + description_1[strlen(description_1)-1]='\0'; + if(sequence[strlen(sequence)-1]=='\n') + sequence[strlen(sequence)-1]='\0'; + if(description_2[strlen(description_2)-1]=='\n') + description_2[strlen(description_2)-1]='\0'; + if(quality[strlen(quality)-1]=='\n') + quality[strlen(quality)-1]='\0'; + + /* append the sequence information to fq */ + if((p_sequence=(char *)malloc(strlen(sequence)+1))==NULL) + return 1; + strcpy(p_sequence, sequence); + fq->sequence=p_sequence; + + if(whether_append_quality==1) + { + if((p_quality=(char *)malloc(strlen(quality)+1))==NULL) + { + fastq_clear(fq); + return 1; + } + strcpy(p_quality, quality); + fq->quality=p_quality; + } + if(whether_append_description==1) + { + if((p_description_1=(char *)malloc(strlen(description_1)+1))==NULL) + { + fastq_clear(fq); + return 1; + } + strcpy(p_description_1, description_1); + fq->description_1=p_description_1; + + if((p_description_2=(char *)malloc(strlen(description_2)+1))==NULL) + { + fastq_clear(fq); + return 1; + } + strcpy(p_description_2, description_2); + fq->description_2=p_description_2; + } + + return 0; +} + +int fastq_printf(FASTQ_ALL *fq, FILE *fp_out, char *format, long serial) +{ + /* write sequence into output file in FASTQ format(format='fq') or FASTA format(format='fa') + * using the original description (serial=-1) or the new serial. + * If successful, return 0, otherwise return 1. + */ + if(fp_out==NULL || fq==NULL) + return 1; + + if(strcmp(format, "fq")==0) /* output in FASTQ format */ + { + if(serial==-1) + { + if(fq->description_1!=NULL) + { + fputs(fq->description_1, fp_out); + fputc('\n', fp_out); + fputs(fq->sequence, fp_out); + fputc('\n', fp_out); + fputs(fq->description_2, fp_out); + fputc('\n', fp_out); + fputs(fq->quality, fp_out); + fputc('\n', fp_out); + } + else + { + fputc('@', fp_out); + fputc('\n', fp_out); + fputs(fq->sequence, fp_out); + fputc('\n', fp_out); + fputc('+', fp_out); + fputc('\n', fp_out); + fputs(fq->quality, fp_out); + fputc('\n', fp_out); + } + } + else + { + fprintf(fp_out, "@%ld\n", serial); + fputs(fq->sequence, fp_out); + fputc('\n', fp_out); + fprintf(fp_out, "+%ld\n", serial); + fputs(fq->quality, fp_out); + fputc('\n', fp_out); + } + } + else if(strcmp(format, "fa")==0) /* output in FASTQ format */ + { + if(serial==-1) + { + if(fq->description_1!=NULL) + { + fputc('>', fp_out); + fputs(&(fq->description_1[1]), fp_out); + fputc('\n', fp_out); + fputs(fq->sequence, fp_out); + fputc('\n', fp_out); + } + else + { + fputc('>', fp_out); + fputc('\n', fp_out); + fputs(fq->sequence, fp_out); + fputc('\n', fp_out); + } + } + else + { + fprintf(fp_out, ">%ld\n", serial); + fputs(fq->sequence, fp_out); + fputc('\n', fp_out); + } + } + else + return 1; + + return 0; +} + +long fastq_get_length(FASTQ_ALL *fq) +{ + /* return the length of FASTQ sequence, is any error, return -1 + */ + + if(fq==NULL) + return -1; + if(fq->sequence==NULL) + return 0; + return strlen(fq->sequence); +} + + + + + + + +