Mercurial > repos > portiahollyoak > fastuniq
comparison source/fastq.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author | portiahollyoak |
---|---|
date | Thu, 02 Jun 2016 11:34:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:816cb55b5a2d |
---|---|
1 /**************************************************************************** | |
2 * The 'FASTQ_ALL' structure group is used to store a nucleotide sequence in | |
3 * FASTQ format, together with the basic functions that operate on it. | |
4 * | |
5 * This file was written by Haibin Xu, December 2011. | |
6 ****************************************************************************/ | |
7 | |
8 #include "fastq.h" | |
9 | |
10 FASTQ_ALL *fastq_create() | |
11 { | |
12 /* create a FASTQ_ALL sequence. If successful, return the point to it, | |
13 * otherwise, return NULL/. | |
14 */ | |
15 FASTQ_ALL *fq; | |
16 | |
17 if((fq=(FASTQ_ALL *)malloc(sizeof(FASTQ_ALL)))==NULL) | |
18 return NULL; | |
19 | |
20 fq->description_1=NULL; | |
21 fq->sequence=NULL; | |
22 fq->description_2=NULL; | |
23 fq->quality=NULL; | |
24 | |
25 return fq; | |
26 } | |
27 | |
28 int fastq_remove(FASTQ_ALL *fq) | |
29 { | |
30 /* free the FASTQ sequence. If successful, return 0, otherwise return 1. | |
31 */ | |
32 if(fq==NULL) | |
33 return 1; | |
34 | |
35 if(fq->description_1!=NULL) | |
36 free(fq->description_1); | |
37 if(fq->sequence!=NULL) | |
38 free(fq->sequence); | |
39 if(fq->description_2!=NULL) | |
40 free(fq->description_2); | |
41 if(fq->quality!=NULL) | |
42 free(fq->quality); | |
43 | |
44 free(fq); | |
45 | |
46 return 0; | |
47 } | |
48 | |
49 int fastq_clear(FASTQ_ALL *fq) | |
50 { | |
51 /* clear the FASTQ sequence. If successful, return 0, otherwise return 1. | |
52 */ | |
53 if(fq==NULL) | |
54 return 1; | |
55 | |
56 if(fq->description_1!=NULL) | |
57 { | |
58 free(fq->description_1); | |
59 fq->description_1=NULL; | |
60 } | |
61 if(fq->sequence!=NULL) | |
62 { | |
63 free(fq->sequence); | |
64 fq->sequence=NULL; | |
65 } | |
66 if(fq->description_2!=NULL) | |
67 { | |
68 free(fq->description_2); | |
69 fq->description_2=NULL; | |
70 } | |
71 if(fq->quality!=NULL) | |
72 { | |
73 free(fq->quality); | |
74 fq->quality=NULL; | |
75 } | |
76 | |
77 return 0; | |
78 } | |
79 | |
80 long fastq_get_serial(FASTQ_ALL *fq) | |
81 { | |
82 /* get sequence serial from FASTQ description in format '@serial_number'. | |
83 * If successful return the serial, otherwise return -1. | |
84 */ | |
85 long serial; | |
86 | |
87 if(fq==NULL || fq->description_1==NULL || fq->description_1[0]=='\0') | |
88 return -1; | |
89 | |
90 if((sscanf(fq->description_1, "@%ld", &serial))!=1) | |
91 return -1; | |
92 | |
93 return serial; | |
94 } | |
95 | |
96 int fastq_scanf(FASTQ_ALL *fq, FILE *fp_in, | |
97 int whether_append_description, int whether_append_quality) | |
98 { | |
99 /* read a FASTQ sequence from input file, including description (whether_append_description=1) | |
100 * or not (whether_append_description=0), including quality (whether_append_quality=1) or not | |
101 * (whether_append_quality=0). If successful, return 0, otherwise, clear fq | |
102 * and return 1. | |
103 */ | |
104 char description_1[FASTQ_DESCRIPTION_MAX_LENGTH], sequence[FASTQ_SEQUENCE_MAX_LENGTH]; | |
105 char description_2[FASTQ_DESCRIPTION_MAX_LENGTH], quality[FASTQ_SEQUENCE_MAX_LENGTH]; | |
106 | |
107 char *p_description_1, *p_sequence, *p_description_2, *p_quality; | |
108 | |
109 if(fp_in==NULL || fq==NULL) | |
110 return 1; | |
111 | |
112 fastq_clear(fq); | |
113 | |
114 /* read the FASTQ sequence */ | |
115 fgets(description_1, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in); | |
116 fgets(sequence, FASTQ_SEQUENCE_MAX_LENGTH, fp_in); | |
117 fgets(description_2, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in); | |
118 fgets(quality, FASTQ_SEQUENCE_MAX_LENGTH, fp_in); | |
119 | |
120 /* check whether integrity of the FASTQ sequence */ | |
121 if(description_1[0]=='\0' || sequence[0]=='\0' || description_2[0]=='\0' || | |
122 quality[0]=='\0' || description_1[0]!='@'|| description_2[0]!='+' || | |
123 description_1[strlen(description_1)-1]!='\n' || | |
124 sequence[strlen(sequence)-1]!='\n' || | |
125 description_2[strlen(description_2)-1]!='\n') | |
126 return 1; | |
127 | |
128 /* remove return character at the end */ | |
129 if(description_1[strlen(description_1)-1]=='\n') | |
130 description_1[strlen(description_1)-1]='\0'; | |
131 if(sequence[strlen(sequence)-1]=='\n') | |
132 sequence[strlen(sequence)-1]='\0'; | |
133 if(description_2[strlen(description_2)-1]=='\n') | |
134 description_2[strlen(description_2)-1]='\0'; | |
135 if(quality[strlen(quality)-1]=='\n') | |
136 quality[strlen(quality)-1]='\0'; | |
137 | |
138 /* append the sequence information to fq */ | |
139 if((p_sequence=(char *)malloc(strlen(sequence)+1))==NULL) | |
140 return 1; | |
141 strcpy(p_sequence, sequence); | |
142 fq->sequence=p_sequence; | |
143 | |
144 if(whether_append_quality==1) | |
145 { | |
146 if((p_quality=(char *)malloc(strlen(quality)+1))==NULL) | |
147 { | |
148 fastq_clear(fq); | |
149 return 1; | |
150 } | |
151 strcpy(p_quality, quality); | |
152 fq->quality=p_quality; | |
153 } | |
154 if(whether_append_description==1) | |
155 { | |
156 if((p_description_1=(char *)malloc(strlen(description_1)+1))==NULL) | |
157 { | |
158 fastq_clear(fq); | |
159 return 1; | |
160 } | |
161 strcpy(p_description_1, description_1); | |
162 fq->description_1=p_description_1; | |
163 | |
164 if((p_description_2=(char *)malloc(strlen(description_2)+1))==NULL) | |
165 { | |
166 fastq_clear(fq); | |
167 return 1; | |
168 } | |
169 strcpy(p_description_2, description_2); | |
170 fq->description_2=p_description_2; | |
171 } | |
172 | |
173 return 0; | |
174 } | |
175 | |
176 int fastq_printf(FASTQ_ALL *fq, FILE *fp_out, char *format, long serial) | |
177 { | |
178 /* write sequence into output file in FASTQ format(format='fq') or FASTA format(format='fa') | |
179 * using the original description (serial=-1) or the new serial. | |
180 * If successful, return 0, otherwise return 1. | |
181 */ | |
182 if(fp_out==NULL || fq==NULL) | |
183 return 1; | |
184 | |
185 if(strcmp(format, "fq")==0) /* output in FASTQ format */ | |
186 { | |
187 if(serial==-1) | |
188 { | |
189 if(fq->description_1!=NULL) | |
190 { | |
191 fputs(fq->description_1, fp_out); | |
192 fputc('\n', fp_out); | |
193 fputs(fq->sequence, fp_out); | |
194 fputc('\n', fp_out); | |
195 fputs(fq->description_2, fp_out); | |
196 fputc('\n', fp_out); | |
197 fputs(fq->quality, fp_out); | |
198 fputc('\n', fp_out); | |
199 } | |
200 else | |
201 { | |
202 fputc('@', fp_out); | |
203 fputc('\n', fp_out); | |
204 fputs(fq->sequence, fp_out); | |
205 fputc('\n', fp_out); | |
206 fputc('+', fp_out); | |
207 fputc('\n', fp_out); | |
208 fputs(fq->quality, fp_out); | |
209 fputc('\n', fp_out); | |
210 } | |
211 } | |
212 else | |
213 { | |
214 fprintf(fp_out, "@%ld\n", serial); | |
215 fputs(fq->sequence, fp_out); | |
216 fputc('\n', fp_out); | |
217 fprintf(fp_out, "+%ld\n", serial); | |
218 fputs(fq->quality, fp_out); | |
219 fputc('\n', fp_out); | |
220 } | |
221 } | |
222 else if(strcmp(format, "fa")==0) /* output in FASTQ format */ | |
223 { | |
224 if(serial==-1) | |
225 { | |
226 if(fq->description_1!=NULL) | |
227 { | |
228 fputc('>', fp_out); | |
229 fputs(&(fq->description_1[1]), fp_out); | |
230 fputc('\n', fp_out); | |
231 fputs(fq->sequence, fp_out); | |
232 fputc('\n', fp_out); | |
233 } | |
234 else | |
235 { | |
236 fputc('>', fp_out); | |
237 fputc('\n', fp_out); | |
238 fputs(fq->sequence, fp_out); | |
239 fputc('\n', fp_out); | |
240 } | |
241 } | |
242 else | |
243 { | |
244 fprintf(fp_out, ">%ld\n", serial); | |
245 fputs(fq->sequence, fp_out); | |
246 fputc('\n', fp_out); | |
247 } | |
248 } | |
249 else | |
250 return 1; | |
251 | |
252 return 0; | |
253 } | |
254 | |
255 long fastq_get_length(FASTQ_ALL *fq) | |
256 { | |
257 /* return the length of FASTQ sequence, is any error, return -1 | |
258 */ | |
259 | |
260 if(fq==NULL) | |
261 return -1; | |
262 if(fq->sequence==NULL) | |
263 return 0; | |
264 return strlen(fq->sequence); | |
265 } | |
266 | |
267 | |
268 | |
269 | |
270 | |
271 | |
272 | |
273 |