comparison source/fastq.c @ 0:816cb55b5a2d draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
author portiahollyoak
date Thu, 02 Jun 2016 11:34:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:816cb55b5a2d
1 /****************************************************************************
2 * The 'FASTQ_ALL' structure stores a nucleotide sequence in FASTQ format;
3 * this file implements the basic operations on it.
4 *
5 * This file was written by Haibin Xu, December 2011.
6 ****************************************************************************/
7
8 #include "fastq.h"
9
10 FASTQ_ALL *fastq_create()
11 {
12 /* create a FASTQ_ALL sequence. If successful, return the point to it,
13 * otherwise, return NULL/.
14 */
15 FASTQ_ALL *fq;
16
17 if((fq=(FASTQ_ALL *)malloc(sizeof(FASTQ_ALL)))==NULL)
18 return NULL;
19
20 fq->description_1=NULL;
21 fq->sequence=NULL;
22 fq->description_2=NULL;
23 fq->quality=NULL;
24
25 return fq;
26 }
27
28 int fastq_remove(FASTQ_ALL *fq)
29 {
30 /* free the FASTQ sequence. If successful, return 0, otherwise return 1.
31 */
32 if(fq==NULL)
33 return 1;
34
35 if(fq->description_1!=NULL)
36 free(fq->description_1);
37 if(fq->sequence!=NULL)
38 free(fq->sequence);
39 if(fq->description_2!=NULL)
40 free(fq->description_2);
41 if(fq->quality!=NULL)
42 free(fq->quality);
43
44 free(fq);
45
46 return 0;
47 }
48
49 int fastq_clear(FASTQ_ALL *fq)
50 {
51 /* clear the FASTQ sequence. If successful, return 0, otherwise return 1.
52 */
53 if(fq==NULL)
54 return 1;
55
56 if(fq->description_1!=NULL)
57 {
58 free(fq->description_1);
59 fq->description_1=NULL;
60 }
61 if(fq->sequence!=NULL)
62 {
63 free(fq->sequence);
64 fq->sequence=NULL;
65 }
66 if(fq->description_2!=NULL)
67 {
68 free(fq->description_2);
69 fq->description_2=NULL;
70 }
71 if(fq->quality!=NULL)
72 {
73 free(fq->quality);
74 fq->quality=NULL;
75 }
76
77 return 0;
78 }
79
80 long fastq_get_serial(FASTQ_ALL *fq)
81 {
82 /* get sequence serial from FASTQ description in format '@serial_number'.
83 * If successful return the serial, otherwise return -1.
84 */
85 long serial;
86
87 if(fq==NULL || fq->description_1==NULL || fq->description_1[0]=='\0')
88 return -1;
89
90 if((sscanf(fq->description_1, "@%ld", &serial))!=1)
91 return -1;
92
93 return serial;
94 }
95
96 int fastq_scanf(FASTQ_ALL *fq, FILE *fp_in,
97 int whether_append_description, int whether_append_quality)
98 {
99 /* read a FASTQ sequence from input file, including description (whether_append_description=1)
100 * or not (whether_append_description=0), including quality (whether_append_quality=1) or not
101 * (whether_append_quality=0). If successful, return 0, otherwise, clear fq
102 * and return 1.
103 */
104 char description_1[FASTQ_DESCRIPTION_MAX_LENGTH], sequence[FASTQ_SEQUENCE_MAX_LENGTH];
105 char description_2[FASTQ_DESCRIPTION_MAX_LENGTH], quality[FASTQ_SEQUENCE_MAX_LENGTH];
106
107 char *p_description_1, *p_sequence, *p_description_2, *p_quality;
108
109 if(fp_in==NULL || fq==NULL)
110 return 1;
111
112 fastq_clear(fq);
113
114 /* read the FASTQ sequence */
115 fgets(description_1, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in);
116 fgets(sequence, FASTQ_SEQUENCE_MAX_LENGTH, fp_in);
117 fgets(description_2, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in);
118 fgets(quality, FASTQ_SEQUENCE_MAX_LENGTH, fp_in);
119
120 /* check whether integrity of the FASTQ sequence */
121 if(description_1[0]=='\0' || sequence[0]=='\0' || description_2[0]=='\0' ||
122 quality[0]=='\0' || description_1[0]!='@'|| description_2[0]!='+' ||
123 description_1[strlen(description_1)-1]!='\n' ||
124 sequence[strlen(sequence)-1]!='\n' ||
125 description_2[strlen(description_2)-1]!='\n')
126 return 1;
127
128 /* remove return character at the end */
129 if(description_1[strlen(description_1)-1]=='\n')
130 description_1[strlen(description_1)-1]='\0';
131 if(sequence[strlen(sequence)-1]=='\n')
132 sequence[strlen(sequence)-1]='\0';
133 if(description_2[strlen(description_2)-1]=='\n')
134 description_2[strlen(description_2)-1]='\0';
135 if(quality[strlen(quality)-1]=='\n')
136 quality[strlen(quality)-1]='\0';
137
138 /* append the sequence information to fq */
139 if((p_sequence=(char *)malloc(strlen(sequence)+1))==NULL)
140 return 1;
141 strcpy(p_sequence, sequence);
142 fq->sequence=p_sequence;
143
144 if(whether_append_quality==1)
145 {
146 if((p_quality=(char *)malloc(strlen(quality)+1))==NULL)
147 {
148 fastq_clear(fq);
149 return 1;
150 }
151 strcpy(p_quality, quality);
152 fq->quality=p_quality;
153 }
154 if(whether_append_description==1)
155 {
156 if((p_description_1=(char *)malloc(strlen(description_1)+1))==NULL)
157 {
158 fastq_clear(fq);
159 return 1;
160 }
161 strcpy(p_description_1, description_1);
162 fq->description_1=p_description_1;
163
164 if((p_description_2=(char *)malloc(strlen(description_2)+1))==NULL)
165 {
166 fastq_clear(fq);
167 return 1;
168 }
169 strcpy(p_description_2, description_2);
170 fq->description_2=p_description_2;
171 }
172
173 return 0;
174 }
175
176 int fastq_printf(FASTQ_ALL *fq, FILE *fp_out, char *format, long serial)
177 {
178 /* write sequence into output file in FASTQ format(format='fq') or FASTA format(format='fa')
179 * using the original description (serial=-1) or the new serial.
180 * If successful, return 0, otherwise return 1.
181 */
182 if(fp_out==NULL || fq==NULL)
183 return 1;
184
185 if(strcmp(format, "fq")==0) /* output in FASTQ format */
186 {
187 if(serial==-1)
188 {
189 if(fq->description_1!=NULL)
190 {
191 fputs(fq->description_1, fp_out);
192 fputc('\n', fp_out);
193 fputs(fq->sequence, fp_out);
194 fputc('\n', fp_out);
195 fputs(fq->description_2, fp_out);
196 fputc('\n', fp_out);
197 fputs(fq->quality, fp_out);
198 fputc('\n', fp_out);
199 }
200 else
201 {
202 fputc('@', fp_out);
203 fputc('\n', fp_out);
204 fputs(fq->sequence, fp_out);
205 fputc('\n', fp_out);
206 fputc('+', fp_out);
207 fputc('\n', fp_out);
208 fputs(fq->quality, fp_out);
209 fputc('\n', fp_out);
210 }
211 }
212 else
213 {
214 fprintf(fp_out, "@%ld\n", serial);
215 fputs(fq->sequence, fp_out);
216 fputc('\n', fp_out);
217 fprintf(fp_out, "+%ld\n", serial);
218 fputs(fq->quality, fp_out);
219 fputc('\n', fp_out);
220 }
221 }
222 else if(strcmp(format, "fa")==0) /* output in FASTQ format */
223 {
224 if(serial==-1)
225 {
226 if(fq->description_1!=NULL)
227 {
228 fputc('>', fp_out);
229 fputs(&(fq->description_1[1]), fp_out);
230 fputc('\n', fp_out);
231 fputs(fq->sequence, fp_out);
232 fputc('\n', fp_out);
233 }
234 else
235 {
236 fputc('>', fp_out);
237 fputc('\n', fp_out);
238 fputs(fq->sequence, fp_out);
239 fputc('\n', fp_out);
240 }
241 }
242 else
243 {
244 fprintf(fp_out, ">%ld\n", serial);
245 fputs(fq->sequence, fp_out);
246 fputc('\n', fp_out);
247 }
248 }
249 else
250 return 1;
251
252 return 0;
253 }
254
255 long fastq_get_length(FASTQ_ALL *fq)
256 {
257 /* return the length of FASTQ sequence, is any error, return -1
258 */
259
260 if(fq==NULL)
261 return -1;
262 if(fq->sequence==NULL)
263 return 0;
264 return strlen(fq->sequence);
265 }
266
267
268
269
270
271
272
273