annotate src/trim_paired.c @ 4:c70137414dcd draft

sickle v1.33
author nikhil-joshi
date Wed, 23 Jul 2014 18:35:10 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
1 #include <assert.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
2 #include <ctype.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
3 #include <stdlib.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
4 #include <zlib.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
5 #include <stdio.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
6 #include <getopt.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
7 #include <unistd.h>
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
8 #include "sickle.h"
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
9 #include "kseq.h"
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
10 #include "print_record.h"
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
11
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
/* Expand the kseq.h reader macros for this translation unit, passing gzread
 * as the read function and BUFFER_SIZE as the buffer size.
 * NOTE(review): presumably these expansions provide the reader routines behind
 * the kseq_t records declared in paired_main -- confirm against kseq.h. */
12 __KS_GETC(gzread, BUFFER_SIZE)
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
13 __KS_GETUNTIL(gzread, BUFFER_SIZE)
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
14 __KSEQ_READ
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
15
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
/* Quality threshold for paired-end trimming (described in the usage text as
 * the threshold on average quality in a window). Defaults to 20; overwritten
 * by the -q option in paired_main, which rejects negative values. */
16 int paired_qual_threshold = 20;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
/* Length threshold for paired-end trimming (described in the usage text as
 * the threshold to keep a read based on its length after trimming). Defaults
 * to 20; overwritten by the -l option in paired_main, which rejects negative
 * values. */
17 int paired_length_threshold = 20;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
18
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
19 static struct option paired_long_options[] = {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
20 {"qual-type", required_argument, 0, 't'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
21 {"pe-file1", optional_argument, 0, 'f'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
22 {"pe-file2", optional_argument, 0, 'r'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
23 {"pe-combo", optional_argument, 0, 'c'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
24 {"output-pe1", optional_argument, 0, 'o'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
25 {"output-pe2", optional_argument, 0, 'p'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
26 {"output-single", optional_argument, 0, 's'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
27 {"output-combo", optional_argument, 0, 'm'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
28 {"qual-threshold", optional_argument, 0, 'q'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
29 {"length-threshold", optional_argument, 0, 'l'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
30 {"no-fiveprime", optional_argument, 0, 'x'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
31 {"truncate-n", optional_argument, 0, 'n'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
32 {"gzip-output", optional_argument, 0, 'g'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
33 {"output-combo-all", optional_argument, 0, 'M'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
34 {"quiet", optional_argument, 0, 'z'},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
35 {GETOPT_HELP_OPTION_DECL},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
36 {GETOPT_VERSION_OPTION_DECL},
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
37 {NULL, 0, NULL, 0}
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
38 };
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
39
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
40 void paired_usage (int status, char *msg) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
41
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
42 fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
43 fprintf(stderr, "Usage: %s pe [options] -f <paired-end forward fastq file> -r <paired-end reverse fastq file> -t <quality type> -o <trimmed PE forward file> -p <trimmed PE reverse file> -s <trimmed singles file>\n\n", PROGRAM_NAME);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
44 fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
45 fprintf(stderr, "Usage: %s pe [options] -c <interleaved input file> -t <quality type> -m <interleaved trimmed paired-end output> -s <trimmed singles file>\n\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
46 If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
47 Usage: %s pe [options] -c <interleaved input file> -t <quality type> -M <interleaved trimmed output>\n\n", PROGRAM_NAME, PROGRAM_NAME);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
48 fprintf(stderr, "Options:\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
49 Paired-end separated reads\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
50 --------------------------\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
51 -f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
52 -r, --pe-file2, Input paired-end reverse fastq file\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
53 -o, --output-pe1, Output trimmed forward fastq file\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
54 -p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
55 Paired-end interleaved reads\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
56 ----------------------------\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
57 fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
58 -m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
59 -M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
60 Global options\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
61 --------------\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
62 -t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
63 fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
64 -q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
65 -l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
66 -x, --no-fiveprime, Don't do five prime trimming.\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
67 -n, --truncate-n, Truncate sequences at position of first N.\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
68
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
69
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
70 fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
71 --help, display this help and exit\n\
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
72 --version, output version information and exit\n\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
73
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
74 if (msg) fprintf(stderr, "%s\n\n", msg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
75 exit(status);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
76 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
77
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
78
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
79 int paired_main(int argc, char *argv[]) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
80
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
81 gzFile pe1 = NULL; /* forward input file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
82 gzFile pe2 = NULL; /* reverse input file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
83 gzFile pec = NULL; /* combined input file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
84 kseq_t *fqrec1 = NULL;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
85 kseq_t *fqrec2 = NULL;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
86 int l1, l2;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
87 FILE *outfile1 = NULL; /* forward output file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
88 FILE *outfile2 = NULL; /* reverse output file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
89 FILE *combo = NULL; /* combined output file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
90 FILE *single = NULL; /* single output file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
91 gzFile outfile1_gzip = NULL;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
92 gzFile outfile2_gzip = NULL;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
93 gzFile combo_gzip = NULL;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
94 gzFile single_gzip = NULL;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
95 int debug = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
96 int optc;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
97 extern char *optarg;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
98 int qualtype = -1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
99 cutsites *p1cut;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
100 cutsites *p2cut;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
101 char *outfn1 = NULL; /* forward file out name */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
102 char *outfn2 = NULL; /* reverse file out name */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
103 char *outfnc = NULL; /* combined file out name */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
104 char *sfn = NULL; /* single/combined file out name */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
105 char *infn1 = NULL; /* forward input filename */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
106 char *infn2 = NULL; /* reverse input filename */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
107 char *infnc = NULL; /* combined input filename */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
108 int kept_p = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
109 int discard_p = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
110 int kept_s1 = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
111 int kept_s2 = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
112 int discard_s1 = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
113 int discard_s2 = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
114 int quiet = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
115 int no_fiveprime = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
116 int trunc_n = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
117 int gzip_output = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
118 int combo_all=0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
119 int combo_s=0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
120
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
121 while (1) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
122 int option_index = 0;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
123 optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
124
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
125 if (optc == -1)
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
126 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
127
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
128 switch (optc) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
129 if (paired_long_options[option_index].flag != 0)
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
130 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
131
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
132 case 'f':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
133 infn1 = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
134 strcpy(infn1, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
135 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
136
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
137 case 'r':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
138 infn2 = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
139 strcpy(infn2, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
140 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
141
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
142 case 'c':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
143 infnc = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
144 strcpy(infnc, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
145 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
146
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
147 case 't':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
148 if (!strcmp(optarg, "illumina")) qualtype = ILLUMINA;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
149 else if (!strcmp(optarg, "solexa")) qualtype = SOLEXA;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
150 else if (!strcmp(optarg, "sanger")) qualtype = SANGER;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
151 else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
152 fprintf(stderr, "Error: Quality type '%s' is not a valid type.\n", optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
153 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
154 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
155 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
156
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
157 case 'o':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
158 outfn1 = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
159 strcpy(outfn1, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
160 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
161
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
162 case 'p':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
163 outfn2 = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
164 strcpy(outfn2, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
165 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
166
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
167 case 'm':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
168 outfnc = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
169 strcpy(outfnc, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
170 combo_s = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
171 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
172
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
173 case 'M':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
174 outfnc = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
175 strcpy(outfnc, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
176 combo_all = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
177 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
178
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
179 case 's':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
180 sfn = (char *) malloc(strlen(optarg) + 1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
181 strcpy(sfn, optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
182 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
183
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
184 case 'q':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
185 paired_qual_threshold = atoi(optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
186 if (paired_qual_threshold < 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
187 fprintf(stderr, "Quality threshold must be >= 0\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
188 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
189 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
190 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
191
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
192 case 'l':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
193 paired_length_threshold = atoi(optarg);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
194 if (paired_length_threshold < 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
195 fprintf(stderr, "Length threshold must be >= 0\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
196 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
197 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
198 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
199
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
200 case 'x':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
201 no_fiveprime = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
202 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
203
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
204 case 'n':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
205 trunc_n = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
206 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
207
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
208 case 'g':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
209 gzip_output = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
210 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
211
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
212 case 'z':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
213 quiet = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
214 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
215
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
216 case 'd':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
217 debug = 1;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
218 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
219
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
220 case_GETOPT_HELP_CHAR(paired_usage);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
221 case_GETOPT_VERSION_CHAR(PROGRAM_NAME, VERSION, AUTHORS);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
222
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
223 case '?':
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
224 paired_usage(EXIT_FAILURE, NULL);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
225 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
226
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
227 default:
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
228 paired_usage(EXIT_FAILURE, NULL);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
229 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
230 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
231 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
232
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
233 /* required: qualtype */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
234 if (qualtype == -1) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
235 paired_usage(EXIT_FAILURE, "****Error: Quality type is required.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
236 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
237
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
238 /* make sure minimum input filenames are specified */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
239 if (!infn1 && !infnc) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
240 paired_usage(EXIT_FAILURE, "****Error: Must have either -f OR -c argument.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
241 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
242
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
243 if (infnc) { /* using combined input file */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
244
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
245 if (infn1 || infn2 || outfn1 || outfn2) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
246 paired_usage(EXIT_FAILURE, "****Error: Cannot have -f, -r, -o, or -p options with -c.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
247 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
248
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
249 if ((combo_all && combo_s) || (!combo_all && !combo_s)) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
250 paired_usage(EXIT_FAILURE, "****Error: Must have only one of either -m or -M options with -c.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
251 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
252
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
253 if ((combo_s && !sfn) || (combo_all && sfn)) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
254 paired_usage(EXIT_FAILURE, "****Error: -m option must have -s option, and -M option cannot have -s option.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
255 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
256
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
257 /* check for duplicate file names */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
258 if (!strcmp(infnc, outfnc) || (combo_s && (!strcmp(infnc, sfn) || !strcmp(outfnc, sfn)))) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
259 fprintf(stderr, "****Error: Duplicate filename between combo input, combo output, and/or single output file names.\n\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
260 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
261 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
262
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
263 /* get combined output file */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
264 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
265 combo = fopen(outfnc, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
266 if (!combo) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
267 fprintf(stderr, "****Error: Could not open combo output file '%s'.\n\n", outfnc);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
268 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
269 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
270 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
271 combo_gzip = gzopen(outfnc, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
272 if (!combo_gzip) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
273 fprintf(stderr, "****Error: Could not open combo output file '%s'.\n\n", outfnc);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
274 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
275 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
276 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
277
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
278 pec = gzopen(infnc, "r");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
279 if (!pec) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
280 fprintf(stderr, "****Error: Could not open combined input file '%s'.\n\n", infnc);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
281 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
282 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
283
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
284 } else { /* using forward and reverse input files */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
285
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
286 if (infn1 && (!infn2 || !outfn1 || !outfn2 || !sfn)) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
287 paired_usage(EXIT_FAILURE, "****Error: Using the -f option means you must have the -r, -o, -p, and -s options.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
288 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
289
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
290 if (infn1 && (infnc || combo_all || combo_s)) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
291 paired_usage(EXIT_FAILURE, "****Error: The -f option cannot be used in combination with -c, -m, or -M.");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
292 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
293
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
294 if (!strcmp(infn1, infn2) || !strcmp(infn1, outfn1) || !strcmp(infn1, outfn2) ||
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
295 !strcmp(infn1, sfn) || !strcmp(infn2, outfn1) || !strcmp(infn2, outfn2) ||
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
296 !strcmp(infn2, sfn) || !strcmp(outfn1, outfn2) || !strcmp(outfn1, sfn) || !strcmp(outfn2, sfn)) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
297
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
298 fprintf(stderr, "****Error: Duplicate input and/or output file names.\n\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
299 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
300 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
301
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
302 pe1 = gzopen(infn1, "r");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
303 if (!pe1) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
304 fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
305 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
306 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
307
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
308 pe2 = gzopen(infn2, "r");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
309 if (!pe2) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
310 fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
311 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
312 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
313
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
314 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
315 outfile1 = fopen(outfn1, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
316 if (!outfile1) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
317 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
318 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
319 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
320
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
321 outfile2 = fopen(outfn2, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
322 if (!outfile2) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
323 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
324 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
325 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
326 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
327 outfile1_gzip = gzopen(outfn1, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
328 if (!outfile1_gzip) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
329 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
330 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
331 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
332
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
333 outfile2_gzip = gzopen(outfn2, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
334 if (!outfile2_gzip) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
335 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
336 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
337 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
338
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
339 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
340 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
341
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
342 /* get singles output file handle */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
343 if (sfn && !combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
344 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
345 single = fopen(sfn, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
346 if (!single) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
347 fprintf(stderr, "****Error: Could not open single output file '%s'.\n\n", sfn);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
348 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
349 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
350 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
351 single_gzip = gzopen(sfn, "w");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
352 if (!single_gzip) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
353 fprintf(stderr, "****Error: Could not open single output file '%s'.\n\n", sfn);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
354 return EXIT_FAILURE;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
355 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
356 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
357 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
358
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
359 if (pec) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
360 fqrec1 = kseq_init(pec);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
361 fqrec2 = (kseq_t *) malloc(sizeof(kseq_t));
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
362 fqrec2->f = fqrec1->f;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
363 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
364 fqrec1 = kseq_init(pe1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
365 fqrec2 = kseq_init(pe2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
366 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
367
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
368 while ((l1 = kseq_read(fqrec1)) >= 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
369
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
370 l2 = kseq_read(fqrec2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
371 if (l2 < 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
372 fprintf(stderr, "Warning: PE file 2 is shorter than PE file 1. Disregarding rest of PE file 1.\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
373 break;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
374 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
375
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
376 p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
377 p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
378
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
379 if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
380 if (debug) printf("p2cut: %d,%d\n", p2cut->five_prime_cut, p2cut->three_prime_cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
381
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
382 /* The sequence and quality print statements below print out the sequence string starting from the 5' cut */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
383 /* and then only print out to the 3' cut, however, we need to adjust the 3' cut */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
384 /* by subtracting the 5' cut because the 3' cut was calculated on the original sequence */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
385
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
386 /* if both sequences passed quality and length filters, then output both records */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
387 if (p1cut->three_prime_cut >= 0 && p2cut->three_prime_cut >= 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
388 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
389 if (pec) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
390 print_record (combo, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
391 print_record (combo, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
392 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
393 print_record (outfile1, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
394 print_record (outfile2, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
395 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
396 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
397 if (pec) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
398 print_record_gzip (combo_gzip, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
399 print_record_gzip (combo_gzip, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
400 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
401 print_record_gzip (outfile1_gzip, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
402 print_record_gzip (outfile2_gzip, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
403 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
404 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
405
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
406 kept_p += 2;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
407 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
408
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
409 /* if only one sequence passed filter, then put its record in singles and discard the other */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
410 /* or put an "N" record in if that option was chosen. */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
411 else if (p1cut->three_prime_cut >= 0 && p2cut->three_prime_cut < 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
412 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
413 if (combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
414 print_record (combo, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
415 print_record_N (combo, fqrec2, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
416 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
417 print_record (single, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
418 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
419 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
420 if (combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
421 print_record_gzip (combo_gzip, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
422 print_record_N_gzip (combo_gzip, fqrec2, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
423 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
424 print_record_gzip (single_gzip, fqrec1, p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
425 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
426 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
427
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
428 kept_s1++;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
429 discard_s2++;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
430 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
431
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
432 else if (p1cut->three_prime_cut < 0 && p2cut->three_prime_cut >= 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
433 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
434 if (combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
435 print_record_N (combo, fqrec1, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
436 print_record (combo, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
437 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
438 print_record (single, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
439 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
440 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
441 if (combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
442 print_record_N_gzip (combo_gzip, fqrec1, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
443 print_record_gzip (combo_gzip, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
444 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
445 print_record_gzip (single_gzip, fqrec2, p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
446 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
447 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
448
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
449 kept_s2++;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
450 discard_s1++;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
451
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
452 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
453
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
454 /* If both records are to be discarded, but the -M option */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
455 /* is being used, then output two "N" records */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
456 if (combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
457 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
458 print_record_N (combo, fqrec1, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
459 print_record_N (combo, fqrec2, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
460 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
461 print_record_N_gzip (combo_gzip, fqrec1, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
462 print_record_N_gzip (combo_gzip, fqrec2, qualtype);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
463 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
464 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
465
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
466 discard_p += 2;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
467 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
468
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
469 free(p1cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
470 free(p2cut);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
471 } /* end of while ((l1 = kseq_read (fqrec1)) >= 0) */
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
472
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
473 if (l1 < 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
474 l2 = kseq_read(fqrec2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
475 if (l2 >= 0) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
476 fprintf(stderr, "Warning: PE file 1 is shorter than PE file 2. Disregarding rest of PE file 2.\n");
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
477 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
478 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
479
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
480 if (!quiet) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
481 fprintf(stdout, "\nFastQ paired records kept: %d (%d pairs)\n", kept_p, (kept_p / 2));
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
482 if (pec) fprintf(stdout, "FastQ single records kept: %d\n", (kept_s1 + kept_s2));
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
483 else fprintf(stdout, "FastQ single records kept: %d (from PE1: %d, from PE2: %d)\n", (kept_s1 + kept_s2), kept_s1, kept_s2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
484
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
485 fprintf(stdout, "FastQ paired records discarded: %d (%d pairs)\n", discard_p, (discard_p / 2));
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
486
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
487 if (pec) fprintf(stdout, "FastQ single records discarded: %d\n\n", (discard_s1 + discard_s2));
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
488 else fprintf(stdout, "FastQ single records discarded: %d (from PE1: %d, from PE2: %d)\n\n", (discard_s1 + discard_s2), discard_s1, discard_s2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
489 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
490
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
491 kseq_destroy(fqrec1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
492 if (pec) free(fqrec2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
493 else kseq_destroy(fqrec2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
494
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
495 if (sfn && !combo_all) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
496 if (!gzip_output) fclose(single);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
497 else gzclose(single_gzip);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
498 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
499
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
500 if (pec) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
501 gzclose(pec);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
502 if (!gzip_output) fclose(combo);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
503 else gzclose(combo_gzip);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
504 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
505 gzclose(pe1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
506 gzclose(pe2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
507 if (!gzip_output) {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
508 fclose(outfile1);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
509 fclose(outfile2);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
510 } else {
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
511 gzclose(outfile1_gzip);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
512 gzclose(outfile2_gzip);
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
513 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
514 }
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
515
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
516 return EXIT_SUCCESS;
c70137414dcd sickle v1.33
nikhil-joshi
parents:
diff changeset
517 } /* end of paired_main() */