annotate src/trim_paired.c @ 13:b105c7163a5b draft default tip

Deleted selected files
author nikhil-joshi
date Sat, 14 Mar 2015 18:29:47 -0400
parents 7939dd56c4b4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
1 #include <assert.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
2 #include <ctype.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
3 #include <stdlib.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
4 #include <zlib.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
5 #include <stdio.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
6 #include <getopt.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
7 #include <unistd.h>
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
8 #include "sickle.h"
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
9 #include "kseq.h"
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
10 #include "print_record.h"
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
11
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
/*
 * kseq reader instantiation for this translation unit.  The first two
 * macros are parameterised with gzread as the read function and
 * BUFFER_SIZE as the buffer size.
 * NOTE(review): presumably these macros come from kseq.h and expand to
 * the definitions behind kseq_read(), which paired_main() calls --
 * confirm against kseq.h.
 */
12 __KS_GETC(gzread, BUFFER_SIZE)
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
13 __KS_GETUNTIL(gzread, BUFFER_SIZE)
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
14 __KSEQ_READ
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
15
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
/* Quality threshold used for trimming; defaults to 20 and is overridden
 * by -q / --qual-threshold (negative values are rejected in paired_main). */
16 int paired_qual_threshold = 20;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
/* Length threshold for keeping a read after trimming; defaults to 20 and
 * is overridden by -l / --length-threshold (negative values are rejected
 * in paired_main). */
17 int paired_length_threshold = 20;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
18
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
19 static struct option paired_long_options[] = {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
20 {"qual-type", required_argument, 0, 't'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
21 {"pe-file1", required_argument, 0, 'f'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
22 {"pe-file2", required_argument, 0, 'r'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
23 {"pe-combo", required_argument, 0, 'c'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
24 {"output-pe1", required_argument, 0, 'o'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
25 {"output-pe2", required_argument, 0, 'p'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
26 {"output-single", required_argument, 0, 's'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
27 {"output-combo", required_argument, 0, 'm'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
28 {"qual-threshold", required_argument, 0, 'q'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
29 {"length-threshold", required_argument, 0, 'l'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
30 {"no-fiveprime", no_argument, 0, 'x'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
31 {"truncate-n", no_argument, 0, 'n'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
32 {"gzip-output", no_argument, 0, 'g'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
33 {"output-combo-all", required_argument, 0, 'M'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
34 {"quiet", no_argument, 0, 'z'},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
35 {GETOPT_HELP_OPTION_DECL},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
36 {GETOPT_VERSION_OPTION_DECL},
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
37 {NULL, 0, NULL, 0}
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
38 };
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
39
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
40 void paired_usage (int status, char *msg) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
41
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
42 fprintf(stderr, "\nIf you have separate files for forward and reverse reads:\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
43 fprintf(stderr, "Usage: %s pe [options] -f <paired-end forward fastq file> -r <paired-end reverse fastq file> -t <quality type> -o <trimmed PE forward file> -p <trimmed PE reverse file> -s <trimmed singles file>\n\n", PROGRAM_NAME);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
44 fprintf(stderr, "If you have one file with interleaved forward and reverse reads:\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
45 fprintf(stderr, "Usage: %s pe [options] -c <interleaved input file> -t <quality type> -m <interleaved trimmed paired-end output> -s <trimmed singles file>\n\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
46 If you have one file with interleaved reads as input and you want ONLY one interleaved file as output:\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
47 Usage: %s pe [options] -c <interleaved input file> -t <quality type> -M <interleaved trimmed output>\n\n", PROGRAM_NAME, PROGRAM_NAME);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
48 fprintf(stderr, "Options:\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
49 Paired-end separated reads\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
50 --------------------------\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
51 -f, --pe-file1, Input paired-end forward fastq file (Input files must have same number of records)\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
52 -r, --pe-file2, Input paired-end reverse fastq file\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
53 -o, --output-pe1, Output trimmed forward fastq file\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
54 -p, --output-pe2, Output trimmed reverse fastq file. Must use -s option.\n\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
55 Paired-end interleaved reads\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
56 ----------------------------\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
57 fprintf(stderr,"-c, --pe-combo, Combined (interleaved) input paired-end fastq\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
58 -m, --output-combo, Output combined (interleaved) paired-end fastq file. Must use -s option.\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
59 -M, --output-combo-all, Output combined (interleaved) paired-end fastq file with any discarded read written to output file as a single N. Cannot be used with the -s option.\n\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
60 Global options\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
61 --------------\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
62 -t, --qual-type, Type of quality values (solexa (CASAVA < 1.3), illumina (CASAVA 1.3 to 1.7), sanger (which is CASAVA >= 1.8)) (required)\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
63 fprintf(stderr, "-s, --output-single, Output trimmed singles fastq file\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
64 -q, --qual-threshold, Threshold for trimming based on average quality in a window. Default 20.\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
65 -l, --length-threshold, Threshold to keep a read based on length after trimming. Default 20.\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
66 -x, --no-fiveprime, Don't do five prime trimming.\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
67 -n, --truncate-n, Truncate sequences at position of first N.\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
68
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
69
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
70 fprintf(stderr, "-g, --gzip-output, Output gzipped files.\n--quiet, do not output trimming info\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
71 --help, display this help and exit\n\
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
72 --version, output version information and exit\n\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
73
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
74 if (msg) fprintf(stderr, "%s\n\n", msg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
75 exit(status);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
76 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
77
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
78
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
79 int paired_main(int argc, char *argv[]) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
80
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
81 gzFile pe1 = NULL; /* forward input file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
82 gzFile pe2 = NULL; /* reverse input file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
83 gzFile pec = NULL; /* combined input file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
84 kseq_t *fqrec1 = NULL;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
85 kseq_t *fqrec2 = NULL;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
86 int l1, l2;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
87 FILE *outfile1 = NULL; /* forward output file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
88 FILE *outfile2 = NULL; /* reverse output file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
89 FILE *combo = NULL; /* combined output file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
90 FILE *single = NULL; /* single output file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
91 gzFile outfile1_gzip = NULL;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
92 gzFile outfile2_gzip = NULL;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
93 gzFile combo_gzip = NULL;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
94 gzFile single_gzip = NULL;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
95 int debug = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
96 int optc;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
97 extern char *optarg;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
98 int qualtype = -1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
99 cutsites *p1cut;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
100 cutsites *p2cut;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
101 char *outfn1 = NULL; /* forward file out name */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
102 char *outfn2 = NULL; /* reverse file out name */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
103 char *outfnc = NULL; /* combined file out name */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
104 char *sfn = NULL; /* single/combined file out name */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
105 char *infn1 = NULL; /* forward input filename */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
106 char *infn2 = NULL; /* reverse input filename */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
107 char *infnc = NULL; /* combined input filename */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
108 int kept_p = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
109 int discard_p = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
110 int kept_s1 = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
111 int kept_s2 = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
112 int discard_s1 = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
113 int discard_s2 = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
114 int quiet = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
115 int no_fiveprime = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
116 int trunc_n = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
117 int gzip_output = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
118 int combo_all=0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
119 int combo_s=0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
120 int total=0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
121
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
122 while (1) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
123 int option_index = 0;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
124 optc = getopt_long(argc, argv, "df:r:c:t:o:p:m:M:s:q:l:xng", paired_long_options, &option_index);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
125
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
126 if (optc == -1)
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
127 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
128
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
129 switch (optc) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
130 if (paired_long_options[option_index].flag != 0)
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
131 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
132
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
133 case 'f':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
134 infn1 = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
135 strcpy(infn1, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
136 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
137
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
138 case 'r':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
139 infn2 = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
140 strcpy(infn2, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
141 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
142
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
143 case 'c':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
144 infnc = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
145 strcpy(infnc, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
146 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
147
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
148 case 't':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
149 if (!strcmp(optarg, "illumina")) qualtype = ILLUMINA;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
150 else if (!strcmp(optarg, "solexa")) qualtype = SOLEXA;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
151 else if (!strcmp(optarg, "sanger")) qualtype = SANGER;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
152 else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
153 fprintf(stderr, "Error: Quality type '%s' is not a valid type.\n", optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
154 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
155 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
156 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
157
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
158 case 'o':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
159 outfn1 = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
160 strcpy(outfn1, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
161 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
162
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
163 case 'p':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
164 outfn2 = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
165 strcpy(outfn2, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
166 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
167
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
168 case 'm':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
169 outfnc = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
170 strcpy(outfnc, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
171 combo_s = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
172 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
173
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
174 case 'M':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
175 outfnc = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
176 strcpy(outfnc, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
177 combo_all = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
178 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
179
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
180 case 's':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
181 sfn = (char *) malloc(strlen(optarg) + 1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
182 strcpy(sfn, optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
183 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
184
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
185 case 'q':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
186 paired_qual_threshold = atoi(optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
187 if (paired_qual_threshold < 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
188 fprintf(stderr, "Quality threshold must be >= 0\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
189 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
190 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
191 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
192
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
193 case 'l':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
194 paired_length_threshold = atoi(optarg);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
195 if (paired_length_threshold < 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
196 fprintf(stderr, "Length threshold must be >= 0\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
197 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
198 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
199 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
200
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
201 case 'x':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
202 no_fiveprime = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
203 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
204
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
205 case 'n':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
206 trunc_n = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
207 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
208
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
209 case 'g':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
210 gzip_output = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
211 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
212
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
213 case 'z':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
214 quiet = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
215 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
216
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
217 case 'd':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
218 debug = 1;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
219 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
220
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
221 case_GETOPT_HELP_CHAR(paired_usage);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
222 case_GETOPT_VERSION_CHAR(PROGRAM_NAME, VERSION, AUTHORS);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
223
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
224 case '?':
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
225 paired_usage(EXIT_FAILURE, NULL);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
226 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
227
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
228 default:
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
229 paired_usage(EXIT_FAILURE, NULL);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
230 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
231 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
232 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
233
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
234 /* required: qualtype */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
235 if (qualtype == -1) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
236 paired_usage(EXIT_FAILURE, "****Error: Quality type is required.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
237 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
238
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
239 /* make sure minimum input filenames are specified */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
240 if (!infn1 && !infnc) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
241 paired_usage(EXIT_FAILURE, "****Error: Must have either -f OR -c argument.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
242 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
243
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
244 if (infnc) { /* using combined input file */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
245
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
246 if (infn1 || infn2 || outfn1 || outfn2) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
247 paired_usage(EXIT_FAILURE, "****Error: Cannot have -f, -r, -o, or -p options with -c.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
248 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
249
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
250 if ((combo_all && combo_s) || (!combo_all && !combo_s)) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
251 paired_usage(EXIT_FAILURE, "****Error: Must have only one of either -m or -M options with -c.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
252 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
253
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
254 if ((combo_s && !sfn) || (combo_all && sfn)) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
255 paired_usage(EXIT_FAILURE, "****Error: -m option must have -s option, and -M option cannot have -s option.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
256 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
257
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
258 /* check for duplicate file names */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
259 if (!strcmp(infnc, outfnc) || (combo_s && (!strcmp(infnc, sfn) || !strcmp(outfnc, sfn)))) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
260 fprintf(stderr, "****Error: Duplicate filename between combo input, combo output, and/or single output file names.\n\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
261 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
262 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
263
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
264 /* get combined output file */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
265 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
266 combo = fopen(outfnc, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
267 if (!combo) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
268 fprintf(stderr, "****Error: Could not open combo output file '%s'.\n\n", outfnc);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
269 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
270 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
271 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
272 combo_gzip = gzopen(outfnc, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
273 if (!combo_gzip) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
274 fprintf(stderr, "****Error: Could not open combo output file '%s'.\n\n", outfnc);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
275 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
276 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
277 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
278
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
279 pec = gzopen(infnc, "r");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
280 if (!pec) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
281 fprintf(stderr, "****Error: Could not open combined input file '%s'.\n\n", infnc);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
282 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
283 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
284
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
285 } else { /* using forward and reverse input files */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
286
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
287 if (infn1 && (!infn2 || !outfn1 || !outfn2 || !sfn)) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
288 paired_usage(EXIT_FAILURE, "****Error: Using the -f option means you must have the -r, -o, -p, and -s options.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
289 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
290
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
291 if (infn1 && (infnc || combo_all || combo_s)) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
292 paired_usage(EXIT_FAILURE, "****Error: The -f option cannot be used in combination with -c, -m, or -M.");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
293 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
294
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
295 if (!strcmp(infn1, infn2) || !strcmp(infn1, outfn1) || !strcmp(infn1, outfn2) ||
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
296 !strcmp(infn1, sfn) || !strcmp(infn2, outfn1) || !strcmp(infn2, outfn2) ||
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
297 !strcmp(infn2, sfn) || !strcmp(outfn1, outfn2) || !strcmp(outfn1, sfn) || !strcmp(outfn2, sfn)) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
298
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
299 fprintf(stderr, "****Error: Duplicate input and/or output file names.\n\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
300 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
301 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
302
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
303 pe1 = gzopen(infn1, "r");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
304 if (!pe1) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
305 fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
306 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
307 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
308
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
309 pe2 = gzopen(infn2, "r");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
310 if (!pe2) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
311 fprintf(stderr, "****Error: Could not open input file '%s'.\n\n", infn2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
312 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
313 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
314
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
315 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
316 outfile1 = fopen(outfn1, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
317 if (!outfile1) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
318 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
319 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
320 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
321
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
322 outfile2 = fopen(outfn2, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
323 if (!outfile2) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
324 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
325 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
326 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
327 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
328 outfile1_gzip = gzopen(outfn1, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
329 if (!outfile1_gzip) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
330 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
331 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
332 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
333
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
334 outfile2_gzip = gzopen(outfn2, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
335 if (!outfile2_gzip) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
336 fprintf(stderr, "****Error: Could not open output file '%s'.\n\n", outfn2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
337 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
338 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
339
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
340 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
341 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
342
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
343 /* get singles output file handle */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
344 if (sfn && !combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
345 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
346 single = fopen(sfn, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
347 if (!single) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
348 fprintf(stderr, "****Error: Could not open single output file '%s'.\n\n", sfn);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
349 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
350 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
351 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
352 single_gzip = gzopen(sfn, "w");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
353 if (!single_gzip) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
354 fprintf(stderr, "****Error: Could not open single output file '%s'.\n\n", sfn);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
355 return EXIT_FAILURE;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
356 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
357 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
358 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
359
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
360 if (pec) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
361 fqrec1 = kseq_init(pec);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
362 fqrec2 = (kseq_t *) malloc(sizeof(kseq_t));
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
363 fqrec2->f = fqrec1->f;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
364 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
365 fqrec1 = kseq_init(pe1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
366 fqrec2 = kseq_init(pe2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
367 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
368
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
369 while ((l1 = kseq_read(fqrec1)) >= 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
370
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
371 l2 = kseq_read(fqrec2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
372 if (l2 < 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
373 fprintf(stderr, "Warning: PE file 2 is shorter than PE file 1. Disregarding rest of PE file 1.\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
374 break;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
375 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
376
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
377 p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
378 p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
379 total += 2;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
380
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
381 if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
382 if (debug) printf("p2cut: %d,%d\n", p2cut->five_prime_cut, p2cut->three_prime_cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
383
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
384 /* The sequence and quality print statements below print out the sequence string starting from the 5' cut */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
385 /* and then only print out to the 3' cut; however, we need to adjust the 3' cut */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
386 /* by subtracting the 5' cut because the 3' cut was calculated on the original sequence */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
387
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
388 /* if both sequences passed quality and length filters, then output both records */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
389 if (p1cut->three_prime_cut >= 0 && p2cut->three_prime_cut >= 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
390 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
391 if (pec) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
392 print_record (combo, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
393 print_record (combo, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
394 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
395 print_record (outfile1, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
396 print_record (outfile2, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
397 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
398 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
399 if (pec) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
400 print_record_gzip (combo_gzip, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
401 print_record_gzip (combo_gzip, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
402 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
403 print_record_gzip (outfile1_gzip, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
404 print_record_gzip (outfile2_gzip, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
405 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
406 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
407
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
408 kept_p += 2;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
409 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
410
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
411 /* if only one sequence passed filter, then put its record in singles and discard the other */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
412 /* or, when the -M option is in use, write it to the combo output along with an "N" record standing in for the failed read. */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
413 else if (p1cut->three_prime_cut >= 0 && p2cut->three_prime_cut < 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
414 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
415 if (combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
416 print_record (combo, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
417 print_record_N (combo, fqrec2, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
418 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
419 print_record (single, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
420 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
421 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
422 if (combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
423 print_record_gzip (combo_gzip, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
424 print_record_N_gzip (combo_gzip, fqrec2, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
425 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
426 print_record_gzip (single_gzip, fqrec1, p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
427 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
428 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
429
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
430 kept_s1++;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
431 discard_s2++;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
432 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
433
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
434 else if (p1cut->three_prime_cut < 0 && p2cut->three_prime_cut >= 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
435 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
436 if (combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
437 print_record_N (combo, fqrec1, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
438 print_record (combo, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
439 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
440 print_record (single, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
441 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
442 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
443 if (combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
444 print_record_N_gzip (combo_gzip, fqrec1, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
445 print_record_gzip (combo_gzip, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
446 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
447 print_record_gzip (single_gzip, fqrec2, p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
448 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
449 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
450
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
451 kept_s2++;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
452 discard_s1++;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
453
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
454 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
455
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
456 /* If both records are to be discarded, but the -M option */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
457 /* is being used, then output two "N" records */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
458 if (combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
459 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
460 print_record_N (combo, fqrec1, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
461 print_record_N (combo, fqrec2, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
462 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
463 print_record_N_gzip (combo_gzip, fqrec1, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
464 print_record_N_gzip (combo_gzip, fqrec2, qualtype);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
465 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
466 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
467
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
468 discard_p += 2;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
469 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
470
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
471 free(p1cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
472 free(p2cut);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
473 } /* end of while ((l1 = kseq_read (fqrec1)) >= 0) */
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
474
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
475 if (l1 < 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
476 l2 = kseq_read(fqrec2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
477 if (l2 >= 0) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
478 fprintf(stderr, "Warning: PE file 1 is shorter than PE file 2. Disregarding rest of PE file 2.\n");
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
479 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
480 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
481
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
482 if (!quiet) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
483 if (infn1 && infn2) fprintf(stdout, "\nPE forward file: %s\nPE reverse file: %s\n", infn1, infn2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
484 if (infnc) fprintf(stdout, "\nPE interleaved file: %s\n", infnc);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
485 fprintf(stdout, "\nTotal input FastQ records: %d (%d pairs)\n", total, (total / 2));
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
486 fprintf(stdout, "\nFastQ paired records kept: %d (%d pairs)\n", kept_p, (kept_p / 2));
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
487 if (pec) fprintf(stdout, "FastQ single records kept: %d\n", (kept_s1 + kept_s2));
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
488 else fprintf(stdout, "FastQ single records kept: %d (from PE1: %d, from PE2: %d)\n", (kept_s1 + kept_s2), kept_s1, kept_s2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
489
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
490 fprintf(stdout, "FastQ paired records discarded: %d (%d pairs)\n", discard_p, (discard_p / 2));
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
491
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
492 if (pec) fprintf(stdout, "FastQ single records discarded: %d\n\n", (discard_s1 + discard_s2));
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
493 else fprintf(stdout, "FastQ single records discarded: %d (from PE1: %d, from PE2: %d)\n\n", (discard_s1 + discard_s2), discard_s1, discard_s2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
494 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
495
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
496 kseq_destroy(fqrec1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
497 if (pec) free(fqrec2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
498 else kseq_destroy(fqrec2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
499
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
500 if (sfn && !combo_all) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
501 if (!gzip_output) fclose(single);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
502 else gzclose(single_gzip);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
503 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
504
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
505 if (pec) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
506 gzclose(pec);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
507 if (!gzip_output) fclose(combo);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
508 else gzclose(combo_gzip);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
509 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
510 gzclose(pe1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
511 gzclose(pe2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
512 if (!gzip_output) {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
513 fclose(outfile1);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
514 fclose(outfile2);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
515 } else {
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
516 gzclose(outfile1_gzip);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
517 gzclose(outfile2_gzip);
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
518 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
519 }
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
520
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
521 return EXIT_SUCCESS;
7939dd56c4b4 Uploaded
nikhil-joshi
parents:
diff changeset
522 } /* end of paired_main() */