0
|
/* The MIT License

   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/
|
|
23
|
|
24 #include <stdlib.h>
|
|
25 #include <string.h>
|
|
26 #include <stdio.h>
|
|
27 #include <fcntl.h>
|
|
28 #include <unistd.h>
|
|
29 #include <errno.h>
|
|
30 #include "bgzf.h"
|
|
31
|
|
/* Size in bytes (64 KiB) of the transfer buffer that main() allocates and
   fills on every read()/bgzf_read() call. */
static const int WINDOW_SIZE = 1024 * 64;
|
|
33
|
|
/*
 * Print the command-line help text to stdout.
 *
 * Always returns 0 so that callers can write `return bgzip_main_usage();`
 * and use the result directly as the process exit status.
 */
static int bgzip_main_usage(void)
{
	printf("\n");
	printf("Usage: bgzip [options] [file] ...\n\n");
	printf("Options: -c write on standard output, keep original files unchanged\n");
	printf(" -d decompress\n");
	/* main() parses -f and forwards it to write_open(), so it is listed here. */
	printf(" -f overwrite files without asking\n");
	/* -l is accepted by the getopt string in main() but is not implemented:
	   printf(" -l list compressed file contents\n"); */
	printf(" -b INT decompress at virtual file pointer INT\n");
	printf(" -s INT decompress INT bytes in the uncompressed file\n");
	printf(" -h give this help\n");
	printf("\n");
	return 0;
}
|
|
47
|
|
/*
 * Open `fn` for writing (mode 0644) and return its file descriptor.
 *
 * fn         path of the file to create or overwrite
 * is_forced  nonzero: create or truncate the file without asking;
 *            zero: try an exclusive create first and, if the file already
 *            exists, ask on stdout/stdin whether it may be overwritten
 *
 * The function only ever returns a valid descriptor. It calls exit(1) when
 * the user does not answer with 'y' or 'Y' (end-of-file or a read error on
 * stdin counts as "no"), or when the file cannot be opened for writing.
 */
static int write_open(const char *fn, int is_forced)
{
	int fd = -1;

	if (!is_forced) {
		fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0644);
		if (fd < 0 && errno == EEXIST) {
			int answer;

			printf("bgzip: %s already exists; do you wish to overwrite (y or n)? ", fn);
			/* The prompt has no trailing newline; push it out before blocking on stdin. */
			fflush(stdout);
			/* getchar() yields EOF on failure, which is neither 'y' nor 'Y',
			   so a closed or empty stdin never results in an overwrite. */
			answer = getchar();
			if (answer != 'Y' && answer != 'y') {
				printf("bgzip: not overwritten\n");
				exit(1);
			}
		}
	}
	/* Reached when forced, when the user agreed to overwrite, or when the
	   exclusive create failed for a reason other than EEXIST. */
	if (fd < 0) {
		fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0644);
		if (fd < 0) {
			fprintf(stderr, "bgzip: %s: Fail to write\n", fn);
			exit(1);
		}
	}
	return fd;
}
|
|
70
|
|
71 static
|
|
72 void
|
|
73 fail(BGZF* fp)
|
|
74 {
|
|
75 printf("Error: %s\n", fp->error);
|
|
76 exit(1);
|
|
77 }
|
|
78
|
|
79 int main(int argc, char **argv)
|
|
80 {
|
|
81 int c, compress, pstdout, is_forced;
|
|
82 BGZF *rz;
|
|
83 void *buffer;
|
|
84 long start, end, size;
|
|
85
|
|
86 compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
|
|
87 while((c = getopt(argc, argv, "cdlhfb:s:")) >= 0){
|
|
88 switch(c){
|
|
89 case 'h': return bgzip_main_usage();
|
|
90 case 'd': compress = 0; break;
|
|
91 case 'c': pstdout = 1; break;
|
|
92 // case 'l': compress = 2; break;
|
|
93 case 'b': start = atol(optarg); break;
|
|
94 case 's': size = atol(optarg); break;
|
|
95 case 'f': is_forced = 1; break;
|
|
96 }
|
|
97 }
|
|
98 if (size >= 0) end = start + size;
|
|
99 if(end >= 0 && end < start){
|
|
100 fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end);
|
|
101 return 1;
|
|
102 }
|
|
103 if(compress == 1){
|
|
104 int f_src, f_dst = -1;
|
|
105 if(argc > optind){
|
|
106 if((f_src = open(argv[optind], O_RDONLY)) < 0){
|
|
107 fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]);
|
|
108 return 1;
|
|
109 }
|
|
110 if(pstdout){
|
|
111 f_dst = fileno(stdout);
|
|
112 } else {
|
|
113 char *name = malloc(sizeof(strlen(argv[optind]) + 5));
|
|
114 strcpy(name, argv[optind]);
|
|
115 strcat(name, ".gz");
|
|
116 f_dst = write_open(name, is_forced);
|
|
117 if (f_dst < 0) return 1;
|
|
118 free(name);
|
|
119 }
|
|
120 } else if(pstdout){
|
|
121 f_src = fileno(stdin);
|
|
122 f_dst = fileno(stdout);
|
|
123 } else return bgzip_main_usage();
|
|
124 rz = bgzf_fdopen(f_dst, "w");
|
|
125 buffer = malloc(WINDOW_SIZE);
|
|
126 while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) {
|
|
127 if (bgzf_write(rz, buffer, c) < 0) {
|
|
128 fail(rz);
|
|
129 }
|
|
130 }
|
|
131 // f_dst will be closed here
|
|
132 if (bgzf_close(rz) < 0) {
|
|
133 fail(rz);
|
|
134 }
|
|
135 if (argc > optind) unlink(argv[optind]);
|
|
136 free(buffer);
|
|
137 close(f_src);
|
|
138 return 0;
|
|
139 } else {
|
|
140 if(argc <= optind) return bgzip_main_usage();
|
|
141 int f_dst;
|
|
142 if (argc > optind && !pstdout) {
|
|
143 char *name;
|
|
144 if (strstr(argv[optind], ".gz") - argv[optind] != strlen(argv[optind]) - 3) {
|
|
145 printf("bgzip: %s: unknown suffix -- ignored\n", argv[optind]);
|
|
146 return 1;
|
|
147 }
|
|
148 name = strdup(argv[optind]);
|
|
149 name[strlen(name) - 3] = '\0';
|
|
150 f_dst = write_open(name, is_forced);
|
|
151 free(name);
|
|
152 } else f_dst = fileno(stdout);
|
|
153 rz = bgzf_open(argv[optind], "r");
|
|
154 if (rz == NULL) {
|
|
155 printf("Could not open file: %s\n", argv[optind]);
|
|
156 return 1;
|
|
157 }
|
|
158 buffer = malloc(WINDOW_SIZE);
|
|
159 if (bgzf_seek(rz, start, SEEK_SET) < 0) {
|
|
160 fail(rz);
|
|
161 }
|
|
162 while(1){
|
|
163 if(end < 0) c = bgzf_read(rz, buffer, WINDOW_SIZE);
|
|
164 else c = bgzf_read(rz, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
|
|
165 if(c == 0) break;
|
|
166 if (c < 0) fail(rz);
|
|
167 start += c;
|
|
168 write(f_dst, buffer, c);
|
|
169 if(end >= 0 && start >= end) break;
|
|
170 }
|
|
171 free(buffer);
|
|
172 if (bgzf_close(rz) < 0) {
|
|
173 fail(rz);
|
|
174 }
|
|
175 if (!pstdout) unlink(argv[optind]);
|
|
176 return 0;
|
|
177 }
|
|
178 }
|
|
179
|