comparison srf2fastq/io_lib-1.12.2/progs/hash_tar.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #include <stdio.h>
2 #include <string.h>
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <fcntl.h>
6 #include <io_lib/tar_format.h>
7 #include <io_lib/hash_table.h>
8
/*
 * One indexed tar member, as collected while scanning the archive.
 */
typedef struct {
    /* Member name used as the hash key (NUL-terminated). */
    char     member[256];

    /* Byte offset of the member's data, i.e. just past its header block. */
    uint64_t pos;

    /* Size of the member's data in bytes, excluding block padding. */
    uint32_t size;
} tar_file;
14
/*
 * Advance the read position of fp by size bytes.
 *
 * For any stream other than stdin this is a relative fseek(), so a negative
 * size moves backwards exactly as fseek() would.  stdin is assumed to be a
 * possibly non-seekable pipe, so there the bytes are read and thrown away
 * instead; a size <= 0 skips nothing in that case.
 *
 * fp   - stream to advance
 * size - number of bytes to skip
 *
 * Nothing is returned.  When skipping on stdin stops early because of EOF or
 * a read error, the caller can detect it with feof()/ferror().
 */
void seek_forward(FILE *fp, int size) {
    char buf[8192];

    if (fp != stdin) {
        fseek(fp, size, SEEK_CUR);
        return;
    }

    /* Seeking on a pipe isn't supported, even for fwd seeks */
    while (size > 0) {
        size_t want = size > (int)sizeof(buf) ? sizeof(buf) : (size_t)size;
        size_t got = fread(buf, 1, want, fp);

        /*
         * A zero-length read means EOF or an error: no further progress is
         * possible, so stop rather than spin forever on a truncated input.
         */
        if (got == 0)
            break;

        size -= (int)got;
    }
}
26
27 int main(int argc, char **argv) {
28 int directories = 0;
29 FILE *fp;
30 tar_block blk;
31 char member[256];
32 size_t size, extra;
33 int LongLink = 0;
34 size_t offset = 0;
35 int verbose = 0;
36 HashFile *hf;
37 tar_file *files = NULL;
38 int nfiles = 1024;
39 int fileno = 0;
40 int i;
41 char *header = NULL, *footer = NULL;
42 int found_header, found_footer;
43 int basename = 0;
44 char *archive = NULL;
45 int append_mode = 0;
46 int prepend_mode = 0;
47
48 files = (tar_file *)malloc(nfiles * sizeof(tar_file));
49
50 hf = HashFileCreate(0, HASH_DYNAMIC_SIZE);
51
52 /* process command line arguments of the form -arg */
53 for (argc--, argv++; argc > 0; argc--, argv++) {
54 if (**argv != '-' || strcmp(*argv, "--") == 0)
55 break;
56
57 if (strcmp(*argv, "-a") == 0 && argc > 1) {
58 archive = argv[1];
59 argv++;
60 argc--;
61 }
62
63 if (strcmp(*argv, "-A") == 0)
64 append_mode = 1;
65
66 if (strcmp(*argv, "-O") == 0)
67 prepend_mode = 1;
68
69 if (strcmp(*argv, "-d") == 0)
70 directories = 1;
71
72 if (strcmp(*argv, "-v") == 0)
73 verbose = 1;
74
75 if (strcmp(*argv, "-b") == 0)
76 basename = 1;
77
78 if (strcmp(*argv, "-h") == 0 && argc > 1) {
79 /* Common header */
80 hf->headers = (HashFileSection *)
81 realloc(hf->headers, (hf->nheaders+1) *
82 sizeof(HashFileSection));
83 header = argv[1];
84 hf->nheaders++;
85 argv++;
86 argc--;
87 }
88
89 if (strcmp(*argv, "-f") == 0 && argc > 1) {
90 /* Common footer */
91 hf->footers = (HashFileSection *)
92 realloc(hf->footers, (hf->nfooters+1) *
93 sizeof(HashFileSection));
94 footer = argv[1];
95 hf->nfooters++;
96 argv++;
97 argc--;
98 }
99 }
100
101 if (argc != 1 && !archive) {
102 fprintf(stderr, "Usage: hash_tar [options] [tarfile] > tarfile.hash\n");
103 fprintf(stderr, " -a fname Tar archive filename: use if reading from stdin\n");
104 fprintf(stderr, " -A Force no archive name (eg will concat to archive itself)\n");
105 fprintf(stderr, " -O Set arc. offset to size of hash (use when prepending)\n");
106 fprintf(stderr, " -v Verbose mode\n");
107 fprintf(stderr, " -d Index directory names (useless?)\n");
108 fprintf(stderr, " -h name Set tar entry 'name' to be a file header\n");
109 fprintf(stderr, " -f name Set tar entry 'name' to be a file footer\n");
110 fprintf(stderr, " -b Use only the filename portion of a pathname\n");
111 return 1;
112 }
113
114 /* open the tarfile */
115 if (argc >= 1) {
116 archive = argv[0];
117 if (NULL == (fp = fopen(archive, "rb"))) {
118 perror(archive);
119 return 1;
120 }
121 } else {
122 fp = stdin;
123 if (!archive) {
124 fprintf(stderr, "If reading from stdin you must use the "
125 "\"-a archivename\" option\n");
126 return 1;
127 }
128 }
129
130 /* Fill out the files[] array with the offsets, size and names */
131 while(fread(&blk, sizeof(blk), 1, fp) == 1) {
132 /*
133 * If a directory is too large to fit in the name (>100) but short
134 * enough to fit in the prefix the name field will be empty, this is
135 * not the cas for ordinary files where the name field is always
136 * non-empty
137 */
138 if (!blk.header.name[0] && !blk.header.prefix[0])
139 break;
140
141 /* get size of member, rounded to a multiple of TBLOCK */
142 size = strtoul(blk.header.size, NULL, 8);
143 extra = TBLOCK*((size+TBLOCK-1)/TBLOCK) - size;
144
145 /* skip directories unless requested */
146 if (directories || blk.header.typeflag != DIRTYPE) {
147
148 /*
149 * extract member name (prefix + name), unless last member
150 * was ././@LongLink
151 */
152 if (LongLink == 0) {
153 (void) strncpy(member, blk.header.prefix, 155);
154 if (strlen(blk.header.prefix) > 0 && blk.header.name[0])
155 (void) strcat(member, "/");
156 (void) strncat(member, blk.header.name, 100);
157 }
158
159 /* account for gtar ././@LongLink */
160 if (strcmp(member, "././@LongLink") == 0) {
161 /* still expect filenames to fit into 256 bytes */
162 if (size > 256) {
163 fread(member, 1, size > 256 ? 256 : size, fp);
164 fprintf(stderr,"././@LongLink too long size=%ld\n",
165 (long)size);
166 fprintf(stderr,"%s...\n", member);
167 exit(1);
168 }
169 /*
170 * extract full name of next member then rewind to start
171 * of header
172 */
173 fread(member, 1, size > 256 ? 256 : size, fp);
174 fseek(fp, -size, SEEK_CUR);
175 LongLink = 1;
176 } else {
177 /* output offset, member name */
178 /* printf("%lu %.256s\n", (long)offset, member); */
179 LongLink = 0;
180
181 if (fileno >= nfiles) {
182 nfiles *= 2;
183 files = (tar_file *)realloc(files,nfiles*sizeof(tar_file));
184 }
185 if (basename) {
186 char *cp = strrchr(member, '/');
187 strcpy(files[fileno].member, cp ? cp+1 : member);
188 } else {
189 strcpy(files[fileno].member, member);
190 }
191 files[fileno].pos = offset+sizeof(blk);
192 files[fileno].size = size;
193 if (verbose)
194 fprintf(stderr, "File %d: pos %010ld+%06d: %s\n",
195 fileno,
196 (long)files[fileno].pos,
197 files[fileno].size,
198 files[fileno].member);
199
200 fileno++;
201 }
202 }
203
204 /* increment offset */
205 size += extra;
206 seek_forward(fp, size);
207 offset += sizeof(blk) + size;
208 }
209
210 /*
211 * Find the header/footer if specified. For now we only support one of
212 * each.
213 */
214 found_header = found_footer = 0;
215 for (i = 0; i < fileno; i++) {
216 if (header && strncmp(header, files[i].member, 256) == 0) {
217 hf->headers[0].pos = files[i].pos;
218 hf->headers[0].size = files[i].size;
219 hf->headers[0].cached_data = NULL;
220 found_header++;
221 }
222 if (footer && strncmp(footer, files[i].member, 256) == 0) {
223 hf->footers[0].pos = files[i].pos;
224 hf->footers[0].size = files[i].size;
225 hf->footers[0].cached_data = NULL;
226 found_footer++;
227 }
228 }
229 if (header && !found_header) {
230 fprintf(stderr, "Warning: could not find header '%s' in file\n",
231 header);
232 hf->nheaders = 0;
233 }
234 if (footer && !found_footer) {
235 fprintf(stderr, "Warning: could not find footer '%s' in file\n",
236 footer);
237 hf->nfooters = 0;
238 }
239
240 /*
241 * Construct the hash
242 */
243 for (i = 0; i < fileno; i++) {
244 HashData hd;
245 HashFileItem *hfi = (HashFileItem *)calloc(1, sizeof(*hfi));
246
247 /* Just use the last head/foot defined as we only allow 1 at the mo. */
248 hfi->header = hf->nheaders;
249 hfi->footer = hf->nfooters;
250 hfi->pos = files[i].pos;
251 hfi->size = files[i].size;
252 hd.p = hfi;
253 HashTableAdd(hf->h, files[i].member, strlen(files[i].member),
254 hd, NULL);
255 }
256
257 fclose(fp);
258
259 HashTableStats(hf->h, stderr);
260 if (!append_mode)
261 hf->archive = strdup(archive);
262
263 #ifdef _WIN32
264 _setmode(_fileno(stdout), _O_BINARY);
265 #endif
266 HashFileSave(hf, stdout, prepend_mode ? HASHFILE_PREPEND : 0);
267 HashFileDestroy(hf);
268
269 free(files);
270
271 return 0;
272 }