diff srf2fastq/io_lib-1.12.2/progs/hash_tar.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/progs/hash_tar.c	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,272 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <io_lib/tar_format.h>
+#include <io_lib/hash_table.h>
+
+typedef struct {	/* One tar member recorded by main() while scanning the archive */
+    char member[256];	/* member name (NUL-terminated); later used as the hash key */
+    uint64_t pos;	/* offset of the member's data: header offset + sizeof(tar_block) */
+    uint32_t size;	/* member data size, parsed from the header's octal size field */
+} tar_file;
+
+/*
+ * Skip 'size' bytes forward in 'fp'.
+ *
+ * Any stream other than stdin is repositioned with fseek(). stdin may be a
+ * pipe, where seeking isn't supported even for forward seeks, so there the
+ * bytes are read and thrown away instead.
+ *
+ * On the stdin path a non-positive size skips nothing, and skipping stops
+ * early at end-of-file or on a read error. No status is returned.
+ */
+void seek_forward(FILE *fp, int size) {
+    if (fp != stdin) {
+	fseek(fp, size, SEEK_CUR);
+    } else {
+	char buf[8192];
+
+	/* "> 0" (not "!= 0") so a negative size can never reach fread() */
+	while (size > 0) {
+	    size_t want = size > (int)sizeof(buf) ? sizeof(buf) : (size_t)size;
+	    size_t got = fread(buf, 1, want, fp);
+
+	    /* A zero-length read means EOF or error: stop, don't spin */
+	    if (got == 0)
+		break;
+	    size -= (int)got;
+	}
+    }
+}
+
+/*
+ * Length of a fixed-width tar header field that may or may not be
+ * NUL-terminated: the number of bytes before the first NUL, or 'max' when
+ * the field is completely full.
+ */
+static size_t hash_tar_field_len(const char *field, size_t max) {
+    const char *nul = memchr(field, '\0', max);
+    return nul ? (size_t)(nul - field) : max;
+}
+
+/*
+ * hash_tar: scan a tar archive, record the name, data offset and size of
+ * each member, and write the resulting hash index to stdout with
+ * HashFileSave(). Hash table statistics are written to stderr.
+ *
+ * The archive is read from the single non-option argument, or from stdin
+ * when "-a archivename" is given and no filename follows. See the usage
+ * text below for the options.
+ *
+ * Returns 0 on success and 1 on a usage error, a failure to open the
+ * archive or an allocation failure. Exits with status 1 if a
+ * ././@LongLink entry is too long or truncated.
+ */
+int main(int argc, char **argv) {
+    enum {
+        PREFIX_LEN   = 155, /* bytes taken from blk.header.prefix */
+        NAME_LEN     = 100, /* bytes taken from blk.header.name */
+        LONGLINK_MAX = 256  /* largest ././@LongLink payload accepted */
+    };
+    int directories = 0;
+    FILE *fp;
+    tar_block blk;
+    /* Room for prefix + '/' + name + NUL, and for a full LongLink + NUL */
+    char member[PREFIX_LEN + 1 + NAME_LEN + 1] = "";
+    size_t size, extra;
+    int LongLink = 0;
+    size_t offset = 0;
+    int verbose = 0;
+    HashFile *hf;
+    tar_file *files = NULL;
+    int nfiles = 1024;      /* allocated entries in files[] */
+    int nused = 0;          /* entries of files[] filled in so far */
+    int i;
+    char *header = NULL, *footer = NULL;
+    int found_header, found_footer;
+    int use_basename = 0;
+    char *archive = NULL;
+    int append_mode = 0;
+    int prepend_mode = 0;
+
+    files = malloc(nfiles * sizeof(*files));
+    if (!files) {
+        perror("malloc");
+        return 1;
+    }
+
+    hf = HashFileCreate(0, HASH_DYNAMIC_SIZE);
+    if (!hf) {
+        fprintf(stderr, "Failed to create hash file\n");
+        return 1;
+    }
+
+    /*
+     * Process command line arguments of the form -arg. The tests are
+     * chained so that the value consumed by -a, -h or -f is never itself
+     * re-interpreted as an option.
+     */
+    for (argc--, argv++; argc > 0; argc--, argv++) {
+        if (**argv != '-')
+            break;
+
+        if (strcmp(*argv, "--") == 0) {
+            /* End-of-options marker: consume it, it is not a filename */
+            argc--;
+            argv++;
+            break;
+        }
+
+        if (strcmp(*argv, "-a") == 0 && argc > 1) {
+            archive = argv[1];
+            argv++;
+            argc--;
+        } else if (strcmp(*argv, "-A") == 0) {
+            append_mode = 1;
+        } else if (strcmp(*argv, "-O") == 0) {
+            prepend_mode = 1;
+        } else if (strcmp(*argv, "-d") == 0) {
+            directories = 1;
+        } else if (strcmp(*argv, "-v") == 0) {
+            verbose = 1;
+        } else if (strcmp(*argv, "-b") == 0) {
+            use_basename = 1;
+        } else if (strcmp(*argv, "-h") == 0 && argc > 1) {
+            /* Common header */
+            HashFileSection *grown =
+                realloc(hf->headers, (hf->nheaders+1) * sizeof(*grown));
+            if (!grown) {
+                perror("realloc");
+                return 1;
+            }
+            hf->headers = grown;
+            header = argv[1];
+            hf->nheaders++;
+            argv++;
+            argc--;
+        } else if (strcmp(*argv, "-f") == 0 && argc > 1) {
+            /* Common footer */
+            HashFileSection *grown =
+                realloc(hf->footers, (hf->nfooters+1) * sizeof(*grown));
+            if (!grown) {
+                perror("realloc");
+                return 1;
+            }
+            hf->footers = grown;
+            footer = argv[1];
+            hf->nfooters++;
+            argv++;
+            argc--;
+        }
+    }
+
+    if (argc != 1 && !archive) {
+        fprintf(stderr, "Usage: hash_tar [options] [tarfile] > tarfile.hash\n");
+        fprintf(stderr, "    -a fname  Tar archive filename: use if reading from stdin\n");
+        fprintf(stderr, "    -A        Force no archive name (eg will concat to archive itself)\n");
+        fprintf(stderr, "    -O        Set arc. offset to size of hash (use when prepending)\n");
+        fprintf(stderr, "    -v        Verbose mode\n");
+        fprintf(stderr, "    -d        Index directory names (useless?)\n");
+        fprintf(stderr, "    -h name   Set tar entry 'name' to be a file header\n");
+        fprintf(stderr, "    -f name   Set tar entry 'name' to be a file footer\n");
+        fprintf(stderr, "    -b        Use only the filename portion of a pathname\n");
+        return 1;
+    }
+
+    /* open the tarfile */
+    if (argc >= 1) {
+        archive = argv[0];
+        if (NULL == (fp = fopen(archive, "rb"))) {
+            perror(archive);
+            return 1;
+        }
+    } else {
+        fp = stdin;
+        if (!archive) {
+            fprintf(stderr, "If reading from stdin you must use the "
+                    "\"-a archivename\" option\n");
+            return 1;
+        }
+    }
+
+    /* Fill out the files[] array with the offsets, size and names */
+    while (fread(&blk, sizeof(blk), 1, fp) == 1) {
+        char size_field[sizeof(blk.header.size) + 1];
+        size_t consumed = 0; /* member data bytes already read this pass */
+
+        /*
+         * If a directory is too large to fit in the name (>100) but short
+         * enough to fit in the prefix the name field will be empty, this is
+         * not the case for ordinary files where the name field is always
+         * non-empty
+         */
+        if (!blk.header.name[0] && !blk.header.prefix[0])
+            break;
+
+        /*
+         * Get size of member and the padding that rounds it up to a
+         * multiple of TBLOCK. The size field is copied first so strtoul()
+         * always sees a terminated string, even if the field is full.
+         */
+        memcpy(size_field, blk.header.size, sizeof(blk.header.size));
+        size_field[sizeof(blk.header.size)] = '\0';
+        size = strtoul(size_field, NULL, 8);
+        extra = TBLOCK*((size+TBLOCK-1)/TBLOCK) - size;
+
+        /* skip directories unless requested */
+        if (directories || blk.header.typeflag != DIRTYPE) {
+
+            /*
+             * extract member name (prefix + name), unless last member
+             * was ././@LongLink. Neither field need be NUL-terminated,
+             * so both are measured with an explicit bound.
+             */
+            if (LongLink == 0) {
+                size_t plen = hash_tar_field_len(blk.header.prefix,
+                                                 PREFIX_LEN);
+                size_t nlen = hash_tar_field_len(blk.header.name, NAME_LEN);
+                size_t mlen = plen;
+
+                memcpy(member, blk.header.prefix, plen);
+                if (plen > 0 && nlen > 0)
+                    member[mlen++] = '/';
+                memcpy(member + mlen, blk.header.name, nlen);
+                member[mlen + nlen] = '\0';
+            }
+
+            /* account for gtar ././@LongLink */
+            if (strcmp(member, "././@LongLink") == 0) {
+                /* still expect filenames to fit into 256 bytes */
+                size_t want = size > LONGLINK_MAX ? LONGLINK_MAX : size;
+                size_t got = fread(member, 1, want, fp);
+
+                member[got] = '\0';
+                if (size > LONGLINK_MAX) {
+                    fprintf(stderr,"././@LongLink too long size=%ld\n",
+                            (long)size);
+                    fprintf(stderr,"%s...\n", member);
+                    exit(1);
+                }
+                if (got != want) {
+                    fprintf(stderr, "Truncated ././@LongLink entry\n");
+                    exit(1);
+                }
+
+                /*
+                 * The full name of the next member is now in member[].
+                 * Remember how much data was read rather than seeking
+                 * back over it: a backwards seek cannot work on a pipe.
+                 */
+                consumed = got;
+                LongLink = 1;
+            } else {
+                const char *name = member;
+                size_t len;
+
+                LongLink = 0;
+
+                if (nused >= nfiles) {
+                    tar_file *grown;
+
+                    nfiles *= 2;
+                    grown = realloc(files, nfiles * sizeof(*files));
+                    if (!grown) {
+                        perror("realloc");
+                        return 1;
+                    }
+                    files = grown;
+                }
+
+                if (use_basename) {
+                    char *cp = strrchr(member, '/');
+                    if (cp)
+                        name = cp + 1;
+                }
+
+                /* Bounded copy: member[] may be longer than the slot */
+                len = strlen(name);
+                if (len >= sizeof(files[nused].member)) {
+                    fprintf(stderr, "Warning: truncating long name '%s'\n",
+                            name);
+                    len = sizeof(files[nused].member) - 1;
+                }
+                memcpy(files[nused].member, name, len);
+                files[nused].member[len] = '\0';
+
+                files[nused].pos = offset+sizeof(blk);
+                files[nused].size = size;
+                if (verbose)
+                    fprintf(stderr, "File %d: pos %010ld+%06lu: %s\n",
+                            nused,
+                            (long)files[nused].pos,
+                            (unsigned long)files[nused].size,
+                            files[nused].member);
+
+                nused++;
+            }
+        } else {
+            /*
+             * A skipped directory uses up any pending long name, so it
+             * must not be applied to the member that follows.
+             */
+            LongLink = 0;
+        }
+
+        /* skip the rest of the member's data and increment offset */
+        size += extra;
+        seek_forward(fp, size - consumed);
+        offset += sizeof(blk) + size;
+    }
+
+    /*
+     * Find the header/footer if specified. For now we only support one of
+     * each.
+     */
+    found_header = found_footer = 0;
+    for (i = 0; i < nused; i++) {
+        if (header && strncmp(header, files[i].member, 256) == 0) {
+            hf->headers[0].pos  = files[i].pos;
+            hf->headers[0].size = files[i].size;
+            hf->headers[0].cached_data = NULL;
+            found_header++;
+        }
+        if (footer && strncmp(footer, files[i].member, 256) == 0) {
+            hf->footers[0].pos  = files[i].pos;
+            hf->footers[0].size = files[i].size;
+            hf->footers[0].cached_data = NULL;
+            found_footer++;
+        }
+    }
+    if (header && !found_header) {
+        fprintf(stderr, "Warning: could not find header '%s' in file\n",
+                header);
+        hf->nheaders = 0;
+    }
+    if (footer && !found_footer) {
+        fprintf(stderr, "Warning: could not find footer '%s' in file\n",
+                footer);
+        hf->nfooters = 0;
+    }
+
+    /*
+     * Construct the hash
+     */
+    for (i = 0; i < nused; i++) {
+        HashData hd;
+        HashFileItem *hfi = calloc(1, sizeof(*hfi));
+
+        if (!hfi) {
+            perror("calloc");
+            return 1;
+        }
+
+        /* Just use the last head/foot defined as we only allow 1 at the mo. */
+        hfi->header = hf->nheaders;
+        hfi->footer = hf->nfooters;
+        hfi->pos = files[i].pos;
+        hfi->size = files[i].size;
+        hd.p = hfi;
+        HashTableAdd(hf->h, files[i].member, strlen(files[i].member),
+                     hd, NULL);
+    }
+
+    fclose(fp);
+
+    HashTableStats(hf->h, stderr);
+    if (!append_mode) {
+        hf->archive = strdup(archive);
+        if (!hf->archive) {
+            perror("strdup");
+            return 1;
+        }
+    }
+
+#ifdef _WIN32
+    /* The hash is binary data: stop stdout translating line endings */
+    _setmode(_fileno(stdout), _O_BINARY);
+#endif
+    HashFileSave(hf, stdout, prepend_mode ? HASHFILE_PREPEND : 0);
+    HashFileDestroy(hf);
+
+    free(files);
+
+    return 0;
+}