annotate srf2fastq/io_lib-1.12.2/progs/hash_sff.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
1 /*
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
2 * This adds a hash table index (".hsh" v1.00 format) to an SFF archive.
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
3 * It does this either inline on the file itself (provided it doesn't already
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
4 * have an index) or by producing a new indexed SFF archive.
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
5 *
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
6 * It has been coded to require only the memory needed to store the index
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
7 * and so does quite a lot of I/O but with minimised memory. For a 460,000
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
8 * SFF archive it took about 22 seconds real time on a 1.7GHz P4 when copying
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
9 * or 10 seconds when updating inline.
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
10 */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
11
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
12 /* ---------------------------------------------------------------------- */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
13
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
14 #include <stdio.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
15 #include <string.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
16 #include <errno.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
17 #include <stdlib.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
18 #include <io_lib/hash_table.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
19 #include <io_lib/sff.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
20 #include <io_lib/os.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
21 #include <io_lib/mFILE.h>
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
22
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
23 /*
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
24 * Override the sff.c functions to use FILE pointers instead. This means
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
25 * we don't have to load the entire archive into memory, which is optimal when
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
26 * dealing with a single file (ie in sff/sff.c), but not when indexing it.
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
27 *
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
28 * Done with minimal error checking I'll admit...
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
29 */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
30 static sff_read_header *fread_sff_read_header(FILE *fp) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
31 sff_read_header *h;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
32 unsigned char rhdr[16];
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
33
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
34 if (16 != fread(rhdr, 1, 16, fp))
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
35 return NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
36 h = decode_sff_read_header(rhdr);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
37
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
38 if (h->name_len != fread(h->name, 1, h->name_len, fp))
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
39 return free_sff_read_header(h), NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
40
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
41 /* Pad to 8 chars */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
42 fseek(fp, (ftell(fp) + 7)& ~7, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
43
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
44 return h;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
45 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
46
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
47 static sff_common_header *fread_sff_common_header(FILE *fp) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
48 sff_common_header *h;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
49 unsigned char chdr[31];
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
50
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
51 if (31 != fread(chdr, 1, 31, fp))
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
52 return NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
53 h = decode_sff_common_header(chdr);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
54 if (h->flow_len != fread(h->flow, 1, h->flow_len, fp))
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
55 return free_sff_common_header(h), NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
56 if (h->key_len != fread(h->key , 1, h->key_len, fp))
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
57 return free_sff_common_header(h), NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
58
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
59 /* Pad to 8 chars */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
60 fseek(fp, (ftell(fp) + 7)& ~7, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
61
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
62 return h;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
63 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
64
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
65 void usage(void) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
66 fprintf(stderr, "Usage: hash_sff [-o outfile] [-t] sff_file ...\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
67 exit(1);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
68 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
69
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
70 int main(int argc, char **argv) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
71 HashFile *hf;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
72 sff_common_header *ch;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
73 sff_read_header *rh;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
74 int i, dot, arg;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
75 char *sff;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
76 char hdr[31];
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
77 uint64_t index_offset = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
78 uint32_t index_size, index_skipped;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
79 FILE *fp, *fpout = NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
80 int copy_archive = 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
81
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
82
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
83 /* process command line arguments of the form -arg */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
84 for (argc--, argv++; argc > 0; argc--, argv++) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
85 if (**argv != '-' || strcmp(*argv, "--") == 0)
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
86 break;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
87
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
88 if (strcmp(*argv, "-o") == 0 && argc > 1) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
89 if (NULL == (fpout = fopen(argv[1], "wb+"))) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
90 perror(argv[1]);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
91 return 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
92 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
93 argv++;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
94 argc--;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
95
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
96 } else if (strcmp(*argv, "-t") == 0) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
97 copy_archive = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
98
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
99 } else if (**argv == '-') {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
100 usage();
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
101 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
102
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
103 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
104
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
105 if (argc < 1)
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
106 usage();
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
107
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
108 if (copy_archive == 0 && argc != 1) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
109 fprintf(stderr, "-t option only supported with a single sff argument\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
110 return 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
111 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
112
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
113 /* Create the hash table */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
114 hf = HashFileCreate(0, HASH_DYNAMIC_SIZE);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
115 hf->nheaders = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
116 hf->headers = NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
117
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
118 for (arg = 0; arg < argc; arg++) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
119 /* open (and read) the entire sff file */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
120 sff = argv[arg];
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
121
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
122 printf("Indexing %s:\n", sff);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
123 if (fpout) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
124 if (NULL == (fp = fopen(sff, "rb"))) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
125 perror(sff);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
126 return 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
127 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
128 } else {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
129 if (NULL == (fp = fopen(sff, "rb+"))) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
130 perror(sff);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
131 return 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
132 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
133 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
134
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
135 /* Read the common header */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
136 ch = fread_sff_common_header(fp);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
137
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
138 if (ch->index_len && !fpout) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
139 fprintf(stderr, "Archive already contains index.\nReplacing the"
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
140 " index requires the \"-o outfile\" option.\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
141 return 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
142 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
143
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
144 /* Add the SFF common header as a hash file-header */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
145 hf->nheaders++;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
146 hf->headers = (HashFileSection *)realloc(hf->headers, hf->nheaders *
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
147 sizeof(*hf->headers));
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
148 hf->headers[hf->nheaders-1].pos = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
149 hf->headers[hf->nheaders-1].size = ch->header_len;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
150 hf->headers[hf->nheaders-1].cached_data = NULL;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
151
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
152 /* Read the index items, adding to the hash */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
153 index_skipped = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
154 dot = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
155 printf(" |\r|");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
156 for (i = 0; i < ch->nreads; i++) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
157 int dlen;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
158 uint32_t offset;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
159 HashData hd;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
160 HashFileItem *hfi;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
161
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
162 if (i >= dot * (ch->nreads/69)) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
163 putchar('.');
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
164 fflush(stdout);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
165 dot++;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
166 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
167
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
168 /* Skip old index if present */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
169 offset = ftell(fp);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
170 if (offset == ch->index_offset) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
171 fseek(fp, ch->index_len, SEEK_CUR);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
172 index_skipped = ch->index_len;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
173 continue;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
174 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
175
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
176 hfi = (HashFileItem *)calloc(1, sizeof(*hfi));
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
177 rh = fread_sff_read_header(fp);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
178 dlen = (2*ch->flow_len + 3*rh->nbases + 7) & ~7;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
179 fseek(fp, dlen, SEEK_CUR);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
180
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
181 hfi->header = hf->nheaders;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
182 hfi->footer = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
183 hfi->pos = offset - index_skipped;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
184 hfi->size = (ftell(fp) - index_skipped) - hfi->pos;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
185 hd.p = hfi;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
186
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
187 HashTableAdd(hf->h, rh->name, rh->name_len, hd, NULL);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
188 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
189 printf("\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
190 HashTableStats(hf->h, stdout);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
191
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
192 index_offset = ftell(fp) - index_skipped;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
193
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
194 /* Copy the archive if needed, minus the old index */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
195 if (fpout && copy_archive) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
196 char block[8192];
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
197 size_t len;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
198 uint64_t pos = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
199
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
200 printf("\nCopying archive\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
201
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
202 fseek(fp, 0, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
203 while (len = fread(block, 1, 8192, fp)) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
204 /* Skip previous index */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
205 if (pos < ch->index_offset && pos+len > ch->index_offset) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
206 len = ch->index_offset - pos;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
207 fseek(fp, ch->index_offset + ch->index_len, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
208 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
209 if (len && len != fwrite(block, 1, len, fpout)) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
210 fprintf(stderr, "Failed to output new archive\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
211 return 1;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
212 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
213 pos += len;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
214 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
215 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
216
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
217 if (!fpout) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
218 /* Save the hash */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
219 printf("Saving index\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
220 fseek(fp, 0, SEEK_END);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
221 index_size = HashFileSave(hf, fp, 0);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
222 HashFileDestroy(hf);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
223
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
224 /* Update the common header */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
225 fseek(fp, 0, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
226 fread(hdr, 1, 31, fp);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
227 *(uint64_t *)(hdr+8) = be_int8(index_offset);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
228 *(uint32_t *)(hdr+16) = be_int4(index_size);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
229 fseek(fp, 0, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
230 fwrite(hdr, 1, 31, fp);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
231 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
232
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
233 fclose(fp);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
234 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
235
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
236 if (fpout) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
237 /* Save the hash */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
238 printf("Saving index\n");
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
239
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
240 if (!copy_archive) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
241 hf->archive = strdup(argv[0]);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
242 index_offset = 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
243 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
244
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
245 fseek(fpout, 0, SEEK_END);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
246 index_size = HashFileSave(hf, fpout, 0);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
247 HashFileDestroy(hf);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
248
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
249 /* Update the common header to indicate index location */
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
250 if (copy_archive) {
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
251 fseek(fpout, 0, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
252 fread(hdr, 1, 31, fpout);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
253 *(uint64_t *)(hdr+8) = be_int8(index_offset);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
254 *(uint32_t *)(hdr+16) = be_int4(index_size);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
255 fseek(fpout, 0, SEEK_SET);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
256 fwrite(hdr, 1, 31, fpout);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
257 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
258 fclose(fpout);
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
259 }
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
260
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
261 return 0;
d901c9f41a6a Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
dawe
parents:
diff changeset
262 }