0
|
1 #include <stdarg.h>
|
|
2 #include <stdio.h>
|
|
3 #include <ctype.h>
|
|
4 #include <string.h>
|
|
5 #include <stdint.h>
|
|
6 #include "kstring.h"
|
|
7
|
|
8 int ksprintf(kstring_t *s, const char *fmt, ...)
|
|
9 {
|
|
10 va_list ap;
|
|
11 int l;
|
|
12 va_start(ap, fmt);
|
|
13 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'.
|
|
14 va_end(ap);
|
|
15 if (l + 1 > s->m - s->l) {
|
|
16 s->m = s->l + l + 2;
|
|
17 kroundup32(s->m);
|
|
18 s->s = (char*)realloc(s->s, s->m);
|
|
19 va_start(ap, fmt);
|
|
20 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
|
|
21 }
|
|
22 va_end(ap);
|
|
23 s->l += l;
|
|
24 return l;
|
|
25 }
|
|
26
|
|
27 // s MUST BE a null terminated string; l = strlen(s)
|
|
28 int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
|
|
29 {
|
|
30 int i, n, max, last_char, last_start, *offsets, l;
|
|
31 n = 0; max = *_max; offsets = *_offsets;
|
|
32 l = strlen(s);
|
|
33
|
|
34 #define __ksplit_aux do { \
|
|
35 if (_offsets) { \
|
|
36 s[i] = 0; \
|
|
37 if (n == max) { \
|
|
38 max = max? max<<1 : 2; \
|
|
39 offsets = (int*)realloc(offsets, sizeof(int) * max); \
|
|
40 } \
|
|
41 offsets[n++] = last_start; \
|
|
42 } else ++n; \
|
|
43 } while (0)
|
|
44
|
|
45 for (i = 0, last_char = last_start = 0; i <= l; ++i) {
|
|
46 if (delimiter == 0) {
|
|
47 if (isspace(s[i]) || s[i] == 0) {
|
|
48 if (isgraph(last_char)) __ksplit_aux; // the end of a field
|
|
49 } else {
|
|
50 if (isspace(last_char) || last_char == 0) last_start = i;
|
|
51 }
|
|
52 } else {
|
|
53 if (s[i] == delimiter || s[i] == 0) {
|
|
54 if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
|
|
55 } else {
|
|
56 if (last_char == delimiter || last_char == 0) last_start = i;
|
|
57 }
|
|
58 }
|
|
59 last_char = s[i];
|
|
60 }
|
|
61 *_max = max; *_offsets = offsets;
|
|
62 return n;
|
|
63 }
|
|
64
|
|
65 /**********************
|
|
66 * Boyer-Moore search *
|
|
67 **********************/
|
|
68
|
|
69 // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
|
|
70 int *ksBM_prep(const uint8_t *pat, int m)
|
|
71 {
|
|
72 int i, *suff, *prep, *bmGs, *bmBc;
|
|
73 prep = calloc(m + 256, 1);
|
|
74 bmGs = prep; bmBc = prep + m;
|
|
75 { // preBmBc()
|
|
76 for (i = 0; i < 256; ++i) bmBc[i] = m;
|
|
77 for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
|
|
78 }
|
|
79 suff = calloc(m, sizeof(int));
|
|
80 { // suffixes()
|
|
81 int f = 0, g;
|
|
82 suff[m - 1] = m;
|
|
83 g = m - 1;
|
|
84 for (i = m - 2; i >= 0; --i) {
|
|
85 if (i > g && suff[i + m - 1 - f] < i - g)
|
|
86 suff[i] = suff[i + m - 1 - f];
|
|
87 else {
|
|
88 if (i < g) g = i;
|
|
89 f = i;
|
|
90 while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
|
|
91 suff[i] = f - g;
|
|
92 }
|
|
93 }
|
|
94 }
|
|
95 { // preBmGs()
|
|
96 int j = 0;
|
|
97 for (i = 0; i < m; ++i) bmGs[i] = m;
|
|
98 for (i = m - 1; i >= 0; --i)
|
|
99 if (suff[i] == i + 1)
|
|
100 for (; j < m - 1 - i; ++j)
|
|
101 if (bmGs[j] == m)
|
|
102 bmGs[j] = m - 1 - i;
|
|
103 for (i = 0; i <= m - 2; ++i)
|
|
104 bmGs[m - 1 - suff[i]] = m - 1 - i;
|
|
105 }
|
|
106 free(suff);
|
|
107 return prep;
|
|
108 }
|
|
109
|
|
110 int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches)
|
|
111 {
|
|
112 int i, j, *prep, *bmGs, *bmBc;
|
|
113 int *matches = 0, mm = 0, nm = 0;
|
|
114 prep = _prep? _prep : ksBM_prep(pat, m);
|
|
115 bmGs = prep; bmBc = prep + m;
|
|
116 j = 0;
|
|
117 while (j <= n - m) {
|
|
118 for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
|
|
119 if (i < 0) {
|
|
120 if (nm == mm) {
|
|
121 mm = mm? mm<<1 : 1;
|
|
122 matches = realloc(matches, mm * sizeof(int));
|
|
123 }
|
|
124 matches[nm++] = j;
|
|
125 j += bmGs[0];
|
|
126 } else {
|
|
127 int max = bmBc[str[i+j]] - m + 1 + i;
|
|
128 if (max < bmGs[i]) max = bmGs[i];
|
|
129 j += max;
|
|
130 }
|
|
131 }
|
|
132 *n_matches = nm;
|
|
133 if (_prep == 0) free(prep);
|
|
134 return matches;
|
|
135 }
|
|
136
|
|
137 #ifdef KSTRING_MAIN
|
|
138 #include <stdio.h>
|
|
139 int main()
|
|
140 {
|
|
141 kstring_t *s;
|
|
142 int *fields, n, i;
|
|
143 s = (kstring_t*)calloc(1, sizeof(kstring_t));
|
|
144 // test ksprintf()
|
|
145 ksprintf(s, " abcdefg: %d ", 100);
|
|
146 printf("'%s'\n", s->s);
|
|
147 // test ksplit()
|
|
148 fields = ksplit(s, 0, &n);
|
|
149 for (i = 0; i < n; ++i)
|
|
150 printf("field[%d] = '%s'\n", i, s->s + fields[i]);
|
|
151 free(s);
|
|
152
|
|
153 {
|
|
154 static char *str = "abcdefgcdg";
|
|
155 static char *pat = "cd";
|
|
156 int n, *matches;
|
|
157 matches = ksBM_search(str, strlen(str), pat, strlen(pat), 0, &n);
|
|
158 printf("%d: \n", n);
|
|
159 for (i = 0; i < n; ++i)
|
|
160 printf("- %d\n", matches[i]);
|
|
161 free(matches);
|
|
162 }
|
|
163 return 0;
|
|
164 }
|
|
165 #endif
|