Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-1.0.2/src/squid/hsregex.c @ 1:bc707542e5de
Uploaded
author | clustalomega |
---|---|
date | Thu, 21 Jul 2011 13:35:08 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ff1768533a07 | 1:bc707542e5de |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 | |
10 /***************************************************************** | |
11 * This code is an altered version of Henry Spencer's | |
12 * regex library. Alterations are limited to minor streamlining, | |
13 * and some name changes to protect the SQUID namespace. | |
14 * Henry's copyright notice appears below. | |
15 * You can obtain the original from | |
16 * ftp://ftp.zoo.toronto.edu/pub/bookregex.tar.Z | |
17 * Thanks, Henry! | |
18 * | |
19 * The magic word for compiling a testdriver: NBA_TEAM_IN_STL | |
20 * gcc -o test -g -DNBA_TEAM_IN_STL -L. hsregex.c -lsquid -lm | |
21 * | |
22 * Usage: | |
23 * test <pattern> <ntok> <string> | |
24 * | |
25 * SRE, Fri Aug 28 11:10:17 1998 | |
26 * CVS $Id: hsregex.c,v 1.7 2001/08/09 17:50:17 eddy Exp) | |
27 *****************************************************************/ | |
28 | |
29 #include <stdio.h> | |
30 #include <stdlib.h> | |
31 #include <string.h> | |
32 #include <ctype.h> | |
33 #include "squid.h" | |
34 | |
35 /* global sqd_parse[] are managed by Strparse(). | |
36 * WARNING: TODO: this code is not threadsafe, and needs to be revised. | |
37 */ | |
38 char *sqd_parse[10]; | |
39 | |
40 /* Function: Strparse() | |
41 * | |
42 * Purpose: Match a regexp to a string. Returns 1 if pattern matches, | |
43 * else 0. | |
44 * | |
45 * Much like Perl, Strparse() makes copies of the matching | |
46 * substrings available via globals, sqd_parse[]. | |
47 * sqd_parse[0] contains a copy of the complete matched | |
48 * text. sqd_parse[1-9] contain copies of up to nine | |
49 * different substrings matched within parentheses. | |
50 * The memory for these strings is internally managed and | |
51 * volatile; the next call to Strparse() may destroy them. | |
52 * If the caller needs the matched substrings to persist | |
53 * beyond a new Strparse() call, it must make its own | |
54 * copies. | |
55 * | |
56 * A minor drawback of the memory management is that | |
57 * there will be a small amount of unfree'd memory being | |
58 * managed by Strparse() when a program exits; this may | |
59 * confuse memory debugging (Purify, dbmalloc). The | |
60 * general cleanup function SqdClean() is provided; | |
61 * you can call this before exiting. | |
62 * | |
63 * Uses an extended POSIX regular expression interface. | |
64 * A copylefted GNU implementation is included in the squid | |
65 * implementation (gnuregex.c) for use on non-POSIX compliant | |
66 * systems. POSIX 1003.2-compliant systems (all UNIX, | |
67 * some WinNT, I believe) can omit the GNU code if necessary. | |
68 * | |
69 * I built this for ease of use, not speed nor efficiency. | |
70 * | |
71 * Example: Strparse("foo-...-baz", "foo-bar-baz") returns 0 | |
72 * Strparse("foo-(...)-baz", "foo-bar-baz") | |
73 * returns 0; sqd_parse[0] is "foo-bar-baz"; | |
74 * sqd_parse[1] is "bar". | |
75 * | |
76 * A real example: | |
77 * s = ">gnl|ti|3 G10P69425RH2.T0 {SUB 81..737} /len=657" | |
78 * pat = "SUB ([0-9]+)" | |
79 * Strparse(pat, s, 1) | |
80 * returns 1; sqd_parse[1] is "81". | |
81 * | |
82 * Args: rexp - regular expression, extended POSIX form | |
83 * s - string to match against | |
84 * ntok - number of () substrings we will save (maximum NSUBEXP-1) | |
85 * | |
86 * Return: 1 on match, 0 if no match | |
87 */ | |
88 int | |
89 Strparse(char *rexp, char *s, int ntok) | |
90 { | |
91 sqd_regexp *pat; | |
92 int code; | |
93 int len; | |
94 int i; | |
95 /* sanity check */ | |
96 if (ntok >= NSUBEXP ) Die("Strparse(): ntok must be <= %d", NSUBEXP-1); | |
97 | |
98 /* Free previous global substring buffers | |
99 */ | |
100 for (i = 0; i <= ntok; i++) | |
101 if (sqd_parse[i] != NULL) | |
102 { | |
103 free(sqd_parse[i]); | |
104 sqd_parse[i] = NULL; | |
105 } | |
106 | |
107 /* Compile and match the pattern, using our modified | |
108 * copy of Henry Spencer's regexp library | |
109 */ | |
110 if ((pat = sqd_regcomp(rexp)) == NULL) | |
111 Die("regexp compilation failed."); | |
112 code = sqd_regexec(pat, s); | |
113 | |
114 /* Fill the global substring buffers | |
115 */ | |
116 if (code == 1) | |
117 for (i = 0; i <= ntok; i++) | |
118 if (pat->startp[i] != NULL && pat->endp[i] != NULL) | |
119 { | |
120 len = pat->endp[i] - pat->startp[i]; | |
121 sqd_parse[i] = (char *) MallocOrDie(sizeof(char) * (len+1)); | |
122 strncpy(sqd_parse[i], pat->startp[i], len); | |
123 sqd_parse[i][len] = '\0'; | |
124 } | |
125 | |
126 free(pat); | |
127 return code; | |
128 } | |
129 | |
130 /* Function: SqdClean() | |
131 * Date: SRE, Wed Oct 29 12:52:08 1997 [TWA 721] | |
132 * | |
133 * Purpose: Clean up any squid library allocations before exiting | |
134 * a program, so we don't leave unfree'd memory around | |
135 * and confuse a malloc debugger like Purify or dbmalloc. | |
136 */ | |
137 void | |
138 SqdClean(void) | |
139 { | |
140 int i; | |
141 | |
142 /* Free global substring buffers that Strparse() uses | |
143 */ | |
144 for (i = 0; i <= 9; i++) | |
145 if (sqd_parse[i] != NULL) { | |
146 free(sqd_parse[i]); | |
147 sqd_parse[i] = NULL; | |
148 } | |
149 } | |
150 | |
151 | |
152 | |
153 /* all code below is: | |
154 * Copyright (c) 1986, 1993, 1995 by University of Toronto. | |
155 * Written by Henry Spencer. Not derived from licensed software. | |
156 * | |
157 * Permission is granted to anyone to use this software for any | |
158 * purpose on any computer system, and to redistribute it in any way, | |
159 * subject to the following restrictions: | |
160 * | |
161 * 1. The author is not responsible for the consequences of use of | |
162 * this software, no matter how awful, even if they arise | |
163 * from defects in it. | |
164 * | |
165 * 2. The origin of this software must not be misrepresented, either | |
166 * by explicit claim or by omission. | |
167 * | |
168 * 3. Altered versions must be plainly marked as such, and must not | |
169 * be misrepresented (by explicit claim or omission) as being | |
170 * the original software. | |
171 * | |
172 * 4. This notice must not be removed or altered. | |
173 */ | |
174 | |
175 /* | |
176 * sqd_regcomp and sqd_regexec -- sqd_regsub and sqd_regerror are elsewhere | |
177 */ | |
178 | |
179 /* | |
180 * The first byte of the regexp internal "program" is actually this magic | |
181 * number; the start node begins in the second byte. | |
182 */ | |
183 #define SQD_REGMAGIC 0234 | |
184 | |
185 /* | |
186 * The "internal use only" fields in regexp.h are present to pass info from | |
187 * compile to execute that permits the execute phase to run lots faster on | |
188 * simple cases. They are: | |
189 * | |
190 * regstart char that must begin a match; '\0' if none obvious | |
191 * reganch is the match anchored (at beginning-of-line only)? | |
192 * regmust string (pointer into program) that match must include, or NULL | |
193 * regmlen length of regmust string | |
194 * | |
195 * Regstart and reganch permit very fast decisions on suitable starting points | |
196 * for a match, cutting down the work a lot. Regmust permits fast rejection | |
197 * of lines that cannot possibly match. The regmust tests are costly enough | |
198 * that sqd_regcomp() supplies a regmust only if the r.e. contains something | |
199 * potentially expensive (at present, the only such thing detected is * or + | |
200 * at the start of the r.e., which can involve a lot of backup). Regmlen is | |
201 * supplied because the test in sqd_regexec() needs it and sqd_regcomp() is computing | |
202 * it anyway. | |
203 */ | |
204 | |
205 /* | |
206 * Structure for regexp "program". This is essentially a linear encoding | |
207 * of a nondeterministic finite-state machine (aka syntax charts or | |
208 * "railroad normal form" in parsing technology). Each node is an opcode | |
209 * plus a "next" pointer, possibly plus an operand. "Next" pointers of | |
210 * all nodes except BRANCH implement concatenation; a "next" pointer with | |
211 * a BRANCH on both ends of it is connecting two alternatives. (Here we | |
212 * have one of the subtle syntax dependencies: an individual BRANCH (as | |
213 * opposed to a collection of them) is never concatenated with anything | |
214 * because of operator precedence.) The operand of some types of node is | |
215 * a literal string; for others, it is a node leading into a sub-FSM. In | |
216 * particular, the operand of a BRANCH node is the first node of the branch. | |
217 * (NB this is *not* a tree structure: the tail of the branch connects | |
218 * to the thing following the set of BRANCHes.) The opcodes are: | |
219 */ | |
220 | |
221 /* definition number opnd? meaning */ | |
222 #define END 0 /* no End of program. */ | |
223 #define BOL 1 /* no Match beginning of line. */ | |
224 #define EOL 2 /* no Match end of line. */ | |
225 #define ANY 3 /* no Match any character. */ | |
226 #define ANYOF 4 /* str Match any of these. */ | |
227 #define ANYBUT 5 /* str Match any but one of these. */ | |
228 #define BRANCH 6 /* node Match this, or the next..\&. */ | |
229 #define BACK 7 /* no "next" ptr points backward. */ | |
230 #define EXACTLY 8 /* str Match this string. */ | |
231 #define NOTHING 9 /* no Match empty string. */ | |
232 #define STAR 10 /* node Match this 0 or more times. */ | |
233 #define PLUS 11 /* node Match this 1 or more times. */ | |
234 #define OPEN 20 /* no Sub-RE starts here. */ | |
235 /* OPEN+1 is number 1, etc. */ | |
236 #define CLOSE 30 /* no Analogous to OPEN. */ | |
237 | |
238 /* | |
239 * Opcode notes: | |
240 * | |
241 * BRANCH The set of branches constituting a single choice are hooked | |
242 * together with their "next" pointers, since precedence prevents | |
243 * anything being concatenated to any individual branch. The | |
244 * "next" pointer of the last BRANCH in a choice points to the | |
245 * thing following the whole choice. This is also where the | |
246 * final "next" pointer of each individual branch points; each | |
247 * branch starts with the operand node of a BRANCH node. | |
248 * | |
249 * BACK Normal "next" pointers all implicitly point forward; BACK | |
250 * exists to make loop structures possible. | |
251 * | |
252 * STAR,PLUS '?', and complex '*' and '+', are implemented as circular | |
253 * BRANCH structures using BACK. Simple cases (one character | |
254 * per match) are implemented with STAR and PLUS for speed | |
255 * and to minimize recursive plunges. | |
256 * | |
257 * OPEN,CLOSE ...are numbered at compile time. | |
258 */ | |
259 | |
260 /* | |
261 * A node is one char of opcode followed by two chars of "next" pointer. | |
262 * "Next" pointers are stored as two 8-bit pieces, high order first. The | |
263 * value is a positive offset from the opcode of the node containing it. | |
264 * An operand, if any, simply follows the node. (Note that much of the | |
265 * code generation knows about this implicit relationship.) | |
266 * | |
267 * Using two bytes for the "next" pointer is vast overkill for most things, | |
268 * but allows patterns to get big without disasters. | |
269 */ | |
270 #define OP(p) (*(p)) | |
271 #define NEXT(p) (((*((p)+1)&0177)<<8) + (*((p)+2)&0377)) | |
272 #define OPERAND(p) ((p) + 3) | |
273 | |
274 /* | |
275 * Utility definitions. | |
276 */ | |
277 #define FAIL(m) { sqd_regerror(m); return(NULL); } | |
278 #define ISREPN(c) ((c) == '*' || (c) == '+' || (c) == '?') | |
279 #define META "^$.[()|?+*\\" | |
280 | |
281 /* | |
282 * Flags to be passed up and down. | |
283 */ | |
284 #define HASWIDTH 01 /* Known never to match null string. */ | |
285 #define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ | |
286 #define SPSTART 04 /* Starts with * or +. */ | |
287 #define WORST 0 /* Worst case. */ | |
288 | |
289 /* | |
290 * Work-variable struct for sqd_regcomp(). | |
291 */ | |
292 struct comp { | |
293 char *regparse; /* Input-scan pointer. */ | |
294 int regnpar; /* () count. */ | |
295 char *regcode; /* Code-emit pointer; ®dummy = don't. */ | |
296 char regdummy[3]; /* NOTHING, 0 next ptr */ | |
297 long regsize; /* Code size. */ | |
298 }; | |
299 #define EMITTING(cp) ((cp)->regcode != (cp)->regdummy) | |
300 | |
301 /* | |
302 * Forward declarations for sqd_regcomp()'s friends. | |
303 */ | |
304 static char *reg(struct comp *cp, int paren, int *flagp); | |
305 static char *regbranch(struct comp *cp, int *flagp); | |
306 static char *regpiece(struct comp *cp, int *flagp); | |
307 static char *regatom(struct comp *cp, int *flagp); | |
308 static char *regnode(struct comp *cp, int op); | |
309 static char *regnext(char *node); | |
310 static void regc(struct comp *cp, int c); | |
311 static void reginsert(struct comp *cp, int op, char *opnd); | |
312 static void regtail(struct comp *cp, char *p, char *val); | |
313 static void regoptail(struct comp *cp, char *p, char *val); | |
314 | |
315 /* | |
316 - sqd_regcomp - compile a regular expression into internal code | |
317 * | |
318 * We can't allocate space until we know how big the compiled form will be, | |
319 * but we can't compile it (and thus know how big it is) until we've got a | |
320 * place to put the code. So we cheat: we compile it twice, once with code | |
321 * generation turned off and size counting turned on, and once "for real". | |
322 * This also means that we don't allocate space until we are sure that the | |
323 * thing really will compile successfully, and we never have to move the | |
324 * code and thus invalidate pointers into it. (Note that it has to be in | |
325 * one piece because free() must be able to free it all.) | |
326 * | |
327 * Beware that the optimization-preparation code in here knows about some | |
328 * of the structure of the compiled regexp. | |
329 */ | |
330 sqd_regexp * | |
331 sqd_regcomp(exp) | |
332 const char *exp; | |
333 { | |
334 register sqd_regexp *r; | |
335 register char *scan; | |
336 int flags; | |
337 struct comp co; | |
338 | |
339 if (exp == NULL) | |
340 FAIL("NULL argument to sqd_regcomp"); | |
341 | |
342 /* First pass: determine size, legality. */ | |
343 co.regparse = (char *)exp; | |
344 co.regnpar = 1; | |
345 co.regsize = 0L; | |
346 co.regdummy[0] = NOTHING; | |
347 co.regdummy[1] = co.regdummy[2] = 0; | |
348 co.regcode = co.regdummy; | |
349 regc(&co, SQD_REGMAGIC); | |
350 if (reg(&co, 0, &flags) == NULL) | |
351 return(NULL); | |
352 | |
353 /* Small enough for pointer-storage convention? */ | |
354 if (co.regsize >= 0x7fffL) /* Probably could be 0xffffL. */ | |
355 FAIL("regexp too big"); | |
356 | |
357 /* Allocate space. */ | |
358 r = (sqd_regexp *)malloc(sizeof(sqd_regexp) + (size_t)co.regsize); | |
359 if (r == NULL) | |
360 FAIL("out of space"); | |
361 | |
362 /* Second pass: emit code. */ | |
363 co.regparse = (char *)exp; | |
364 co.regnpar = 1; | |
365 co.regcode = r->program; | |
366 regc(&co, SQD_REGMAGIC); | |
367 if (reg(&co, 0, &flags) == NULL) | |
368 return(NULL); | |
369 | |
370 /* Dig out information for optimizations. */ | |
371 r->regstart = '\0'; /* Worst-case defaults. */ | |
372 r->reganch = 0; | |
373 r->regmust = NULL; | |
374 r->regmlen = 0; | |
375 scan = r->program+1; /* First BRANCH. */ | |
376 if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ | |
377 scan = OPERAND(scan); | |
378 | |
379 /* Starting-point info. */ | |
380 if (OP(scan) == EXACTLY) | |
381 r->regstart = *OPERAND(scan); | |
382 else if (OP(scan) == BOL) | |
383 r->reganch = 1; | |
384 | |
385 /* | |
386 * If there's something expensive in the r.e., find the | |
387 * longest literal string that must appear and make it the | |
388 * regmust. Resolve ties in favor of later strings, since | |
389 * the regstart check works with the beginning of the r.e. | |
390 * and avoiding duplication strengthens checking. Not a | |
391 * strong reason, but sufficient in the absence of others. | |
392 */ | |
393 if (flags&SPSTART) { | |
394 register char *longest = NULL; | |
395 register size_t len = 0; | |
396 | |
397 for (; scan != NULL; scan = regnext(scan)) | |
398 if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { | |
399 longest = OPERAND(scan); | |
400 len = strlen(OPERAND(scan)); | |
401 } | |
402 r->regmust = longest; | |
403 r->regmlen = (int)len; | |
404 } | |
405 } | |
406 | |
407 return(r); | |
408 } | |
409 | |
410 /* | |
411 - reg - regular expression, i.e. main body or parenthesized thing | |
412 * | |
413 * Caller must absorb opening parenthesis. | |
414 * | |
415 * Combining parenthesis handling with the base level of regular expression | |
416 * is a trifle forced, but the need to tie the tails of the branches to what | |
417 * follows makes it hard to avoid. | |
418 */ | |
419 static char * | |
420 reg(cp, paren, flagp) | |
421 register struct comp *cp; | |
422 int paren; /* Parenthesized? */ | |
423 int *flagp; | |
424 { | |
425 register char *ret = NULL; /* SRE: NULL init added to silence gcc */ | |
426 register char *br; | |
427 register char *ender; | |
428 register int parno = 0; /* SRE: init added to silence gcc */ | |
429 int flags; | |
430 | |
431 *flagp = HASWIDTH; /* Tentatively. */ | |
432 | |
433 if (paren) { | |
434 /* Make an OPEN node. */ | |
435 if (cp->regnpar >= NSUBEXP) | |
436 FAIL("too many ()"); | |
437 parno = cp->regnpar; | |
438 cp->regnpar++; | |
439 ret = regnode(cp, OPEN+parno); | |
440 } | |
441 | |
442 /* Pick up the branches, linking them together. */ | |
443 br = regbranch(cp, &flags); | |
444 if (br == NULL) | |
445 return(NULL); | |
446 if (paren) | |
447 regtail(cp, ret, br); /* OPEN -> first. */ | |
448 else | |
449 ret = br; | |
450 *flagp &= ~(~flags&HASWIDTH); /* Clear bit if bit 0. */ | |
451 *flagp |= flags&SPSTART; | |
452 while (*cp->regparse == '|') { | |
453 cp->regparse++; | |
454 br = regbranch(cp, &flags); | |
455 if (br == NULL) | |
456 return(NULL); | |
457 regtail(cp, ret, br); /* BRANCH -> BRANCH. */ | |
458 *flagp &= ~(~flags&HASWIDTH); | |
459 *flagp |= flags&SPSTART; | |
460 } | |
461 | |
462 /* Make a closing node, and hook it on the end. */ | |
463 ender = regnode(cp, (paren) ? CLOSE+parno : END); | |
464 regtail(cp, ret, ender); | |
465 | |
466 /* Hook the tails of the branches to the closing node. */ | |
467 for (br = ret; br != NULL; br = regnext(br)) | |
468 regoptail(cp, br, ender); | |
469 | |
470 /* Check for proper termination. */ | |
471 if (paren && *cp->regparse++ != ')') { | |
472 FAIL("unterminated ()"); | |
473 } else if (!paren && *cp->regparse != '\0') { | |
474 if (*cp->regparse == ')') { | |
475 FAIL("unmatched ()"); | |
476 } else | |
477 FAIL("internal error: junk on end"); | |
478 /* NOTREACHED */ | |
479 } | |
480 | |
481 return(ret); | |
482 } | |
483 | |
484 /* | |
485 - regbranch - one alternative of an | operator | |
486 * | |
487 * Implements the concatenation operator. | |
488 */ | |
489 static char * | |
490 regbranch(cp, flagp) | |
491 register struct comp *cp; | |
492 int *flagp; | |
493 { | |
494 register char *ret; | |
495 register char *chain; | |
496 register char *latest; | |
497 int flags; | |
498 register int c; | |
499 | |
500 *flagp = WORST; /* Tentatively. */ | |
501 | |
502 ret = regnode(cp, BRANCH); | |
503 chain = NULL; | |
504 while ((c = *cp->regparse) != '\0' && c != '|' && c != ')') { | |
505 latest = regpiece(cp, &flags); | |
506 if (latest == NULL) | |
507 return(NULL); | |
508 *flagp |= flags&HASWIDTH; | |
509 if (chain == NULL) /* First piece. */ | |
510 *flagp |= flags&SPSTART; | |
511 else | |
512 regtail(cp, chain, latest); | |
513 chain = latest; | |
514 } | |
515 if (chain == NULL) /* Loop ran zero times. */ | |
516 (void) regnode(cp, NOTHING); | |
517 | |
518 return(ret); | |
519 } | |
520 | |
521 /* | |
522 - regpiece - something followed by possible [*+?] | |
523 * | |
524 * Note that the branching code sequences used for ? and the general cases | |
525 * of * and + are somewhat optimized: they use the same NOTHING node as | |
526 * both the endmarker for their branch list and the body of the last branch. | |
527 * It might seem that this node could be dispensed with entirely, but the | |
528 * endmarker role is not redundant. | |
529 */ | |
530 static char * | |
531 regpiece(cp, flagp) | |
532 register struct comp *cp; | |
533 int *flagp; | |
534 { | |
535 register char *ret; | |
536 register char op; | |
537 register char *next; | |
538 int flags; | |
539 | |
540 ret = regatom(cp, &flags); | |
541 if (ret == NULL) | |
542 return(NULL); | |
543 | |
544 op = *cp->regparse; | |
545 if (!ISREPN(op)) { | |
546 *flagp = flags; | |
547 return(ret); | |
548 } | |
549 | |
550 if (!(flags&HASWIDTH) && op != '?') | |
551 FAIL("*+ operand could be empty"); | |
552 switch (op) { | |
553 case '*': *flagp = WORST|SPSTART; break; | |
554 case '+': *flagp = WORST|SPSTART|HASWIDTH; break; | |
555 case '?': *flagp = WORST; break; | |
556 } | |
557 | |
558 if (op == '*' && (flags&SIMPLE)) | |
559 reginsert(cp, STAR, ret); | |
560 else if (op == '*') { | |
561 /* Emit x* as (x&|), where & means "self". */ | |
562 reginsert(cp, BRANCH, ret); /* Either x */ | |
563 regoptail(cp, ret, regnode(cp, BACK)); /* and loop */ | |
564 regoptail(cp, ret, ret); /* back */ | |
565 regtail(cp, ret, regnode(cp, BRANCH)); /* or */ | |
566 regtail(cp, ret, regnode(cp, NOTHING)); /* null. */ | |
567 } else if (op == '+' && (flags&SIMPLE)) | |
568 reginsert(cp, PLUS, ret); | |
569 else if (op == '+') { | |
570 /* Emit x+ as x(&|), where & means "self". */ | |
571 next = regnode(cp, BRANCH); /* Either */ | |
572 regtail(cp, ret, next); | |
573 regtail(cp, regnode(cp, BACK), ret); /* loop back */ | |
574 regtail(cp, next, regnode(cp, BRANCH)); /* or */ | |
575 regtail(cp, ret, regnode(cp, NOTHING)); /* null. */ | |
576 } else if (op == '?') { | |
577 /* Emit x? as (x|) */ | |
578 reginsert(cp, BRANCH, ret); /* Either x */ | |
579 regtail(cp, ret, regnode(cp, BRANCH)); /* or */ | |
580 next = regnode(cp, NOTHING); /* null. */ | |
581 regtail(cp, ret, next); | |
582 regoptail(cp, ret, next); | |
583 } | |
584 cp->regparse++; | |
585 if (ISREPN(*cp->regparse)) | |
586 FAIL("nested *?+"); | |
587 | |
588 return(ret); | |
589 } | |
590 | |
591 /* | |
592 - regatom - the lowest level | |
593 * | |
594 * Optimization: gobbles an entire sequence of ordinary characters so that | |
595 * it can turn them into a single node, which is smaller to store and | |
596 * faster to run. Backslashed characters are exceptions, each becoming a | |
597 * separate node; the code is simpler that way and it's not worth fixing. | |
598 */ | |
599 static char * | |
600 regatom(cp, flagp) | |
601 register struct comp *cp; | |
602 int *flagp; | |
603 { | |
604 register char *ret; | |
605 int flags; | |
606 | |
607 *flagp = WORST; /* Tentatively. */ | |
608 | |
609 switch (*cp->regparse++) { | |
610 case '^': | |
611 ret = regnode(cp, BOL); | |
612 break; | |
613 case '$': | |
614 ret = regnode(cp, EOL); | |
615 break; | |
616 case '.': | |
617 ret = regnode(cp, ANY); | |
618 *flagp |= HASWIDTH|SIMPLE; | |
619 break; | |
620 case '[': { | |
621 register int range; | |
622 register int rangeend; | |
623 register int c; | |
624 | |
625 if (*cp->regparse == '^') { /* Complement of range. */ | |
626 ret = regnode(cp, ANYBUT); | |
627 cp->regparse++; | |
628 } else | |
629 ret = regnode(cp, ANYOF); | |
630 if ((c = *cp->regparse) == ']' || c == '-') { | |
631 regc(cp, c); | |
632 cp->regparse++; | |
633 } | |
634 while ((c = *cp->regparse++) != '\0' && c != ']') { | |
635 if (c != '-') | |
636 regc(cp, c); | |
637 else if ((c = *cp->regparse) == ']' || c == '\0') | |
638 regc(cp, '-'); | |
639 else { | |
640 range = (unsigned char)*(cp->regparse-2); | |
641 rangeend = (unsigned char)c; | |
642 if (range > rangeend) | |
643 FAIL("invalid [] range"); | |
644 for (range++; range <= rangeend; range++) | |
645 regc(cp, range); | |
646 cp->regparse++; | |
647 } | |
648 } | |
649 regc(cp, '\0'); | |
650 if (c != ']') | |
651 FAIL("unmatched []"); | |
652 *flagp |= HASWIDTH|SIMPLE; | |
653 break; | |
654 } | |
655 case '(': | |
656 ret = reg(cp, 1, &flags); | |
657 if (ret == NULL) | |
658 return(NULL); | |
659 *flagp |= flags&(HASWIDTH|SPSTART); | |
660 break; | |
661 case '\0': | |
662 case '|': | |
663 case ')': | |
664 /* supposed to be caught earlier */ | |
665 FAIL("internal error: \\0|) unexpected"); | |
666 break; | |
667 case '?': | |
668 case '+': | |
669 case '*': | |
670 FAIL("?+* follows nothing"); | |
671 break; | |
672 case '\\': | |
673 if (*cp->regparse == '\0') | |
674 FAIL("trailing \\"); | |
675 ret = regnode(cp, EXACTLY); | |
676 regc(cp, *cp->regparse++); | |
677 regc(cp, '\0'); | |
678 *flagp |= HASWIDTH|SIMPLE; | |
679 break; | |
680 default: { | |
681 register size_t len; | |
682 register char ender; | |
683 | |
684 cp->regparse--; | |
685 len = strcspn(cp->regparse, META); | |
686 if (len == 0) | |
687 FAIL("internal error: strcspn 0"); | |
688 ender = *(cp->regparse+len); | |
689 if (len > 1 && ISREPN(ender)) | |
690 len--; /* Back off clear of ?+* operand. */ | |
691 *flagp |= HASWIDTH; | |
692 if (len == 1) | |
693 *flagp |= SIMPLE; | |
694 ret = regnode(cp, EXACTLY); | |
695 for (; len > 0; len--) | |
696 regc(cp, *cp->regparse++); | |
697 regc(cp, '\0'); | |
698 break; | |
699 } | |
700 } | |
701 | |
702 return(ret); | |
703 } | |
704 | |
705 /* | |
706 - regnode - emit a node | |
707 */ | |
708 static char * /* Location. */ | |
709 regnode(cp, op) | |
710 register struct comp *cp; | |
711 char op; | |
712 { | |
713 register char *const ret = cp->regcode; | |
714 register char *ptr; | |
715 | |
716 if (!EMITTING(cp)) { | |
717 cp->regsize += 3; | |
718 return(ret); | |
719 } | |
720 | |
721 ptr = ret; | |
722 *ptr++ = op; | |
723 *ptr++ = '\0'; /* Null next pointer. */ | |
724 *ptr++ = '\0'; | |
725 cp->regcode = ptr; | |
726 | |
727 return(ret); | |
728 } | |
729 | |
730 /* | |
731 - regc - emit (if appropriate) a byte of code | |
732 */ | |
733 static void | |
734 regc(cp, b) | |
735 register struct comp *cp; | |
736 char b; | |
737 { | |
738 if (EMITTING(cp)) | |
739 *cp->regcode++ = b; | |
740 else | |
741 cp->regsize++; | |
742 } | |
743 | |
744 /* | |
745 - reginsert - insert an operator in front of already-emitted operand | |
746 * | |
747 * Means relocating the operand. | |
748 */ | |
749 static void | |
750 reginsert(cp, op, opnd) | |
751 register struct comp *cp; | |
752 char op; | |
753 char *opnd; | |
754 { | |
755 register char *place; | |
756 | |
757 if (!EMITTING(cp)) { | |
758 cp->regsize += 3; | |
759 return; | |
760 } | |
761 | |
762 (void) memmove(opnd+3, opnd, (size_t)(cp->regcode - opnd)); | |
763 cp->regcode += 3; | |
764 | |
765 place = opnd; /* Op node, where operand used to be. */ | |
766 *place++ = op; | |
767 *place++ = '\0'; | |
768 *place++ = '\0'; | |
769 } | |
770 | |
771 /* | |
772 - regtail - set the next-pointer at the end of a node chain | |
773 */ | |
774 static void | |
775 regtail(cp, p, val) | |
776 register struct comp *cp; | |
777 char *p; | |
778 char *val; | |
779 { | |
780 register char *scan; | |
781 register char *temp; | |
782 register int offset; | |
783 | |
784 if (!EMITTING(cp)) | |
785 return; | |
786 | |
787 /* Find last node. */ | |
788 for (scan = p; (temp = regnext(scan)) != NULL; scan = temp) | |
789 continue; | |
790 | |
791 offset = (OP(scan) == BACK) ? scan - val : val - scan; | |
792 *(scan+1) = (offset>>8)&0177; | |
793 *(scan+2) = offset&0377; | |
794 } | |
795 | |
796 /* | |
797 - regoptail - regtail on operand of first argument; nop if operandless | |
798 */ | |
799 static void | |
800 regoptail(cp, p, val) | |
801 register struct comp *cp; | |
802 char *p; | |
803 char *val; | |
804 { | |
805 /* "Operandless" and "op != BRANCH" are synonymous in practice. */ | |
806 if (!EMITTING(cp) || OP(p) != BRANCH) | |
807 return; | |
808 regtail(cp, OPERAND(p), val); | |
809 } | |
810 | |
811 /* | |
812 * sqd_regexec and friends | |
813 */ | |
814 | |
815 /* | |
816 * Work-variable struct for sqd_regexec(). | |
817 */ | |
818 struct exec { | |
819 char *reginput; /* String-input pointer. */ | |
820 char *regbol; /* Beginning of input, for ^ check. */ | |
821 char **regstartp; /* Pointer to startp array. */ | |
822 char **regendp; /* Ditto for endp. */ | |
823 }; | |
824 | |
825 /* | |
826 * Forwards. | |
827 */ | |
828 static int regtry(struct exec *ep, sqd_regexp *rp, char *string); | |
829 static int regmatch(struct exec *ep, char *prog); | |
830 static size_t regrepeat(struct exec *ep, char *node); | |
831 | |
832 #ifdef DEBUG | |
833 int regnarrate = 0; | |
834 void regdump(); | |
835 static char *regprop(); | |
836 #endif | |
837 | |
838 /* | |
839 - sqd_regexec - match a regexp against a string | |
840 */ | |
841 int | |
842 sqd_regexec(prog, str) | |
843 register sqd_regexp *prog; | |
844 const char *str; | |
845 { | |
846 register char *string = (char *)str; /* avert const poisoning */ | |
847 register char *s; | |
848 struct exec ex; | |
849 | |
850 /* Be paranoid. */ | |
851 if (prog == NULL || string == NULL) { | |
852 sqd_regerror("NULL argument to sqd_regexec"); | |
853 return(0); | |
854 } | |
855 | |
856 /* Check validity of program. */ | |
857 if ((unsigned char)*prog->program != SQD_REGMAGIC) { | |
858 sqd_regerror("corrupted regexp"); | |
859 return(0); | |
860 } | |
861 | |
862 /* If there is a "must appear" string, look for it. */ | |
863 if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL) | |
864 return(0); | |
865 | |
866 /* Mark beginning of line for ^ . */ | |
867 ex.regbol = string; | |
868 ex.regstartp = prog->startp; | |
869 ex.regendp = prog->endp; | |
870 | |
871 /* Simplest case: anchored match need be tried only once. */ | |
872 if (prog->reganch) | |
873 return(regtry(&ex, prog, string)); | |
874 | |
875 /* Messy cases: unanchored match. */ | |
876 if (prog->regstart != '\0') { | |
877 /* We know what char it must start with. */ | |
878 for (s = string; s != NULL; s = strchr(s+1, prog->regstart)) | |
879 if (regtry(&ex, prog, s)) | |
880 return(1); | |
881 return(0); | |
882 } else { | |
883 /* We don't -- general case. */ | |
884 for (s = string; !regtry(&ex, prog, s); s++) | |
885 if (*s == '\0') | |
886 return(0); | |
887 return(1); | |
888 } | |
889 /* NOTREACHED */ | |
890 } | |
891 | |
892 /* | |
893 - regtry - try match at specific point | |
894 */ | |
895 static int /* 0 failure, 1 success */ | |
896 regtry(ep, prog, string) | |
897 register struct exec *ep; | |
898 sqd_regexp *prog; | |
899 char *string; | |
900 { | |
901 register int i; | |
902 register char **stp; | |
903 register char **enp; | |
904 | |
905 ep->reginput = string; | |
906 | |
907 stp = prog->startp; | |
908 enp = prog->endp; | |
909 for (i = NSUBEXP; i > 0; i--) { | |
910 *stp++ = NULL; | |
911 *enp++ = NULL; | |
912 } | |
913 if (regmatch(ep, prog->program + 1)) { | |
914 prog->startp[0] = string; | |
915 prog->endp[0] = ep->reginput; | |
916 return(1); | |
917 } else | |
918 return(0); | |
919 } | |
920 | |
921 /* | |
922 - regmatch - main matching routine | |
923 * | |
924 * Conceptually the strategy is simple: check to see whether the current | |
925 * node matches, call self recursively to see whether the rest matches, | |
926 * and then act accordingly. In practice we make some effort to avoid | |
927 * recursion, in particular by going through "ordinary" nodes (that don't | |
928 * need to know whether the rest of the match failed) by a loop instead of | |
929 * by recursion. | |
930 */ | |
931 static int /* 0 failure, 1 success */ | |
932 regmatch(ep, prog) | |
933 register struct exec *ep; | |
934 char *prog; | |
935 { | |
936 register char *scan; /* Current node. */ | |
937 char *next; /* Next node. */ | |
938 | |
939 #ifdef DEBUG | |
940 if (prog != NULL && regnarrate) | |
941 fprintf(stderr, "%s(\n", regprop(prog)); | |
942 #endif | |
943 for (scan = prog; scan != NULL; scan = next) { | |
944 #ifdef DEBUG | |
945 if (regnarrate) | |
946 fprintf(stderr, "%s...\n", regprop(scan)); | |
947 #endif | |
948 next = regnext(scan); | |
949 | |
950 switch (OP(scan)) { | |
951 case BOL: | |
952 if (ep->reginput != ep->regbol) | |
953 return(0); | |
954 break; | |
955 case EOL: | |
956 if (*ep->reginput != '\0') | |
957 return(0); | |
958 break; | |
959 case ANY: | |
960 if (*ep->reginput == '\0') | |
961 return(0); | |
962 ep->reginput++; | |
963 break; | |
964 case EXACTLY: { | |
965 register size_t len; | |
966 register char *const opnd = OPERAND(scan); | |
967 | |
968 /* Inline the first character, for speed. */ | |
969 if (*opnd != *ep->reginput) | |
970 return(0); | |
971 len = strlen(opnd); | |
972 if (len > 1 && strncmp(opnd, ep->reginput, len) != 0) | |
973 return(0); | |
974 ep->reginput += len; | |
975 break; | |
976 } | |
977 case ANYOF: | |
978 if (*ep->reginput == '\0' || | |
979 strchr(OPERAND(scan), *ep->reginput) == NULL) | |
980 return(0); | |
981 ep->reginput++; | |
982 break; | |
983 case ANYBUT: | |
984 if (*ep->reginput == '\0' || | |
985 strchr(OPERAND(scan), *ep->reginput) != NULL) | |
986 return(0); | |
987 ep->reginput++; | |
988 break; | |
989 case NOTHING: | |
990 break; | |
991 case BACK: | |
992 break; | |
993 case OPEN+1: case OPEN+2: case OPEN+3: | |
994 case OPEN+4: case OPEN+5: case OPEN+6: | |
995 case OPEN+7: case OPEN+8: case OPEN+9: { | |
996 register const int no = OP(scan) - OPEN; | |
997 register char *const input = ep->reginput; | |
998 | |
999 if (regmatch(ep, next)) { | |
1000 /* | |
1001 * Don't set startp if some later | |
1002 * invocation of the same parentheses | |
1003 * already has. | |
1004 */ | |
1005 if (ep->regstartp[no] == NULL) | |
1006 ep->regstartp[no] = input; | |
1007 return(1); | |
1008 } else | |
1009 return(0); | |
1010 break; | |
1011 } | |
1012 case CLOSE+1: case CLOSE+2: case CLOSE+3: | |
1013 case CLOSE+4: case CLOSE+5: case CLOSE+6: | |
1014 case CLOSE+7: case CLOSE+8: case CLOSE+9: { | |
1015 register const int no = OP(scan) - CLOSE; | |
1016 register char *const input = ep->reginput; | |
1017 | |
1018 if (regmatch(ep, next)) { | |
1019 /* | |
1020 * Don't set endp if some later | |
1021 * invocation of the same parentheses | |
1022 * already has. | |
1023 */ | |
1024 if (ep->regendp[no] == NULL) | |
1025 ep->regendp[no] = input; | |
1026 return(1); | |
1027 } else | |
1028 return(0); | |
1029 break; | |
1030 } | |
1031 case BRANCH: { | |
1032 register char *const save = ep->reginput; | |
1033 | |
1034 if (OP(next) != BRANCH) /* No choice. */ | |
1035 next = OPERAND(scan); /* Avoid recursion. */ | |
1036 else { | |
1037 while (OP(scan) == BRANCH) { | |
1038 if (regmatch(ep, OPERAND(scan))) | |
1039 return(1); | |
1040 ep->reginput = save; | |
1041 scan = regnext(scan); | |
1042 } | |
1043 return(0); | |
1044 /* NOTREACHED */ | |
1045 } | |
1046 break; | |
1047 } | |
1048 case STAR: case PLUS: { | |
1049 register const char nextch = | |
1050 (OP(next) == EXACTLY) ? *OPERAND(next) : '\0'; | |
1051 register size_t no; | |
1052 register char *const save = ep->reginput; | |
1053 register const size_t min = (OP(scan) == STAR) ? 0 : 1; | |
1054 | |
1055 for (no = regrepeat(ep, OPERAND(scan)) + 1; no > min; no--) { | |
1056 ep->reginput = save + no - 1; | |
1057 /* If it could work, try it. */ | |
1058 if (nextch == '\0' || *ep->reginput == nextch) | |
1059 if (regmatch(ep, next)) | |
1060 return(1); | |
1061 } | |
1062 return(0); | |
1063 break; | |
1064 } | |
1065 case END: | |
1066 return(1); /* Success! */ | |
1067 break; | |
1068 default: | |
1069 sqd_regerror("regexp corruption"); | |
1070 return(0); | |
1071 break; | |
1072 } | |
1073 } | |
1074 | |
1075 /* | |
1076 * We get here only if there's trouble -- normally "case END" is | |
1077 * the terminating point. | |
1078 */ | |
1079 sqd_regerror("corrupted pointers"); | |
1080 return(0); | |
1081 } | |
1082 | |
1083 /* | |
1084 - regrepeat - report how many times something simple would match | |
1085 */ | |
1086 static size_t | |
1087 regrepeat(ep, node) | |
1088 register struct exec *ep; | |
1089 char *node; | |
1090 { | |
1091 register size_t count; | |
1092 register char *scan; | |
1093 register char ch; | |
1094 | |
1095 switch (OP(node)) { | |
1096 case ANY: | |
1097 return(strlen(ep->reginput)); | |
1098 break; | |
1099 case EXACTLY: | |
1100 ch = *OPERAND(node); | |
1101 count = 0; | |
1102 for (scan = ep->reginput; *scan == ch; scan++) | |
1103 count++; | |
1104 return(count); | |
1105 break; | |
1106 case ANYOF: | |
1107 return(strspn(ep->reginput, OPERAND(node))); | |
1108 break; | |
1109 case ANYBUT: | |
1110 return(strcspn(ep->reginput, OPERAND(node))); | |
1111 break; | |
1112 default: /* Oh dear. Called inappropriately. */ | |
1113 sqd_regerror("internal error: bad call of regrepeat"); | |
1114 return(0); /* Best compromise. */ | |
1115 break; | |
1116 } | |
1117 /* NOTREACHED */ | |
1118 } | |
1119 | |
1120 /* | |
1121 - regnext - dig the "next" pointer out of a node | |
1122 */ | |
1123 static char * | |
1124 regnext(p) | |
1125 register char *p; | |
1126 { | |
1127 register const int offset = NEXT(p); | |
1128 | |
1129 if (offset == 0) | |
1130 return(NULL); | |
1131 | |
1132 return((OP(p) == BACK) ? p-offset : p+offset); | |
1133 } | |
1134 | |
1135 #ifdef DEBUG | |
1136 | |
1137 static char *regprop(); | |
1138 | |
1139 /* | |
1140 - regdump - dump a regexp onto stdout in vaguely comprehensible form | |
1141 */ | |
1142 void | |
1143 regdump(r) | |
1144 sqd_regexp *r; | |
1145 { | |
1146 register char *s; | |
1147 register char op = EXACTLY; /* Arbitrary non-END op. */ | |
1148 register char *next; | |
1149 | |
1150 | |
1151 s = r->program + 1; | |
1152 while (op != END) { /* While that wasn't END last time... */ | |
1153 op = OP(s); | |
1154 printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ | |
1155 next = regnext(s); | |
1156 if (next == NULL) /* Next ptr. */ | |
1157 printf("(0)"); | |
1158 else | |
1159 printf("(%d)", (s-r->program)+(next-s)); | |
1160 s += 3; | |
1161 if (op == ANYOF || op == ANYBUT || op == EXACTLY) { | |
1162 /* Literal string, where present. */ | |
1163 while (*s != '\0') { | |
1164 putchar(*s); | |
1165 s++; | |
1166 } | |
1167 s++; | |
1168 } | |
1169 putchar('\n'); | |
1170 } | |
1171 | |
1172 /* Header fields of interest. */ | |
1173 if (r->regstart != '\0') | |
1174 printf("start `%c' ", r->regstart); | |
1175 if (r->reganch) | |
1176 printf("anchored "); | |
1177 if (r->regmust != NULL) | |
1178 printf("must have \"%s\"", r->regmust); | |
1179 printf("\n"); | |
1180 } | |
1181 | |
1182 /* | |
1183 - regprop - printable representation of opcode | |
1184 */ | |
1185 static char * | |
1186 regprop(op) | |
1187 char *op; | |
1188 { | |
1189 register char *p; | |
1190 static char buf[50]; | |
1191 | |
1192 (void) strcpy(buf, ":"); | |
1193 | |
1194 switch (OP(op)) { | |
1195 case BOL: | |
1196 p = "BOL"; | |
1197 break; | |
1198 case EOL: | |
1199 p = "EOL"; | |
1200 break; | |
1201 case ANY: | |
1202 p = "ANY"; | |
1203 break; | |
1204 case ANYOF: | |
1205 p = "ANYOF"; | |
1206 break; | |
1207 case ANYBUT: | |
1208 p = "ANYBUT"; | |
1209 break; | |
1210 case BRANCH: | |
1211 p = "BRANCH"; | |
1212 break; | |
1213 case EXACTLY: | |
1214 p = "EXACTLY"; | |
1215 break; | |
1216 case NOTHING: | |
1217 p = "NOTHING"; | |
1218 break; | |
1219 case BACK: | |
1220 p = "BACK"; | |
1221 break; | |
1222 case END: | |
1223 p = "END"; | |
1224 break; | |
1225 case OPEN+1: | |
1226 case OPEN+2: | |
1227 case OPEN+3: | |
1228 case OPEN+4: | |
1229 case OPEN+5: | |
1230 case OPEN+6: | |
1231 case OPEN+7: | |
1232 case OPEN+8: | |
1233 case OPEN+9: | |
1234 sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); | |
1235 p = NULL; | |
1236 break; | |
1237 case CLOSE+1: | |
1238 case CLOSE+2: | |
1239 case CLOSE+3: | |
1240 case CLOSE+4: | |
1241 case CLOSE+5: | |
1242 case CLOSE+6: | |
1243 case CLOSE+7: | |
1244 case CLOSE+8: | |
1245 case CLOSE+9: | |
1246 sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); | |
1247 p = NULL; | |
1248 break; | |
1249 case STAR: | |
1250 p = "STAR"; | |
1251 break; | |
1252 case PLUS: | |
1253 p = "PLUS"; | |
1254 break; | |
1255 default: | |
1256 sqd_regerror("corrupted opcode"); | |
1257 break; | |
1258 } | |
1259 if (p != NULL) | |
1260 (void) strcat(buf, p); | |
1261 return(buf); | |
1262 } | |
1263 #endif | |
1264 | |
1265 | |
1266 /* | |
1267 - sqd_regsub - perform substitutions after a regexp match | |
1268 */ | |
1269 void | |
1270 sqd_regsub(rp, source, dest) | |
1271 const sqd_regexp *rp; | |
1272 const char *source; | |
1273 char *dest; | |
1274 { | |
1275 register sqd_regexp * const prog = (sqd_regexp *)rp; | |
1276 register char *src = (char *)source; | |
1277 register char *dst = dest; | |
1278 register char c; | |
1279 register int no; | |
1280 register size_t len; | |
1281 | |
1282 if (prog == NULL || source == NULL || dest == NULL) { | |
1283 sqd_regerror("NULL parameter to sqd_regsub"); | |
1284 return; | |
1285 } | |
1286 if ((unsigned char)*(prog->program) != SQD_REGMAGIC) { | |
1287 sqd_regerror("damaged regexp"); | |
1288 return; | |
1289 } | |
1290 | |
1291 while ((c = *src++) != '\0') { | |
1292 if (c == '&') | |
1293 no = 0; | |
1294 else if (c == '\\' && isdigit((int) (*src))) | |
1295 no = *src++ - '0'; | |
1296 else | |
1297 no = -1; | |
1298 | |
1299 if (no < 0) { /* Ordinary character. */ | |
1300 if (c == '\\' && (*src == '\\' || *src == '&')) | |
1301 c = *src++; | |
1302 *dst++ = c; | |
1303 } else if (prog->startp[no] != NULL && prog->endp[no] != NULL && | |
1304 prog->endp[no] > prog->startp[no]) { | |
1305 len = prog->endp[no] - prog->startp[no]; | |
1306 (void) strncpy(dst, prog->startp[no], len); | |
1307 dst += len; | |
1308 if (*(dst-1) == '\0') { /* strncpy hit NUL. */ | |
1309 sqd_regerror("damaged match string"); | |
1310 return; | |
1311 } | |
1312 } | |
1313 } | |
1314 *dst++ = '\0'; | |
1315 } | |
1316 | |
1317 | |
1318 void | |
1319 sqd_regerror(s) | |
1320 char *s; | |
1321 { | |
1322 fprintf(stderr, "regexp(3): %s\n", s); | |
1323 exit(EXIT_FAILURE); | |
1324 /* NOTREACHED */ | |
1325 } | |
1326 | |
1327 #ifdef NBA_TEAM_IN_STL | |
1328 int | |
1329 main(int argc, char **argv) | |
1330 { | |
1331 char *pat; | |
1332 int ntok; | |
1333 char *s; | |
1334 int status; | |
1335 | |
1336 pat = argv[1]; | |
1337 ntok = atoi(argv[2]); | |
1338 s = argv[3]; | |
1339 | |
1340 status = Strparse(pat, s, ntok); | |
1341 if (status == 0) { | |
1342 printf("no match\n"); | |
1343 } else { | |
1344 int i; | |
1345 printf("MATCH.\n"); | |
1346 for (i = 1; i <= ntok; i++) | |
1347 printf("matched token %1d: %s\n", i, sqd_parse[i]); | |
1348 } | |
1349 } | |
1350 #endif /*NBA_TEAM_IN_STL*/ |