annotate clustalomega/clustal-omega-1.0.2/src/squid/hsregex.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2 * SQUID - a library of functions for biological sequence analysis
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 * Copyright (C) 1992-2002 Washington University School of Medicine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 * This source code is freely distributed under the terms of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 * for details.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * This code is an altered version of Henry Spencer's
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 * regex library. Alterations are limited to minor streamlining,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 * and some name changes to protect the SQUID namespace.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 * Henry's copyright notice appears below.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 * You can obtain the original from
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16 * ftp://ftp.zoo.toronto.edu/pub/bookregex.tar.Z
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 * Thanks, Henry!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 * The magic word for compiling a testdriver: NBA_TEAM_IN_STL
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20 * gcc -o test -g -DNBA_TEAM_IN_STL -L. hsregex.c -lsquid -lm
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 * Usage:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23 * test <pattern> <ntok> <string>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25 * SRE, Fri Aug 28 11:10:17 1998
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
26 * CVS $Id: hsregex.c,v 1.7 2001/08/09 17:50:17 eddy Exp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
27 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
28
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
29 #include <stdio.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
30 #include <stdlib.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
31 #include <string.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
32 #include <ctype.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
33 #include "squid.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
34
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
35 /* global sqd_parse[] are managed by Strparse().
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
36 * WARNING: TODO: this code is not threadsafe, and needs to be revised.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
37 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
38 char *sqd_parse[10];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
39
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
40 /* Function: Strparse()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
41 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
42 * Purpose: Match a regexp to a string. Returns 1 if pattern matches,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
43 * else 0.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
44 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
45 * Much like Perl, Strparse() makes copies of the matching
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
46 * substrings available via globals, sqd_parse[].
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
47 * sqd_parse[0] contains a copy of the complete matched
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
48 * text. sqd_parse[1-9] contain copies of up to nine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
49 * different substrings matched within parentheses.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
50 * The memory for these strings is internally managed and
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
51 * volatile; the next call to Strparse() may destroy them.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
52 * If the caller needs the matched substrings to persist
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
53 * beyond a new Strparse() call, it must make its own
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
54 * copies.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
55 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56 * A minor drawback of the memory management is that
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57 * there will be a small amount of unfree'd memory being
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58 * managed by Strparse() when a program exits; this may
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59 * confuse memory debugging (Purify, dbmalloc). The
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60 * general cleanup function SqdClean() is provided;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 * you can call this before exiting.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63 * Uses an extended POSIX regular expression interface.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64 * A copylefted GNU implementation is included in the squid
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65 * implementation (gnuregex.c) for use on non-POSIX compliant
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 * systems. POSIX 1003.2-compliant systems (all UNIX,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 * some WinNT, I believe) can omit the GNU code if necessary.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 * I built this for ease of use, not speed nor efficiency.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71 * Example: Strparse("foo-...-baz", "foo-bar-baz") returns 0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72 * Strparse("foo-(...)-baz", "foo-bar-baz")
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 * returns 0; sqd_parse[0] is "foo-bar-baz";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74 * sqd_parse[1] is "bar".
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 * A real example:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 * s = ">gnl|ti|3 G10P69425RH2.T0 {SUB 81..737} /len=657"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 * pat = "SUB ([0-9]+)"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 * Strparse(pat, s, 1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 * returns 1; sqd_parse[1] is "81".
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82 * Args: rexp - regular expression, extended POSIX form
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 * s - string to match against
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84 * ntok - number of () substrings we will save (maximum NSUBEXP-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 * Return: 1 on match, 0 if no match
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 Strparse(char *rexp, char *s, int ntok)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 sqd_regexp *pat;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 int code;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 int i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95 /* sanity check */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96 if (ntok >= NSUBEXP ) Die("Strparse(): ntok must be <= %d", NSUBEXP-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98 /* Free previous global substring buffers
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100 for (i = 0; i <= ntok; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 if (sqd_parse[i] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103 free(sqd_parse[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 sqd_parse[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107 /* Compile and match the pattern, using our modified
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 * copy of Henry Spencer's regexp library
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110 if ((pat = sqd_regcomp(rexp)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 Die("regexp compilation failed.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 code = sqd_regexec(pat, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114 /* Fill the global substring buffers
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116 if (code == 1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117 for (i = 0; i <= ntok; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
118 if (pat->startp[i] != NULL && pat->endp[i] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
119 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
120 len = pat->endp[i] - pat->startp[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
121 sqd_parse[i] = (char *) MallocOrDie(sizeof(char) * (len+1));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
122 strncpy(sqd_parse[i], pat->startp[i], len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
123 sqd_parse[i][len] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
124 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
125
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
126 free(pat);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
127 return code;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
128 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
129
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
130 /* Function: SqdClean()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
131 * Date: SRE, Wed Oct 29 12:52:08 1997 [TWA 721]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
132 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
133 * Purpose: Clean up any squid library allocations before exiting
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
134 * a program, so we don't leave unfree'd memory around
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
135 * and confuse a malloc debugger like Purify or dbmalloc.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
136 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
137 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
138 SqdClean(void)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
139 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140 int i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 /* Free global substring buffers that Strparse() uses
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 for (i = 0; i <= 9; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 if (sqd_parse[i] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146 free(sqd_parse[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 sqd_parse[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153 /* all code below is:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154 * Copyright (c) 1986, 1993, 1995 by University of Toronto.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155 * Written by Henry Spencer. Not derived from licensed software.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 * Permission is granted to anyone to use this software for any
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 * purpose on any computer system, and to redistribute it in any way,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159 * subject to the following restrictions:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161 * 1. The author is not responsible for the consequences of use of
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162 * this software, no matter how awful, even if they arise
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163 * from defects in it.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 * 2. The origin of this software must not be misrepresented, either
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 * by explicit claim or by omission.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 * 3. Altered versions must be plainly marked as such, and must not
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 * be misrepresented (by explicit claim or omission) as being
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 * the original software.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172 * 4. This notice must not be removed or altered.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
174
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
175 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
176 * sqd_regcomp and sqd_regexec -- sqd_regsub and sqd_regerror are elsewhere
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
177 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
178
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
179 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
180 * The first byte of the regexp internal "program" is actually this magic
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
181 * number; the start node begins in the second byte.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
182 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
183 #define SQD_REGMAGIC 0234
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
184
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
185 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
186 * The "internal use only" fields in regexp.h are present to pass info from
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
187 * compile to execute that permits the execute phase to run lots faster on
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
188 * simple cases. They are:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
189 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
190 * regstart char that must begin a match; '\0' if none obvious
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
191 * reganch is the match anchored (at beginning-of-line only)?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
192 * regmust string (pointer into program) that match must include, or NULL
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
193 * regmlen length of regmust string
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
194 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
195 * Regstart and reganch permit very fast decisions on suitable starting points
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
196 * for a match, cutting down the work a lot. Regmust permits fast rejection
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
197 * of lines that cannot possibly match. The regmust tests are costly enough
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
198 * that sqd_regcomp() supplies a regmust only if the r.e. contains something
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
199 * potentially expensive (at present, the only such thing detected is * or +
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
200 * at the start of the r.e., which can involve a lot of backup). Regmlen is
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
201 * supplied because the test in sqd_regexec() needs it and sqd_regcomp() is computing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
202 * it anyway.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
203 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
204
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
205 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
206 * Structure for regexp "program". This is essentially a linear encoding
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
207 * of a nondeterministic finite-state machine (aka syntax charts or
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
208 * "railroad normal form" in parsing technology). Each node is an opcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
209 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
210 * all nodes except BRANCH implement concatenation; a "next" pointer with
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
211 * a BRANCH on both ends of it is connecting two alternatives. (Here we
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
212 * have one of the subtle syntax dependencies: an individual BRANCH (as
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
213 * opposed to a collection of them) is never concatenated with anything
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
214 * because of operator precedence.) The operand of some types of node is
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
215 * a literal string; for others, it is a node leading into a sub-FSM. In
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
216 * particular, the operand of a BRANCH node is the first node of the branch.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
217 * (NB this is *not* a tree structure: the tail of the branch connects
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
218 * to the thing following the set of BRANCHes.) The opcodes are:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
219 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
220
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
221 /* definition number opnd? meaning */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
222 #define END 0 /* no End of program. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
223 #define BOL 1 /* no Match beginning of line. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
224 #define EOL 2 /* no Match end of line. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
225 #define ANY 3 /* no Match any character. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
226 #define ANYOF 4 /* str Match any of these. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
227 #define ANYBUT 5 /* str Match any but one of these. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
228 #define BRANCH 6 /* node Match this, or the next..\&. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
229 #define BACK 7 /* no "next" ptr points backward. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
230 #define EXACTLY 8 /* str Match this string. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
231 #define NOTHING 9 /* no Match empty string. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
232 #define STAR 10 /* node Match this 0 or more times. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
233 #define PLUS 11 /* node Match this 1 or more times. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
234 #define OPEN 20 /* no Sub-RE starts here. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
235 /* OPEN+1 is number 1, etc. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
236 #define CLOSE 30 /* no Analogous to OPEN. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
237
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
238 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
239 * Opcode notes:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
240 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
241 * BRANCH The set of branches constituting a single choice are hooked
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
242 * together with their "next" pointers, since precedence prevents
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
243 * anything being concatenated to any individual branch. The
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
244 * "next" pointer of the last BRANCH in a choice points to the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
245 * thing following the whole choice. This is also where the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
246 * final "next" pointer of each individual branch points; each
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
247 * branch starts with the operand node of a BRANCH node.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
248 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
249 * BACK Normal "next" pointers all implicitly point forward; BACK
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
250 * exists to make loop structures possible.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
251 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
252 * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
253 * BRANCH structures using BACK. Simple cases (one character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
254 * per match) are implemented with STAR and PLUS for speed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
255 * and to minimize recursive plunges.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
256 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
257 * OPEN,CLOSE ...are numbered at compile time.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
258 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
259
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
260 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
261 * A node is one char of opcode followed by two chars of "next" pointer.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
262 * "Next" pointers are stored as two 8-bit pieces, high order first. The
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
263 * value is a positive offset from the opcode of the node containing it.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
264 * An operand, if any, simply follows the node. (Note that much of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
265 * code generation knows about this implicit relationship.)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
266 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
267 * Using two bytes for the "next" pointer is vast overkill for most things,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
268 * but allows patterns to get big without disasters.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
269 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
270 #define OP(p) (*(p))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
271 #define NEXT(p) (((*((p)+1)&0177)<<8) + (*((p)+2)&0377))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
272 #define OPERAND(p) ((p) + 3)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
273
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
274 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
275 * Utility definitions.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
276 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
277 #define FAIL(m) { sqd_regerror(m); return(NULL); }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
278 #define ISREPN(c) ((c) == '*' || (c) == '+' || (c) == '?')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
279 #define META "^$.[()|?+*\\"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
280
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
281 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
282 * Flags to be passed up and down.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
283 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
284 #define HASWIDTH 01 /* Known never to match null string. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
285 #define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
286 #define SPSTART 04 /* Starts with * or +. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
287 #define WORST 0 /* Worst case. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
288
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
289 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
290 * Work-variable struct for sqd_regcomp().
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
291 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
292 struct comp {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
293 char *regparse; /* Input-scan pointer. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
294 int regnpar; /* () count. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
295 char *regcode; /* Code-emit pointer; &regdummy = don't. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
296 char regdummy[3]; /* NOTHING, 0 next ptr */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
297 long regsize; /* Code size. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
298 };
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
299 #define EMITTING(cp) ((cp)->regcode != (cp)->regdummy)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
300
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
301 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
302 * Forward declarations for sqd_regcomp()'s friends.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
303 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
304 static char *reg(struct comp *cp, int paren, int *flagp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
305 static char *regbranch(struct comp *cp, int *flagp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
306 static char *regpiece(struct comp *cp, int *flagp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
307 static char *regatom(struct comp *cp, int *flagp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
308 static char *regnode(struct comp *cp, int op);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
309 static char *regnext(char *node);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
310 static void regc(struct comp *cp, int c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
311 static void reginsert(struct comp *cp, int op, char *opnd);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
312 static void regtail(struct comp *cp, char *p, char *val);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
313 static void regoptail(struct comp *cp, char *p, char *val);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
314
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
315 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
316 - sqd_regcomp - compile a regular expression into internal code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
317 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
318 * We can't allocate space until we know how big the compiled form will be,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
319 * but we can't compile it (and thus know how big it is) until we've got a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
320 * place to put the code. So we cheat: we compile it twice, once with code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
321 * generation turned off and size counting turned on, and once "for real".
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
322 * This also means that we don't allocate space until we are sure that the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
323 * thing really will compile successfully, and we never have to move the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
324 * code and thus invalidate pointers into it. (Note that it has to be in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
325 * one piece because free() must be able to free it all.)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
326 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
327 * Beware that the optimization-preparation code in here knows about some
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
328 * of the structure of the compiled regexp.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
329 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
330 sqd_regexp *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
331 sqd_regcomp(exp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
332 const char *exp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
333 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
334 register sqd_regexp *r;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
335 register char *scan;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
336 int flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
337 struct comp co;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
338
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
339 if (exp == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
340 FAIL("NULL argument to sqd_regcomp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
341
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
342 /* First pass: determine size, legality. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
343 co.regparse = (char *)exp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
344 co.regnpar = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
345 co.regsize = 0L;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
346 co.regdummy[0] = NOTHING;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
347 co.regdummy[1] = co.regdummy[2] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
348 co.regcode = co.regdummy;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
349 regc(&co, SQD_REGMAGIC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
350 if (reg(&co, 0, &flags) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
351 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
352
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
353 /* Small enough for pointer-storage convention? */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
354 if (co.regsize >= 0x7fffL) /* Probably could be 0xffffL. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
355 FAIL("regexp too big");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
356
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
357 /* Allocate space. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
358 r = (sqd_regexp *)malloc(sizeof(sqd_regexp) + (size_t)co.regsize);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
359 if (r == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
360 FAIL("out of space");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
361
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
362 /* Second pass: emit code. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
363 co.regparse = (char *)exp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
364 co.regnpar = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
365 co.regcode = r->program;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
366 regc(&co, SQD_REGMAGIC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
367 if (reg(&co, 0, &flags) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
368 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
369
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
370 /* Dig out information for optimizations. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
371 r->regstart = '\0'; /* Worst-case defaults. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
372 r->reganch = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
373 r->regmust = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
374 r->regmlen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
375 scan = r->program+1; /* First BRANCH. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
376 if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
377 scan = OPERAND(scan);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
378
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
379 /* Starting-point info. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
380 if (OP(scan) == EXACTLY)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
381 r->regstart = *OPERAND(scan);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
382 else if (OP(scan) == BOL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
383 r->reganch = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
384
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
385 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
386 * If there's something expensive in the r.e., find the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
387 * longest literal string that must appear and make it the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
388 * regmust. Resolve ties in favor of later strings, since
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
389 * the regstart check works with the beginning of the r.e.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
390 * and avoiding duplication strengthens checking. Not a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
391 * strong reason, but sufficient in the absence of others.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
392 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
393 if (flags&SPSTART) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
394 register char *longest = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
395 register size_t len = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
396
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
397 for (; scan != NULL; scan = regnext(scan))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
398 if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
399 longest = OPERAND(scan);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
400 len = strlen(OPERAND(scan));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
401 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
402 r->regmust = longest;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
403 r->regmlen = (int)len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
404 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
405 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
406
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
407 return(r);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
408 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
409
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
410 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
411 - reg - regular expression, i.e. main body or parenthesized thing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
412 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
413 * Caller must absorb opening parenthesis.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
414 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
415 * Combining parenthesis handling with the base level of regular expression
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
416 * is a trifle forced, but the need to tie the tails of the branches to what
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
417 * follows makes it hard to avoid.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
418 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
419 static char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
420 reg(cp, paren, flagp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
421 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
422 int paren; /* Parenthesized? */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
423 int *flagp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
424 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
425 register char *ret = NULL; /* SRE: NULL init added to silence gcc */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
426 register char *br;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
427 register char *ender;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
428 register int parno = 0; /* SRE: init added to silence gcc */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
429 int flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
430
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
431 *flagp = HASWIDTH; /* Tentatively. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
432
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
433 if (paren) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
434 /* Make an OPEN node. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
435 if (cp->regnpar >= NSUBEXP)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
436 FAIL("too many ()");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
437 parno = cp->regnpar;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
438 cp->regnpar++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
439 ret = regnode(cp, OPEN+parno);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
440 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
441
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
442 /* Pick up the branches, linking them together. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
443 br = regbranch(cp, &flags);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
444 if (br == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
445 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
446 if (paren)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
447 regtail(cp, ret, br); /* OPEN -> first. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
448 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
449 ret = br;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
450 *flagp &= ~(~flags&HASWIDTH); /* Clear bit if bit 0. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
451 *flagp |= flags&SPSTART;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
452 while (*cp->regparse == '|') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
453 cp->regparse++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
454 br = regbranch(cp, &flags);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
455 if (br == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
456 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
457 regtail(cp, ret, br); /* BRANCH -> BRANCH. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
458 *flagp &= ~(~flags&HASWIDTH);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
459 *flagp |= flags&SPSTART;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
460 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
461
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
462 /* Make a closing node, and hook it on the end. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
463 ender = regnode(cp, (paren) ? CLOSE+parno : END);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
464 regtail(cp, ret, ender);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
465
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
466 /* Hook the tails of the branches to the closing node. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
467 for (br = ret; br != NULL; br = regnext(br))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
468 regoptail(cp, br, ender);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
469
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
470 /* Check for proper termination. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
471 if (paren && *cp->regparse++ != ')') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
472 FAIL("unterminated ()");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
473 } else if (!paren && *cp->regparse != '\0') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
474 if (*cp->regparse == ')') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
475 FAIL("unmatched ()");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
476 } else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
477 FAIL("internal error: junk on end");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
478 /* NOTREACHED */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
479 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
480
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
481 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
482 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
483
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
484 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
485 - regbranch - one alternative of an | operator
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
486 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
487 * Implements the concatenation operator.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
488 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
489 static char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
490 regbranch(cp, flagp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
491 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
492 int *flagp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
493 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
494 register char *ret;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
495 register char *chain;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
496 register char *latest;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
497 int flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
498 register int c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
499
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
500 *flagp = WORST; /* Tentatively. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
501
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
502 ret = regnode(cp, BRANCH);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
503 chain = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
504 while ((c = *cp->regparse) != '\0' && c != '|' && c != ')') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
505 latest = regpiece(cp, &flags);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
506 if (latest == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
507 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
508 *flagp |= flags&HASWIDTH;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
509 if (chain == NULL) /* First piece. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
510 *flagp |= flags&SPSTART;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
511 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
512 regtail(cp, chain, latest);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
513 chain = latest;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
514 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
515 if (chain == NULL) /* Loop ran zero times. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
516 (void) regnode(cp, NOTHING);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
517
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
518 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
519 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
520
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
521 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
522 - regpiece - something followed by possible [*+?]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
523 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
524 * Note that the branching code sequences used for ? and the general cases
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
525 * of * and + are somewhat optimized: they use the same NOTHING node as
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
526 * both the endmarker for their branch list and the body of the last branch.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
527 * It might seem that this node could be dispensed with entirely, but the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
528 * endmarker role is not redundant.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
529 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
530 static char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
531 regpiece(cp, flagp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
532 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
533 int *flagp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
534 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
535 register char *ret;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
536 register char op;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
537 register char *next;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
538 int flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
539
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
540 ret = regatom(cp, &flags);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
541 if (ret == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
542 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
543
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
544 op = *cp->regparse;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
545 if (!ISREPN(op)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
546 *flagp = flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
547 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
548 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
549
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
550 if (!(flags&HASWIDTH) && op != '?')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
551 FAIL("*+ operand could be empty");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
552 switch (op) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
553 case '*': *flagp = WORST|SPSTART; break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
554 case '+': *flagp = WORST|SPSTART|HASWIDTH; break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
555 case '?': *flagp = WORST; break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
556 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
557
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
558 if (op == '*' && (flags&SIMPLE))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
559 reginsert(cp, STAR, ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
560 else if (op == '*') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
561 /* Emit x* as (x&|), where & means "self". */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
562 reginsert(cp, BRANCH, ret); /* Either x */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
563 regoptail(cp, ret, regnode(cp, BACK)); /* and loop */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
564 regoptail(cp, ret, ret); /* back */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
565 regtail(cp, ret, regnode(cp, BRANCH)); /* or */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
566 regtail(cp, ret, regnode(cp, NOTHING)); /* null. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
567 } else if (op == '+' && (flags&SIMPLE))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
568 reginsert(cp, PLUS, ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
569 else if (op == '+') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
570 /* Emit x+ as x(&|), where & means "self". */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
571 next = regnode(cp, BRANCH); /* Either */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
572 regtail(cp, ret, next);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
573 regtail(cp, regnode(cp, BACK), ret); /* loop back */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
574 regtail(cp, next, regnode(cp, BRANCH)); /* or */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
575 regtail(cp, ret, regnode(cp, NOTHING)); /* null. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
576 } else if (op == '?') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
577 /* Emit x? as (x|) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
578 reginsert(cp, BRANCH, ret); /* Either x */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
579 regtail(cp, ret, regnode(cp, BRANCH)); /* or */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
580 next = regnode(cp, NOTHING); /* null. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
581 regtail(cp, ret, next);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
582 regoptail(cp, ret, next);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
583 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
584 cp->regparse++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
585 if (ISREPN(*cp->regparse))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
586 FAIL("nested *?+");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
587
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
588 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
589 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
590
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
591 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
592 - regatom - the lowest level
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
593 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
594 * Optimization: gobbles an entire sequence of ordinary characters so that
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
595 * it can turn them into a single node, which is smaller to store and
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
596 * faster to run. Backslashed characters are exceptions, each becoming a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
597 * separate node; the code is simpler that way and it's not worth fixing.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
598 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
599 static char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
600 regatom(cp, flagp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
601 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
602 int *flagp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
603 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
604 register char *ret;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
605 int flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
606
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
607 *flagp = WORST; /* Tentatively. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
608
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
609 switch (*cp->regparse++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
610 case '^':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
611 ret = regnode(cp, BOL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
612 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
613 case '$':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
614 ret = regnode(cp, EOL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
615 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
616 case '.':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
617 ret = regnode(cp, ANY);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
618 *flagp |= HASWIDTH|SIMPLE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
619 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
620 case '[': {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
621 register int range;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
622 register int rangeend;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
623 register int c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
624
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
625 if (*cp->regparse == '^') { /* Complement of range. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
626 ret = regnode(cp, ANYBUT);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
627 cp->regparse++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
628 } else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
629 ret = regnode(cp, ANYOF);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
630 if ((c = *cp->regparse) == ']' || c == '-') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
631 regc(cp, c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
632 cp->regparse++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
633 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
634 while ((c = *cp->regparse++) != '\0' && c != ']') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
635 if (c != '-')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
636 regc(cp, c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
637 else if ((c = *cp->regparse) == ']' || c == '\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
638 regc(cp, '-');
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
639 else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
640 range = (unsigned char)*(cp->regparse-2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
641 rangeend = (unsigned char)c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
642 if (range > rangeend)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
643 FAIL("invalid [] range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
644 for (range++; range <= rangeend; range++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
645 regc(cp, range);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
646 cp->regparse++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
647 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
648 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
649 regc(cp, '\0');
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
650 if (c != ']')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
651 FAIL("unmatched []");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
652 *flagp |= HASWIDTH|SIMPLE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
653 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
654 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
655 case '(':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
656 ret = reg(cp, 1, &flags);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
657 if (ret == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
658 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
659 *flagp |= flags&(HASWIDTH|SPSTART);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
660 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
661 case '\0':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
662 case '|':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
663 case ')':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
664 /* supposed to be caught earlier */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
665 FAIL("internal error: \\0|) unexpected");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
666 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
667 case '?':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
668 case '+':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
669 case '*':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
670 FAIL("?+* follows nothing");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
671 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
672 case '\\':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
673 if (*cp->regparse == '\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
674 FAIL("trailing \\");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
675 ret = regnode(cp, EXACTLY);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
676 regc(cp, *cp->regparse++);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
677 regc(cp, '\0');
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
678 *flagp |= HASWIDTH|SIMPLE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
679 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
680 default: {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
681 register size_t len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
682 register char ender;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
683
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
684 cp->regparse--;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
685 len = strcspn(cp->regparse, META);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
686 if (len == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
687 FAIL("internal error: strcspn 0");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
688 ender = *(cp->regparse+len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
689 if (len > 1 && ISREPN(ender))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
690 len--; /* Back off clear of ?+* operand. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
691 *flagp |= HASWIDTH;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
692 if (len == 1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
693 *flagp |= SIMPLE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
694 ret = regnode(cp, EXACTLY);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
695 for (; len > 0; len--)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
696 regc(cp, *cp->regparse++);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
697 regc(cp, '\0');
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
698 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
699 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
700 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
701
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
702 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
703 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
704
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
705 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
706 - regnode - emit a node
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
707 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
708 static char * /* Location. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
709 regnode(cp, op)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
710 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
711 char op;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
712 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
713 register char *const ret = cp->regcode;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
714 register char *ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
715
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
716 if (!EMITTING(cp)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
717 cp->regsize += 3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
718 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
719 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
720
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
721 ptr = ret;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
722 *ptr++ = op;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
723 *ptr++ = '\0'; /* Null next pointer. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
724 *ptr++ = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
725 cp->regcode = ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
726
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
727 return(ret);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
728 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
729
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
730 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
731 - regc - emit (if appropriate) a byte of code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
732 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
733 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
734 regc(cp, b)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
735 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
736 char b;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
737 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
738 if (EMITTING(cp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
739 *cp->regcode++ = b;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
740 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
741 cp->regsize++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
742 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
743
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
744 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
745 - reginsert - insert an operator in front of already-emitted operand
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
746 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
747 * Means relocating the operand.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
748 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
749 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
750 reginsert(cp, op, opnd)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
751 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
752 char op;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
753 char *opnd;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
754 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
755 register char *place;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
756
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
757 if (!EMITTING(cp)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
758 cp->regsize += 3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
759 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
760 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
761
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
762 (void) memmove(opnd+3, opnd, (size_t)(cp->regcode - opnd));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
763 cp->regcode += 3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
764
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
765 place = opnd; /* Op node, where operand used to be. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
766 *place++ = op;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
767 *place++ = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
768 *place++ = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
769 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
770
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
771 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
772 - regtail - set the next-pointer at the end of a node chain
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
773 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
774 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
775 regtail(cp, p, val)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
776 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
777 char *p;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
778 char *val;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
779 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
780 register char *scan;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
781 register char *temp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
782 register int offset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
783
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
784 if (!EMITTING(cp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
785 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
786
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
787 /* Find last node. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
788 for (scan = p; (temp = regnext(scan)) != NULL; scan = temp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
789 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
790
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
791 offset = (OP(scan) == BACK) ? scan - val : val - scan;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
792 *(scan+1) = (offset>>8)&0177;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
793 *(scan+2) = offset&0377;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
794 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
795
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
796 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
797 - regoptail - regtail on operand of first argument; nop if operandless
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
798 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
799 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
800 regoptail(cp, p, val)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
801 register struct comp *cp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
802 char *p;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
803 char *val;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
804 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
805 /* "Operandless" and "op != BRANCH" are synonymous in practice. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
806 if (!EMITTING(cp) || OP(p) != BRANCH)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
807 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
808 regtail(cp, OPERAND(p), val);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
809 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
810
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
811 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
812 * sqd_regexec and friends
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
813 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
814
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
815 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
816 * Work-variable struct for sqd_regexec().
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
817 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
818 struct exec {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
819 char *reginput; /* String-input pointer. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
820 char *regbol; /* Beginning of input, for ^ check. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
821 char **regstartp; /* Pointer to startp array. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
822 char **regendp; /* Ditto for endp. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
823 };
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
824
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
825 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
826 * Forwards.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
827 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
828 static int regtry(struct exec *ep, sqd_regexp *rp, char *string);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
829 static int regmatch(struct exec *ep, char *prog);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
830 static size_t regrepeat(struct exec *ep, char *node);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
831
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
832 #ifdef DEBUG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
833 int regnarrate = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
834 void regdump();
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
835 static char *regprop();
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
836 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
837
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
838 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
839 - sqd_regexec - match a regexp against a string
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
840 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
841 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
842 sqd_regexec(prog, str)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
843 register sqd_regexp *prog;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
844 const char *str;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
845 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
846 register char *string = (char *)str; /* avert const poisoning */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
847 register char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
848 struct exec ex;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
849
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
850 /* Be paranoid. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
851 if (prog == NULL || string == NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
852 sqd_regerror("NULL argument to sqd_regexec");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
853 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
854 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
855
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
856 /* Check validity of program. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
857 if ((unsigned char)*prog->program != SQD_REGMAGIC) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
858 sqd_regerror("corrupted regexp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
859 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
860 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
861
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
862 /* If there is a "must appear" string, look for it. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
863 if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
864 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
865
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
866 /* Mark beginning of line for ^ . */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
867 ex.regbol = string;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
868 ex.regstartp = prog->startp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
869 ex.regendp = prog->endp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
870
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
871 /* Simplest case: anchored match need be tried only once. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
872 if (prog->reganch)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
873 return(regtry(&ex, prog, string));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
874
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
875 /* Messy cases: unanchored match. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
876 if (prog->regstart != '\0') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
877 /* We know what char it must start with. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
878 for (s = string; s != NULL; s = strchr(s+1, prog->regstart))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
879 if (regtry(&ex, prog, s))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
880 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
881 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
882 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
883 /* We don't -- general case. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
884 for (s = string; !regtry(&ex, prog, s); s++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
885 if (*s == '\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
886 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
887 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
888 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
889 /* NOTREACHED */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
890 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
891
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
892 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
893 - regtry - try match at specific point
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
894 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
895 static int /* 0 failure, 1 success */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
896 regtry(ep, prog, string)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
897 register struct exec *ep;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
898 sqd_regexp *prog;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
899 char *string;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
900 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
901 register int i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
902 register char **stp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
903 register char **enp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
904
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
905 ep->reginput = string;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
906
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
907 stp = prog->startp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
908 enp = prog->endp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
909 for (i = NSUBEXP; i > 0; i--) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
910 *stp++ = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
911 *enp++ = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
912 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
913 if (regmatch(ep, prog->program + 1)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
914 prog->startp[0] = string;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
915 prog->endp[0] = ep->reginput;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
916 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
917 } else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
918 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
919 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
920
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
921 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
922 - regmatch - main matching routine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
923 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
924 * Conceptually the strategy is simple: check to see whether the current
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
925 * node matches, call self recursively to see whether the rest matches,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
926 * and then act accordingly. In practice we make some effort to avoid
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
927 * recursion, in particular by going through "ordinary" nodes (that don't
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
928 * need to know whether the rest of the match failed) by a loop instead of
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
929 * by recursion.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
930 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
931 static int /* 0 failure, 1 success */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
932 regmatch(ep, prog)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
933 register struct exec *ep;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
934 char *prog;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
935 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
936 register char *scan; /* Current node. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
937 char *next; /* Next node. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
938
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
939 #ifdef DEBUG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
940 if (prog != NULL && regnarrate)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
941 fprintf(stderr, "%s(\n", regprop(prog));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
942 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
943 for (scan = prog; scan != NULL; scan = next) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
944 #ifdef DEBUG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
945 if (regnarrate)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
946 fprintf(stderr, "%s...\n", regprop(scan));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
947 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
948 next = regnext(scan);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
949
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
950 switch (OP(scan)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
951 case BOL:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
952 if (ep->reginput != ep->regbol)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
953 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
954 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
955 case EOL:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
956 if (*ep->reginput != '\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
957 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
958 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
959 case ANY:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
960 if (*ep->reginput == '\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
961 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
962 ep->reginput++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
963 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
964 case EXACTLY: {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
965 register size_t len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
966 register char *const opnd = OPERAND(scan);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
967
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
968 /* Inline the first character, for speed. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
969 if (*opnd != *ep->reginput)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
970 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
971 len = strlen(opnd);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
972 if (len > 1 && strncmp(opnd, ep->reginput, len) != 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
973 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
974 ep->reginput += len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
975 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
976 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
977 case ANYOF:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
978 if (*ep->reginput == '\0' ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
979 strchr(OPERAND(scan), *ep->reginput) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
980 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
981 ep->reginput++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
982 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
983 case ANYBUT:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
984 if (*ep->reginput == '\0' ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
985 strchr(OPERAND(scan), *ep->reginput) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
986 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
987 ep->reginput++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
988 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
989 case NOTHING:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
990 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
991 case BACK:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
992 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
993 case OPEN+1: case OPEN+2: case OPEN+3:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
994 case OPEN+4: case OPEN+5: case OPEN+6:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
995 case OPEN+7: case OPEN+8: case OPEN+9: {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
996 register const int no = OP(scan) - OPEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
997 register char *const input = ep->reginput;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
998
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
999 if (regmatch(ep, next)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1000 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1001 * Don't set startp if some later
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1002 * invocation of the same parentheses
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1003 * already has.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1004 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1005 if (ep->regstartp[no] == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1006 ep->regstartp[no] = input;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1007 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1008 } else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1009 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1010 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1011 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1012 case CLOSE+1: case CLOSE+2: case CLOSE+3:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1013 case CLOSE+4: case CLOSE+5: case CLOSE+6:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1014 case CLOSE+7: case CLOSE+8: case CLOSE+9: {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1015 register const int no = OP(scan) - CLOSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1016 register char *const input = ep->reginput;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1017
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1018 if (regmatch(ep, next)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1019 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1020 * Don't set endp if some later
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1021 * invocation of the same parentheses
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1022 * already has.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1023 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1024 if (ep->regendp[no] == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1025 ep->regendp[no] = input;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1026 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1027 } else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1028 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1029 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1030 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1031 case BRANCH: {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1032 register char *const save = ep->reginput;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1033
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1034 if (OP(next) != BRANCH) /* No choice. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1035 next = OPERAND(scan); /* Avoid recursion. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1036 else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1037 while (OP(scan) == BRANCH) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1038 if (regmatch(ep, OPERAND(scan)))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1039 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1040 ep->reginput = save;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1041 scan = regnext(scan);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1042 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1043 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1044 /* NOTREACHED */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1045 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1046 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1047 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1048 case STAR: case PLUS: {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1049 register const char nextch =
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1050 (OP(next) == EXACTLY) ? *OPERAND(next) : '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1051 register size_t no;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1052 register char *const save = ep->reginput;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1053 register const size_t min = (OP(scan) == STAR) ? 0 : 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1054
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1055 for (no = regrepeat(ep, OPERAND(scan)) + 1; no > min; no--) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1056 ep->reginput = save + no - 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1057 /* If it could work, try it. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1058 if (nextch == '\0' || *ep->reginput == nextch)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1059 if (regmatch(ep, next))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1060 return(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1061 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1062 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1063 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1064 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1065 case END:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1066 return(1); /* Success! */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1067 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1068 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1069 sqd_regerror("regexp corruption");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1070 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1071 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1072 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1073 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1074
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1075 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1076 * We get here only if there's trouble -- normally "case END" is
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1077 * the terminating point.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1078 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1079 sqd_regerror("corrupted pointers");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1080 return(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1081 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1082
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1083 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1084 - regrepeat - report how many times something simple would match
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1085 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1086 static size_t
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1087 regrepeat(ep, node)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1088 register struct exec *ep;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1089 char *node;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1090 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1091 register size_t count;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1092 register char *scan;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1093 register char ch;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1094
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1095 switch (OP(node)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1096 case ANY:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1097 return(strlen(ep->reginput));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1098 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1099 case EXACTLY:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1100 ch = *OPERAND(node);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1101 count = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1102 for (scan = ep->reginput; *scan == ch; scan++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1103 count++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1104 return(count);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1105 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1106 case ANYOF:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1107 return(strspn(ep->reginput, OPERAND(node)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1108 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1109 case ANYBUT:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1110 return(strcspn(ep->reginput, OPERAND(node)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1111 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1112 default: /* Oh dear. Called inappropriately. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1113 sqd_regerror("internal error: bad call of regrepeat");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1114 return(0); /* Best compromise. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1115 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1116 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1117 /* NOTREACHED */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1118 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1119
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1120 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1121 - regnext - dig the "next" pointer out of a node
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1122 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1123 static char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1124 regnext(p)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1125 register char *p;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1126 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1127 register const int offset = NEXT(p);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1128
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1129 if (offset == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1130 return(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1131
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1132 return((OP(p) == BACK) ? p-offset : p+offset);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1133 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1134
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1135 #ifdef DEBUG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1136
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1137 static char *regprop();
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1138
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1139 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1140 - regdump - dump a regexp onto stdout in vaguely comprehensible form
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1141 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1142 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1143 regdump(r)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1144 sqd_regexp *r;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1145 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1146 register char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1147 register char op = EXACTLY; /* Arbitrary non-END op. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1148 register char *next;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1149
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1150
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1151 s = r->program + 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1152 while (op != END) { /* While that wasn't END last time... */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1153 op = OP(s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1154 printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1155 next = regnext(s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1156 if (next == NULL) /* Next ptr. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1157 printf("(0)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1158 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1159 printf("(%d)", (s-r->program)+(next-s));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1160 s += 3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1161 if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1162 /* Literal string, where present. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1163 while (*s != '\0') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1164 putchar(*s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1165 s++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1166 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1167 s++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1168 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1169 putchar('\n');
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1170 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1171
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1172 /* Header fields of interest. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1173 if (r->regstart != '\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1174 printf("start `%c' ", r->regstart);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1175 if (r->reganch)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1176 printf("anchored ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1177 if (r->regmust != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1178 printf("must have \"%s\"", r->regmust);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1179 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1180 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1181
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1182 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1183 - regprop - printable representation of opcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1184 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1185 static char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1186 regprop(op)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1187 char *op;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1188 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1189 register char *p;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1190 static char buf[50];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1191
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1192 (void) strcpy(buf, ":");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1193
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1194 switch (OP(op)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1195 case BOL:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1196 p = "BOL";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1197 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1198 case EOL:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1199 p = "EOL";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1200 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1201 case ANY:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1202 p = "ANY";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1203 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1204 case ANYOF:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1205 p = "ANYOF";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1206 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1207 case ANYBUT:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1208 p = "ANYBUT";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1209 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1210 case BRANCH:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1211 p = "BRANCH";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1212 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1213 case EXACTLY:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1214 p = "EXACTLY";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1215 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1216 case NOTHING:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1217 p = "NOTHING";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1218 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1219 case BACK:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1220 p = "BACK";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1221 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1222 case END:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1223 p = "END";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1224 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1225 case OPEN+1:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1226 case OPEN+2:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1227 case OPEN+3:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1228 case OPEN+4:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1229 case OPEN+5:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1230 case OPEN+6:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1231 case OPEN+7:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1232 case OPEN+8:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1233 case OPEN+9:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1234 sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1235 p = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1236 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1237 case CLOSE+1:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1238 case CLOSE+2:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1239 case CLOSE+3:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1240 case CLOSE+4:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1241 case CLOSE+5:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1242 case CLOSE+6:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1243 case CLOSE+7:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1244 case CLOSE+8:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1245 case CLOSE+9:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1246 sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1247 p = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1248 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1249 case STAR:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1250 p = "STAR";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1251 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1252 case PLUS:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1253 p = "PLUS";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1254 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1255 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1256 sqd_regerror("corrupted opcode");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1257 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1258 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1259 if (p != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1260 (void) strcat(buf, p);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1261 return(buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1262 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1263 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1264
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1265
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1266 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1267 - sqd_regsub - perform substitutions after a regexp match
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1268 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1269 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1270 sqd_regsub(rp, source, dest)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1271 const sqd_regexp *rp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1272 const char *source;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1273 char *dest;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1274 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1275 register sqd_regexp * const prog = (sqd_regexp *)rp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1276 register char *src = (char *)source;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1277 register char *dst = dest;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1278 register char c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1279 register int no;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1280 register size_t len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1281
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1282 if (prog == NULL || source == NULL || dest == NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1283 sqd_regerror("NULL parameter to sqd_regsub");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1284 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1285 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1286 if ((unsigned char)*(prog->program) != SQD_REGMAGIC) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1287 sqd_regerror("damaged regexp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1288 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1289 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1290
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1291 while ((c = *src++) != '\0') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1292 if (c == '&')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1293 no = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1294 else if (c == '\\' && isdigit((int) (*src)))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1295 no = *src++ - '0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1296 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1297 no = -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1298
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1299 if (no < 0) { /* Ordinary character. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1300 if (c == '\\' && (*src == '\\' || *src == '&'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1301 c = *src++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1302 *dst++ = c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1303 } else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1304 prog->endp[no] > prog->startp[no]) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1305 len = prog->endp[no] - prog->startp[no];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1306 (void) strncpy(dst, prog->startp[no], len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1307 dst += len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1308 if (*(dst-1) == '\0') { /* strncpy hit NUL. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1309 sqd_regerror("damaged match string");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1310 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1311 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1312 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1313 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1314 *dst++ = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1315 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1316
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1317
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1318 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1319 sqd_regerror(s)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1320 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1321 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1322 fprintf(stderr, "regexp(3): %s\n", s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1323 exit(EXIT_FAILURE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1324 /* NOTREACHED */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1325 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1326
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1327 #ifdef NBA_TEAM_IN_STL
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1328 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1329 main(int argc, char **argv)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1330 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1331 char *pat;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1332 int ntok;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1333 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1334 int status;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1335
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1336 pat = argv[1];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1337 ntok = atoi(argv[2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1338 s = argv[3];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1339
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1340 status = Strparse(pat, s, ntok);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1341 if (status == 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1342 printf("no match\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1343 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1344 int i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1345 printf("MATCH.\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1346 for (i = 1; i <= ntok; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1347 printf("matched token %1d: %s\n", i, sqd_parse[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1348 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1349 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1350 #endif /*NBA_TEAM_IN_STL*/