Mercurial > hg > aim92
comparison tools/strmatch.c @ 0:5242703e91d3 tip
Initial checkin for AIM92 aimR8.2 (last updated May 1997).
author | tomwalters |
---|---|
date | Fri, 20 May 2011 15:19:45 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5242703e91d3 |
---|---|
1 /*************************************************************************** | |
2 | |
3 strmatch.c String matching routines to supplement those defined in | |
4 ---------- <strings.h>. | |
5 | |
6 | |
7 Character classification macros defined in <ctype.h> | |
8 These return truth values [0,1]. | |
9 | |
10 | |
11 int isalpha(c) c is a letter | |
12 int c; | |
13 | |
14 int isupper(c) c is an uppercase letter | |
15 int c; | |
16 | |
17 int islower(c) c is a lowercase letter | |
18 int c; | |
19 | |
20 int isdigit(c) c is a digit | |
21 int c; | |
22 | |
23 int isxdigit(c) c is a hexadecimal digit, by default [0-9], [A-F], or | |
24 int c; [a-f]. | |
25 | |
26 int isalnum(c) c is an alphanumeric character | |
27 int c; | |
28 | |
29 int isspace(c) c is a space, tab, carriage return, new line, or form | |
30 int c; feed. | |
31 | |
32 int ispunct(c) c is a punctuation character (neither control, | |
33 int c; alphanumeric, nor space) | |
34 | |
35 | |
36 int isprint(c) c is a printing character, by default code 040(8) | |
37 int c; (space) through 0176 (tilde) | |
38 | |
39 | |
40 int isgraph(c) c is a printing character, like isprint except false | |
41 int c; for space. | |
42 | |
43 int iscntrl(c) c is a delete character (0177) or ordinary control | |
44 int c; character (less than 040) except for space characters | |
45 | |
46 | |
47 int isascii(c) c is an ASCII character, code less than 0200 | |
48 int c; | |
49 | |
50 | |
51 Character translation macros defined in <ctype.h> | |
52 | |
53 | |
54 int toupper(c) return upper-case letter corresponding to c. | |
55 int c; | |
56 | |
57 int tolower(c) return lower-case letter corresponding to c. | |
58 int c; | |
59 | |
60 int toascii(c) return ascii value corresponding to c. | |
61 int c; | |
62 | |
63 | |
64 String handling routines defined in <strings.h> | |
65 A `span' is the length of a segment of a string, ie a number of characters. | |
66 | |
67 | |
68 char *strcat(s1, s2) Append a copy of string s2 to the end of | |
69 char *s1, *s2; string s1. Return a ptr to s1. | |
70 | |
71 char *strncat(s1, s2, n) Append n chars of s2 to the end of string s1. | |
72 char *s1, *s2; Return a ptr to s1. | |
73 | |
74 int strcmp(s1, s2) Compare strings. Return 0 if equal. | |
75 unsigned char *s1, *s2; Otherwise return a negative or positive value according to whether s1 sorts before or after s2. | |
76 | |
77 int strncmp(s1, s2, n) Compare at most n chars of the strings. Return 0 if equal. | |
78 unsigned char *s1, *s2; Otherwise return a negative or positive value, as for strcmp. | |
79 int n; | |
80 | |
81 strcasecmp(s1, s2) As strcmp, but case insensitive. | |
82 char *s1, *s2; | |
83 | |
84 strncasecmp(s1, s2, n) As strncmp, but case insensitive. | |
85 char *s1, *s2; | |
86 | |
87 char *strcpy(s1, s2) Copy s2 to s1, including null char. | |
88 char *s1, *s2; | |
89 | |
90 char *strncpy(s1, s2, n) Copy n chars of s2 to s1. Truncate or pad s2 | |
91 char *s1, *s2; with nulls to make up n chars. If s2 needs to | |
92 int n be truncated, s1 will not be null terminated. | |
93 | |
94 int strlen(s) Return number of chars in s, not including | |
95 char *s; the terminating null character. | |
96 | |
97 char *strstr(s1, s2) Return a ptr to the first occurrence of s2 | |
98 char *s1, *s2; in s1. Otherwise return a null ptr. | |
99 | |
100 char *strchr(s, c) Return a ptr to the first occurrence of c | |
101 char *s; in s. Otherwise return a null ptr. | |
102 int c; | |
103 | |
104 char *strrchr(s, c) Return a ptr to the last occurrence of c | |
105 char *s; in s. Otherwise return a null ptr. | |
106 int c; | |
107 | |
108 char *strpbrk(s1, s2) Return a ptr to the first occurrence of any | |
109 char *s1, *s2; char in s2 in s1. Otherwise return a null ptr. | |
110 | |
111 int strspn(s1, s2) Return the span from the head of s1 which | |
112 char *s1, *s2; consists of chars which are in s2. | |
113 | |
114 int strcspn(s1, s2) Return the span from the head of s1 which | |
115 char *s1, *s2; consists of chars which are not in s2. | |
116 | |
117 char *strtok(s1, s2) See below: | |
118 char *s1, *s2; | |
119 | |
120 The strtok subroutine considers the string s1 to consist of a sequence | |
121 of zero or more text tokens separated by spans of one or more characters | |
122 from the separator string s2. The first call (with pointer s1 speci- | |
123 fied) returns a pointer to the first character of the first token, and | |
124 will have written a null character into s1 immediately following the | |
125 returned token. The function keeps track of its position in the string | |
126 between separate calls, so that subsequent calls (which must be made | |
127 with the first argument a NULL pointer) will work through the string s1 | |
128 immediately following that token. In this way, subsequent calls will | |
129 work through the string s1 until no tokens remain. The separator string | |
130 s2 may be different from call to call. When no token remains in s1, a | |
131 NULL pointer is returned. | |
132 | |
133 | |
134 ***************************************************************************/ | |
135 | |
136 | |
137 | |
138 #include <math.h> | |
139 #include "strmatch.h" | |
140 | |
141 | |
/*
  isnull: report whether `s' is a null pointer.
  Returns 1 if `s' is the null pointer, 0 otherwise.
  The pointer is only compared, never dereferenced.
*/

int isnull( s )
char *s ;
{
    return ( s == (char *)0 ) ? 1 : 0 ;
}
152 | |
/*
  isempty: report whether string `s' is empty.
  Returns 1 if the first char of `s' is the terminator '\0', 0 otherwise.
  `s' is dereferenced unconditionally, so it must not be a null pointer
  (use isnullorempty() when it might be).
*/

int isempty( s )
char *s ;
{
    return ( *s == '\0' ) ? 1 : 0 ;
}
164 | |
/*
  isnullorempty: report whether `s' is a null pointer or an empty string.
  Returns 1 in either case, 0 if `s' points to at least one non-null char.
  The null test is made first, so a null pointer is never dereferenced.
*/

int isnullorempty( s )
char *s ;
{
    if ( s != (char *)0 && *s != '\0' )
        return 0 ;
    return 1 ;
}
175 | |
176 | |
/*
  terminator: return a pointer to the '\0' which ends string `s'.
  For an empty string the result is `s' itself.
*/

char *terminator( s )
char *s ;
{
    char *end = s ;

    while ( *end != '\0' )      /* walk forward to the terminating null */
        end++ ;
    return end ;
}
186 | |
187 | |
/*
  isstr: test whether strings `s1' and `s2' are identical.
  Returns 1 if strcmp() reports them equal, 0 otherwise.
*/

int isstr( s1, s2 )
char *s1, *s2 ;
{
    if ( strcmp( s1, s2 ) != 0 )
        return 0 ;
    return 1 ;
}
197 | |
198 | |
/*
  iststr: test whether `s1' is a (possibly truncated) head of `s2'.
  Only the first strlen(s1) chars are compared, so `s1' may be an
  abbreviation of `s2'. An empty `s1' therefore matches any `s2'.
  Returns 1 on a match, 0 otherwise.
*/

int iststr( s1, s2 )
char *s1, *s2 ;
{
    if ( strncmp( s1, s2, strlen( s1 ) ) != 0 )
        return 0 ;
    return 1 ;
}
209 | |
210 | |
/*
  strccpy: copy `s2' into `s1' up to, but not including, the first occurrence
  of character `c' in `s2', then null-terminate `s1'.
  `s1' must have room for the copied chars plus the terminator.
  Returns `s1', or a null ptr (leaving `s1' untouched) if `c' is not found
  in `s2'.
*/

char *strccpy( s1, s2, c )
char *s1, *s2 ;
char  c ;
{
    char *sep ;
    int   len ;

    if ( ( sep = strchr( s2, c ) ) == (char *)0 )
        return (char *)0 ;

    len = (int)( sep - s2 ) ;       /* number of chars before the separator */
    strncpy( s1, s2, len ) ;        /* copies exactly len chars, no terminator */
    s1[len] = '\0' ;                /* assignment (was a no-op `==' comparison) */
    return ( s1 ) ;
}
228 | |
229 | |
/*
  strcbrk: return a ptr to the first char of `s1' which is NOT in `s2'.
  Returns a null ptr if every char of `s1' is in `s2' (including the case
  of an empty `s1'). This is the complement of strpbrk().
*/

char *strcbrk(s1, s2)
char *s1, *s2;
{
    char *p = s1 + strspn( s1, s2 ) ;   /* skip the leading run of s2 chars */

    if ( *p == '\0' )                   /* ran off the end: nothing outside s2 */
        return ( (char *)0 ) ;
    return ( p ) ;
}
245 | |
246 | |
247 | |
/*
  strspnbrk: return the number of chars at the head of `s1' which precede
  the first occurrence of any char from `s2'.
  Returns -1 if no char from `s2' occurs in `s1'.
*/

int strspnbrk(s1, s2)
char *s1, *s2;
{
    int span = strcspn( s1, s2 ) ;

    /* strcspn stops at the terminator only when nothing from s2 was found */
    return ( s1[span] == '\0' ) ? ( -1 ) : ( span ) ;
}
262 | |
263 | |
/*
  strcspnbrk: return the number of chars at the head of `s1' which precede
  the first char that is NOT in `s2'.
  Returns -1 if every char of `s1' is in `s2' (including an empty `s1').
*/

int strcspnbrk(s1, s2)
char *s1, *s2;
{
    int span = strspn( s1, s2 ) ;

    /* strspn stops at the terminator only when all of s1 is made of s2 chars */
    return ( s1[span] == '\0' ) ? ( -1 ) : ( span ) ;
}
278 | |
279 | |
280 | |
/*
  strtcmp: compare the heads of `s1' and `s2' over the length of `s1'.
  Returns the result of strncmp() on the first strlen(s1) chars: 0 if they
  are equal, non-zero otherwise.
*/

int strtcmp( s1, s2 )
char *s1, *s2 ;
{
    int diff ;

    diff = strncmp( s1, s2, strlen( s1 ) ) ;
    return ( diff ) ;
}
291 | |
292 | |
/*
  strtrcmp: compare the tail of `s1' with the whole of `s2'.
  If `s1' is at least as long as `s2', return strcmp() of the last
  strlen(s2) chars of `s1' against `s2' (0 when `s1' ends with `s2').
  If `s1' is shorter than `s2', return the (negative) length difference.
*/

int strtrcmp( s1, s2 )
char *s1, *s2 ;
{
    int len1 = strlen( s1 ) ;
    int len2 = strlen( s2 ) ;

    if ( len1 < len2 )                  /* s1 too short to end with s2 */
        return ( len1 - len2 ) ;
    return ( strcmp( s1 + len1 - len2, s2 ) ) ;
}
307 | |
308 | |
/*
  streqspn: return the number of leading chars over which `s1' and `s2'
  are identical. Counting stops at the end of `s1' or at the first
  differing char (which includes reaching the end of `s2').
*/

int streqspn( s1, s2 )
char *s1, *s2 ;
{
    int n ;

    for ( n = 0 ; s1[n] != '\0' && s1[n] == s2[n] ; n++ )
        ;
    return n ;
}
322 | |
323 | |
/*
  strnumspn: return the span (number of chars) of a <number> at the head of
  string `s', or 0 if `s' does not start with a number.
  <number> = [-]<digits>[.]<digits>
  where either, but not both, of the digit strings may be empty.
  A lone "." or "-." therefore has span 0 (previously the point itself was
  counted as if it were a digit, so these were accepted as numbers).
*/

int strnumspn( s )
char *s ;
{
    static char digits[] = "0123456789" ;
    int sign = 0, whole, point = 0, frac, extra = 0 ;

    if ( *s == '-' ) sign = 1 ;                             /* span of '-' */
    whole = strspn( s + sign, digits ) ;                    /* digits left of point */
    if ( s[sign + whole] == '.' ) point = 1 ;               /* span of '.' */
    frac = strspn( s + sign + whole + point, digits ) ;     /* digits right of point */

    /* NOTE(review): one further '.' directly after the number is also     */
    /* counted (so "1.5." has span 4). This is outside the grammar above   */
    /* but is kept as in the original code -- confirm whether callers      */
    /* rely on it.                                                         */
    if ( s[sign + whole + point + frac] == '.' ) extra = 1 ;

    if ( whole == 0 && frac == 0 )
        return 0 ;                      /* no digits at all means no number */
    return ( sign + whole + point + frac + extra ) ;
}
345 | |
346 | |
/*
  isnumber: test whether string `s' begins with a <number>, as recognised
  by strnumspn(). Only the head of `s' is examined; trailing chars after
  the number do not affect the result.
  Returns 1 if the number span is greater than zero, 0 otherwise.
*/

int isnumber( s )
char *s ;
{
    return ( strnumspn( s ) > 0 ) ? 1 : 0 ;
}
357 | |
358 | |
/*
  strnumptr: return a ptr to the first char after a <number> at the head of
  string `s'. If `s' does not begin with a number (strnumspn() gives a zero
  span) the result is `s' itself.
*/

char *strnumptr( s )
char *s ;
{
    int skip = strnumspn( s ) ;

    return ( &s[skip] ) ;
}
368 | |
369 | |
/*
  strpnum: return a ptr to the first number char in `s', where a number char
  is '-', '.' or any decimal digit.
  Returns a null ptr (from strpbrk()) if `s' contains no such char.
*/

char *strpnum(s)
char *s ;
{
    char *first = strpbrk( s, "-.0123456789" ) ;

    return ( first ) ;
}
380 | |
/*
  strcnum: return a ptr to the first char in `s' which is not a number char,
  where a number char is '-', '.' or any decimal digit.
  Returns a null ptr if `s' consists only of number chars (or is empty).
*/

char *strcnum(s)
char *s ;
{
    char *p = s + strspn( s, "-.0123456789" ) ;     /* skip leading number chars */

    if ( *p == '\0' )
        return ( (char *)0 ) ;
    return ( p ) ;
}
391 | |
392 | |
/*
  strpsep: locate the second of two tokens in `s1', where the tokens are
  divided by the first occurrence of any char from the separator string `s2'
  (normally a string of one char).
  A number at the head of `s1' is skipped before searching, so that a
  leading hyphen or decimal point belonging to that number is not mistaken
  for the separator (allowing eg. negative numbers with a hyphen separator).
  Four possible outcomes, depending upon the form of `s1':
    1. Null or empty.           - return NULL ptr (missing 1st and 2nd tokens).
    2. No separator char.       - return ptr to the empty string at the end
                                  of `s1' (empty 2nd token).
    3. Separator is last char.  - return NULL ptr (missing 2nd token).
    4. Separator within `s1'.   - return ptr to the char after the separator
                                  (two correct tokens).
  String `s1' is not modified.
*/

char *strpsep( s1, s2 )
char *s1, *s2 ;
{
    char *sep ;

    if ( s1 == (char *)0 || *s1 == '\0' )
        return ( (char *)0 ) ;

    s1 = strnumptr( s1 ) ;                  /* skip leading number */

    sep = strpbrk( s1, s2 ) ;
    if ( sep == (char *)0 )
        return ( s1 + strlen( s1 ) ) ;      /* ptr to empty string at end s1 */

    if ( sep[1] == '\0' )
        return ( (char *)0 ) ;              /* separator is last char */

    return ( sep + 1 ) ;
}
425 | |
426 /* Replace above last 4 lines of routine, and also mod strsep, to get proper | |
427 string separator | |
428 if ( ( s2 = strcbrk( s, s2 ) ) == (char *)0 ) | |
429 return ( (char *)0 ) ; | |
430 while ( --s2 > s && isnumber( s2 ) ) ; | |
431 return ( ++s2 ) ; | |
432 */ | |
433 | |
/*
  strsep: split string `s1' in place into two tokens.
  The second token is located by strpsep( s1, s2 ). If it is a non-empty
  token, the char immediately before it (the separator) is overwritten with
  '\0', so that `s1' becomes the first token. Otherwise `s1' is left
  unchanged.
  Returns whatever strpsep() returned: a ptr to the 2nd token, a ptr to an
  empty string, or a NULL ptr.
  NOTE(review): some C libraries declare a different strsep(char **, char *)
  in <string.h>; check for a clash if that header is in scope.
*/

char *strsep( s1, s2 )
char *s1, *s2 ;
{
    char *second = strpsep( s1, s2 ) ;      /* ptr to 2nd token */

    if ( second != (char *)0 && *second != '\0' )
        *( second - 1 ) = '\0' ;            /* cut s1 at the separator */

    return ( second ) ;
}
452 | |
453 | |
/*
  listcmp: look up the (possibly abbreviated) string `s' in the
  null-terminated list of strings `list'.
  An entry matches when its first strlen(s) chars equal `s'.
  Returns the single matching entry from `list'.
  Returns a NULL pointer if no entry matches, or if `s' is ambiguous
  (ie. matches more than one entry in the list).
*/

char *listcmp( list, s )
char **list ;
char *s ;
{
    char **entry ;
    char  *found = (char *)0 ;

    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
        if ( strncmp( s, *entry, strlen( s ) ) != 0 )
            continue ;                      /* head of entry differs from s */
        if ( found != (char *)0 )
            return (char *)0 ;              /* ambiguous match */
        found = *entry ;
    }
    return ( found ) ;                      /* NULL if match not found */
}
475 | |
476 | |
/*
  listrcmp: compare the tail of string `s' with each entry of the
  null-terminated list of strings `list'.
  An entry matches when `s' ends with the whole of that entry.
  Returns the longest matching entry (the earliest one in the list if
  several share the greatest length).
  Returns a NULL pointer if no non-empty entry matches.
*/

char *listrcmp( list, s )
char **list ;
char *s ;
{
    char *best = (char *)0 ;
    int   i, slen, elen, bestlen = 0 ;

    for ( i = 0 ; list[i] != (char *)0 ; i++ ) {
        slen = strlen( s ) ;
        elen = strlen( list[i] ) ;
        if ( slen < elen )
            continue ;                              /* entry longer than s */
        if ( strcmp( s + slen - elen, list[i] ) != 0 )
            continue ;                              /* tail of s differs */
        if ( elen > bestlen ) {                     /* keep longest match so far */
            bestlen = elen ;
            best = list[i] ;
        }
    }
    return ( best ) ;                               /* NULL if match not found */
}
498 | |
499 | |
/*
  listindex: find the entry of the null-terminated list of strings `list'
  which shares the longest run of identical leading chars with string `s'
  (which is possibly an abbreviation).
  Returns the list index of that entry.
  Returns (-1) if there is no match in the list (all spans are zero).
  Returns (-2) if the longest matching span is ambiguous (ie. occurs more
  than once in the list).
*/

int listindex( list, s )
char **list ;
char *s ;
{
    int i, span, best = 0, result = (-1) ;

    for ( i = 0 ; list[i] != (char *)0 ; i++ ) {

        /* count leading chars of s which equal those of list[i] */
        for ( span = 0 ; s[span] != '\0' && s[span] == list[i][span] ; span++ )
            ;

        if ( span > best ) {            /* new longest span: unique so far */
            best = span ;
            result = i ;
        }
        else if ( span > 0 && span == best )
            result = (-2) ;             /* ties the current longest span */
    }
    return result ;
}
526 | |
527 | |
/*
  listsize: return the length of the null-terminated list of strings `list',
  ie. the number of string ptrs which precede the terminating null ptr.
  (The return type is now declared explicitly; implicit `int' is not valid
  in C99 and later.)
*/

int listsize( list )
char **list ;
{
    int count = 0 ;

    while ( list[count] != (char *)0 )
        count++ ;
    return count ;
}
542 | |
543 | |
/*
  mapindices: for each string in the null-terminated `list1', find the index
  of the matching string in `list2' using listindex() (which allows the
  list1 strings to be abbreviations).
  Each index found is appended to the `index' array, which must be at least
  the size of list1. Strings with no match or an ambiguous match
  (listindex() < 0) store nothing, so the stored indices are packed
  together at the front of `index'.
  Returns the number of matching strings found. (If this is less than the
  size of list1 then list1 contains an unknown or an ambiguous string).
  (The return type is now declared explicitly; implicit `int' is not valid
  in C99 and later.)
*/

int mapindices( list1, list2, index )
char **list1, **list2 ;
int *index ;
{
    int i, match, found = 0 ;

    for ( i = 0 ; list1[i] != (char *)0 ; i++ ) {
        match = listindex( list2, list1[i] ) ;
        if ( match >= 0 )
            index[found++] = match ;
    }

    return found ;
}
564 | |
565 | |
/*
  tokens: split the string `str' in place into tokens at occurrences of the
  separator char `c'.
  A pointer to each token is stored in `list', and each token is
  null-terminated by overwriting its separator char with '\0'.
  The list holds at most `n' pointers.
  Returns the number of tokens, or 0 if an error:
    - `str' is empty, or `n' <= 0 ;
    - a token is empty (leading, trailing, or doubled separator) ;
    - `str' holds more than `n' tokens ;
    - `c' is '\0', which cannot act as a separator (previously this case
      read past the end of `str').
  On an error return `str' may already have been partly split.
  (The return type is now declared explicitly; implicit `int' is not valid
  in C99 and later.)
*/

int tokens( str, list, n, c )
char  *str ;
char **list ;
int    n ;
char   c ;
{
    int   count ;
    char *sep ;

    if ( c == '\0' ) return 0 ;
    if ( *str == '\0' || *str == c || n <= 0 ) return 0 ;
    list[0] = str ;

    for ( count = 1 ; count < n && ( sep = strchr( str, c ) ) != (char *)0 ; count++ ) {
        *sep = '\0' ;                       /* terminate the previous token */
        str = sep + 1 ;
        if ( *str == '\0' || *str == c ) return 0 ;     /* empty token */
        list[count] = str ;
    }

    /* list is full but separators remain: too many tokens */
    if ( count == n && strchr( str, c ) != (char *)0 ) return 0 ;

    return count ;
}
597 | |
598 | |
599 | |
/*
  itoa: return the decimal ASCII string of integer `i'. (Inverse of atoi()).
  The string is held in memory obtained from malloc(); the caller owns it
  and should free() it when done.
  Returns a null ptr if the memory cannot be allocated (previously the
  unchecked result was passed straight to strcpy()).
*/

char *itoa( i )
int i ;
{
    char  digits[64] ;      /* ample room for any "%d" conversion of an int */
    char *copy ;

    sprintf( digits, "%d", i ) ;
    copy = (char *)malloc( strlen( digits ) + 1 ) ;
    if ( copy == (char *)0 )
        return (char *)0 ;
    strcpy( copy, digits ) ;
    return copy ;
}