comparison tools/strmatch.c @ 0:5242703e91d3 tip

Initial checkin for AIM92 aimR8.2 (last updated May 1997).
author tomwalters
date Fri, 20 May 2011 15:19:45 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5242703e91d3
1 /***************************************************************************
2
3 strmatch.c String matching routines to supplement those defined in
4 ---------- <strings.h>.
5
6
7 Character classification macros defined in <ctype.h>
8 These return truth values [0,1].
9
10
11 int isalpha(c) c is a letter
12 int c;
13
14 int isupper(c) c is an uppercase letter
15 int c;
16
17 int islower(c) c is a lowercase letter
18 int c;
19
20 int isdigit(c) c is a digit
21 int c;
22
23 int isxdigit(c) c is a hexadecimal digit, by default [0-9], [A-F], or
24 int c; [a-f].
25
26 int isalnum(c) c is an alphanumeric character
27 int c;
28
29 int isspace(c) c is a space, tab, carriage return, new line, or form
30 int c; feed.
31
32 int ispunct(c) c is a punctuation character (neither control,
33 int c; alphanumeric, nor space)
34
35
36 int isprint(c) c is a printing character, by default code 040(8)
37 int c; (space) through 0176 (tilde)
38
39
40 int isgraph(c) c is a printing character, like isprint except false
41 int c; for space.
42
43 int iscntrl(c) c is a delete character (0177) or ordinary control
44 int c; character (less than 040) except for space characters
45
46
47 int isascii(c) c is an ASCII character, code less than 0200
48 int c;
49
50
51 Character translation macros defined in <ctype.h>
52
53
54 int toupper(c) return upper-case letter corresponding to c.
55 int c;
56
57 int tolower(c) return lower-case letter corresponding to c.
58 int c;
59
60 int toascii(c) return ascii value corresponding to c.
61 int c;
62
63
64 String handling routines defined in <strings.h>
65 A `span' is the length of a segment of a string, ie a number of characters.
66
67
68 char *strcat(s1, s2) Append a copy of string s2 to the end of
69 char *s1, *s2; string s1. Return a ptr to s1.
70
71 char *strncat(s1, s2, n) Append n chars of s2 to the end of string s1.
72 char *s1, *s2; Return a ptr to s1.
73
74 int strcmp(s1, s2) Compare strings. Return 0 if equal.
75 unsigned char *s1, *s2; Otherwise return difference number of chars.
76
77 int strncmp(s1, s2, n) Compare n chars of strings. Return 0 if equal.
78 unsigned char *s1, *s2; Otherwise return difference number of chars.
79 int n
80
81 strcasecmp(s1, s2) As strcmp, but case insensitive.
82 char *s1, *s2;
83
84 strncasecmp(s1, s2, n) As strncmp, but case insensitive.
85 char *s1, *s2;
86
87 char *strcpy(s1, s2) Copy s2 to s1, including null char.
88 char *s1, *s2;
89
90 char *strncpy(s1, s2, n) Copy n chars of s2 to s1. Truncate or pad s2
91 char *s1, *s2; with nulls to make up n chars. If s2 needs to
92 int n be truncated, s1 will not be null terminated.
93
94 int strlen(s) Return number of chars in s, not including
95 char *s; the terminating null character.
96
97 char *strstr(s1, s2) Return a ptr to the first occurrence of s2
98 char *s1, *s2; in s1. Otherwise return a null ptr.
99
100 char *strchr(s, c) Return a ptr to the first occurrence of c
101 char *s; in s. Otherwise return a null ptr.
102 int c;
103
104 char *strrchr(s, c) Return a ptr to the last occurrence of c
105 char *s; in s. Otherwise return a null ptr.
106 int c;
107
108 char *strpbrk(s1, s2) Return a ptr to the first occurrence of any
109 char *s1, *s2; char in s2 in s1. Otherwise return a null ptr.
110
111 int strspn(s1, s2) Return the span from the head of s1 which
112 char *s1, *s2; consists of chars which are in s2.
113
114 int strcspn(s1, s2) Return the span from the head of s1 which
115 char *s1, *s2; consists of chars which are not in s2.
116
117 char *strtok(s1, s2) See below:
118 char *s1, *s2;
119
120 The strtok subroutine considers the string s1 to consist of a sequence
121 of zero or more text tokens separated by spans of one or more characters
122 from the separator string s2. The first call (with pointer s1 speci-
123 fied) returns a pointer to the first character of the first token, and
124 will have written a null character into s1 immediately following the
125 returned token. The function keeps track of its position in the string
126 between separate calls, so that subsequent calls (which must be made
127 with the first argument a NULL pointer) will work through the string s1
128 immediately following that token. In this way, subsequent calls will
129 work through the string s1 until no tokens remain. The separator string
130 s2 may be different from call to call. When no token remains in s1, a
131 NULL pointer is returned.
132
133
134 ***************************************************************************/
135
136
137
138 #include <math.h>
139 #include "strmatch.h"
140
141
/*
  Report whether `s' is a NULL pointer.
  Return 1 if it is, 0 otherwise. The string itself is never read.
*/

int isnull( s )
char *s ;
{
    return ( s == (char *)0 ) ? 1 : 0 ;
}
152
/*
  Report whether `s' is the empty string, ie. its first char is the
  terminator '\0'.
  Return 1 if empty, 0 otherwise.
  `s' is dereferenced without a NULL check; use isnullorempty() when the
  pointer itself may be NULL.
*/

int isempty( s )
char *s ;
{
    return ( s[0] == '\0' ) ? 1 : 0 ;
}
164
/*
  Report whether `s' is either a NULL pointer or an empty string.
  Return 1 in either case, 0 if `s' points to at least one char.
  The NULL test is made first, so a NULL `s' is never dereferenced.
*/

int isnullorempty( s )
char *s ;
{
    if ( s != (char *)0 && *s != '\0' )
        return 0 ;
    return 1 ;
}
175
176
/*
  Locate the end of string `s'.
  Return a pointer to its terminating '\0' (ie. `s' advanced by strlen(s)).
*/

char *terminator( s )
char *s ;
{
    return ( &s[ strlen( s ) ] ) ;
}
186
187
/*
  Test whether strings s1 and s2 are identical (as judged by strcmp).
  Return 1 if they are, 0 otherwise.
*/

int isstr( s1, s2 )
char *s1, *s2 ;
{
    if ( strcmp( s1, s2 ) != 0 )
        return 0 ;
    return 1 ;
}
197
198
/*
  Test whether s1 matches the head of s2, ie. compare only the first
  strlen(s1) chars, so that s1 may be a truncated form of s2.
  Return 1 on a match, 0 otherwise. An empty s1 matches any s2.
*/

int iststr( s1, s2 )
char *s1, *s2 ;
{
    if ( strncmp( s1, s2, strlen( s1 ) ) != 0 )
        return 0 ;
    return 1 ;
}
209
210
/*
  Copy s2 to s1 up to (but not including) the first occurrence of character c
  in s2, then null terminate s1.
  s1 must have room for the copied chars plus the terminator.
  Return s1, or a null ptr if c is not found in s2 (in which case s1 is left
  untouched).
*/

char *strccpy( s1, s2, c )
char *s1, *s2 ;
char c ;
{
    char *stop ;
    int  len ;

    if ( ( stop = strchr( s2, c ) ) == (char *)0 )
        return (char *)0 ;

    len = (int)( stop - s2 ) ;      /* number of chars preceding c */
    strncpy( s1, s2, len ) ;        /* copies exactly len chars, no terminator */
    s1[len] = '\0' ;                /* assignment: this used to be a no-op `==' */
    return ( s1 ) ;
}
228
229
/*
  Return a ptr to the first char in s1 which does not occur in s2.
  Return a null ptr if every char of s1 occurs in s2 (which includes the case
  of an empty s1). This is the complement of strpbrk() in the Unix string
  library.
*/

char *strcbrk(s1, s2)
char *s1, *s2;
{
    char *p = s1 + strspn( s1, s2 ) ;   /* skip the leading run of s2 chars */

    if ( *p == '\0' )                   /* ran off the end: nothing else found */
        return ( (char *)0 ) ;
    return ( p ) ;
}
245
246
247
/*
  Return the number of chars at the head of s1 which precede the first
  occurrence of any char from s2.
  Return -1 if s1 contains no char from s2.
*/

int strspnbrk(s1, s2)
char *s1, *s2;
{
    int span = strcspn( s1, s2 ) ;

    /* a span reaching the terminator means no char of s2 was found */
    return ( s1[span] == '\0' ) ? ( -1 ) : span ;
}
262
263
/*
  Return the number of chars at the head of s1 which precede the first char
  that is NOT in s2.
  Return -1 if every char of s1 is in s2 (including when s1 is empty).
*/

int strcspnbrk(s1, s2)
char *s1, *s2;
{
    int span = strspn( s1, s2 ) ;

    /* a span reaching the terminator means s1 consists only of s2 chars */
    return ( s1[span] == '\0' ) ? ( -1 ) : span ;
}
278
279
280
/*
  Compare s1 against the head of s2, looking only at the first strlen(s1)
  chars.
  Return 0 if they are equal, otherwise the non-zero result of strncmp().
*/

int strtcmp( s1, s2 )
char *s1, *s2 ;
{
    int headlen = strlen( s1 ) ;

    return ( strncmp( s1, s2, headlen ) ) ;
}
291
292
/*
  Compare s2 against the tail of s1, ie. against the last strlen(s2) chars
  of s1.
  Return 0 if they are equal.
  If s1 is shorter than s2 no comparison is made and the (negative) length
  difference strlen(s1)-strlen(s2) is returned; otherwise the result of
  strcmp() on the tail is returned.
*/

int strtrcmp( s1, s2 )
char *s1, *s2 ;
{
    int len1 = strlen( s1 ) ;
    int len2 = strlen( s2 ) ;

    if ( len1 < len2 )
        return ( len1 - len2 ) ;
    return ( strcmp( s1 + ( len1 - len2 ), s2 ) ) ;
}
307
308
/*
  Return the number of leading chars over which s1 and s2 agree.
  Counting stops at the end of s1 or at the first position where the two
  strings differ (which includes reaching the end of s2 first).
*/

int streqspn( s1, s2 )
char *s1, *s2 ;
{
    int n ;

    for ( n = 0 ; !isempty( s1+n ) && s1[n] == s2[n] ; n++ )
        ;
    return n ;
}
322
323
/*
  Return the span (number of chars) of a <number> at the head of string s.
  <number> = [-]<digits>[.]<digits>
  where either, but not both, of the digit strings may be empty.
  Return 0 if there is no number at the head of s. In particular a lone "-",
  "." or "-." has no digits and is not a number.
  At most one point belongs to the number, so a second '.' ends the span
  (eg. the span of "1.5.2.5" is 3), leaving it available as a separator.
*/

int strnumspn( s )
char *s ;
{
    static char digits[] = "0123456789" ;
    int sign = 0, whole, point = 0, frac ;

    if ( *s == '-' ) sign = 1 ;                              /* span of '-' */
    whole = (int)strspn( s+sign, digits ) ;                  /* digits left of point */
    if ( *( s+sign+whole ) == '.' ) point = 1 ;              /* span of '.' */
    frac = (int)strspn( s+sign+whole+point, digits ) ;       /* digits right of point */

    if ( whole == 0 && frac == 0 )
        return 0 ;                  /* no digits at all means no number */
    return ( sign+whole+point+frac ) ;
}
345
346
/*
  Test whether string s begins with a <number> (as recognised by
  strnumspn()). Chars following the number are not examined.
  Return 1 if so, 0 otherwise.
*/

int isnumber( s )
char *s ;
{
    return ( strnumspn( s ) > 0 ) ? 1 : 0 ;
}
357
358
/*
  Skip a <number> at the head of string s.
  Return a ptr to the first char after the number, or s itself if s does not
  begin with a number (the span from strnumspn() is then zero).
*/

char *strnumptr( s )
char *s ;
{
    int span = strnumspn( s ) ;

    return ( &s[span] ) ;
}
368
369
/*
  Return a ptr to the first number char in s, where a number char is any
  digit, '-' or '.'.
  Return a null ptr if s contains no number char.
*/

char *strpnum(s)
char *s ;
{
    static char numchars[] = "-.0123456789" ;

    return ( strpbrk( s, numchars ) ) ;
}
380
/*
  Return a ptr to the first char in s which is not a number char, where a
  number char is any digit, '-' or '.'.
  Return a null ptr if s consists only of number chars (see strcbrk()).
*/

char *strcnum(s)
char *s ;
{
    static char numchars[] = "-.0123456789" ;

    return ( strcbrk( s, numchars ) ) ;
}
391
392
/*
  Find where string `s1' divides into two tokens at the first occurrence of a
  separator char taken from `s2' (normally a string of one char).
  A number at the head of `s1' is skipped before the search begins, so that a
  leading hyphen or point which is part of that number is not mistaken for a
  separator (allowing negative numbers with splitting hyphens, real numbers
  with a splitting decimal point, etc.).
  Four possible outcomes, depending upon the form of `s1':
    1. Null or empty.           - return NULL ptr, (missing 1st and 2nd tokens).
    2. No separator char.       - return ptr to the empty string at the end of
                                  `s1', (empty 2nd token).
    3. Separator is last char.  - return NULL ptr, (missing 2nd token).
    4. Separator within `s1'.   - return ptr to 2nd token, ie. the char after
                                  the separator, (two correct tokens).
  String `s1' is not modified.
*/

char *strpsep( s1, s2 )
char *s1, *s2 ;
{
    char *rest, *sep ;

    if ( isnullorempty( s1 ) )
        return ( (char *)0 ) ;

    rest = strnumptr( s1 ) ;            /* search starts after any leading number */
    sep  = strpbrk( rest, s2 ) ;

    if ( sep == (char *)0 )
        return ( rest + strlen( rest ) ) ;  /* no separator: empty string at end */

    if ( isempty( sep + 1 ) )
        return ( (char *)0 ) ;              /* nothing follows the separator */

    return ( sep + 1 ) ;
}
425
426 /* Replace above last 4 lines of routine, and also mod strsep, to get proper
427 string separator
428 if ( ( s2 = strcbrk( s, s2 ) ) == (char *)0 )
429 return ( (char *)0 ) ;
430 while ( --s2 > s && isnumber( s2 ) ) ;
431 return ( ++s2 ) ;
432 */
433
/*
  Separate string `s1' into two string tokens.
  The split point is found by strpsep(), and the return value is the same as
  for strpsep(). In addition, when a non-empty 2nd token is found, the
  separator char preceding it is overwritten with '\0', so that `s1' becomes
  the first token. In all other cases `s1' is left unchanged.
  NOTE(review): the name coincides with the BSD/glibc strsep(), which has a
  different signature - confirm there is no clash on the build platform.
*/

char *strsep( s1, s2 )
char *s1, *s2 ;
{
    char *second = strpsep( s1, s2 ) ;  /* ptr to 2nd token */

    if ( !isnullorempty( second ) )
        second[-1] = '\0' ;             /* cut s1 at the separator */

    return ( second ) ;
}
452
453
/*
  Compare string `s' with the head of each string in the null-terminated
  list of strings `list', so that `s' may be an abbreviation of a list entry.
  Return the one string from `list' which `s' matches.
  Return a NULL pointer if no match is found, or if `s' is ambiguous
  (ie. matches more than one entry in the list).
*/

char *listcmp( list, s )
char **list ;
char *s ;
{
    int i, found = (-1) ;

    for ( i = 0 ; list[i] != (char *)0 ; i++ ) {
        if ( strtcmp( s, list[i] ) != 0 )
            continue ;                      /* not a match */
        if ( found >= 0 )
            return (char *)0 ;              /* second match: ambiguous */
        found = i ;
    }

    return ( found < 0 ) ? (char *)0 : list[found] ;
}
475
476
/*
  Compare the tail of string `s' with each string in the null-terminated list
  of strings `list'.
  Return the longest string from `list' which matches the tail of `s'; if
  several matches share that length, the earliest in the list is returned.
  Return a NULL pointer if no match is found. (Empty list entries are never
  returned, since a match must be longer than zero chars to be recorded).
*/

char *listrcmp( list, s )
char **list ;
char *s ;
{
    int i, len, longest = 0, best = (-1) ;

    for ( i = 0 ; list[i] != (char *)0 ; i++ ) {
        if ( strtrcmp( s, list[i] ) != 0 )
            continue ;                      /* tail does not match */
        len = strlen( list[i] ) ;
        if ( len > longest ) {              /* keep only a strictly longer match */
            longest = len ;
            best = i ;
        }
    }

    if ( best < 0 ) return (char *)0 ;      /* match not found */
    return ( list[best] ) ;
}
498
499
/*
  Compare the head of string `s' (which is possibly abbreviated) with each
  string in the null-terminated list of strings `list', and find the entry
  which agrees with `s' over the longest span of leading chars.
  Return the list index of that entry.
  Return (-1) if there is no match in the list (all spans are zero).
  Return (-2) if the longest matching span is ambiguous (ie. is shared by
  more than one entry in the list).
*/

int listindex( list, s )
char **list ;
char *s ;
{
    char **entry ;
    int span, longest = 0, index = (-1) ;

    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
        span = streqspn( s, *entry ) ;
        if ( span > longest ) {             /* new best: clears any earlier tie */
            longest = span ;
            index = (int)( entry - list ) ;
        }
        else if ( span > 0 && span == longest )
            index = (-2) ;                  /* tie for the best span so far */
    }

    return index ;
}
526
527
/*
  Return the length of the null-terminated list of strings `list', ie. the
  number of strings it contains (not counting the terminating null ptr).
*/

int listsize( list )
char **list ;
{
    int count = 0 ;

    while ( list[count] != (char *)0 )
        count++ ;
    return count ;
}
542
543
/*
  For each string in list1, find the index of the matching string in list2
  (using listindex(), which allows for abbreviations in the list1 strings)
  and store it in the index array, which must be at least the size of list1.
  Only successful matches are stored, and they are stored contiguously from
  index[0]; a list1 string which is unknown or ambiguous is skipped, so after
  a skip the position in `index' no longer corresponds to the position in
  list1.
  Return the number of matching strings found. (If this is less than the size
  of list1 then list1 contains an unknown or an ambiguous string).
*/

int mapindices( list1, list2, index )
char **list1, **list2 ;
int *index ;
{
    int i, match, nfound = 0 ;

    for ( i = 0 ; list1[i] != (char *)0 ; i++ ) {
        match = listindex( list2, list1[i] ) ;
        if ( match >= 0 )                   /* (-1) unknown, (-2) ambiguous */
            index[nfound++] = match ;
    }

    return nfound ;
}
564
565
/*
  Split the given string `str' into tokens at occurrences of separator char c.
  Store pointers to each token in `list', and null-terminate each token
  by overwriting the separator char with null.
  `list' must have room for n pointers; at most n tokens are accepted.
  Return the number of tokens, or 0 if an error. Errors are: an empty `str',
  n <= 0, an empty token (a leading, trailing or doubled separator), or more
  than n tokens in `str'.
  Note that on an error return `str' may already have had some of its
  separators overwritten.
*/

int tokens( str, list, n, c )
char *str ;
char **list ;
int n ;
char c ;
{
    int count ;
    char *sep ;

    if ( *str == '\0' || *str == c || n <= 0 ) return 0 ;
    list[0] = str ;

    for ( count = 1 ; count < n && ( sep = strchr( str, c ) ) != (char *)0 ; count++ ) {
        *sep = '\0' ;                       /* terminate the previous token */
        str = sep + 1 ;
        if ( *str == '\0' || *str == c ) return 0 ;     /* empty token */
        list[count] = str ;
    }

    /* list is full but another separator remains: too many tokens */
    if ( count == n && strchr( str, c ) != (char *)0 ) return 0 ;

    return count ;
}
597
598
599
/*
  Return ASCII string of integer i. (Inverse of atoi()).
  The string is held in memory obtained from malloc(); the caller owns it and
  should free() it when done.
  Return a null ptr if the memory cannot be allocated.
*/

char *itoa( i )
int i ;
{
    char digits[64], *copy ;

    sprintf( digits, "%d", i ) ;
    copy = (char *)malloc( strlen( digits ) + 1 ) ;
    if ( copy == (char *)0 )
        return (char *)0 ;              /* out of memory: nothing to copy into */
    strcpy( copy, digits ) ;
    return copy ;
}