Mercurial > hg > aim92
diff tools/strmatch.c @ 0:5242703e91d3 tip
Initial checkin for AIM92 aimR8.2 (last updated May 1997).
author | tomwalters |
---|---|
date | Fri, 20 May 2011 15:19:45 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/strmatch.c Fri May 20 15:19:45 2011 +0100 @@ -0,0 +1,613 @@ +/*************************************************************************** + +strmatch.c String matching routines to supplement those defined in +---------- <strings.h>. + + +Character classification macros defined in <ctype.h> +These return truth values [0,1]. + + + int isalpha(c) c is a letter + int c; + + int isupper(c) c is an uppercase letter + int c; + + int islower(c) c is a lowercase letter + int c; + + int isdigit(c) c is a digit + int c; + + int isxdigit(c) c is a hexadecimal digit, by default [0-9], [A-F], or + int c; [a-f]. + + int isalnum(c) c is an alphanumeric character + int c; + + int isspace(c) c is a space, tab, carriage return, new line, or form + int c; feed. + + int ispunct(c) c is a punctuation character (neither control, + int c; alphanumeric, nor space) + + + int isprint(c) c is a printing character, by default code 040(8) + int c; (space) through 0176 (tilde) + + + int isgraph(c) c is a printing character, like isprint except false + int c; for space. + + int iscntrl(c) c is a delete character (0177) or ordinary control + int c; character (less than 040) except for space characters + + + int isascii(c) c is an ASCII character, code less than 0200 + int c; + + +Character translation macros defined in <ctype.h> + + + int toupper(c) return upper-case letter corresponding to c. + int c; + + int tolower(c) return lower-case letter corresponding to c. + int c; + + int toascii(c) return ascii value corresponding to c. + int c; + + +String handling routines defined in <strings.h> +A `span' is the length of a segment of a string, ie a number of characters. + + + char *strcat(s1, s2) Append a copy of string s2 to the end of + char *s1, *s2; string s1. Return a ptr to s1. + + char *strncat(s1, s2, n) Append n chars of s2 to the end of string s1. + char *s1, *s2; Return a ptr to s1. + + int strcmp(s1, s2) Compare strings. 
Return 0 if equal. + unsigned char *s1, *s2; Otherwise return difference number of chars. + + int strncmp(s1, s2, n) Compare n chars of strings. Return 0 if equal. + unsigned char *s1, *s2; Otherwise return difference number of chars. + int n + + strcasecmp(s1, s2) As strcmp, but case insensitive. + char *s1, *s2; + + strncasecmp(s1, s2, n) As strncmp, but case insensitive. + char *s1, *s2; + + char *strcpy(s1, s2) Copy s2 to s1, including null char. + char *s1, *s2; + + char *strncpy(s1, s2, n) Copy n chars of s2 to s1. Truncate or pad s2 + char *s1, *s2; with nulls to make up n chars. If s2 needs to + int n be truncated, s1 will not be null terminated. + + int strlen(s) Return number of chars in s, not including + char *s; the terminating null character. + + char *strstr(s1, s2) Return a ptr to the first occurrence of s2 + char *s1, *s2; in s1. Otherwise return a null ptr. + + char *strchr(s, c) Return a ptr to the first occurrence of c + char *s; in s. Otherwise return a null ptr. + int c; + + char *strrchr(s, c) Return a ptr to the last occurrence of c + char *s; in s. Otherwise return a null ptr. + int c; + + char *strpbrk(s1, s2) Return a ptr to the first occurrence of any + char *s1, *s2; char in s2 in s1. Otherwise return a null ptr. + + int strspn(s1, s2) Return the span from the head of s1 which + char *s1, *s2; consists of chars which are in s2. + + int strcspn(s1, s2) Return the span from the head of s1 which + char *s1, *s2; consists of chars which are not in s2. + + char *strtok(s1, s2) See below: + char *s1, *s2; + + The strtok subroutine considers the string s1 to consist of a sequence + of zero or more text tokens separated by spans of one or more characters + from the separator string s2. The first call (with pointer s1 speci- + fied) returns a pointer to the first character of the first token, and + will have written a null character into s1 immediately following the + returned token. 
The function keeps track of its position in the string + between separate calls, so that subsequent calls (which must be made + with the first argument a NULL pointer) will work through the string s1 + immediately following that token. In this way, subsequent calls will + work through the string s1 until no tokens remain. The separator string + s2 may be different from call to call. When no token remains in s1, a + NULL pointer is returned. + + +***************************************************************************/ + + + +#include <math.h> +#include "strmatch.h" + + +/* +Test for a NULL pointer to a character or string +*/ + +int isnull( s ) +char *s ; +{ + if ( s == (char *)0 ) return 1 ; + else return 0 ; +} + +/* +Test for an empty string +*/ + + +int isempty( s ) +char *s ; +{ + if ( *s == '\0' ) return 1 ; + else return 0 ; +} + +/* +Test for NULL string pointer or empty string +*/ + +int isnullorempty( s ) +char *s ; +{ + if ( s == (char *)0 || *s == '\0' ) return 1 ; + else return 0 ; +} + + +/* +Return a pointer to the terminator '\0' at the tail of string `s'. +*/ + +char *terminator( s ) +char *s ; +{ + return ( s + strlen( s ) ) ; +} + + +/* +Test strings s1==s2. Return 1 if true, 0 otherwise. +*/ + +int isstr( s1, s2 ) +char *s1, *s2 ; +{ + return ( strcmp( s1, s2 ) == 0 ) ; +} + + +/* +Test strings s1==s2 up to the length of string s1 (ie allowing truncation). +Return 1 if true, 0 otherwise. +*/ + +int iststr( s1, s2 ) +char *s1, *s2 ; +{ + return ( strncmp( s1, s2, strlen( s1 ) ) == 0 ) ; +} + + +/* +Copy s2 to s1 up to (but not including) the first occurrence of character c +in s2. Ensure s1 is then null terminated. +Return s1 or a null ptr if c is not found in s2. 
/*
strccpy: copy the head of s2, up to (but not including) the first occurrence
of character c, into s1, and null-terminate s1.
If c does not occur in s2 nothing is copied and s1 is left untouched.
s1 must have room for the copied chars plus the terminating null.
Return s1, or a null ptr if c is not found in s2.
*/

char *strccpy( s1, s2, c )
char *s1, *s2 ;
char c ;
{
    char *stop ;
    int   len ;

    if ( ( stop = strchr( s2, c ) ) == (char *)0 )
        return (char *)0 ;

    len = (int)( stop - s2 ) ;
    strncpy( s1, s2, len ) ;    /* copies exactly len chars, appends no null */
    s1[len] = '\0' ;            /* must be an assignment: `==' here left s1 unterminated */
    return ( s1 ) ;
}


/*
Return a ptr to the first occurrence of any char in s1 which is not in s2.
Otherwise (every char of s1 is in s2, or s1 is empty) return a null ptr.
(This complements strpbrk() in the Unix string library).
*/

char *strcbrk(s1, s2)
char *s1, *s2;
{
    char *p = s1 + strspn( s1, s2 ) ;

    /* the span reached the terminator, so no foreign char exists */
    if ( *p == '\0' )
        return ( (char *)0 ) ;
    return ( p ) ;
}



/*
Return the span from the head of s1 to the first occurrence of any char in s2.
Otherwise (if no such char found) return -1.
*/

int strspnbrk(s1, s2)
char *s1, *s2;
{
    int spn = strcspn( s1, s2 ) ;

    /* landing on the terminator means the whole of s1 was spanned */
    if ( s1[spn] == '\0' )
        return ( -1 ) ;
    return ( spn ) ;
}


/*
Return the span from the head of s1 to the first occurrence of any char not
in s2. Otherwise (if no such char found) return -1.
*/

int strcspnbrk(s1, s2)
char *s1, *s2;
{
    int spn = strspn( s1, s2 ) ;

    /* landing on the terminator means the whole of s1 was spanned */
    if ( s1[spn] == '\0' )
        return ( -1 ) ;
    return ( spn ) ;
}



/*
Compare the heads of strings s1 and s2 up to the length of string s1.
Return 0 if equal, otherwise the non-zero result of strncmp().
*/

int strtcmp( s1, s2 )
char *s1, *s2 ;
{
    return ( strncmp( s1, s2, strlen( s1 ) ) ) ;
}


/*
Compare the tails of strings s1 and s2 back to the length of string s2.
Return 0 if the tail of s1 equals s2.
If s1 is shorter than s2 no comparison is possible and the (negative)
length difference is returned; otherwise the result of strcmp() on the
tail of s1 is returned.
*/

int strtrcmp( s1, s2 )
char *s1, *s2 ;
{
    int len1 = strlen( s1 ) ;
    int len2 = strlen( s2 ) ;

    if ( len1 < len2 )
        return ( len1 - len2 ) ;
    return ( strcmp( s1 + ( len1 - len2 ), s2 ) ) ;
}
/*
streqspn: return the span (number of chars) over which the head of string s1
equals the head of string s2. Counting stops at the end of s1 or at the
first char which differs.
*/

int streqspn( s1, s2 )
char *s1, *s2 ;
{
    int n ;

    for ( n = 0 ; s1[n] != '\0' && s1[n] == s2[n] ; n++ )
        ;
    return n ;
}


/*
Return the span (number of chars) of a <number> at the head of string s.
   <number> = [-]<digits>[.]<digits>
A zero span means no number.
NOTE(review): the code is more permissive than the grammar above: a lone
"." (or "-.") yields a non-zero span, and a second '.' directly after the
right-hand digits is also counted (eg "1.5." gives 4). Confirm whether any
caller relies on this before tightening it.
*/

int strnumspn( s )
char *s ;
{
    char *p = s ;
    char *body ;                        /* first char after optional '-' */

    if ( *p == '-' ) p++ ;              /* optional leading '-' */
    body = p ;

    p += strspn( p, "0123456789" ) ;    /* digits left of point */
    if ( *p == '.' ) p++ ;              /* the point itself */
    p += strspn( p, "0123456789" ) ;    /* digits right of point */
    if ( *p == '.' ) p++ ;              /* one further trailing '.' */

    if ( p == body )                    /* nothing after the sign: no number */
        return 0 ;
    return (int)( p - s ) ;
}


/*
Test for a <number> at the head of string s, as recognised by strnumspn().
Return 1 if one is found, 0 otherwise.
*/

int isnumber( s )
char *s ;
{
    return ( strnumspn( s ) > 0 ) ;
}


/*
Return a ptr to the first char after a <number> at the head of string s.
If there is no number the ptr returned is s itself.
*/

char *strnumptr( s )
char *s ;
{
    return ( &s[ strnumspn( s ) ] ) ;
}


/*
Return a ptr to the first occurrence of any number char in s, or a null ptr
if there is none.
(A number char is '-' or '.' or any digit).
*/

char *strpnum(s)
char *s ;
{
    char *hit = strpbrk( s, "-.0123456789" ) ;

    return ( hit ) ;
}

/*
Return a ptr to the first occurrence of any char in s which is not a number
char, or a null ptr if s consists only of number chars (or is empty).
(A number char is '-' or '.' or any digit).
*/

char *strcnum(s)
char *s ;
{
    char *p = s + strspn( s, "-.0123456789" ) ;

    if ( *p == '\0' )           /* ran off the end: only number chars */
        return ( (char *)0 ) ;
    return ( p ) ;
}
/*
strpsep: locate the second of two string tokens in `s1', the tokens being
divided at the first occurrence of a separator char taken from `s2'
(normally given as a string of one char).
A number at the head of `s1' is skipped before the search starts, so that a
hyphen or point which belongs to a leading number is not mistaken for the
separator.
Four possible outcomes, depending upon the form of `s1':
1. Null or empty.           - return NULL ptr, (missing 1st and 2nd tokens).
2. No separator char.       - return ptr to empty string, (empty 2nd token).
3. Separator is last char.  - return NULL ptr, (missing 2nd token).
4. Separator within `s1'.   - return ptr to 2nd token, (two correct tokens).
String `s1' is unchanged at the end.
*/

char *strpsep( s1, s2 )
char *s1, *s2 ;
{
    char *rest ;        /* s1 with any leading number skipped */
    char *sep ;         /* the separator char found in rest */

    if ( isnullorempty( s1 ) )
        return ( (char *)0 ) ;

    rest = strnumptr( s1 ) ;
    sep  = strpbrk( rest, s2 ) ;

    if ( sep == (char *)0 )
        return ( rest + strlen( rest ) ) ;  /* ptr to empty string at end s1 */

    if ( isempty( sep + 1 ) )
        return ( (char *)0 ) ;              /* separator is last char */

    return ( sep + 1 ) ;
}

/* Replace above last 4 lines of routine, and also mod strsep, to get proper
   string separator
  if ( ( s2 = strcbrk( s, s2 ) ) == (char *)0 )
    return ( (char *)0 ) ;
  while ( --s2 > s && isnumber( s2 ) ) ;
  return ( ++s2 ) ;
*/

/*
Separate string `s1' into two string tokens.
strsep() returns exactly what strpsep() returns, but in the event of two
correct tokens it also overwrites the separator with '\0', so that `s1'
itself becomes the first token. In the other three cases `s1' is unchanged.
NOTE: BSD and GNU C libraries provide an unrelated strsep() taking
(char **, const char *); this definition clashes with it wherever that
declaration is visible.
*/

char *strsep( s1, s2 )
char *s1, *s2 ;
{
    char *second = strpsep( s1, s2 ) ;      /* ptr to 2nd token */

    if ( !isnullorempty( second ) )
        second[-1] = '\0' ;                 /* cut s1 at the separator */

    return ( second ) ;
}
/*
listcmp: compare string `s' (possibly an abbreviation) with the heads of the
strings in the null-terminated list `list'. A list string matches when its
first strlen(s) chars equal `s'.
Return the matching string from `list' if exactly one string matches.
Return a NULL pointer if no match is found, or if `s' is ambiguous
(ie. matches more than once in the list).
*/

char *listcmp( list, s )
char **list ;
char *s ;
{
    char **entry ;
    char  *found = (char *)0 ;

    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
        if ( strncmp( s, *entry, strlen( s ) ) != 0 )
            continue ;
        if ( found != (char *)0 )
            return (char *)0 ;          /* second hit: ambiguous match */
        found = *entry ;
    }
    return ( found ) ;                  /* still NULL if match not found */
}


/*
Compare the tail of string `s' with the null-terminated list of strings
`list'. A list string matches when it equals the last chars of `s'.
Return the longest matching (non-empty) string from `list'; if several
matches share that length the first one in the list is returned.
Return a NULL pointer if no match is found.
*/

char *listrcmp( list, s )
char **list ;
char *s ;
{
    char **entry ;
    char  *best = (char *)0 ;
    int    slen = strlen( s ) ;
    int    elen, maxlen = 0 ;

    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
        elen = strlen( *entry ) ;
        if ( elen > slen )
            continue ;                  /* longer than s: cannot be its tail */
        if ( strcmp( s + ( slen - elen ), *entry ) != 0 )
            continue ;
        if ( elen > maxlen ) {
            maxlen = elen ;
            best = *entry ;
        }
    }
    return ( best ) ;
}


/*
Compare the head of string `s' with null-terminated list of strings `list'.
Find the string in `list' having the longest matching span with the head of
`s' (which is possibly abbreviated). The span is the number of leading chars
the two strings have in common, so `s' need not match along its whole length.
Return the list index of the matching string.
Return (-1) if there is no match in the list (all spans are zero).
Return (-2) if the longest matching span is ambiguous (ie occurs more
than once in the list).
*/

int listindex( list, s )
char **list ;
char *s ;
{
    int   i, span ;
    int   longest = 0, result = (-1) ;
    char *entry ;

    for ( i = 0 ; ( entry = list[i] ) != (char *)0 ; i++ ) {

        /* count leading chars of s which equal those of this entry */
        for ( span = 0 ; s[span] != '\0' && s[span] == entry[span] ; span++ )
            ;

        if ( span > longest ) {
            longest = span ;
            result = i ;
        }
        else if ( span > 0 && span == longest )
            result = (-2) ;             /* tie for the longest span so far */
    }
    return result ;
}
/*
listsize: return the length of the null-terminated list of strings `list',
ie. the number of strings it contains (the terminating NULL pointer is not
counted).
*/

int listsize( list )
char **list ;
{
    int count = 0 ;

    while ( list[count] != (char *)0 )
        count++ ;
    return count ;
}


/*
For each string in list1, find the index of the matching string in list2
and store it in the index array, (which must be at least the size of list1).
The string match is done by listindex() and so allows for abbreviations in
the list1 strings.
Only successful matches are stored, and they are stored consecutively:
index[k] belongs to the k-th list1 string which matched, which is list1[k]
only if every earlier string matched too.
Return the number of matching strings found. (If this is less than the size
of list1 then list1 contains an unknown or an ambiguous string).
*/

int mapindices( list1, list2, index )
char **list1, **list2 ;
int *index ;
{
    int i, j, n = 0 ;

    for ( i = 0 ; list1[i] != (char *)0 ; i++ ) {
        j = listindex( list2, list1[i] ) ;
        if ( j >= 0 )                   /* negative: no match, or ambiguous */
            index[n++] = j ;
    }

    return n ;
}


/*
Split the given string `str' into tokens at occurrences of separator char c.
Store pointers to each token in `list', and null-terminate each token
by overwriting the separator char with null. `str' is modified in place,
and may already have been partly cut up when an error is returned.
`list' must have room for n pointers, so at most n tokens are accepted.
Return the number of tokens, or 0 if an error. Errors are: an empty string,
an empty token (leading, trailing or doubled separator), more than n tokens,
n <= 0, or a null separator char.
*/

int tokens( str, list, n, c )
char *str ;
char **list ;
int n ;
char c ;
{
    int   i ;
    char *s ;

    /* A null char cannot separate tokens: strchr() would find the string's
       own terminator and the scan would step past the end of the string. */
    if ( c == '\0' ) return 0 ;

    if ( isempty( str ) || *str == c || n <= 0 ) return 0 ;
    list[0] = str ;

    for ( i = 1 ; i < n && ( s = strchr( str, c ) ) != (char *)0 ; i++ ) {
        *s = '\0' ;                     /* terminate the previous token */
        str = s + 1 ;
        if ( isempty( str ) || *str == c ) return 0 ;   /* empty token */
        list[i] = str ;
    }

    /* list is full but separators remain: too many tokens */
    if ( i == n && strchr( str, c ) != (char *)0 ) return 0 ;

    return i ;
}



/*
Return ASCII string of integer i. (Inverse of atoi()).
The string is allocated with malloc() and should be released by the caller
with free(). A null ptr is returned if the allocation fails.
*/

char *itoa( i )
int i ;
{
    char digits[64], *copy ;

    sprintf( digits, "%d", i ) ;
    copy = (char *)malloc( strlen( digits ) + 1 ) ;
    if ( copy == (char *)0 )            /* out of memory: nothing to copy into */
        return (char *)0 ;
    strcpy( copy, digits ) ;
    return copy ;
}