Mercurial > hg > aim92
view tools/strmatch.c @ 0:5242703e91d3 tip
Initial checkin for AIM92 aimR8.2 (last updated May 1997).
author | tomwalters |
---|---|
date | Fri, 20 May 2011 15:19:45 +0100 |
parents | |
children |
line wrap: on
line source
/*************************************************************************** strmatch.c String matching routines to supplement those defined in ---------- <strings.h>. Character classification macros defined in <ctype.h> These return truth values [0,1]. int isalpha(c) c is a letter int c; int isupper(c) c is an uppercase letter int c; int islower(c) c is a lowercase letter int c; int isdigit(c) c is a digit int c; int isxdigit(c) c is a hexadecimal digit, by default [0-9], [A-F], or int c; [a-f]. int isalnum(c) c is an alphanumeric character int c; int isspace(c) c is a space, tab, carriage return, new line, or form int c; feed. int ispunct(c) c is a punctuation character (neither control, int c; alphanumeric, nor space) int isprint(c) c is a printing character, by default code 040(8) int c; (space) through 0176 (tilde) int isgraph(c) c is a printing character, like isprint except false int c; for space. int iscntrl(c) c is a delete character (0177) or ordinary control int c; character (less than 040) except for space characters int isascii(c) c is an ASCII character, code less than 0200 int c; Character translation macros defined in <ctype.h> int toupper(c) return upper-case letter corresponding to c. int c; int tolower(c) return lower-case letter corresponding to c. int c; int toascii(c) return ascii value corresponding to c. int c; String handling routines defined in <strings.h> A `span' is the length of a segment of a string, ie a number of characters. char *strcat(s1, s2) Append a copy of string s2 to the end of char *s1, *s2; string s1. Return a ptr to s1. char *strncat(s1, s2, n) Append n chars of s2 to the end of string s1. char *s1, *s2; Return a ptr to s1. int strcmp(s1, s2) Compare strings. Return 0 if equal. unsigned char *s1, *s2; Otherwise return difference number of chars. int strncmp(s1, s2, n) Compare n chars of strings. Return 0 if equal. unsigned char *s1, *s2; Otherwise return difference number of chars. int n strcasecmp(s1, s2) As strcmp, but case insensitive. char *s1, *s2; strncasecmp(s1, s2, n) As strncmp, but case insensitive. char *s1, *s2; char *strcpy(s1, s2) Copy s2 to s1, including null char. char *s1, *s2; char *strncpy(s1, s2, n) Copy n chars of s2 to s1. Truncate or pad s2 char *s1, *s2; with nulls to make up n chars. If s2 needs to int n be truncated, s1 will not be null terminated. int strlen(s) Return number of chars in s, not including char *s; the terminating null character. char *strstr(s1, s2) Return a ptr to the first occurrence of s2 char *s1, *s2; in s1. Otherwise return a null ptr. char *strchr(s, c) Return a ptr to the first occurrence of c char *s; in s. Otherwise return a null ptr. int c; char *strrchr(s, c) Return a ptr to the last occurrence of c char *s; in s. Otherwise return a null ptr. int c; char *strpbrk(s1, s2) Return a ptr to the first occurrence of any char *s1, *s2; char in s2 in s1. Otherwise return a null ptr. int strspn(s1, s2) Return the span from the head of s1 which char *s1, *s2; consists of chars which are in s2. int strcspn(s1, s2) Return the span from the head of s1 which char *s1, *s2; consists of chars which are not in s2. char *strtok(s1, s2) See below: char *s1, *s2; The strtok subroutine considers the string s1 to consist of a sequence of zero or more text tokens separated by spans of one or more characters from the separator string s2. The first call (with pointer s1 speci- fied) returns a pointer to the first character of the first token, and will have written a null character into s1 immediately following the returned token. The function keeps track of its position in the string between separate calls, so that subsequent calls (which must be made with the first argument a NULL pointer) will work through the string s1 immediately following that token. In this way, subsequent calls will work through the string s1 until no tokens remain. The separator string s2 may be different from call to call. When no token remains in s1, a NULL pointer is returned. ***************************************************************************/ #include <math.h> #include "strmatch.h" /* Test for a NULL pointer to a character or string */ int isnull( s ) char *s ; { if ( s == (char *)0 ) return 1 ; else return 0 ; } /* Test for an empty string */ int isempty( s ) char *s ; { if ( *s == '\0' ) return 1 ; else return 0 ; } /* Test for NULL string pointer or empty string */ int isnullorempty( s ) char *s ; { if ( s == (char *)0 || *s == '\0' ) return 1 ; else return 0 ; } /* Return a pointer to the terminator '\0' at the tail of string `s'. */ char *terminator( s ) char *s ; { return ( s + strlen( s ) ) ; } /* Test strings s1==s2. Return 1 if true, 0 otherwise. */ int isstr( s1, s2 ) char *s1, *s2 ; { return ( strcmp( s1, s2 ) == 0 ) ; } /* Test strings s1==s2 up to the length of string s1 (ie allowing truncation). Return 1 if true, 0 otherwise. */ int iststr( s1, s2 ) char *s1, *s2 ; { return ( strncmp( s1, s2, strlen( s1 ) ) == 0 ) ; } /* Copy s2 to s1 up to (but not including) the first occurrence of character c in s2. Ensure s1 is then null terminated. Return s1 or a null ptr if c is not found in s2. */ char *strccpy( s1, s2, c ) char *s1, *s2 ; char c ; { char *s ; if ( ( s = strchr( s2, c ) ) == (char *)0 ) return (char *)0 ; strncpy( s1, s2, (int)( s - s2 ) ) ; *( s1 + ( s - s2 ) ) == '\0' ; return ( s1 ) ; } /* Return a ptr to the first occurrence of any char in s1 which is not in s2. Otherwise return a null ptr. (This complements strpbrk() in the Unix string library). */ char *strcbrk(s1, s2) char *s1, *s2; { int spn ; if ( ( spn = strspn( s1, s2 ) ) == strlen( s1 ) ) return ( (char *)0 ) ; return ( (char *)( s1 + spn ) ) ; } /* Return the span from the head of s1 to the first occurrence of any char in s2. Otherwise (if no such char found) return -1. */ int strspnbrk(s1, s2) char *s1, *s2; { int spn ; if ( ( spn = strcspn( s1, s2 ) ) == strlen( s1 ) ) return ( -1 ) ; return ( spn ) ; } /* Return the span from the head of s1 to the first occurrence of any char not in s2. Otherwise (if no such char found) return -1. */ int strcspnbrk(s1, s2) char *s1, *s2; { int spn ; if ( ( spn = strspn( s1, s2 ) ) == strlen( s1 ) ) return ( -1 ) ; return ( spn ) ; } /* Compare the heads of strings s1 and s2 up to the length of string s1. Return 0 if equal, otherwise return difference number of chars. */ int strtcmp( s1, s2 ) char *s1, *s2 ; { return ( strncmp( s1, s2, strlen( s1 ) ) ) ; } /* Compare the tails of strings s1 and s2 back to the length of string s2. Return 0 if equal, otherwise return difference number of chars. */ int strtrcmp( s1, s2 ) char *s1, *s2 ; { int i, j ; if ( ( i = strlen( s1 ) ) >= ( j = strlen( s2 ) ) ) return ( strcmp( s1+i-j, s2 ) ) ; return ( i-j ) ; } /* Return the span (number of chars) over which the head of string s1 equals the head of string s2. */ int streqspn( s1, s2 ) char *s1, *s2 ; { int spn=0 ; while ( !isempty(s1) && *s1++ == *s2++ ) spn++ ; return spn ; } /* Return the span (number of chars) of a <number> at the head of string s. <number> = [-]<digits>[.]<digits> where either, but not both, of the digit strings may be empty. */ int strnumspn( s ) char *s ; { int j0=0, j1, j2 ; if ( *s == '-' ) j0++ ; /* span of '-' */ j1 = strspn( s+j0, "0123456789" ) ; /* span of digits left of point */ if ( *( s+j0+j1 ) == '.' ) j1++ ; /* span of '.' */ j2 = strspn( s+j0+j1, "0123456789" ) ; /* span of digits right of point */ if ( *( s+j0+j1+j2 ) == '.' ) j2++ ; if ( j1>0 || j2>0 ) return ( j0+j1+j2 ) ; return 0 ; /* zero span means no number */ } /* Test for a <number>. */ int isnumber( s ) char *s ; { if ( strnumspn( s ) > 0 ) return 1 ; else return 0 ; } /* Return a ptr to the first char after a <number> at the head of string s. */ char *strnumptr( s ) char *s ; { return ( s + strnumspn( s ) ) ; } /* Return a ptr to the first occurrence of any number char in s. (where a number char includes '-' and '.' as well as any digit). */ char *strpnum(s) char *s ; { return ( strpbrk( s, "-.0123456789" ) ) ; } /* Return a ptr to the first occurrence of any char in s not a number char. (where a number char includes '-' and '.' as well as any digit). */ char *strcnum(s) char *s ; { return ( strcbrk( s, "-.0123456789" ) ) ; } /* Separate string `s1' into two string tokens at the first occurrence of a separator character `s2'. (Given as a string of one char). Numbers at the head of `s1' are skipped. (This skips leading hyphens or points which are part of a number, and so allows negative numbers with splitting hyphens, real numbers with splitting decimal point, etc.). Four possible outcomes, depending upon form of `s1': 1. Null or empty. - return NULL ptr, (missing 1st and 2nd tokens). 2. No separator char. - return ptr to empty string, (empty 2nd token). 3. Separator is last char. - return NULL ptr, (missing 2nd token). 4. Separator within `s1'. - return ptr to 2nd token, (two correct tokens). String `s1' is unchanged at the end. */ char *strpsep( s1, s2 ) char *s1, *s2 ; { char *s ; if ( isnullorempty( s1 ) ) return ( (char *)0 ) ; s1 = strnumptr( s1 ) ; /* skip leading numbers */ if ( ( s = strpbrk( s1, s2 ) ) == (char *)0 ) return ( s1 + strlen( s1 ) ) ; /* ptr to empty string at end s1 */ if ( isempty( ( s2 = s+1 ) ) ) return ( (char *)0 ) ; /* separator is last char */ return ( s2 ) ; } /* Replace above last 4 lines of routine, and also mod strsep, to get proper string separator if ( ( s2 = strcbrk( s, s2 ) ) == (char *)0 ) return ( (char *)0 ) ; while ( --s2 > s && isnumber( s2 ) ) ; return ( ++s2 ) ; */ /* Separate string `s1' into two string tokens. strsep() is the same as strpsep() except that, in the event of two correct tokens, insert '\0' at the separator. String `s1' is thus separated and becomes the first token. */ char *strsep( s1, s2 ) char *s1, *s2 ; { char *s = strpsep( s1, s2 ) ; /* ptr to 2nd token */ if ( isnullorempty( s ) ) return s ; *(s-1) = '\0' ; /* insert '\0' and return 2nd token */ return ( s ) ; } /* Compare the head of string `s' with null-terminated list of strings `list'. Return the longest matching string from `list'. Return a NULL pointer if no match is found, or if the (possibly abbreviated) head of string `s' is ambiguous (ie. matches more than once in the list). */ char *listcmp( list, s ) char **list ; char *s ; { int i, j = (-1) ; for ( i=0; list[i] != (char *)0 ; i++) if ( strtcmp( s, list[i] ) == 0 ) { if ( j >= 0 ) return (char *)0 ; /* ambiguous match */ else j = i; } if ( j < 0 ) return (char *)0 ; /* match not found */ return ( list[j] ) ; } /* Compare the tail of string `s' with null-terminated list of strings `list'. Return the longest matching string from `list'. Return a NULL pointer if no match is found, or if the (possibly abbreviated) tail of string `s' is ambiguous (ie. matches more than once in the list). */ char *listrcmp( list, s ) char **list ; char *s ; { int i, j = (-1), k, maxlen = 0 ; for ( i=0; list[i] != (char *)0 ; i++) if ( strtrcmp( s, list[i] ) == 0 && ( k = strlen( list[i] ) ) > maxlen ) { maxlen = k ; j = i ; } if ( j < 0 ) return (char *)0 ; /* match not found */ return ( list[j] ) ; } /* Compare the head of string `s' with null-terminated list of strings `list'. Find the string in `list' having the longest matching span with the head of `s' (which is possibly abbreviated). Return the list index of the matching string. Return (-1) if there is no match in the list (all spans are zero). Return (-2) if the longest matching span is ambiguous (ie occurs more than once in the list). */ int listindex( list, s ) char **list ; char *s ; { int i, j, jmax = 0, index = (-1) ; for ( i=0; list[i] != (char *)0 ; i++) { if ( ( j = streqspn( s, list[i] ) ) > jmax ) { jmax = j ; index = i ; } else if ( j > 0 && j == jmax ) index = (-2) ; } return index ; } /* Return the length of the null-terminated list of strings `list', ie. the number of strings it contains. */ listsize( list ) char **list ; { int i ; for ( i = 0 ; list[i] != (char *)0 ; i++ ) ; return i ; } /* For each string in list1, find the index of the matching string in list2 and store it in the index array, (which must be at least the size of list1). The string match allows for abbreviations in the list1 strings. Return the number of matching strings found. (If this is less than the size of list1 then list1 contains an unknown or an ambiguous string). */ mapindices( list1, list2, index ) char **list1, **list2 ; int *index ; { int i, j, n = 0 ; for ( i = 0 ; list1[i] != (char *)0 ; i++ ) if ( ( j = listindex( list2, list1[i] ) ) >= 0 ) index[n++] = j ; return n ; } /* Split the given string `str' into tokens at occurrences of separator char c. Store pointers to each token in `list', and null-terminate each token by overwriting the separator char with null. The list must contain at most n pointers. Return the number of tokens, or 0 if an error. */ tokens( str, list, n, c ) char *str ; char **list ; int n ; char c ; { int i ; char *s ; if ( isempty( str ) || *str == c || n <= 0 ) return 0 ; list[0] = str ; for ( i = 1 ; i < n && ( s = strchr( str, c ) ) != (char *)0 ; i++ ) { *s = '\0' ; str = s + 1 ; if ( isempty( str ) || *str == c ) return 0 ; list[i] = str ; } if ( i == n && strchr( str, c ) != (char *)0 ) return 0 ; return i ; } /* Return ASCII string of integer i. (Inverse of atoi()). */ char *itoa( i ) int i ; { char s[64], *s1 ; sprintf( s, "%d", i ) ; s1 = (char *)malloc( strlen( s ) + 1 ) ; strcpy( s1, s ) ; return s1 ; }