diff tools/strmatch.c @ 0:5242703e91d3 tip

Initial checkin for AIM92 aimR8.2 (last updated May 1997).
author tomwalters
date Fri, 20 May 2011 15:19:45 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/strmatch.c	Fri May 20 15:19:45 2011 +0100
@@ -0,0 +1,613 @@
+/***************************************************************************
+
+strmatch.c      String matching routines to supplement those defined in
+----------      <strings.h>.
+
+
+Character classification macros defined in <ctype.h>
+These return a truth value: non-zero if true, 0 if false.
+
+
+     int isalpha(c)     c is a letter
+     int c;
+
+     int isupper(c)     c is an uppercase letter
+     int c;
+
+     int islower(c)     c is a lowercase letter
+     int c;
+
+     int isdigit(c)     c is a digit
+     int c;
+
+     int isxdigit(c)    c is a hexadecimal digit, by default [0-9], [A-F], or
+     int c;             [a-f].
+
+     int isalnum(c)     c is an alphanumeric character
+     int c;
+
+     int isspace(c)     c is a space, tab, carriage return, new line, or form
+     int c;             feed.
+
+     int ispunct(c)     c is a punctuation character (neither control,
+     int c;             alphanumeric, nor space)
+
+
+     int isprint(c)     c is a printing character, by default code 040(8)
+     int c;             (space) through 0176 (tilde)
+
+
+     int isgraph(c)     c is a printing character, like isprint except false
+     int c;             for space.
+
+     int iscntrl(c)     c is a delete character (0177) or ordinary control
+     int c;             character (less than 040) except for space characters
+
+
+     int isascii(c)     c is an ASCII character, code less than 0200
+     int c;
+
+
+Character translation macros defined in <ctype.h>
+
+
+     int toupper(c)     return upper-case letter corresponding to c.
+     int c;
+
+     int tolower(c)     return lower-case letter corresponding to c.
+     int c;
+
+     int toascii(c)     return ascii value corresponding to c.
+     int c;
+
+
+String handling routines defined in <strings.h>
+A `span' is the length of a segment of a string, ie a number of characters.
+
+
+     char *strcat(s1, s2)       Append a copy of string s2 to the end of
+     char *s1, *s2;             string s1. Return a ptr to s1.
+
+     char *strncat(s1, s2, n)   Append n chars of s2 to the end of string s1.
+     char *s1, *s2;             Return a ptr to s1.
+
+     int strcmp(s1, s2)         Compare strings. Return 0 if equal.
+     unsigned char *s1, *s2;    Otherwise return a value <0 or >0 according to
+                                whether s1 sorts before or after s2.
+
+     int strncmp(s1, s2, n)     Compare at most n chars. Return 0 if equal.
+     unsigned char *s1, *s2;    Otherwise return a value <0 or >0 according to
+     int n;                     whether s1 sorts before or after s2.
+
+     strcasecmp(s1, s2)         As strcmp, but case insensitive.
+     char *s1, *s2;
+
+     strncasecmp(s1, s2, n)     As strncmp, but case insensitive.
+     char *s1, *s2;
+
+     char *strcpy(s1, s2)       Copy s2 to s1, including null char.
+     char *s1, *s2;
+
+     char *strncpy(s1, s2, n)   Copy n chars of s2 to s1, truncating s2 or
+     char *s1, *s2;             padding s1 with nulls to make up n chars. If s2
+     int n;                     is truncated, s1 will not be null terminated.
+
+     int strlen(s)              Return number of chars in s, not including
+     char *s;                   the terminating null character.
+
+     char *strstr(s1, s2)       Return a ptr to the first occurrence of s2
+     char *s1, *s2;             in s1. Otherwise return a null ptr.
+
+     char *strchr(s, c)         Return a ptr to the first occurrence of c
+     char *s;                   in s. Otherwise return a null ptr.
+     int c;
+
+     char *strrchr(s, c)        Return a ptr to the last occurrence of c
+     char *s;                   in s. Otherwise return a null ptr.
+     int c;
+
+     char *strpbrk(s1, s2)      Return a ptr to the first occurrence of any
+     char *s1, *s2;             char in s2 in s1. Otherwise return a null ptr.
+
+     int strspn(s1, s2)         Return the span from the head of s1 which
+     char *s1, *s2;             consists of chars which are in s2.
+
+     int strcspn(s1, s2)        Return the span from the head of s1 which
+     char *s1, *s2;             consists of chars which are not in s2.
+
+     char *strtok(s1, s2)       See below:
+     char *s1, *s2;
+
+     The strtok	subroutine considers the string	s1 to consist of a sequence
+     of	zero or	more text tokens separated by spans of one or more characters
+     from the separator	string s2.  The	first call (with pointer s1 speci-
+     fied) returns a pointer to	the first character of the first token,	and
+     will have written a null character	into s1	immediately following the
+     returned token.  The function keeps track of its position in the string
+     between separate calls, so	that subsequent	calls (which must be made
+     with the first argument a NULL pointer) will work through the string s1
+     immediately following that	token.	In this	way, subsequent	calls will
+     work through the string s1	until no tokens	remain.	 The separator string
+     s2	may be different from call to call.  When no token remains in s1, a
+     NULL pointer is returned.
+
+
+***************************************************************************/
+
+
+
+#include <math.h>
+#include "strmatch.h"
+
+
+/*
+Report whether `s' is a NULL pointer.
+Return 1 if it is, 0 otherwise. Only the pointer is examined; it is never
+dereferenced.
+*/
+
+int isnull( s )
+char *s ;
+{
+    return ( s == (char *)0 ) ? 1 : 0 ;
+}
+
+/*
+Report whether string `s' is empty, ie. its first char is the terminator.
+Return 1 if empty, 0 otherwise.
+`s' is dereferenced, so it must not be a NULL pointer.
+*/
+
+int isempty( s )
+char *s ;
+{
+    return ( *s != '\0' ) ? 0 : 1 ;
+}
+
+/*
+Report whether `s' is unusable as text: either a NULL pointer or an empty
+string. Return 1 in either case, 0 otherwise.
+The pointer is tested first, so a NULL `s' is never dereferenced.
+*/
+
+int isnullorempty( s )
+char *s ;
+{
+    if ( s != (char *)0 && *s != '\0' )
+        return 0 ;      /* a real string holding at least one char */
+    return 1 ;
+}
+
+
+/*
+Find the end of string `s'.
+Return a pointer to its terminating '\0' (for an empty string this is `s'
+itself).
+*/
+
+char *terminator( s )
+char *s ;
+{
+    while ( *s != '\0' )
+        s++ ;
+    return s ;
+}
+
+
+/*
+Test whether strings s1 and s2 are identical (same chars, same length).
+Return 1 if they are, 0 otherwise.
+*/
+
+int isstr( s1, s2 )
+char *s1, *s2 ;
+{
+    if ( strcmp( s1, s2 ) != 0 )
+        return 0 ;
+    return 1 ;
+}
+
+
+/*
+Test whether s1 matches the head of s2, ie. whether s1 is s2 or a truncated
+(abbreviated) form of it. Only the first strlen(s1) chars are compared, so an
+empty s1 matches every s2.
+Return 1 on a match, 0 otherwise.
+*/
+
+int iststr( s1, s2 )
+char *s1, *s2 ;
+{
+    if ( strncmp( s1, s2, strlen( s1 ) ) != 0 )
+        return 0 ;
+    return 1 ;
+}
+
+
+/*
+Copy s2 to s1 up to (but not including) the first occurrence of character c
+in s2, then null terminate s1.
+s1 must have room for the copied chars plus the terminator.
+Return s1, or a null ptr if c is not found in s2 (s1 is then left untouched).
+*/
+
+char *strccpy( s1, s2, c )
+char *s1, *s2 ;
+char  c ;
+{
+    char *stop = strchr( s2, c ) ;      /* first occurrence of c in s2 */
+    int   n ;
+
+    if ( stop == (char *)0 )
+        return (char *)0 ;
+
+    n = (int)( stop - s2 ) ;            /* number of chars in front of c */
+    strncpy( s1, s2, n ) ;
+    s1[n] = '\0' ;      /* strncpy adds no terminator when it stops short */
+    return ( s1 ) ;
+}
+
+
+/*
+Return a ptr to the first char of s1 which does not occur in s2.
+Return a null ptr if every char of s1 occurs in s2 (which includes the case of
+an empty s1). This is the complement of strpbrk() in the Unix string library.
+*/
+
+char *strcbrk(s1, s2)
+char *s1, *s2;
+{
+    int   spn  = strspn( s1, s2 ) ;     /* leading run of chars taken from s2 */
+    char *stop = s1 + spn ;
+
+    if ( *stop == '\0' )                /* the run reached the end of s1 */
+        return ( (char *)0 ) ;
+    return ( stop ) ;
+}
+
+
+
+/*
+Return the number of chars at the head of s1 which precede the first
+occurrence of any char from s2.
+Return -1 if s1 contains no char from s2.
+*/
+
+int strspnbrk(s1, s2)
+char *s1, *s2;
+{
+    int  spn = strcspn( s1, s2 ) ;      /* leading run of chars not in s2 */
+
+    /* Landing on the terminator means no char from s2 was met. */
+    return ( s1[spn] == '\0' ) ? -1 : spn ;
+}
+
+
+/*
+Return the number of chars at the head of s1 which precede the first char
+that is not in s2.
+Return -1 if s1 consists entirely of chars from s2.
+*/
+
+int strcspnbrk(s1, s2)
+char *s1, *s2;
+{
+    int  spn = strspn( s1, s2 ) ;       /* leading run of chars taken from s2 */
+
+    /* Landing on the terminator means every char of s1 was in s2. */
+    return ( s1[spn] == '\0' ) ? -1 : spn ;
+}
+
+
+
+/*
+Compare s1 against the head of s2, looking only at the first strlen(s1)
+chars. Return 0 if they are equal (so s1 is s2, or an abbreviation of it),
+otherwise the non-zero result of strncmp().
+*/
+
+int strtcmp( s1, s2 )
+char *s1, *s2 ;
+{
+    int  diff = strncmp( s1, s2, strlen( s1 ) ) ;
+
+    return diff ;
+}
+
+
+/*
+Compare s2 against the tail of s1, ie. the last strlen(s2) chars of s1.
+Return 0 if s1 ends with s2, otherwise non-zero: the result of strcmp() on
+the tail, or the (negative) difference in lengths when s1 is shorter than s2.
+*/
+
+int strtrcmp( s1, s2 )
+char *s1, *s2 ;
+{
+    int  len1 = strlen( s1 ) ;
+    int  len2 = strlen( s2 ) ;
+
+    if ( len1 < len2 )
+        return ( len1 - len2 ) ;        /* s1 is too short to end with s2 */
+    return ( strcmp( s1 + ( len1 - len2 ), s2 ) ) ;
+}
+
+
+/*
+Return the number of leading chars which strings s1 and s2 have in common,
+ie. the span over which the head of s1 equals the head of s2.
+Counting stops at the first differing char or at the end of s1.
+*/
+
+int streqspn( s1, s2 )
+char *s1, *s2 ;
+{
+    int  spn ;
+
+    /* The end of s2 needs no separate test: its '\0' differs from any */
+    /* remaining char of s1.                                            */
+    for ( spn = 0 ; s1[spn] != '\0' && s1[spn] == s2[spn] ; spn++ )
+        ;
+    return spn ;
+}
+
+
+/*
+Return the span (number of chars) of a <number> at the head of string s.
+   <number> = [-]<digits>[.]<digits>
+where either of the digit strings may be empty.
+Return 0 if there is no number at the head of s; a '-' standing alone does
+not count.
+As written, a '.' found directly after the number is included in the span,
+and a '.' with no digits on either side yields a span of 1.
+*/
+
+int strnumspn( s )
+char *s ;
+{
+    char *p = s ;
+    char *body ;
+
+    if ( *p == '-' )  p++ ;             /* optional leading '-'             */
+    body = p ;                          /* the number proper starts here    */
+
+    p += strspn( p, "0123456789" ) ;    /* digits left of the point         */
+    if ( *p == '.' )  p++ ;             /* the point itself                 */
+    p += strspn( p, "0123456789" ) ;    /* digits right of the point        */
+    if ( *p == '.' )  p++ ;             /* one further '.' is also taken    */
+
+    if ( p == body )
+        return 0 ;                      /* nothing beyond any '-': no number */
+    return (int)( p - s ) ;
+}
+
+
+/*
+Test whether string s starts with a <number>, as recognised by strnumspn().
+Return 1 if it does, 0 otherwise.
+*/
+
+int isnumber( s )
+char *s ;
+{
+    return ( strnumspn( s ) > 0 ) ;
+}
+
+
+/*
+Skip over a <number> at the head of string s.
+Return a ptr to the first char after the number, or s itself if s does not
+start with a number.
+*/
+
+char *strnumptr( s )
+char *s ;
+{
+    int  spn = strnumspn( s ) ;         /* 0 when there is no number */
+
+    return ( &s[spn] ) ;
+}
+
+
+/*
+Return a ptr to the first number char in s, where a number char is any digit
+or one of '-' and '.'.
+Return a null ptr if s contains no number char.
+*/
+
+char *strpnum(s)
+char *s ;
+{
+    static char numchars[] = "-.0123456789" ;
+
+    return ( strpbrk( s, numchars ) ) ;
+}
+
+/*
+Return a ptr to the first char in s which is not a number char, where a
+number char is any digit or one of '-' and '.'.
+Return a null ptr if s consists of number chars only.
+*/
+
+char *strcnum(s)
+char *s ;
+{
+    static char numchars[] = "-.0123456789" ;
+
+    return ( strcbrk( s, numchars ) ) ;
+}
+
+
+/*
+Locate the second of two tokens in string `s1', the tokens being divided by
+the first occurrence of a separator char. The separator is given as the
+string `s2', normally of one char; any char in `s2' is accepted as the
+separator.
+A number at the head of `s1' is skipped before the search begins, so that a
+hyphen or point belonging to that number is not taken for the separator.
+(This allows negative numbers with splitting hyphens, real numbers with a
+splitting decimal point, etc.).
+Four possible outcomes, depending upon the form of `s1':
+1. Null or empty.           - return NULL ptr, (missing 1st and 2nd tokens).
+2. No separator char.       - return ptr to empty string, (empty 2nd token).
+3. Separator is last char.  - return NULL ptr, (missing 2nd token).
+4. Separator within `s1'.   - return ptr to 2nd token, (two correct tokens).
+String `s1' is not modified.
+*/
+
+char *strpsep( s1, s2 )
+char *s1, *s2 ;
+{
+    char *rest, *sep ;
+
+    if ( isnullorempty( s1 ) )
+        return ( (char *)0 ) ;                  /* outcome 1 */
+
+    rest = strnumptr( s1 ) ;            /* search starts after a leading number */
+    sep  = strpbrk( rest, s2 ) ;
+
+    if ( sep == (char *)0 )
+        return ( rest + strlen( rest ) ) ;      /* outcome 2: the '\0' of s1 */
+
+    if ( sep[1] == '\0' )
+        return ( (char *)0 ) ;                  /* outcome 3 */
+
+    return ( sep + 1 ) ;                        /* outcome 4 */
+}
+
+/* Replace above last 4 lines of routine, and also mod strsep, to get proper
+   string separator
+    if ( ( s2 = strcbrk( s, s2 ) ) == (char *)0 )
+	return ( (char *)0 ) ;
+    while ( --s2 > s && isnumber( s2 ) ) ;
+    return ( ++s2 ) ;
+*/
+
+/*
+Separate string `s1' into two string tokens.
+The 2nd token is located by strpsep(), whose return value is passed straight
+back. When two correct tokens are found, the separator char in `s1' is also
+overwritten with '\0', so that `s1' becomes the first token. In every other
+case `s1' is left unchanged.
+NOTE(review): the BSD/GNU string library has a strsep() with a different
+signature - confirm this definition cannot clash with the system headers.
+*/
+
+char *strsep( s1, s2 )
+char *s1, *s2 ;
+{
+    char *second = strpsep( s1, s2 ) ;  /* ptr to 2nd token */
+
+    if ( !isnullorempty( second ) )
+        second[-1] = '\0' ;     /* the separator sits just before the 2nd token */
+    return ( second ) ;
+}
+
+
+/*
+Look up string `s' in the null-terminated list of strings `list', allowing
+`s' to be an abbreviation: a list string matches when its head equals the
+whole of `s' (see strtcmp()).
+Return the matching list string if exactly one string matches.
+Return a NULL pointer if no string matches, or if `s' is ambiguous (ie.
+matches more than once in the list).
+*/
+
+char *listcmp( list, s )
+char **list ;
+char  *s  ;
+{
+    char **entry ;
+    char  *found = (char *)0 ;
+
+    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
+        if ( strtcmp( s, *entry ) != 0 )
+            continue ;
+        if ( found != (char *)0 )
+            return (char *)0 ;          /* second hit: ambiguous match */
+        found = *entry ;
+    }
+    return found ;                      /* still NULL if match not found */
+}
+
+
+/*
+Compare the tail of string `s' with the null-terminated list of strings
+`list'. A list string matches when `s' ends with it (see strtrcmp()).
+Return the longest matching string from `list'; if several matches share the
+greatest length, the one earliest in the list is returned.
+Return a NULL pointer if no match is found. An empty list string is never
+returned.
+*/
+
+char *listrcmp( list, s )
+char **list ;
+char  *s ;
+{
+    char **entry ;
+    char  *best = (char *)0 ;
+    int    len, maxlen = 0 ;
+
+    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
+        if ( strtrcmp( s, *entry ) != 0 )
+            continue ;
+        len = strlen( *entry ) ;
+        if ( len > maxlen ) {           /* strictly longer than any so far */
+            maxlen = len ;
+            best   = *entry ;
+        }
+    }
+    return best ;                       /* still NULL if match not found */
+}
+
+
+/*
+Compare the head of string `s' with the null-terminated list of strings
+`list'. For each list string find the span over which its head equals the
+head of `s' (see streqspn()), and pick the string having the longest span.
+Return the list index of that string.
+Return (-1) if there is no match in the list (all spans are zero).
+Return (-2) if the longest span is ambiguous (ie. occurs more than once in
+the list).
+*/
+
+int listindex( list, s )
+char   **list ;
+char    *s    ;
+{
+    int  i, spn, best = 0, result = (-1) ;
+
+    for ( i = 0 ; list[i] != (char *)0 ; i++ ) {
+        spn = streqspn( s, list[i] ) ;
+        if ( spn > best ) {             /* new longest span: unique so far */
+            best   = spn ;
+            result = i ;
+        }
+        else if ( spn > 0 && spn == best )
+            result = (-2) ;             /* ties the longest span: ambiguous */
+    }
+    return result ;
+}
+
+
+/*
+Return the length of the null-terminated list of strings `list', ie. the
+number of strings which come before its terminating NULL pointer.
+*/
+
+int listsize( list )
+char **list ;
+{
+    int  n = 0 ;
+
+    while ( list[n] != (char *)0 )
+        n++ ;
+    return n ;
+}
+
+
+/*
+For each string in list1, find the index of the matching string in list2
+(see listindex()) and store it in the index array, which must be at least the
+size of list1.
+The string match allows for abbreviations in the list1 strings.
+Indices are stored consecutively from index[0], and a list1 string with no
+match or an ambiguous match stores nothing; after such a string the entries
+of index no longer line up with the positions in list1.
+Return the number of matching strings found. (If this is less than the size
+of list1 then list1 contains an unknown or an ambiguous string).
+*/
+
+int mapindices( list1, list2, index )
+char **list1, **list2 ;
+int   *index ;
+{
+    int  i, j, n = 0 ;
+
+    for ( i = 0 ; list1[i] != (char *)0 ; i++ ) {
+        j = listindex( list2, list1[i] ) ;
+        if ( j >= 0 )                   /* (-1) and (-2) mean no usable match */
+            index[n++] = j ;
+    }
+
+    return n ;
+}
+
+
+/*
+Split the given string `str' into tokens at occurrences of separator char c.
+Store pointers to each token in `list', and null-terminate each token
+by overwriting the separator char with null.
+At most n pointers are stored in `list'.
+Return the number of tokens, or 0 if an error. The errors are: an empty
+`str'; an empty token (a separator at the head or tail of `str', or two
+separators together); more than n tokens; n not positive; a null separator.
+`str' may already have been partly split when an error is detected.
+*/
+
+int tokens( str, list, n, c )
+char  *str  ;
+char **list ;
+int    n    ;
+char   c    ;
+{
+    int   i ;
+    char *s ;
+
+    /* A null separator would be "found" at the terminator of str and carry */
+    /* the scan past the end of the string, so it is refused.               */
+    if ( c == '\0' || n <= 0 )
+        return 0 ;
+    if ( *str == '\0' || *str == c )
+        return 0 ;
+    list[0] = str ;
+
+    for ( i = 1 ; i < n && ( s = strchr( str, c ) ) != (char *)0 ; i++ ) {
+        *s  = '\0' ;                    /* end the previous token */
+        str = s + 1 ;
+        if ( *str == '\0' || *str == c )
+            return 0 ;                  /* empty token */
+        list[i] = str ;
+    }
+
+    /* list is full: any separator still left means too many tokens */
+    if ( i == n && strchr( str, c ) != (char *)0 )
+        return 0 ;
+
+    return i ;
+}
+
+
+
+/*
+Return ASCII string of integer i. (Inverse of atoi()).
+The string is held in memory obtained from malloc(); the caller owns it and
+should free() it when done.
+Return a null ptr if the memory cannot be allocated.
+*/
+
+char *itoa( i )
+int  i ;
+{
+    char  digits[64], *copy ;
+
+    sprintf( digits, "%d", i ) ;
+    copy = (char *)malloc( strlen( digits ) + 1 ) ;
+    if ( copy == (char *)0 )
+        return (char *)0 ;              /* out of memory */
+    strcpy( copy, digits ) ;
+    return copy ;
+}