view tools/strmatch.c @ 0:5242703e91d3 tip

Initial checkin for AIM92 aimR8.2 (last updated May 1997).
author tomwalters
date Fri, 20 May 2011 15:19:45 +0100
parents
children
line wrap: on
line source
/***************************************************************************

strmatch.c      String matching routines to supplement those defined in
----------      <strings.h>.


Character classification macros defined in <ctype.h>
These return truth values [0,1].


     int isalpha(c)     c is a letter
     int c;

     int isupper(c)     c is an uppercase letter
     int c;

     int islower(c)     c is a lowercase letter
     int c;

     int isdigit(c)     c is a digit
     int c;

     int isxdigit(c)    c is a hexadecimal digit, by default [0-9], [A-F], or
     int c;             [a-f].

     int isalnum(c)     c is an alphanumeric character
     int c;

     int isspace(c)     c is a space, tab, carriage return, new line, or form
     int c;             feed.

     int ispunct(c)     c is a punctuation character (neither control,
     int c;             alphanumeric, nor space)


     int isprint(c)     c is a printing character, by default code 040(8)
     int c;             (space) through 0176 (tilde)


     int isgraph(c)     c is a printing character, like isprint except false
     int c;             for space.

     int iscntrl(c)     c is a delete character (0177) or ordinary control
     int c;             character (less than 040) except for space characters


     int isascii(c)     c is an ASCII character, code less than 0200
     int c;


Character translation macros defined in <ctype.h>


     int toupper(c)     return upper-case letter corresponding to c.
     int c;

     int tolower(c)     return lower-case letter corresponding to c.
     int c;

     int toascii(c)     return ascii value corresponding to c.
     int c;


String handling routines defined in <strings.h>
A `span' is the length of a segment of a string, ie a number of characters.


     char *strcat(s1, s2)       Append a copy of string s2 to the end of
     char *s1, *s2;             string s1. Return a ptr to s1.

     char *strncat(s1, s2, n)   Append n chars of s2 to the end of string s1.
     char *s1, *s2;             Return a ptr to s1.

     int strcmp(s1, s2)         Compare strings. Return 0 if equal. Otherwise
     unsigned char *s1, *s2;    return <0 or >0 as s1 sorts before or after s2.

     int strncmp(s1, s2, n)     Compare at most n chars of the strings. Return
     unsigned char *s1, *s2;    0 if equal. Otherwise return <0 or >0 as s1
     int n;                     sorts before or after s2.

     strcasecmp(s1, s2)         As strcmp, but case insensitive.
     char *s1, *s2;

     strncasecmp(s1, s2, n)     As strncmp, but case insensitive.
     char *s1, *s2;

     char *strcpy(s1, s2)       Copy s2 to s1, including null char.
     char *s1, *s2;

     char *strncpy(s1, s2, n)   Copy n chars of s2 to s1. Truncate or pad s2
     char *s1, *s2;             with nulls to make up n chars. If s2 needs to
     int n                      be truncated, s1 will not be null terminated.

     int strlen(s)              Return number of chars in s, not including
     char *s;                   the terminating null character.

     char *strstr(s1, s2)       Return a ptr to the first occurrence of s2
     char *s1, *s2;             in s1. Otherwise return a null ptr.

     char *strchr(s, c)         Return a ptr to the first occurrence of c
     char *s;                   in s. Otherwise return a null ptr.
     int c;

     char *strrchr(s, c)        Return a ptr to the last occurrence of c
     char *s;                   in s. Otherwise return a null ptr.
     int c;

     char *strpbrk(s1, s2)      Return a ptr to the first occurrence of any
     char *s1, *s2;             char in s2 in s1. Otherwise return a null ptr.

     int strspn(s1, s2)         Return the span from the head of s1 which
     char *s1, *s2;             consists of chars which are in s2.

     int strcspn(s1, s2)        Return the span from the head of s1 which
     char *s1, *s2;             consists of chars which are not in s2.

     char *strtok(s1, s2)       See below:
     char *s1, *s2;

     The strtok subroutine considers the string s1 to consist of a sequence
     of zero or more text tokens separated by spans of one or more characters
     from the separator string s2.  The first call (with pointer s1
     specified) returns a pointer to the first character of the first token,
     and will have written a null character into s1 immediately following the
     returned token.  The function keeps track of its position in the string
     between separate calls, so that subsequent calls (which must be made
     with the first argument a NULL pointer) will work through the string s1
     immediately following that token.  In this way, subsequent calls will
     work through the string s1 until no tokens remain.  The separator string
     s2 may be different from call to call.  When no token remains in s1, a
     NULL pointer is returned.


***************************************************************************/



#include <math.h>
#include "strmatch.h"


/*
isnull: report whether `s' is a null pointer.
Return 1 if `s' is null, 0 otherwise. The pointer is never dereferenced.
*/

int isnull( s )
char *s ;
{
    return ( s == (char *)0 ) ? 1 : 0 ;
}

/*
isempty: report whether string `s' has zero length.
Return 1 if the first char of `s' is the terminator '\0', 0 otherwise.
`s' is dereferenced, so it must not be a null pointer.
*/

int isempty( s )
char *s ;
{
    return ( s[0] == '\0' ) ? 1 : 0 ;
}

/*
isnullorempty: report whether `s' is unusable as a non-empty string.
Return 1 if `s' is a null pointer or points at an empty string, 0 otherwise.
The null test comes first, so a null `s' is never dereferenced.
*/

int isnullorempty( s )
char *s ;
{
    if ( s != (char *)0 && *s != '\0' )
        return 0 ;
    return 1 ;
}


/*
terminator: locate the end of string `s'.
Return a pointer to the '\0' which terminates `s' (for an empty string this
is `s' itself).
*/

char *terminator( s )
char *s ;
{
    char *end = s ;

    while ( *end != '\0' )
        end++ ;
    return end ;
}


/*
isstr: test whether strings `s1' and `s2' are identical.
Return 1 if strcmp() finds them equal, 0 otherwise.
*/

int isstr( s1, s2 )
char *s1, *s2 ;
{
    if ( strcmp( s1, s2 ) != 0 )
        return 0 ;
    return 1 ;
}


/*
iststr: test whether `s1' is a (possibly truncated) form of `s2', ie whether
the whole of `s1' matches the head of `s2'.
Only strlen(s1) chars are compared, so an empty `s1' matches any `s2'.
Return 1 on a match, 0 otherwise.
*/

int iststr( s1, s2 )
char *s1, *s2 ;
{
    return strncmp( s1, s2, strlen( s1 ) ) ? 0 : 1 ;
}


/*
strccpy: copy the head of `s2' into `s1', stopping before the first
occurrence of character `c' in `s2', and null-terminate `s1'.
`s1' must have room for the copied chars plus the terminator.
Since strchr() also finds the terminator, c == '\0' copies the whole of `s2'.
Return `s1', or a null ptr (leaving `s1' untouched) if `c' is not in `s2'.
*/

char *strccpy( s1, s2, c )
char *s1, *s2 ;
char  c ;
{
    char *stop ;
    int   n ;

    if ( ( stop = strchr( s2, c ) ) == (char *)0 )
        return (char *)0 ;

    n = (int)( stop - s2 ) ;    /* number of chars ahead of `c' */
    strncpy( s1, s2, n ) ;
    s1[n] = '\0' ;              /* strncpy() does not terminate a short copy */
    return s1 ;
}


/*
strcbrk: find the first char of `s1' which does NOT occur in `s2'.
(The complement of strpbrk() in the Unix string library).
Return a ptr to that char, or a null ptr if every char of `s1' is in `s2'
(which includes the case of an empty `s1').
*/

char *strcbrk(s1, s2)
char *s1, *s2;
{
    char *p = s1 + strspn( s1, s2 ) ;   /* step over the leading run from s2 */

    return ( *p == '\0' ) ? (char *)0 : p ;
}



/*
strspnbrk: measure the span from the head of `s1' up to (not including) the
first char which occurs in `s2'.
Return that span, or -1 if no char of `s1' occurs in `s2'.
*/

int strspnbrk(s1, s2)
char *s1, *s2;
{
    int  n = strcspn( s1, s2 ) ;

    /* Reaching the terminator means no char from s2 was met. */
    return ( s1[n] != '\0' ) ? n : -1 ;
}


/*
strcspnbrk: measure the span from the head of `s1' up to (not including) the
first char which does NOT occur in `s2'.
Return that span, or -1 if every char of `s1' occurs in `s2'.
*/

int strcspnbrk(s1, s2)
char *s1, *s2;
{
    int  n = strspn( s1, s2 ) ;

    /* Reaching the terminator means s1 consists only of chars from s2. */
    if ( s1[n] == '\0' )
        return -1 ;
    return n ;
}



/*
strtcmp: compare the whole of `s1' against the head of `s2', ie compare only
the first strlen(s1) chars of the two strings.
Return the result of strncmp(): 0 if the heads are equal, otherwise a
negative or positive value giving their ordering.
*/

int strtcmp( s1, s2 )
char *s1, *s2 ;
{
    int  order ;

    order = strncmp( s1, s2, strlen( s1 ) ) ;
    return order ;
}


/*
strtrcmp: compare the tail of `s1' against the whole of `s2', ie compare the
last strlen(s2) chars of `s1' with `s2'.
Return 0 if they are equal, otherwise the non-zero result of strcmp().
If `s1' is shorter than `s2' no comparison is possible and the (negative)
length difference strlen(s1)-strlen(s2) is returned instead.
*/

int strtrcmp( s1, s2 )
char *s1, *s2 ;
{
    int  len1 = strlen( s1 ) ;
    int  len2 = strlen( s2 ) ;

    if ( len1 < len2 )
        return ( len1 - len2 ) ;

    return strcmp( s1 + ( len1 - len2 ), s2 ) ;
}


/*
streqspn: count how many chars at the head of `s1' are identical to the
chars at the same positions in `s2'.
Counting stops at the end of `s1' or at the first differing char (which
includes reaching the end of `s2' first).
Return the count, which is 0 if the first chars already differ.
*/

int streqspn( s1, s2 )
char *s1, *s2 ;
{
    int  n ;

    for ( n = 0 ; !isempty( s1 + n ) ; n++ )
        if ( s1[n] != s2[n] )
            break ;
    return n ;
}


/*
strnumspn: return the span (number of chars) of a <number> at the head of
string `s', or 0 if `s' does not start with one.
The chars accepted, in order, are:
   an optional '-', a run of digits, an optional '.', a run of digits,
   and an optional further '.'.
The span is non-zero only if something was accepted after the optional '-';
a '-' on its own gives 0.
NOTE(review): as coded, a lone "." counts as a number of span 1, and a
second point after the fraction digits (eg "1.2.") is included in the span.
Both go beyond the form [-]<digits>[.]<digits> - confirm that this is
intended before relying on it.
*/

int strnumspn( s )
char *s ;
{
    char *digits = "0123456789" ;
    char *start, *p = s ;

    if ( *p == '-' )  p++ ;             /* optional leading minus          */
    start = p ;                         /* the number body begins here     */

    p += strspn( p, digits ) ;          /* digits left of the point        */
    if ( *p == '.' )  p++ ;             /* the point itself                */
    p += strspn( p, digits ) ;          /* digits right of the point       */
    if ( *p == '.' )  p++ ;             /* a further point, if present     */

    if ( p == start )
        return 0 ;                      /* nothing after the optional '-'  */
    return (int)( p - s ) ;
}


/*
isnumber: test whether string `s' starts with a <number>, as recognised by
strnumspn().
Return 1 if the number span is greater than zero, 0 otherwise.
*/

int isnumber( s )
char *s ;
{
    return ( strnumspn( s ) > 0 ) ;
}


/*
strnumptr: step over a <number> at the head of string `s'.
Return a ptr to the first char after the number span found by strnumspn();
if `s' does not start with a number this is `s' itself.
*/

char *strnumptr( s )
char *s ;
{
    int  span = strnumspn( s ) ;

    return &s[span] ;
}


/*
strpnum: find the first number char in `s', where a number char is any
digit, '-' or '.'.
Return a ptr to that char, or a null ptr if `s' contains none.
*/

char *strpnum(s)
char *s ;
{
    char *numchars = "-.0123456789" ;

    return strpbrk( s, numchars ) ;
}

/*
strcnum: find the first char in `s' which is NOT a number char, where a
number char is any digit, '-' or '.'.
Return whatever strcbrk() returns for that set: a ptr to the char, or a
null ptr if `s' consists only of number chars.
*/

char *strcnum(s)
char *s ;
{
    char *numchars = "-.0123456789" ;

    return strcbrk( s, numchars ) ;
}


/*
strpsep: locate the second of two tokens in string `s1', where the tokens
are divided by a separator char taken from string `s2'. (`s2' is normally a
string of one char; strpbrk() is used, so any char of `s2' will separate).
A number at the head of `s1' is stepped over before the separator search,
so that a leading '-' or a decimal point belonging to that number is not
mistaken for the separator.
The result depends upon the form of `s1':
1. Null or empty.           - return NULL ptr, (missing 1st and 2nd tokens).
2. No separator char.       - return ptr to the terminator of `s1', ie to an
                              empty string, (empty 2nd token).
3. Separator is last char.  - return NULL ptr, (missing 2nd token).
4. Separator within `s1'.   - return ptr to the char after the separator,
                              (two correct tokens).
String `s1' is not modified.
*/

char *strpsep( s1, s2 )
char *s1, *s2 ;
{
    char *body, *sep ;

    if ( isnullorempty( s1 ) )
        return (char *)0 ;

    body = strnumptr( s1 ) ;            /* search starts after any number */
    sep  = strpbrk( body, s2 ) ;

    if ( sep == (char *)0 )
        return ( body + strlen( body ) ) ;      /* case 2: no separator */

    if ( isempty( sep + 1 ) )
        return (char *)0 ;                      /* case 3: nothing follows */

    return ( sep + 1 ) ;                        /* case 4: 2nd token */
}

/* Replace above last 4 lines of routine, and also mod strsep, to get proper
   string separator
    if ( ( s2 = strcbrk( s, s2 ) ) == (char *)0 )
	return ( (char *)0 ) ;
    while ( --s2 > s && isnumber( s2 ) ) ;
    return ( ++s2 ) ;
*/

/*
strsep: split string `s1' into two tokens at a separator char from `s2'.
The second token is located by strpsep(). If it exists and is not empty,
the separator char in front of it is overwritten with '\0', so that `s1'
becomes the first token.
Return the result of strpsep(): a ptr to the second token, a ptr to an
empty string, or a NULL ptr. In the last two cases `s1' is left unchanged.
NOTE(review): some systems declare a different strsep() in <string.h>;
confirm that there is no clash where this file is built.
*/

char *strsep( s1, s2 )
char *s1, *s2 ;
{
    char *second = strpsep( s1, s2 ) ;

    if ( !isnullorempty( second ) )
        second[-1] = '\0' ;     /* the separator sits just before token 2 */
    return second ;
}


/*
listcmp: look up the (possibly abbreviated) string `s' in the
null-terminated list of strings `list'.
An entry matches when the whole of `s' equals the head of the entry, as
tested by strtcmp().
Return the entry if exactly one entry matches.
Return a NULL pointer if no entry matches, or if more than one entry
matches (ie. `s' is ambiguous).
*/

char *listcmp( list, s )
char **list ;
char  *s  ;
{
    char **entry ;
    char  *found = (char *)0 ;

    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
        if ( strtcmp( s, *entry ) != 0 )
            continue ;                  /* not a match */
        if ( found != (char *)0 )
            return (char *)0 ;          /* second match: ambiguous */
        found = *entry ;
    }
    return found ;                      /* still NULL if nothing matched */
}


/*
listrcmp: look up the tail of string `s' in the null-terminated list of
strings `list'.
An entry matches when the whole of the entry equals the tail of `s', as
tested by strtrcmp().
Return the longest matching entry. If several matching entries share the
longest length the first of them in the list is returned.
Return a NULL pointer if no entry matches. An empty entry is never
returned, since its length cannot exceed the initial maximum of zero.
*/

char *listrcmp( list, s )
char **list ;
char  *s ;
{
    char **entry ;
    char  *best = (char *)0 ;
    int    len, bestlen = 0 ;

    for ( entry = list ; *entry != (char *)0 ; entry++ ) {
        if ( strtrcmp( s, *entry ) != 0 )
            continue ;                  /* tail of s differs from entry */
        len = strlen( *entry ) ;
        if ( len > bestlen ) {
            bestlen = len ;
            best    = *entry ;
        }
    }
    return best ;
}


/*
listindex: find which string in the null-terminated list `list' agrees with
the head of the (possibly abbreviated) string `s' over the longest span, as
measured by streqspn().
Return the list index of that string.
Return (-1) if every span is zero (no match at all).
Return (-2) if the longest span is shared by more than one string in the
list (ambiguous). A later, strictly longer span clears the ambiguity.
*/

int listindex( list, s )
char   **list ;
char    *s    ;
{
    int  pos = 0, span, best = 0, result = (-1) ;

    while ( list[pos] != (char *)0 ) {
        span = streqspn( s, list[pos] ) ;
        if ( span > best ) {
            best   = span ;             /* new longest span so far */
            result = pos ;
        }
        else if ( span == best && best > 0 )
            result = (-2) ;             /* tie for the longest span */
        pos++ ;
    }
    return result ;
}


/*
listsize: return the length of the null-terminated list of strings `list',
ie. the number of string pointers ahead of the terminating NULL pointer.
The return type is declared explicitly, as implicit `int' is no longer
accepted by current C compilers.
*/

int listsize( list )
char **list ;
{
    int  n = 0 ;

    while ( list[n] != (char *)0 )
        n++ ;
    return n ;
}


/*
mapindices: for each string in the null-terminated list `list1', find the
index of the matching string in `list2' using listindex(), which allows the
list1 strings to be abbreviated.
The indices of successful matches are stored in the array `index', packed
from index[0] upwards in list1 order. An unknown or ambiguous list1 string
stores nothing, so after a failure the positions in `index' no longer line
up with the positions in list1. `index' must be at least the size of list1.
Return the number of matches stored. (If this is less than the size of
list1 then list1 contains an unknown or an ambiguous string).
The return type is declared explicitly, as implicit `int' is no longer
accepted by current C compilers.
*/

int mapindices( list1, list2, index )
char **list1, **list2 ;
int   *index ;
{
    int  i, match, found = 0 ;

    for ( i = 0 ; list1[i] != (char *)0 ; i++ ) {
        match = listindex( list2, list1[i] ) ;
        if ( match >= 0 )               /* (-1) unknown, (-2) ambiguous */
            index[found++] = match ;
    }
    return found ;
}


/*
tokens: split the string `str' into tokens at each occurrence of the
separator char `c'.
A pointer to each token is stored in `list', and each token is
null-terminated by overwriting the separator which follows it with '\0'.
`list' must have room for `n' pointers.
Return the number of tokens, or 0 if an error. The errors are:
   - `str' is empty, or `n' is not positive;
   - `c' is '\0' (strchr() would find the terminator and the scan would run
     past the end of `str');
   - an empty token (leading, trailing, or doubled separator);
   - more than `n' tokens.
After an error return `str' may already have had some separators
overwritten.
The return type is declared explicitly, as implicit `int' is no longer
accepted by current C compilers.
*/

int tokens( str, list, n, c )
char  *str  ;
char **list ;
int    n    ;
char   c    ;
{
    int   count ;
    char *sep ;

    if ( c == '\0' || n <= 0 )  return 0 ;
    if ( isempty( str ) || *str == c )  return 0 ;
    list[0] = str ;

    for ( count = 1 ; count < n && ( sep = strchr( str, c ) ) != (char *)0 ; count++ ) {
        *sep = '\0' ;                   /* terminate the previous token */
        str = sep + 1 ;
        if ( isempty( str ) || *str == c )  return 0 ;  /* empty token */
        list[count] = str ;
    }

    /* The list is full but the last token still holds a separator. */
    if ( count == n && strchr( str, c ) != (char *)0 )  return 0 ;

    return count ;
}



/*
itoa: return the decimal ASCII string of integer `i'. (Inverse of atoi()).
The string is held in memory obtained from malloc(); the caller owns it and
should free() it when done.
Return a null ptr if the memory cannot be allocated.
*/

char *itoa( i )
int  i ;
{
    char  digits[64] ;          /* ample for any int printed with "%d" */
    char *copy ;

    sprintf( digits, "%d", i ) ;
    copy = malloc( strlen( digits ) + 1 ) ;
    if ( copy == (char *)0 )
        return (char *)0 ;      /* out of memory: nothing to copy into */
    strcpy( copy, digits ) ;
    return copy ;
}