/* kwset.c - search for any of a set of keywords.
Copyright 1989, 1998, 2000 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */
/* Written August 1989 by Mike Haertel.
The author may be reached (Email) at the address mike@ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
/* The algorithm implemented by these routines bears a startling resemblence
to one discovered by Beate Commentz-Walter, although it is not identical.
See "A String Matching Algorithm Fast on the Average," Technical Report,
IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
String Matching: An Aid to Bibliographic Search," CACM June 1975,
Vol. 18, No. 6, which describes the failure function used below. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <sys/types.h>
#include "system.h"
#include "kwset.h"
#include "obstack.h"
#ifdef GREP
extern char *xmalloc();
# undef malloc
# define malloc xmalloc
#endif
#define NCHAR (UCHAR_MAX + 1)
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
/* Balanced tree of edges and labels leaving a given trie node. */
struct tree
{
struct tree *llink; /* Left link; MUST be first field. */
struct tree *rlink; /* Right link (to larger labels). */
struct trie *trie; /* Trie node pointed to by this edge. */
unsigned char label; /* Label on this edge. */
char balance; /* Difference in depths of subtrees. */
};
/* Node of a trie representing a set of reversed keywords. */
struct trie
{
unsigned int accepting; /* Word index of accepted word, or zero. */
struct tree *links; /* Tree of edges leaving this node. */
struct trie *parent; /* Parent of this node. */
struct trie *next; /* List of all trie nodes in level order. */
struct trie *fail; /* Aho-Corasick failure function. */
int depth; /* Depth of this node from the root. */
int shift; /* Shift function for search failures. */
int maxshift; /* Max shift of self and descendents. */
};
/* Structure returned opaquely to the caller, containing everything. */
struct kwset
{
struct obstack obstack; /* Obstack for node allocation. */
int words; /* Number of words in the trie. */
struct trie *trie; /* The trie itself. */
int mind; /* Minimum depth of an accepting node. */
int maxd; /* Maximum depth of any node. */
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
struct trie *next[NCHAR]; /* Table of children of the root. */
char *target; /* Target string if there's only one. */
int mind2; /* Used in Boyer-Moore search for one string. */
char const *trans; /* Character translation table. */
};
/* Allocate and initialize a keyword set object, returning an opaque
pointer to it. Return NULL if memory is not available. */
kwset_t
kwsalloc (char const *trans)
{
struct kwset *kwset;
kwset = (struct kwset *) malloc(sizeof (struct kwset));
if (!kwset)
return 0;
obstack_init(&kwset->obstack);
kwset->words = 0;
kwset->trie
=1= |