/* dfa.h - declarations for GNU deterministic regexp compiler
Copyright (C) 1988, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
/* Written June, 1988 by Mike Haertel */
/* FIXME:
2. We should not export so much of the DFA internals.
In addition to clobbering modularity, we eat up valuable
name space. */
#ifdef __STDC__
# ifndef _PTR_T
# define _PTR_T
typedef void * ptr_t;
# endif
#else
# ifndef _PTR_T
# define _PTR_T
typedef char * ptr_t;
# endif
#endif
#ifdef PARAMS
# undef PARAMS
#endif
#if PROTOTYPES
# define PARAMS(x) x
#else
# define PARAMS(x) ()
#endif
/* Number of bits in an unsigned char. */
#ifndef CHARBITS
#define CHARBITS 8
#endif
/* First integer value that is greater than any character code. */
#define NOTCHAR (1 << CHARBITS)
/* INTBITS need not be exact, just a lower bound. */
#ifndef INTBITS
#define INTBITS (CHARBITS * sizeof (int))
#endif
/* Number of ints required to hold a bit for every character. */
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
typedef int charclass[CHARCLASS_INTS];
/* The regexp is parsed into an array of tokens in postfix form. Some tokens
are operators and others are terminal symbols. Most (but not all) of these
codes are returned by the lexical analyzer. */
typedef enum
{
END = -1, /* END is a terminal symbol that matches the
end of input; any value of END or less in
the parse tree is such a symbol. Accepting
states of the DFA are those that would have
a transition on END. */
/* Ordinary character values are terminal symbols that match themselves. */
EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
the empty string. */
BACKREF, /* BACKREF is generated by \<digit>; it
it not completely handled. If the scanner
detects a transition on backref, it returns
a kind of "semi-success" indicating that
the match will have to be verified with
a backtracking matcher. */
BEGLINE, /* BEGLINE is a terminal symbol that matches
the empty string if it is at the beginning
of a line. */
ENDLINE, /* ENDLINE is a terminal symbol that matches
the empty string if it is at the end of
a line. */
BEGWORD, /* BEGWORD is a terminal symbol that matches
the empty string if it is at the beginning
of a word. */
=1= |