Back to index

courier  0.68.2
re.h
Go to the documentation of this file.
00001 #ifndef       re_h
00002 #define       re_h
00003 
00004 
00005 #include      "config.h"
00006 #include      <sys/types.h>
00007 #include      "funcs.h"
00008 #include      "reeval.h"
00009 
00010 class ReMatch;
00011 
00013 //
00014 //  The Re class represents a regular expression.   The regular expression
00015 //  is translated into a non-deterministic automaton, stored as a list
00016 //  of RegExpNodes.
00017 //
00018 //  Then, one or more strings are matched against the regular expression.
00019 //
00020 //  The Re object may dynamically allocate another Re object in order to
00021 //  implement the ! operator.  Each ! operator introduces a dynamically-
00022 //  allocated Re object, which contains the next chained regular expression.
00023 //  Another ! operator causes another object to be allocated.
00024 //
00025 //  The ^ and $ anchors are implemented here.  The ABSENCE of a ^ anchor
00026 //  causes a dummy "[.\n]*" expression to be created in the first Re object,
00027 //  with the real expression being parsed in the 2nd Re object.
00028 //
00029 //  When a string is matched against a regular expression, when the current
00030 //  state includes a FINAL state, and there is a chained Re object, the
00031 //  remainder of the string gets matched against the chained Re object.
00032 //  If the chained match succeeds, the entire match succeeds, otherwise,
00033 //  we continue matching the original string.
00034 //
00035 //  If a match is successful, MatchCount() may be called to return the number
00036 //  of characters that were matched.  If an ! operator is used, the optional
00037 //  argument to MatchCount(), if not null, receives a pointer to the chained
00038 //  Re object, whose MatchCount() returns the count the next expression matched.
00039 //
00041 
00042 class  RegExpNode;
00043 
00044 class Re {
00045        // A regular expression object: built with Compile(), applied with Match().
00046        Re     *chainedre;          // Chained regular expression; handed to callers by MatchCount()
00047        Re     *prevre;             // NOTE(review): presumably the Re that chains to this one -- confirm in the implementation
00048        RegExpNode *nodes;          // Singly-linked list of nodes
00049        RegExpNode *first;          // Starting node
00050        RegExpNode *final;          // Final node
00051        unsigned nextid;            // When creating, next ID to assign
00052 
00053        RegExpNode    *allocnode(); // NOTE(review): presumably creates a node and assigns it nextid -- confirm
00054        const  char *expr, *origexpr;      // expr: parse cursor read by curchar() and advanced by nextchar(); origexpr: presumably the start of the expression -- TODO confirm
00055 
00056        // When matching:
00057        int    matched;
00058        off_t matchedpos;
00059        ReEval *curstate, *nextstate;      // NOTE(review): presumably point at state1/state2 below -- confirm
00060        unsigned final_id;
00061        // Small parsing helpers operating on the expr cursor.
00062        int    curchar() { return ((int)(unsigned char)*expr); }  // Byte at expr, converted via unsigned char so the result is never negative
00063        void   nextchar() { ++expr; }      // Advance the parse cursor by one character
00064        int    casesensitive;
00065        int    matchFull;
00066        int    isCaret;             // Value reported by IsAnchorStart()
00067        int    isDummy;             // Value reported by IsDummy()
00068 public:
00069        Re();
00070        ~Re();
00071        // NOTE(review): no copy constructor/assignment is declared here although the class holds raw pointers and has a destructor -- confirm Re objects are never copied.
00072        int Compile(const char *, int, int &);
00073                      // Compile regular expression
00074 private:
00075        int CompileS(const char *, int, int &);    // Same parameter list as Compile(); NOTE(review): presumably its internal worker -- confirm
00076        // Parameter names are omitted in this header; their meanings must be taken from the implementation.
00077 
00078        void init();
00079        RegExpNode **CompileAtom(RegExpNode **);
00080        RegExpNode **CompileAtomString(RegExpNode **);
00081        RegExpNode **CompileOrClause(RegExpNode **);
00082        RegExpNode **CompileElement(RegExpNode **);
00083        void is_sets(RegExpNode *);
00084 
00085        int    parsechar();
00086 
00087 // Evaluation
00088 
00089        ReEval state1, state2;
00090        unsigned charsmatched;      // Value returned by MatchCount()
00091 public:
00092        int    Match(ReMatch &);    // Match the input represented by the ReMatch argument; result semantics are defined in the implementation
00093        unsigned MatchCount(Re **p =0) {           // Returns charsmatched; p is optional
00094                                    if (p) *p=chainedre;   // When p is non-null, also hand back the chained Re (may be null)
00095                                    return (charsmatched); }
00096        int    IsDummy()     { return (isDummy); }        // Accessor for isDummy
00097        int    IsAnchorStart()      { return (isCaret); } // Accessor for isCaret
00098 } ;
00099 
00100 #endif