Listing 4b: Class RegularExpression implementation

#include "regexp.h"
     
void RegularExpression::
   ConvertToFiniteAutomaton(const char *regExp)
{
  // Traverses the input expression and constructs a finite 
  // automaton expressing it 
  bool retval = true;
  bool backslashFound = false;
  bool orOperatorFound = false;
  bool inSet = false;
  bool firstSetChar = true;
     
  int outerFromState = invalidState;
  int innerFromState = invalidState;
  int innerToState = invalidState;
  int outerToState = invalidState;
  int tempState = invalidState;
     
  innerToState = startState;
  outerToState = NewState();
     
  // For a regular expression, the empty event is assumed 
  // to be '\0'
  AddEmptyTransition(innerToState, outerToState);
     
  for(char ch; (ch = *regExp) != 0; regExp++) 
  {
    if(backslashFound || !SpecialChar(ch)) 
    {
      //if the previous character was a '\' then 
      //character may need translation.
      if(backslashFound)
      {
         ch = TranslateCh(ch);
         backslashFound = false;
      }
     
      if(inSet)
      {
        // Is the first character in the set? 
        if(firstSetChar)
        {
          // Add new states for either side of set expression
          innerFromState = NewState();
          innerToState = NewState();
          firstSetChar = false;
        }
        // Add transition for new character
        AddTransition(innerFromState, ch, innerToState);
      }
      else
      {
        // Single character transition
        innerFromState = NewState();
        innerToState = NewState();
        AddTransition(innerFromState, ch, innerToState);
     
        // Connect new transition to rest of automaton
        ConnectSubExpr(
           outerFromState, innerFromState, innerToState, 
           outerToState, orOperatorFound);
     
        orOperatorFound = false;
        backslashFound = false;
      }
    }
    else
    {
      // Handle special characters
      switch (ch)
      {
         case '|':   // OR operator
           orOperatorFound = true;
           break;
     
         case '\\':  // escape character
           backslashFound = true;
           break;
     
         case '[':   // opening a set
           if(!inSet)
           {
             inSet = true;
             firstSetChar = true;
           }
           break;
     
         case ']':   // closing a set
            if (inSet && !firstSetChar)
            {
               inSet = false;
               firstSetChar = true;
               // Connect set transitions to rest of automaton
               ConnectSubExpr(

outerFromState, innerFromState, innerToState,

                  outerToState, orOperatorFound);
            }
            break;
     
         case '*':   // zero or more
           AddEmptyTransition(innerFromState, innerToState);
           AddEmptyTransition(innerToState, innerFromState);
           break;
     
         case '+':   // one or more
           AddEmptyTransition(innerToState, innerFromState);
           continue;
     
         case '?':   // any character
           AddEmptyTransition(innerFromState, innerToState);
           continue;
     
         case '(':   // opening parenthesised sub-exp
            innerFromState = NewState();
            Push(innerFromState, outerFromState, outerToState, 
               orOperatorFound);
            outerToState = innerFromState;
            orOperatorFound = false;
            break;
     
         case ')':   // closing parenthesised sub-exp
            innerToState = outerToState;
            Pop(innerFromState, outerFromState, outerToState, 
               orOperatorFound);
            ConnectSubExpr(
               outerFromState, innerFromState, innerToState,
               outerToState, orOperatorFound);
            break;
      }
   }
  }
  SetAcceptState(outerToState);
}
     
     
// Implementation for helper functions
char RegularExpression::TranslateCh(char inChar) 
{
   char retval = inChar;
   switch (inChar)
   {
      case 'r': retval = '\r'; break;
      case 'n': retval = '\n'; break;
      case 't': retval = '\t'; break;
   }
   return(retval);
}
     
// Determines whether the passed character is in any way 'special', 
// that is whether or not it is a meta-character controlling the 
// meaning of the regular expression string
bool RegularExpression::SpecialChar(char ch) 
{
  bool retval = false;
  switch (ch)
  {
    case '\\':
    case '(':
    case ')':
    case '|':
    case '[':
    case ']':
    case '*':
    case '+':
    case '-':
      retval = true;
      break;
  }
  return(retval);
}
     
void RegularExpression::Dump(ostream &out) 
{
  // Print regular expression represented 
  out << "RegularExpression::exp "
      << exp << '\n';
}
     
void RegularExpression::Push(
   int i_s1, int i_s2, int i_s3, bool i_f1)
{
   subExpStack.AddHead(new subExpState(i_s1, i_s2, i_s3, i_f1));
}
     
void RegularExpression::Pop(
   int &o_s1, int &o_s2, int &o_s3, bool &o_f1)
{
  if(subExpStack.IsEmpty())
    throw "stack underflow";
     
  subExpState *subExp = (subExpState *)subExpStack.RemoveHead();

o_s1 = subExp->m_s1;

  o_s2 = subExp->m_s2;
  o_s3 = subExp->m_s3;
  o_f1 = subExp->m_f1;
  delete subExp;
}
     
void RegularExpression::ConnectSubExpr(
   int& outerFromState, int innerFromState, int innerToState,
   int& outerToState, bool& orOperatorFound)
{
  // was the char before the subexpression an OR operator?
  if (orOperatorFound)
  {
    // Yes, so connect it up accordingly 
    AddEmptyTransition(outerFromState, innerFromState);
    AddEmptyTransition(innerToState, outerToState);
  }
  else
  {
    // No, so concatenate the sub-exp onto the automaton
    outerFromState = outerToState;
    AddEmptyTransition(outerFromState, innerFromState);
    outerToState = NewState();
    AddEmptyTransition(innerToState, outerToState);
  }
  orOperatorFound = false;
}
     
//////////////////////////////////////////////// 
// Constructor
RegularExpression::RegularExpression(CFABuild& faBuild) 
  :pFABuild(&faBuild)
{
}
     
//////////////////////////////////////////////// 
// Destructor
RegularExpression::~RegularExpression() 
{
  // clear out the sub-exp stack.
  while(!subExpStack.IsEmpty())
  {
    subExpState *subExp = (subExpState *)subExpStack.RemoveHead();
    delete subExp;
  }
}
     
     
//////////////////////////////////////////////// 
// envelope/letter delegation methods.
     
int RegularExpression::NewState()
{
  return pFABuild->NewState();
}
     
void RegularExpression::AddTransition(
   int sourceState, char event, int targetState)
{
  pFABuild->AddTransition(sourceState, event, targetState);
}
     
void RegularExpression::AddEmptyTransition(
   int sourceState, int targetState)
{
  pFABuild->AddEmptyTransition(sourceState, targetState);
}
     
void RegularExpression::SetAcceptState(int a_acceptState)
{
  pFABuild->SetAcceptState(a_acceptState);
}
     
//
////////////////////////////////////
     
void RegularExpression::SetFABuild(CFABuild* ipFaBuild)
{
  pFABuild = ipFaBuild;
}
     
CFABuild* RegularExpression::GetFABuild() const 
{
  return pFABuild;
}
     
//End of File