Listing 4

/***********************************************************************
* Module    :  Lexical Analyzer --- Process the input text file into tokens
*                  that the parser can understand.
*
*              Copyright (C) 1990 John W. M. Stevens, All Rights Reserved
*
* Routines  :  Lex     - Return the next token from the file.
*              OpenPrg - Open the source file.
*              ParsErr - Report a parsing error.
*
* Author    :  John W. M. Stevens
***********************************************************************/

#include   <stdio.h>
#include   <stdlib.h>
#include   <ctype.h>
#include   <string.h>

#include   "lex.h"

/* Structure of trie branch.   */
typedef struct  key_st  {
   char    c;
   TKNS    token;
   struct  key_st *child;
} NODE;

/* Constants local to this file.   */
#define     MAX_STR     256
#define     NOT_FND     -2

/* Object Data. */
static  char    word[MAX_STR + 1];  /* Last string analyzed.                */
static  char    PrvWd[MAX_STR + 1]; /* Previous word.                       */
static  int     LnNo = 0;           /* The current line number in the file. */
static  FILE    *PrgFl;             /* File pointer.                        */

/* Trie data structure containing all the keywords and punctuation
marks for
*   the language being tokenized.
*/
static
NODE    T5[2]= {
   {    ' ',             2, NULL   },
   {    'n',        ACTION, NULL   }
};

static
NODE    T4[2] = {
   {    ' ',             2, NULL   },
   {    'o',             0, T5  }
};

static
NODE    T3[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T4  }
};

static
NODE    T2[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T3  }
};

static
NODE    T8[2] = {
   {    ' ',             2, NULL   },
   {    'r',         AFTER, NULL   }
};

static
NODE    T7[2] = {
   {    ' ',             2, NULL   },
   (    'e',             0, T8  }
};

static
NODE    T6[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T7  }
};

static
NODE    T9[2] = {
   {    ' ',             2, NULL   },
   {    'd',           AND, NULL   }
};

static
NODE    Te[2] = {
   {    ' ',             2, NULL   },
   {    'e',       ARCHIVE, NULL   }
};

static
NODE    Td[2] = {
   {   ' ',              2, NULL   },
   {   'v',              0, Te  }
};

static
NODE    Tc[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, Td  }
};

static
NODE    Tb[2] = {
   {    ' ',             2, NULL   },
   {    'h',             0, Tc  }
};

static
NODE    Ta[2] = {
   {    ' ',             2, NULL   },
   {    'c',             0, Tb  }
};

static
NODE    T16[2] = {
   {    ' ',             2, NULL   },
   {    's',    ATTRIBUTES, NULL   }
};

static
NODE    T15[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T16 }
};

static
NODE    T14[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T15 }
};

static
NODE    T13[2] = {
   {    ' ',             2, NULL   },
   {    'u',             0, T14 }
};

static
NODE    T12[2] = {
   {    ' ',             2, NULL   },
   {    'b',             0, T13 }
};

static
NODE    T11[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T12 }
};

static
NODE    T10[2] = {
   {    ' ',             2, NULL   },
   {    'r',             0, T11 }
};

static
NODE    Tf[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T10 }
};

static
NODE    T1[6] = {
   {    ' ',             6, NULL   },
   {    'c',             0, T2  },
   {    'f',             0, T6  },
   {    'n',             0, T9  },
   {    'r',             0, Ta  },
   {    't',             0, Tf  }
};

static
NODE    T1b[2] = {
   {    ' ',             2, NULL   },
   {    'e',        BEFORE, NULL   }
};

static
NODE    T1a[2] = {
   {    ' ',             2, NULL   },
   {    'r',             0, T1b }
};

static
NODE    T19[2] = {
   {    ' ',             2, NULL   },
   {    'o',             0, T1a }
};

static
NODE    T18[2] = {
   {    ' ',             2, NULL   },
   {    'f',             0, T19 }
};

static
NODE    T17[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T18 }
};

static
NODE    T23[2] = {
   {    ' ',             2, NULL   },
   {    'y',   DIRECTORY_T, NULL }
};

static
NODE    T22[2] = {
   {   ' ',              2, NULL   },
   {   'r',              0, T23 }
};

static
NODE    T21[2] = {
   {    ' ',             2, NULL   },
   {    'o',             0, T22 }
};

static
NODE    T20[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T21 }
};

static
NODE    T1f[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T20 }
};

static
NODE    T1e[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T1f }
};

static
NODE    T1d[2] = {
   {    ' ',             2, NULL   },
   {    'r',             0, T1e }
};

static
NODE    T1c[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T1d }
};

static
NODE    T26[2] = {
   {    ' ',             2, NULL   },
   {    'c',          EXEC, NULL   }
};

static
NODE    T25[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T26 }
};

static
NODE    T24[2] = {
   {    ' ',             2, NULL   },
   {    'x',             0, T25 }
};

static
NODE    T2a[2] = {
   {    ' ',             2, NULL   },
   {    's',         FILES, NULL   }
};

static
NODE    T29[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T2a }
};

static
NODE    T28[2] = {
   {    ' ',             2, NULL   },
   {    '1',             0, T29 }
};

static
NODE    T27[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T28 }
};

static
NODE    T2f[2] = {
   {    ' ',             2, NULL   },
   {    'n',        HIDDEN, NULL   }
};

static
NODE    T2e[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T2f }
};

static
NODE    T2d[2] = {
   {    ' ',             2, NULL   },
   {    'd',             0, T2e }
};

static
NODE    T2c[2] = {
   {    ' ',             2, NULL   },
   {    'd',             0, T2d }
};

static
NODE    T2b[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T2c }
};

static
NODE    T33[2] = {
   {    ' ',             2, NULL   },
   {    'l',         LABEL, NULL   }
};

static
NODE    T32[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T33 }
};

static
NODE    T31[2] = {
   {    ' ',             2, NULL   },
   {    'b',             0, T32 }
};

static
NODE    T30[2] = {
   {    ' ',             2, NULL   },
   {    'a',             0, T31 }
};

static
NODE    T3a[2] = {
   {    ' ',             2, NULL   },
   {    'd',      MODIFIED, NULL   }
};

static
NODE    T39[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T3a }
};

static
NODE    T38[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T39 }
};

static
NODE    T37[2] = {
   {    ' ',             2, NULL   },
   {    'f',             0, T38 }
};

static
NODE    T36[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T37 }
};

static
NODE    T35[2] = {
   {    ' ',             2, NULL   },
   {    'd',             0, T36 }
};

static
NODE    T34[2] = {
   {    ' ',             2, NULL   },
   {    'o',             0, T35 }
};

static
NODE    T3d[2] = {
   {    ' ',             2, NULL   },
   {    'e',          NAME, NULL   }
};

static
NODE    T3c[2] = {
   {    ' ',             2, NULL   },
   {    'm',             0, T3d }
};

static
NODE    T3e[2] = {
   {    ' ',             2, NULL   },
   {    't',           NOT, NULL   }
};

static
NODE    T3b[3] = {
   {    ' ',             3, NULL   },
   {    'a',             0, T3c },
   {    'o',             0, T3e }
};

static
NODE    T3f[2] = {
   {    ' ',             2, NULL   },
   {    'r',            OR, NULL   },
};

static
NODE    T43[2] = {
   {    ' ',             2, NULL   },
   {    't',         PRINT, NULL   }
};

static
NODE    T42[2] = {
   {    ' ',             2, NULL   },
   {    'n',             0, T43 }
};

static
NODE    T41[2] = {
   {    ' ',             2, NULL   },
   {    'i',             0, T42 }
};

static
NODE    T40[2] = {
   {    ' ',             2, NULL   },
   {    'r',             0, T41 }
};

static
NODE    T4a[2] = {
   {    ' ',             2, NULL   },
   {    'y',      READONLY, NULL   }
};

static
NODE    T49[2] = {
   {    ' ',             2, NULL   },
   {    'l',             0, T4a }
};

static
NODE    T48[2] = {
   {    ' ',             2, NULL   },
   {    'n',             0, T49 }
};

static
NODE    T47[2] = {
   {    ' ',             2, NULL   },
   {    'o',             0, T48 }
};

static
NODE    T46[2] = {
   {    ' ',             2, NULL   },
   {    'd',             0, T47 }
};

static
NODE     T4d[2] = {
   {     ' ',            2, NULL   },
   {     's',       RECURS, NULL  }
};

static
NODE    T4c[2] = {
   {    ' ',             2, NULL   },
   {    'r',             0, T4d }
};

static
NODE    T4b[2] = {
   {    ' ',             2, NULL   },
   {    'u',             0, T4c }
};

static
NODE    T45[3] = {
   {    ' ',             3, NULL   },
   {    'a',             0, T46 },
   {    'c',             0, T4b }
};

static
NODE    T44[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T45 }
};

static
NODE    T52[2] = {
   {    ' ',             2, NULL   },
   {    'h',        SEARCH, NULL  }
};

static
NODE    T51[2] = {
   {    ' ',             2, NULL   },
   {    'c',             0, T52 }
};

static
NODE    T50[2] = {
   {    ' ',             2, NULL   },
   {    'r',             0, T51 }
};

static
NODE    T55[2] = {
   {    ' ',             2, NULL   },
   {    't',        SELECT, NULL   }
};

static
NODE    T54[2] = {
   {    ' ',             2, NULL   },
   {    'c',             0, T55 }
};

static
NODE    T53[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T54 }
};

static
NODE    T4f[3] = {
   {    ' ',             2, NULL   },
   {    'a',             0, T50 },
   {    'l',             0, T53 }
};

static
NODE    T59[2] = {
   {    ' ',             2, NULL   },
   {    'm',        SYSTEM, NULL   }
};

static
NODE    T58[2] = {
   {    ' ',             2, NULL   },
   {    'e',             0, T59 }
};

static
NODE    T57[2] = {
   {    ' ',             2, NULL   },
   {    't',             0, T58 }
};

static
NODE    T56[2] = {
   {    ' ',             2, NULL   },
   {    's',             0, T57 }
};

static
NODE    T4e[3] = {
   {    ' ',             3, NULL   },
   {    'e',             0, T4f },
   {    'y',             0, T56 }
};

static
NODE    T0[21] = {
   {    ' ',            21, NULL   },
   {    '(',       L_PAREN, NULL   },
   {    ')',       R_PAREN, NULL   },
   {    ',',         COMMA, NULL   },
   {    '/',       F_SLASH, NULL   },
   {    'a',             0, T1  },
   {    'b',             0, T17 },
   {    'd',             0, T1c },
   {    'e',             0, T24 },
   {    'f',             0, T27 },
   {    'h',             0, T2b },
   {    '1',             0, T30 },
   {    'm',             0, T34 },
   {    'n',             0, T3b },
   {    'o',             0, T3f },
   {    'p',             0, T40 },
   {    'r',             0, T44 },
   {    's',             0, T4e },
   {    '{',       L_BRACE, NULL   },
   {    '|',           BAR, NULL   },
   {    '}',       R_BRACE, NULL   }
};

/*----------------------------------------------------------
| Routine   :   OpenPrg() --- Open the ASCII text file
|                    that contains the back up program.
|
| Inputs    :   FileNm - File name of source file.
----------------------------------------------------------*/

void     OpenPrg(char    *FileNm)
{
   /* Open the program script file.    */
   if ((PrgFl = fopen(FileNm, "rt")) == NULL)
   {
      fprintf(stderr, "OpenPrg (fopen) : Could not open
file '%s' for "
          "reading.\n", FileNm);
      exit( -1 );
   }

   /* Initialize object variables. */
   *word = *PrvWd = '\0';
}

/*----------------------------------------------------------------------
| Routine   :   ParsErr() --- Report a parse error.
|
| Inputs    :   Err - Error string.
----------------------------------------------------------------------*/

void   ParsErr(char   *Err)
{
   /* Print line number and error massage. */
   fprintf(stderr, "Error in Line: %d, %s.\n", LnNo + 1, Err);

   /* If there is a previous word, show it.    */
   if ( *word )
      fprintf(stderr, "\t0n or after word '%s'\n", word);
   exit( -1 );
}

/*---------------------------------------------------------------------
| Routine   :   TrieSrch() --- Search the trie for a key word.
|
| Inputs    :   Trie    - The trie level pointer.
|               ch      - The current character to search for.
|               WordPtr - The pointer to the current byte of the word buffer.
| Returns   :   Returns either a token value or
|                   NOT_FND - For key word not found.
|                   EOF     - For end of file.
----------------------------------------------------------------------*/

static
int       TrieSrch(NODE     *Trie,
                int     ch,
                char    *WordPtr)
{
   register      int       mid;       /* Mid point of array piece.           */
   register      TKNS      ret;       /* Return value of comparison.         */

   auto          int       lo;        /* Limits of current array piece for   */
   auto          int       hi;        /*      binary search.                 */

   /* Make sure that input is lower case.  */
   ch = tolower( ch );

   /* Search for a token. */
   hi = Trie[0].token - 1;
   lo = 1;
   do
   {
      /* Find mid point of current array piece.  */
      mid = (lo + hi) >> 1;

      /* Do character comparison.  */
      ret = ch - Trie[mid].c;

      /* Fix the array limits.     */
      if (ret <= 0)
         hi = mid - 1;
      if (ret >= 0)
         lo = mid + 1;
         
   } while (hi >= lo);

   /* If the character matches one of the entries in this level and this
   *   entry has a child, recurse. If a match is found but the matching
   *   entry has no child, return the token value associated with the
   *   match. If the return value from the recursive call indicates that
   *   no match was found at a lower level, return the token value
   *   associated with the match at this level of the trie.
   */
   if (ret == 0)
   {
      /* Save the current character in the buffer for error reporting. */
      *WordPtr++ = ch;

   /* Are we looking for more characters in this string?  */
   if ( Trie[mid].child )
   {
      /* Get the next character.  */
      if ((ch = fgetc( PrgFl )) == EOF)
         return( EOF );
         
      /* Search next level.   */
      if ((ret = TrieSrch(Trie[mid].child, ch, WordPtr)) == NOT_FND)
      {
         ungetc(ch, PrgFl);
         return( Trie[mid].token );
      }
      return( ret );
   }
   else
      {
          /* Properly NUL terminate the buffer that the keyword is
         *   being saved in and return the token value.
         */
         *WordPtr = '\0';
         return( Trie[mid].token );
      }
   }
   
   /* Terminate string in keyword buffer and return not found. */
   *WordPtr = '\0';
   return( NOT_FND );
}

/*----------------------------------------------------------------------
| Routine   :   GetNo --- Get a number from the file.
|
| Inputs    :   word    - Pointer to word buffer for error reporting.
| Outputs   :   RetNo   - Returns the number read from the file.
|
| Returns   :   Returns the last character read from the file or EOF.
----------------------------------------------------------------------*/

static
int     GetNo(char  **word,
            long  *RetNo)
{
   auto         int     c;

   /* Get number.  */
   *RetNo = OL;
   while ((c = fgetc ( PrgFl )) >= '0' && c <= '9')
   {
      /* Save character in word buffer.    */
      *(*word)++ = c;

      /* Calculate value of number. */
      *RetNo = *RetNo * 10L + (long) (c - '0');
   }
   return( c );
}

/*----------------------------------------------------------------------
| Routine   :   Lex() --- Get the next key word from the input file.
|
| Outputs   :   sym - The symbolic data read from the file.
|
| Returns   :   Returns the token read or EOF.
----------------------------------------------------------------------*/

TKNS    Lex(TOKEN    *sym)
{
   register    int     i;
   register    int     tkn;
   auto        int     ch;
   auto        char    *bf;
   
   /* Strip comments and white space. If the character read is a '#',
   *   every thing to the end of the line is a comment.
   */
   ch = fgetc( PrgFl );
   while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '#')
   {
      /* Process the special characters '#' and '\n'.     */
      if (ch == '\n')
         /* End of line, increment the line number.   */
         LnNo++;
      else if (ch == '#')
      {
         /* Found a comment character, strip all characters to end
         *   of line and increment the line number.
         */
         while (fgetc( PrgFl ) != '\n')
            ;
         LnNo++;
      }

      /* Get the next character. */
      ch = fgetc( PrgFl );
   }
   
   /* Get strings, etc.    */
   if (ch == ' " ')
   {

      /* Get contents of string. */
      bf = sym->str;
      for (i = 0; i < MAX_STR; i++)
         if ((ch = fgetc( PrgFl )) != ' " ' && ch != EOF)
            *bf++ = ch;
         else
            break;
      *bf = '\0';

      /* Return string token. */
      strcpy(word, sym->str);
      return( STRING );
   }
   else if (ch >= '0' && ch <= '9')
   {
      auto        long        no;
      
      /* Establish a pointer to the word buffer and unget the
      *   numeric character for re-reading.
      */
      bf = word;
      ungetc(ch, PrgFl);
      
      /* Get number, time or date.    */
      if ((ch = GetNo(&bf, &no)) == ':')
      {
         /* Getting time, not number.    */
         *bf++ = ch;

      sym->ftime.ti_hour = (unsigned char) no;
      sym->ftime.ti_hund = (unsigned char) 0;
      
      /* Get minutes.     */
      if ((ch = GetNo(&bf, &no)) == ':')
      {
         /* Save minutes.    */
         *bf++ = ch;
         sym->ftime.ti_min = (unsigned char) no;
         
         /* Get seconds. */
         if ((ch = GetNo(&bf, &no)) == '.')
         {
            *bf = '\0';
            ParsErr( "Hundredths of seconds not allowed in "
                     "time expressions" );
         }
         sym->ftime.ti_sec = (unsigned char) no;
      }
      else
      {
         /* No seconds to get.   */
         sym->ftime.ti_min = (unsigned char) no;
         sym->ftime.ti_sec  = (unsigned char) 0;
      }
      
      /* This is a time. */
      tkn = TIME;
   }
   else if (ch == '/')
   {
      /* Getting date, not number.    */
      *bf++ = ch;
      sym->fdate.da_mon = (char) no;

      /* Get day.     */
      if ((ch = GetNo(&bf, &no)) == '/')
      {
         /* Save character. */
         *bf++ = ch;
         sym->fdate.da_day = (char) no;
         
         /* Get year.    */
         ch = GetNo(&bf, &no);
         if (no > 1980L)
            no -= 1980L;
         else if (no > 80L && no < 100L)
            no -= 80L;
         else
         {
            *bf = '\0';
            ParsErr( "Error, bad year value in date expression." );
         }
         sym->fdate.da_year = (int) no;
      }
      else
      {
         *bf = '\0';
         ParsErr( "Missing year in date expression" );
      }
      
      /* This is a date. */
      tkn = DATE;
   }
   else
   {
      /* Just an integer constant.   */
      sym->no = no;
      tkn = NUMBER;
      }

      /* Return the unused character. */
      *bf = '\0';
      ungetc(ch, PrgFl);
      return( tkn );
   }
   else if (ch == EOF)
      return( EOF );
      
   /* Call the trie search routine to return the next token, EOF
   *   or NOT_FND.  If not found, print an error and quit.
   */
   if ((tkn = TrieSrch(TO, ch, word)) == NOT_FND)
   {
      /* Illegal first character in word. */
      if ( *PrvWd )
         fprintf(stderr, "Parse - Error in Line: %d, cannot identify "
            "word after '%s'.\n", LnNo + 1, PrvWd);
      else
         fprintf(stderr, "Parse - Error in Line: %d, cannot identify "
            "first word in file.\n", LnNo + 1);
      exit( -1 );
   }
   else if (tkn == 0)
   {
      /* Illegal word.    */
      fprintf(stderr, "Parse - Error in Line: %d, cannot identify "
         "word '%s'.\n", LnNo + 1, word);
      exit( -1 );
   }
   strcpy(PrvWd, word);

   /* Return the token found. */
   return( tkn );
}

/* End of File */