Listing 1

/* Speech Recognizer */

/* bj gleason, Upsala College, Computer Science Department */
/* East Orange, nj 07019 (201)-998-1037                  */

#include <conio.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

/* Sizing constants for the speech buffers and templates below.    */
#define RAWBUFFERSIZE 150  /* rows in the raw buffer: 1.5 seconds at 100 samples/second */
#define VOCABSIZE 10       /* vocabulary size: the 10 digits, 0 - 9 */
#define NUMSAMPLES 16      /* number of rows extracted into each template */
#define NUMBANDS 4         /* values per row: High, Low Freq, High, Low Energy */
#define BIGNUM 32767       /* large sentinel: initial minimum and difference of untrained digits */


/* Global state shared by the routines below.                       */
int rawspeech[RAWBUFFERSIZE][NUMBANDS];  /* raw speech: one row of NUMBANDS values per sample */
int index[VOCABSIZE];            /* nonzero once the digit has been trained */
int template[VOCABSIZE][NUMSAMPLES][NUMBANDS]; /* known templates, one per digit */
int unknown[NUMSAMPLES][NUMBANDS];      /* template extracted from the current raw speech */
int min_diff[VOCABSIZE];                /* summed absolute difference between unknown and each digit */
int sam_size;                           /* size of the current sample in rawspeech */
int debug;                              /* nonzero: show debugging info */

/* getspeech prompts for a file name and reads whitespace-separated  */
/* integers from that file into rawspeech, NUMBANDS values per row.  */
/* Returns the number of complete rows read; 0 if no name could be   */
/* read or the file could not be opened. Reading stops at end of     */
/* file, at the first token that is not a number, or when the        */
/* buffer is full. The rawspeech buffer is modified.                 */
int getspeech()
{
 FILE *fptr;
 int i,j;
 char fname[80];

   if (debug) printf("\nReading in Speech");
   printf("\nEnter name of file?");
   /* fgets limits the read to the size of fname, unlike gets */
   if (fgets(fname,sizeof fname,stdin)==NULL)
      return(0);
   /* fgets keeps the line terminator; cut the name there */
   for (i=0;fname[i]!='\0';i++)
      if (fname[i]=='\n' || fname[i]=='\r')
         {
          fname[i]='\0';
          break;
         }
   if ((fptr=fopen(fname,"rt"))==NULL)
       {
        printf("\nCant find file %s",fname);
        return(0);
       }
   /* strictly less than RAWBUFFERSIZE: the last valid row is */
   /* rawspeech[RAWBUFFERSIZE-1]                               */
   for(i=0;i<RAWBUFFERSIZE;i++)
      for(j=0;j < NUMBANDS;j++)
         {
          /* fscanf returns 1 only when a value was stored; EOF  */
          /* or 0 (malformed token) both end the sample here     */
          if (fscanf(fptr,"%i",&rawspeech[i][j])!=1)
             {
              fclose(fptr);
              return(i);
             }
         }
   /* the buffer filled up before end of file: keep what fits */
   fclose(fptr);
   return(RAWBUFFERSIZE);
}

 /* plot_it prints a rough text plot of the raw speech buffer: one  */
 /* screen line per row, with each band drawn as its band number    */
 /* at column value + 2*band. Uses gotoxy/wherey from <conio.h>.    */
 /* Always returns 0.                                                */
 int plot_it()
 {
  int i,j,x;

  printf("\n\n");
  for (i=0;i < sam_size;i++)
    {
      /* highest band first, so band 0 is drawn last and wins */
      /* when two bands land on the same column               */
      for (j=NUMBANDS-1; j >= 0; j--)
        {
         x=rawspeech[i][j]+(j*2);
         gotoxy(x,wherey());      /* move within the current line */
         putchar('0'+j);          /* band number as a character   */
        }
      printf("\n");
    }
  return(0);
 }

/* closest_match compares the unknown template with each trained    */
/* template. For every digit it stores in min_diff the sum of       */
/* absolute differences over all samples and bands (BIGNUM for an   */
/* untrained digit), then scans that list for the smallest value.   */
/* The second smallest is tracked as well so the debug trace can    */
/* show the delta between the best and the next best match.         */
/* Returns the best matching digit, or -1 when no digit has a       */
/* difference below BIGNUM (for example when nothing is trained).   */
int closest_match()
{
 int p,i,j;
 int low,next_low,digit,next_digit;

  if (debug) printf("\nFinding Closest Match");

  for (p = 0; p < VOCABSIZE; p++)
      {
       if (index[p] == 0)
          {
           /* put in a big number if digit not trained */
           min_diff[p] = BIGNUM;
           continue;
          }
       /* for each trained digit, accumulate the absolute */
       /* difference between known and unknown templates  */
       min_diff[p] = 0;
       for (i = 0; i < NUMSAMPLES; i++)
          for (j = 0; j < NUMBANDS; j++)
             min_diff[p] += abs(unknown[i][j] - template[p][i][j]);
      }

  /* min_diff now has the difference for each template. Search    */
  /* for the smallest difference (our digit) and the next lowest  */
  /* one (used to calculate the delta difference).                */
  digit = -1;
  next_digit = -1;
  low = BIGNUM;
  next_low = BIGNUM;
  if (debug) printf("\nTP# Diff Low Digit");
  for (p = 0; p < VOCABSIZE; p++)
      {
       if (min_diff[p] < low)
          {
           /* new best: the previous best becomes the runner-up */
           next_low = low;
           next_digit = digit;
           low = min_diff[p];
           digit = p;
          }
       else if (min_diff[p] < next_low)
          {
           /* not the best, but better than the current runner-up; */
           /* without this branch a runner-up scanned after the    */
           /* best match would never be recorded                   */
           next_low = min_diff[p];
           next_digit = p;
          }
       if (debug) printf("\n%3i %5i %5i %2i",p,min_diff[p],low,digit);
      }
  if (debug)
     {
      printf("\nMinimun Difference was %i, Digit is %i",low, digit);
      printf("\nNext Closest Diff was %i, Digit is %i",next_low
                         ,next_digit);
      printf("\nWith the delta difference of %i",next_low-low);
     }

  /* it would be right here where you would add the code       */
  /* to set a rejection limit or a delta difference limit.     */
  /* If the digit is rejected, send back error, such as -1.    */
  return(digit);
}

 /* extract_template reduces the first sam_size rows of rawspeech   */
 /* to exactly NUMSAMPLES evenly spaced rows in unknown. This       */
 /* shrinks the template and removes time warping.                  */
 /* Row p of unknown is taken from raw row p*sam_size/NUMSAMPLES.   */
 /* The multiplication is done before the division, so no           */
 /* truncation error accumulates, and the loop is bounded by        */
 /* NUMSAMPLES so unknown can never be written past its end.        */
 /* When sam_size is 0 or negative, unknown is left unchanged.      */
 /* Always returns 0.                                                */
 int extract_template()
 {
  int j,p,src;
  float rate;

   if (debug) printf("\nExtracting Template");
   rate = (float) sam_size / NUMSAMPLES;    /* only used for the trace */
   if (debug)
      {
       printf("\nExtracting %i elements from Raw Speech",
                         NUMSAMPLES);
       printf("\nTake every %f element", rate);
       printf("\n\n UN RS");
      }
   /* nothing to sample from */
   if (sam_size <= 0)
      return(0);
   for (p = 0; p < NUMSAMPLES; p++)
       {
        /* p < NUMSAMPLES guarantees src < sam_size */
        src = (int)((long)p * sam_size / NUMSAMPLES);
        for (j = 0; j < NUMBANDS; j++)
            unknown[p][j] = rawspeech[src][j];
        if (debug) printf("\n%3i %3i",p,src);
       }
   return(0);
 }
/* During the training phase, this will take the extracted template  */
/* and store it in the array of known templates.                     */
int store_template(int position)
{
  int i,j;

  if (debug) printf("\nStoring template at position %i",position);
  for (i = 0; i < NUMSAMPLES ; i++)
      for (j = 0; j < NUMBANDS; j++)
      {
        template[position][i][j] = unknown[i][j];
      }
}

/* Perform - Get the speech, extract an unknown template, compare   */
/*   against the known templates, and print the resulting digit.    */
/* Returns the recognized digit, or -1 when no speech data was      */
/* read or no template matched.                                     */
int perform()
{
 int digit;

    sam_size = getspeech();
    if (debug) plot_it();
    if (debug) printf("\nSize of Sample = %i",sam_size);

    /* without data the unknown template would still hold the */
    /* previous sample, so there is nothing to recognize      */
    if (sam_size <= 0)
       {
        printf("\nNo speech data to recognize");
        return(-1);
       }

    extract_template();     /* break raw buffer up and       */
                            /* place into unknown template   */

    digit = closest_match();
    if (digit < 0)
       printf("\nNo matching digit found");
    else
       printf("\nDigit spoken was %i",digit);
    return(digit);
}

/* Training - Get the speech, extract an unknown template,        */
/* find from the user what digit it was, then store it in         */
/* the known template array.                                      */
int train()
{
 char ans[10];
 int digit;

   sam_size = getspeech();
   if (debug) plot_it();
   if (debug) printf("\nSize of Sample = %i",sam_size);
   printf("\nEnter the digit spoken ?");
   gets(ans);
   digit = atoi(ans);

   index[digit] = 1;      /* indicate this digit is trained       */

   extract_template();    /* break raw buffer up and              */
              /* place into unknown template */

   store_template(digit); /* store the template                   */

}

/* Eztrain - This is to quickly load in files a0 - a9 */
int eztrain(char fname[80], int digit)
{
 FILE *fptr;
 int i,j;
    if ((fptr=fopen[fname,"rt"))!=NULL)
        {
     sam_size = 0;
     printf("\nReading file %s", fname);
     for(i=0;i<=RAWBUFFERSIZE;i++)
        for(j=0;j < NUMBANDS;j++)
           if ((fscanf(fptr,"%i",&rawspeech[i][j]))!=EOF)
          sam_size = i;
     fclose(fptr);
     if (debug) plot_it();
     if (debug) printf("\nSize of Sample = %i",sam_size);
     index[digit] = 1;
     extract_template();
     store_template(digit);
        }
 }
 
 
 /* main runs the interactive menu: Train, Perform, Load template  */
 /* set A or B (files a0.. or b0.., one per digit), toggle the     */
 /* Debug trace, or Quit. The loop also ends when standard input   */
 /* reaches end of file. Returns 0.                                */
 int main(void)
 {
  int i;
  char ans[80];
  char fname[80];
  char choice;

  /* clear the training index... nothing has been entered */
  for (i=0; i<VOCABSIZE; i++)
     index[i] = 0;

  printf("\nWelcome to Speech Recognition Demo, Version 1.0\n");

  debug = 1;             /* display debugging information     */
  do
   {
    /* the prompt names the state Debug would switch TO */
    printf("\n\nTrain, Perform, Load A or B, Debug ");
    if (debug) printf("Off"); else printf("On");
    printf(", or Quit? (T/P/A/B/D/Q)");
    /* fgets limits the read to the size of ans; on end of  */
    /* input leave the loop instead of spinning forever     */
    if (fgets(ans,sizeof ans,stdin)==NULL)
       break;
    /* toupper needs a value representable as unsigned char */
    choice = (char)toupper((unsigned char)ans[0]);
    if (choice == 'A' || choice == 'B')
       {
        /* build a0..a9 or b0..b9 and train each digit from it */
        for (i=0; i<VOCABSIZE; i++)
           {
            sprintf(fname,"%c%i",(choice == 'A') ? 'a' : 'b',i);
            eztrain(fname,i);
           }
       }
    if (choice == 'D')
       {
        debug = !debug;
        printf("\n    Debugging Trace ");
        if (debug) printf("On"); else printf("Off");
       }
    if (choice == 'T') train();
    if (choice == 'P') perform();
   }
  while(choice != 'Q');
  printf("\n\nAll done.");
  return(0);
 }