Dr. Dobb's is part of the Informa Tech Division of Informa PLC

This site is operated by a business or businesses owned by Informa PLC and all copyright resides with them. Informa PLC's registered office is 5 Howick Place, London SW1P 1WG. Registered in England and Wales. Number 8860726.


Channels ▼
RSS

Natural Language Expansions for Tense and Number


June 1993/Natural Language Expansions for Tense and Number/Listing 1

Listing 1 The expansion code for tense and number

/* Copyright (c) 1993 Russell Suereth */

#include "natural.h"
/* Prototypes for routines defined further down in this file. */
void check_aux_verb(void);
void check_number(void);
void make_response(void);
void make_answer(int);
void get_verb(char, char, char);
int  match_verb(char, char, char);
/* Reply text assembled by make_response(), make_answer() and match_verb(). */
char response[200];
/* Verb fields copied out of the current dictionary record by match_record(). */
unsigned char verb_tense[5];
unsigned char verb_number[5];
unsigned char verb_usage;
/* Auxiliary fields copied out of the current dictionary record by match_aux(). */
unsigned char aux_tense[5];
unsigned char aux_number[5];
unsigned char aux_usage;
/* Number of the subject: read from the dictionary record for a pronoun  */
/* (match_record) or set to SINGULAR for a name (check_subject).         */
unsigned char subject_number;
/* Per-sentence results, indexed by the global sentence counter:         */
/* tenses/usages are set by check_aux_verb(), numbers by check_number(). */
unsigned char tenses[20];
unsigned char numbers[20];
unsigned char usages[20];
/* Per-sentence subject kind (NAME or PRONOUN), set by check_subject(). */
unsigned char subjects_type[20];
/* Per-sentence auxiliary meaning codes, filled by match_aux() and       */
/* compared with strpbrk() in make_response().                           */
unsigned char aux_meaning[20] [5];
/* Per-sentence auxiliary word(s), space separated, built by             */
/* check_underlying().                                                   */
char auxiliaries[20][25];


/*****************************************************/
/* Compare the passed word with the word in the      */
/* current dictionary record. Return 1 when they     */
/* differ. On a match, store the record's type       */
/* (NOUN, VERB, etc.) in type_array and return 0.    */
/* A PRON record also sets subject_number; a VERB    */
/* record also sets the root, usage, tense and       */
/* number of the verb.                               */
/*                                                   */
/* pass_word: word to look up.                       */
/* types:     slot in type_array[word_ct] that       */
/*            receives the type string.              */
/*****************************************************/
int match_record(char *pass_word, int types)
{
   int i, j;
   char *root;
   char *dic_word;

   dic_word = extract_word();
   /* Different word: nothing to extract from this record */
   if (strcmpi(pass_word, dic_word) != 0) return(1);

   /* Type is held in columns 24-27, blank padded.         */
   /* isspace() needs an unsigned char value: a negative   */
   /* plain char is undefined behaviour.                   */
   for (i=24,j=0; i<28; i++) {
     if (isspace((unsigned char) dic_record[i])) break;
     type_array[word_ct][types][j++] = dic_record[i];
   }
   type_array[word_ct][types][j] = '\0';

   /* Pronoun: column 41 carries its number                */
   if (strcmp(type_array[word_ct][types], "PRON") == 0)
     subject_number = dic_record[41];

   if (strcmp(type_array[word_ct][types], "VERB") == 0) {
     root = extract_root();
     strcpy(root_array[word_ct], root);

     /* Usage: column 29                                   */
     verb_usage = dic_record[29];

     /* Tense codes: columns 30-33, blank padded           */
     for (i=30,j=0; i<34; i++,j++) {
       if (isspace((unsigned char) dic_record[i])) break;
       verb_tense[j] = dic_record[i];
     }
     verb_tense[j] = '\0';

     /* Number codes: columns 41-42, blank padded          */
     for (i=41,j=0; i<43; i++,j++) {
       if (isspace((unsigned char) dic_record[i])) break;
       verb_number[j] = dic_record[i];
     }
     verb_number[j] = '\0';
   }
   return(0);
}

/*****************************************************/
/* One known sentence structure: the type expected   */
/* for each word position and the phrase assigned to */
/* that position when the whole structure matches.   */
/*****************************************************/
struct known_structure {
   int         words;        /* positions used below   */
   const char *types[8];
   const char *phrases[8];
};

/* Tried in this order; the first full match wins.    */
static const struct known_structure known_structures[] = {
   /* Structure WH-AUX-PRON-VERB                       */
   { 4,
     { "WH", "AUX", "PRON", "VERB" },
     { "WHQUESTION", "VERBPHRASE", "NOUNPHRASE",
       "VERBPHRASE" } },
   /* Structure PRON-AUX-VERB-PREP-DET-NOUN            */
   { 6,
     { "PRON", "AUX", "VERB", "PREP", "DET", "NOUN" },
     { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE",
       "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } },
   /* Structure WH-AUX-NAME-VERB                       */
   { 4,
     { "WH", "AUX", "NAME", "VERB" },
     { "WHQUESTION", "VERBPHRASE", "NOUNPHRASE",
       "VERBPHRASE" } },
   /* Structure NAME-AUX-AUX-AUX-VERB-PREP-DET-NOUN    */
   { 8,
     { "NAME", "AUX", "AUX", "AUX", "VERB", "PREP",
       "DET", "NOUN" },
     { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE",
       "VERBPHRASE", "VERBPHRASE", "PREPPHRASE",
       "PREPPHRASE", "PREPPHRASE" } },
   /* Structure NAME-AUX-AUX-VERB-PREP-DET-NOUN        */
   { 7,
     { "NAME", "AUX", "AUX", "VERB", "PREP", "DET",
       "NOUN" },
     { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE",
       "VERBPHRASE", "PREPPHRASE", "PREPPHRASE",
       "PREPPHRASE" } },
   /* Structure NAME-AUX-VERB-PREP-DET-NOUN            */
   { 6,
     { "NAME", "AUX", "VERB", "PREP", "DET", "NOUN" },
     { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE",
       "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } },
   /* Structure NAME-VERB-PREP-DET-NOUN                */
   { 5,
     { "NAME", "VERB", "PREP", "DET", "NOUN" },
     { "NOUNPHRASE", "VERBPHRASE", "PREPPHRASE",
       "PREPPHRASE", "PREPPHRASE" } }
};

/* Return 1 when every word position of the sentence  */
/* can take the type the structure expects there.     */
static int structure_matches(const struct known_structure *s)
{
   int w;

   for (w = 0; w < s->words; w++) {
      /* check_type() takes a non-const char * but only */
      /* compares against it.                           */
      if (check_type((char *) s->types[w], w) != 0)
         return(0);
   }
   return(1);
}

/* Add one auxiliary word to auxiliaries[sentence].   */
/* The first word replaces the old contents; later    */
/* words are joined with a single space. Text that    */
/* does not fit in the slot is dropped rather than    */
/* written past its end.                              */
static void append_auxiliary(const char *word, int first)
{
   char  *aux  = auxiliaries[sentence];
   size_t room = sizeof auxiliaries[sentence] - 1;

   if (first)
      aux[0] = '\0';
   else
      strncat(aux, " ", room - strlen(aux));
   strncat(aux, word, room - strlen(aux));
}

/*****************************************************/
/* Determine if the input sentence contains a known, */
/* underlying structure. If it does, then assign the */
/* correct types and phrases for the words.          */
/* Returns 0 when a structure matched, 1 otherwise.  */
/*                                                   */
/* When the matched structure contains auxiliaries,  */
/* they are stored in auxiliaries[sentence] and      */
/* get_aux() is called to look them up. A structure  */
/* without an auxiliary leaves auxiliaries[sentence] */
/* untouched.                                        */
/*****************************************************/
int check_underlying()
{
   const struct known_structure *s;
   size_t n;
   size_t count = sizeof known_structures /
                  sizeof known_structures[0];
   int w, aux_words;

   for (n = 0; n < count; n++) {
      s = &known_structures[n];
      if (!structure_matches(s))
         continue;

      aux_words = 0;
      for (w = 0; w < s->words; w++) {
         strcpy(prime_types[w], s->types[w]);
         strcpy(phrases[w],     s->phrases[w]);
         if (strcmp(s->types[w], "AUX") == 0) {
            append_auxiliary(word_array[w], aux_words == 0);
            aux_words++;
         }
      }
      if (aux_words > 0)
         get_aux();
      return(0);
   }
   return(1);
}

/*****************************************************/
/* Look for pass_type among the types recorded in    */
/* type_array for word number pass_number of the     */
/* input sentence. Return 0 when it is present,      */
/* 1 when it is not.                                 */
/*****************************************************/
int check_type(char *pass_type, int pass_number)
{
   int slot = 0;

   /* An empty type string ends this word's list      */
   while (type_array[pass_number][slot][0] != '\0') {
      if (strcmp(type_array[pass_number][slot],
                 pass_type) == 0)
         return(0);
      slot++;
   }
   return(1);
}

/*****************************************************/
/* Record the subject of the sentence: the first     */
/* word whose prime type is "NAME" or "PRON" is      */
/* copied to the subjects array. A name is always    */
/* treated as SINGULAR.                              */
/*****************************************************/
void check_subject()
{
   int w = 0;
   int is_name, is_pron;

   while (w < word_ct) {
      is_name = (strcmp(prime_types[w], "NAME") == 0);
      is_pron = !is_name &&
                (strcmp(prime_types[w], "PRON") == 0);

      if (is_name || is_pron) {
         strcpy(subjects[sentence], word_array[w]);
         if (is_name) {
            subject_number = SINGULAR;
            subjects_type[sentence] = NAME;
         }
         else {
            /* subject_number is left as it was for a  */
            /* pronoun                                 */
            subjects_type[sentence] = PRONOUN;
         }
         return;
      }
      w++;
   }
}

/* Search up to three sentences back for one with the */
/* same subject and action as the current sentence, a */
/* non-empty place, and a tense that appears in the   */
/* allowed string. Return its index, or -1.           */
static int recall_tense(const char *allowed)
{
   int prev;

   for (prev = sentence-1;
        prev >= 0 && prev >= sentence-3; prev--) {
      if (strcmpi(subjects[prev], subjects[sentence]) != 0)
         continue;
      if (strcmpi(actions[prev], actions[sentence]) != 0)
         continue;
      if (strchr(allowed, tenses[prev]) == NULL)
         continue;
      if (strlen(places[prev]) == 0)
         continue;
      return(prev);
   }
   return(-1);
}

/*****************************************************/
/* Determine the sentence tense and usage by         */
/* matching auxiliary and verb information, or by    */
/* matching previous sentence information.           */
/*****************************************************/
void check_aux_verb()
{
   int i, kept, prev;
   char *hit;
   char shared[5];

   /* No auxiliary in sentence: the verb alone decides */
   if (auxiliaries[sentence][0] == '\0') {
      usages[sentence] = verb_usage;
      if (strchr((const char *) verb_tense, PAST) != NULL) {
         tenses[sentence] = PAST;
         return;
      }
      /* Verb is present or future: take the tense of  */
      /* a matching recent sentence, else PRESENT      */
      prev = recall_tense((const char *) verb_tense);
      if (prev >= 0)
         tenses[sentence] = tenses[prev];
      else
         tenses[sentence] = PRESENT;
      return;
   }

   /* Auxiliary in sentence: usages must agree         */
   if (aux_usage != verb_usage) {
      tenses[sentence] = UNKNOWN;
      usages[sentence] = UNKNOWN;
      return;
   }

   /* Collect the tenses the auxiliary and verb share  */
   kept = 0;
   for (i = 0; aux_tense[i] != '\0'; i++) {
      hit = strchr((const char *) verb_tense, aux_tense[i]);
      if (hit != NULL)
         shared[kept++] = *hit;
   }
   shared[kept] = '\0';

   if (kept == 0) {
      tenses[sentence] = UNKNOWN;
      usages[sentence] = UNKNOWN;
      return;
   }

   usages[sentence] = aux_usage;
   if (kept == 1) {
      tenses[sentence] = shared[0];
      return;
   }

   /* Several candidates: take the tense of a matching */
   /* recent sentence, else PRESENT                    */
   prev = recall_tense(shared);
   if (prev >= 0)
      tenses[sentence] = tenses[prev];
   else
      tenses[sentence] = PRESENT;
   return;
}

/*****************************************************/
/* Match the subject, verb, and auxiliary number.    */
/* If the match is successful, then the sentence     */
/* number is the matched number; otherwise it is     */
/* UNKNOWN. The auxiliary is only consulted when     */
/* the sentence has one.                             */
/*****************************************************/
void check_number()
{
   /* The verb must allow the subject's number         */
   if (strchr((const char *) verb_number,
              subject_number) == NULL) {
      numbers[sentence] = UNKNOWN;
      return;
   }
   /* So must the auxiliary, when there is one         */
   if ((strlen(auxiliaries[sentence]) > 0) &&
       (strchr((const char *) aux_number,
               subject_number) == NULL)) {
      numbers[sentence] = UNKNOWN;
      return;
   }
   numbers[sentence] = subject_number;
   return;
}

/*****************************************************/
/* Scan the dictionary from the start and stop at    */
/* the first record for which match_aux() reports    */
/* a match, leaving the auxiliary information in     */
/* the aux_* variables.                              */
/*****************************************************/
void get_aux()
{
   rewind(infile);
   /* Loop on the fgets() result rather than feof():   */
   /* a final record without a newline is still        */
   /* examined, and a read error ends the scan instead */
   /* of repeating the last record forever.            */
   while (fgets(dic_record, 80, infile) != NULL) {
       if (match_aux() == 0)
          return;
   }
   return;
}

/*****************************************************/
/* If the sentence auxiliary matches the word in the */
/* current dictionary record, copy its usage, tense, */
/* number and meaning codes and return 0. Return 1   */
/* when the words differ.                            */
/*****************************************************/
int match_aux()
{
   int i,j;
   char *dic_word;

   dic_word = extract_word();
   if (strcmpi(auxiliaries[sentence], dic_word) != 0)
      return(1);

   /* Usage: column 29                                 */
   aux_usage = dic_record[29];

   /* Tense codes: columns 30-33, blank padded.        */
   /* isspace() needs an unsigned char value.          */
   for (i=30,j=0; i<34; i++,j++) {
      if (isspace((unsigned char) dic_record[i])) break;
      aux_tense[j] = dic_record[i];
   }
   aux_tense[j] = '\0';

   /* Number codes: columns 41-42, blank padded        */
   for (i=41,j=0; i<43; i++,j++) {
      if (isspace((unsigned char) dic_record[i])) break;
      aux_number[j] = dic_record[i];
   }
   aux_number[j] = '\0';

   /* Meaning codes: columns 44-46, blank padded       */
   for (i=44,j=0; i<47; i++,j++) {
      if (isspace((unsigned char) dic_record[i])) break;
      aux_meaning[sentence][j] = dic_record[i];
   }
   /* Terminate like the other fields: the meaning is  */
   /* later handed to strpbrk() as a string            */
   aux_meaning[sentence][j] = '\0';
   return(0);
}

/*****************************************************/
/* Generate a response with information from a       */
/* matching, previous sentence.                      */
/*****************************************************/
void make_response()
{
  int i;

  /***************************************************/
  /* Input sentence is not asking for information.   */
  /***************************************************/
  if (strcmpi(word_array[0], "where") != 0) {
    strcpy(response, "OK");
    return;
  }

  /***************************************************/
  /* Match subject, action, tense, and meaning.      */
  /***************************************************/
  for (i=sentence-1; i>=0; i--) {
    if ((strcmpi(subjects[i],subjects[sentence])==0) &&
        (strcmpi(actions[i], actions[sentence]) ==0) &&
        (strlen(places[i])                      > 0) &&
        (tenses[i]              == tenses[sentence]) &&
        (strpbrk(aux_meaning[i],aux_meaning[sentence])
                                      != NULL)) {
        make_answer(i);
        return;
    }
  }
  /***************************************************/
  /* Match subject, action, and tense.               */
  /***************************************************/
  for (i=sentence-1; i>=0; i--) {
    if ((strcmpi(subjects[i],subjects[sentence])==0) &&
        (strcmpi(actions[i], actions[sentence]) ==0) &&
        (strlen(places[i])                      > 0) &&
        (tenses[i]              == tenses[sentence])) {
    make_answer (i);
    return;
    }
  }
  /***************************************************/
  /*Match subject, action, and meaning.              */
  /***************************************************/
  for (i=sentence-1; i>=0; i--) {
    if ((strcmpi(subjects[i],subjects[sentence])==0) &&
        (strcmpi(actions[i], actions[sentence]) ==0) &&
        (strlen(places[i])  > 0) &&
        (strpbrk(aux_meaning[i],aux_meaning[sentence])
                                      != NULL)) {
        strcpy(response, 'I'm not sure, but ");
       make_answer(i);
       return;
    }
  }
  /***************************************************/
  /* Match subject and action.                       */
  /***************************************************/
  for (i=sentence-1; i>=0; i--) {
    if ((strcmpi(subjects[i],subjects[sentence])==0) &&
        (strcmpi(actions[i], actions[sentence]) ==0) &&
        (strlen(places[i])  > 0)) {
       strcpy(response, 'I'm not sure, but ");
       make_answer(i);
       return;
    }
  }
  strcpy(response, "I don't know");
  return;
}

/*****************************************************/
/* Append an answer built from a previous sentence   */
/* to the response: subject, auxiliary (if any),     */
/* verb (via get_verb) and place.                    */
/*****************************************************/
void make_answer(int prev_sentence)
{
   /* A pronoun subject is re-cased in place: upper   */
   /* case when it opens the response, lower case     */
   /* when it follows text already there              */
   if (subjects_type[prev_sentence] == PRONOUN) {
       subjects[prev_sentence][0] = (response[0] == '\0')
          ? (char) toupper(subjects[prev_sentence][0])
          : (char) tolower(subjects[prev_sentence][0]);
   }

   strcat(response, subjects[prev_sentence]);
   strcat(response, " ");

   if (auxiliaries[prev_sentence][0] != '\0') {
       strcat(response, auxiliaries[prev_sentence]);
       strcat(response, " ");
   }

   /* get_verb() looks up the verb form for this      */
   /* tense, number and usage                         */
   get_verb(tenses[prev_sentence],
            numbers[prev_sentence],
            usages[prev_sentence]);

   strcat(response, places[prev_sentence]);
   return;
}

/*****************************************************/
/* Scan the dictionary from the start and stop at    */
/* the first record for which match_verb() appends   */
/* a verb form to the response.                      */
/*****************************************************/
void get_verb(char pass_tense,
            char pass_number, char pass_usage)
{
   rewind(infile);
   /* Loop on the fgets() result rather than feof():   */
   /* a final record without a newline is still        */
   /* examined, and a read error ends the scan instead */
   /* of repeating the last record forever.            */
   while (fgets(dic_record, 80, infile) != NULL) {
       if (match_verb(pass_tense,
                      pass_number, pass_usage) == 0)
           break;
   }
   return;
}

/*****************************************************/
/* If the verb information in the current record     */
/* matches the passed information, then move the     */
/* correct verb to the response.                     */
/*****************************************************/
int match_verb(char pass_tense,
             char pass_number, char pass_usage)
{
  int i;
  char *root;
  char *dic_word;
  root = extract_root();

  /* Match verb with root  */
  if (strcmpi(actions[sentence], root) == 0) {
     /* Match verb with tense  */
     for (i=301 i<34; i++) {
        if (isspace(dic_record[i])) return(1);
        if (dic_record[i] -= pass_tense) break;
     }
     /* Match verb with number  */
     for (i=41; i<43; i++) {
        if (isspace(dic_record[i])) return(1);
        if (dic_record[i] == pass_number) break;
     }
     /* Match verb with usage  */
     if (dic record[29] == pass_usage) {
        dic_word = extract_word();
        strcat(response, dic_word);
        return(0);
     }
  }
  return(1);
}

/* End of File */

Related Reading


More Insights






Currently we allow the following HTML tags in comments:

Single tags

These tags can be used alone and don't need an ending tag.

<br> Defines a single line break

<hr> Defines a horizontal line

Matching tags

These require an ending tag - e.g. <i>italic text</i>

<a> Defines an anchor

<b> Defines bold text

<big> Defines big text

<blockquote> Defines a long quotation

<caption> Defines a table caption

<cite> Defines a citation

<code> Defines computer code text

<em> Defines emphasized text

<fieldset> Defines a border around elements in a form

<h1> This is heading 1

<h2> This is heading 2

<h3> This is heading 3

<h4> This is heading 4

<h5> This is heading 5

<h6> This is heading 6

<i> Defines italic text

<p> Defines a paragraph

<pre> Defines preformatted text

<q> Defines a short quotation

<samp> Defines sample computer code text

<small> Defines small text

<span> Defines a section in a document

<s> Defines strikethrough text

<strike> Defines strikethrough text

<strong> Defines strong text

<sub> Defines subscripted text

<sup> Defines superscripted text

<u> Defines underlined text

Dr. Dobb's encourages readers to engage in spirited, healthy debate, including taking us to task. However, Dr. Dobb's moderates all comments posted to our site, and reserves the right to modify or remove any content that it determines to be derogatory, offensive, inflammatory, vulgar, irrelevant/off-topic, racist or obvious marketing or spam. Dr. Dobb's further reserves the right to disable the profile of any commenter participating in said activities.

 
Disqus Tips To upload an avatar photo, first complete your Disqus profile. | View the list of supported HTML tags you can use to style comments. | Please read our commenting policy.