/* 
 * this is required to get asprintf() on GNU/Linux
 */
#define _GNU_SOURCE

#include <sys/types.h>
#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>

#include "callcheck.h"
#include "token.h"
#include "myname.h"
#include "fatal_error.h"
#include "hash_table.h"
#include "parser.h"
#include "keywords.h"
#include "wrappers.h"


/* character repeated to build one level of indentation in the output */
#define INDENT_CHAR ' '

/*
 * POSIX extended regular expressions.
 *
 * PATTERN_INPUT describes one input line: "<lineno>:<error>:<token>". Its
 * five parenthesised groups are addressed with the PMATCH_* indices below
 * (index 0 of the match array is the whole match).
 *
 * NOTE(review): PATTERN_NUMBER and PATTERN_IDENTIFIER are not anchored with
 * ^...$, so regexec() reports a match when any substring of the token
 * matches -- confirm the tokens reaching the parser make this safe.
 */
#define PATTERN_NUMBER "[0-9]+"
#define PATTERN_IDENTIFIER "[0-9a-z]*[a-z][0-9a-z]*"
#define PATTERN_INPUT "^(-?[0-9][0-9]*)(:)([0-9a-zA-Z_ ][0-9a-zA-Z_ ]*)(:)(.*)$"
#define PATTERN_INPUT_NSUB 5
#define PMATCH_LINENO 1
#define PMATCH_SEP1 2
#define PMATCH_ERROR 3
#define PMATCH_SEP2 4
#define PMATCH_TOKEN 5

/* numeric base handed to strtoll() when reading the line number */
#define BASE_10 10

#define CHANGE_SIGN -1

/* extra byte reserved for a string's terminating '\0' */
#define NULL_TERM 1

#define READ_MODE "r"
#define WRITE_MODE "w"

/*
 * small named values used in place of bare literals: booleans, initial
 * counters, the buffer growth factor, and the "success" / "match" / "no
 * flags" values of the libc and regex calls
 */
#define EMPTY 0
#define FALSE 0
#define TRUE 1
#define DOUBLE 2
#define INITIAL_BUFF_SIZE 1
#define NO_DIFF 0
#define MATCH 0
#define NO_NMATCH 0
#define NO_EFLAGS 0

/* keyword table positions and argument slots */
#define START_KEYWORD 0
#define NEXT 1
#define ARG_KEYWORD 0
#define ARG_FIRST 1
#define ARG_SECOND 2

#define NO_ARG 1
#define SINGLE_ARG 2
#define TWO_ARG 3

/* starting line number and indentation levels */
#define INITIAL_LINE 1
#define INDENT_NONE 0
#define INDENT_INITIAL 1
#define INDENT_INCREASE 1
#define LEVEL_UP 1

/* parameters passed to hash_init() for the table of defined variables */
#define HASHTABLE_INITIAL_SIZE 65536
#define HASHTABLE_LOADFACTOR 8

/* value ferror() and regexec()/regcomp() return when nothing went wrong */
#define NO_ERROR 0

/* not referenced by the code visible in this part of the file */
#define KEYWORD_ARRAY_POW2 64
#define KEYWORD_ARRAY_COUNT 35


/*
 * returns an indent string. The buffer is static
 */
char *get_indent (int level) {
  static char *indent = NULL;
  static size_t indent_size = INDENT_NONE;
  int i;

  /* 
   * this function has not been run before; initialize everything
   */
  if (indent == NULL) {
    indent_size = level + NULL_TERM;
    indent = ra_malloc(indent_size);
    scallcheck(indent != NULL, "parser error allocating memory");

    for (i = INDENT_NONE; i < indent_size - NULL_TERM; i++) {
      indent[i] = INDENT_CHAR;
    }
    indent[i] = '\0';
  }
  /*
   * the buffer isn't big enough, grow it
   */
  else if (indent_size < level + NULL_TERM) {
    indent_size = level + NULL_TERM;
    indent = ra_realloc(indent, indent_size);
    scallcheck(indent != NULL, "parser error allocating memory");

    for (i = EMPTY; i < indent_size - NULL_TERM; i++) {
      indent[i] = INDENT_CHAR;
    }
    indent[i] = '\0';
  }

  return &(indent[indent_size - NULL_TERM - level]);
}


/*
 * Returns a heap allocated Python expression for the token's value.
 *
 * T_NUMBER tokens are rendered as their text, T_IDENTIFIER tokens as a
 * lookup in the generated script's `variable` dictionary. Any other token
 * type yields NULL. The caller frees the returned string.
 */
char *format_value (token_t token) {
  char *buff = NULL;
  int ret;

  if (token.type == T_NUMBER) {
    ret = asprintf(&buff, "%s", token.value);
  }
  else if (token.type == T_IDENTIFIER) {
    ret = asprintf(&buff, "variable[\"%s\"]", token.value);
  }
  else {
    return NULL;
  }

  /*
   * asprintf() returns a negative value on failure; test the return value
   * itself instead of a value derived from it
   */
  scallcheck(ret >= EMPTY, "parser output formatting error");

  return buff;
}


/*
 * Reads the next line from input into *buff_in, without its newline.
 *
 * *buff_in may be NULL on the first call; the buffer is allocated here and
 * grown by doubling as needed. On return *buff_in and *buff_size_in describe
 * the (possibly moved) buffer, which always holds a terminated string.
 *
 * Returns TRUE if a line was read (even an empty one ended by a newline),
 * FALSE if end of input was reached before any character was read.
 */
int readline (FILE *input, char **buff_in, size_t *buff_size_in) {
  char *buff = *buff_in, *temp;
  size_t buff_size = *buff_size_in;
  size_t used = EMPTY;
  int inchar, ret = FALSE;

  /* this function has not been run before: initialize everything */
  if (buff == NULL) {
    buff_size = INITIAL_BUFF_SIZE;
    buff = ra_malloc(buff_size);
    scallcheck(buff != NULL, "parser error allocating memory");
  }

  /*
   * I/O loop
   */
  while (TRUE) {
    /*
     * the buffer is not big enough, grow it. A size of zero cannot be
     * doubled, so restart from the initial size in that case.
     */
    if (used >= buff_size) {
      if (buff_size > EMPTY) {
        buff_size *= DOUBLE;
      }
      else {
        buff_size = INITIAL_BUFF_SIZE;
      }
      temp = ra_realloc(buff, buff_size);
      scallcheck(temp != NULL, "parser error allocating memory");
      buff = temp;
    }

    inchar = fgetc(input);
    scallcheck(ferror(input) == NO_ERROR, "parser error with I/O");

    /*
     * no more input, so terminate the string
     */
    if (inchar == EOF) {
      buff[used++] = '\0';
      break;
    }

    /*
     * at least one character was consumed, so there is a line to report
     */
    ret = TRUE;

    /*
     * the line is over, so terminate the string
     */
    if (inchar == '\n') {
      buff[used++] = '\0';
      break;
    }

    buff[used++] = (char) inchar;
  }

  /*
   * Buffer was either allocated, reallocated, or left the same. In no case do
   * we need to free the memory.
   */
  *buff_in = buff;
  *buff_size_in = buff_size;

  return ret;
}


/*
 * Pushes the most recently read token back: the next get_token() call on
 * this io state hands out that same token again instead of reading input.
 */
void unget_token (io_state_t *io) {
  (*io).unget = TRUE;
}


/*
 * Reads the next token into *token.
 *
 * If unget_token() was called since the last read, the previously parsed
 * token is returned again. Otherwise one "<lineno>:<error>:<token>" line is
 * read and split in place.
 *
 * token->value points into a static line buffer, so it is only valid until
 * the next line is read; use copy_token() to keep it.
 *
 * Returns TRUE when a token was produced, FALSE when there is no more input.
 */
int get_token (io_state_t *io, token_t *token) {
  static char *buff = NULL;
  static size_t buff_size = EMPTY;
  static token_t token_buff;

  regmatch_t pmatch[PATTERN_INPUT_NSUB + 1];
  char *lineno_str, *end;
  int ret;

  /*
   * unget is set, return the token we've already parsed
   */
  if (io->unget) {
    io->unget = FALSE;
    *token = token_buff;

    return TRUE;
  }

  ret = readline(io->input, &buff, &buff_size);
  if (!ret) {
    return FALSE;
  }

  ret = regexec(&(io->preg), buff, PATTERN_INPUT_NSUB + 1, pmatch, NO_EFLAGS);
  mcallcheck(ret == NO_ERROR, "input format to the parser not correct");

  /*
   * Overwrite both separators with terminators. This allows the three fields
   * to be used as strings directly from the buffer.
   */
  buff[pmatch[PMATCH_SEP1].rm_so] = '\0';
  buff[pmatch[PMATCH_SEP2].rm_so] = '\0';

  /*
   * strtoll() only sets errno on failure, so it has to be cleared first or a
   * stale value from an earlier call would be mistaken for an error here
   */
  lineno_str = &(buff[pmatch[PMATCH_LINENO].rm_so]);
  errno = NO_ERROR;
  token_buff.lineno = strtoll(lineno_str, &end, BASE_10);
  mcallcheck(end != lineno_str && errno == NO_ERROR,
             "input format to the parser not correct");

  ret = token_str2err(&(buff[pmatch[PMATCH_ERROR].rm_so]),
                      &(token_buff.error));
  mcallcheck(ret, "input format to the parser not correct");

  /*
   * start of the token text is at buff[pmatch[PMATCH_TOKEN].rm_so]
   */
  token_buff.type = T_UNKNOWN;
  token_buff.value = &(buff[pmatch[PMATCH_TOKEN].rm_so]);

  switch (token_buff.error) {
  case E_NOERROR:
    token_buff.type = T_UNKNOWN;
    break;
  case E_EOF:
    token_buff.type = T_EOF;
    break;
  case E_ILLEGALCHAR:
    fatal_error_illegalchar(token_buff);
    /* never returns */
    break;
  default:
    /* any other error code leaves the type as T_UNKNOWN */
    break;
  }

  /*
   * copy these results into the caller's token object
   */
  *token = token_buff;

  return TRUE;
}


/*
 * Returns a copy of the token whose value string has been duplicated onto
 * the heap, so the copy stays usable after the original's storage is reused.
 * All other fields are copied unchanged. The caller frees the new value.
 */
token_t copy_token (token_t in) {
  token_t duplicate = in;
  size_t bytes = strlen(in.value) + NULL_TERM;
  char *ret = ra_malloc(bytes);

  scallcheck(ret != NULL, "parser error with allocating memory");

  /* bytes includes the terminator, so the copy is a complete string */
  memcpy(ret, in.value, bytes);
  duplicate.value = ret;

  return duplicate;
}


/*
 * Runs the parser with the given input and output streams.
 *
 * Compiles the input line pattern, checks that the regex library reports the
 * expected number of subexpressions, parses the whole program, and releases
 * the compiled pattern again.
 */
void parser (FILE *input, FILE *output) {
  int ret;
  io_state_t io;

  io.input = input;
  io.output = output;
  io.unget = FALSE;

  ret = regcomp(&(io.preg), PATTERN_INPUT, REG_EXTENDED);
  scallcheck(ret == NO_ERROR, "parser error with regex");
  scallcheck(io.preg.re_nsub == PATTERN_INPUT_NSUB,
             "parser regex nsub mismatch. Is your regex library POSIX?");

  parse_program(&io);

  /*
   * io does not outlive this function, so nothing can use the pattern after
   * this point
   */
  regfree(&(io.preg));
}


/*
 * top level nanologo grammar parser
 *
 * prints the header and footer for Python stuff, runs until an EOF token is
 * reached.
 */
int parse_program (io_state_t *io) {
  hash_table_t variable;
  token_t errbuff;
  int ret, empty = TRUE;

  hash_init(&variable, HASHTABLE_INITIAL_SIZE, HASHTABLE_LOADFACTOR,
	    str_hash_code, (int (*)(const void *, const void *)) strcmp);

  fprintf(io->output, "import sys\n");
  fprintf(io->output, "import time\n");
  fprintf(io->output, "import turtle\n");
  fprintf(io->output, "\n");
  fprintf(io->output, "variable = {}\n");
  fprintf(io->output, "\n");
  fprintf(io->output, "def logoMain ():\n");

  /*
   * while we haven't recieved the EOF token
   */
  while (!parse_eof(io, TRUE)) {
    ret = parse_keyword(io, INDENT_INITIAL, &variable);

    /*
     * don't print a "pass" for this block because it's not empty
     */
    empty = FALSE;

    if (!ret) {
      ret = get_token(io, &errbuff);
      scallcheck(ret, "parser error with I/O");

      fatal_error_expected_keyword(errbuff);
    }
  }

  /*
   * the block is empty so we have to print a "pass" otherwise Python will
   * be unhappy
   */
  if (empty) {
    fprintf(io->output, "%cpass\n", INDENT_CHAR);
  }
  
  fprintf(io->output, "\n");
  fprintf(io->output, "try:\n");
  fprintf(io->output, "%ctry:\n", INDENT_CHAR);
  fprintf(io->output, "%c%cturtle.title(\"LOGO\")\n",
	  INDENT_CHAR, INDENT_CHAR);
  fprintf(io->output, "%c%cturtle.setup(width=1024, height=768)\n",\
	  INDENT_CHAR, INDENT_CHAR);
  fprintf(io->output, "%cexcept AttributeError:\n", INDENT_CHAR);
  fprintf(io->output, "%c%cpass # cosmetic, no big deal if unsupported\n",
	  INDENT_CHAR, INDENT_CHAR);  
  fprintf(io->output, "%clogoMain()\n", INDENT_CHAR);
  fprintf(io->output, "%ctime.sleep(60)\n", INDENT_CHAR);
  fprintf(io->output, "except KeyError, e:\n");
  fprintf(io->output, "%csys.stderr.write(\"error: undefined variable '\")\n",
	  INDENT_CHAR);
  fprintf(io->output, "%csys.stderr.write(str(e).strip(\"'\"))\n",
	  INDENT_CHAR);
  fprintf(io->output, "%csys.stderr.write(\"'\\n\")\n",
	  INDENT_CHAR);
  fprintf(io->output, "%csys.exit(1)\n",
	  INDENT_CHAR);
  fprintf(io->output, "except KeyboardInterrupt, e:\n");
  fprintf(io->output, "%csys.stderr.write(\"User abort caught. Exit.\\n\")\n",
	  INDENT_CHAR);
  fprintf(io->output, "%csys.exit(1)\n",
	  INDENT_CHAR);

  return TRUE;
}


/*
 * parses the body of a [ ... ] block
 *
 * Parses keyword statements at the given indent level until a ']' token is
 * consumed. Reaching EOF inside the block is reported as an error by
 * parse_eof(). Always returns TRUE.
 */
int parse_subprogram (io_state_t *io, int indent, hash_table_t *variable) {
  token_t errbuff;
  int ret, body_is_empty = TRUE;

  for (;;) {
    if (parse_closebrace(io, TRUE)) {
      break;
    }

    /*
     * the block must be closed before the input ends
     */
    parse_eof(io, FALSE);

    /*
     * there is something other than ']' here, so the block has a body
     */
    body_is_empty = FALSE;

    ret = parse_keyword(io, indent, variable);
    if (ret) {
      continue;
    }

    /*
     * not a keyword: fetch the offending token and report it
     */
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_keyword(errbuff);
  }

  /*
   * Python does not accept an empty block, so give it a "pass"
   */
  if (body_is_empty) {
    fprintf(io->output, "%spass\n", get_indent(indent));
  }

  return TRUE;
}


/*
 * parses a keyword and its arguments.
 *
 * This function handles all keywords. It consults the keywords array (see
 * keywords.h) to discover what is a keyword and how to handle it.
 *
 * This is the top-level entry point: it reads the token and starts the
 * search at the first keyword; see parse_keyword_n for the recursive descent
 * part.
 *
 * Returns TRUE if a keyword was found and handled. Otherwise the token is
 * pushed back, so that the caller can deal with it, and FALSE is returned.
 */
int parse_keyword (io_state_t *io, int indent, hash_table_t *variable) {
  token_t token;
  int ret;

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  ret = parse_keyword_n(io, indent, variable, token, START_KEYWORD);

  if (ret != TRUE) {
    /*
     * something was encountered that's not a keyword, do not consume it
     */
    unget_token(io);
    return FALSE;
  }

  /*
   * a keyword was found, and has been handled appropriately
   */
  return TRUE;
}


/*
 * tries to match the token against keyword[i] and, failing that, against the
 * rest of the keywords array.
 *
 * On a match the token is marked as a keyword, its value is copied to the
 * heap, and the keyword's handler is called; the handler's result is
 * returned. FALSE is returned when the end of the array (a NULL word) is
 * reached without a match.
 *
 * The recursion is equivalent to a linear search along the array. A binary
 * search would be faster on sorted data, but this is deliberately kept as
 * real recursive descent rather than a simulation of it.
 */
int parse_keyword_n (io_state_t *io, int indent, hash_table_t *variable,
                     token_t token, int i) {
  /* end of array, nothing found */
  if (keyword[i].word == NULL) {
    return FALSE;
  }

  /* no match, try the next keyword */
  if (strcmp(keyword[i].word, token.value) != NO_DIFF) {
    return parse_keyword_n(io, indent, variable, token, i + 1);
  }

  /* match: the handler receives its own heap copy of the keyword text */
  token.type = T_KEYWORD;
  token = copy_token(token);

  return keyword[i].handler(io, indent, token, i, variable);
}


/*
 * generic handler for keywords that take zero arguments.
 *
 * Prints the keyword's format string (see keywords.h) on its own line at the
 * given indent, releases the keyword token's value, and returns TRUE.
 */
int parse_noargs (io_state_t *io, int indent, token_t token, int key,
                  hash_table_t *variable) {
  char *padding = get_indent(indent);

  fprintf(io->output, "%s%s\n", padding, keyword[key].format);
  free(token.value);

  return TRUE;
}


/*
 * generic handler for keywords that take one value argument.
 *
 * Parses a number or defined identifier, substitutes its formatted form into
 * the keyword's format string (see keywords.h), and prints the result on its
 * own line at the given indent. Returns TRUE.
 */
int parse_onearg (io_state_t *io, int indent, token_t token, int key,
                  hash_table_t *variable) {
  token_t arg, errbuff;
  char *argbuff, *line = NULL;
  int ret;

  /*
   * expecting a value; if there is none, report the token found instead
   */
  if (!parse_number(io, &arg, variable)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  argbuff = format_value(arg);

  ret = asprintf(&line, keyword[key].format, argbuff);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), line);

  free(line);
  free(argbuff);
  free(arg.value);
  free(token.value);

  return TRUE;
}


/*
 * handles the "make" keyword, which always takes two arguments: an
 * identifier (which need not be defined yet) and a value.
 *
 * Prints the assignment and records the identifier in the table of defined
 * variables. Returns TRUE.
 */
int parse_make (io_state_t *io, int indent, token_t token, int key,
                hash_table_t *variable) {
  token_t arg1, arg2, errbuff;
  char *argbuff1, *argbuff2, *line;
  int ret;

  /*
   * first argument: an identifier, no definition check
   */
  if (!parse_identifier_nodefcheck(io, &arg1)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_identifier(errbuff);
  }

  /*
   * second argument: a value
   */
  if (!parse_number(io, &arg2, variable)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  argbuff1 = format_value(arg1);
  argbuff2 = format_value(arg2);

  ret = asprintf(&line, keyword[key].format, argbuff1, argbuff2);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), line);

  if (hash_test(variable, arg1.value)) {
    /*
     * an identical name is already in the hash table, so this copy is not
     * needed
     */
    free(arg1.value);
  }
  else {
    /*
     * the hash table keeps arg1.value, so it must not be freed here
     */
    hash_insert(variable, arg1.value);
  }

  free(line);
  free(argbuff2);
  free(argbuff1);
  free(token.value);
  free(arg2.value);

  return TRUE;
}


/*
 * handler for the "incr" and "decr" keywords, which always take two
 * arguments: an identifier that must already be defined, and a value.
 *
 * Prints the statement built from the keyword's format string on its own
 * line at the given indent. Returns TRUE.
 */
int parse_incdec (io_state_t *io, int indent, token_t token, int key,
                  hash_table_t *variable) {
  token_t arg1, arg2, errbuff;
  char *argbuff1, *argbuff2, *line;
  int ret;

  /*
   * first argument: must be a defined identifier
   */
  if (!parse_identifier(io, &arg1, variable)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_identifier(errbuff);
  }

  /*
   * second argument: a value
   */
  if (!parse_number(io, &arg2, variable)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  argbuff1 = format_value(arg1);
  argbuff2 = format_value(arg2);

  ret = asprintf(&line, keyword[key].format, argbuff1, argbuff2);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), line);

  free(line);
  free(argbuff2);
  free(argbuff1);
  free(token.value);
  free(arg2.value);
  free(arg1.value);

  return TRUE;
}


/*
 * handler for the "output" keyword, which can take a value or a string
 */
int parse_output (io_state_t *io, int indent, token_t token, int key,
		  hash_table_t *variable) {
  char *argbuff = NULL, *buff, *temp;
  token_t arg;
  int ret, i, j;
  size_t size;

  ret = parse_number(io, &arg, variable);
  if (ret) {
    /*
     * found a number
     */
    argbuff = format_value(arg);

    free(arg.value);
  }
  else {
    /*
     * it must be a string
     */
    ret = get_token(io, &arg);
    scallcheck(ret, "parser error with I/O");

    if (strcmp("\"", arg.value) != NO_DIFF) {
      fatal_error_expected_printable(arg);
    }

    ret = get_token(io, &arg);
    scallcheck(ret, "parser error with I/O");

    if (arg.error == E_EOF) {
      unget_token(io);
    }

    size = strlen(arg.value);

    /*
     * The string might be all quotes, and we have to escape those. If we have
     * n characters and all are quotes, then we need 2n characters for the
     * output buffer, one for the leading blackslash and one for the quote
     * itself. I just allocate 2n rather than do anything complicated.
     */
    temp = ra_malloc((size * DOUBLE) + NULL_TERM); 
    scallcheck(ret, "parser error allocating memory");

    for (i = EMPTY, j = EMPTY; i < size;) {
      if (arg.value[i] == '"') {
	temp[j++] = '\\';
      }

      temp[j++] = arg.value[i++];
    }
    temp[j] = '\0';

    ret = asprintf(&argbuff, "\"%s\"", temp);
    scallcheck(ret >= EMPTY, "parser output formatting error");

    free(temp);
  }

  ret = asprintf(&buff, keyword[key].format, argbuff);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), buff);
 
  free(token.value);
  free(argbuff);
  free(buff);

  return TRUE;
}


/*
 * handler for the "color" keyword, which must take a string
 */
int parse_color (io_state_t *io, int indent, token_t token, int key,
		 hash_table_t *variable) {
  char *argbuff = NULL, *buff, *temp;
  token_t arg;
  int ret, i, j;
  size_t size;

  ret = get_token(io, &arg);
  scallcheck(ret, "parser error with I/O");
  
  if (strcmp("\"", arg.value) != NO_DIFF) {
    fatal_error_expected_string(arg);
  }

  ret = get_token(io, &arg);
  scallcheck(ret, "parser error with I/O");

  size = strlen(arg.value);

  /*
   * The string might be all quotes, and we have to escape those. For n
   * quotes, we can have up to 2n characters. I just allocate that much
   * rather than do anything complicated.
   */
  temp = ra_malloc((size * DOUBLE) + NULL_TERM); 
  scallcheck(ret, "parser error allocating memory");
  
  for (i = EMPTY, j = EMPTY; i < size;) {
    if (arg.value[i] == '"') {
      temp[j++] = '\\';
    }
    
    temp[j++] = arg.value[i++];
  }
  temp[j] = '\0';

  ret = asprintf(&argbuff, "\"%s\"", temp);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  free(temp);

  ret = asprintf(&buff, keyword[key].format, argbuff);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), buff);
 
  free(token.value);
  free(argbuff);
  free(buff);

  return TRUE;
}


/*
 * handler for the "write" keyword, which must take a string
 */
int parse_write (io_state_t *io, int indent, token_t token, int key,
		 hash_table_t *variable) {
  char *argbuff = NULL, *buff, *temp;
  token_t arg;
  int ret, i, j;
  size_t size;

  ret = get_token(io, &arg);
  scallcheck(ret, "parser error with I/O");
  
  if (strcmp("\"", arg.value) != NO_DIFF) {
    fatal_error_expected_string(arg);
  }

  ret = get_token(io, &arg);
  scallcheck(ret, "parser error with I/O");

  size = strlen(arg.value);

  /*
   * The string might be all quotes, and we have to escape those. For n
   * quotes, we can have up to 2n characters. I just allocate that much
   * rather than do anything complicated.
   */
  temp = ra_malloc((size * DOUBLE) + NULL_TERM); 
  scallcheck(ret, "parser error allocating memory");
  
  for (i = EMPTY, j = EMPTY; i < size;) {
    if (arg.value[i] == '"') {
      temp[j++] = '\\';
    }
    
    temp[j++] = arg.value[i++];
  }
  temp[j] = '\0';

  ret = asprintf(&argbuff, "\"%s\"", temp);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  free(temp);

  ret = asprintf(&buff, keyword[key].format, argbuff);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), buff);
 
  free(token.value);
  free(argbuff);
  free(buff);

  return TRUE;
}


/*
 * handler for the "goto" keyword, which always takes two value arguments
 *
 * Prints the statement built from the keyword's format string on its own
 * line at the given indent. Returns TRUE.
 */
int parse_goto (io_state_t *io, int indent, token_t token, int key,
                hash_table_t *variable) {
  char *buff, *argbuff1, *argbuff2;
  token_t arg1, arg2, errbuff;
  int ret;

  ret = parse_number(io, &arg1, variable);
  /*
   * expecting a value. Both arguments are parsed the same way, so both
   * failures are reported as a missing value.
   */
  if (!ret) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  ret = parse_number(io, &arg2, variable);
  /*
   * expecting a value
   */
  if (!ret) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  argbuff1 = format_value(arg1);
  argbuff2 = format_value(arg2);

  ret = asprintf(&buff, keyword[key].format, argbuff1, argbuff2);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), buff);

  free(arg1.value);
  free(arg2.value);
  free(token.value);
  free(argbuff1);
  free(argbuff2);
  free(buff);
  return TRUE;
}


/*
 * handler for the "repeat" keyword, which always takes two arguments: a
 * value, and a block that must open with '['
 *
 * Prints the loop header (the keyword's format string receives the indent
 * level and the formatted value), then parses the block one level deeper.
 * Returns TRUE.
 */
int parse_repeat (io_state_t *io, int indent, token_t token, int key,
                  hash_table_t *variable) {
  token_t arg, errbuff;
  char *argbuff, *header = NULL;
  int ret;

  /*
   * expecting a value
   */
  if (!parse_number(io, &arg, variable)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  /*
   * expecting a [
   */
  if (!parse_openbrace(io, TRUE)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_openbrace(errbuff);
  }

  argbuff = format_value(arg);

  ret = asprintf(&header, keyword[key].format, indent, argbuff);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), header);

  parse_subprogram(io, indent + NEXT, variable);

  free(header);
  free(argbuff);
  free(arg.value);
  free(token.value);

  return TRUE;
}


/*
 * handler for the "if" keyword, which always takes two arguments: a value,
 * and a block that must open with '['
 *
 * Prints the condition line built from the keyword's format string, then
 * parses the block one level deeper. Returns TRUE.
 */
int parse_if (io_state_t *io, int indent, token_t token, int key,
              hash_table_t *variable) {
  token_t arg, errbuff;
  char *argbuff, *header = NULL;
  int ret;

  /*
   * expecting a value
   */
  if (!parse_number(io, &arg, variable)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_value(errbuff);
  }

  /*
   * expecting a [
   */
  if (!parse_openbrace(io, TRUE)) {
    ret = get_token(io, &errbuff);
    scallcheck(ret, "parser error with I/O");

    fatal_error_expected_openbrace(errbuff);
  }

  argbuff = format_value(arg);

  ret = asprintf(&header, keyword[key].format, argbuff);
  scallcheck(ret >= EMPTY, "parser output formatting error");

  fprintf(io->output, "%s%s\n", get_indent(indent), header);

  parse_subprogram(io, indent + NEXT, variable);

  free(header);
  free(argbuff);
  free(arg.value);
  free(token.value);

  return TRUE;
}


/*
 * parses a value: a number, or failing that whatever parse_identifier
 * accepts
 *
 * On success arg->value points to heap allocated storage for the string and
 * TRUE is returned. On failure the contents of arg are undefined, but it is
 * not set to point to anything on the heap.
 *
 * EOF and keywords are both errors here and are reported as such.
 */
int parse_number (io_state_t *io, token_t *arg, hash_table_t *variable) {
  static regex_t re_num;
  static int re_num_ready = FALSE;

  token_t token;
  int ret, i;

  /*
   * compile the pattern the first time this function is used
   */
  if (re_num_ready == FALSE) {
    ret = regcomp(&re_num, PATTERN_NUMBER,
                  REG_EXTENDED | REG_ICASE | REG_NOSUB);
    scallcheck(ret == MATCH, "parser couldn't compile regex.");

    re_num_ready = TRUE;
  }

  /*
   * a value is required, so EOF is not acceptable here
   */
  parse_eof(io, FALSE);

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  /*
   * a keyword where a value should be is an error
   */
  i = EMPTY;
  while (keyword[i].word != NULL) {
    if (strcmp(token.value, keyword[i].word) == NO_DIFF) {
      fatal_error_expected_value_keyword(token);
    }
    i++;
  }

  /*
   * this is not a number, hand the token over to parse_identifier
   */
  if (regexec(&re_num, token.value, NO_NMATCH, NULL, NO_EFLAGS) != MATCH) {
    unget_token(io);
    return parse_identifier(io, arg, variable);
  }

  /*
   * this is indeed a number
   */
  token.type = T_NUMBER;
  *arg = copy_token(token);

  return TRUE;
}


/*
 * parses an identifier WITHOUT checking that it has been defined
 *
 * On success arg->value points to heap allocated storage for the string and
 * TRUE is returned. Otherwise the token is pushed back and FALSE is
 * returned; the contents of arg are then undefined, but it is not set to
 * point to anything on the heap.
 *
 * EOF and keywords are both errors here and are reported as such.
 */
int parse_identifier_nodefcheck (io_state_t *io, token_t *arg) {
  static regex_t re_ident;
  static int re_ident_ready = FALSE;

  token_t token;
  int ret, i;

  /*
   * compile the pattern the first time this function is used
   */
  if (re_ident_ready == FALSE) {
    ret = regcomp(&re_ident, PATTERN_IDENTIFIER,
                  REG_EXTENDED | REG_ICASE | REG_NOSUB);
    scallcheck(ret == MATCH, "parser couldn't compile regex.");

    re_ident_ready = TRUE;
  }

  /*
   * an identifier is required, so EOF is not acceptable here
   */
  parse_eof(io, FALSE);

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  /*
   * a keyword where an identifier should be is an error
   */
  i = EMPTY;
  while (keyword[i].word != NULL) {
    if (strcmp(token.value, keyword[i].word) == NO_DIFF) {
      fatal_error_expected_identifier_keyword(token);
    }
    i++;
  }

  /*
   * not an identifier: leave the token for the caller
   */
  if (regexec(&re_ident, token.value, NO_NMATCH, NULL, NO_EFLAGS) != MATCH) {
    unget_token(io);
    return FALSE;
  }

  token.type = T_IDENTIFIER;
  *arg = copy_token(token);

  return TRUE;
}


/*
 * parses an identifier WITH a check that it has been defined
 *
 * On success arg->value points to heap allocated storage for the string and
 * TRUE is returned. Otherwise the token is pushed back and FALSE is
 * returned; the contents of arg are then undefined, but it is not set to
 * point to anything on the heap.
 *
 * EOF, keywords, and identifiers missing from the variable table are all
 * errors here and are reported as such.
 */
int parse_identifier (io_state_t *io, token_t *arg, hash_table_t *variable) {
  static regex_t re_ident;
  static int re_ident_ready = FALSE;

  token_t token;
  int ret, i;

  /*
   * compile the pattern the first time this function is used
   */
  if (re_ident_ready == FALSE) {
    ret = regcomp(&re_ident, PATTERN_IDENTIFIER,
                  REG_EXTENDED | REG_ICASE | REG_NOSUB);
    scallcheck(ret == MATCH, "parser couldn't compile regex.");

    re_ident_ready = TRUE;
  }

  /*
   * an identifier is required, so EOF is not acceptable here
   */
  parse_eof(io, FALSE);

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  /*
   * a keyword where a value should be is an error
   */
  i = EMPTY;
  while (keyword[i].word != NULL) {
    if (strcmp(token.value, keyword[i].word) == NO_DIFF) {
      fatal_error_expected_value_keyword(token);
    }
    i++;
  }

  /*
   * not an identifier: leave the token for the caller
   */
  if (regexec(&re_ident, token.value, NO_NMATCH, NULL, NO_EFLAGS) != MATCH) {
    unget_token(io);
    return FALSE;
  }

  /*
   * the identifier must already be in the table of defined variables
   */
  if (!hash_test(variable, token.value)) {
    fatal_error_notdefined(token);
  }

  token.type = T_IDENTIFIER;
  *arg = copy_token(token);

  return TRUE;
}


/*
 * parses a '[' token.
 *
 * Returns TRUE if the next token is '[' (consuming it), reporting an error
 * instead if openbrace_expected is false. Any other token is pushed back and
 * FALSE is returned. EOF is an error here.
 */
int parse_openbrace (io_state_t *io, int openbrace_expected) {
  token_t token;
  int ret;

  /*
   * a token is required, so EOF is not acceptable here
   */
  parse_eof(io, FALSE);

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  if (strcmp("[", token.value) != NO_DIFF) {
    unget_token(io);
    return FALSE;
  }

  if (!openbrace_expected) {
    fatal_error_unexpected_openbrace(token);
  }

  return TRUE;
}


/*
 * parses a ']' token.
 *
 * Returns TRUE if the next token is ']' (consuming it), reporting an error
 * instead if closebrace_expected is false. Any other token is pushed back
 * and FALSE is returned.
 */
int parse_closebrace (io_state_t *io, int closebrace_expected) {
  token_t token;
  int ret;

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  if (strcmp("]", token.value) != NO_DIFF) {
    unget_token(io);
    return FALSE;
  }

  token.type = T_CLOSEBRACE;

  /*
   * a closebrace is not expected here
   */
  if (!closebrace_expected) {
    fatal_error_unexpected_closebrace(token);
  }

  return TRUE;
}


/*
 * parses an EOF token.
 *
 * Returns TRUE if the next token is EOF (consuming it), reporting an error
 * instead if eof_expected is false. Any other token is pushed back and FALSE
 * is returned.
 *
 * This is used heavily as a kind of "assert(!EOF)": it is called on a token
 * before anything else attempts to parse it, which is how EOFs that turn up
 * in the middle of a statement get caught.
 */
int parse_eof (io_state_t *io, int eof_expected) {
  token_t token;
  int ret;

  ret = get_token(io, &token);
  scallcheck(ret, "parser error with I/O");

  if (token.type != T_EOF) {
    unget_token(io);
    return FALSE;
  }

  /*
   * not expecting an EOF
   */
  if (!eof_expected) {
    fatal_error_unexpected_eof(token);
  }

  return TRUE;
}

