#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#ifdef USE_NLLIBC
#include <nllibc.h>
#endif

#include "config.h"
#include "lib.h"
#include "objlist.h"
#include "type.h"
#include "syntax.h"
#include "model.h"
#include "lex.h"

/*
 * Table of fixed tokens (keywords, operators, punctuation) known to the
 * lexer.
 *
 * lex_list_read() scans this table from the top and takes the first entry
 * whose text is a prefix of the input, so ORDER MATTERS: a longer token
 * must be listed before any token that is a prefix of it (">>=" before
 * ">>" before ">", "__inline__" before "__inline", and so on).
 *
 * The table is terminated by an entry whose word is NULL.
 */
static struct word {
  c_type_t type;      /* token type recorded for a match */
  char *word;         /* literal text of the token; NULL ends the table */
  unsigned int flags; /* WORD_FLAG_* bits */
#define WORD_FLAG_NONE     0
  /*
   * MARK entries match no matter what character follows them. Entries
   * without MARK are skipped by the lookup when the character right after
   * the match is an identifier character (see is_symbol()).
   */
#define WORD_FLAG_MARK     (1 << 0)
} words[] = {
  /* comment delimiters and quote characters */
  { C_TYPE_COMMENT_IN,   "/*",       WORD_FLAG_MARK },
  { C_TYPE_COMMENT_OUT,  "*/",       WORD_FLAG_MARK },
  { C_TYPE_SQUOAT,       "\'",       WORD_FLAG_MARK },
  { C_TYPE_DQUOAT,       "\"",       WORD_FLAG_MARK },

  /* statement keywords */
  { C_TYPE_IF,           "if",       WORD_FLAG_NONE },
  { C_TYPE_ELSE,         "else",     WORD_FLAG_NONE },
  { C_TYPE_FOR,          "for",      WORD_FLAG_NONE },
  { C_TYPE_WHILE,        "while",    WORD_FLAG_NONE },
  { C_TYPE_DO,           "do",       WORD_FLAG_NONE },
  { C_TYPE_SWITCH,       "switch",   WORD_FLAG_NONE },
  { C_TYPE_CASE,         "case",     WORD_FLAG_NONE },
  { C_TYPE_DEFAULT,      "default",  WORD_FLAG_NONE },
  { C_TYPE_GOTO,         "goto",     WORD_FLAG_NONE },
  { C_TYPE_BREAK,        "break",    WORD_FLAG_NONE },
  { C_TYPE_CONTINUE,     "continue", WORD_FLAG_NONE },
  { C_TYPE_RETURN,       "return",   WORD_FLAG_NONE },

  /* linkage / storage keywords */
  { C_TYPE_EXTERN,       "extern",   WORD_FLAG_NONE },
  { C_TYPE_STATIC,       "static",   WORD_FLAG_NONE },

  /* type specifiers, qualifiers and related keywords */
  { C_TYPE_SIGNED,       "signed",   WORD_FLAG_NONE },
  { C_TYPE_UNSIGNED,     "unsigned", WORD_FLAG_NONE },
  { C_TYPE_SHORT,        "short",    WORD_FLAG_NONE },
  { C_TYPE_LONG,         "long",     WORD_FLAG_NONE },
  { C_TYPE_CHAR,         "char",     WORD_FLAG_NONE },
  { C_TYPE_INT,          "int",      WORD_FLAG_NONE },
  { C_TYPE_FLOAT,        "float",    WORD_FLAG_NONE },
  { C_TYPE_DOUBLE,       "double",   WORD_FLAG_NONE },
  { C_TYPE_VOID,         "void",     WORD_FLAG_NONE },
  { C_TYPE_STRUCT,       "struct",   WORD_FLAG_NONE },
  { C_TYPE_UNION,        "union",    WORD_FLAG_NONE },
  { C_TYPE_ENUM,         "enum",     WORD_FLAG_NONE },
  { C_TYPE_TYPEDEF,      "typedef",  WORD_FLAG_NONE },
  { C_TYPE_CONST,        "const",    WORD_FLAG_NONE },
  { C_TYPE_RESTRICT,     "restrict", WORD_FLAG_NONE },
  { C_TYPE_REGISTER,     "register", WORD_FLAG_NONE },
  { C_TYPE_INLINE,       "inline",   WORD_FLAG_NONE },
  { C_TYPE_VOLATILE,     "volatile", WORD_FLAG_NONE },

  /* sizeof, then the three-character tokens (before their prefixes) */
  { C_TYPE_SIZEOF,       "sizeof",   WORD_FLAG_NONE },
  { C_TYPE_RSHIFTEQ,     ">>=",      WORD_FLAG_MARK },
  { C_TYPE_LSHIFTEQ,     "<<=",      WORD_FLAG_MARK },
  { C_TYPE_VARARG,       "...",      WORD_FLAG_MARK },

  /* two-character operators (before the single-character ones) */
  { C_TYPE_ARROW,        "->",       WORD_FLAG_MARK },
  { C_TYPE_INC,          "++",       WORD_FLAG_MARK },
  { C_TYPE_DEC,          "--",       WORD_FLAG_MARK },
  { C_TYPE_ADDEQ,        "+=",       WORD_FLAG_MARK },
  { C_TYPE_SUBEQ,        "-=",       WORD_FLAG_MARK },
  { C_TYPE_MULEQ,        "*=",       WORD_FLAG_MARK },
  { C_TYPE_DIVEQ,        "/=",       WORD_FLAG_MARK },
  { C_TYPE_MODEQ,        "%=",       WORD_FLAG_MARK },
  { C_TYPE_ANDEQ,        "&=",       WORD_FLAG_MARK },
  { C_TYPE_XOREQ,        "^=",       WORD_FLAG_MARK },
  { C_TYPE_OREQ,         "|=",       WORD_FLAG_MARK },
  { C_TYPE_RSHIFT,       ">>",       WORD_FLAG_MARK },
  { C_TYPE_LSHIFT,       "<<",       WORD_FLAG_MARK },
  { C_TYPE_GTEQ,         ">=",       WORD_FLAG_MARK },
  { C_TYPE_LTEQ,         "<=",       WORD_FLAG_MARK },
  { C_TYPE_EQEQ,         "==",       WORD_FLAG_MARK },
  { C_TYPE_NEQ,          "!=",       WORD_FLAG_MARK },
  { C_TYPE_ANDAND,       "&&",       WORD_FLAG_MARK },
  { C_TYPE_OROR,         "||",       WORD_FLAG_MARK },

  /* single-character operators */
  { C_TYPE_ADD,          "+",        WORD_FLAG_MARK },
  { C_TYPE_SUB,          "-",        WORD_FLAG_MARK },
  { C_TYPE_MUL,          "*",        WORD_FLAG_MARK },
  { C_TYPE_DIV,          "/",        WORD_FLAG_MARK },
  { C_TYPE_MOD,          "%",        WORD_FLAG_MARK },
  { C_TYPE_AND,          "&",        WORD_FLAG_MARK },
  { C_TYPE_XOR,          "^",        WORD_FLAG_MARK },
  { C_TYPE_OR,           "|",        WORD_FLAG_MARK },
  { C_TYPE_INV,          "~",        WORD_FLAG_MARK },
  { C_TYPE_GT,           ">",        WORD_FLAG_MARK },
  { C_TYPE_LT,           "<",        WORD_FLAG_MARK },
  { C_TYPE_EQ,           "=",        WORD_FLAG_MARK },
  { C_TYPE_NOT,          "!",        WORD_FLAG_MARK },
  { C_TYPE_DOT,          ".",        WORD_FLAG_MARK },
  { C_TYPE_COMMA,        ",",        WORD_FLAG_MARK },
  { C_TYPE_QUESTION,     "?",        WORD_FLAG_MARK },

  /* punctuation and brackets */
  { C_TYPE_COLON,        ":",        WORD_FLAG_MARK },
  { C_TYPE_SEMICOLON,    ";",        WORD_FLAG_MARK },
  { C_TYPE_BRACKET_IN,   "(",        WORD_FLAG_MARK },
  { C_TYPE_BRACKET_OUT,  ")",        WORD_FLAG_MARK },
  { C_TYPE_MBRACKET_IN,  "{",        WORD_FLAG_MARK },
  { C_TYPE_MBRACKET_OUT, "}",        WORD_FLAG_MARK },
  { C_TYPE_BBRACKET_IN,  "[",        WORD_FLAG_MARK },
  { C_TYPE_BBRACKET_OUT, "]",        WORD_FLAG_MARK },

  /*
   * Double-underscore extension keywords. Two spellings may share one
   * type (__inline__/__inline, __asm__/__asm).
   */
  { C_TYPE_EXT_EXTENSION,   "__extension__", WORD_FLAG_NONE },
  { C_TYPE_EXT_ATTRIBUTE,   "__attribute__", WORD_FLAG_NONE },
  { C_TYPE_EXT_CONST,       "__const"      , WORD_FLAG_NONE },
  { C_TYPE_EXT_RESTRICT,    "__restrict"   , WORD_FLAG_NONE },
  { C_TYPE_EXT_INLINE,      "__inline__"   , WORD_FLAG_NONE },
  { C_TYPE_EXT_INLINE,      "__inline"     , WORD_FLAG_NONE },
  { C_TYPE_EXT_ASM,         "__asm__"      , WORD_FLAG_NONE },
  { C_TYPE_EXT_ASM,         "__asm"        , WORD_FLAG_NONE },
  { C_TYPE_EXT_DEAD2,       "__dead2"      , WORD_FLAG_NONE },
  { C_TYPE_EXT_PURE2,       "__pure2"      , WORD_FLAG_NONE },
  { C_TYPE_EXT_UNUSED,      "__unused"     , WORD_FLAG_NONE },
  { C_TYPE_EXT_USED,        "__used"       , WORD_FLAG_NONE },
  { C_TYPE_EXT_PACKED,      "__packed"     , WORD_FLAG_NONE },
  { C_TYPE_EXT_ALIGNED,     "__aligned"    , WORD_FLAG_NONE },
  { C_TYPE_EXT_ALIGNOF,     "__alignof"    , WORD_FLAG_NONE },
  { C_TYPE_EXT_SECTION,     "__section"    , WORD_FLAG_NONE },
  { C_TYPE_EXT_WEAK_SYMBOL, "__weak_symbol", WORD_FLAG_NONE },

  /* C11 keywords */
  { C_TYPE_C11_ALIGNAS,       "_Alignas"      , WORD_FLAG_NONE },
  { C_TYPE_C11_ALIGNOF,       "_Alignof"      , WORD_FLAG_NONE },
  { C_TYPE_C11_ATOMIC,        "_Atomic"       , WORD_FLAG_NONE },
  { C_TYPE_C11_NORETURN,      "_Noreturn"     , WORD_FLAG_NONE },
  { C_TYPE_C11_STATIC_ASSERT, "_Static_assert", WORD_FLAG_NONE },
  { C_TYPE_C11_THREAD_LOCAL,  "_Thread_local" , WORD_FLAG_NONE },

  /* Clang nullability qualifiers */
  { C_TYPE_CLANG_NONNULL,  "_Nonnull" , WORD_FLAG_NONE },
  { C_TYPE_CLANG_NULLABLE, "_Nullable", WORD_FLAG_NONE },

  /* compiler builtins recognized by name */
  { C_TYPE_BUILTIN_EXPECT,         "__builtin_expect"        , WORD_FLAG_NONE },
  { C_TYPE_BUILTIN_PREFETCH,       "__builtin_prefetch"      , WORD_FLAG_NONE },
  { C_TYPE_BUILTIN_CONSTANT_P,     "__builtin_constant_p"    , WORD_FLAG_NONE },
  { C_TYPE_BUILTIN_FRAME_ADDRESS,  "__builtin_frame_address" , WORD_FLAG_NONE },
  { C_TYPE_BUILTIN_RETURN_ADDRESS, "__builtin_return_address", WORD_FLAG_NONE },

  /* nlcc-specific keywords */
  { C_TYPE_NLCC_TYPE_TYPE,         "__nlcc_type_type"      , WORD_FLAG_NONE },
  { C_TYPE_NLCC_TYPE_SYMBOL,       "__nlcc_type_symbol"    , WORD_FLAG_NONE },
  { C_TYPE_NLCC_ATTR_FUNCNOINIT,   "__nlcc_attr_funcnoinit", WORD_FLAG_NONE },

  /* '#': lex_list_read() treats the rest of the line as a line directive */
  { C_TYPE_SHARP,        "#",        WORD_FLAG_MARK },

  /* end-of-table sentinel */
  { C_TYPE_NONE,         NULL,       WORD_FLAG_NONE }
};

/*****************************************************************
 * library
 */

/*
 * Return 1 when c can be part of an identifier (a letter, a digit or
 * '_'), otherwise 0.
 *
 * c is converted to unsigned char before it reaches isalnum(): plain char
 * may be signed, and handing a negative value other than EOF to a
 * <ctype.h> function is undefined behavior.
 */
static int is_symbol(char c)
{
  return (isalnum((unsigned char)c) || (c == '_')) ? 1 : 0;
}

/*
 * Convert one hexadecimal digit character to its value (0-15).
 *
 * Decimal digits map to 0-9 and letters of either case to 10 and up.
 * The argument is not validated: any character that is neither a digit
 * nor a lower-case letter is computed as if it were an upper-case letter,
 * so callers should check with isxdigit() first.
 */
static int h2i(int c)
{
  if (isdigit(c))
    return c - '0';

  return c - (islower(c) ? 'a' : 'A') + 10;
}

/*
 * Decode one (possibly escaped) character from the start of buffer.
 *
 *   buffer  text to decode; only the first `size` bytes are examined
 *   size    number of valid bytes in buffer
 *   ch      receives the decoded character value
 *
 * Returns the number of bytes consumed, or -1 when buffer is empty or
 * ends right after a backslash (in that case *ch is not written).
 *
 * Recognized escapes: the simple escapes \t \n \v \f \r \a \b \' \" \\,
 * octal escapes of one to three octal digits, and hexadecimal escapes
 * (\x followed by any number of hex digits; \x with no digits yields 0).
 * A backslash followed by any other character yields that character.
 */
static int read_char(char *buffer, int size, int *ch)
{
  int len, number, c;

  if (size < 1)
    return -1;

  switch (buffer[0]) {
  case '\\':
    if (size < 2)
      return -1;
    switch (buffer[1]) {
    case 't' : number = '\t'; len = 1; break;
    case 'n' : number = '\n'; len = 1; break;
    case 'v' : number = '\v'; len = 1; break;
    case 'f' : number = '\f'; len = 1; break;
    case 'r' : number = '\r'; len = 1; break;
    case 'a' : number = '\a'; len = 1; break;
    case 'b' : number = '\b'; len = 1; break;
    case '\'': number = '\''; len = 1; break;
    case '\"': number = '\"'; len = 1; break;
    case '\\': number = '\\'; len = 1; break;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      /*
       * An octal escape may start with any octal digit and has at most
       * three digits; a fourth digit belongs to the next character.
       * len counts the digits, buffer[1] being the first of them.
       */
      number = 0;
      for (len = 0; (len < 3) && (1 + len < size); len++) {
	c = buffer[1 + len];
	if ((c < '0') || (c > '7'))
	  break;
	number = number * 8 + (c - '0');
      }
      break;
    case 'x':
      number = 0;
      for (len = 0; 2 + len < size; len++) {
	/* <ctype.h> functions need an unsigned char value. */
	c = (unsigned char)buffer[2 + len];
	if (!isxdigit(c))
	  break;
	/* Shift as unsigned so an over-long escape cannot overflow int. */
	number = (int)(((unsigned int)number << 4) | (unsigned int)h2i(c));
      }
      len++; /* the 'x' itself */
      break;
    default: number = buffer[1]; len = 1; break;
    }
    len++; /* the backslash */
    break;
  default: number = buffer[0]; len = 1; break;
  }

  *ch = number;

  return len;
}

/*
 * Parse a numeric constant at the start of buffer.
 *
 *   buffer  text to parse; only the first `size` bytes are examined
 *   size    number of valid bytes in buffer
 *   n       receives the integer value
 *   s       if not NULL, receives C_TYPE_UNSIGNED when a 'u'/'U' suffix
 *           is present, otherwise C_TYPE_SIGNED
 *   t       if not NULL, receives the type chosen from the suffixes
 *           (C_TYPE_INT by default)
 *
 * Returns the number of bytes consumed (digits plus suffixes), or -1 when
 * size is less than 1.
 *
 * A leading "0x"/"0X" selects hexadecimal, any other leading '0' selects
 * octal, everything else is decimal. Digits after a '.' are consumed but
 * do not contribute to the value, and a constant containing a '.' starts
 * out typed as C_TYPE_LONGLONG.
 * NOTE(review): floating-point constants are only skipped over here, not
 * evaluated (no exponent or 'f' suffix handling) - confirm this is the
 * intended level of support.
 *
 * The value is accumulated as unsigned long so that constants exceeding
 * LONG_MAX (e.g. 0xFFFFFFFFFFFFFFFF) wrap instead of triggering signed
 * overflow, and is converted to long when stored.
 */
static int read_number(char *buffer, int size, long *n,
		       c_type_t *s, c_type_t *t)
{
  int len, c, period = 0;
  unsigned long number = 0;
  c_type_t sign = C_TYPE_SIGNED;
  c_type_t type = C_TYPE_INT;

  if (size < 1)
    return -1;

  if ((buffer[0] == '0') && (size >= 2) &&
      ((buffer[1] == 'x') || (buffer[1] == 'X'))) {
    for (len = 2; len < size; len++) {
      /* <ctype.h> functions need an unsigned char value. */
      c = (unsigned char)buffer[len];
      if (c == '.')
	period = 1;
      else if (!isxdigit(c))
	break;
      if (!period)
	number = number * 16 + (unsigned long)h2i(c);
    }
  } else if (buffer[0] == '0') {
    /* Also covers a lone "0" that is the last byte of the buffer. */
    for (len = 1; len < size; len++) {
      c = (unsigned char)buffer[len];
      if (c == '.')
	period = 1;
      else if (!isdigit(c) || (c == '8') || (c == '9'))
	break;
      if (!period)
	number = number * 8 + (unsigned long)(c - '0');
    }
  } else {
    for (len = 0; len < size; len++) {
      c = (unsigned char)buffer[len];
      if (c == '.')
	period = 1;
      else if (!isdigit(c))
	break;
      if (!period)
	number = number * 10 + (unsigned long)(c - '0');
    }
  }

  if (period)
    type = C_TYPE_LONGLONG;

  /* Suffixes: any mix of u/U and l/L; the first other character stops. */
  for (; len < size; len++) {
    c = buffer[len];
    switch (c) {
    case 'u':
    case 'U':
      sign = C_TYPE_UNSIGNED;
      continue;
    case 'l':
    case 'L':
      if (type == C_TYPE_LONG)
	type = C_TYPE_LONGLONG;
      else
	type = C_TYPE_LONG;
      continue;
    default:
      break;
    }
    break;
  }

  *n = (long)number;
  if (s != NULL) *s = sign;
  if (t != NULL) *t = type;

  return len;
}

/*
 * Overwrite the first `size` bytes of buffer with spaces and return how
 * many of the overwritten bytes were newlines. Nothing is touched when
 * size is zero or negative.
 */
static int clear_buffer(char *buffer, int size)
{
  int newlines = 0;
  int remaining;
  char *p;

  for (p = buffer, remaining = size; remaining > 0; p++, remaining--) {
    newlines += (*p == '\n');
    *p = ' ';
  }

  return newlines;
}

/*
 * Split the text delivered by lex_read_func into tokens and return them
 * as a newly created list of syntax entries.
 *
 * lex_read_func(dst, room) is called to append input to the working
 * buffer; a positive return value is taken as the number of bytes stored,
 * any other value adds nothing. Scanning ends when, after leading white
 * space has been dropped and a refill attempted, the buffer is empty.
 *
 * For every token the table `words` is searched first (inside a comment
 * only the comment terminator is looked for). Depending on the match:
 *   - '#'            the rest of the line is consumed as a line directive;
 *                    a leading number on it resets the line counter
 *   - comment marks  switch comment mode on/off; text inside a comment is
 *                    discarded one character at a time
 *   - '\''           a character constant, stored as C_TYPE_NUMBER
 *   - '"'            a string literal, stored as C_TYPE_STRING
 *   - anything else  stored with the type from the table
 * Input that matches no table entry is read as a number when it starts
 * with a digit, otherwise as an identifier (C_TYPE_WORD).
 *
 * Consumed text is blanked with clear_buffer() (which also counts the
 * newlines in it) and removed by the white-space skipping of the next
 * pass. Malformed input is reported through ERREXIT(1).
 */
objlist_t lex_list_read(lex_read_func_t lex_read_func)
{
  objlist_t list;
  int size = 0, len, r, i, comment = 0, c, line = 1;
  long number = 0;
  struct word *w;
  c_type_t sign, type;
  model_t model;

  /*
   * To reduce the stack initialization size,
   * buffer is put after variables with initialization values.
   */
  char buffer[256];
  char string[1024];

  list = objlist_create(NULL);

  while (1) {
    /* Drop leading white space and top the buffer up with fresh input. */
    do {
      for (len = 0; len < size; len++) {
	/* <ctype.h> functions need an unsigned char value. */
	if (!isspace((unsigned char)buffer[len]))
	  break;
      }
      if (len > 0) {
	line += clear_buffer(buffer, len);
	memmove(buffer, buffer + len, size - len);
	size -= len;
      }

      saveline(line);

      if (size >= sizeof(buffer))
	ERREXIT(1);
      r = lex_read_func(buffer + size, sizeof(buffer) - size);
      if (r > 0)
	size += r;
    } while ((size > 0) && isspace((unsigned char)buffer[0]));

    if (size == 0)
      break;

    /* Look for the first table entry that matches the start of buffer. */
    for (w = words; w->word; w++) {
      if (comment) {
	if (w->type != C_TYPE_COMMENT_OUT)
	  continue;
      }

      len = strlen(w->word);
      if (size < len)
	continue;
      if (strncmp(w->word, buffer, len))
	continue;

      /*
       * A keyword followed by an identifier character is only the start
       * of a longer identifier. buffer[len] is inspected only while it
       * lies inside the valid data: beyond `size` the buffer holds stale
       * or uninitialized bytes.
       */
      if (!(w->flags & WORD_FLAG_MARK) &&
	  (len < size) && is_symbol(buffer[len]))
	continue;

      break;
    }

    if (w->word != NULL) { /* found */
      if (w->type == C_TYPE_SHARP) { /* # <line number> <file name> args... */
	i = 0;
	for (len = 0; 1 + len < size; len++) {
	  if (buffer[1 + len] == '\n')
	    break;
	  if (isspace((unsigned char)buffer[1 + len]))
	    continue;
	  switch (i) {
	  case 0:
	    if (!isdigit((unsigned char)buffer[1 + len]))
	      break;
	    r = read_number(buffer + (1 + len), size - (1 + len),
			    &number, NULL, NULL);
	    if (r <= 0)
	      ERREXIT(1);
	    /* The newline ending this line bumps the counter to `number`. */
	    line = number - 1;
	    len += (r - 1);
	    i++;
	    break;
	  case 1:
	  default:
	    break;
	  }
	}
	/* No newline inside the buffered data. */
	if (1 + len == size)
	  ERREXIT(1);
	line += clear_buffer(buffer, 1 + len + 1);
	continue;
      }
      if (w->type == C_TYPE_COMMENT_IN) {
	comment = 1;
      }
      if (w->type == C_TYPE_COMMENT_OUT) {
	comment = 0;
      }
      if (w->type == C_TYPE_SQUOAT) {
	len = read_char(buffer + 1, size - 1, &c);
	if (len <= 0)
	  ERREXIT(1);
	/* The closing quote must lie inside the valid part of the buffer. */
	if ((1 + len >= size) || (buffer[1 + len] != w->word[0]))
	  ERREXIT(1);
	model = model_create(C_TYPE_INT);
	syntax_insert_tail(list, C_TYPE_NUMBER, line, model, c, NULL);
	line += clear_buffer(buffer, 1 + len + 1);
	continue;
      }
      if (w->type == C_TYPE_DQUOAT) {
	for (i = 0, len = 0; 1 + len < size;) {
	  if (buffer[1 + len] == w->word[0])
	    break;
	  r = read_char(buffer + 1 + len, size - (1 + len), &c);
	  if (r < 0)
	    ERREXIT(1);
	  if (i + 1 >= sizeof(string))
	    ERREXIT(1);
	  string[i++] = c;
	  len += r;
	}
	/* No closing quote inside the buffered data. */
	if (1 + len == size)
	  ERREXIT(1);
	string[i] = '\0';
	model = model_create(C_TYPE_CHAR);
	syntax_insert_tail(list, C_TYPE_STRING, line, model, i, string);
	line += clear_buffer(buffer, 1 + len + 1);
	continue;
      }

      syntax_insert_tail(list, w->type, line, NULL, 0, NULL);

      line += clear_buffer(buffer, len);
      continue;
    }

    /* not found */

    if (comment) {
      /* Inside a comment: throw the text away one character at a time. */
      line += clear_buffer(buffer, 1);
      continue;
    }

    if (isdigit((unsigned char)buffer[0])) {
      len = read_number(buffer, size, &number, &sign, &type);
      if (len <= 0)
	ERREXIT(1);
      model = model_create(type);
      if (sign == C_TYPE_UNSIGNED)
	model->flags |= MODEL_FLAG_UNSIGNED;
      syntax_insert_tail(list, C_TYPE_NUMBER, line, model, number, NULL);
      line += clear_buffer(buffer, len);
      continue;
    } else { /* symbol */
      for (len = 0; len < size; len++) {
	if (!is_symbol(buffer[len]))
	  break;
      }

      if (len > 0) {
	if (len + 1 >= sizeof(string))
	  ERREXIT(1);
	memcpy(string, buffer, len);
	string[len] = '\0';
	syntax_insert_tail(list, C_TYPE_WORD, line, NULL, 0, string);
	line += clear_buffer(buffer, len);
	continue;
      }
    }

    /* Neither a known token, a number nor an identifier. */
    ERREXIT(1);
  }

  return list;
}

/*
 * Print every entry of list to standard output.
 *
 * Entries of type C_TYPE_SYMBOL are printed by value: a label prints its
 * string value and a variable prints its numeric value (nothing is
 * printed when the value has another type); any other symbol type goes
 * to ERREXIT(1). All remaining entries print the text returned by
 * type_get_word() for their type.
 *
 * After each entry a newline is written if its line number differs from
 * the one remembered from the previous entry, otherwise a single space.
 */
void lex_list_print(objlist_t list)
{
  objentry_t cur;
  syntax_t syn;
  int prev_line = 0;

  cur = objlist_get_head(list);
  while (!objlist_is_end(list, cur)) {
    syn = objentry_get_syntax(cur);

    if (syn->type != C_TYPE_SYMBOL) {
      printf("%s", type_get_word(syn->type));
    } else if (syn->obj.symbol->type == C_TYPE_LABEL) {
      if (syn->obj.symbol->value->type == C_TYPE_STRING)
	printf("%s", syn->obj.symbol->value->obj.string.s);
    } else if (syn->obj.symbol->type == C_TYPE_VARIABLE) {
      if (syn->obj.symbol->value->type == C_TYPE_NUMBER)
	printf("%ld", syn->obj.symbol->value->obj.number);
    } else {
      ERREXIT(1);
    }

    /* Separator: newline when the source line changed, else a space. */
    if (syn->line == prev_line) {
      printf(" ");
    } else {
      printf("\n");
      prev_line = syn->line;
    }

    cur = objentry_get_next(cur);
  }
}
