package yarnparser

import (
	"encoding/json"
	"errors"
	"strconv"
)

// Tokenizer carries the position bookkeeping used while splitting input into
// tokens. Both counters are stamped onto every token created via NewToken.
type Tokenizer struct {
	// col is the column offset and line the line number that the next
	// token will be stamped with.
	col, line int
}

// NewTokenizer returns a Tokenizer positioned at line 1, column 0.
func NewTokenizer() *Tokenizer {
	tok := new(Tokenizer) // col starts at its zero value
	tok.line = 1
	return tok
}

// Tokenize splits input into a flat list of tokens that always ends with an
// EOF token. It returns a nil slice and an error when the input cannot be
// tokenized.
//
// Recognised forms, selected by the first byte of the remaining input:
//   - '\n' produces NEWLINE and advances the line counter.
//   - '#' starts a COMMENT that runs up to, but not including, the next
//     newline. The token value is the comment text without the leading '#'.
//   - A run of spaces directly after a newline produces INDENT whose value is
//     the run length divided by two; an odd run length is an error. Spaces
//     anywhere else are skipped without producing a token.
//   - A double-quoted literal produces STRING; it is decoded by encoding/json,
//     so JSON escape sequences (including \") are honoured.
//   - ':' and ',' produce COLON and COMMA.
//   - A run of ASCII digits produces NUMBER holding an int.
//   - The bare words true and false produce BOOLEAN.
//   - Any other run starting with a byte accepted by isName produces STRING
//     and ends at ':', ' ', '\n', ',' or the end of input.
//   - Any other byte produces ILLEGAL.
//
// Tokens are stamped with the tokenizer's current line and column at the
// moment they are created, i.e. the position of the token's first byte. The
// counters are not reset between calls, so reusing a Tokenizer continues
// counting where the previous call stopped. Note that the first line starts at
// column 0 while every later line starts at column 1, because the width of the
// newline itself is added after the column is reset.
func (t *Tokenizer) Tokenize(input []byte) ([]*Token, error) {
	// startsWithWord reports whether buf begins with word as a whole bare
	// word: followed by the end of input or by one of the bytes that ends a
	// bare name. This keeps names such as "true-case" from being split.
	startsWithWord := func(buf []byte, word string) bool {
		if len(buf) < len(word) || string(buf[:len(word)]) != word {
			return false
		}
		if len(buf) == len(word) {
			return true
		}
		switch buf[len(word)] {
		case ':', ' ', '\n', ',':
			return true
		}
		return false
	}

	lastNewline := false

	var result []*Token
	for len(input) > 0 {
		// chop is the number of bytes consumed by this iteration.
		chop := 0

		switch {
		case input[0] == '\n':
			result = append(result, t.NewToken(NEWLINE, nil))
			chop = 1
			t.line++
			t.col = 0
		case input[0] == '#':
			end := 1
			for end < len(input) && input[end] != '\n' {
				end++
			}
			chop = end
			result = append(result, t.NewToken(COMMENT, string(input[1:end])))
		case input[0] == ' ':
			if !lastNewline {
				// Separator between tokens on the same line.
				chop = 1
				break
			}
			indent := 0
			for indent < len(input) && input[indent] == ' ' {
				indent++
			}
			if indent%2 != 0 {
				return nil, errors.New("invalid number of spaces")
			}
			chop = indent
			result = append(result, t.NewToken(INDENT, indent/2))
		case input[0] == '"':
			// Locate the closing quote. A backslash always escapes the byte
			// after it, so that byte is skipped: \" stays inside the literal
			// while \\" ends it.
			end := len(input)
			for i := 1; i < len(input); i++ {
				if input[i] == '\\' {
					i++
					continue
				}
				if input[i] == '"' {
					end = i + 1
					break
				}
			}
			chop = end

			// An unterminated literal is not valid JSON, so Unmarshal
			// rejects it here.
			var decoded string
			if err := json.Unmarshal(input[:end], &decoded); err != nil {
				return nil, err
			}
			result = append(result, t.NewToken(STRING, decoded))
		case input[0] == ':':
			chop = 1
			result = append(result, t.NewToken(COLON, nil))
		case input[0] == ',':
			chop = 1
			result = append(result, t.NewToken(COMMA, nil))
		case isDigit(input[0]):
			end := 0
			for end < len(input) && isDigit(input[end]) {
				end++
			}
			chop = end
			num, err := strconv.Atoi(string(input[:end]))
			if err != nil {
				return nil, err
			}
			result = append(result, t.NewToken(NUMBER, num))
		// The boolean cases must precede the bare-name case: 't' and 'f' are
		// name bytes, so the name case would otherwise always win.
		case startsWithWord(input, "true"):
			chop = len("true")
			result = append(result, t.NewToken(BOOLEAN, true))
		case startsWithWord(input, "false"):
			chop = len("false")
			result = append(result, t.NewToken(BOOLEAN, false))
		case isName(input[0]):
			end := 0
			for end < len(input) {
				ch := input[end]
				if ch == ':' || ch == ' ' || ch == '\n' || ch == ',' {
					break
				}
				end++
			}
			chop = end
			result = append(result, t.NewToken(STRING, string(input[:end])))
		default:
			chop = 1
			result = append(result, t.NewToken(ILLEGAL, nil))
		}

		t.col += chop
		lastNewline = input[0] == '\n'
		input = input[chop:]
	}
	result = append(result, t.NewToken(EOF, nil))
	return result, nil
}

// NewToken builds a token of the given type and value, stamped with the
// tokenizer's current line and column.
func (t *Tokenizer) NewToken(ttype TokenType, tval TokenValue) *Token {
	tok := new(Token)
	tok.Type, tok.Val = ttype, tval
	tok.Line, tok.Col = t.line, t.col
	return tok
}

// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}

// isName reports whether ch may start a bare name: an ASCII letter, '/' or
// '-'.
func isName(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	case ch == '/', ch == '-':
		return true
	default:
		return false
	}
}
