package backend

import (
	"os"
	"regexp"
	"strings"
	"sync"

	"github.com/jbrukh/bayesian"
)

var (
	// wordsRegexp is the pattern getWords uses to split input text into
	// words. It is nil until init assigns the compiled expression.
	wordsRegexp *regexp.Regexp
)

// classGood and classBad are the two class labels the classifier is trained
// with (via LearnGood and LearnBad respectively). maxWords is the upper bound
// on the number of words GetBadProbability hands to the classifier; any words
// beyond that are dropped.
const (
	classGood bayesian.Class = "Good"
	classBad  bayesian.Class = "Bad"
	maxWords                 = 50
)

// Classifier is a two-class ("Good" / "Bad") text classifier that can be
// trained incrementally, queried, and saved to disk.
type Classifier interface {
	// LearnGood trains the classifier with data as an example of the
	// "Good" class.
	LearnGood(data string)
	// LearnBad trains the classifier with data as an example of the
	// "Bad" class.
	LearnBad(data string)
	// GetBadProbability returns the classifier's score for data belonging
	// to the "Bad" class.
	GetBadProbability(data string) float64
	// Persist saves the classifier state so it can be reloaded later.
	Persist() error
}

// classifierImpl is the Classifier implementation backed by a
// bayesian.Classifier.
//
// bayes is the underlying classifier, savePath is the file Persist writes to,
// and mutex guards bayes: learn takes the write lock, while GetBadProbability
// and Persist take the read lock.
type classifierImpl struct {
	bayes    *bayesian.Classifier
	savePath string
	mutex    *sync.RWMutex
}

// NewClassifier returns a Classifier whose state is stored at path.
//
// If a file exists at path, the classifier is loaded from it. If no file
// exists, a fresh classifier with the Good and Bad classes is created. Any
// other failure to inspect path (for example a permission error), as well as
// a failure to load an existing file, is returned as an error rather than
// silently starting from an empty classifier, which a later Persist could
// write over the previously trained data.
func NewClassifier(path string) (Classifier, error) {
	var bayes *bayesian.Classifier

	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		loaded, err := bayesian.NewClassifierFromFile(path)
		if err != nil {
			return nil, err
		}
		bayes = loaded
	case os.IsNotExist(statErr):
		// Nothing saved yet: start untrained. The class order here fixes
		// the score index GetBadProbability reads.
		bayes = bayesian.NewClassifier(classGood, classBad)
	default:
		return nil, statErr
	}

	return &classifierImpl{
		bayes:    bayes,
		savePath: path,
		mutex:    &sync.RWMutex{},
	}, nil
}

// init compiles the word-matching pattern once at package load. A word is a
// run of one or more characters that are Unicode letters, digits, '@', '_'
// or '$'; everything else acts as a separator.
func init() {
	wordsRegexp = regexp.MustCompile(`([\p{L}\d@_$]+)`)
}

// LearnGood trains the classifier with the words of data as an example of
// the Good class. Input containing no words is ignored.
func (c *classifierImpl) LearnGood(data string) {
	c.learn(data, classGood)
}

// LearnBad trains the classifier with the words of data as an example of
// the Bad class. Input containing no words is ignored.
func (c *classifierImpl) LearnBad(data string) {
	c.learn(data, classBad)
}

// learn splits data into lower-cased words and feeds them to the underlying
// classifier as an example of class. It does nothing when data contains no
// words. The write lock is held only around the call into the classifier.
func (c *classifierImpl) learn(data string, class bayesian.Class) {
	words := getWords(data)
	if len(words) == 0 {
		return
	}

	c.mutex.Lock()
	// Deferred so the lock is released even if Learn panics, matching the
	// locking style used by Persist.
	defer c.mutex.Unlock()
	c.bayes.Learn(words, class)
}

// GetBadProbability scores data against the classifier and returns the score
// at index 1, which is the Bad class for a classifier created by
// NewClassifier(classGood, classBad).
//
// It returns 0.0 when data contains no words. Only the first maxWords words
// are considered; the rest are ignored.
//
// NOTE(review): a classifier loaded from file is assumed to list its classes
// in the same Good, Bad order — confirm against the persisted data.
func (c *classifierImpl) GetBadProbability(data string) float64 {
	words := getWords(data)
	if len(words) == 0 {
		return 0.0
	}
	if len(words) > maxWords {
		words = words[:maxWords]
	}

	c.mutex.RLock()
	// Deferred so the read lock is released even if scoring panics,
	// matching the locking style used by Persist.
	defer c.mutex.RUnlock()

	scores, _, _ := c.bayes.ProbScores(words)
	return scores[1]
}

// Persist writes the classifier state to the path given to NewClassifier and
// returns any error from the write. It holds the read lock for the duration,
// so it excludes concurrent learn calls but not concurrent readers.
func (c *classifierImpl) Persist() error {
	c.mutex.RLock()
	defer c.mutex.RUnlock()
	return c.bayes.WriteToFile(c.savePath)
}

// getWords extracts every word matched by wordsRegexp from data, in order of
// appearance, and returns them lower-cased. It returns nil when data contains
// no match.
func getWords(data string) []string {
	// The pattern's only capture group spans the whole expression, so the
	// full match is the same text as the first submatch.
	tokens := wordsRegexp.FindAllString(data, -1)
	if tokens == nil {
		return nil
	}

	lowered := make([]string, len(tokens))
	for i := range tokens {
		lowered[i] = strings.ToLower(tokens[i])
	}
	return lowered
}
