package main

import (
	"bytes"
	"fmt"
	"strings"
	"sync"
	"time"

	logger "a.yandex-team.ru/direct/infra/go-libs/pkg/logformat"
)

// START CHECK CheckLoadedStat

// ResponseLoadedStat is the decoding target passed to ClickHouse.executeJSON
// for the "FORMAT JSON" queries issued by CheckLoadedStat.
type ResponseLoadedStat struct {
	Data ClickLoadedStats `json:"data"` // result rows, one per (hostname, ident, log_type) group
	Rows int              `json:"rows"` // value of the "rows" field of the reply
}

// ClickLoadedStat is one row of a line-count query: the number of log lines
// counted for a given host, ident and log type.
type ClickLoadedStat struct {
	Hostname string `json:"hostname"` // host the lines were counted for
	Ident    string `json:"ident"`    // ident part of the topic name
	LogType  string `json:"log_type"` // log type part of the topic name
	Counts   int    `json:"counts"`   // number of lines counted
}

// topic returns the topic name in the form "<ident>--<log_type>".
func (l ClickLoadedStat) topic() string {
	ident, logType := l.Ident, l.LogType
	return fmt.Sprintf("%s--%s", ident, logType)
}

// ClickLoadedStats is a list of line-count rows.
type ClickLoadedStats []ClickLoadedStat

// find looks up the first row that has the same Hostname, Ident and LogType
// as val2 (Counts is not compared). It returns true together with that row,
// or false and a zero ClickLoadedStat when nothing matches.
func (ls ClickLoadedStats) find(val2 ClickLoadedStat) (bool, ClickLoadedStat) {
	for i := range ls {
		candidate := ls[i]
		if candidate.Hostname != val2.Hostname {
			continue
		}
		if candidate.Ident != val2.Ident {
			continue
		}
		if candidate.LogType != val2.LogType {
			continue
		}
		return true, candidate
	}
	return false, ClickLoadedStat{}
}

// LoadedStat is the outcome of comparing the file-side and ClickHouse-side
// line counts for one host and log type, or a record of a failed query (err
// set, code 1, the remaining fields left at their zero values).
//
// NOTE: CheckLoadedStat builds this struct with a positional literal, so the
// field order must not be changed.
type LoadedStat struct {
	fileStat       ClickLoadedStat // lines in file
	logshatterStat ClickLoadedStat // lines in CH
	percentLoaded  float64         // logshatterStat.Counts relative to fileStat.Counts, in percent
	code           int             // 0 means OK; a positive value is rendered as FAIL
	startCheckTime time.Time       // start of the checked time window
	endCheckTime   time.Time       // end of the checked time window
	err            error           // query error, nil for a regular comparison entry
}

// LoadedStats is the list of per-host/per-log-type results returned by
// CheckLoadedStat.
type LoadedStats []LoadedStat

// LogFormat renders the check results as console text with ANSI-coloured
// OK/FAIL markers.
//
// Entries whose status code is 0 are printed only when verbose is true. An
// entry carrying an error is rendered as the marker followed by the error
// text (no newline is appended here); any other entry becomes a full line
// with the host, topic, check window, both line counts, the loaded
// percentage and the status code.
func (stats LoadedStats) LogFormat(verbose bool) string {
	const (
		okMark     = "\033[92mOK\033[0m"
		failMark   = "\033[91mFAIL\033[0m"
		timeLayout = "2006-01-02 15:04:05"
	)

	var out bytes.Buffer
	for i := range stats {
		entry := stats[i]
		if !verbose && entry.code == 0 {
			continue
		}

		// Only strictly positive codes are shown as failures.
		mark := okMark
		if entry.code > 0 {
			mark = failMark
		}

		if entry.err != nil {
			fmt.Fprintf(&out, "%s [CheckLoadedStat] %s", mark, entry.err)
			continue
		}

		inFile, inCH := entry.fileStat, entry.logshatterStat
		fmt.Fprintf(&out,
			"%s [CheckLoadedStat] %s %s (start'%s' end'%s') lines loaded_logshatter=%d file_count=%d percent_loaded=%f status_code=%d\n",
			mark, inFile.Hostname, inFile.topic(),
			entry.startCheckTime.Format(timeLayout), entry.endCheckTime.Format(timeLayout),
			inCH.Counts, inFile.Counts, entry.percentLoaded, entry.code)
	}
	return out.String()
}

// foundInList reports whether value contains at least one element of array
// as a substring. Note that this is a substring test, not an equality test,
// so an empty element matches any value. An empty or nil array yields false.
func foundInList(value string, array []string) bool {
	matched := false
	for _, needle := range array {
		if matched = strings.Contains(value, needle); matched {
			break
		}
	}
	return matched
}

// MonrunFormat folds the non-OK entries into a single monrun result.
//
// It returns status 1 and a message of the form
// "[CheckLoadedStat] <item>;<item>;..." when at least one entry has a
// non-zero code, and status 0 with "[CheckLoadedStat] OK;" otherwise.
//
// Regular entries are reported as "<log_type>(<percent>);" and only the first
// entry per log type is kept. Error entries are reported as the error text
// with surrounding newlines trimmed, and identical error texts are reported
// once.
//
// Error entries are deduplicated separately from regular ones on purpose:
// they carry an empty log type, and matching that empty string with a
// substring test (strings.Contains(x, "") is always true) used to suppress
// every entry that followed the first error, hiding real load failures.
func (stats LoadedStats) MonrunFormat() (int, string) {
	var warn bytes.Buffer
	seenTypes := make(map[string]struct{})
	seenErrs := make(map[string]struct{})

	for _, stat := range stats {
		if stat.code == 0 {
			continue
		}

		if stat.err != nil {
			msg := fmt.Sprintf("%s;", strings.Trim(stat.err.Error(), "\n"))
			if _, dup := seenErrs[msg]; dup {
				continue
			}
			seenErrs[msg] = struct{}{}
			warn.WriteString(msg)
			continue
		}

		logType := stat.fileStat.LogType
		if _, dup := seenTypes[logType]; dup {
			continue
		}
		seenTypes[logType] = struct{}{}
		warn.WriteString(fmt.Sprintf("%s(%f);", logType, stat.percentLoaded))
	}

	if warn.Len() != 0 {
		return 1, fmt.Sprintf("[CheckLoadedStat] %s", warn.String())
	}
	return 0, "[CheckLoadedStat] OK;"
}

// CheckLoadedStat compares, per host and log type, the number of lines
// written to log files (taken from monitor.pushlogs) with the number of rows
// found in the corresponding ClickHouse tables for the time window
// [now-180min, now-150min).
//
// params is expected to be a map[interface{}]interface{} that maps a log type
// to the minimal acceptable loaded percentage (float64 or int). A log type
// without an entry uses 100; a threshold of 0 disables the check for that log
// type. When params is nil or has another type, the defaults are used for
// every log type.
//
// The per-table queries run concurrently, at most 6 at a time. A failed query
// does not abort the check: it is recorded as a LoadedStat with err set and
// code 1. The returned value is a LoadedStats holding those error entries
// followed by one entry per row of the pushlogs result.
func (ts *Tasks) CheckLoadedStat(ch ClickHouse, params interface{}) Status {
	var responseFileStat, responseLogshStat ClickLoadedStats
	var result LoadedStats
	var tmp ResponseLoadedStat

	nowTime := time.Now()
	startCheckTime := nowTime.Add(-180 * time.Minute)
	endCheckTime := nowTime.Add(-150 * time.Minute)

	// Row-count query templates, one per log type; %[1]d and %[2]d are the
	// Unix timestamps of the window start and end.
	requestCountTables := map[string]string{
		"direct-common-data-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-common-data-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM campaign_balance where (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-bsexport-log": "SELECT extract(source,'file:(.*)/v') AS hostname, 'direct' AS ident," +
			" 'direct-bsexport-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM bsexport_data where (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" AND data_type = 'response' GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-dbshards-ids-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-dbshards-ids-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM dbshards_ids WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-mediaplan-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-mediaplan-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM mediaplan WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-messages-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-messages-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM messages WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-moderate-log": "SELECT extract(source,'file:(.*)/v') AS hostname, 'direct' AS ident," +
			" 'direct-moderate-log' AS log_type, toInt32(count(*)) AS counts " +
			" FROM moderate WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d-20)" + // logs are loaded with a delay
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-ppclog-api-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-ppclog-api-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM ppclog_api WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-ppclog-cmd-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-ppclog-cmd-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM ppclog_cmd WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-ppclog-price-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-ppclog-price-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM ppclog_price WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"nginx-access-log": "SELECT hostname, 'direct' AS ident," +
			" 'nginx-access-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM nginx_access WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-trace-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-trace-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM trace WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-balance-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-balance-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM balance WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
		"direct-java-threaddump-log": "SELECT host AS hostname, 'direct' AS ident," +
			" 'direct-java-threaddump-log' AS log_type, toInt32(count(*)) AS counts" +
			" FROM java_threaddump WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
			" toDateTime(%[1]d)<=log_time AND log_time<toDateTime(%[2]d)" +
			" GROUP BY hostname, ident, log_type" +
			" ORDER BY hostname FORMAT JSON",
	}

	// File-side counts: for every file (hostname, name, inode) the growth of
	// count_lines inside the window, summed per hostname/ident/log_type.
	requestPushlogs := "SELECT  hostname, ident, log_type, toInt32(sum(count)) AS counts FROM (" +
		" SELECT hostname, name as path, inode, ident, log_type, toInt32(max(count_lines) - min(count_lines)) AS count" +
		" FROM monitor.pushlogs WHERE (log_date=toDate(%[1]d) OR log_date=toDate(%[2]d)) AND" +
		" toDateTime(%[1]d)<=log_time and log_time<toDateTime(%[2]d)" +
		" GROUP BY hostname, name, inode, ident, log_type ORDER BY hostname)" +
		" GROUP BY hostname, ident, log_type FORMAT JSON"

	pquery := fmt.Sprintf(requestPushlogs, startCheckTime.Unix(), endCheckTime.Unix())
	logger.Debug(pquery)
	if err := ch.executeJSON(pquery, &tmp); err != nil {
		result = append(result, LoadedStat{err: err, code: 1})
	} else {
		responseFileStat = append(responseFileStat, tmp.Data...)
	}

	var (
		done sync.WaitGroup
		// mu guards result and responseLogshStat, which every worker
		// goroutine appends to.
		mu sync.Mutex
	)
	// threadsNum is a counting semaphore limiting the number of queries in flight.
	threadsNum := make(chan int, 6)
	for _, templateQuery := range requestCountTables {
		threadsNum <- 1
		done.Add(1)
		go func(request string) {
			defer done.Done()
			defer func() {
				<-threadsNum
			}()

			var response ResponseLoadedStat
			cquery := fmt.Sprintf(request, startCheckTime.Unix(), endCheckTime.Unix())
			logger.Debug(cquery)
			// Run the query outside the lock; only the shared appends are serialized.
			err := ch.executeJSON(cquery, &response)

			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				result = append(result, LoadedStat{err: err, code: 1})
				return
			}
			responseLogshStat = append(responseLogshStat, response.Data...)
		}(templateQuery)
	}
	done.Wait()

	// A nil or unexpectedly typed params leaves cnf nil; lookups in a nil map
	// simply miss, so every log type falls back to the default threshold.
	cnf, cnfOK := params.(map[interface{}]interface{})
	if !cnfOK && params != nil {
		logger.Debug(fmt.Sprintf("[CheckLoadedStat] unexpected params type %T, using default thresholds", params))
	}

	for _, fstat := range responseFileStat {
		perc := 100.0
		if p, found := cnf[fstat.LogType]; found {
			switch v := p.(type) {
			case float64:
				perc = v
			case int:
				// Integer thresholds are accepted as well as floats.
				perc = float64(v)
			default:
				logger.Debug(fmt.Sprintf("[CheckLoadedStat] unsupported threshold type %T for %s, using default", p, fstat.LogType))
			}
		}
		if perc == 0 {
			continue // ignored perc = 0
		}

		found, lstat := responseLogshStat.find(fstat)
		if !found {
			lstat = ClickLoadedStat{Hostname: fstat.Hostname, LogType: fstat.LogType,
				Ident: fstat.Ident, Counts: 0}
		}

		// Original note: "if the difference in logs exceeds 98%, treat it as crit".
		// NOTE(review): the 98% figure is not visible in the code below; the
		// effective threshold is perc.
		kf := 1.0 // proportion coefficient applied when few lines were loaded (fewer than 100)
		if lstat.Counts < 100 {
			kf = 1.65
		}

		// Nothing counted on the file side: report 100% instead of dividing by
		// zero, which would put NaN/Inf into the output.
		percLoaded := 100.0
		if fstat.Counts != 0 {
			percLoaded = (float64(lstat.Counts) / float64(fstat.Counts)) * 100.0 * kf
		}

		code := 0
		if (lstat.Counts < fstat.Counts) && (percLoaded < perc) {
			code = 1
		}

		result = append(result, LoadedStat{
			fileStat:       fstat,
			logshatterStat: lstat,
			percentLoaded:  percLoaded,
			code:           code,
			startCheckTime: startCheckTime,
			endCheckTime:   endCheckTime,
			err:            nil,
		})
	}
	return result
}
