package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"a.yandex-team.ru/solomon/tools/discovery/internal/config"
	"a.yandex-team.ru/solomon/tools/discovery/internal/holder"
	"a.yandex-team.ru/solomon/tools/discovery/internal/metrics"
	"a.yandex-team.ru/solomon/tools/discovery/internal/unroller"

	"a.yandex-team.ru/solomon/tools/discovery/proto"
)

// ==========================================================================================

// SmallBucket is a small bounded "recently seen" set made of two generations
// of timestamped keys. Keys are recorded in data0; as soon as a previously
// unseen key brings data0 to maxSize entries, data0 becomes data1, the old
// data1 is dropped and a fresh data0 is started.
//
// delta is the age below which a key counts as recently seen.
// SmallBucket does no locking of its own.
type SmallBucket struct {
	maxSize int
	delta   time.Duration
	data0   map[interface{}]time.Time
	data1   map[interface{}]time.Time
}

// NewSmallBucket returns an empty bucket that rotates its generations every
// maxSize new keys and treats keys seen less than maxAge ago as present.
func NewSmallBucket(maxSize int, maxAge time.Duration) *SmallBucket {
	return &SmallBucket{
		maxSize: maxSize,
		delta:   maxAge,
		data0:   make(map[interface{}]time.Time, maxSize),
		data1:   make(map[interface{}]time.Time),
	}
}

// Has reports whether v was last seen less than delta ago, and in every case
// records the current time as the last sighting of v.
//
// v must be usable as a map key. Generations are rotated only when v was not
// known to either generation.
func (s *SmallBucket) Has(v interface{}) bool {
	now := time.Now()

	// Look the key up in the young generation first, then in the old one.
	seenAt, known := s.data0[v]
	if !known {
		seenAt, known = s.data1[v]
	}

	// Every call refreshes the timestamp in the young generation.
	s.data0[v] = now
	if known {
		return now.Sub(seenAt) < s.delta
	}

	// A brand new key may have filled the young generation: rotate.
	if len(s.data0) >= s.maxSize {
		s.data1 = s.data0
		s.data0 = make(map[interface{}]time.Time, s.maxSize)
	}
	return false
}

// ==========================================================================================

// ForceReqKey identifies one force-update request: the environment, the
// service and the hash carried by the request's DataRef. ForceUpdate uses it
// as the key when filtering out requests that were already seen recently.
type ForceReqKey struct {
	EnvType     string
	ServiceName string
	Hash        uint64
}

// DataKey identifies a service within an environment. It is the key of both
// the holder map (DataStore.HolderMap) and the configured source map.
type DataKey struct {
	EnvType     string
	ServiceName string
}

// DataStore is the snapshot of discovery data published by the Processor.
// updateDataStoreSafe builds a new DataStore value and stores it atomically
// whenever the combined hash changes; readers obtain the current snapshot
// through the Processor getters.
type DataStore struct {
	Hash                 uint64                     // hash of discovery data
	HolderMap            map[DataKey]*holder.Holder // discovery data holders
	Structure            map[string][]string        // map from env to services list
	DNS                  map[string]net.IP          // map from hosts to ip for DNS
	UpdatesFailedUnsafe  int64                      // counter for updates failed due to safety
	UpdatesForcedBadHash int64                      // counter for force updates, resulted in different hash
}

// ==========================================================================================

// Processor owns the discovery data: it periodically refreshes the data store
// and the metrics from a background goroutine (cron), serves read requests
// from the atomically published snapshots and handles force-update requests.
//
// Synchronization, as used by the methods in this file:
//   - mutex is write-locked for the whole of updateDataStoreSafe and
//     read-locked by Save and Shutdown;
//   - cronWg tracks the cron goroutine and stopChan tells it to exit;
//   - onUpdateMutex is held while onUpdate callbacks are running and while a
//     new callback is being registered;
//   - forcedMutex serializes ForceUpdate, including its use of forcedRequests
//     and forcedSource;
//   - dataSourceMap, dataStore and metrics are atomic.Value holders whose
//     stored types are given in the field comments.
type Processor struct {
	LogPrefix             string
	DataSaveDir           string
	DataUpdateInterval    time.Duration
	SafetyParams          holder.SafetyParams
	VerboseLevel          int
	unroller              *unroller.Unroller
	stopChan              chan struct{}
	mutex                 sync.RWMutex
	cronWg                sync.WaitGroup
	onUpdateMutex         sync.Mutex
	onUpdateCallbacks     []func([]*pbData.DataRef)
	forcedMutex           sync.Mutex
	forcedRequests        *SmallBucket
	forcedSource          *SmallBucket
	dataSourceMap         atomic.Value // map[DataKey]*holder.Source
	dataStore             atomic.Value // *DataStore
	metrics               atomic.Value // *metrics.Metrics
	metricsUpdateInterval time.Duration
	startTime             time.Time
}

// NewProcessor builds a Processor from the main config, performs the initial
// data load synchronously and then starts the background cron goroutine.
//
// It returns an error when a service refers to a ports set that is missing
// from c.ServiceData.Ports, or when the initial load (prepare) fails; in both
// cases no goroutine has been started. On success the cron goroutine runs
// until Shutdown is called.
func NewProcessor(c *config.MainConfig, unroller *unroller.Unroller) (*Processor, error) {
	safety := holder.SafetyParams{
		SafeChangeUpFraction:   c.SafeChangeUpFraction,
		SafeChangeDownFraction: c.SafeChangeDownFraction,
		SafeChangeUpCount:      c.SafeChangeUpCount,
		SafeChangeDownCount:    c.SafeChangeDownCount,
		GrowthIsAlwaysSafe:     c.GrowthIsAlwaysSafe,
		// XXX Do we need EmptyGroupIsOk=true at start?
		// Probably yes, otherwise empty group could prevent service to start
		//
		EmptyGroupIsOk:   c.EmptyGroupIsOk,
		BestEffortUnroll: c.BestEffortUnroll,
	}

	// forcedRequests: check and filter out these many force requests within
	// this period.
	// forcedSource: check and filter out these many sources of force requests
	// within this period.
	p := &Processor{
		LogPrefix:             "[processor] ",
		DataSaveDir:           c.DataSaveDir,
		DataUpdateInterval:    c.DataUpdateInterval.Duration,
		SafetyParams:          safety,
		VerboseLevel:          c.VerboseLevel,
		unroller:              unroller,
		stopChan:              make(chan struct{}),
		onUpdateCallbacks:     []func([]*pbData.DataRef){},
		forcedRequests:        NewSmallBucket(100, 60*time.Second),
		forcedSource:          NewSmallBucket(100, 10*time.Second),
		metricsUpdateInterval: 5 * time.Second,
		startTime:             time.Now(),
	}

	// Translate the configured services into per-key sources.
	sources := make(map[DataKey]*holder.Source)
	for env, services := range c.ServiceData.Services {
		for service, srcCfg := range services {
			key := DataKey{EnvType: env, ServiceName: service}

			ports, known := c.ServiceData.Ports[srcCfg.Ports]
			if !known {
				return nil, fmt.Errorf("processor prepare failed, unknown ports=%s for key=%v", srcCfg.Ports, key)
			}

			// Inverse mapping: decimal port number -> port name.
			inverse := make(map[string]string, len(ports))
			for portName, portNum := range ports {
				inverse[strconv.Itoa(portNum)] = portName
			}

			sources[key] = &holder.Source{
				Ports:     ports,
				PortsInv:  inverse,
				Endpoints: srcCfg.Endpoints,
			}
		}
	}

	p.dataSourceMap.Store(sources)
	p.dataStore.Store(&DataStore{})
	p.metrics.Store(metrics.NewMetrics())

	err := p.prepare()
	if err != nil {
		return nil, fmt.Errorf("processor prepare failed, %v", err)
	}

	p.cronWg.Add(1)
	go p.cron()

	p.log(0, nil, "started")
	return p, nil
}

// log prints a message through the standard logger when the processor's
// VerboseLevel is at least lvl. The message is LogPrefix followed by format
// (expanded with v); when ts is non-nil, the time elapsed since *ts is
// appended after a ", " separator.
func (p *Processor) log(lvl int, ts *time.Time, format string, v ...interface{}) {
	if p.VerboseLevel < lvl {
		return
	}

	layout := p.LogPrefix + format
	if ts != nil {
		layout += ", " + time.Since(*ts).String()
	}
	log.Printf(layout, v...)
}

// ==========================================================================================

// cron is the background loop of the processor. It refreshes the data store
// every DataUpdateInterval (with the safety check enabled) and the metrics
// every metricsUpdateInterval, and returns once stopChan is closed.
// It marks cronWg as done on exit.
func (p *Processor) cron() {
	defer p.cronWg.Done()

	// Periodic updates always run with the safety check on.
	const safetyCheck = true

	updateTicker := time.NewTicker(p.DataUpdateInterval)
	defer updateTicker.Stop()
	metricsTicker := time.NewTicker(p.metricsUpdateInterval)
	defer metricsTicker.Stop()

	for {
		select {
		case <-p.stopChan:
			p.log(1, nil, "exiting cron task")
			return

		case <-updateTicker.C:
			err := p.updateDataStoreSafe(nil, safetyCheck)
			if err != nil {
				p.log(0, nil, "%v", err)
			}

		case <-metricsTicker.C:
			started := time.Now()
			p.updateMetricsSafe()
			p.log(1, &started, "metrics updated")
		}
	}
}

// prepare performs the initial, synchronous load: one data store update with
// the safety check disabled, followed by a metrics refresh. It returns the
// error of the data store update, if any.
func (p *Processor) prepare() error {
	const safetyCheck = false

	err := p.updateDataStoreSafe(nil, safetyCheck)
	if err != nil {
		return err
	}

	p.updateMetricsSafe()
	p.log(1, nil, "data store and metrics updated")
	return nil
}

// ==========================================================================================

// Shutdown stops the background cron goroutine and waits for it to exit,
// then waits for any data store update still in progress to finish.
//
// The read lock is deliberately NOT held while waiting for cron: cron's
// update path takes the write lock on the same mutex, so waiting for cron
// with the read lock held could block both goroutines forever.
//
// Shutdown must be called at most once: it closes stopChan.
func (p *Processor) Shutdown() {
	p.log(1, nil, "begin shutdown")
	defer p.log(1, nil, "stopped")

	// Ask cron to stop and let it finish whatever it is doing.
	close(p.stopChan)
	p.cronWg.Wait()

	// Acquiring the read lock blocks until a concurrently running update
	// (which holds the write lock) has completed.
	p.mutex.RLock()
	defer p.mutex.RUnlock()
}

// Save calls Save on every holder of the current data store and stops at the
// first error, which is returned. It holds the read lock of p.mutex for the
// whole operation, so it does not run concurrently with a data store update.
func (p *Processor) Save() error {
	p.mutex.RLock()
	defer p.mutex.RUnlock()

	store := p.dataStore.Load().(*DataStore)
	for _, h := range store.HolderMap {
		err := h.Save()
		if err != nil {
			return err
		}
	}
	return nil
}

// AddOnUpdateCallback registers cb to be called with the refs of updated keys
// after every successful data store update, and asynchronously calls it once
// with the refs of all keys currently in the data store.
//
// The snapshot of current data is taken while holding onUpdateMutex. Update
// notifications run under the same mutex, so an update whose callbacks ran
// before the registration is already reflected in the snapshot, and callbacks
// that run after the registration include cb.
//
// The function returns immediately; onUpdateMutex is released by the spawned
// goroutine once cb has returned.
func (p *Processor) AddOnUpdateCallback(cb func([]*pbData.DataRef)) {
	p.log(1, nil, "ready to add onUpdate callback")

	// Released by the goroutine below.
	p.onUpdateMutex.Lock()

	// new callback should know about current data
	hm := p.dataStore.Load().(*DataStore).HolderMap
	keys := make([]*pbData.DataRef, 0, len(hm))
	for key, h := range hm {
		currHash, prevHash := h.GetHashes()
		keys = append(keys, &pbData.DataRef{
			EnvType:     key.EnvType,
			ServiceName: key.ServiceName,
			Hash:        currHash,
			PrevHash:    prevHash,
		})
	}

	p.onUpdateCallbacks = append(p.onUpdateCallbacks, cb)
	go func() {
		defer p.onUpdateMutex.Unlock()
		cb(keys)
		p.log(1, nil, "onUpdate callback added")
	}()
}

// ==========================================================================================

// GetDNSMap returns the host-to-IP map of the currently published data store.
// The returned map is the store's own map, not a copy.
func (p *Processor) GetDNSMap() map[string]net.IP {
	store := p.dataStore.Load().(*DataStore)
	return store.DNS
}

// GetDataStore returns the currently published data store snapshot.
func (p *Processor) GetDataStore() *DataStore {
	current := p.dataStore.Load()
	return current.(*DataStore)
}

// GetDataStruct returns the env -> services structure of the current data
// store as indented JSON. A marshalling failure is logged and reported to the
// caller as a generic error.
func (p *Processor) GetDataStruct() ([]byte, error) {
	structure := p.dataStore.Load().(*DataStore).Structure

	encoded, err := json.MarshalIndent(structure, "", "    ")
	if err != nil {
		p.log(0, nil, "failed to marshal full data structure %v", err)
		return nil, fmt.Errorf("failed to create full data structure")
	}
	return encoded, nil
}

// GetServicesByEnv returns the list of services known for env as indented
// JSON. It returns an error when env is not present in the current data
// store, or (after logging the cause) when the list cannot be marshalled.
func (p *Processor) GetServicesByEnv(env string) ([]byte, error) {
	structure := p.dataStore.Load().(*DataStore).Structure

	services, known := structure[env]
	if !known {
		return nil, fmt.Errorf("unknown environment")
	}

	encoded, err := json.MarshalIndent(services, "", "    ")
	if err != nil {
		p.log(0, nil, "failed to marshal service list for %s, %v", env, err)
		return nil, fmt.Errorf("failed to create service list for %s", env)
	}
	return encoded, nil
}

// GetEnvsByService returns, as indented JSON, the environments whose service
// list contains service. The order of environments follows map iteration and
// is therefore not fixed. It returns an error when no environment has the
// service, or (after logging the cause) when the list cannot be marshalled.
func (p *Processor) GetEnvsByService(service string) ([]byte, error) {
	structure := p.dataStore.Load().(*DataStore).Structure

	envs := make([]string, 0)
	for env, services := range structure {
		for i := range services {
			if services[i] == service {
				// One match is enough for this environment.
				envs = append(envs, env)
				break
			}
		}
	}
	if len(envs) == 0 {
		return nil, fmt.Errorf("unknown service")
	}

	encoded, err := json.MarshalIndent(envs, "", "    ")
	if err != nil {
		p.log(0, nil, "failed to marshal env list for %s, %v", service, err)
		return nil, fmt.Errorf("failed to create env list for %s", service)
	}
	return encoded, nil
}

// GetDiscoveryData looks up the holder for (env, service) in the current data
// store and returns the second byte slice produced by its GetBytes(dc, port).
// It returns an error when the key is unknown or when GetBytes fails.
func (p *Processor) GetDiscoveryData(env, service, dc, port string) ([]byte, error) {
	key := DataKey{EnvType: env, ServiceName: service}

	h, found := p.dataStore.Load().(*DataStore).HolderMap[key]
	if !found {
		return nil, fmt.Errorf("unknown environment + service")
	}

	_, data, err := h.GetBytes(dc, port)
	if err != nil {
		return nil, err
	}
	return data, nil
}

// GetHostsList looks up the holder for (env, service) in the current data
// store and returns the first byte slice produced by its GetBytes(dc, "").
// It returns an error when the key is unknown or when GetBytes fails.
func (p *Processor) GetHostsList(env, service, dc string) ([]byte, error) {
	key := DataKey{EnvType: env, ServiceName: service}

	h, found := p.dataStore.Load().(*DataStore).HolderMap[key]
	if !found {
		return nil, fmt.Errorf("unknown environment + service")
	}

	hosts, _, err := h.GetBytes(dc, "")
	if err != nil {
		return nil, err
	}
	return hosts, nil
}

// GetMetrics returns the most recently published metrics snapshot.
func (p *Processor) GetMetrics() *metrics.Metrics {
	current := p.metrics.Load()
	return current.(*metrics.Metrics)
}

// ForceUpdate handles a request from the source called name to force-update
// the keys referenced by forceRefs.
//
// Requests are throttled in two steps, both under forcedMutex:
//   - the whole request is dropped when the same source name was seen
//     recently (forcedSource);
//   - individual refs are dropped when the same (env, service, hash) was
//     requested recently (forcedRequests).
//
// Only the refs that survive the second filter are passed on to the data
// store update, which runs with the safety check enabled. Failures are
// logged; nothing is returned to the caller.
func (p *Processor) ForceUpdate(forceRefs []*pbData.DataRef, name string) {
	p.forcedMutex.Lock()
	defer p.forcedMutex.Unlock()

	if p.forcedSource.Has(name) {
		p.log(1, nil, "skipping request to force update %d keys from %s: too many requests from source", len(forceRefs), name)
		return
	}
	p.log(1, nil, "got request to force update %d keys from %s", len(forceRefs), name)

	// Keep only the refs that were not requested recently.
	forceRefsCut := make([]*pbData.DataRef, 0, len(forceRefs))
	for _, ref := range forceRefs {
		if !p.forcedRequests.Has(ForceReqKey{EnvType: ref.EnvType, ServiceName: ref.ServiceName, Hash: ref.Hash}) {
			forceRefsCut = append(forceRefsCut, ref)
		}
	}
	if len(forceRefsCut) == 0 {
		p.log(1, nil, "no need to force update: all keys were updated recently")
		return
	}

	// Pass the filtered list: the unfiltered one would bypass the per-key
	// throttling whenever at least one ref is new.
	safetyCheck := true
	if err := p.updateDataStoreSafe(forceRefsCut, safetyCheck); err != nil {
		p.log(0, nil, "failed to force update: %v", err)
	}
}

// ==========================================================================================

// updateMetricsSafe builds a fresh metrics snapshot from the currently
// published data store and atomically replaces the previous snapshot.
//
// Metrics:
//   - uptime, updates_failed_unsafe, updates_forced_bad_hash (no labels);
//   - per holder, labelled with env and service_name: hosts.count,
//     data.stale_seconds, data.requests_total, data.updates_total,
//     data.updates_forced, data.updates_failed_unsafe,
//     data.updates_failed_unroll.
func (p *Processor) updateMetricsSafe() {
	m := metrics.NewMetrics()

	dataStore := p.dataStore.Load().(*DataStore)

	m.AddDGauge(nil, "uptime", time.Since(p.startTime).Seconds())
	// UpdatesFailedUnsafe is incremented with atomic.AddInt64 on the published
	// store by updateDataStoreSafe, possibly from another goroutine, so it
	// must be read atomically as well.
	m.AddRate(nil, "updates_failed_unsafe", atomic.LoadInt64(&dataStore.UpdatesFailedUnsafe))
	m.AddRate(nil, "updates_forced_bad_hash", dataStore.UpdatesForcedBadHash)

	for key, h := range dataStore.HolderMap {
		labels := map[string]string{
			"env":          key.EnvType,
			"service_name": key.ServiceName,
		}
		hm := h.GetHolderMetrics()
		m.AddIGauge(labels, "hosts.count", hm.HostsCount)
		m.AddDGauge(labels, "data.stale_seconds", hm.StaleSeconds)
		m.AddRate(labels, "data.requests_total", hm.RequestsTotal)
		m.AddRate(labels, "data.updates_total", hm.UpdatesTotal)
		m.AddRate(labels, "data.updates_forced", hm.UpdatesForced)
		m.AddRate(labels, "data.updates_failed_unsafe", hm.UpdatesFailedUnsafe)
		m.AddRate(labels, "data.updates_failed_unroll", hm.UpdatesFailedUnroll)
	}
	p.metrics.Store(m)
}

// ==========================================================================================

// updateDataStoreSafe refreshes the holders from their configured sources and,
// when the combined hash changed and the change passes the store-level safety
// check, publishes a new DataStore and schedules the onUpdate callbacks.
//
// forceRefs == nil requests a regular update of every configured key. A
// non-nil forceRefs restricts the update to the referenced keys whose current
// hash differs from the hash in the ref (refs without a holder are skipped);
// keys that have no holder yet are updated in either mode. If no referenced
// key needs an update, the function returns nil without touching anything.
//
// safetyCheck is passed through to holder.Update and also enables the
// store-level size-change check below.
//
// The whole call runs under the write lock of p.mutex, so updates are
// serialized. Existing holders are reused and updated in place, so their
// state may change even when no new DataStore is published.
//
// It returns an error only when the store-level safety check rejects the
// change. It panics if holders report updates while the combined hash is
// unchanged.
func (p *Processor) updateDataStoreSafe(forceRefs []*pbData.DataRef, safetyCheck bool) error {
	var added, removed, updated, failed int
	var dataStoreHash uint64
	var minLastUpdate *time.Time
	var forceKeys map[DataKey]uint64
	var wg sync.WaitGroup
	var mutex sync.Mutex
	reqTime := time.Now()

	// Lock for updates, one at a time
	p.mutex.Lock()
	defer p.mutex.Unlock()
	p.log(1, &reqTime, "updating data store (forced=%v): got lock", forceRefs != nil)

	dataSourceMap := p.dataSourceMap.Load().(map[DataKey]*holder.Source)
	dataStore := p.dataStore.Load().(*DataStore)

	// Check keys to force update under lock
	if forceRefs != nil {
		// forceKeys maps each key that needs a forced update to the hash
		// carried by the request.
		forceKeys = map[DataKey]uint64{}
		for _, ref := range forceRefs {
			key := DataKey{EnvType: ref.EnvType, ServiceName: ref.ServiceName}
			h, ok := dataStore.HolderMap[key]

			// If key is not present, skip it
			// XXX when we will be updating the config on the fly, this must be changed
			if !ok {
				continue
			}
			// Update
			// XXX eventually the logic here is currHash != ref.Hash
			// but there could be some cases, where this logic will fail.
			// Need more thorough examination.
			currHash, _ := h.GetHashes()
			if currHash != ref.Hash {
				forceKeys[key] = ref.Hash
			}
		}
		if len(forceKeys) == 0 {
			p.log(1, &reqTime, "no need to force update keys: up to date")
			return nil
		}
		// Force update only once per this delta
		// (the timestamp is handed to holder.Update; how the holder uses it
		// is defined there)
		minLastUpdate = &time.Time{}
		*minLastUpdate = reqTime.Add(-1000 * time.Millisecond)

		keySlice := make([]DataKey, 0, len(forceKeys))
		for k := range forceKeys {
			keySlice = append(keySlice, k)
		}
		p.log(0, &reqTime, "force updating keys: %v", keySlice)
	}

	// oldSize counts the holders of the published store, newSize the
	// configured sources; hm is the holder map of the candidate store.
	oldSize := len(dataStore.HolderMap)
	newSize := len(dataSourceMap)
	hm := make(map[DataKey]*holder.Holder, newSize)
	updatedRefs := make([]*pbData.DataRef, 0)
	updatesForcedBadHash := dataStore.UpdatesForcedBadHash

	// Start updates
	for key, src := range dataSourceMap {
		// Reuse the existing holder for the key or create a new one.
		h, ok := dataStore.HolderMap[key]
		if !ok {
			h = holder.NewHolder(key.EnvType, key.ServiceName, p.DataSaveDir, &p.SafetyParams, p.unroller, p.VerboseLevel)
			added++
		}
		hm[key] = h

		// Update the holder when this is a regular update (forceKeys is nil),
		// when the key was selected for a forced update, or when the holder
		// has just been created. Reading from a nil forceKeys yields
		// (0, false).
		refHash, forceUpdate := forceKeys[key]
		if forceKeys == nil || forceUpdate || !ok {
			// Create goroutines to update data
			wg.Add(1)
			go func(h *holder.Holder, src *holder.Source, refHash uint64) {
				defer wg.Done()

				isUpdated, isFailed := h.Update(src, minLastUpdate, safetyCheck)

				// The local mutex guards the counters and updatedRefs shared
				// by all update goroutines.
				mutex.Lock()
				defer mutex.Unlock()

				if isFailed {
					failed++
				}
				if isUpdated {
					updated++
					currHash, prevHash := h.GetHashes()
					updatedRefs = append(updatedRefs, &pbData.DataRef{
						EnvType:     h.EnvType,
						ServiceName: h.ServiceName,
						Hash:        currHash,
						PrevHash:    prevHash,
					})
					// refHash is zero for keys that were not force-requested;
					// a non-zero refHash that differs from the resulting hash
					// is counted as a bad forced update.
					if refHash != 0 && currHash != refHash {
						updatesForcedBadHash++
						p.log(0, &reqTime, "bad force update {%s %s} result: hash=%d refHash=%d", h.EnvType, h.ServiceName, currHash, refHash)
					}
				}
			}(h, src, refHash)
		}
	}
	// Wait for all holder updates before looking at the results.
	wg.Wait()

	// The store hash is the (wrapping) sum of the current holder hashes.
	for _, h := range hm {
		currHash, _ := h.GetHashes()
		dataStoreHash += currHash
	}
	if dataStore.Hash == dataStoreHash {
		if updated != 0 {
			// Should never happen
			panic(fmt.Sprintf("data store hash=%d has not changed, but %d key(s) updated", dataStoreHash, updated))
		}
		p.log(0, &reqTime, "generated main data, dataStora hash has not changed keys total=%d hash=%d", newSize, dataStoreHash)
		return nil
	}

	// Fail on safety check if needed
	// newSize-added is the number of keys carried over from the old store;
	// both the shrink to that number and the overall old->new size change
	// must be considered safe.
	removed = oldSize - (newSize - added)
	if safetyCheck && !(p.isSafeChange(oldSize, newSize-added) && p.isSafeChange(oldSize, newSize)) {
		// XXX
		// maybe there is no need in atomic: the variable is only growing
		// The counter is bumped on the store that stays published.
		_ = atomic.AddInt64(&dataStore.UpdatesFailedUnsafe, 1)

		return fmt.Errorf("not updating main data: unsafe size change %d->%d, keys removed=%d added=%d",
			oldSize, newSize, removed, added,
		)
	}
	p.log(0, &reqTime, "generated main data, keys added=%d updated=%d failed=%d removed=%d total=%d hash=%d",
		added, updated, failed, removed, newSize, dataStoreHash,
	)

	// Update data store
	// Rebuild the derived views (env -> services, DNS map) from the new
	// holder map and publish everything as one new snapshot.
	structData := make(map[string][]string, newSize)
	dnsData := make(map[string]net.IP)
	for key, h := range hm {
		h.UpdateDNSMap(dnsData)
		structData[key.EnvType] = append(structData[key.EnvType], key.ServiceName)
	}
	p.dataStore.Store(&DataStore{
		Hash:                 dataStoreHash,
		HolderMap:            hm,
		Structure:            structData,
		DNS:                  dnsData,
		UpdatesFailedUnsafe:  dataStore.UpdatesFailedUnsafe,
		UpdatesForcedBadHash: updatesForcedBadHash,
	})

	// Exec callbacks if updated
	if updated > 0 {
		// Shadows the outer reqTime: elapsed times logged below are measured
		// from this point.
		reqTime := time.Now()
		p.log(1, &reqTime, "updated keys (ready to run onUpdate callbacks): %v", updatedRefs)

		// onUpdateMutex is taken here and released by the goroutine once all
		// callbacks have returned; this function does not wait for them.
		p.onUpdateMutex.Lock()
		go func() {
			p.log(1, &reqTime, "onUpdate callbacks started")
			for _, cb := range p.onUpdateCallbacks {
				cb(updatedRefs)
			}
			p.log(1, &reqTime, "onUpdate callbacks finished")
			p.onUpdateMutex.Unlock()
		}()
	}
	return nil
}

// isSafeChange reports whether changing the number of keys from curr to next
// is acceptable under p.SafetyParams.
//
//   - No change is always safe.
//   - A shrink is safe when it stays within SafeChangeDownFraction of next or
//     within SafeChangeDownCount keys.
//   - A growth is safe when GrowthIsAlwaysSafe is set, or when it stays within
//     SafeChangeUpFraction of next or within SafeChangeUpCount keys.
func (p *Processor) isSafeChange(curr, next int) bool {
	sp := &p.SafetyParams

	switch {
	case curr == next:
		return true

	case curr > next:
		// Shrinking.
		withinFraction := float64(curr) < float64(next)*(1+sp.SafeChangeDownFraction)
		withinCount := curr <= next+sp.SafeChangeDownCount
		return withinFraction || withinCount

	case sp.GrowthIsAlwaysSafe:
		return true

	default:
		// Growing, with growth limits in force.
		withinFraction := float64(curr) > float64(next)*(1-sp.SafeChangeUpFraction)
		withinCount := curr >= next-sp.SafeChangeUpCount
		return withinFraction || withinCount
	}
}
