package service

import (
	"fmt"
	"sync"
	"time"

	"a.yandex-team.ru/drive/analytics/gotasks"
	"a.yandex-team.ru/library/go/core/log"
	"a.yandex-team.ru/yt/go/mapreduce"
	"a.yandex-team.ru/yt/go/mapreduce/spec"
	"a.yandex-team.ru/yt/go/ypath"
	"a.yandex-team.ru/yt/go/yt"
	"github.com/spf13/cobra"
)

// init registers the "yt-merge" subcommand and its flags on ServiceCmd.
func init() {
	cmd := &cobra.Command{
		Use: "yt-merge",
		Run: gotasks.WrapMain(ytMergeMain),
	}
	// All flags below are read back by name in ytMergeMain.
	flags := cmd.Flags()
	flags.String("yt", "hahn", "Name of YT connection")
	flags.StringArray(
		"path", []string{"//home/carsharing"}, "YT path for merging",
	)
	flags.Int("tasks", 8, "Amount of parallel tasks")
	flags.String("pool", "", "YT pool for merge")
	flags.Duration("delay", 36*time.Hour, "Minimal delay of merge before last update")
	flags.Int64("ratio", 512*1024*1024, "Ratio of disk space and chunk count")
	ServiceCmd.AddCommand(cmd)
}

// ytTableMerge is a unit of work passed over the merges channel in
// ytMergeMain: a table path together with the node info fetched for it.
type ytTableMerge struct {
	// Path is the YT path of the table to consider for merging.
	Path ypath.Path
	// Info is the node info obtained for Path by GetNode or ListNode.
	Info ytNodeInfo
}

// isNeedMerge reports whether the table described by info should be merged.
//
// A table with at least one chunk that isNeedBrotli8 selects is always
// merged. Otherwise a table is merged only when it has more than one chunk,
// its Stats.DiskSpace divided by Stats.ChunkCount does not exceed ratio, and
// at least delay has passed since info.UpdateTime.
func isNeedMerge(info ytNodeInfo, ratio int64, delay time.Duration) bool {
	chunks := info.Stats.ChunkCount
	switch {
	case chunks > 0 && isNeedBrotli8(info):
		return true
	case chunks <= 1:
		// Nothing to combine.
		return false
	case info.Stats.DiskSpace/chunks > ratio:
		// The quotient is already above the requested ratio.
		return false
	}
	return time.Since(info.UpdateTime) >= delay
}

// isNeedBrotli8 reports whether info.UpdateTime is more than 365 days in the
// past while info.Codec is something other than "brotli_8".
func isNeedBrotli8(info ytNodeInfo) bool {
	const year = 365 * 24 * time.Hour
	if info.Codec == "brotli_8" {
		return false
	}
	return time.Since(info.UpdateTime) > year
}

// ytMergeMain implements the "yt-merge" subcommand.
//
// It reads the "yt", "path", "tasks", "pool", "delay" and "ratio" flags,
// walks every given path (recursing into map nodes) and hands each table it
// finds to one of "tasks" worker goroutines. A worker merges the table in
// place inside a transaction when isNeedMerge says so, switching the
// compression codec to "brotli_8" first when isNeedBrotli8 says so.
//
// It returns an error when a flag cannot be read, when "tasks" is not
// positive, or when the named YT connection is not configured. Failures on
// individual nodes are logged and do not abort the walk.
func ytMergeMain(ctx *gotasks.Context) error {
	flags := ctx.Cmd.Flags()
	ytName, err := flags.GetString("yt")
	if err != nil {
		return err
	}
	paths, err := flags.GetStringArray("path")
	if err != nil {
		return err
	}
	tasks, err := flags.GetInt("tasks")
	if err != nil {
		return err
	}
	pool, err := flags.GetString("pool")
	if err != nil {
		return err
	}
	delay, err := flags.GetDuration("delay")
	if err != nil {
		return err
	}
	ratio, err := flags.GetInt64("ratio")
	if err != nil {
		return err
	}
	// Without at least one worker nobody receives from the unbuffered merges
	// channel and the first send would block forever.
	if tasks < 1 {
		return fmt.Errorf("tasks must be positive, got %d", tasks)
	}
	yc, ok := ctx.YTs[ytName]
	if !ok {
		return fmt.Errorf("yt %q does not exists", ytName)
	}
	// Attributes requested for every inspected node. The same list is used
	// for GetNode and ListNode so that a table given directly as a path is
	// judged on the same data as a table found by listing its parent.
	// NOTE(review): isNeedBrotli8 reads info.Codec, presumably filled from
	// "compression_codec" — confirm against the ytNodeInfo definition.
	attrs := []string{
		"type", "resource_usage", "modification_time",
		"compression_codec",
	}
	// mergeTable merges a single table in place if it qualifies. All errors
	// are logged together with the table path and are not propagated.
	mergeTable := func(path ypath.Path, info ytNodeInfo) {
		select {
		case <-ctx.Context.Done():
			return
		default:
		}
		if !isNeedMerge(info, ratio, delay) {
			return
		}
		table := log.String("table", path.String())
		ctx.Logger.Info(
			"Merging table",
			table,
			log.Duration("ts", time.Since(info.UpdateTime)),
		)
		opSpec := spec.Spec{
			InputTablePaths: []ypath.YPath{path},
			OutputTablePath: path,
			MergeMode:       "ordered",
			CombineChunks:   true,
			Pool:            pool,
			DataSizePerJob:  ratio * 3 / 2,
		}
		tx, err := yc.BeginTx(ctx.Context, nil)
		if err != nil {
			ctx.Logger.Error("Unable to start Tx", table, log.Error(err))
			return
		}
		// Commit only when every step succeeded; abort on any early return.
		success := false
		defer func() {
			if !success {
				// Best effort: the failure that led here is already logged.
				_ = tx.Abort()
				return
			}
			if err := tx.Commit(); err != nil {
				ctx.Logger.Error("Unable to commit Tx", table, log.Error(err))
			}
		}()
		if _, err := tx.LockNode(
			ctx.Context, path, yt.LockExclusive, nil,
		); err != nil {
			ctx.Logger.Error("Unable to lock node", table, log.Error(err))
			return
		}
		if isNeedBrotli8(info) {
			err := tx.SetNode(
				ctx.Context, path.Attr("compression_codec"), "brotli_8",
				nil,
			)
			if err != nil {
				ctx.Logger.Error(
					"Unable to change compression codec",
					table, log.Error(err),
				)
				return
			}
			opSpec.ForceTransform = true
		}
		mr := mapreduce.New(yc).WithTx(tx)
		op, err := mr.Merge(opSpec.Merge())
		if err != nil {
			ctx.Logger.Error(
				"Unable to start operation",
				table, log.Error(err),
			)
			return
		}
		if err := op.Wait(); err != nil {
			ctx.Logger.Error("Operation wait error", table, log.Error(err))
			return
		}
		success = true
	}
	// Deferred calls run in reverse order: the channel is closed first, which
	// ends the workers' range loops, and only then are the workers awaited.
	var waiter sync.WaitGroup
	defer waiter.Wait()
	merges := make(chan ytTableMerge)
	defer close(merges)
	for i := 0; i < tasks; i++ {
		waiter.Add(1)
		go func() {
			defer waiter.Done()
			for merge := range merges {
				mergeTable(merge.Path, merge.Info)
			}
		}()
	}
	// enqueue hands a table to the workers. It reports false when the
	// context was cancelled before a worker accepted the table.
	enqueue := func(path ypath.Path, info ytNodeInfo) bool {
		select {
		case merges <- ytTableMerge{Path: path, Info: info}:
			return true
		case <-ctx.Context.Done():
			return false
		}
	}
	// mergeMap lists a map node, enqueues its tables and recurses into its
	// child map nodes.
	var mergeMap func(path ypath.Path)
	mergeMap = func(path ypath.Path) {
		var infos []ytNodeInfo
		err := yc.ListNode(ctx.Context, path, &infos, &yt.ListNodeOptions{
			Attributes: attrs,
		})
		if err != nil {
			ctx.Logger.Info(
				"Unable to list node",
				log.String("path", path.String()), log.Error(err),
			)
			return
		}
		for _, info := range infos {
			select {
			case <-ctx.Context.Done():
				return
			default:
			}
			// Use the checked assertion: an unexpected value must not panic
			// and take the whole run down.
			name, ok := info.Value.(string)
			if !ok {
				ctx.Logger.Warn(
					"Unexpected node name type",
					log.String("path", path.String()),
				)
				continue
			}
			child := path.Child(name)
			switch info.Type {
			case yt.NodeTable:
				if !enqueue(child, info) {
					return
				}
			case yt.NodeMap:
				mergeMap(child)
			}
		}
	}
	for _, path := range paths {
		// Stop issuing requests once the context is cancelled.
		select {
		case <-ctx.Context.Done():
			return nil
		default:
		}
		var info ytNodeInfo
		err := yc.GetNode(
			ctx.Context, ypath.Path(path), &info,
			&yt.GetNodeOptions{
				Attributes: attrs,
			},
		)
		if err != nil {
			ctx.Logger.Error(
				"Unable to determine node type",
				log.String("path", path), log.Error(err),
			)
			continue
		}
		switch info.Type {
		case yt.NodeTable:
			if !enqueue(ypath.Path(path), info) {
				return nil
			}
		case yt.NodeMap:
			mergeMap(ypath.Path(path))
		default:
			ctx.Logger.Warn(
				"Unsupported node type",
				log.String("type", string(info.Type)),
				log.String("path", path),
			)
		}
	}
	return nil
}
