package cars

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/spf13/cobra"

	"a.yandex-team.ru/drive/analytics/gotasks"
	"a.yandex-team.ru/yt/go/mapreduce"
	"a.yandex-team.ru/yt/go/mapreduce/spec"
	"a.yandex-team.ru/yt/go/schema"
	"a.yandex-team.ru/yt/go/ypath"
	"a.yandex-team.ru/yt/go/yson"
	"a.yandex-team.ru/yt/go/yt"
	"a.yandex-team.ru/yt/go/yterrors"
	"a.yandex-team.ru/zootopia/analytics/drive/helpers"
	"a.yandex-team.ru/zootopia/library/go/goyt"
)

// init attaches the "update-status-log" subcommand to CarsCmd and registers
// the job types that updateStatusLogMain passes to the MapReduce client.
func init() {
	cmd := &cobra.Command{
		Use: "update-status-log",
		Run: gotasks.WrapMain(updateStatusLogMain),
	}
	cmd.Flags().String("yt-proxy", "hahn", "YT proxy")
	CarsCmd.AddCommand(cmd)

	// Both mappers are registered by value, the reducer by pointer; the same
	// forms are used when the operations are started.
	mapreduce.Register(backendLogStatusMapper{})
	mapreduce.Register(oldStatusLogMapper{})
	mapreduce.Register(&statusLogReducer{})
}

// updateStatusLogMain incrementally updates the car status log inside a
// single YT transaction.
//
// The steps, in order:
//  1. read the cursor attribute naming the last processed backend log table;
//  2. map the selected backend log tables into a temporary CarStatusRow table;
//  3. load the previous per-car state, or, when the state table does not
//     exist yet, append rows mapped from the old status log to that table;
//  4. reduce by car_id into status ranges and a fresh state table;
//  5. sort the ranges by time, store the new cursor and merge the ranges
//     into the log table.
//
// The transaction is committed when the function is about to return nil and
// aborted otherwise. The named result errMain lets the deferred functions
// observe and replace the returned error.
func updateStatusLogMain(ctx *gotasks.Context) (errMain error) {
	// Pick the YT client that matches the --yt-proxy flag.
	ytProxy, err := ctx.Cmd.Flags().GetString("yt-proxy")
	if err != nil {
		return err
	}
	yc, ok := ctx.YTs[ytProxy]
	if !ok {
		return fmt.Errorf("invalid YT proxy %q", ytProxy)
	}
	tx, err := yc.BeginTx(ctx.Context, nil)
	if err != nil {
		return err
	}
	// Registered first, therefore executed last: after the temp-table cleanup
	// defers below. A commit failure becomes the returned error; an abort
	// failure is deliberately ignored because an error is already returned.
	defer func() {
		if errMain == nil {
			errMain = tx.Commit()
			return
		}
		_ = tx.Abort()
	}()
	// lastTable is "" when the cursor attribute is absent.
	lastTable, err := getLastTable(ctx, tx)
	if err != nil {
		return err
	}
	// tables is ordered by name, greatest first (see backendLogTableList).
	tables, err := backendLogTableList(ctx, tx, lastTable)
	if err != nil {
		return err
	}
	if len(tables) == 0 {
		ctx.Logger.Warn("List of tables is empty")
		return nil
	}
	// All operations below run under the transaction.
	mr := mapreduce.New(yc).WithTx(tx)
	// tempTable collects CarStatusRow records produced by the mappers.
	tempTable, err := goyt.TempTable(ctx.Context, tx, "")
	if err != nil {
		return err
	}
	// Remove the temp table only on success; on failure the transaction is
	// aborted instead.
	// NOTE(review): presumably the abort also discards the table — confirm.
	defer func() {
		if errMain == nil {
			errMain = tx.RemoveNode(ctx.Context, tempTable, nil)
		}
	}()
	// Step 2: extract car statuses from the backend log.
	mapSpec := spec.Spec{
		InputTablePaths:  tables,
		OutputTablePaths: []ypath.YPath{tempTable},
		Pool:             "carsharing",
		Mapper: &spec.UserScript{
			MemoryLimit: 2 * 1024 * 1024 * 1024,
		},
	}
	mapOp, err := mr.Map(backendLogStatusMapper{}, mapSpec.Map())
	if err != nil {
		return err
	}
	if err := mapOp.Wait(); err != nil {
		return err
	}
	logTable := ctx.Config.YTPaths.CarStatusLogTable.Rich()
	logStateTable := ctx.Config.YTPaths.CarStatusLogStateTable.Rich().
		SetSchema(schema.MustInfer(CarStatusRow{}))
	// The existence check is made before the reduce step, which writes
	// logStateTable as one of its outputs.
	logStateExists, err := tx.NodeExists(
		ctx.Context, logStateTable, nil,
	)
	if err != nil {
		return err
	}
	reducer := statusLogReducer{
		State: map[string]CarStatusRow{},
	}
	// Build the log table schema: the inferred columns with "time" marked as
	// ascending-sorted and swapped into the first position.
	logTableSchema := schema.MustInfer(CarStatusRangeRow{})
	for i, column := range logTableSchema.Columns {
		if column.Name == "time" {
			column.SortOrder = schema.SortAscending
			// Sorted column should be first.
			logTableSchema.Columns[i] = logTableSchema.Columns[0]
			logTableSchema.Columns[0] = column
			break
		}
	}
	// Create the log table on first run; an existing table is left as is.
	if _, err := tx.CreateNode(
		ctx.Context, logTable, yt.NodeTable,
		&yt.CreateNodeOptions{
			IgnoreExisting: true,
			Attributes: map[string]interface{}{
				"schema": logTableSchema,
			},
		},
	); err != nil {
		return err
	}
	// Step 3: either continue from the saved state or bootstrap from the old
	// status log by appending its rows to tempTable.
	if logStateExists {
		state, err := readStatusLogState(ctx.Context, tx, logStateTable)
		if err != nil {
			return err
		}
		reducer.State = state
	} else {
		// NOTE(review): oldStatusLogTableList returns no tables when the old
		// log directory is not configured; the map operation is still started
		// with an empty input list — confirm this is accepted by YT.
		oldTables, err := oldStatusLogTableList(ctx, tx)
		if err != nil {
			return err
		}
		appendTempTable := tempTable.Rich().SetAppend()
		mapSpec := spec.Spec{
			InputTablePaths:  oldTables,
			OutputTablePaths: []ypath.YPath{appendTempTable},
			Pool:             "carsharing",
		}
		mapOp, err := mr.Map(oldStatusLogMapper{}, mapSpec.Map())
		if err != nil {
			return err
		}
		if err := mapOp.Wait(); err != nil {
			return err
		}
	}
	// tempLogTable receives the newly produced status ranges.
	tempLogTable, err := goyt.TempTable(ctx.Context, tx, tempDir)
	if err != nil {
		return err
	}
	defer func() {
		if errMain == nil {
			errMain = tx.RemoveNode(ctx.Context, tempLogTable, nil)
		}
	}()
	// Step 4: no mapper (nil), reduce per car with rows ordered by time.
	// The output order matters: the reducer writes ranges to out[0] and the
	// per-car state to out[1].
	reduceSpec := spec.Spec{
		InputTablePaths:  []ypath.YPath{tempTable},
		OutputTablePaths: []ypath.YPath{tempLogTable, logStateTable},
		ReduceBy:         []string{"car_id"},
		SortBy:           []string{"car_id", "time"},
		Pool:             "carsharing",
	}
	reduceOp, err := mr.MapReduce(nil, &reducer, reduceSpec.MapReduce())
	if err != nil {
		return err
	}
	if err := reduceOp.Wait(); err != nil {
		return err
	}
	// Step 5a: sort the new ranges by time in place so that they can take
	// part in the sorted merge below.
	sortSpec := spec.Spec{
		InputTablePaths: []ypath.YPath{tempLogTable},
		OutputTablePath: tempLogTable,
		SortBy:          []string{"time"},
		Pool:            "carsharing",
	}
	sortOp, err := mr.Sort(sortSpec.Sort())
	if err != nil {
		return err
	}
	if err := sortOp.Wait(); err != nil {
		return err
	}
	// Step 5b: store the cursor on the state table. The value is the last
	// path token of tables[0] with its first character dropped.
	path := ctx.Config.YTPaths.CarStatusLogStateTable.Attr(cursorLastTableAttr)
	parts, err := ypath.SplitTokens(tables[0].YPath().String())
	if err != nil {
		return err
	}
	if err := tx.SetNode(
		ctx.Context, path, parts[len(parts)-1][1:], nil,
	); err != nil {
		return err
	}
	// Step 5c: merge the existing log with the new ranges back into the log
	// table.
	mergeSpec := spec.Spec{
		InputTablePaths: []ypath.YPath{logTable, tempLogTable},
		OutputTablePath: logTable,
		CombineChunks:   true,
		MergeMode:       "sorted",
		Pool:            "carsharing",
	}
	mergeOp, err := mr.Merge(mergeSpec.Merge())
	if err != nil {
		return err
	}
	return mergeOp.Wait()
}

// backendLogRow is the subset of a backend events log row read by
// backendLogStatusMapper: the "unixtime", "event" and "data" columns.
// Data is kept as raw YSON and decoded only for the events the mapper
// handles.
type backendLogRow struct {
	Time  int64         `yson:"unixtime"`
	Event string        `yson:"event"`
	Data  yson.RawValue `yson:"data"`
}

// CarStatusRow is a single observation of a car's status. It is the output
// of both mappers, the input of statusLogReducer and the row type of the
// state table, whose schema is inferred from this struct.
//
// Time is a Unix timestamp in seconds (it is passed to time.Unix as the
// seconds argument by the reducer). City, Latitude and Longitude are nil
// when the value is not known.
type CarStatusRow struct {
	CarID     string   `yson:"car_id"`
	Time      int64    `yson:"time"`
	Status    string   `yson:"status"`
	City      *string  `yson:"city"`
	Latitude  *float64 `yson:"latitude"`
	Longitude *float64 `yson:"longitude"`
}

// CarStatusRangeRow is a row of the status log: the CarStatusRow that opened
// a range together with EndTime, the Unix timestamp in seconds at which the
// reducer closed that range.
type CarStatusRangeRow struct {
	CarStatusRow
	EndTime int64 `yson:"end_time"`
}

const (
	// The commented-out values below are former hard-coded table paths. The
	// code in this file reads the corresponding paths from
	// ctx.Config.YTPaths instead.
	// backendLog1dDir     = "//home/logfeller/logs/carsharing-backend-events-log/1d"
	// backendLog5mDir     = "//home/logfeller/logs/carsharing-backend-events-log/stream/5min"
	// oldStatusLogDir     = "//home/carsharing/production/car/status_log"
	// statusLogTable      = "//home/carsharing/production/data/cars/status_log"
	// statusLogStateTable = "//home/carsharing/production/data/cars/_status_log_state"

	// tempDir is the directory argument passed to goyt.TempTable.
	// cursorLastTableAttr is the attribute of the state table that stores the
	// name of the newest backend log table already processed.
	tempDir             = ""
	cursorLastTableAttr = "_cursor_last_table"
)

// backendLogStatusMapper is a stateless map job that extracts CarStatusRow
// records from backend events log rows.
type backendLogStatusMapper struct{}

// getFloatPtr converts a loosely typed numeric value into *float64.
//
// Accepted inputs are float64, int64 and uint64 — the numeric types a YSON
// value decoded into interface{} can carry — and nil. A nil input yields a
// nil pointer with no error, which callers use to represent a missing
// coordinate. Any other dynamic type is reported as an error.
func getFloatPtr(v interface{}) (*float64, error) {
	switch v := v.(type) {
	case float64:
		return &v, nil
	case int64:
		fv := float64(v)
		return &fv, nil
	case uint64:
		// Unsigned YSON integers decode as uint64; treat them like int64
		// instead of failing the whole job on an otherwise valid number.
		fv := float64(v)
		return &fv, nil
	case nil:
		return nil, nil
	default:
		return nil, fmt.Errorf("unsupported type: %T", v)
	}
}

// Do reads backend log rows from in and writes one CarStatusRow to out[0]
// for every object listed in an "ObjectStates" event.
//
// Rows with any other event are ignored, as are objects with an empty id.
// The row's unixtime becomes the Time of every emitted record. The city is
// derived from the object's location tags and is nil when the helper returns
// an empty string. A scan, decode, coordinate conversion or write failure
// aborts the job with that error. ctx is not used.
func (backendLogStatusMapper) Do(
	ctx mapreduce.JobContext, in mapreduce.Reader, out []mapreduce.Writer,
) error {
	for in.Next() {
		var row backendLogRow
		if err := in.Scan(&row); err != nil {
			return err
		}
		if row.Event != "ObjectStates" {
			continue
		}
		// Only the fields needed for the status log are decoded. Coordinates
		// are kept untyped because getFloatPtr accepts several numeric types.
		var objects []struct {
			ID       string `yson:"id"`
			Status   string `yson:"status"`
			Location struct {
				Latitude  interface{} `yson:"latitude"`
				Longitude interface{} `yson:"longitude"`
			} `yson:"location"`
			LocationTags []string `yson:"location_tags"`
		}
		if err := yson.Unmarshal(row.Data, &objects); err != nil {
			return err
		}
		for _, object := range objects {
			// Drop anonymous objects first so no work is spent on them.
			if object.ID == "" {
				continue
			}
			lat, err := getFloatPtr(object.Location.Latitude)
			if err != nil {
				return fmt.Errorf("car %q: latitude: %w", object.ID, err)
			}
			lon, err := getFloatPtr(object.Location.Longitude)
			if err != nil {
				return fmt.Errorf("car %q: longitude: %w", object.ID, err)
			}
			var city *string
			if v := helpers.CityByPtags(object.LocationTags); v != "" {
				city = &v
			}
			if err := out[0].Write(CarStatusRow{
				CarID:     object.ID,
				Time:      row.Time,
				Status:    object.Status,
				City:      city,
				Latitude:  lat,
				Longitude: lon,
			}); err != nil {
				return err
			}
		}
	}
	return nil
}

// InputTypes returns a zero backendLogRow, the row type Do scans from its
// input.
func (backendLogStatusMapper) InputTypes() []interface{} {
	var input backendLogRow
	return []interface{}{input}
}

// OutputTypes returns a zero CarStatusRow, the row type Do writes to its
// only output.
func (backendLogStatusMapper) OutputTypes() []interface{} {
	var output CarStatusRow
	return []interface{}{output}
}

// oldStatusLogRow is a row of the old status log as read by
// oldStatusLogMapper. Time comes from the "timestamp" column as a float and
// is converted to int64 by the mapper. Location is nil when the row carries
// no location; its coordinates are untyped and converted with getFloatPtr.
type oldStatusLogRow struct {
	ID       string  `yson:"id"`
	Time     float64 `yson:"timestamp"`
	Status   string  `yson:"status"`
	Location *struct {
		Lat interface{} `yson:"lat"`
		Lon interface{} `yson:"lon"`
	} `yson:"location"`
}

// oldStatusLogMapper is a stateless map job that converts rows of the old
// status log into CarStatusRow records.
type oldStatusLogMapper struct{}

// Do converts every old status log row with a non-empty id into a
// CarStatusRow written to out[0].
//
// The float timestamp is converted to int64. When the row has a location,
// its coordinates are converted with getFloatPtr, and the city is looked up
// only if both coordinates are present; an empty lookup result leaves the
// city nil. ctx is not used.
func (oldStatusLogMapper) Do(
	ctx mapreduce.JobContext, in mapreduce.Reader, out []mapreduce.Writer,
) error {
	for in.Next() {
		var src oldStatusLogRow
		if err := in.Scan(&src); err != nil {
			return err
		}
		if len(src.ID) == 0 {
			continue
		}
		dst := CarStatusRow{CarID: src.ID, Status: src.Status}
		dst.Time = int64(src.Time)
		if loc := src.Location; loc != nil {
			var err error
			if dst.Latitude, err = getFloatPtr(loc.Lat); err != nil {
				return err
			}
			if dst.Longitude, err = getFloatPtr(loc.Lon); err != nil {
				return err
			}
			if dst.Latitude != nil && dst.Longitude != nil {
				// The helper takes longitude first, then latitude.
				name := helpers.CityByLocation(*dst.Longitude, *dst.Latitude)
				if name != "" {
					dst.City = &name
				}
			}
		}
		if err := out[0].Write(dst); err != nil {
			return err
		}
	}
	return nil
}

// InputTypes returns a zero oldStatusLogRow, the row type Do scans from its
// input.
func (oldStatusLogMapper) InputTypes() []interface{} {
	var input oldStatusLogRow
	return []interface{}{input}
}

// OutputTypes returns a zero CarStatusRow, the row type Do writes to its
// only output.
func (oldStatusLogMapper) OutputTypes() []interface{} {
	var output CarStatusRow
	return []interface{}{output}
}

// readStatusLogState loads the whole state table into memory and returns it
// as a map keyed by car id. If the table contains several rows for one car,
// the row read last wins. Any read or scan failure is returned and no map is
// produced in that case.
func readStatusLogState(
	ctx context.Context, tx yt.Tx, table ypath.YPath,
) (map[string]CarStatusRow, error) {
	reader, err := tx.ReadTable(ctx, table, nil)
	if err != nil {
		return nil, err
	}
	// The close error is intentionally dropped: the reader is read-only and
	// read failures are reported through Err below.
	defer func() { _ = reader.Close() }()

	byCar := make(map[string]CarStatusRow)
	for reader.Next() {
		var entry CarStatusRow
		if scanErr := reader.Scan(&entry); scanErr != nil {
			return nil, scanErr
		}
		byCar[entry.CarID] = entry
	}
	if readErr := reader.Err(); readErr != nil {
		return nil, readErr
	}
	return byCar, nil
}

// statusLogReducer is the reduce job that turns per-car CarStatusRow
// sequences into status ranges.
//
// State maps a car id to the range that was still open at the end of the
// previous run; it is filled from the state table by updateStatusLogMain.
// NOTE(review): the field is exported, presumably so that it is shipped to
// the jobs together with the reducer — confirm.
type statusLogReducer struct {
	State map[string]CarStatusRow
}

// Do hands the input to mapreduce.GroupKeys and processes every group it
// yields with reduceGroup, passing the job outputs through unchanged.
// ctx is not used.
func (r *statusLogReducer) Do(
	ctx mapreduce.JobContext, in mapreduce.Reader, out []mapreduce.Writer,
) error {
	perGroup := func(group mapreduce.Reader) error {
		return r.reduceGroup(group, out)
	}
	return mapreduce.GroupKeys(in, perGroup)
}

// stringPtrEq reports whether two optional strings are equal: two nil
// pointers are equal, a nil and a non-nil pointer are not, and two non-nil
// pointers are compared by the strings they point to.
func stringPtrEq(lhs, rhs *string) bool {
	if lhs == nil || rhs == nil {
		// At least one side is missing, so they match only if both are.
		return lhs == rhs
	}
	return *lhs == *rhs
}

// reduceGroup converts the rows of one key group into status ranges.
//
// in yields the CarStatusRow records of a single group; the reduce operation
// is configured to group by car_id and order rows by time. Closed ranges are
// written to out[0] as CarStatusRangeRow, and the range that is still open
// when the group ends is written to out[1] as the new state of the car.
//
// The open range starts from r.State for the car when an entry exists there,
// otherwise from the first row of the group. Rows older than the start of
// the open range are skipped. A range is closed when the status changes, or
// when the city changes and the new status is not "returned". Ranges are
// additionally cut at the start of the hour of the incoming row so that no
// range crosses that boundary.
//
// Any scan or write failure is returned to the caller.
func (r *statusLogReducer) reduceGroup(
	in mapreduce.Reader, out []mapreduce.Writer,
) error {
	var prevRow CarStatusRow
	for in.Next() {
		var row CarStatusRow
		if err := in.Scan(&row); err != nil {
			return err
		}
		// Initialize prevRow.
		if prevRow.CarID == "" {
			prevRow = r.State[row.CarID]
			// This is the first occurrence of the car: the row itself opens
			// the first range.
			if prevRow.CarID == "" {
				prevRow = row
				continue
			}
		}
		// Skip rows older than the start of the open range.
		if row.Time < prevRow.Time {
			continue
		}
		// Split ranges by hours. The boundary is detected on absolute time
		// rather than on the wall-clock hour of day, so the result does not
		// depend on the local time zone and a boundary is still found when
		// two rows are a whole number of days apart.
		// We assume that logs do not have gaps longer than 1 hour: only one
		// split, at the start of the current row's hour, is emitted.
		splitTime := time.Unix(row.Time, 0).Truncate(time.Hour).Unix()
		if splitTime > prevRow.Time {
			if err := out[0].Write(CarStatusRangeRow{
				CarStatusRow: prevRow,
				EndTime:      splitTime,
			}); err != nil {
				return err
			}
			prevRow.Time = splitTime
		}
		if row.Status != prevRow.Status ||
			(!stringPtrEq(row.City, prevRow.City) &&
				row.Status != "returned") {
			if err := out[0].Write(CarStatusRangeRow{
				CarStatusRow: prevRow,
				EndTime:      row.Time,
			}); err != nil {
				return err
			}
			prevRow = row
		}
	}
	return out[1].Write(prevRow)
}

// InputTypes returns a zero CarStatusRow, the row type the reducer scans
// from its input.
func (r *statusLogReducer) InputTypes() []interface{} {
	var input CarStatusRow
	return []interface{}{input}
}

// OutputTypes returns the row types of the two outputs in order: status
// ranges (CarStatusRangeRow) first, per-car state (CarStatusRow) second.
func (r *statusLogReducer) OutputTypes() []interface{} {
	types := make([]interface{}, 0, 2)
	types = append(types, CarStatusRangeRow{}, CarStatusRow{})
	return types
}

// nodeSorter implements sort.Interface for a directory listing and orders
// the nodes by Name from greatest to least.
type nodeSorter []goyt.Node

// Len returns the number of nodes.
func (s nodeSorter) Len() int { return len(s) }

// Less reports whether node i must precede node j, which is the case when
// its Name is the greater of the two.
func (s nodeSorter) Less(i, j int) bool { return s[j].Name < s[i].Name }

// Swap exchanges the nodes at positions i and j.
func (s nodeSorter) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// backendLogTableList returns the backend log tables that still have to be
// processed, ordered by name from greatest to least and restricted to the
// "unixtime", "event" and "data" columns.
//
// lastName is the cursor: the name of the newest table handled by the
// previous run, or "" when there is none. 5-minute tables are collected
// first, newest first, up to and including the first one whose name is not
// greater than lastName. Only when no 5-minute table reaches the cursor are
// daily tables appended in the same manner; daily tables named before
// "2019-04-25" are never included.
func backendLogTableList(
	ctx *gotasks.Context, tx yt.Tx, lastName string,
) ([]ypath.YPath, error) {
	// Daily tables before this date are never read.
	const firstDailyTable = "2019-04-25"
	cols := []string{"unixtime", "event", "data"}
	// Find all 5m tables.
	path5m := ctx.Config.YTPaths.BackendLog5minDir
	nodes5m, err := goyt.ListDir(ctx.Context, tx, path5m)
	if err != nil {
		return nil, err
	}
	sort.Sort(nodeSorter(nodes5m))
	var tables []ypath.YPath
	for _, node := range nodes5m {
		tables = append(
			tables,
			path5m.Child(node.Name).Rich().SetColumns(cols),
		)
		if node.Name <= lastName {
			// The cursor is covered by the 5m tables, so daily tables are
			// not needed. Returning here, instead of comparing lengths
			// afterwards, also handles the cursor being the oldest 5m table.
			return tables, nil
		}
	}
	// Find all 1d tables.
	path1d := ctx.Config.YTPaths.BackendLog1dDir
	nodes1d, err := goyt.ListDir(ctx.Context, tx, path1d)
	if err != nil {
		return nil, err
	}
	sort.Sort(nodeSorter(nodes1d))
	for _, node := range nodes1d {
		if node.Name < firstDailyTable {
			break
		}
		tables = append(
			tables,
			path1d.Child(node.Name).Rich().SetColumns(cols),
		)
		if node.Name <= lastName {
			break
		}
	}
	return tables, nil
}

// oldStatusLogTableList returns the tables of the old status log directory
// whose names are not greater than "2019-04-30", in listing order. When the
// directory is not configured (empty path) it returns no tables and no
// error.
func oldStatusLogTableList(
	ctx *gotasks.Context, tx yt.Tx,
) ([]ypath.YPath, error) {
	dir := ctx.Config.YTPaths.OldCarStatusLogDir
	if dir.String() == "" {
		return nil, nil
	}
	entries, err := goyt.ListDir(ctx.Context, tx, dir)
	if err != nil {
		return nil, err
	}
	var selected []ypath.YPath
	for _, entry := range entries {
		if entry.Name <= "2019-04-30" {
			selected = append(selected, dir.Child(entry.Name))
		}
	}
	return selected, nil
}

// getLastTable reads the cursor attribute of the state table and returns the
// stored table name. It returns "" without an error when the lookup fails
// with a resolve error or when no value was read; any other error is
// returned as is.
func getLastTable(ctx *gotasks.Context, tx yt.Tx) (string, error) {
	cursorPath := ctx.Config.YTPaths.CarStatusLogStateTable.Attr(cursorLastTableAttr)
	var stored *string
	err := tx.GetNode(ctx.Context, cursorPath, &stored, nil)
	switch {
	case err != nil && !yterrors.ContainsErrorCode(err, yterrors.CodeResolveError):
		return "", err
	case stored == nil:
		return "", nil
	default:
		return *stored, nil
	}
}
