package resync

import (
	"context"
	"sync"

	"code.justin.tv/feeds/errors"
	"code.justin.tv/feeds/graphdb/cmd/graphdb/internal/api/twirpserver"
	"code.justin.tv/feeds/graphdb/cmd/graphdb/internal/graphdbmodel"
	"code.justin.tv/feeds/graphdb/proto/graphdb"
	"code.justin.tv/feeds/graphdb/proto/graphdbadmin"
	"code.justin.tv/feeds/log"
	"code.justin.tv/hygienic/statsdsender"
	"golang.org/x/sync/errgroup"
)

// Resync edges between GraphDB and Cohesion.  This is a multi step process where we have to iterate edges between both
// stores, then match up edges that are in both and update the data that is different between them.  Then, we have
// to either add or remove edges that exist in one of the databases and not in another.
//
// Edges present only in the source of truth are created through WriteAPI, edges present only in ExistingData are
// deleted through WriteAPI, and edges present in both are updated when their creation time or data differ.
type Resync struct {
	// Log receives the error, if any, from closing the source of truth lister at the end of a Resync call.
	Log           log.Logger
	// SourceOfTruth builds the lister whose edges are considered correct.  It is invoked once per Resync call and
	// the returned lister is closed before Resync returns.
	SourceOfTruth func() (*ListPQL, error)
	// ExistingData lists the edges currently stored, which are compared against the source of truth.
	ExistingData  Lister
	// Stats receives the "created", "deleted" and "updated" counters after a successful Resync.
	Stats         *statsdsender.ErrorlessStatSender `nilcheck:"nodepth"`
	// Note: Only call this from storageWithStats
	// so that every create/delete/update issued is counted in the ResyncResponse.
	WriteAPI Storage
}

// Storage is GraphDB.  We only do Multi requests to it from here.
// Every create, delete and update produced by a resync is sent through Multi as part of a batched MultiRequest.
type Storage interface {
	// Multi executes all the single requests contained in req and returns the combined response.
	Multi(ctx context.Context, req *graphdb.MultiRequest) (*graphdb.MultiResponse, error)
}

// Lister is anything that can page through the edges of one kind leaving a node.  Both the source of truth
// (cohesion) and the existing data (GraphDB) are read through this interface by dump.
type Lister interface {
	// List returns one page of edges of kind edgeKind starting at node from.  The page argument carries the limit and
	// the cursor; the result carries the cursor to pass for the next page.
	List(ctx context.Context, from graphdbmodel.Node, edgeKind string, page graphdbmodel.PagedRequest) (*graphdbmodel.ListResult, error)
}

// storageWithStats forwards req to WriteAPI.Multi after tallying what it contains.  For every create, delete and
// update inside req the matching counter in stats is incremented and the edge is offered to the matching sample
// list.  The tally happens before the write is attempted, so the counters reflect requested operations even when
// Multi returns an error.  Request types other than create/delete/update are forwarded without being counted.
func (r *Resync) storageWithStats(ctx context.Context, req *graphdb.MultiRequest, stats *graphdbadmin.ResyncResponse) (*graphdb.MultiResponse, error) {
	for i := range req.Requests {
		switch op := req.Requests[i].RequestType.(type) {
		case *graphdb.MultiRequest_SingleRequest_EdgeCreate:
			stats.CreatedEdgeSample = appendSampledEdge(stats.CreatedEdgeSample, op.EdgeCreate.Edge)
			stats.CreatedEdgeCount++
		case *graphdb.MultiRequest_SingleRequest_EdgeDelete:
			stats.DeletedEdgeSample = appendSampledEdge(stats.DeletedEdgeSample, op.EdgeDelete.Edge)
			stats.DeletedEdgeCount++
		case *graphdb.MultiRequest_SingleRequest_EdgeUpdate:
			stats.UpdatedEdgeSample = appendSampledEdge(stats.UpdatedEdgeSample, op.EdgeUpdate.Edge)
			stats.UpdatedEdgeCount++
		}
	}
	return r.WriteAPI.Multi(ctx, req)
}

// drainExtra collects whatever requests are immediately available on in and merges them into a single
// graphdb.MultiRequest, so the caller can issue one batched write instead of many small ones.  It never waits for
// new data: it returns as soon as the channel is empty, the channel is closed, or the merged request holds at least
// groupInto individual requests.  The returned request is never nil but may contain no requests.
func drainExtra(in chan *graphdb.MultiRequest, groupInto int) *graphdb.MultiRequest {
	merged := new(graphdb.MultiRequest)
	for {
		select {
		case next, open := <-in:
			if !open {
				// Closed channel: nothing more will ever arrive.
				return merged
			}
			// Fold the pending request into the batch we are building.
			merged.Requests = append(merged.Requests, next.Requests...)
		default:
			// Nothing is queued right now; send what we have rather than block.
			return merged
		}
		if len(merged.Requests) >= groupInto {
			// The batch is big enough; stop here so a single MultiRequest does not grow without bound.
			return merged
		}
	}
}

// Resync makes the existing data for the edges of kind edgeKind leaving node from match the source of truth.
//
// Both stores are listed concurrently into a resyncBucket.  Edges found in both stores are compared as they arrive
// and any needed update is queued and written in batches while listing is still in progress.  Once both listings
// finish, edges found only in the source of truth are created and edges found only in the existing data are
// deleted.  On success the returned response carries the number (and a small sample) of created, deleted and
// updated edges, and the same counts are reported to Stats.  On any error the response is nil.
func (r *Resync) Resync(ctx context.Context, from graphdbmodel.Node, edgeKind string) (*graphdbadmin.ResyncResponse, error) {
	var ret graphdbadmin.ResyncResponse
	rbucket := resyncBucket{
		sendTo: make(chan *graphdb.MultiRequest, 10000),
	}
	groupInto := 25
	sourceOfTruthLister, err := r.SourceOfTruth()
	if err != nil {
		return nil, err
	}
	defer func() {
		if err := sourceOfTruthLister.Close(); err != nil {
			r.Log.Log("err", err)
		}
	}()
	// Create the errgroup only after the fallible setup above: its derived context is released by eg.Wait, so
	// creating it earlier would leak that context on the early return.
	eg, egCtx := errgroup.WithContext(ctx)
	finishedStreamingNewResults := sync.WaitGroup{}
	finishedStreamingNewResults.Add(2)
	eg.Go(func() error {
		defer finishedStreamingNewResults.Done()
		// Dump all the source of truth data into rbucket
		if err := dump(egCtx, sourceOfTruthLister, from, edgeKind, rbucket.addSourceOfTruth); err != nil {
			return errors.Wrap(err, "unable to dump source of truth")
		}
		return nil
	})
	eg.Go(func() error {
		defer finishedStreamingNewResults.Done()
		// Dump all the existing GraphDB data into rbucket
		if err := dump(egCtx, r.ExistingData, from, edgeKind, rbucket.addExistingData); err != nil {
			return errors.Wrap(err, "unable to dump existing data")
		}
		return nil
	})
	eg.Go(func() error {
		// If we are done streaming from both the source of truth and the existing edges, we can close the destination
		// bucket of rbucket.sendTo.  This will signal the below eg.Go that there is no more data coming and it can
		// end
		finishedStreamingNewResults.Wait() // ---- When we've finished List on both cohesion and graphdb
		close(rbucket.sendTo)              // ---- Signal the below goroutine to finish
		return nil
	})
	// discardPending keeps receiving from rbucket.sendTo until it is closed.  The dump goroutines send on that
	// channel while holding the bucket lock, so if the consumer below stops early while the buffer is full they would
	// block forever and eg.Wait would never return.  Discarding lets them run until they notice the cancelled
	// context; the discarding goroutine itself ends when the channel is closed by the goroutine above.
	discardPending := func() {
		go func() {
			for range rbucket.sendTo {
			}
		}()
	}
	eg.Go(func() error {
		for {
			select {
			case <-egCtx.Done():
				discardPending()
				return egCtx.Err()
			case mr, ok := <-rbucket.sendTo:
				if !ok {
					// This happens when we've finished iterating the source of truth and the existing data
					return nil
				}
				// Only update requests should get this far.
				extraReqs := drainExtra(rbucket.sendTo, groupInto-1)
				extraReqs.Requests = append(extraReqs.Requests, mr.Requests...)
				// Actually issue the update request
				if _, err := r.storageWithStats(egCtx, extraReqs, &ret); err != nil {
					discardPending()
					return errors.Wrap(err, "unable to update some edges")
				}
			}
		}
	})
	if err := eg.Wait(); err != nil {
		return nil, errors.Wrap(err, "errgroup ended")
	}
	// At this point none of the edges left in resyncBucket.sourceOfTruth or resyncBucket.existingData exist in the
	// other store.  Create the ones left inside sourceOfTruth and delete the ones left inside existingData.
	// The parent ctx is used here because egCtx is cancelled once eg.Wait returns.
	for _, req := range groupRequests(rbucket.finishDraining(), groupInto) {
		if _, err := r.storageWithStats(ctx, req, &ret); err != nil {
			return nil, errors.Wrap(err, "unable to finish draining a few edges")
		}
	}
	r.Stats.IncC("created", int64(ret.CreatedEdgeCount), .1)
	r.Stats.IncC("deleted", int64(ret.DeletedEdgeCount), .1)
	r.Stats.IncC("updated", int64(ret.UpdatedEdgeCount), .1)
	return &ret, nil
}

// groupRequests repacks many MultiRequests into fewer, larger ones.  The individual requests keep their original
// order; a new MultiRequest is emitted each time the accumulated batch reaches at least groupInto requests, and any
// remainder is emitted as a final, smaller MultiRequest.  An input MultiRequest is never split, so a batch can hold
// more than groupInto requests.  The result is logically equivalent to in but needs fewer Multi calls.
func groupRequests(in []*graphdb.MultiRequest, groupInto int) []*graphdb.MultiRequest {
	grouped := make([]*graphdb.MultiRequest, 0, len(in))
	var batch []*graphdb.MultiRequest_SingleRequest
	// flush emits the current batch as one MultiRequest and starts a fresh batch.
	flush := func() {
		grouped = append(grouped, &graphdb.MultiRequest{Requests: batch})
		batch = nil
	}
	for i := range in {
		batch = append(batch, in[i].Requests...)
		if len(batch) < groupInto {
			continue
		}
		flush()
	}
	if len(batch) != 0 {
		flush()
	}
	return grouped
}

// makeDataMatch compares the two copies of an edge that exists in both stores and returns the MultiRequest (holding
// a single edge update) needed to bring the existing data in line with the source of truth.  It returns nil when no
// update is needed.
//
// Only the creation time and the data bag are compared; UpdatedAt and Version are deliberately ignored.  As a side
// effect, sourceOfTruth.UpdatedAt and sourceOfTruth.Version are overwritten with the values from existingData.
func makeDataMatch(edge graphdbmodel.Edge, sourceOfTruth *graphdbmodel.LoadedData, existingData *graphdbmodel.LoadedData) *graphdb.MultiRequest {
	// 'updated-at' and 'version' are not part of the comparison; align them so they can never count as a difference.
	sourceOfTruth.UpdatedAt, sourceOfTruth.Version = existingData.UpdatedAt, existingData.Version

	update := &graphdb.EdgeUpdateRequest{}
	if sameCreation := sourceOfTruth.CreatedAt.Equal(existingData.CreatedAt); !sameCreation {
		// The source of truth wins: push its creation time.
		update.CreatedAt = twirpserver.ToProtoTime(sourceOfTruth.CreatedAt)
	}
	// Whatever remains after subtracting the existing data bag from the source of truth's bag is what has to be
	// written.  Per the original author's note this tolerates extra keys in existingData, but not the same keys
	// with different data.
	if missing := sourceOfTruth.Data.Subtract(existingData.Data); !missing.IsEmpty() {
		update.Data = twirpserver.ToProtoDataBag(missing)
	}

	if update.Data == nil && update.CreatedAt == nil {
		// Neither the data nor the creation date needs to change.
		return nil
	}
	update.Edge = twirpserver.ToProtoEdge(&edge)
	single := &graphdb.MultiRequest_SingleRequest{
		RequestType: &graphdb.MultiRequest_SingleRequest_EdgeUpdate{
			EdgeUpdate: update,
		},
	}
	return &graphdb.MultiRequest{
		Requests: []*graphdb.MultiRequest_SingleRequest{single},
	}
}

// resyncBucket holds, keyed by edge, the edges that have so far been seen in only one of the two stores.  As soon as
// an edge has been seen in both, attemptDrain removes it from both maps and, if the two copies differ, queues an
// update on sendTo.  Whatever is left in the maps once listing is complete is turned into create/delete requests by
// finishDraining.
type resyncBucket struct {
	// The edge data that is correct
	sourceOfTruth map[graphdbmodel.Edge]*graphdbmodel.LoadedData
	// The edge data currently stored in graphdb
	existingData map[graphdbmodel.Edge]*graphdbmodel.LoadedData
	// Where we send MultiRequests that are needed to sync the two above stores
	sendTo chan *graphdb.MultiRequest
	// Lock on the maps
	mu sync.Mutex
}

// finishDraining is only called when we're done iterating and matching edges.  Every edge still held in the bucket
// exists in just one store, so it returns one MultiRequest per remaining edge: a create for each edge left in
// sourceOfTruth and a delete for each edge left in existingData.  Both maps are reset to nil before returning.
func (r *resyncBucket) finishDraining() []*graphdb.MultiRequest {
	r.mu.Lock()
	defer r.mu.Unlock()

	// wrap turns one single request into a MultiRequest containing only that request.
	wrap := func(one *graphdb.MultiRequest_SingleRequest) *graphdb.MultiRequest {
		return &graphdb.MultiRequest{
			Requests: []*graphdb.MultiRequest_SingleRequest{one},
		}
	}

	pending := make([]*graphdb.MultiRequest, 0, len(r.sourceOfTruth)+len(r.existingData))

	// Edges known only to the source of truth are missing from GraphDB and must be inserted there.
	for missing, loaded := range r.sourceOfTruth {
		create := &graphdb.EdgeCreateRequest{
			Edge:      twirpserver.ToProtoEdge(&missing),
			Data:      twirpserver.ToProtoDataBag(loaded.Data),
			CreatedAt: twirpserver.ToProtoTime(loaded.CreatedAt),
		}
		pending = append(pending, wrap(&graphdb.MultiRequest_SingleRequest{
			RequestType: &graphdb.MultiRequest_SingleRequest_EdgeCreate{
				EdgeCreate: create,
			},
		}))
	}

	// Edges known only to GraphDB are absent from the source of truth (cohesion) and must be removed.
	for stale := range r.existingData {
		remove := &graphdb.EdgeDeleteRequest{
			Edge: twirpserver.ToProtoEdge(&stale),
		}
		pending = append(pending, wrap(&graphdb.MultiRequest_SingleRequest{
			RequestType: &graphdb.MultiRequest_SingleRequest_EdgeDelete{
				EdgeDelete: remove,
			},
		}))
	}

	r.sourceOfTruth, r.existingData = nil, nil
	return pending
}

// attemptDrain checks whether edge has now been seen in both the source of truth and the existing data.  If it is
// missing from either, nothing happens.  If it is in both, the edge is removed from both maps and the two copies are
// compared: when they differ (creation date or data), the update that resyncs them is queued on sendTo to be handled
// later inside Resync; when they match, the edge is simply dropped.
//
// attemptDrain does not take r.mu itself; the callers in this file hold it.  The send on sendTo waits if the
// channel's buffer is full.
func (r *resyncBucket) attemptDrain(edge graphdbmodel.Edge) {
	truth, inTruth := r.sourceOfTruth[edge]
	current, inExisting := r.existingData[edge]
	if !inTruth || !inExisting {
		return
	}
	// Whatever the comparison says, this edge is finished with: it is either identical or about to be synced, so
	// it no longer needs to be tracked in memory.
	delete(r.existingData, edge)
	delete(r.sourceOfTruth, edge)
	if update := makeDataMatch(edge, truth, current); update != nil {
		// Only edges that actually need a change are queued for later processing.
		r.sendTo <- update
	}
}

// addSourceOfTruth records an edge read from the source of truth, creating the sourceOfTruth map on first use, and
// then attempts to drain the edge in case the existing data has already reported it.  The bucket lock is held for
// the whole call.
func (r *resyncBucket) addSourceOfTruth(edge *graphdbmodel.LoadedEdge) {
	r.mu.Lock()
	defer r.mu.Unlock()
	key := edge.Edge
	if r.sourceOfTruth == nil {
		r.sourceOfTruth = map[graphdbmodel.Edge]*graphdbmodel.LoadedData{}
	}
	r.sourceOfTruth[key] = &edge.LoadedData
	r.attemptDrain(key)
}

// addExistingData records an edge read from the existing data, creating the existingData map on first use, and then
// attempts to drain the edge in case the source of truth has already reported it.  The bucket lock is held for the
// whole call.
func (r *resyncBucket) addExistingData(edge *graphdbmodel.LoadedEdge) {
	r.mu.Lock()
	defer r.mu.Unlock()
	key := edge.Edge
	if r.existingData == nil {
		r.existingData = map[graphdbmodel.Edge]*graphdbmodel.LoadedData{}
	}
	r.existingData[key] = &edge.LoadedData
	r.attemptDrain(key)
}

// dump pages through every edge of kind edgeKind leaving node from, as reported by lister, and hands each one to
// edgeAddFunction.  Pages are requested with a limit of 500, following the cursor returned by each page.  Iteration
// stops successfully when a page comes back empty or without a cursor; a List error stops it immediately and is
// returned wrapped.
func dump(ctx context.Context, lister Lister, from graphdbmodel.Node, edgeKind string, edgeAddFunction func(edge *graphdbmodel.LoadedEdge)) error {
	page := graphdbmodel.PagedRequest{
		Limit:  500,
		Cursor: "",
	}
	for {
		res, err := lister.List(ctx, from, edgeKind, page)
		if err != nil {
			return errors.Wrap(err, "unable to list edges")
		}
		for i := range res.To {
			// Take a per-iteration copy so the pointer handed out below is not shared between iterations.
			item := res.To[i]
			edgeAddFunction(&item.LoadedEdge)
		}
		if res.Cursor == "" || len(res.To) == 0 {
			return nil
		}
		page.Cursor = res.Cursor
	}
}

// appendSampledEdge lets us log out which edges we had to change.  We sample that output since it could be
// very large: the sample is capped at 3 edges.  It returns appendTo with edge appended, or appendTo unchanged once
// it already holds 3 edges.
func appendSampledEdge(appendTo []*graphdb.Edge, edge *graphdb.Edge) []*graphdb.Edge {
	const maxSampledEdges = 3
	// Use >= so the sample stops at exactly maxSampledEdges; a strict > would let one extra edge through.
	if len(appendTo) >= maxSampledEdges {
		return appendTo
	}
	return append(appendTo, edge)
}
