package async

import (
	"context"
	"errors"
	"strconv"
	"time"

	"sync/atomic"

	"code.justin.tv/feeds/distconf"
	"code.justin.tv/feeds/graphdb/cmd/graphdb/internal/accesslog"
	"code.justin.tv/feeds/graphdb/cmd/graphdb/internal/graphdbmodel"
	"code.justin.tv/feeds/graphdb/cmd/graphdb/internal/interngraphdb"
	"code.justin.tv/feeds/graphdb/proto/graphdb"
	"code.justin.tv/feeds/log"
	"code.justin.tv/hygienic/statsdsender"
	"github.com/golang/protobuf/ptypes/timestamp"
)

// ProcessorConfig holds the distconf-backed settings used by Processor.
type ProcessorConfig struct {
	// RetryDelay is the delay handed to Queue.SendMessage when the failed part of
	// an async request is put back on the queue.
	RetryDelay *distconf.Duration
}

// Load binds every setting in ProcessorConfig to its distconf key. The retry
// delay is read from "graphdb.processor.retry_delay" and falls back to fifteen
// minutes. Load always returns nil.
func (p *ProcessorConfig) Load(d *distconf.Distconf) error {
	const defaultRetryDelay = 15 * time.Minute

	p.RetryDelay = d.Duration("graphdb.processor.retry_delay", defaultRetryDelay)
	return nil
}

// CountRepair updates the edge count after an edge creation or deletion, and is only needed when the inline
// increment or decrement failed.
type CountRepair interface {
	// BlockingRepair repairs the count for the edge of kind edgeKind that goes from `from` to `to`.
	// isCreation carries the queued retry's IsCreation flag (presumably true for a create and false for a
	// delete -- confirm against the implementation). The Processor only logs a returned error; it does not
	// retry the repair itself.
	BlockingRepair(from graphdbmodel.Node, edgeKind string, to graphdbmodel.Node, isCreation bool) error
}

// toProtoTime converts a time.Time into the protobuf Timestamp representation.
//
// Seconds and nanoseconds are read separately from t rather than derived from
// t.UnixNano(): splitting UnixNano with / and % yields a negative Nanos value for
// instants before the Unix epoch (the Timestamp type requires Nanos to be
// non-negative), and UnixNano itself is undefined for dates that do not fit in an
// int64 number of nanoseconds.
func toProtoTime(t time.Time) *timestamp.Timestamp {
	return &timestamp.Timestamp{
		Seconds: t.Unix(),
		Nanos:   int32(t.Nanosecond()),
	}
}

// Processor is a forever running service that continuously listens on an SQS queue for things GraphDB needed to do
// out of band: such as repair edge counts or MultiAsync requests.
//
// How the fields are used by the methods in this file:
//   - Queue is where messages are received from, deleted from, and requeued to.
//   - ProcessorConfig supplies RetryDelay, the delay used when requeueing failed requests.
//   - Log receives error logs; AccessLog receives one event per processed message.
//   - Hostname is copied into every access log event.
//   - Client executes the Multi request built from a message's async requests.
//   - Repair performs the count repairs listed in a message.
//   - Stats receives timing and counter metrics.
//
// Setup must be called before Close, because Setup allocates closeSignal.
type Processor struct {
	// Queue is the SQS queue
	Queue           *Queue
	ProcessorConfig *ProcessorConfig
	Log             log.Logger
	AccessLog       *accesslog.AccessLog
	Hostname        string
	Client          graphdb.GraphDB
	Repair          CountRepair
	Stats           statsdsender.ErrorlessStatSender `nilcheck:"nodepth"`
	// currentDelay allows us to backoff failures to process an SQS queue. It grows on every call to backoff
	// and is reset to zero by clearBackoff.
	currentDelay time.Duration
	// closeSignal is closed by Close to tell Start (and any in-progress backoff wait) to stop.
	closeSignal  chan struct{}

	// asyncBackoffDelay should only be used as an atomic integer. BackoffAsync stores 1; Start swaps it back to
	// 0 and pauses for a second when it sees the 1.
	asyncBackoffDelay int64
}

// Setup allocates the channel that Close uses to signal shutdown. It always returns nil.
func (p *Processor) Setup() error {
	p.closeSignal = make(chan struct{})
	return nil
}

// BackoffAsync allows other parts of GraphDB to back off how quickly we drain from SQS.
// It atomically sets a flag that Start consumes: the next time Start sees the flag it clears it and waits
// one second before receiving more messages. Calls made before Start observes the flag collapse into a
// single one-second pause.
func (p *Processor) BackoffAsync() {
	atomic.StoreInt64(&p.asyncBackoffDelay, 1)
}

// Start runs the queue-draining loop until Close is called, then returns nil.
//
// Each iteration:
//  1. returns if closeSignal has been closed;
//  2. pauses one second if BackoffAsync was called since the last iteration;
//  3. receives a batch of messages (backing off on a receive error);
//  4. processes every message in the batch and emits one access log event per message;
//  5. backs off if any message had to be partially requeued, otherwise resets the backoff delay.
func (p *Processor) Start() error {
	for {
		select {
		case <-p.closeSignal:
			return nil
		default:
		}
		// If other parts of GraphDB are a bit slow, wait a sec between SQS messages
		if atomic.CompareAndSwapInt64(&p.asyncBackoffDelay, 1, 0) {
			select {
			case <-p.closeSignal:
				return nil
			case <-time.After(time.Second):
			}
		}
		ctx := context.Background()
		msgs, err := p.Queue.ReceiveMessages(ctx)
		if err != nil {
			p.backoff(err)
			continue
		}
		shouldBackoffReadingQueue := false
		// We get multiple messages at once from the SQS queue
		for _, msg := range msgs {
			startTime := time.Now()
			// Derive a fresh trace context per message from the batch's base context. Reassigning ctx here
			// would make each message's context wrap the trace context of the message before it.
			msgCtx := accesslog.WithTrace(ctx)
			// This line is the meat, where we actually do what the message wants us to do
			plog := p.processMessage(msgCtx, msg)
			if plog.RequeuedMsgLen != 0 {
				// We had to requeue messages. Let's backoff
				shouldBackoffReadingQueue = true
			}
			e := &interngraphdb.AccessLog{
				Api:       interngraphdb.AccessLog_QUEUE,
				LatencyMs: time.Since(startTime).Nanoseconds() / time.Millisecond.Nanoseconds(),
				Hostname:  p.Hostname,
				MsgTime:   toProtoTime(time.Now()),
				AsyncStats: &interngraphdb.AccessLog_AsyncStats{
					NumRequests:         int64(len(msg.Msg.Requests)),
					NumCountRetries:     int64(len(msg.Msg.CountRetries)),
					NumCountNodeRetries: int64(len(msg.Msg.CountNodeRetries)),
				},
				MsgDelayMs: appendMsgDelay(msg),
				ProcessLog: plog,
			}
			// A zero delay means the age of the message could not be determined; don't report it.
			if e.MsgDelayMs != 0 {
				p.Stats.TimingC("msg_delay", e.GetMsgDelayMs(), .25)
			}
			p.AccessLog.Event(msgCtx, e)
		}
		if shouldBackoffReadingQueue {
			p.backoff(errors.New("backing off queue because of resent messages"))
		} else {
			p.clearBackoff()
		}
	}
}

// appendMsgDelay returns how old the SQS message is, in milliseconds, so the age can be added to the access logs.
// The age is measured from the message's "SentTimestamp" attribute (milliseconds since the Unix epoch).
// It returns 0 when the age cannot be determined: a nil message, a missing or nil attribute value, or a value
// that does not parse as a base-10 integer.
func appendMsgDelay(msg *Message) int64 {
	if msg == nil || msg.msg == nil {
		return 0
	}
	// time the message was sent to the queue (in milliseconds).
	// Indexing a nil map is safe, so Attributes needs no separate nil check.
	sentTimestampStr, exists := msg.msg.Attributes["SentTimestamp"]
	if !exists || sentTimestampStr == nil {
		// The nil check matters: the attribute values are pointers, and a present key with a nil value
		// would otherwise panic on the dereference below.
		return 0
	}
	sentTimestamp, err := strconv.ParseInt(*sentTimestampStr, 10, 64)
	if err != nil {
		return 0
	}
	sentAt := time.Unix(0, sentTimestamp*time.Millisecond.Nanoseconds())
	return time.Since(sentAt).Nanoseconds() / time.Millisecond.Nanoseconds()
}

// processMessageRequest translates one entry of a MultiAsync request into the equivalent entry of a
// synchronous Multi request, carrying the payload over unchanged. Edge create, delete, change-type and update
// are supported; any other request type yields nil.
func processMessageRequest(req *graphdb.MultiAsyncRequest_SingleRequest) *graphdb.MultiRequest_SingleRequest {
	var converted graphdb.MultiRequest_SingleRequest
	switch asyncType := req.RequestType.(type) {
	case *graphdb.MultiAsyncRequest_SingleRequest_EdgeCreate:
		converted.RequestType = &graphdb.MultiRequest_SingleRequest_EdgeCreate{
			EdgeCreate: asyncType.EdgeCreate,
		}
	case *graphdb.MultiAsyncRequest_SingleRequest_EdgeDelete:
		converted.RequestType = &graphdb.MultiRequest_SingleRequest_EdgeDelete{
			EdgeDelete: asyncType.EdgeDelete,
		}
	case *graphdb.MultiAsyncRequest_SingleRequest_EdgeChangeType:
		converted.RequestType = &graphdb.MultiRequest_SingleRequest_EdgeChangeType{
			EdgeChangeType: asyncType.EdgeChangeType,
		}
	case *graphdb.MultiAsyncRequest_SingleRequest_EdgeUpdate:
		converted.RequestType = &graphdb.MultiRequest_SingleRequest_EdgeUpdate{
			EdgeUpdate: asyncType.EdgeUpdate,
		}
	default:
		// Not a request type we know how to run synchronously.
		return nil
	}
	return &converted
}

// processMessage breaks down the parts of a Message and processes each of them individually: first the
// MultiAsync requests, then the count repairs. It returns a log of what happened to the async requests.
//
// If the async step deliberately left the message on the queue so that it is redelivered (every request
// failed, or the partial requeue could not be sent), the count repairs are skipped for this delivery.
// processCounts deletes the message when it finishes, which would otherwise discard the requests that are
// waiting to be retried. The count repairs run when the message is redelivered.
func (p *Processor) processMessage(ctx context.Context, msg *Message) *interngraphdb.AccessLog_AsyncProcessLog {
	// I'm not happy with this logic.  It should be clearer the relationship between reading a message and deleting it
	//  We should rewrite this some day.
	var ret interngraphdb.AccessLog_AsyncProcessLog
	// Process the AsyncMulti requests
	p.processAsyncRequests(ctx, msg, &ret)
	if ret.TotalFailure || ret.RequeueFailure != "" {
		// The message must stay on the queue; do not let count processing delete it.
		return &ret
	}
	// Process fixing count repair
	p.processCounts(ctx, msg)
	return &ret
}

// processCounts replays the inline count increment/decrement operations that failed earlier. A typical case:
// a follow edge was created but the DynamoDB `incr` of the edge count did not go through, so the count is
// fixed up later from the queue.
//
// Every count retry in the message is handed to Repair; a repair error is only logged. Afterwards the
// message is deleted from the queue. Messages without count retries are left untouched.
func (p *Processor) processCounts(ctx context.Context, msg *Message) {
	retryCount := len(msg.Msg.CountRetries)
	if retryCount == 0 {
		return
	}
	began := time.Now()
	defer func() {
		p.Stats.TimingDurationC("counts", time.Since(began), .25)
	}()
	p.Stats.IncC("recounts", int64(retryCount), .25)

	for _, retry := range msg.Msg.CountRetries {
		source := graphdbmodel.Node{Type: retry.Edge.From.Type, ID: retry.Edge.From.Id}
		target := graphdbmodel.Node{Type: retry.Edge.To.Type, ID: retry.Edge.To.Id}
		repairErr := p.Repair.BlockingRepair(source, retry.Edge.Type, target, retry.IsCreation)
		if repairErr != nil {
			// The repair process will requeue the repair itself
			p.Log.Log("err", repairErr)
		}
	}

	deleteErr := p.Queue.DeleteMessage(ctx, msg)
	if deleteErr != nil {
		p.Log.Log("err", deleteErr, "unable to remove previous request.  Some messages will be count incremented twice")
		return
	}
	p.Stats.IncC("counts.deleted_message", 1, .25)
}

// createRequeueRequest does the out of band Async request, iterates the errors from the request, and returns
// a new SQS message holding the parts of the multi request that failed, so they can be tried again.
//
// multiRequest is the synchronous request to execute. byOriginalReq holds the original async requests in the
// same order as multiRequest.Requests, so that response index i maps back to byOriginalReq[i].
//
// If the Multi call itself fails (or yields no response), nothing is known to have succeeded, so every
// original request is returned for requeueing. Continuing in that case would dereference a nil response.
func (p *Processor) createRequeueRequest(ctx context.Context, multiRequest graphdb.MultiRequest, byOriginalReq []*graphdb.MultiAsyncRequest_SingleRequest) interngraphdb.AsyncRequestQueueMessage {
	var requeueRequest interngraphdb.AsyncRequestQueueMessage
	multiResp, err := p.Client.Multi(ctx, &multiRequest)
	if err != nil {
		p.Log.Log("err", err, "unable to do multi request")
	}
	if err != nil || multiResp == nil {
		requeueRequest.Requests = append(requeueRequest.Requests, byOriginalReq...)
		return requeueRequest
	}
	for idx, resp := range multiResp.Response {
		if idx >= len(byOriginalReq) {
			// More responses than requests sent: there is no original request to map the extras to.
			break
		}
		if respErr := resp.GetError(); respErr != nil {
			p.Log.Log("err", errors.New(respErr.Message), "single part of request failed")
			requeueRequest.Requests = append(requeueRequest.Requests, byOriginalReq[idx])
		}
	}
	return requeueRequest
}

// processAsyncRequests executes the MultiAsync requests carried by msg and records the outcome in processLog.
//
// The async requests are converted into one synchronous Multi request (unknown request types are logged and
// dropped), the request is executed, and then one of three things happens:
//   - every request failed: the message is left on the queue so it is redelivered later;
//   - some requests failed: only those are sent back to the queue with the configured retry delay, and the
//     original message is deleted (unless the requeue itself fails, in which case the message is kept);
//   - nothing failed: the original message is deleted.
//
// Messages without async requests are left untouched.
func (p *Processor) processAsyncRequests(ctx context.Context, msg *Message, processLog *interngraphdb.AccessLog_AsyncProcessLog) {
	total := len(msg.Msg.Requests)
	if total == 0 {
		return
	}
	began := time.Now()
	defer func() {
		p.Stats.TimingDurationC("requests", time.Since(began), .25)
	}()

	// Build the synchronous request, remembering which async request each entry came from.
	var multiRequest graphdb.MultiRequest
	originals := make([]*graphdb.MultiAsyncRequest_SingleRequest, 0, total)
	for _, asyncReq := range msg.Msg.Requests {
		syncReq := processMessageRequest(asyncReq)
		if syncReq == nil {
			p.Log.Log("req_type", asyncReq.GetRequestType(), "unknown request type")
			continue
		}
		originals = append(originals, asyncReq)
		multiRequest.Requests = append(multiRequest.Requests, syncReq)
	}

	// Execute it; what comes back is the set of requests that need another attempt.
	requeueRequest := p.createRequeueRequest(ctx, multiRequest, originals)
	failed := len(requeueRequest.Requests)
	p.Stats.IncC("async.processed", int64(total-failed), .25)

	switch {
	case failed == total:
		// Every request failed.  Don't do anything (let the msg wait time expire naturally and try again)
		processLog.TotalFailure = true
		p.Stats.IncC("async.total_failure", 1, .25)
		return
	case failed != 0:
		// Some requests failed.  We have to requeue those.  The caller throttles reading from SQS when
		// RequeuedMsgLen is set.
		processLog.RequeuedMsgLen = int64(failed)
		p.Stats.IncC("async.requeued_requests", int64(failed), .25)
		if sendErr := p.Queue.SendMessage(ctx, &requeueRequest, p.ProcessorConfig.RetryDelay.Get()); sendErr != nil {
			processLog.RequeueFailure = sendErr.Error()
			p.Log.Log("err", sendErr, "unable to requeue partial request")
			return
		}
	}

	deleteErr := p.Queue.DeleteMessage(ctx, msg)
	if deleteErr != nil {
		processLog.DeleteMsgFailure = deleteErr.Error()
		p.Log.Log("err", deleteErr, "unable to remove previous request.  Some messages will be tried twice")
		return
	}
	p.Stats.IncC("async.deleted_message", 1, .25)
}

// backoff logs err and then blocks for a growing delay; it is used when we're having continuous errors
// processing the SQS queue. Each call adds one second to the current delay and doubles the result
// (2s, 6s, 14s, ...), capped at five minutes. The wait ends early if the processor is closed.
func (p *Processor) backoff(err error) {
	const maxDelay = time.Minute * 5

	p.Log.Log("err", err, "backing off because of error")
	p.currentDelay += time.Second
	p.currentDelay *= 2
	if p.currentDelay > maxDelay {
		p.currentDelay = maxDelay
	}
	// Use an explicit timer rather than time.After so it can be stopped when closeSignal wins the select,
	// instead of staying live until the full delay has elapsed.
	timer := time.NewTimer(p.currentDelay)
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-p.closeSignal:
	}
}

// clearBackoff resets the backoff delay to zero, so the next call to backoff starts again from the shortest
// delay.
func (p *Processor) clearBackoff() {
	p.currentDelay = 0
}

// Close ends the SQS listener by closing closeSignal: Start returns nil the next time it checks the channel,
// and a backoff wait in progress is woken up. It always returns nil.
// Close must be called at most once and only after Setup; closing an already-closed or nil channel panics.
func (p *Processor) Close() error {
	close(p.closeSignal)
	return nil
}
