package StarfruitTSMClient

import (
	"context"
	"math/rand"
	"sync"
	"time"

	"code.justin.tv/amzn/StarfruitTSMClient/internal/errorbucket"
	rpc "code.justin.tv/amzn/StarfruitTranscodeStateMgrTwirp"
	"github.com/pkg/errors"
	"golang.org/x/sync/errgroup"
)

const (
	// A static number of retries that can occur in total for a single parallel scan invocation.
	// This is used to increase availability in the face of rare request timeouts without overloading TSM.
	// The same budget is also used for a single GetByCustomerID invocation.
	staticRetryCount = 3

	// The maximum amount of delay before making an individual scan request.
	// Requests are slightly delayed with jitter to space requests out evenly.
	// Note: despite the "Milliseconds" suffix this is a time.Duration (5ms), not a bare count of milliseconds.
	maxDelayPerRequestMilliseconds = 5 * time.Millisecond

	// DefaultConcurrency is the concurrency NewParallelScanner falls back to when it is given a concurrency of 0.
	DefaultConcurrency = 100
)

// TSMScanner is the single TSM client method that ParallelScanner depends on.
// ParallelScanner calls ScanTranscodes repeatedly per segment, feeding each response's
// LastTranscodeId into the next request until an empty LastTranscodeId is returned.
type TSMScanner interface {
	ScanTranscodes(ctx context.Context, req *rpc.ScanTranscodesRequest) (*rpc.ScanTranscodesResponse, error)
}

// ParallelScanner scans TSM's transcodes by splitting the scan into segments and
// reading every segment from its own goroutine. Construct it with NewParallelScanner.
type ParallelScanner struct {
	// concurrency is both the number of goroutines Scan starts and the TotalSegments value sent with every request.
	concurrency uint64
	// caller is sent as the Caller field of every scan request.
	caller string
	// perRequestTimeout bounds each individual ScanTranscodes call (not the scan as a whole).
	perRequestTimeout time.Duration
	// client performs the actual ScanTranscodes calls.
	client TSMScanner
}

// NewParallelScanner builds a ParallelScanner that issues its requests through client.
//
// concurrency is the number of segments (and goroutines) a Scan is split into; passing 0
// selects DefaultConcurrency. caller is attached to every request, and perRequestTimeout
// is the timeout applied to each individual request.
func NewParallelScanner(client TSMScanner, concurrency uint64, caller string, perRequestTimeout time.Duration) *ParallelScanner {
	scanner := &ParallelScanner{
		concurrency:       DefaultConcurrency,
		caller:            caller,
		perRequestTimeout: perRequestTimeout,
		client:            client,
	}

	// Only an explicit, non-zero concurrency overrides the default.
	if concurrency != 0 {
		scanner.concurrency = concurrency
	}

	return scanner
}

// Scan reads all transcodes from TSM by running one scan loop per segment, with
// p.concurrency segments processed in parallel goroutines.
//
// ctx should carry the deadline for the whole operation (for example one minute), while
// every individual request made by a goroutine is additionally bounded by the
// ParallelScanner's perRequestTimeout (for example three seconds).
//
// All goroutines share a single retry budget of staticRetryCount. If any segment fails,
// the first error is returned and no transcodes are returned; otherwise the transcodes
// of all segments are returned in the order their pages happened to arrive.
func (p *ParallelScanner) Scan(ctx context.Context) ([]*rpc.Transcode, error) {
	var (
		lock    sync.Mutex
		results = make([]*rpc.Transcode, 0)
	)

	// collect is invoked concurrently by every segment goroutine, so the append is serialized.
	collect := func(batch []*rpc.Transcode) {
		lock.Lock()
		defer lock.Unlock()
		results = append(results, batch...)
	}

	// One retry budget shared by all segments of this invocation.
	retries := &errorbucket.Bucket{Count: staticRetryCount}

	group, groupCtx := errgroup.WithContext(ctx)
	for segment := uint64(0); segment < p.concurrency; segment++ {
		group.Go(p.scanLoop(groupCtx, segment, collect, retries))
	}

	if err := group.Wait(); err != nil {
		return nil, err
	}
	return results, nil
}

// scanLoop returns the worker function that pages through one scan segment.
//
// The returned function repeatedly calls ScanTranscodes for the given segment (out of
// p.concurrency total segments). Each page of transcodes is handed to callback, and the
// next request resumes from the LastTranscodeId of the previous response. The loop ends
// with a nil error once a response carries an empty LastTranscodeId.
//
// Every request runs under its own context derived from baseCtx with p.perRequestTimeout
// applied. A failed request is retried unchanged as long as errbucket allows it and
// baseCtx is still live; otherwise the request error is returned, wrapped with the
// segment, the number of completed (non-final) pages and the last seen transcode ID.
// A nil response without an error is reported as an error rather than dereferenced.
func (p *ParallelScanner) scanLoop(baseCtx context.Context, segment uint64, callback func(transcodes []*rpc.Transcode), errbucket *errorbucket.Bucket) func() error {
	return func() error {
		var last string
		var count uint64

		for {
			// jitter sleep a tiny bit before each request to space requests out
			time.Sleep(jitter(maxDelayPerRequestMilliseconds))

			req := &rpc.ScanTranscodesRequest{
				Caller:        p.caller,
				Segment:       int64(segment),
				TotalSegments: int64(p.concurrency),

				LastTranscodeId: last,
			}

			ctx, cancel := context.WithTimeout(baseCtx, p.perRequestTimeout)
			resp, err := p.client.ScanTranscodes(ctx, req)
			cancel()
			if err != nil {
				// The errorbucket permits a static number of retries to account for
				// rare timeouts. This is useful because we don't want to fail
				// the whole scan if a single request out of a thousand times out.
				//
				// Retrying only makes sense while the overall operation is still live:
				// once baseCtx is done, any context derived from it is already done too,
				// so a retry would just burn the retry budget.
				//
				// If allowed to retry, just make the same request again without modifying it.
				if baseCtx.Err() == nil && errbucket.Allow() {
					continue
				}

				return errors.Wrapf(err, "scan segment=%v call_count=%v last_id=%q", segment, count, last)
			}

			// Guard the field access below: a nil response would otherwise panic inside
			// the goroutine instead of surfacing as an error from the scan.
			if resp == nil {
				return errors.Errorf("scan segment=%v call_count=%v last_id=%q: nil response without error", segment, count, last)
			}

			callback(resp.GetTranscodes())
			last = resp.LastTranscodeId

			// There are no more transcodes to process in this segment.
			if last == "" {
				return nil
			}

			count++
		}
	}
}

// TSMCustomerIDGetter is the single TSM client method that ParallelCustomerIDGetter depends on.
// ParallelCustomerIDGetter calls it repeatedly per partition, feeding each response's
// LastTranscodeId into the next request until an empty LastTranscodeId is returned.
type TSMCustomerIDGetter interface {
	GetTranscodesByCustomerIDParallel(ctx context.Context, req *rpc.GetTranscodesByCustomerIDParallelRequest) (*rpc.GetTranscodesByCustomerIDParallelResponse, error)
}

// ParallelCustomerIDGetter fetches all transcodes of one customer by querying every
// TSM partition from its own goroutine. Construct it with NewParallelCustomerIDGetter.
type ParallelCustomerIDGetter struct {
	// perRequestTimeout bounds each individual GetTranscodesByCustomerIDParallel call (not the whole lookup).
	perRequestTimeout time.Duration
	// client performs the actual GetTranscodesByCustomerIDParallel calls.
	client TSMCustomerIDGetter
}

// NewParallelCustomerIDGetter builds a ParallelCustomerIDGetter that issues its requests
// through client, applying perRequestTimeout to each individual request.
func NewParallelCustomerIDGetter(client TSMCustomerIDGetter, perRequestTimeout time.Duration) *ParallelCustomerIDGetter {
	getter := new(ParallelCustomerIDGetter)
	getter.perRequestTimeout = perRequestTimeout
	getter.client = client
	return getter
}

// GetByCustomerID returns every transcode belonging to customerID by querying all TSM
// partitions in parallel, one goroutine per partition.
//
// ctx bounds the whole operation; each individual request is additionally bounded by the
// getter's perRequestTimeout. All goroutines share a single retry budget of
// staticRetryCount. If any partition fails, the first error is returned and no
// transcodes are returned; otherwise the transcodes of all partitions are returned in
// the order their pages happened to arrive.
func (p *ParallelCustomerIDGetter) GetByCustomerID(ctx context.Context, customerID string) ([]*rpc.Transcode, error) {
	// TSM shards writes to its CustomerID GSI over 100 partitions, from the range [0, 100)
	const tsmPartitionCount = 100

	var (
		lock    sync.Mutex
		results = make([]*rpc.Transcode, 0)
	)

	// collect is invoked concurrently by every partition goroutine, so the append is serialized.
	collect := func(batch []*rpc.Transcode) {
		lock.Lock()
		defer lock.Unlock()
		results = append(results, batch...)
	}

	// One retry budget shared by all partitions of this invocation.
	retries := &errorbucket.Bucket{Count: staticRetryCount}

	group, groupCtx := errgroup.WithContext(ctx)
	for partition := uint64(0); partition < tsmPartitionCount; partition++ {
		group.Go(p.getByCustomerIDLoop(groupCtx, customerID, partition, collect, retries))
	}

	if err := group.Wait(); err != nil {
		return nil, err
	}
	return results, nil
}

// getByCustomerIDLoop returns the worker function that pages through one partition for
// the given customer.
//
// The returned function repeatedly calls GetTranscodesByCustomerIDParallel for the given
// partition. Each page of transcodes is handed to callback, and the next request resumes
// from the LastTranscodeId of the previous response. The loop ends with a nil error once
// a response carries an empty LastTranscodeId.
//
// Every request runs under its own context derived from baseCtx with p.perRequestTimeout
// applied. A failed request is retried unchanged as long as errbucket allows it and
// baseCtx is still live; otherwise the request error is returned, wrapped with the
// partition, the number of completed (non-final) pages and the last seen transcode ID.
// A nil response without an error is reported as an error rather than dereferenced.
func (p *ParallelCustomerIDGetter) getByCustomerIDLoop(baseCtx context.Context, customerID string, partition uint64, callback func(transcodes []*rpc.Transcode), errbucket *errorbucket.Bucket) func() error {
	return func() error {
		var last string
		var count uint64

		for {
			// jitter sleep a tiny bit before each request to space requests out
			time.Sleep(jitter(maxDelayPerRequestMilliseconds))

			req := &rpc.GetTranscodesByCustomerIDParallelRequest{
				Partition:       int64(partition),
				LastTranscodeId: last,
				CustomerId:      customerID,
			}

			ctx, cancel := context.WithTimeout(baseCtx, p.perRequestTimeout)
			resp, err := p.client.GetTranscodesByCustomerIDParallel(ctx, req)
			cancel()
			if err != nil {
				// The errorbucket permits a static number of retries to account for
				// rare timeouts. This is useful because we don't want to fail
				// the whole operation if a single request out of a thousand times out.
				//
				// Retrying only makes sense while the overall operation is still live:
				// once baseCtx is done, any context derived from it is already done too,
				// so a retry would just burn the retry budget.
				//
				// If allowed to retry, just make the same request again without modifying it.
				if baseCtx.Err() == nil && errbucket.Allow() {
					continue
				}

				return errors.Wrapf(err, "getByCustomerID partition=%v call_count=%v last_id=%q", partition, count, last)
			}

			// Guard the field access below: a nil response would otherwise panic inside
			// the goroutine instead of surfacing as an error from the lookup.
			if resp == nil {
				return errors.Errorf("getByCustomerID partition=%v call_count=%v last_id=%q: nil response without error", partition, count, last)
			}

			callback(resp.GetTranscodes())
			last = resp.LastTranscodeId

			// There are no more transcodes to process in this partition.
			if last == "" {
				return nil
			}

			count++
		}
	}
}

// jitter returns a pseudo-random duration in the half-open range [0, d).
// A non-positive d yields 0.
//
// The value is drawn from math/rand's top-level source, which is safe for concurrent
// use. Building a fresh generator seeded from the wall clock on every call is costly and
// hands identical "random" delays to goroutines that observe the same clock reading,
// which defeats the purpose of spreading requests out.
//
// On Go versions before 1.20 the top-level source is deterministically seeded unless the
// program calls rand.Seed; that only makes the sequence repeatable across runs, while
// concurrent callers still receive different values.
func jitter(d time.Duration) time.Duration {
	if d <= 0 {
		return 0
	}
	return time.Duration(rand.Float64() * float64(d))
}
