package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/url"
	"os"
	"runtime"
	"runtime/debug"
	"strings"

	"code.justin.tv/common/chitin"
	_ "code.justin.tv/common/golibs/bininfo"
	"code.justin.tv/release/trace/persistent"
	"github.com/aws/aws-lambda-go/events"
	"github.com/aws/aws-lambda-go/lambda"
	"github.com/aws/aws-lambda-go/lambdacontext"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/aws/aws-sdk-go/service/s3/s3iface"
	"github.com/aws/aws-sdk-go/service/s3/s3manager"
	"github.com/golang/protobuf/proto"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/storage"
	"github.com/syndtr/goleveldb/leveldb/util"
)

// main tunes the Go runtime for a small Lambda container, calls
// chitin.ExperimentalTraceProcessOptIn, and then hands control to the Lambda
// runtime with handleS3Event as the handler.
func main() {
	// Lambda reports two CPUs but throttles how much time we may spend on
	// them. With a small memory/CPU quota we probably cannot execute on both
	// cores simultaneously, which hurts the garbage collector because it has
	// to synchronize across every core.
	//
	// This function is configured with little enough memory that Lambda
	// effectively grants a single core, so tell the Go runtime as much —
	// unless the operator has set GOMAXPROCS explicitly.
	if os.Getenv("GOMAXPROCS") == "" {
		runtime.GOMAXPROCS(1)
	}

	// Likewise, only override the GC target when GOGC has not been set in
	// the environment.
	if os.Getenv("GOGC") == "" {
		debug.SetGCPercent(10)
	}

	// TODO: figure out how to get data from Lambda into Trace
	if err := chitin.ExperimentalTraceProcessOptIn(); err != nil {
		log.Fatalf("chitin setup: %v", err)
	}

	lambda.Start(handleS3Event)
}

// handleS3Event is the Lambda entry point for S3 notifications. It builds an
// S3 client for the region named by AWS_DEFAULT_REGION and processes each
// record of the event in order via handleFileCreation, stopping at the first
// failure.
//
// The returned payload is always nil; only the error carries information.
// resetHeap is deferred so it runs at the end of every invocation, whether
// it succeeds or fails.
func handleS3Event(ctx context.Context, ev events.S3Event) (json.RawMessage, error) {
	defer resetHeap()

	cfg := aws.NewConfig().WithRegion(os.Getenv("AWS_DEFAULT_REGION"))
	sess, err := session.NewSession(cfg)
	if err != nil {
		return nil, err
	}
	client := s3.New(sess)

	for i := range ev.Records {
		if err := handleFileCreation(ctx, client, ev.Records[i]); err != nil {
			return nil, err
		}
	}

	return nil, nil
}

// handleFileCreation merges all index fragments stored in one newly created
// S3 object into a single LevelDB database and uploads the zipped result.
//
// Steps:
//  1. Download the object named by rec fully into memory.
//  2. Split it into length-prefixed records and add each one to a fresh
//     in-memory index database.
//  3. Compact the database, close it (keeping its storage open), and upload
//     a zip of that storage under "index/<key without the index-fragment/
//     prefix>_<Lambda request ID>".
//
// The first error encountered is returned unmodified, except for a key
// decoding failure, which is wrapped with the offending key.
func handleFileCreation(ctx context.Context, s3c s3iface.S3API, rec events.S3EventRecord) error {
	// S3 event notifications deliver the object key URL-encoded (a space
	// arrives as "+", for example). Decode it so that both the download and
	// the derived output key use the real key name.
	key, err := url.QueryUnescape(rec.S3.Object.Key)
	if err != nil {
		return fmt.Errorf("decoding object key %q: %v", rec.S3.Object.Key, err)
	}
	log.Printf("bucket=%q key=%q", rec.S3.Bucket.Name, key)

	obj, err := s3c.GetObjectWithContext(ctx, &s3.GetObjectInput{
		Bucket: aws.String(rec.S3.Bucket.Name),
		Key:    aws.String(key),
	})
	if err != nil {
		return err
	}

	l := aws.Int64Value(obj.ContentLength)
	s3File := bytes.NewBuffer(make([]byte, 0, int(l)))
	_, err = io.CopyN(s3File, obj.Body, l)
	// Release the response body as soon as we are done reading, on success
	// and on failure alike, so the underlying connection is not leaked for
	// the rest of this (potentially long) invocation.
	if cerr := obj.Body.Close(); cerr != nil {
		log.Printf("body.Close err=%q", cerr)
	}
	if err != nil {
		return err
	}
	log.Printf("read len=%d", l)

	idb, err := newIndexDB()
	if err != nil {
		return err
	}
	// Safety net for every return path. On the success path the database
	// handle has already been closed explicitly below; the error from this
	// deferred call is discarded.
	defer idb.Close()

	// Firehose output files are the concatenation of Firehose records. Each
	// record here contains a zip-formatted leveldb database, prefixed with
	// the protobuf-format length of the file.
	pb := proto.NewBuffer(s3File.Bytes())
	for {
		idx, err := pb.DecodeRawBytes(false)
		if err == io.ErrUnexpectedEOF {
			// This loop treats io.ErrUnexpectedEOF as the end of the input.
			break
		} else if err != nil {
			return err
		}

		err = idb.AddZip(idx)
		if err != nil {
			return err
		}
	}

	// A nil Start and Limit compacts the whole key range.
	err = idb.ldb.CompactRange(util.Range{Start: nil, Limit: nil})
	if err != nil {
		return err
	}
	log.Printf("DB write complete")

	// Close the LevelDB database, but don't close the underlying storage; we
	// need to zip that and save it in S3.
	err = idb.ldb.Close()
	if err != nil {
		log.Printf("ldb.Close err=%q", err)
	}

	// Suffix the output key with the Lambda request ID (empty when the
	// context carries no Lambda metadata).
	var reqID string
	if lc, ok := lambdacontext.FromContext(ctx); ok {
		reqID = lc.AwsRequestID
	}
	s3Key := fmt.Sprintf("index/%s_%s",
		strings.TrimPrefix(key, "index-fragment/"),
		url.PathEscape(reqID))

	return saveZip(ctx, s3c, idb.stor, s3Key)
}

// saveZip streams a zip archive of stor to S3 under the given key.
//
// The archive is produced by persistent.NewZipWriter in a separate goroutine
// and fed through an in-memory pipe to the S3 upload manager, so the zip is
// never held in memory as one whole value by this function. The upload error,
// if any, is returned; an error from the zip writer reaches the uploader as a
// read error on the pipe.
func saveZip(ctx context.Context, s3c s3iface.S3API, stor storage.Storage, key string) error {
	zipper := persistent.NewZipWriter(stor)

	pr, pw := io.Pipe()
	go func() {
		// Close the write half exactly once, with the writer's own result:
		// a nil error makes the reader see a clean EOF, a non-nil error is
		// surfaced to the reader as-is. Doing this in a single call avoids
		// depending on which of several Close calls wins.
		_, err := zipper.WriteTo(pw)
		pw.CloseWithError(err)
	}()

	// Closing the read half unblocks the producer goroutine if the upload
	// stops early (error or cancellation) before draining the pipe.
	defer pr.Close()

	s3m := s3manager.NewUploaderWithClient(s3c)
	_, err := s3m.UploadWithContext(ctx, &s3manager.UploadInput{
		Bucket: aws.String("twitch-bs-video-trace-archive"),
		Key:    aws.String(key),
		Body:   pr,
	})
	return err
}

// indexDB pairs a LevelDB database with the storage that backs it. Keeping
// the storage separately reachable lets callers close the database handle
// while still reading the raw storage afterwards (handleFileCreation zips
// it for upload).
type indexDB struct {
	stor storage.Storage // backing storage for ldb; closed by Close
	ldb  *leveldb.DB     // database opened on top of stor
}

// newIndexDB opens an empty LevelDB database with default options on top of
// a fresh in-memory storage and returns both wrapped in an indexDB. The
// caller is responsible for calling Close on the result.
func newIndexDB() (*indexDB, error) {
	stor := storage.NewMemStorage()

	ldb, err := leveldb.Open(stor, nil)
	if err != nil {
		return nil, err
	}

	return &indexDB{stor: stor, ldb: ldb}, nil
}

// Close shuts down the LevelDB handle and then its backing storage. Both are
// always closed, even if the first close fails. When both report an error,
// the database error takes precedence and the storage error is dropped.
func (idx *indexDB) Close() error {
	dbErr := idx.ldb.Close()
	storErr := idx.stor.Close()

	switch {
	case dbErr != nil:
		return dbErr
	case storErr != nil:
		return storErr
	default:
		return nil
	}
}

// AddZip copies every key/value pair from one index fragment into idx.
//
// data is handed to persistent.NewZipLDBStorage (per the caller, a
// zip-formatted LevelDB database) and opened read-only. All of its entries
// are collected into a single batch and written to idx.ldb in one call, so
// either the whole fragment is applied or, on error, none of it is.
//
// An error is returned if the fragment cannot be opened, if iterating over
// it fails, or if the batch write fails.
func (idx *indexDB) AddZip(data []byte) error {
	zs, err := persistent.NewZipLDBStorage(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return err
	}
	defer zs.Close()

	zdb, err := leveldb.Open(zs, &opt.Options{ReadOnly: true})
	if err != nil {
		return err
	}
	defer zdb.Close()

	// A nil Start and Limit iterates over the entire key space.
	it := zdb.NewIterator(&util.Range{Limit: nil, Start: nil}, nil)
	defer it.Release()

	batch := new(leveldb.Batch)
	for it.Next() {
		batch.Put(it.Key(), it.Value())
	}
	// Next returning false means either "exhausted" or "failed". Check which
	// before committing, so a read error in the fragment is not mistaken for
	// a short but complete fragment.
	if err := it.Error(); err != nil {
		return err
	}

	return idx.ldb.Write(batch, nil)
}

// resetHeap forces a garbage collection, returns unused memory to the
// operating system, and logs the current memory allocation stats.
//
// handleS3Event defers this function, so it runs once at the end of every
// invocation.
//
// Even when the heap is garbage-collected after every request, this program's
// memory usage increases over time. That doesn't work well for Lambda's model
// of limiting and billing on memory usage. It looks like the memory usage (as
// measured and logged by Lambda) remains inflated even after waiting for 5
// minutes of wall clock time or 5 minutes of unfrozen lambda process
// execution time. The scavenger is supposed to return memory to the OS after
// five minutes of the runtime not using the memory. The memory shows up as
// HeapIdle in runtime.MemStats.
//
// I suspect the memory is large spans from leveldb's in-memory storage, which
// is backed by bytes.Buffers. When those grow to larger than 32kB, they're
// backed by individual large allocations. When large allocations are freed,
// they end up in a treap that is consulted for new large allocations. If a
// match is found, any remaining portion is put back into the treap. The
// growth behavior of bytes.Buffer is to double the current capacity and add
// the size of the current Write call, which could result in many different
// sizes of large allocations. Splitting large allocations could cause many of
// them to appear to the scavenger as recently used. I don't have data to
// support this.
//
// TODO: decide how to add autoprof to Lambda functions: Should it sample
// randomly as usual? Should it occasionally collect a bundle at the start and
// end of an invocation? Should it occasionally collect a CPU profile for a
// complete invocation?
func resetHeap() {
	// FreeOSMemory runs a garbage collection and then tries to hand as much
	// memory back to the OS as possible, rather than waiting for the
	// background scavenger.
	debug.FreeOSMemory()
	logMemStats()
}

// logMemStats logs the program's current memory allocation statistics.
//
// It takes a runtime.MemStats snapshot and writes it through the standard
// logger as a single line of the form "memstats <JSON object>". If the
// snapshot cannot be encoded, the encoding error is logged instead of an
// empty payload.
func logMemStats() {
	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	mbuf, err := json.Marshal(&m)
	if err != nil {
		log.Printf("memstats marshal err=%q", err)
		return
	}
	log.Printf("memstats %s", mbuf)
}
