package fim

import (
	"log"
	"reflect"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"
)

// sha256Len is the size, in bytes, of a SHA-256 digest.
const sha256Len = 32

// hashResult is the per-file record kept in hashesShard.results (and, embedded in dockerHashResult, in
// hashesShard.dockerResults).
type hashResult struct {
	// Used for symlinks.
	// NOTE(review): presumably the resolved target of the link; confirm against the code that fills it.
	realPath string
	// Categories are stored separated by zero byte ("\000"); categoriesFromSlice and categoriesToSlice
	// convert between this form and a []string.
	categories string
	// The hash is kept as a string rather than a []byte because a string is smaller than the byte slice
	// (no cap).
	sha256Value string

	// Generation in which this result was produced. runGC deletes every result whose value differs from
	// the current hashesState.gcGeneration. See hashGeneration in runner.
	gcGeneration int64
}

// dockerHashResult is a hashResult extended with information about the container the file belongs to.
// Values of this type are stored in hashesShard.dockerResults.
type dockerHashResult struct {
	// Embedded so the hashResult fields (including gcGeneration) are accessible directly.
	hashResult
	// Container the hashed file was found in.
	container containerInfo
}

// byteSliceToString reinterprets b as a string without copying its contents, unlike the standard
// string(b) conversion. The returned string shares memory with b, so the caller must take extra care not
// to modify the bytes through the slice afterwards. Taken from https://github.com/golang/go/issues/25484
func byteSliceToString(b []byte) (s string) {
	in := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	out := (*reflect.StringHeader)(unsafe.Pointer(&s))
	// The data pointer is stored before the length is read so that b is still referenced at the moment
	// the pointer lands in s.
	out.Data = in.Data
	out.Len = in.Len
	return s
}

// stringToByteSlice reinterprets s as a byte slice without copying its contents. The returned slice has
// length and capacity equal to len(s) and shares memory with s, so it must not be written to: doing so
// would modify the string's contents.
func stringToByteSlice(s string) (b []byte) {
	in := (*reflect.StringHeader)(unsafe.Pointer(&s))
	out := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	// Store the data pointer first, while s is still referenced through in.
	out.Data = in.Data
	out.Len, out.Cap = in.Len, in.Len
	return b
}

// categoriesFromSlice packs a list of categories into the single-string form stored in
// hashResult.categories: the elements joined by a zero byte. An empty or nil list packs to "".
func categoriesFromSlice(s []string) string {
	// Fast path: a single category is stored as is, no joining needed.
	if len(s) == 1 {
		return s[0]
	}
	return strings.Join(s, "\000")
}

// categoriesToSlice unpacks the zero-byte-separated form produced by categoriesFromSlice back into a
// list. Note that an empty string unpacks to a one-element slice holding "", not to an empty slice.
func categoriesToSlice(s string) []string {
	const separator = "\000"
	// A count of -1 places no limit on the number of parts.
	return strings.SplitN(s, separator, -1)
}

// containerInfo describes the container a hashed file belongs to. It is stored by value in every
// dockerHashResult, and the lengths of its string fields are counted by getDockerResultMemorySize.
type containerInfo struct {
	containerID string
	// NOTE(review): a single string although the name is plural; presumably several names joined
	// together. Confirm the separator with the code that fills it.
	containerNames string
	imageID        string
	// NOTE(review): same remark as for containerNames.
	imageTags string
	// NOTE(review): presumably the length of the host-FS prefix to strip from the "real" path to obtain
	// the path inside the container (see the comment on hashesState.shards); confirm against the reader.
	containerPrefixLen int
}

// hashesShard is one independently locked slice of the hash store. runGC holds mu while it walks and
// prunes both maps.
type hashesShard struct {
	// Held by pointer, so a copied hashesShard value still refers to the same lock.
	mu *sync.Mutex
	// The key is the file path. Using *hashResult instead of hashResult (surprisingly?) reduces the overall
	// memory usage.
	results map[string]*hashResult
	// The key is the "real" path, i.e. the path in the host FS.
	dockerResults map[string]*dockerHashResult
}

// hashesState holds all stored hashing results, split into shards, together with the generation counter
// that runGC uses to decide which results are stale.
type hashesState struct {
	// Sharded hash map path -> hashing result. Note that the key is the "real" path (i.e.
	// /var/lib/docker/overlay2/... for files in the container); the reader should subtract the
	// containerPathLen prefix from the path to get the path inside the container. Shard key is the worker
	// index (i.e. path hash).
	// NOTE(review): "containerPathLen" presumably refers to containerInfo.containerPrefixLen; confirm.
	//
	// We could use sync.Map, but we split all files between workers by file path hash anyway (see runner).
	//
	// Another alternative would be double buffering (run a separate goroutine creating a new map from
	// hashed files and atomically replacing the map with the new map), but it requires 2x memory.
	shards []hashesShard

	// The periodic rehashing performs a GC-style cleanup: the generation counter is incremented before
	// starting the hashing jobs and is stored in each new hashResult. After finishing all the rehashing jobs,
	// the results with older counter are removed from hashes.
	//
	// runGC reads this field with atomic.LoadInt64.
	gcGeneration int64
}

// init replaces s.shards with numShards freshly allocated shards, each with its own mutex and two empty
// result maps.
func (s *hashesState) init(numShards int) {
	shards := make([]hashesShard, numShards)
	for i := range shards {
		shards[i] = hashesShard{
			mu:            new(sync.Mutex),
			results:       make(map[string]*hashResult),
			dockerResults: make(map[string]*dockerHashResult),
		}
	}
	s.shards = shards
}

// runGC removes every stored result (plain and docker) whose gcGeneration differs from the current value
// of s.gcGeneration. While walking the shards it also counts the surviving results and adds up an
// approximate memory footprint for them. When verbose is true, a summary (generation, elapsed time,
// kept/purged counts, memory estimate) is written to the standard logger.
func (s *hashesState) runGC(verbose bool) {
	const bytesPerMb = 1024 * 1024

	begin := time.Now()
	generation := atomic.LoadInt64(&s.gcGeneration)
	// Fixed per-entry cost, the same for every map entry.
	entrySize := getHashesEntrySize()

	var kept, removed, memBytes int64

	// Each shard is swept inside its own function call so that the deferred unlock runs as soon as the
	// shard is done, before the next shard is locked.
	sweepShard := func(sh *hashesShard) {
		sh.mu.Lock()
		defer sh.mu.Unlock()

		for key, res := range sh.results {
			if res.gcGeneration != generation {
				delete(sh.results, key)
				removed++
				continue
			}
			kept++
			memBytes += entrySize + int64(len(key)) + getResultMemorySize(res)
		}

		for key, res := range sh.dockerResults {
			if res.gcGeneration != generation {
				delete(sh.dockerResults, key)
				removed++
				continue
			}
			kept++
			memBytes += entrySize + int64(len(key)) + getDockerResultMemorySize(res)
		}
	}

	for i := range s.shards {
		sweepShard(&s.shards[i])
	}

	if !verbose {
		return
	}
	log.Printf("GC for generation %d done in %v\nTotal hashes: %d, purged %d, approx memory usage %dMb\n",
		generation, time.Since(begin), kept, removed, memBytes/bytesPerMb)
}

// getHashesEntrySize returns the fixed part of the per-entry memory estimate used by runGC: the size of
// a string header (the map key) plus the size of a hashResult struct. The bytes that the strings point
// to are not included.
func getHashesEntrySize() int64 {
	const (
		keyHeaderSize = unsafe.Sizeof("")
		valueSize     = unsafe.Sizeof(hashResult{})
	)
	return int64(keyHeaderSize + valueSize)
}

// getResultMemorySize returns the variable-size part of the memory estimate for a plain result. Only the
// fixed hash length (sha256Len) is counted; result itself is not inspected.
func getResultMemorySize(result *hashResult) int64 {
	const size int64 = sha256Len
	return size
}

// getDockerResultMemorySize returns the variable-size part of the memory estimate for a docker result:
// the hash length (sha256Len) plus the byte lengths of the four string fields of its containerInfo.
func getDockerResultMemorySize(result *dockerHashResult) int64 {
	info := result.container
	size := sha256Len
	for _, field := range [...]string{
		info.containerID,
		info.containerNames,
		info.imageID,
		info.imageTags,
	} {
		size += len(field)
	}
	return int64(size)
}
