package fim

import (
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"runtime/debug"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/OneOfOne/xxhash"

	"a.yandex-team.ru/security/osquery/extensions/osquery-fim/internal/container"
	"a.yandex-team.ru/security/osquery/extensions/osquery-fim/internal/platform"
)

// Tunables for the job queues and for delayed and retried hash jobs.
const (
	// Capacity of every job channel: big enough that goroutines are not constantly parked and unparked,
	// small enough that queued jobs do not take all the memory.
	jobQueueSize = 10_000

	// Interval at which a worker rechecks its delayed hash jobs.
	recheckDelayHashJobs = 1 * time.Second

	// Delay before a hash job triggered by a notification is retried; see doHashJobFromNotify for the
	// explanation of the retry hack.
	retryHashJobDelay = 1 * time.Second
)

// runner drives the hashing pipeline: it walks the configured roots periodically, optionally subscribes to
// file notifications, and feeds the resulting jobs to a fixed set of worker goroutines that write the
// hashes into the sharded hashesState.
type runner struct {
	// Configuration. period is the interval between the starts of two periodic runs, disablePeriodic makes
	// run() return immediately, useNotify makes init() subscribe to file notifications, and
	// delayHashPeriod is both the delay applied to hash jobs triggered by notifications and the minimum
	// age of a not yet known file before periodic hashing picks it up. Categories listed in
	// onlyExecutables are reported for executable files only. limiter is passed to the hasher for every
	// hashed file.
	matchers        []*pathMatcher
	onlyExecutables container.StringSet
	period          time.Duration
	disablePeriodic bool
	useNotify       bool
	delayHashPeriod time.Duration
	dockerClient    *platform.DockerClient
	limiter         *bandwidthLimiter

	// Walk roots derived from matchers by prepareRoots() in init().
	roots []pathRootWithMatchers

	// All files are assigned to specific worker by file path hash, see getShardIdx(). Each worker gets
	// assigned one shard from hashesState.
	//
	// This path -> worker assignment is used to prevent ordering inconsistencies when two events for one file
	// arrive one after another: file modify, then file rename. We need to process them in order, otherwise
	// the rename job may be run before the modify job. For directory renames multiple file paths may be
	// affected, so a job is issued for each of the workers.
	numWorkers int
	// Use two channels: for periodic rehashing and for processing notifications. Notifications are sharded
	// per worker to enforce ordering between updates, deletes and renames. Periodic rehashing is one queue
	// per system to prevent queue bubbles.
	notifyJobCh       []chan notifyJob
	periodicHashJobCh chan hashJob

	// Sharded hash results together with the GC generation counter bumped by every periodic run.
	hashes hashesState

	// File notifier created by initNotify().
	notifier platform.FileNotifier

	// Released by run() after the first periodic run, or right away when periodic hashing is not
	// performed; waitUntilFirstRun() blocks on it.
	firstRunWg sync.WaitGroup

	// Logging switches and statistics. numNotifications, numRetries and numFailedRetries are accessed
	// atomically; all statistics are reported and reset by logMetrics(). dumper receives the hashes at
	// the end of every periodic run.
	verbose          bool
	verboseNotify    bool
	numNotifications int64
	numRetries       int64
	numFailedRetries int64
	periodicMetrics  hasherMetrics
	notifyMetrics    hasherMetrics
	dumper           *debugDumper
}

// hashJob is a request to hash a single file; it is built by newHashJob.
type hashJob struct {
	path string
	// Categories reported when the file is not executable. See categories in hashResult.
	categories string
	// Categories for executables. See categories in hashResult.
	executableCategories string

	// Container the file was found in; nil when the job was created without container information.
	container *containerInfo

	// Must be signalled upon job completion. This is not the standard task -> future -> result pattern,
	// the tasks write to the hashes map themselves. Only periodic jobs set it; it is nil for jobs
	// created from notifications.
	wg *sync.WaitGroup

	// Number of retries already scheduled. See doHashJobFromNotify for the explanation of retry hack.
	numRetries int
}

// notifyJobOp identifies the kind of work a notifyJob asks a worker to do.
type notifyJobOp int

// Operations start at 1, so the zero value of notifyJobOp is not a valid operation.
const (
	// (Re)hash the file at the job path.
	notifyJobHash notifyJobOp = iota + 1
	// Forget everything known about the job path.
	notifyJobDelete
	// Rename the job path to the new path carried by the job.
	notifyJobMove
)

// notifyJob describes one file notification queued for a worker by onNotify.
type notifyJob struct {
	// op selects the handler in doNotifyJob, path is the affected path (the old path for moves) and isDir
	// tells whether the path is a directory; directory jobs are sent to every worker.
	op    notifyJobOp
	path  string
	isDir bool
	// Valid only if op == notifyJobMove.
	newPath string
}

// init prepares the runner for work: it arms the first-run gate, initializes the hash shards and the job
// queues for one worker per CPU, starts the worker goroutines and, if useNotify is set, subscribes to
// file notifications. With no matchers configured only the first-run gate is armed.
func (r *runner) init() {
	// Arm the gate unconditionally. run() calls firstRunWg.Done() on every path, including the
	// "no matchers" one, so returning early without the matching Add(1) would make that Done() panic with
	// a negative WaitGroup counter.
	r.firstRunWg.Add(1)

	if len(r.matchers) == 0 {
		return
	}

	r.numWorkers = runtime.NumCPU()
	r.hashes.init(r.numWorkers)

	r.roots = prepareRoots(r.matchers)
	for _, root := range r.roots {
		r.logVerbose("Got root: %#v, matchers: %v\n", root.root, root.matchers)
	}

	// One bounded notification queue per worker plus a single bounded queue for periodic jobs. Limit the
	// chan size so that the periodic rehashing does not interfere with rehashing due to file
	// notification.
	r.notifyJobCh = make([]chan notifyJob, r.numWorkers)
	for i := range r.notifyJobCh {
		r.notifyJobCh[i] = make(chan notifyJob, jobQueueSize)
	}
	r.periodicHashJobCh = make(chan hashJob, jobQueueSize)

	for i := 0; i < r.numWorkers; i++ {
		go r.workerMain(i)
	}

	if r.useNotify {
		r.initNotify()
	}
}

// logVerbose forwards the message to log.Printf, but only when verbose logging is enabled.
func (r *runner) logVerbose(format string, v ...interface{}) {
	if !r.verbose {
		return
	}
	log.Printf(format, v...)
}

// logVerboseNotify forwards the message to log.Printf, but only when verbose logging of notifications is
// enabled.
func (r *runner) logVerboseNotify(format string, v ...interface{}) {
	if !r.verboseNotify {
		return
	}
	log.Printf(format, v...)
}

// run is the main work loop: it starts a periodic run every r.period, forever. It returns (with a nil
// error) only when periodic hashing is not performed at all, i.e. when there are no matchers or when
// disablePeriodic is set. The first-run gate is released after the first periodic run, or right before
// returning.
func (r *runner) run() error {
	switch {
	case len(r.matchers) == 0:
		log.Printf("WARNING: No matchers specified, disabling periodic rehashing\n")
		r.firstRunWg.Done()
		return nil
	case r.disablePeriodic:
		log.Printf("WARNING: Debug option disablePeriodic enabled\n")
		r.firstRunWg.Done()
		return nil
	}

	gatePending := true
	// Pretend the previous run started a whole period ago, so that the first run starts right away.
	prevStart := time.Now().Add(-r.period)
	for {
		if wait := time.Until(prevStart.Add(r.period)); wait > 0 {
			r.logVerbose("Remaining sleep before next hashing %v, sleeping\n", wait)
			time.Sleep(wait)
		} else {
			r.logVerbose("No time to sleep\n")
		}

		prevStart = time.Now()
		r.logVerbose("Starting hashing\n")
		r.periodicRun()

		if !gatePending {
			continue
		}
		r.firstRunWg.Done()
		gatePending = false
	}
}

// waitUntilFirstRun blocks until run() releases the first-run gate: after the first periodic run has
// finished, or right away when run() does not perform periodic hashing.
func (r *runner) waitUntilFirstRun() {
	r.firstRunWg.Wait()
}

// periodicRun performs one full rehashing pass: it starts a new GC generation, walks all roots (and the
// docker containers, if the docker client is enabled) queueing a hash job per file, waits for the jobs to
// finish, garbage-collects the hashes, logs the metrics and writes the debug dump. A panic anywhere in
// the pass is logged and swallowed so that the caller's loop keeps running.
func (r *runner) periodicRun() {
	defer func() {
		// The recovered value gets its own name so that it does not shadow the receiver.
		if p := recover(); p != nil {
			log.Printf("ERROR: panic in periodicRun, recovering: %v\n%s",
				p, string(debug.Stack()))
		}
	}()

	wg := &sync.WaitGroup{}
	startTime := time.Now()

	newGen := atomic.AddInt64(&r.hashes.gcGeneration, 1)
	r.logVerbose("New GC generation: %v\n", newGen)

	// Walk all roots and spawn tasks for hashing the files. NOTE: Multithreading can potentially improve
	// performance here but probably not enough compared to multithreading computing the hash.
	r.walkRoots(wg)
	r.logVerbose("Walking trees done in %v\n", time.Since(startTime))
	startDockerTime := time.Now()
	if r.dockerClient.Enabled() {
		// Docker walking requires multiple calls to the docker daemon, start after walking the regular
		// filesystem.
		r.walkDocker(wg)
		r.logVerbose("Walking docker trees done in %v\n", time.Since(startDockerTime))
	}

	r.logVerbose("Pending jobs: %d\n", len(r.periodicHashJobCh))

	// Wait for all hashing jobs to complete before running the garbage collection.
	wg.Wait()
	r.logVerbose("Periodic hashing done in %v\n", time.Since(startTime))

	r.hashes.runGC(r.verbose)

	r.logMetrics()

	dumpStartTime := time.Now()
	r.dumper.writeDump(&r.hashes)
	r.logVerbose("Dumped results to %s in %v\n", r.dumper.path, time.Since(dumpStartTime))
}

// initNotify creates the file notifier, wires its callbacks to onNotify and subscribes to every root and,
// if the docker client is enabled, to the docker overlay directory. Failures are logged: if the notifier
// cannot be created nothing is subscribed, a failed subscription does not prevent the remaining ones.
func (r *runner) initNotify() {
	notifier, err := platform.NewNotifier(r.verboseNotify)
	r.notifier = notifier
	if err != nil {
		log.Printf("ERROR: could not create file notifier, watching files disabled: %v\n", err)
		return
	}

	r.notifier.SetOnAttrib(func(path string, isDir bool) {
		// Attribute changes matter only because the executable bit may have changed. Directories are
		// ignored, and so are files which are already in the results (even if e.g. the file went
		// executable -> non executable).
		if isDir || r.isInResults(path) {
			return
		}
		r.onNotify(notifyJob{op: notifyJobHash, path: path, isDir: false})
	})
	r.notifier.SetOnDelete(func(path string, isDir bool) {
		// Deletions are forwarded for files and directories alike; isDir tells the delete job whether to
		// drop a single entry or everything below the path.
		r.onNotify(notifyJob{op: notifyJobDelete, path: path, isDir: isDir})
	})
	r.notifier.SetOnModify(func(path string, isDir bool) {
		if isDir {
			log.Printf("ERROR: received a OnModify for directory %s\n", path)
			return
		}
		r.onNotify(notifyJob{op: notifyJobHash, path: path, isDir: false})
	})
	r.notifier.SetOnMove(func(oldPath string, newPath string, isDir bool) {
		r.onNotify(notifyJob{op: notifyJobMove, path: oldPath, isDir: isDir, newPath: newPath})
	})

	for _, root := range r.roots {
		if addErr := r.notifier.Add(root.root.path); addErr != nil {
			log.Printf("ERROR: could not subscribe to notifications to %s: %v\n", root.root.path, addErr)
		}
	}

	if !r.dockerClient.Enabled() {
		return
	}
	// Re-creating a bunch of subscriptions on each docker container create-destroy looks like a hassle,
	// so simply subscribe to the whole overlay directory and hope there are not too many file events
	// (this makes zero difference for fanotify, since it monitors the whole filesystem).
	overlayDir := r.dockerClient.GetOverlayDir()
	if addErr := r.notifier.Add(overlayDir); addErr != nil {
		log.Printf("ERROR: could not subscribe to notifications to docker overlay dir %s: %v\n",
			overlayDir, addErr)
	}
}

// onNotify queues a notification job: file jobs go to the worker owning the path, directory jobs go to
// every worker. Sending blocks while a target queue is full. Each call counts as one notification.
func (r *runner) onNotify(job notifyJob) {
	r.logVerboseNotify("notify job: %v\n", job)
	if !job.isDir {
		r.notifyJobCh[r.getShardIdx(job.path)] <- job
	} else {
		// Files inside a moved or deleted directory may be assigned to any worker, so all of them have to
		// see the job.
		for workerIdx := 0; workerIdx < r.numWorkers; workerIdx++ {
			r.notifyJobCh[workerIdx] <- job
		}
	}
	atomic.AddInt64(&r.numNotifications, 1)
}

// walkRoots walks the roots on the host filesystem: no path prefix and no container information.
func (r *runner) walkRoots(wg *sync.WaitGroup) {
	const hostRoot = ""
	var noContainer *containerInfo
	r.walkRootsImpl(hostRoot, noContainer, wg)
}

// walkDocker walks the roots once per docker container, relative to the merged directory of the
// container. If the container list cannot be obtained the error is logged and nothing is walked.
func (r *runner) walkDocker(wg *sync.WaitGroup) {
	dockerContainers, err := r.dockerClient.GetContainers()
	if err != nil {
		log.Printf("ERROR: error when getting containers: %v\n", err)
		return
	}
	for _, dockerContainer := range dockerContainers {
		r.walkRootsImpl(dockerContainer.MergedDir, convertContainerInfo(dockerContainer), wg)
	}
}

// convertContainerInfo copies the container description returned by the docker client into a freshly
// allocated containerInfo.
func convertContainerInfo(c *platform.DockerContainerInfo) *containerInfo {
	info := new(containerInfo)
	info.containerID = c.ContainerID
	info.containerNames = c.ContainerNames
	info.imageID = c.ImageID
	info.imageTags = c.ImageTags
	// The prefix length is only correct if the merged dir path has been cleaned, which is expected from
	// docker.go. TODO: Check that this works for docker + inotify.
	info.containerPrefixLen = len(c.MergedDir)
	return info
}

// walkRootsImpl visits every root below realRoot: a file root is processed directly, a recursive root is
// walked as a tree and any other root contributes only its direct children. Walk errors are logged in
// verbose mode and do not stop the remaining roots.
func (r *runner) walkRootsImpl(realRoot string, container *containerInfo, wg *sync.WaitGroup) {
	for _, root := range r.roots {
		rootPath := filepath.Join(realRoot, root.root.path)

		var walkErr error
		switch {
		case root.root.isFile:
			r.walkSingleFile(rootPath, root.matchers, container, wg)
		case root.root.recursive:
			walkErr = r.walkTree(rootPath, root.matchers, container, wg)
		default:
			walkErr = r.walkSingleDir(rootPath, root.matchers, container, wg)
		}
		if walkErr != nil {
			r.logVerbose("WARNING: walking directory %s failed: %v\n", root.root.path, walkErr)
		}
	}
}

// walkSingleFile queues a periodic hash job for path if at least one matcher accepts it. The job is
// registered in wg before it is queued; queueing blocks while the periodic queue is full.
func (r *runner) walkSingleFile(path string, matchers pathMatcherSlice, container *containerInfo,
	wg *sync.WaitGroup) {
	// Our own dump file is never hashed.
	if path == r.dumper.path {
		return
	}

	if job, matched := r.newHashJob(path, container, matchers); matched {
		job.wg = wg
		wg.Add(1)
		r.periodicHashJobCh <- job
	}
}

// walkSingleDir processes the regular files located directly in dirpath; entries of any other type
// (directories, symlinks, devices, ...) are skipped. Returns the error of reading the directory, in
// which case no entry is processed.
func (r *runner) walkSingleDir(dirpath string, matchers pathMatcherSlice, container *containerInfo,
	wg *sync.WaitGroup) error {
	entries, err := os.ReadDir(dirpath)
	if err != nil {
		return err
	}
	for _, entry := range entries {
		if entry.Type()&os.ModeType != 0 {
			continue
		}
		r.walkSingleFile(filepath.Join(dirpath, entry.Name()), matchers, container, wg)
	}
	return nil
}

// walkTree recursively walks rootpath and processes every regular file and symlink it finds. Entries
// which cannot be read are logged in verbose mode and skipped, and the docker overlay directory is not
// descended into when the docker client is enabled. Returns the result of filepath.WalkDir.
func (r *runner) walkTree(rootpath string, matchers pathMatcherSlice, container *containerInfo,
	wg *sync.WaitGroup) error {
	visit := func(path string, dirent fs.DirEntry, err error) error {
		if err != nil {
			r.logVerbose("ERROR: walking directory %s failed: %v\n", path, err)
			if dirent != nil && dirent.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}

		entryType := dirent.Type()
		switch {
		case entryType&os.ModeType == 0, entryType&os.ModeSymlink != 0:
			r.walkSingleFile(path, matchers, container, wg)
		case dirent.IsDir():
			// Directories cannot be pruned with platform.ShouldIgnorePath unfortunately: e.g. /dev
			// contains /dev/shm, which is user-writable.

			// Docker overlay files are processed separately.
			if r.dockerClient.Enabled() && path == r.dockerClient.GetOverlayDir() {
				return filepath.SkipDir
			}
		}
		return nil
	}
	return filepath.WalkDir(rootpath, visit)
}

// isPathFromDockerOverlay reports whether path is the docker overlay directory or lies below it. It is
// always false when the docker client is disabled or reports an empty overlay directory.
func (r *runner) isPathFromDockerOverlay(path string) bool {
	// Every other user of GetOverlayDir() checks Enabled() first; do the same here.
	if !r.dockerClient.Enabled() {
		return false
	}
	overlayDir := r.dockerClient.GetOverlayDir()
	if overlayDir == "" {
		// An empty prefix would match every path.
		return false
	}
	if path == overlayDir {
		return true
	}
	// Compare against the directory with a trailing separator, otherwise a sibling like "overlay2-old"
	// would be taken for a part of "overlay2".
	separator := string(filepath.Separator)
	if !strings.HasSuffix(overlayDir, separator) {
		overlayDir += separator
	}
	return strings.HasPrefix(path, overlayDir)
}

// logMetrics logs (in verbose mode) the memory statistics, the periodic and notification hashing metrics
// and the notification counters. The metrics and the counters are reset on every call, verbose or not.
func (r *runner) logMetrics() {
	if r.verbose {
		// Reading the memory statistics is comparatively expensive (it stops the world), so only do it
		// when the numbers are actually going to be logged.
		const bytesPerMb = 1024 * 1024
		var memStats runtime.MemStats
		runtime.ReadMemStats(&memStats)
		r.logVerbose("Workers: %d\nMem stats: Alloc %dMb, Sys %dMb, HeapSys %dMb\n", r.numWorkers,
			memStats.Alloc/bytesPerMb, memStats.Sys/bytesPerMb, memStats.HeapSys/bytesPerMb)
	}

	r.logVerbose("Periodic metrics:\n%s", r.periodicMetrics.log())
	r.periodicMetrics.reset()
	r.logVerbose("Notification metrics:\n%s", r.notifyMetrics.log())
	r.notifyMetrics.reset()

	// Swap rather than load-and-store, so that notifications counted concurrently are not lost.
	numNotifications := atomic.SwapInt64(&r.numNotifications, 0)
	numRetries := atomic.SwapInt64(&r.numRetries, 0)
	numFailedRetries := atomic.SwapInt64(&r.numFailedRetries, 0)
	r.logVerbose("Got %d file notifications since last time, %d retries (%d failed)\n", numNotifications,
		numRetries, numFailedRetries)
}

// getShardIdx maps a path to the index of its hashes shard: the 32-bit xxhash checksum of the path modulo
// the number of shards.
func (r *runner) getShardIdx(path string) int {
	numShards := uint(len(r.hashes.shards))
	pathHash := uint(xxhash.ChecksumString32(path))
	return int(pathHash % numShards)
}

// delayedHashJob is a hash job waiting in workerState.pendingHashJobs until it is allowed to run.
type delayedHashJob struct {
	hashJob hashJob
	// firstTime is kept for logging only
	firstTime time.Time
	// hashTime is the time after which processDelayedJobs runs the job; it is pushed forward when the
	// job is scheduled again before it has run.
	hashTime  time.Time
}

// workerState is the private state of one worker goroutine; workerMain creates it and passes it to every
// job handler.
type workerState struct {
	// workerIdx is the index of the worker; delete and move jobs also use it as the index of the hashes
	// shard they operate on. hasher is created by workerMain and used by doHashJob.
	workerIdx int
	hasher    *fileHasher

	// All delayed hash jobs from notifier are put in a map waiting until their delay (delayHashPeriod, or
	// retryHashJobDelay for retries) expires. When the new modification notification arrives, either a
	// new entry is added to pendingHashJobs or an existing entry is updated. If the map is non-empty, the
	// ticker is started to check for jobs that are ready to be run.
	pendingHashJobs map[string]*delayedHashJob
	delayTicker     platform.RestartableTicker
}

// workerMain is the body of a worker goroutine. It serves, forever, the shared periodic queue, the
// notification queue of this worker and the ticker which rechecks the delayed hash jobs. Failing to
// create the file hasher is fatal for the whole process.
func (r *runner) workerMain(workerIdx int) {
	// Check the error right where it is produced, before any other piece of state is created.
	hasher, err := newFileHasher(Sha256)
	if err != nil {
		log.Fatalf("ERROR: could not create fileHasher: %v", err)
	}

	state := &workerState{
		workerIdx:       workerIdx,
		hasher:          hasher,
		pendingHashJobs: map[string]*delayedHashJob{},
		delayTicker:     platform.NewRestartableTicker(recheckDelayHashJobs),
	}
	for {
		select {
		case job := <-r.periodicHashJobCh:
			r.doPeriodicHashJob(state, job)
		case job := <-r.notifyJobCh[workerIdx]:
			r.doNotifyJob(state, job)
		case <-state.delayTicker.C:
			r.processDelayedJobs(state)
		}
	}
}

// doPeriodicHashJob runs one periodic hash job. Whatever happens - success, error or panic - the job's
// WaitGroup (if any) is signalled. Panics are logged and swallowed; errors other than "file does not
// exist" are logged.
func (r *runner) doPeriodicHashJob(state *workerState, job hashJob) {
	// Deferred calls run in reverse order: the panic (if any) is recovered first, then the job is
	// reported as done.
	defer func() {
		if job.wg != nil {
			job.wg.Done()
		}
	}()
	defer func() {
		if p := recover(); p != nil {
			log.Printf("ERROR: panic while processing periodic hash job, recovering: %v\n%s",
				p, string(debug.Stack()))
		}
	}()

	if err := r.doHashJob(state, job, true); err != nil && !os.IsNotExist(err) {
		log.Printf("ERROR: hashing %s failed: %v\n", job.path, err)
	}
}

// doNotifyJob dispatches one notification job by its operation. Hash jobs run right away when no delay is
// configured and are put into the delayed jobs otherwise; paths not accepted by any matcher are dropped.
// Panics are logged and swallowed.
func (r *runner) doNotifyJob(state *workerState, job notifyJob) {
	defer func() {
		if p := recover(); p != nil {
			log.Printf("ERROR: panic while processing notify job, recovering: %v\n%s",
				p, string(debug.Stack()))
		}
	}()

	switch job.op {
	case notifyJobMove:
		r.doMoveJob(state, job.path, job.newPath, job.isDir)
	case notifyJobDelete:
		r.doDeleteJob(state, job.path, job.isDir)
	case notifyJobHash:
		pending, matched := r.convertNotifyHashJob(job.path)
		if !matched {
			return
		}
		if r.delayHashPeriod != 0 {
			r.addToDelayedJobs(state, pending, r.delayHashPeriod)
			return
		}
		r.doHashJobFromNotify(state, pending)
	}
}

// addToDelayedJobs schedules job to run after delay. If a job for the same path is already pending, only
// its run time is pushed forward and the rest of the pending job is kept. The delay ticker is started on
// every call.
func (r *runner) addToDelayedJobs(state *workerState, job hashJob, delay time.Duration) {
	now := time.Now()
	runAt := now.Add(delay)
	if pending, found := state.pendingHashJobs[job.path]; found {
		pending.hashTime = runAt
	} else {
		state.pendingHashJobs[job.path] = &delayedHashJob{
			hashJob:   job,
			firstTime: now,
			hashTime:  runAt,
		}
	}
	state.delayTicker.Start()
}

// processDelayedJobs runs every pending hash job whose run time has passed and stops the delay ticker
// once no pending jobs are left.
func (r *runner) processDelayedJobs(state *workerState) {
	// The whole map is scanned on every tick. This is fast enough, nothing more complicated is needed.
	now := time.Now()
	processed := 0
	for path, pending := range state.pendingHashJobs {
		if !pending.hashTime.Before(now) {
			continue
		}
		r.logVerboseNotify("Delay timer for hashing %s expired after %v\n", path, time.Since(pending.firstTime))
		// NOTE: The entry has to be removed before running the job, because doHashJobFromNotify may
		// re-add the job for a retry.
		delete(state.pendingHashJobs, path)
		r.doHashJobFromNotify(state, pending.hashJob)
		processed++
	}
	r.logVerboseNotify("Processed %d pending hashing jobs\n", processed)

	if len(state.pendingHashJobs) != 0 {
		return
	}
	state.delayTicker.Stop()
	r.logVerboseNotify("All pending hash jobs complete, stopping the delay timer\n")
}

// doHashJobFromNotify runs a hash job triggered by a notification. A job failing with "file does not
// exist" is retried once after retryHashJobDelay; a second such failure is only counted. Any other error
// is logged.
func (r *runner) doHashJobFromNotify(state *workerState, job hashJob) {
	err := r.doHashJob(state, job, false)
	switch {
	case err == nil:
		return
	case !os.IsNotExist(err):
		log.Printf("ERROR: hashing %s failed: %v\n", job.path, err)
	case job.numRetries == 0:
		// HACK: The path came from a notification but does not exist right now, so try again in the
		// nearest future. Most likely a delete or a move notification arrives in the meantime. Limited
		// to 1 retry for now. Note, that this delay is not related to the delayed hashing.
		job.numRetries++
		r.addToDelayedJobs(state, job, retryHashJobDelay)
		atomic.AddInt64(&r.numRetries, 1)
		r.logVerboseNotify("Retrying hashing %s in %v\n", job.path, retryHashJobDelay)
	default:
		atomic.AddInt64(&r.numFailedRetries, 1)
		r.logVerboseNotify("Hashing %s failed after retry\n", job.path)
	}
}

// doDeleteJob forgets a deleted path: it drops the matching pending hash jobs of this worker and the
// matching results from this worker's shard. For a directory everything below the path is dropped (from
// the docker results if the path is in the docker overlay, from the regular results otherwise); for a
// file the entry is dropped from both result maps.
func (r *runner) doDeleteJob(state *workerState, path string, isDir bool) {
	// A path belongs to the deleted directory only if it continues with a separator right after the
	// directory path. A plain prefix check would also hit siblings: deleting "/a/b" must not touch
	// "/a/bc/file".
	dirPrefix := path
	if separator := string(filepath.Separator); !strings.HasSuffix(dirPrefix, separator) {
		dirPrefix += separator
	}
	isAffected := func(p string) bool {
		return p == path || strings.HasPrefix(p, dirPrefix)
	}

	// Remove all delayed hash jobs, they will not complete anyway.
	if isDir {
		for p := range state.pendingHashJobs {
			if isAffected(p) {
				delete(state.pendingHashJobs, p)
				r.logVerboseNotify("Deleted %s from pending hash jobs\n", p)
			}
		}
	} else {
		delete(state.pendingHashJobs, path)
	}

	workerIdx := state.workerIdx
	r.hashes.shards[workerIdx].mu.Lock()
	defer r.hashes.shards[workerIdx].mu.Unlock()
	if isDir {
		// Remove all files below path. We could use the PathTree instead of the map to prevent iterating the
		// whole shard, but unfortunately the PathTree consumes significantly more memory.
		if r.isPathFromDockerOverlay(path) {
			for p := range r.hashes.shards[workerIdx].dockerResults {
				// NOTE: We operate on host paths here.
				if isAffected(p) {
					r.logVerboseNotify("Dropped file result %s\n", p)
					delete(r.hashes.shards[workerIdx].dockerResults, p)
				}
			}
		} else {
			for p := range r.hashes.shards[workerIdx].results {
				if isAffected(p) {
					r.logVerboseNotify("Dropped file result %s\n", p)
					delete(r.hashes.shards[workerIdx].results, p)
				}
			}
		}
	} else {
		r.logVerboseNotify("Dropping file info %s\n", path)
		delete(r.hashes.shards[workerIdx].results, path)
		delete(r.hashes.shards[workerIdx].dockerResults, path)
	}
}

// doHashJob hashes the file of the job and stores the result in the shard owning the path. Nothing is
// stored, and nil is returned, when the path (or the real path behind it) is ignored by the platform,
// when the file is not a regular file, when no category applies to it, or - for periodic jobs - when the
// file is too young. Errors of stat, of resolving the real path and of hashing are returned as is.
func (r *runner) doHashJob(state *workerState, job hashJob, isPeriodic bool) error {
	if platform.ShouldIgnorePath(job.path) {
		return nil
	}

	begin := time.Now()
	info, err := os.Stat(job.path)
	if err != nil {
		return err
	}
	if !info.Mode().IsRegular() {
		return nil
	}

	// Executables have their own category list.
	categories := job.categories
	if platform.IsExecutable(info) {
		categories = job.executableCategories
	}
	if len(categories) == 0 {
		return nil
	}

	// The age check mostly prevents computing the hashes of files which are still being written to. It
	// applies to periodic jobs only.
	if isPeriodic && r.isTooYoung(job.path, info) {
		r.logVerbose("File %s is too young, skipping\n", job.path)
		return nil
	}

	// realPath is empty if the file is not a symlink. The motivation is lower memory consumption: two
	// copies of the path are not kept between two periodic rehashings. An empty realPath is replaced
	// with the file path when writing the table contents.
	realPath, err := platform.GetRealPath(job.path)
	if err != nil {
		return err
	}
	effectivePath := job.path
	if realPath != "" {
		effectivePath = realPath
	}
	if platform.ShouldIgnorePath(effectivePath) {
		return nil
	}

	digest, fileSize, err := state.hasher.hashFile(job.path, r.limiter, info.Size())
	if err != nil {
		return err
	}
	elapsed := time.Since(begin)

	result := hashResult{
		realPath:     realPath,
		categories:   categories,
		sha256Value:  byteSliceToString(digest),
		gcGeneration: atomic.LoadInt64(&r.hashes.gcGeneration),
	}

	// The shard may differ from state.workerIdx: for periodic jobs or after renaming the file.
	shardIdx := r.getShardIdx(job.path)
	r.hashes.shards[shardIdx].mu.Lock()
	if job.container == nil {
		r.hashes.shards[shardIdx].results[job.path] = &result
	} else {
		r.hashes.shards[shardIdx].dockerResults[job.path] = &dockerHashResult{
			hashResult: result,
			container:  *job.container,
		}
	}
	r.hashes.shards[shardIdx].mu.Unlock()

	metrics := &r.notifyMetrics
	if isPeriodic {
		metrics = &r.periodicMetrics
	}
	metrics.add(fileSize, elapsed)
	return nil
}

// convertNotifyHashJob builds a hash job for a path reported by the notifier. The notifier provides
// nothing but the path, so the container (if any) is looked up from the path and the path is checked
// against all matchers. A failed container lookup is logged and the job is built without container
// information. The second result is false when no matcher accepts the path.
func (r *runner) convertNotifyHashJob(path string) (hashJob, bool) {
	var info *containerInfo
	dockerContainer, _, err := r.dockerClient.GetContainerForPath(path)
	switch {
	case err != nil:
		log.Printf("WARNING: Failed to get container info for %s: %v", path, err)
	case dockerContainer != nil:
		info = convertContainerInfo(dockerContainer)
	}

	return r.newHashJob(path, info, r.matchers)
}

// newHashJob checks the path against the matchers and returns a hash job when at least one of them
// matches; otherwise the second result is false. For a file from a container the matchers see the path
// with the container prefix cut off. The categories of all matching matchers apply to executables, only
// those not listed in onlyExecutables apply to other files.
func (r *runner) newHashJob(path string, container *containerInfo, matchers pathMatcherSlice) (hashJob, bool) {
	pathToMatch := path
	if container != nil {
		pathToMatch = path[container.containerPrefixLen:]
	}

	var (
		plain      []string
		executable []string
	)
	for _, m := range matchers {
		if !m.pathMatches(pathToMatch) {
			continue
		}
		executable = append(executable, m.category)
		if r.onlyExecutables.Contains(m.category) {
			continue
		}
		plain = append(plain, m.category)
	}
	if len(executable) == 0 {
		return hashJob{}, false
	}

	job := hashJob{
		path:                 path,
		categories:           categoriesFromSlice(plain),
		executableCategories: categoriesFromSlice(executable),
		container:            container,
	}
	return job, true
}

// isInResults reports whether the shard owning the path has a result for it, regular or docker. The shard
// is locked for the duration of the lookup.
func (r *runner) isInResults(path string) bool {
	shardIdx := r.getShardIdx(path)
	r.hashes.shards[shardIdx].mu.Lock()
	defer r.hashes.shards[shardIdx].mu.Unlock()

	if _, found := r.hashes.shards[shardIdx].results[path]; found {
		return true
	}
	_, found := r.hashes.shards[shardIdx].dockerResults[path]
	return found
}

// isTooYoung reports whether a file is not yet in the results and was created less than delayHashPeriod
// ago. If the creation time cannot be determined the file is not considered young; the error is logged
// unless it says that the file does not exist.
func (r *runner) isTooYoung(path string, fileInfo os.FileInfo) bool {
	// Files already in the results passed the check earlier.
	if r.isInResults(path) {
		return false
	}

	ctime, err := platform.GetFileCreationTime(path, fileInfo)
	if err == nil {
		return time.Now().Before(ctime.Add(r.delayHashPeriod))
	}
	// A deleted file is not worth a log line.
	if !os.IsNotExist(err) {
		log.Printf("ERROR: could not stat %s: %v\n", path, err)
	}
	return false
}

// doMoveJob processes a rename of fromPath to toPath: the pending hash jobs of this worker are re-keyed to
// the new path, then the results are moved (docker results if fromPath is in the docker overlay, regular
// results otherwise). For a directory everything below fromPath is affected.
func (r *runner) doMoveJob(state *workerState, fromPath string, toPath string, isDir bool) {
	// Update all pending jobs, otherwise they will fail with file "does not exist". Note, that working with
	// files is inherently racy: there is a possibility that we started hashing the file after the move but
	// before processing the move notification. We use retries for "file not found" errors for jobs started by
	// notifier.
	//
	// NOTE: This way we may get paths with different shard index in pendingHashJobs. This is still processed
	// correctly by doHashJob.
	if isDir {
		// A path belongs to the moved directory only if it continues with a separator right after
		// fromPath. A plain prefix check would also hit siblings: moving "/a/b" must not touch
		// "/a/bc/file".
		dirPrefix := fromPath
		if separator := string(filepath.Separator); !strings.HasSuffix(dirPrefix, separator) {
			dirPrefix += separator
		}

		// Collect the affected paths first: entries inserted into a map while ranging over it may or may
		// not be visited by the same loop.
		var affected []string
		for p := range state.pendingHashJobs {
			if p == fromPath || strings.HasPrefix(p, dirPrefix) {
				affected = append(affected, p)
			}
		}
		for _, p := range affected {
			job := state.pendingHashJobs[p]
			delete(state.pendingHashJobs, p)
			newPath := toPath + p[len(fromPath):]
			job.hashJob.path = newPath
			state.pendingHashJobs[newPath] = job
			r.logVerboseNotify("Updated pending hash job: %s -> %s\n", p, newPath)
		}
	} else if job, ok := state.pendingHashJobs[fromPath]; ok {
		delete(state.pendingHashJobs, fromPath)
		job.hashJob.path = toPath
		state.pendingHashJobs[toPath] = job
		r.logVerboseNotify("Updated pending hash job: %s -> %s\n", fromPath, toPath)
	}

	if r.isPathFromDockerOverlay(fromPath) {
		r.moveDockerResults(state, fromPath, toPath, isDir)
	} else {
		r.moveResults(state, fromPath, toPath, isDir)
	}
}

// moveResults moves the regular results of fromPath (for a directory: of everything below it) from this
// worker's shard to the shards owning the corresponding paths under toPath. The source shard and the
// destination shards are never locked at the same time.
func (r *runner) moveResults(state *workerState, fromPath string, toPath string, isDir bool) {
	fromResults := map[string]*hashResult{}
	workerIdx := state.workerIdx
	r.hashes.shards[workerIdx].mu.Lock()
	if isDir {
		// A path belongs to the moved directory only if it continues with a separator right after
		// fromPath. A plain prefix check would also hit siblings: moving "/a/b" must not touch
		// "/a/bc/file".
		dirPrefix := fromPath
		if separator := string(filepath.Separator); !strings.HasSuffix(dirPrefix, separator) {
			dirPrefix += separator
		}
		for p, result := range r.hashes.shards[workerIdx].results {
			if p == fromPath || strings.HasPrefix(p, dirPrefix) {
				fromResults[p] = result
				delete(r.hashes.shards[workerIdx].results, p)
			}
		}
	} else if result, ok := r.hashes.shards[workerIdx].results[fromPath]; ok {
		fromResults[fromPath] = result
		delete(r.hashes.shards[workerIdx].results, fromPath)
	}
	r.hashes.shards[workerIdx].mu.Unlock()

	// Nothing to move: do not bother the other shards at all.
	if len(fromResults) == 0 {
		return
	}

	// Group results by shard to reduce Lock/Unlock calls. The per-shard maps are created on demand: most
	// moves touch only a few shards.
	toResults := make([]map[string]*hashResult, len(r.hashes.shards))
	for path, result := range fromResults {
		newPath := toPath + path[len(fromPath):]
		shardIdx := r.getShardIdx(newPath)
		if toResults[shardIdx] == nil {
			toResults[shardIdx] = map[string]*hashResult{}
		}
		toResults[shardIdx][newPath] = result
		r.logVerboseNotify("Moved file result %s -> %s\n", path, newPath)
	}

	for shardIdx, shardResults := range toResults {
		if len(shardResults) == 0 {
			continue
		}
		r.hashes.shards[shardIdx].mu.Lock()
		for path, result := range shardResults {
			r.hashes.shards[shardIdx].results[path] = result
		}
		r.hashes.shards[shardIdx].mu.Unlock()
	}
}

// moveDockerResults moves the docker results of fromPath (for a directory: of everything below it) from
// this worker's shard to the shards owning the corresponding paths under toPath. The source shard and the
// destination shards are never locked at the same time.
func (r *runner) moveDockerResults(state *workerState, fromPath string, toPath string, isDir bool) {
	fromResults := map[string]*dockerHashResult{}
	workerIdx := state.workerIdx
	r.hashes.shards[workerIdx].mu.Lock()
	if isDir {
		// A path belongs to the moved directory only if it continues with a separator right after
		// fromPath. A plain prefix check would also hit siblings: moving "/a/b" must not touch
		// "/a/bc/file".
		dirPrefix := fromPath
		if separator := string(filepath.Separator); !strings.HasSuffix(dirPrefix, separator) {
			dirPrefix += separator
		}
		for p, result := range r.hashes.shards[workerIdx].dockerResults {
			if p == fromPath || strings.HasPrefix(p, dirPrefix) {
				fromResults[p] = result
				delete(r.hashes.shards[workerIdx].dockerResults, p)
			}
		}
	} else if result, ok := r.hashes.shards[workerIdx].dockerResults[fromPath]; ok {
		fromResults[fromPath] = result
		delete(r.hashes.shards[workerIdx].dockerResults, fromPath)
	}
	r.hashes.shards[workerIdx].mu.Unlock()

	// Nothing to move: do not bother the other shards at all.
	if len(fromResults) == 0 {
		return
	}

	// Group results by shard to reduce Lock/Unlock calls. The per-shard maps are created on demand: most
	// moves touch only a few shards.
	toResults := make([]map[string]*dockerHashResult, len(r.hashes.shards))
	for path, result := range fromResults {
		newPath := toPath + path[len(fromPath):]
		shardIdx := r.getShardIdx(newPath)
		if toResults[shardIdx] == nil {
			toResults[shardIdx] = map[string]*dockerHashResult{}
		}
		toResults[shardIdx][newPath] = result
		r.logVerboseNotify("Moved file result %s -> %s\n", path, newPath)
	}

	for shardIdx, shardResults := range toResults {
		if len(shardResults) == 0 {
			continue
		}
		r.hashes.shards[shardIdx].mu.Lock()
		for path, result := range shardResults {
			r.hashes.shards[shardIdx].dockerResults[path] = result
		}
		r.hashes.shards[shardIdx].mu.Unlock()
	}
}
