//go:build linux
// +build linux

package platform

import (
	"fmt"
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"runtime/debug"
	"strings"
	"sync"
	"time"
	"unsafe"

	"golang.org/x/sys/unix"

	"a.yandex-team.ru/security/osquery/extensions/osquery-fim/internal/container"
)

// inotify-based file watcher. Recursively adds watches for newly created subdirectories. Main limitations:
//  1) can easily hit the max_user_watches limit (~12k by default);
//  2) the handling of renames is very complicated and prone to races;
//  3) does not support deleting or renaming the top-level watched directories.

const (
	// delayAfterMoveFrom is the period of the move-matcher ticker and the age threshold used when
	// expiring IN_MOVED_FROM events for which no paired IN_MOVED_TO has been seen.
	delayAfterMoveFrom = time.Second
)

// inotifier is the inotify-backed watcher. Its state is shared by the goroutines started in
// NewINotifier (job runner, move matcher and event reader).
type inotifier struct {
	// Enables additional diagnostic logging.
	verbose bool

	// The inotify file descriptor all watches are registered on and events are read from.
	fd int

	// Optional callbacks installed via the SetOn* methods. A nil callback is simply skipped.
	onAttrib FileNotifyFn
	onDelete FileNotifyFn
	onModify FileNotifyFn
	onMove   FileMoveNotifyFn

	// Maps watch descriptor to directory path and back. Both mappings are accessed with wdToPathMu held.
	wdToPathMu *sync.Mutex
	wdToPath   map[int]string
	pathToWd   container.PathTrie

	// Queue of add/remove jobs. See runAddRemovePathJobs()
	addRemovePathJobCh chan addRemovePathJob

	// Queue of IN_MOVED_FROM/IN_MOVED_TO events. See runMoveMatcher()
	movesCh chan pathMove
}

// addRemovePathJob is a request, sent over addRemovePathJobCh, to add or remove the watches for a
// directory tree.
type addRemovePathJob struct {
	// Root of the tree to process; doRemove selects removal instead of addition.
	path     string
	doRemove bool

	// Used only if doRemove is false. When set, onModify is fired for the regular files and symlinks
	// discovered while walking the tree.
	notifyForFiles bool

	// Optional. When non-nil, the job runner sends exactly one result (possibly nil) here once the job
	// is done; when nil, nobody waits for the job.
	errCh chan<- error
}

// pathMove is one half of a rename as reported by inotify: either the IN_MOVED_FROM (isFrom is true)
// or the IN_MOVED_TO event. The two halves of one rename carry the same cookie.
type pathMove struct {
	isFrom bool
	path   string
	isDir  bool
	cookie uint32
}

// Add starts watching the directory tree rooted at path. The cleaned path is queued as an add job and
// the call blocks until the job runner reports the outcome of walking the tree.
func (n *inotifier) Add(path string) error {
	done := make(chan error)
	job := addRemovePathJob{
		path:  filepath.Clean(path),
		errCh: done,
	}
	// doRemove and notifyForFiles stay at their zero value: this is a plain add which does not fire
	// onModify for the files that already exist.
	n.addRemovePathJobCh <- job
	return <-done
}

// SetOnModify installs the callback fired when a file is considered modified (for example on
// IN_CLOSE_WRITE, or for files discovered in a newly watched directory).
// NOTE(review): this is a plain assignment with no synchronization against the reader goroutines;
// presumably meant to be called before Add — confirm.
func (n *inotifier) SetOnModify(callback FileNotifyFn) {
	n.onModify = callback
}

// SetOnDelete installs the callback fired when a file is deleted or when a path is moved away and no
// paired move-to event is seen.
// NOTE(review): this is a plain assignment with no synchronization against the reader goroutines;
// presumably meant to be called before Add — confirm.
func (n *inotifier) SetOnDelete(callback FileNotifyFn) {
	n.onDelete = callback
}

// SetOnAttrib installs the callback fired on IN_ATTRIB events for non-directories.
// NOTE(review): this is a plain assignment with no synchronization against the reader goroutines;
// presumably meant to be called before Add — confirm.
func (n *inotifier) SetOnAttrib(callback FileNotifyFn) {
	n.onAttrib = callback
}

// SetOnMove installs the callback fired when both halves of a rename (move-from and move-to) have
// been observed.
// NOTE(review): this is a plain assignment with no synchronization against the reader goroutines;
// presumably meant to be called before Add — confirm.
func (n *inotifier) SetOnMove(callback FileMoveNotifyFn) {
	n.onMove = callback
}

// runAddRemovePathJobs drains addRemovePathJobCh, executing the jobs one at a time in the order they
// arrive. It returns only when the channel is closed.
func (n *inotifier) runAddRemovePathJobs() {
	for job := range n.addRemovePathJobCh {
		if n.verbose {
			log.Printf("Running add-remove job %+v\n", job)
		}

		switch {
		case job.doRemove:
			n.runRemovePathJob(job)
		default:
			n.runAddPathJob(job)
		}
	}
}

// runRemovePathJob forgets every wd <-> path mapping that the path trie reports under job.path and
// then removes the corresponding inotify watches. Failures to remove individual watches are logged
// and do not fail the job. If job.errCh is set, exactly one result is sent to it: nil on completion
// or an error describing a recovered panic.
func (n *inotifier) runRemovePathJob(job addRemovePathJob) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("ERROR: panic while running removing path %s, recovering: %v\n%s", job.path, r,
				string(debug.Stack()))
			if job.errCh != nil {
				job.errCh <- fmt.Errorf("panic while removing path: %v", r)
			}
		}
	}()

	// Remove all the mappings below the path. The mutex is released via defer so that a panic recovered
	// above cannot leave it locked and stall the other goroutines.
	wds := func() []int {
		n.wdToPathMu.Lock()
		defer n.wdToPathMu.Unlock()

		// Collect first and mutate afterwards: the trie is not modified while it is being walked.
		var paths []string
		var found []int
		n.pathToWd.WalkFrom(job.path, func(path string, value interface{}) {
			paths = append(paths, path)
			found = append(found, value.(int))
		})
		for i, path := range paths {
			wd := found[i]
			delete(n.wdToPath, wd)
			n.pathToWd.Remove(path)
			if n.verbose {
				log.Printf("Removed mapping wd %d to path %s\n", wd, path)
			}
		}
		return found
	}()

	// The syscalls are done outside of the lock.
	for _, wd := range wds {
		if _, err := unix.InotifyRmWatch(n.fd, uint32(wd)); err != nil {
			log.Printf("ERROR: error while removing inotify wd %d: %v\n", wd, err)
			continue
		}
		if n.verbose {
			log.Printf("Removed wd %d\n", wd)
		}
	}

	if job.errCh != nil {
		job.errCh <- nil
	}
}

// runAddPathJob walks the tree rooted at job.path, adds an inotify watch for every directory that is
// not ignored and records the wd <-> path mapping. With job.notifyForFiles set it also fires onModify
// for the regular files and symlinks found on the way.
//
// The walk result is sent to job.errCh when it is set. For fire-and-forget jobs (nil errCh) a failure
// is logged instead, so that it is not lost silently. A recovered panic is reported the same way as an
// error.
func (n *inotifier) runAddPathJob(job addRemovePathJob) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("ERROR: panic while running adding path %s, recovering: %v\n%s", job.path, r,
				string(debug.Stack()))
			if job.errCh != nil {
				job.errCh <- fmt.Errorf("panic while adding path: %v", r)
			}
		}
	}()

	err := filepath.WalkDir(job.path, func(path string, dirent fs.DirEntry, err error) error {
		if err != nil {
			// Unreadable entries are logged and skipped; they do not abort the walk.
			log.Printf("ERROR: walking directory %s failed: %v\n", path, err)
			if dirent != nil && dirent.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}

		if dirent.IsDir() {
			if ShouldIgnorePath(path) {
				return filepath.SkipDir
			}

			// No need to process IN_MOVE_SELF: it is done by processing IN_MOVED_TO+IN_MOVED_FROM for the
			// parent directory.
			const flags = uint32(unix.IN_ATTRIB | unix.IN_CLOSE_WRITE | unix.IN_CREATE | unix.IN_DELETE |
				unix.IN_DELETE_SELF | unix.IN_MOVED_TO | unix.IN_MOVED_FROM)
			wd, err := unix.InotifyAddWatch(n.fd, path, flags)
			if err != nil {
				// Aborts the whole walk; the error is reported below.
				return err
			}
			if n.verbose {
				log.Printf("Added watch %d for path %s\n", wd, path)
			}

			// Release the mutex via defer so that a panic recovered above cannot leave it locked.
			func() {
				n.wdToPathMu.Lock()
				defer n.wdToPathMu.Unlock()
				n.wdToPath[wd] = path
				n.pathToWd.Insert(path, wd)
			}()
		} else if job.notifyForFiles && (dirent.Type()&os.ModeType == 0 || dirent.Type()&os.ModeSymlink != 0) {
			// If files are created too fast in new directory, they may happen between the time the new
			// directory has been created and the time we set up the notifications. Call onModify for all
			// the files which we discovered during walking. We process the parents before processing the
			// children, so no events should be missed.
			if n.onModify != nil {
				n.onModify(path, false)
			}
		}
		return nil
	})

	if job.errCh != nil {
		job.errCh <- err
	} else if err != nil {
		// Nobody is waiting for the result: do not swallow the failure (e.g. hitting the watch limit).
		log.Printf("ERROR: adding watches for path %s failed: %v\n", job.path, err)
	}
}

// moveFrom is a pending IN_MOVED_FROM event kept by the move matcher until the paired IN_MOVED_TO
// arrives or the entry is expired.
type moveFrom struct {
	path  string
	isDir bool
	// Time at which the move matcher recorded the event.
	ts time.Time
}

// runMoveMatcher pairs up the IN_MOVED_FROM/IN_MOVED_TO events arriving on movesCh. It never returns.
func (n *inotifier) runMoveMatcher() {
	// Every move produces two complementary inotify events, but one of them may never reach us when the
	// source or the destination subtree is not watched. The heuristic is simple: a MOVE_FROM whose pair
	// does not show up within about a second is considered to be a "one-way" move.
	//
	// RANT: This all is extremely complicated and inefficient and could be massively simplified by switching
	// to fanotify on newer kernels if only it could support different mount namespaces.

	// Pending MOVE_FROM events keyed by the inotify cookie.
	pending := map[uint32]moveFrom{}
	ticker := NewRestartableTicker(delayAfterMoveFrom)
	for {
		select {
		case <-ticker.C:
			n.processUnmatchedMoves(pending)
		case mv := <-n.movesCh:
			n.runMoveMatcherIter(mv, pending)
		}

		// Keep the ticker running only while there is something left to match.
		if len(pending) > 0 {
			if ticker.Start() && n.verbose {
				log.Printf("Restarting the delayed move ticker\n")
			}
			continue
		}
		if ticker.Stop() && n.verbose {
			log.Printf("Stopping the delayed move ticker\n")
		}
	}
}

// runMoveMatcherIter handles a single move event. A MOVE_FROM is remembered in cookieToMoveFrom; a
// MOVE_TO is either matched against a remembered MOVE_FROM with the same cookie or treated as a move
// from outside of the watched trees. Panics are recovered and logged.
func (n *inotifier) runMoveMatcherIter(move pathMove, cookieToMoveFrom map[uint32]moveFrom) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("ERROR: panic while running move matcher, recovering: %v\n%s", r,
				string(debug.Stack()))
		}
	}()

	if move.isFrom {
		// Remember the first half and wait for the second one.
		cookieToMoveFrom[move.cookie] = moveFrom{path: move.path, isDir: move.isDir, ts: time.Now()}
		return
	}

	from, paired := cookieToMoveFrom[move.cookie]
	if paired {
		// Both halves are known now.
		delete(cookieToMoveFrom, move.cookie)
		n.processMovePair(from.path, move.path, move.isDir)
		return
	}

	// MOVE_FROM/MOVE_TO are not reordered by the system, so a MOVE_TO without a known MOVE_FROM must be
	// a move from outside.
	if !move.isDir {
		if n.onModify != nil {
			n.onModify(move.path, false)
		}
		return
	}

	// Start adding the files of the new directory. The job is not waited for: this may introduce
	// inconsistencies in watch descriptors, but reduces the probability of overflowing the queue.
	n.addRemovePathJobCh <- addRemovePathJob{
		path:           move.path,
		notifyForFiles: true,
	}
}

// processMovePair handles a rename for which both the source and the destination are known: it
// re-keys the wd <-> path mappings from fromPath to toPath, re-scans a moved directory and finally
// fires onMove.
//
// The order matters: the mappings are changed before onMove is fired. This way a modification of a
// file inside the moved directory arriving right after the move is reported either under the new name,
// or under the old name followed by the rename event.
func (n *inotifier) processMovePair(fromPath string, toPath string, isDir bool) {
	if n.verbose {
		log.Printf("Got pair move: from %s to %s\n", fromPath, toPath)
	}

	type watched struct {
		path string
		wd   int
	}

	// Unfortunately the lock is held for the whole walk over the trie and the subsequent re-keying.
	n.wdToPathMu.Lock()
	var moved []watched
	n.pathToWd.WalkFrom(fromPath, func(path string, value interface{}) {
		moved = append(moved, watched{path: path, wd: value.(int)})
	})
	for _, w := range moved {
		// Swap the fromPath prefix for toPath, keeping the rest of the path.
		renamed := toPath + w.path[len(fromPath):]
		n.wdToPath[w.wd] = renamed
		n.pathToWd.Remove(w.path)
		n.pathToWd.Insert(renamed, w.wd)
		if n.verbose {
			log.Printf("Changed wd %d path from %s to %s\n", w.wd, w.path, renamed)
		}
	}
	n.wdToPathMu.Unlock()

	if isDir {
		// A hack for the cases such as creation of a large tree of files (e.g. unpacking an archive)
		// immediately followed by the rename. inotify guarantees that it returns the same watch
		// descriptor for one path added multiple times, so this is safe to do.
		n.addRemovePathJobCh <- addRemovePathJob{
			path:           toPath,
			notifyForFiles: true,
		}
	}

	if n.onMove != nil {
		n.onMove(fromPath, toPath, isDir)
	}
}

// processUnmatchedMoves expires the pending MOVE_FROM events which have been waiting for their pair
// for at least delayAfterMoveFrom. Such a move is treated as a move out of the watched trees: a remove
// job is queued for the path, onDelete is fired and the entry is dropped from cookieToMoveFrom.
// Younger entries are left alone so that their MOVE_TO still has a chance to arrive. Panics are
// recovered and logged.
func (n *inotifier) processUnmatchedMoves(cookieToMoveFrom map[uint32]moveFrom) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("ERROR: panic while processing unmatched moves, recovering: %v\n%s", r,
				string(debug.Stack()))
		}
	}()

	// The cutoff lies in the past: only the events recorded before it are old enough to give up on.
	cutoff := time.Now().Add(-delayAfterMoveFrom)
	for cookie, move := range cookieToMoveFrom {
		if !move.ts.Before(cutoff) {
			continue
		}
		if n.verbose {
			log.Printf("Unmatched move for path %s, removing\n", move.path)
		}

		// Do not wait until the job completes.
		n.addRemovePathJobCh <- addRemovePathJob{
			path:     move.path,
			doRemove: true,
		}

		if n.onDelete != nil {
			n.onDelete(move.path, move.isDir)
		}
		delete(cookieToMoveFrom, cookie)
	}
}

// runEventReader reads and dispatches inotify events forever, reusing one 64 KiB buffer. Each step is
// a separate call so that a panic inside of it can be recovered without ending the loop.
func (n *inotifier) runEventReader() {
	for events := newByteBuffer(64 * 1024); ; {
		n.runEventReaderIter(events)
	}
}

// runEventReaderIter processes one inotify event. When the buffer has been fully consumed it is
// refilled with a read from the inotify descriptor first; then a single event header and its name are
// decoded from the buffer and passed to processEvent. Read errors and truncated buffers are logged and
// the buffer is left empty, so that the next call starts with a fresh read. Panics are recovered and
// logged.
func (n *inotifier) runEventReaderIter(buf *byteBuffer) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("ERROR: panic while reading events, recovering: %v\n%s", r, string(debug.Stack()))
		}
	}()

	sizeofEvent := int(unix.SizeofInotifyEvent)

	if buf.remaining() == 0 {
		buf.reset()
		nread, err := unix.Read(n.fd, buf.buf)
		if err != nil {
			// The count returned alongside an error is not a valid length: keep the buffer empty instead
			// of storing it. NOTE(review): assumes remaining() is derived from buf.length — confirm.
			buf.length = 0
			log.Printf("ERROR: error while reading from inotify: %v\n", err)
			return
		}
		buf.length = nread
	}
	if buf.remaining() < unix.SizeofInotifyEvent {
		log.Printf("ERROR: remaining bytes in inotify buffer: %d, required: %d\n",
			buf.remaining(), sizeofEvent)
		buf.reset()
		return
	}

	// The event header is interpreted in place; the name which follows it is copied out of the buffer.
	event := (*unix.InotifyEvent)(unsafe.Pointer(&buf.buf[buf.offset]))
	buf.offset += sizeofEvent
	rawName := string(buf.readSlice(int(event.Len)))

	n.processEvent(event, rawName)
}

// processEvent dispatches a single inotify event. rawName is the name that followed the event header,
// possibly padded with trailing NUL bytes. The watch descriptor is translated to the watched directory
// path, the full path of the affected entry is built, and then every bit set in the event mask is
// handled in turn: callbacks are fired, add jobs are queued for new directories, stale mappings are
// dropped and move events are forwarded to the move matcher.
func (n *inotifier) processEvent(event *unix.InotifyEvent, rawName string) {
	if event.Mask&unix.IN_Q_OVERFLOW != 0 {
		// The event is only logged; no attempt is made to recover the lost events.
		log.Printf("WARNING: overflow in inotify\n")
		return
	}
	if event.Mask&unix.IN_IGNORED != 0 {
		return
	}

	// Strip the NUL padding of the name.
	name := strings.TrimRight(rawName, "\000")

	n.wdToPathMu.Lock()
	wdPath, ok := n.wdToPath[int(event.Wd)]
	n.wdToPathMu.Unlock()
	if !ok {
		if n.verbose {
			// This can happen if we added a watch descriptor in runAddPathJob() but did not add it to the
			// wdToPath yet.
			log.Printf("WARNING: unknown inotify wd %d (is it a newly created directory?), filename %s\n",
				event.Wd, name)
		}
		return
	}

	fullPath := filepath.Join(wdPath, name)
	isDir := event.Mask&unix.IN_ISDIR != 0
	if n.verbose {
		log.Printf("Got event for wd %d, path %s, isDir %v, mask %d, cookie %d\n", event.Wd, fullPath,
			isDir, event.Mask&^unix.IN_ISDIR, event.Cookie)
	}

	if event.Mask&unix.IN_ATTRIB != 0 {
		// Attribute changes are reported for non-directories only.
		if !isDir {
			if n.onAttrib != nil {
				n.onAttrib(fullPath, isDir)
			}
		}
	}
	if event.Mask&unix.IN_CLOSE_WRITE != 0 {
		if n.onModify != nil {
			n.onModify(fullPath, false)
		}
	}
	if event.Mask&unix.IN_CREATE != 0 {
		if isDir {
			// Do not wait until the job completes. This may introduce inconsistencies in watch descriptors,
			// but reduces the probability of overflowing the queue.
			n.addRemovePathJobCh <- addRemovePathJob{
				path:           fullPath,
				doRemove:       false,
				notifyForFiles: true,
			}
		} else {
			// If the file is a regular file, wait until it has been closed after writing. However, if the
			// file is a symlink, no further events will come.
			st, err := os.Lstat(fullPath)
			if err != nil {
				if n.verbose {
					// The file could've been deleted immediately after creation.
					log.Printf("Got error when lstat()ting newly created file in inotify: %s: %v\n",
						fullPath, err)
				}
				// NOTE: this abandons the whole event, including any other mask bits below.
				return
			}
			if st.Mode()&os.ModeSymlink != 0 {
				if n.onModify != nil {
					n.onModify(fullPath, false)
				}
			}
		}
	}
	if event.Mask&unix.IN_DELETE != 0 {
		// Directory would be handled by IN_DELETE_SELF.
		if !isDir {
			// NOTE: May send events regarding the device/pipe/socket, should filter in the callback.
			if n.onDelete != nil {
				n.onDelete(fullPath, isDir)
			}
		}
	}
	if event.Mask&unix.IN_DELETE_SELF != 0 {
		// Remove the entry from watch descriptors map, the watch itself will be removed by the inotify
		// itself.
		n.wdToPathMu.Lock()
		// Re-check that we've still got the path and remove it.
		if path, ok := n.wdToPath[int(event.Wd)]; ok {
			delete(n.wdToPath, int(event.Wd))
			n.pathToWd.Remove(path)
			if n.verbose {
				log.Printf("Removing wd %d, path %s from map\n", event.Wd, path)
			}
		}
		n.wdToPathMu.Unlock()
	}
	// Both halves of a move are forwarded to the move matcher, which pairs them up by cookie.
	if event.Mask&unix.IN_MOVED_TO != 0 {
		n.movesCh <- pathMove{
			isFrom: false,
			path:   fullPath,
			isDir:  isDir,
			cookie: event.Cookie,
		}
	}
	if event.Mask&unix.IN_MOVED_FROM != 0 {
		n.movesCh <- pathMove{
			isFrom: true,
			path:   fullPath,
			isDir:  isDir,
			cookie: event.Cookie,
		}
	}
}

// NewINotifier creates an inotify-based FileNotifier and starts its background goroutines: the
// add/remove job runner, the move matcher and the event reader. With verbose set, additional
// diagnostics are logged. An error is returned if the inotify instance cannot be created.
func NewINotifier(verbose bool) (FileNotifier, error) {
	// Create the descriptor with close-on-exec set so that it is not inherited by the processes
	// exec'ed by this one.
	fd, err := unix.InotifyInit1(unix.IN_CLOEXEC)
	if err != nil {
		return nil, err
	}

	ret := &inotifier{
		verbose:    verbose,
		fd:         fd,
		wdToPathMu: &sync.Mutex{},
		wdToPath:   map[int]string{},
		// Both channels are unbuffered: the senders block until the corresponding goroutine picks the
		// item up.
		addRemovePathJobCh: make(chan addRemovePathJob),
		movesCh:            make(chan pathMove),
	}
	go ret.runAddRemovePathJobs()
	go ret.runMoveMatcher()
	go ret.runEventReader()
	return ret, nil
}
