package fim

import (
	"fmt"
	"path/filepath"
	"regexp"
	"strings"

	"a.yandex-team.ru/security/osquery/extensions/osquery-fim/internal/container"
)

// pathMatcher matches paths for one category.
//
// A path is accepted when it matches matchRegexp and does not match excludeRegexp
// (see pathMatches).
type pathMatcher struct {
	// category is the name of the category; it appears in String output and error messages
	// and is the key deduplicateMatches uses to tell matchers apart.
	category string

	// Concatenated globs converted into one regexp: matchRegexp is compiled from the include
	// globs and excludeRegexp from the exclude globs given to newPathMatcher.
	matchRegexp   *regexp.Regexp
	excludeRegexp *regexp.Regexp

	// List of top-level roots for this matcher, one per include glob.
	roots []pathRoot
}

// pathRoot is a filesystem location derived from a glob. Path roots are used for listing files
// and/or subscribing to file notifications.
type pathRoot struct {
	// isFile is true for individual files; path is the file path in this case and recursive is
	// ignored. Otherwise path is a directory, and recursive is set when the glob it was derived
	// from can match below the directory's immediate children (see globToFileRoot).
	isFile    bool
	path      string
	recursive bool
}

// pathMatcherSlice is a list of path matchers, e.g. all matchers attached to one root.
type pathMatcherSlice []*pathMatcher

// String renders the matcher as its category followed by the quoted match and exclude
// regexps, for logs and debugging.
func (m *pathMatcher) String() string {
	const layout = `%s: match:"%v" exclude:"%v"`
	return fmt.Sprintf(layout, m.category, m.matchRegexp, m.excludeRegexp)
}

// pathRootWithMatchers couples a root with the matchers attached to it by prepareRoots.
type pathRootWithMatchers struct {
	root     pathRoot
	matchers pathMatcherSlice
}

// pathMatches reports whether path belongs to this matcher: it has to match the include
// regexp and must not match the exclude regexp.
func (m *pathMatcher) pathMatches(path string) bool {
	if !m.matchRegexp.MatchString(path) {
		return false
	}
	// Included so far; an exclude hit vetoes the match.
	return !m.excludeRegexp.MatchString(path)
}

// newPathMatcher builds the path matcher for one category.
//
// globs are the patterns of paths to include; at least one is required. excludeGlobs are the
// patterns of paths to exclude and may be empty. Every pattern is normalized with
// filepath.Clean and must be absolute afterwards. The include patterns are folded into one
// regexp, the exclude patterns into another, and one root is derived per include pattern.
//
// An error is returned when globs is empty, when a pattern is not absolute, when a pattern
// cannot be converted, or when a combined regexp does not compile. Compile errors are
// wrapped, so the underlying regexp error stays reachable through errors.Is/errors.As.
func newPathMatcher(category string, globs []string, excludeGlobs []string) (*pathMatcher, error) {
	if len(globs) == 0 {
		return nil, fmt.Errorf("at least one file pattern must be set for %s", category)
	}

	// convert cleans and validates every pattern and translates it into a regexp fragment.
	// Include and exclude lists are processed identically, so they share this helper.
	convert := func(patterns []string) ([]string, []string, error) {
		cleaned := make([]string, 0, len(patterns))
		fragments := make([]string, 0, len(patterns))
		for _, glob := range patterns {
			cleanGlob := filepath.Clean(glob)
			if !filepath.IsAbs(cleanGlob) {
				return nil, nil, fmt.Errorf("file pattern must be absolute: %s", glob)
			}
			fragment, err := globToRegexp(cleanGlob)
			if err != nil {
				return nil, nil, err
			}
			cleaned = append(cleaned, cleanGlob)
			fragments = append(fragments, fragment)
		}
		return cleaned, fragments, nil
	}

	cleanGlobs, pathsRe, err := convert(globs)
	if err != nil {
		return nil, err
	}
	matchRegexp, err := regexp.Compile(joinRegexps(pathsRe))
	if err != nil {
		return nil, fmt.Errorf("could not compile regex for glob %s: %w", pathsRe, err)
	}

	// Only the regexp fragments of the exclude globs are needed; roots come from the
	// include globs alone.
	_, excludeRe, err := convert(excludeGlobs)
	if err != nil {
		return nil, err
	}
	excludeRegexp, err := regexp.Compile(joinRegexps(excludeRe))
	if err != nil {
		return nil, fmt.Errorf("could not compile regex for glob %s: %w", excludeRe, err)
	}

	return &pathMatcher{
		category:      category,
		matchRegexp:   matchRegexp,
		excludeRegexp: excludeRegexp,
		roots:         getFileRoots(cleanGlobs),
	}, nil
}

// globToRegexp translates a glob into the source text of an unanchored regular expression.
//
// Wildcards:
//   - "?" becomes a character class matching one character other than the path separator;
//   - "*" becomes that class repeated zero or more times;
//   - "**" becomes ".*", which may also cross path separators;
//   - "%" is accepted as equivalent to "*", according to
//     https://osquery.readthedocs.io/en/stable/deployment/file-integrity-monitoring/
//
// Every other character is quoted and therefore matched literally. The returned error is
// currently always nil.
func globToRegexp(glob string) (string, error) {
	ret := strings.ReplaceAll(glob, "%", "*")
	// Both * and ? are quoted by QuoteMeta, so the wildcards are recognized below in their
	// quoted form and replaced with the corresponding regexps.
	ret = regexp.QuoteMeta(ret)
	// The separator lands inside a character class and has to be quoted as well: a raw
	// backslash separator would escape the closing bracket and yield an invalid class.
	notSep := "[^" + regexp.QuoteMeta(string(filepath.Separator)) + "]"
	ret = strings.ReplaceAll(ret, "\\?", notSep)
	// "**" goes first, otherwise it would be consumed as two single stars.
	ret = strings.ReplaceAll(ret, "\\*\\*", ".*")
	ret = strings.ReplaceAll(ret, "\\*", notSep+"*")
	return ret, nil
}

// joinRegexps combines regexp alternatives into a single expression that has to match the
// whole path.
//
// Golang regexp lacks a FullMatch method, so the alternation is anchored with ^ and $. The
// alternatives are wrapped in a non-capturing group first: | has the lowest precedence, so
// without the group ^ would bind to the first alternative only and $ to the last one only,
// letting partial matches through. An empty list yields a pattern that matches only the
// empty string.
func joinRegexps(re []string) string {
	return "^(?:" + strings.Join(re, "|") + ")$"
}

// getFileRoots derives one root per glob via globToFileRoot, keeping the order of globs.
// The result is never nil, even for an empty input.
func getFileRoots(globs []string) []pathRoot {
	result := make([]pathRoot, len(globs))
	for i := range globs {
		result[i] = globToFileRoot(globs[i])
	}
	return result
}

// globToFileRoot derives the root for a single glob.
//
// "%" is accepted as equivalent to "*", according to
// https://osquery.readthedocs.io/en/stable/deployment/file-integrity-monitoring/
//
// A glob without wildcards yields a file root whose path is the glob itself. Otherwise the
// root is the directory of the text preceding the first wildcard, e.g.
// "/some/dir/bef?ore**other.txt" is cut to "/some/dir".
func globToFileRoot(glob string) pathRoot {
	normalized := strings.ReplaceAll(glob, "%", "*")

	wildcardAt := strings.IndexAny(normalized, "*?")
	if wildcardAt < 0 {
		// No wildcard at all: the glob names one exact path.
		return pathRoot{isFile: true, path: normalized}
	}

	prefix, tail := normalized[:wildcardAt], normalized[wildcardAt:]

	var root pathRoot
	root.path = filepath.Dir(prefix)
	// A glob is non-recursive only if the wildcards (? or *) are confined to the last path
	// element (the file name) and there is no "**" anywhere.
	root.recursive = strings.Contains(normalized, "**") || strings.ContainsRune(tail, filepath.Separator)
	return root
}

// prepareRoots folds a bunch of matchers into a set of roots, each carrying the matchers
// that were attached to it.
//
// The roots of all matchers are distributed in four passes:
//  1. every recursive root is inserted into a path trie, which is then reduced with
//     LeaveOnlyRoots;
//  2. each matcher is attached to the trie entry GetParent returns for its recursive roots;
//  3. non-recursive directory roots join the trie entry GetParent reports for them, if any,
//     otherwise they are grouped by directory path;
//  4. file roots join the trie entry GetParent reports for them, else the non-recursive
//     group of their parent directory, else they are grouped by file path.
//
// NOTE(review): PathTrie is defined outside this file; GetParent presumably returns the
// recursive root enclosing the path and LeaveOnlyRoots presumably drops nested roots —
// confirm against the container package.
//
// The result lists the trie entries as visited by Walk, then the non-recursive groups, then
// the file groups. The two latter come from map iteration, so their order is unspecified.
// The matchers of every root are deduplicated by category.
func prepareRoots(matchers []*pathMatcher) []pathRootWithMatchers {
	// attach appends m to the matcher list stored as a trie value.
	attach := func(slot interface{}, m *pathMatcher) {
		list := slot.(*pathMatcherSlice)
		*list = append(*list, m)
	}

	// Pass 1: register all recursive roots in the trie.
	trie := container.PathTrie{}
	for _, m := range matchers {
		for _, r := range m.roots {
			if !r.recursive {
				continue
			}
			trie.Insert(r.path, &pathMatcherSlice{})
		}
	}
	trie.LeaveOnlyRoots()

	// Pass 2: hand recursive matchers over to the corresponding trie entry.
	for _, m := range matchers {
		for _, r := range m.roots {
			if !r.recursive {
				continue
			}
			slot, _ := trie.GetParent(r.path)
			attach(slot, m)
		}
	}

	// Pass 3: non-recursive directory roots go to a recursive root when one is found,
	// otherwise roots with the same path share one group.
	dirGroups := map[string]*pathMatcherSlice{}
	for _, m := range matchers {
		for _, r := range m.roots {
			if r.recursive || r.isFile {
				continue
			}
			if slot, found := trie.GetParent(r.path); found {
				attach(slot, m)
				continue
			}
			if list, found := dirGroups[r.path]; found {
				*list = append(*list, m)
				continue
			}
			dirGroups[r.path] = &pathMatcherSlice{m}
		}
	}

	// Pass 4: file roots go to a recursive root, to the non-recursive root of their
	// directory, or to a group of their own (shared when one file is present in several
	// matchers).
	fileGroups := map[string]*pathMatcherSlice{}
	for _, m := range matchers {
		for _, r := range m.roots {
			if !r.isFile {
				continue
			}
			if slot, found := trie.GetParent(r.path); found {
				attach(slot, m)
				continue
			}
			if list, found := dirGroups[filepath.Dir(r.path)]; found {
				*list = append(*list, m)
				continue
			}
			if list, found := fileGroups[r.path]; found {
				*list = append(*list, m)
				continue
			}
			fileGroups[r.path] = &pathMatcherSlice{m}
		}
	}

	// Assemble the result: recursive roots first, then directories, then single files.
	result := []pathRootWithMatchers{}
	emit := func(root pathRoot, list *pathMatcherSlice) {
		result = append(result, pathRootWithMatchers{
			root:     root,
			matchers: deduplicateMatches(*list),
		})
	}
	trie.Walk(func(path string, v interface{}) {
		emit(pathRoot{isFile: false, path: path, recursive: true}, v.(*pathMatcherSlice))
	})
	for path, list := range dirGroups {
		emit(pathRoot{isFile: false, path: path, recursive: false}, list)
	}
	for path, list := range fileGroups {
		emit(pathRoot{isFile: true, path: path, recursive: false}, list)
	}
	return result
}

// deduplicateMatches returns the matchers whose category was accepted by the string set,
// in their original order. Duplicates can appear after several roots of one matcher were
// merged into the same root.
//
// NOTE(review): relies on container.StringSet.Insert reporting true only for values that
// were not in the set yet — StringSet is defined outside this file, confirm there.
func deduplicateMatches(matchers pathMatcherSlice) pathMatcherSlice {
	seen := container.StringSet{}
	kept := make(pathMatcherSlice, 0, len(matchers))
	for i := range matchers {
		if !seen.Insert(matchers[i].category) {
			continue
		}
		kept = append(kept, matchers[i])
	}
	return kept
}
