package coroner

import (
	"fmt"
	"regexp"
	"strings"

	"a.yandex-team.ru/infra/rsm/dnsmanager/pkg/pushyasm"
)

var (
	// taintPtrn matches the "Not tainted" / "Tainted:" banner, any following
	// upper-case flag letters and spaces, and captures the version-like token
	// after them in the named group "kernel".
	taintPtrn = `(?:(Not tainted|Tainted:)[A-Z ]*(?P<kernel>[0-9]+\.[A-Za-z0-9-\.]+) .*)`

	// stacktracePtrn holds the extra alternatives that New appends to an
	// event's patterns when the event is registered with stacktrace == true.
	stacktracePtrn = []string{
		"(?:kernel BUG.*)",
		"(?:invalid opcode: .*)",
		"(?:Modules linked in: .*)",
		taintPtrn,
		fmt.Sprintf("%s %s", `CPU: (?P<cpu>\d+) PID: (?P<pid>\d+) Comm: (?P<comm>.*)`, taintPtrn),
		`Hardware name:(?P<hwname>.*), BIOS (?P<bios>.*)`,
		`Workqueue:(?P<workqueue>.*)`,
		`(?P<stacktrace>[\? ]*\w+\+0x[0-9a-f]+/0x[0-9a-f]+)(?: \[\w+\])?`,
		"RIP:(?P<rip>.*)",
		"(?:Code:.*)",
		// NOTE(review): "|" binds looser than concatenation, so only the
		// DR[0-9] alternative requires the trailing ":"; every other
		// alternative matches the bare register name anywhere in a line.
		// Confirm whether "(?:(?: RIP|RSP|...|DR[0-9]):.*)" was intended.
		"(?: RIP|RSP|RAX|RDX|RBP|R[0-9]{2}|FS|CS|CR2|DR[0-9]:.*)",
	}

	// meminfoPtrn holds the extra alternatives that New appends to an event's
	// patterns when the event is registered with meminfo == true.
	meminfoPtrn = []string{
		"(?:Mem-Info:.*)",
		"(?:active_anon:[0-9]+ inactive_anon:[0-9]+ isolated_anon:[0-9]+)",
		"(?: active_file:[0-9]+ inactive_file:[0-9]+ isolated_file:[0-9]+)",
		"(?: unevictable:[0-9]+ dirty:[0-9]+ writeback:[0-9]+ unstable:[0-9]+)",
		"(?: slab_reclaimable:[0-9]+ slab_unreclaimable:[0-9]+)",
		"(?: mapped:[0-9]+ shmem:[0-9]+ pagetables:[0-9]+ bounce:[0-9]+)",
		"(?: free:[0-9]+ free_pcp:[0-9]+ free_cma:[0-9]+)",
		"(?:Node [0-9]+ .*)",
		"(?:[0-9]+ total pagecache pages)",
		"(?:[0-9]+ pages in swap cache)",
		"(?:Free swap  = [0-9]+kB)",
		"(?:Total swap = [0-9]+kB)",
		"(?:[0-9]+ pages RAM)",
		"(?:[0-9]+ pages HighMem/MovableOnly)",
		"(?:[0-9]+ pages reserved)",
		"(?:[0-9]+ pages hwpoisoned)",
	}

	// bootPtrn matches a "Linux version <token> ..." line and captures the
	// token after "Linux version " in the named group "kernel". The whole
	// expression is a single non-capturing group so its parentheses balance
	// and it can be compiled on its own or joined with "|" like the others.
	bootPtrn = "(?:Linux version (?P<kernel>[^ ]*) .*)"

	// Severity levels that can be assigned to an Event.
	INFO  Severity = "INFO"
	WARN  Severity = "WARN"
	ERROR Severity = "ERROR"
	CRIT  Severity = "CRIT"

	// Es is the package-level event registry; New appends every event to it.
	Es Events

	// EsMrks is the combined marker regexp for Es. It is assigned at the end
	// of init from Es.Markers() and read by Events.Find.
	EsMrks *regexp.Regexp
)

// init registers the built-in events through New and then builds EsMrks from
// the resulting Es.
//
// The arguments of each New call are: name, severity, marker, patterns,
// stacktrace, meminfo. An empty marker makes New use the first pattern as the
// marker. Registration order matters: Events.Find maps the capture-group index
// of the combined marker regexp back to a position in Es, so every marker must
// contribute exactly one capture group, and the broad "panic", "bug" and
// "warning" events are registered last.
func init() {
	// Events with severity CRIT.
	New("mce_panic", CRIT, "", []string{"(?P<msg>Kernel panic - not syncing: Fatal [Mm]achine check)"}, false, false)
	New("mce_hardware", CRIT, "", []string{"(?P<msg>mce: \\[Hardware Error\\].*)"}, false, false)
	New("mce_panic_timeout", CRIT, "", []string{"(?P<msg>Kernel panic - not syncing: Timeout synchronizing machine check over CPUs)"}, false, false)
	New("kill_idle", CRIT, "", []string{"(?P<msg>Kernel panic - not syncing: Attempted to kill the idle task!)"}, false, false)
	New("kill_init", CRIT, "", []string{"(?P<msg>Kernel panic - not syncing: Attempted to kill init!.*)"}, false, false)
	New("double_fault", CRIT, "", []string{"(?P<msg>PANIC: double fault.*)"}, false, false)
	New("fault_null", CRIT, "", []string{"(?P<msg>BUG: unable to handle kernel NULL pointer dereference at.*)"}, true, false)
	New("fault", CRIT, "", []string{"(?P<msg>BUG: unable to handle kernel paging request at.*)"}, true, false)
	New("sleep_in_atomic", CRIT, "", []string{"(?P<msg>BUG: sleeping function called from invalid context at.*)"}, true, false)
	New("sched_in_atomic", CRIT, "", []string{"(?P<msg>BUG: scheduling while atomic:.*)"}, true, false)
	New("gpf", CRIT, "", []string{"(?P<msg>general protection fault:.*)"}, true, false)
	New("corrupted_page_table", CRIT, "", []string{"(?P<msg>Corrupted page table.*)"}, true, false)
	New("bad_page_map", CRIT, "", []string{"(?P<msg>BUG: Bad page map in process.*)"}, true, false)
	New("bad_rss_counter", CRIT, "", []string{"(?P<msg>BUG: Bad rss-counter state.*)"}, true, false)
	New("divide_error", CRIT, "", []string{"(?P<msg>divide error:.*)"}, true, false)
	// https://st.yandex-team.ru/KERNEL-793
	New("bert_error", CRIT, "", []string{"(?P<msg>BERT: Error records from previous boot:)"}, false, false)
	New("hardware_error", CRIT, "", []string{
		"\\[Hardware Error\\]:  (?P<msg>Error [0-9], type:.*)",
		"\\[Hardware Error\\]:  fru_text: (?P<fru_text>.*)",
		"\\[Hardware Error\\]:   section_type: (?P<section_type>.*)",
		"\\[Hardware Error\\]:   Local APIC_ID: (?P<cpu>.*)",
		"\\[Hardware Error\\]:    Error Structure Type: (?P<error_structure_type>.*)",
		"\\[Hardware Error\\]:    MSR Address: (?P<MSR_Address>.*)"}, false, false)

	// Events with severity WARN. Several of them pass an explicit, shorter
	// marker instead of reusing the first pattern.
	New("ext4_error", WARN, "(EXT4-fs error)", []string{`(?P<msg>EXT4-fs error \(device (?P<dev>\w+)\).*)`}, false, false)
	New("ext4_warning", WARN, "(EXT4-fs warning)", []string{`(?P<msg>EXT4-fs warning \(device (?P<dev>\w+)\).*)`}, false, false)
	New("ext4_delalloc_fail", WARN, "(Delayed block allocation failed)", []string{`(?P<msg>EXT4-fs \((?P<dev>\w+)\): Delayed block allocation failed.*)`}, false, false)
	New("quota_error", WARN, "(Quota error)", []string{`(?P<msg>Quota error \(device (?P<dev>\w+)\).*)`}, false, false)
	New("block_io", WARN, "(blk_update_request: I/O error)", []string{`(?P<msg>blk_update_request: I/O error, dev (?P<dev>\w+).*)`}, false, false)
	New("scsi_error", WARN, "(Sense: Unrecovered read error)", []string{"(?P<msg>sd.*\\[(?P<dev>[a-z]+)\\].*Sense: Unrecovered read error.*)"}, false, false)
	New("scsi_fail", WARN, "(FAILED Result: hostbyte)", []string{"(?P<msg>sd.*\\[(?P<dev>[a-z]+)\\].*FAILED Result: hostbyte=.*)"}, false, false)
	New("nvme_reset", WARN, "", []string{"(?P<msg>nvme .*: Failed status:.* reset controller)"}, false, false)
	New("nvme_io", WARN, "", []string{"(?P<msg>nvme .*: Cancelling I/O [0-9]+ QID [0-9]+)"}, false, false)
	New("unregister_netdevice", WARN, "(unregister_netdevice: waiting for)", []string{`(?P<msg>unregister_netdevice: waiting for (?P<dev>\w+).*)`}, false, false)
	New("workqueue_lockup", WARN, "", []string{"(?P<msg>BUG: workqueue lockup.*)"}, false, false)
	New("page_dump", WARN, "", []string{"(?P<msg>page dumped because:.*)"}, true, false)
	New("soft_lockup", WARN, "", []string{"(?P<msg>.*soft lockup -.*)"}, true, false)
	New("hard_lockup", WARN, "", []string{"(?P<msg>.*hard lockup -.*)"}, true, false)
	New("hard_LOCKUP", WARN, "", []string{"(?P<msg>.*Watchdog detected hard LOCKUP.*)"}, true, false)
	New("cfs_period_timer", WARN, "", []string{"(?P<msg>cfs_period_timer\\[cpu[0-9]+\\]: period too short.*)"}, false, false)

	// Events with severity INFO.
	New("acpi_error", INFO, "", []string{"(?P<msg>ACPI Error:.*)"}, false, false)
	New("microcode_early_update", INFO, "", []string{"microcode: (?P<msg>microcode updated early.*)"}, false, false)
	New("rcu_self_lockup", INFO, "", []string{"(?P<msg>.*self-detected stall on CPU.*)"}, true, false)
	New("rcu_task_lockup", INFO, "", []string{"(?P<msg>rcu_tasks detected stalls on tasks.*)"}, true, false)
	New("rcu_lockup", INFO, "", []string{"(?P<msg>rcu.*detected stalls on CPUs/tasks.*)"}, true, false)
	New("perf_lockup", INFO, "", []string{"(?P<msg>perf interrupt took too long.*)"}, true, false)
	New("task_lockup", INFO, "(blocked for more than)", []string{`(?P<msg>task (?P<comm>.*):(?P<pid>\d+) blocked for more than .* seconds)`}, true, false)
	New("nmi_backtrace", INFO, "", []string{"(?P<msg>NMI backtrace.*)"}, true, false)
	New("page_alloc_stall", INFO, "(page allocation stalls for)", []string{"(?P<msg>.*page allocation stalls for.*)"}, true, true)
	New("page_alloc_fail", INFO, "", []string{"(?P<msg>.*page allocation failure.*)"}, true, true)
	New("slub_alloc_fail", INFO, "", []string{"(?P<msg>SLUB: Unable to allocate memory on node.*)"}, false, true)
	New("oom", INFO, "", []string{
		"(?P<msg>.*invoked oom-killer.*)",
		"(?:Killed process.*)",
		"(?:oom_reaper:.*)",
		"(?:Task in .* killed as a result of limit of.*)",
		"(?:memory: usage .*, limit .*, failcnt .*)",
		"(?:kmem: usage .*, limit .*, failcnt .*)",
		"(?:anon: usage .*, limit .*, failcnt .*)",
		"(?:Memory cgroup stats for .*)"}, true, true)

	// Events with severity ERROR.
	// NOTE(review): the PCI address fields in the two events below use
	// [0-9] classes, so an address containing hex letters (for example
	// 0000:3b:00) would not match, and the nvrm error code must be exactly
	// two digits. Confirm that this is intended.
	// https://st.yandex-team.ru/RESMAN-29
	New("nvrm", ERROR, "(NVRM: Xid \\(PCI:[0-9]{4}:[0-9]{2}:[0-9]{2}\\): [0-9]{2},.*)", []string{"(?P<msg>NVRM: Xid \\(PCI:(?P<dev>[0-9]{4}:[0-9]{2}:[0-9]{2})\\): (?P<error>[0-9]{2}),.*)"}, false, false)
	// https://st.yandex-team.ru/RTCSUPPORT-6953
	New("mlx", ERROR, `(mlx[0-9]\w+ [0-9]{4}:[0-9]{2}:[0-9]{2}.[0-9]: \w+:[0-9]+:\(pid [0-9]+\): .*)`, []string{`(?P<dev>[0-9]{4}:[0-9]{2}:[0-9]{2}.[0-9]): .*\(pid (?P<pid>[0-9]+)\): (?P<msg>.*)`}, false, false)

	// Disabled lockdep event. As written it would not compile if re-enabled:
	// the call lacks the marker, stacktrace and meminfo arguments, and the
	// first pattern is missing its trailing comma.
	//New("lockdep", CRIT, []string{
	//	"(?P<msg>lock held when returning to user space!)"
	//	"(?P<msg>Nested lock was not taken)",
	//	"(?P<msg>bad unlock balance detected!)",
	//	"(?P<msg>bad contention detected!)",
	//	"(?P<msg>held lock freed!)",
	//	"(?P<msg>.*still has locks held!)",
	//	"(?P<msg>suspicious RCU usage.)",
	//	"(?P<msg>possible circular locking dependency detected)",
	//	"(?P<msg>.*unsafe lock order detected)",
	//	"(?P<msg>possible recursive locking detected)",
	//	"(?P<msg>inconsistent lock state)",
	//	"(?P<msg>possible irq lock inversion dependency detected.*)"})

	// Broad fallback events, registered after all the specific ones.
	New("panic", CRIT, "", []string{"(?P<msg>.*PANIC.*)"}, true, true)
	New("bug", CRIT, "", []string{"(?P<msg>.*BUG.*)"}, true, true)
	New("warning", WARN, "", []string{"(?P<msg>WARNING.*)"}, true, true)

	// Build the combined marker regexp only once every event is registered.
	EsMrks = Es.Markers()
}

// Severity is the importance level attached to an Event; the package defines
// the values INFO, WARN, ERROR and CRIT.
type Severity string

// Event describes one kind of log event: how it is recognised (marker), how
// its details are extracted (ptrns / rgxp) and how it is reported.
type Event struct {
	// Name identifies the event; New also uses it to name the metric.
	Name     string
	// Severity is the level the event was registered with.
	Severity Severity
	// Metric is the numeric created by New under "coroner-event-<Name>".
	Metric   *pushyasm.Numeric
	// marker is the regexp fragment this event contributes to the combined
	// regexp built by Events.Markers.
	marker   string
	// ptrns are the regexp alternatives that were joined with "|" into rgxp.
	ptrns    []string
	// rgxp is the compiled form of ptrns, used by Parse.
	rgxp     *regexp.Regexp
}

// Parse matches s against the event's combined pattern regexp.
//
// It reports whether the regexp matched anywhere in s and returns the
// non-empty capture groups of that match keyed by group name, with leading
// and trailing whitespace trimmed from each value. When nothing matches the
// map is empty but non-nil.
//
// Unnamed capture groups are stored under the empty key "", and when several
// groups share a name the non-empty group with the highest index wins.
func (e *Event) Parse(s string) (bool, map[string]string) {
	d := make(map[string]string)

	// One regexp pass is enough: FindStringSubmatch returns nil exactly when
	// the expression does not match s, so a separate MatchString call would
	// only repeat the same work.
	values := e.rgxp.FindStringSubmatch(s)
	if values == nil {
		return false, d
	}

	keys := e.rgxp.SubexpNames()
	// Index 0 is the whole match; only the sub-groups are reported.
	for i := 1; i < len(values); i++ {
		if values[i] != "" {
			d[keys[i]] = strings.TrimSpace(values[i])
		}
	}
	return true, d
}

// Events is an ordered list of events. The order is significant: Markers
// joins the markers in this order and Find maps capture-group indexes back to
// positions in the list.
type Events []*Event

// Markers compiles a single regexp out of every event's marker, joined with
// "|" in the order the events appear in es. It panics if the joined
// expression is not a valid regexp.
func (es Events) Markers() *regexp.Regexp {
	markers := make([]string, len(es))
	for i := range es {
		markers[i] = es[i].marker
	}
	combined := strings.Join(markers, "|")
	return regexp.MustCompile(combined)
}

// Names lists the Name of every event in es, in order. The result is nil
// when es holds no events.
func (es Events) Names() (result []string) {
	for i := range es {
		result = append(result, es[i].Name)
	}
	return result
}

// Find returns the event whose marker matches s, or nil when there is none.
//
// The lookup uses the package-level EsMrks regexp rather than a regexp derived
// from es: the index of the first non-empty capture group, minus one, is used
// as the position in es. This is only meaningful when EsMrks was built from
// the same list and every marker contributes exactly one capture group.
//
// Find returns nil instead of panicking when EsMrks has not been built yet or
// when the matching group has no counterpart in es.
//
// NOTE(review): the regexp reports the leftmost match in s, so a marker that
// can match from the start of the line (for example one beginning with ".*")
// takes precedence over an earlier-registered marker that only matches
// further to the right. Confirm this is acceptable for prefixed log lines.
func (es Events) Find(s string) *Event {
	if EsMrks == nil {
		return nil
	}
	matches := EsMrks.FindStringSubmatch(s)
	// Index 0 is the whole match; groups start at 1.
	for i := 1; i < len(matches); i++ {
		if matches[i] == "" {
			continue
		}
		if i > len(es) {
			// The group belongs to an event that is not part of es.
			return nil
		}
		return es[i-1]
	}
	return nil
}

// New builds an Event, appends it to the package-level Es and returns it.
//
// Parameters:
//   - name: event name; the metric is created as "coroner-event-<name>".
//   - severity: level stored on the event.
//   - marker: regexp fragment stored as the event's marker; when empty, the
//     first entry of ptrns is used instead.
//   - ptrns: regexp alternatives that are joined with "|" and compiled into
//     the event's regexp. The slice is copied, never modified.
//   - stacktrace: when true, stacktracePtrn is appended to the alternatives.
//   - meminfo: when true, meminfoPtrn is appended to the alternatives.
//
// New panics when both marker and ptrns are empty, or when the joined
// alternatives do not form a valid regexp. It does not rebuild EsMrks, so an
// event registered after EsMrks was built is not seen by Events.Find until
// the markers are rebuilt. Events.Find also relies on the marker contributing
// exactly one capture group; New does not check this.
func New(name string, severity Severity, marker string, ptrns []string, stacktrace bool, meminfo bool) *Event {
	if marker == "" {
		if len(ptrns) == 0 {
			panic(fmt.Sprintf("coroner: event %q needs a marker or at least one pattern", name))
		}
		marker = ptrns[0]
	}

	size := len(ptrns)
	if stacktrace {
		size += len(stacktracePtrn)
	}
	if meminfo {
		size += len(meminfoPtrn)
	}

	// Copy into a fresh slice: appending directly to ptrns could write into
	// the caller's backing array when it has spare capacity.
	all := make([]string, 0, size)
	all = append(all, ptrns...)
	if stacktrace {
		all = append(all, stacktracePtrn...)
	}
	if meminfo {
		all = append(all, meminfoPtrn...)
	}

	e := &Event{
		Name:     name,
		Severity: severity,
		Metric:   pushyasm.NewNumeric(fmt.Sprintf("coroner-event-%s", name), pushyasm.Counter, pushyasm.Delta),
		marker:   marker,
		ptrns:    all,
		rgxp:     regexp.MustCompile(strings.Join(all, "|")),
	}
	Es = append(Es, e)
	return e
}
