package ru.yandex.direct.logging;

import ru.yandex.direct.utils.text.StringModifier;

import static com.google.re2j.Pattern.CASE_INSENSITIVE;
import static com.google.re2j.Pattern.compile;

/**
 * Normalizes error messages by running them through a fixed chain of regex replacement rules
 * that swap variable fragments (SQL text, ids, timestamps, IP addresses, UUIDs, long numbers, ...)
 * for constant placeholders, and then truncating the result to
 * {@link #NORMALIZED_MESSAGE_MAX_LENGTH} chars.
 * <p>
 * NOTE(review): the rules are presumably applied in the order they are registered; several of them
 * rely on placeholders produced by earlier rules (e.g. {@code IPADDRESS:\d+} and
 * {@code NUM(,\s*NUM){3,}}), so do not reorder them.
 */
public class ErrorMessageNormalizer {
    /** One decimal octet of an IPv4 address: 0-255. */
    private static final String IPV4_SEG = "(?:25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)";
    /** Dotted-quad IPv4 address. */
    private static final String IPV4_ADDRESS = "(?:" + IPV4_SEG + "\\.){3}" + IPV4_SEG;
    /** One hexadecimal group of an IPv6 address: 1 to 4 hex digits. */
    private static final String IPV6_SEG = "[0-9a-fA-F]{1,4}";
    /** Full (non-abbreviated) IPv6 address: exactly eight colon-separated groups. */
    private static final String IPV6_ADDRESS = "(?:" + IPV6_SEG + ":){7}" + IPV6_SEG;
    /** Either an IPv4 or a full IPv6 address. */
    private static final String IP_ADDRESS = IPV4_ADDRESS + "|" + IPV6_ADDRESS;

    /** Maximum length, in UTF-16 chars, of the value returned by {@link #normalize(String)}. */
    private static final int NORMALIZED_MESSAGE_MAX_LENGTH = 256;

    /**
     * We use RE2 regular expressions. In a quick ad-hoc benchmark they were about 2x slower, but
     * they are guaranteed to have linear complexity in the length of the string.
     */
    private static final StringModifier EVENT_MESSAGE_MODIFIER = new StringModifier.Builder()
            .withRe2ReplaceAllRule("\npayload:\n.*", " PAYLOAD")
            .withRe2ReplaceAllRule("\\bSQL \\[.*\\];(" +
                    ".*?Connection is not available, request timed out|" +
                    " Communications link failure|" +
                    " Query execution was interrupted|" +
                    " (?:Socket|Stream) closed|" +
                    " Duplicate entry|" +
                    " Lock wait timeout exceeded;|" +
                    " Deadlock found when trying to get lock;|" +
                    " Can not read response from server\\.|" +
                    " No operations allowed after connection closed\\.)", m -> "SQL" + m.group(1))
            .withRe2ReplaceAllRule("\\bDuplicate entry '.*?' for key '.*?'", "Duplicate entry")
            .withRe2ReplaceAllRule("uncategorized SQLException for SQL \\[.*?\\];",
                    "uncategorized SQLException for SQL;")
            .withRe2ReplaceAllRule("\\bGot errors for phrase .*? from ADVQ response: .*",
                    "Got ADVQ response's errors for phrase")
            .withRe2ReplaceAllRule("ru.yandex.advq.query.IllegalQueryException: Syntax error at \\d+:\\d+: .*",
                    "ru.yandex.advq.query.IllegalQueryException: Syntax error at LINE:COLUMN: MESSAGE")
            .withRe2ReplaceAllRule("\\b(blackbox fatal error: .*?): .*(; url: .*)", m -> m.group(1) + m.group(2))
            .withRe2ReplaceAllRule("\\bFailed to call blackbox: .*", "Failed to call blackbox")
            .withRe2ReplaceAllRule("\\b(Error \\d+: )(Internal RPC call failed|Proxy cannot " +
                    "synchronize with cluster) \\{.*?\\}", m -> m.group(1) + m.group(2))
            .withRe2ReplaceAllRule("\\bLogBroker message contains unknown verdictType\\. Message: \\t.*",
                    "LogBroker message contains unknown verdictType")
            .withRe2ReplaceAllRule("\\bDuplicate key .*? \\(attempted merging values .*? and .*?\\)",
                    "Duplicate key")
            .withRe2ReplaceAllRule("\\bDuplicate entry for key '.*?' \\(attempt to merge values '.*?' and '.*?'\\)",
                    "Duplicate entry for key")
            .withRe2ReplaceAllRule("\\bJob .*? threw (an unhandled |)?Exception: ",
                    m -> "JOB threw " + m.group(1) + "Exception")
            .withRe2ReplaceAllRule("\\bInterrupted while processing \\d+ requests(, the first 10 requests|) .*",
                    m -> "Interrupted while processing NUM requests" + m.group(1))
            .withRe2ReplaceAllRule("\\bDefectInfo\\{.*\\}", "DefectInfo")
            .withRe2ReplaceAllRule("(Consumer|Producer) stream \\(.*?\\) closed with error",
                    m -> m.group(1) + " stream closed with error")
            .withRe2ReplaceAllRule("Syntax error at \\d+:\\d+:", "Syntax error")
            .withRe2ReplaceAllRule("Syntax error (token recognition error at:|missing WORD at|" +
                            "extraneous input|mismatched input) '.*?'(?: expecting (?:\\{.*?\\}|WORD|<EOF>)|)",
                    m -> "Syntax error " + m.group(1))
            .withRe2ReplaceAllRule("Connection (timed out|refused): [\\w-./:]+", m -> "Connection " + m.group(1))
            .withRe2ReplaceAllRule("^.* could not be parsed into a proper Uri, missing host",
                    "could not be parsed into a proper Uri, missing host")
            .withRe2ReplaceAllRule("(Got errors in next bs-auction responses): .*", m -> m.group(1))
            .withRe2ReplaceAllRule("Error committing transaction [\\w-]+? at cell [\\w-]+",
                    "Error committing transaction at cell")
            .withRe2ReplaceAllRule("CollectionUrl: .*? doesn't match pattern",
                    "CollectionUrl: URL doesn't match pattern")
            .withRe2ReplaceAllRule("Error while calling operator \\w+ get_moderated for pageId: \\d+\\.",
                    "Error while calling OPERATOR get_moderated for pageId: NUM.")
            .withRe2ReplaceAllRule("\\baccess is denied for .*?\\.", "access is denied for USER.")
            .withRe2ReplaceAllRule("\\bCan't get (remaining space|space usage) for cluster .*?(, dir | ).*?on " +
                    "medium = .*", m -> "Can't get " + m.group(1) + " for CLUSTER" + m.group(2) + "on MEDIUM")
            .withRe2ReplaceAllRule("\\bFailed to create event reader, \\d+ attempts left",
                    "Failed to create event reader, NUM attempts left")
            .withRe2ReplaceAllRule("Cannot return null for non-nullable type: '.*?'",
                    "Cannot return null for non-nullable TYPE")
            .withRe2ReplaceAllRule("Required \\w*? parameter '.*?' is not present",
                    "Required TYPE PARAMETER is not present")
            .withRe2ReplaceAllRule("is blocked for resolver \\w+",
                    "is blocked for resolver RESOLVER")
            .withRe2ReplaceAllRule("\\bshort message: .*", "MESSAGE")
            .withRe2ReplaceAllRule("\\b(NonNullableFieldWasNullError.*)path=\\[.*?\\]", m -> m.group(1) + "PATH")
            .withRe2ReplaceAllRule("\\b(?:ppc_\\d+__\\d+|ppc:\\d+)\\b", "PPC")
            .withRe2ReplaceAllRule("\\bppcdict__\\d+\\b", "PPCDICT")
            .withRe2ReplaceAllRule("\\b\\d+(?:ms|(?:,\\d+)* milliseconds)\\b", "MILLISECONDS")
            .withRe2ReplaceAllRule("\\b\\d+(?:,\\d+)* bytes\\b", "BYTES")
            .withRe2ReplaceAllRule(
                    "\\b\\d{4}-\\d{2}-\\d{2}[ T]\\d{2}:\\d{2}:\\d{2}(.\\d{3,}[-+]\\d{4})?\\b",
                    "DATETIME")
            .withRe2ReplaceAllRule(compile("\\b[a-z]{3}, \\d{2} [a-z]{3} \\d{4} \\d{2}:\\d{2}:\\d{2} GMT\\b",
                    CASE_INSENSITIVE), "DATETIME")
            .withRe2ReplaceAllRule(IP_ADDRESS, "IPADDRESS")
            // relies on the IPADDRESS placeholder produced by the previous rule
            .withRe2ReplaceAllRule("IPADDRESS:\\d+", "IPADDRESS:PORT")
            .withRe2ReplaceAllRule("JSESSIONID=[a-z\\d.]+", "JSESSIONID=...")
            .withRe2ReplaceAllRule("\\bshard[_=]\\d+\\b", "SHARD")
            .withRe2ReplaceAllRule("\\brole=\\w+", "ROLE")
            .withRe2ReplaceAllRule("\\bpid=\\d+\\b", "pid=NUM")
            .withRe2ReplaceAllRule("(\\s|')-?\\d+\\.\\d+(\\s|$|')", m -> m.group(1) + "BIGDECIMAL" + m.group(2))
            .withRe2ReplaceAllRule("\\bBlock <\\d+,\\d+>", "Block <NUM,NUM>")
            .withRe2ReplaceAllRule("\\b(with uid|with id:?) \\d+", m -> m.group(1) + " NUM")
            .withRe2ReplaceAllRule("\\[\\d+\\]", "[NUM]")
            .withRe2ReplaceAllRule("\\b(Last retry failed): \\d+", m -> m.group(1) + ": NUM")
            .withRe2ReplaceAllRule(compile("\\b(row|line|column|port|regionId|code|pageId): \\d+", CASE_INSENSITIVE),
                    m -> m.group(1) + ": NUM")
            .withRe2ReplaceAllRule(compile("\\b0x[a-f0-9]{12}\\b", CASE_INSENSITIVE), "0xHEX12")
            .withRe2ReplaceAllRule(compile("\\b[a-f0-9]{16}\\b", CASE_INSENSITIVE), "HEX16")
            // order matters: UUIDs are replaced before numbers are converted to NUM
            .withRe2ReplaceAllRule("\\b[a-f\\d]{8}-[a-f\\d]{4}-[a-f\\d]{4}-[a-f\\d]{4}-[a-f\\d]{12}\\b", "UUID")
            .withRe2ReplaceAllRule("\\b\\d{4,}\\b", "NUM")
            // relies on the NUM placeholders produced by the previous rule
            .withRe2ReplaceAllRule("\\bNUM(,\\s*NUM){3,}\\b", "LIST_OF_NUMS")
            // balance
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*PromoCode \\w+ already reserved.*", "Balance2.CreateRequest2 PromoCode already reserved")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*Already has reservation.*", "Balance2.CreateRequest2 Promocode already has reservation")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*ID_PC_UNKNOWN.*", "Balance2.CreateRequest2 Invalid Promocode: ID_PC_UNKNOWN")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*ID_PC_INVALID_PERIOD.*", "Balance2.CreateRequest2 Invalid Promocode: ID_PC_INVALID_PERIOD")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*ID_PC_NOT_NEW_CLIENT.*", "Balance2.CreateRequest2 Invalid Promocode: ID_PC_NOT_NEW_CLIENT")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*ID_PC_USED.*", "Balance2.CreateRequest2 Invalid Promocode: ID_PC_USED")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*ID_PC_NOT_UNIQUE_URLS.*", "Balance2.CreateRequest2 Invalid Promocode: ID_PC_NOT_UNIQUE_URLS")
            .withRe2ReplaceAllRule(".*Balance2.CreateRequest2.*PROMOCODE_WRONG_CLIENT.*", "Balance2.CreateRequest2 Invalid Promocode: PROMOCODE_WRONG_CLIENT")
            .withRe2ReplaceAllRule(".*Balance2.PayRequest.*No payment options available.*", "Balance2.PayRequest No payment options available")
            .withRe2ReplaceAllRule("can't get screenshot from Rotor.*statusCode=(\\d+).*",
                    m -> "can't get screenshot from Rotor (statusCode=" + m.group(1) + ")")
            .build();

    /** Default cap, in UTF-16 chars, on how much of the input message is processed. */
    private static final int DEFAULT_MESSAGE_SIZE_LIMIT = 100 * 1024;

    /** Input messages longer than this many chars are cut before the rules are applied. */
    private final int messageSizeLimit;

    /**
     * Creates a normalizer with a custom input size limit.
     *
     * @param messageSizeLimit maximum number of chars of the input message that are processed;
     *                         a negative value makes {@link #normalize(String)} throw
     *                         {@link IndexOutOfBoundsException} for every non-null message
     */
    public ErrorMessageNormalizer(int messageSizeLimit) {
        this.messageSizeLimit = messageSizeLimit;
    }

    /** Creates a normalizer with the default input size limit of {@code 100 * 1024} chars. */
    public ErrorMessageNormalizer() {
        this(DEFAULT_MESSAGE_SIZE_LIMIT);
    }

    /**
     * Cleans variable data (numbers, ids, timestamps, addresses, ...) out of an error message.
     * <p>
     * The message is first cut to the configured input size limit, then passed through the
     * replacement rules, and finally cut to {@link #NORMALIZED_MESSAGE_MAX_LENGTH} chars.
     * Neither cut splits a surrogate pair, so the result never ends with a dangling high surrogate
     * introduced by truncation.
     *
     * @param msg the raw error message; may be {@code null}
     * @return an empty string for {@code null} input, otherwise the normalized message,
     *         at most {@link #NORMALIZED_MESSAGE_MAX_LENGTH} chars long
     */
    public String normalize(String msg) {
        if (msg == null) {
            return "";
        }
        var processedMsg = EVENT_MESSAGE_MODIFIER.makeReplacements(truncate(msg, messageSizeLimit));
        return truncate(processedMsg, NORMALIZED_MESSAGE_MAX_LENGTH);
    }

    /**
     * Returns {@code text} cut down to at most {@code limit} chars; a cut that would land in the
     * middle of a surrogate pair is moved one char back so the pair is dropped as a whole.
     */
    private static String truncate(String text, int limit) {
        if (text.length() <= limit) {
            return text;
        }
        int end = limit;
        // A plain substring(0, limit) could keep the high half of a pair and drop the low half,
        // leaving malformed UTF-16 at the end of the result.
        if (end > 0
                && Character.isHighSurrogate(text.charAt(end - 1))
                && Character.isLowSurrogate(text.charAt(end))) {
            end--;
        }
        return text.substring(0, end);
    }
}
