
#include <library/cpp/charset/doccodes.h>
#include <util/folder/filelist.h>
#include <util/string/strip.h>
#include <util/generic/size_literals.h>
#include <util/string/join.h>
#include <util/stream/file.h>
#include <util/string/split.h>
#include <util/digest/sequence.h>
#include <util/generic/guid.h>

#include <memory>

#include "rengine.h"
#include "sptypes.h"
#include "spamrule.h"
#include "mail/so/spamstop/tools/so-common/sputil.h"

#include "bform.h"
#include "arform.h"
#include "setrules.h"
//#include "sperror.h"

// Marker comments recognised by TSetRules::Read inside data files.
// A "#@VersionNum" line carries a data version; when that version cannot be
// parsed or is newer than spVersion, the following lines are skipped until a
// "#@VersionEnd" line is met.
#define SP_VERSION_NUM "#@VersionNum"
#define SP_VERSION_END "#@VersionEnd"

// Error-exit helper for rule parsing code.
// Sets `ret` to ecFormat, then (only when fRulesLog is set) either bumps the
// m_cerr_dat counter or logs `args` at TLOG_ERR, and finally jumps to the
// `SkipRule` label. The expanding scope must therefore provide `ret`,
// `fRulesLog`, `fPrintSORulesReport` and a `SkipRule:` label.
// NOTE(review): no use of this macro is visible in this part of the file -
// confirm it is still needed.
#define SKIP_RULE(args)                                                 \
    {                                                                   \
        ret = ecFormat;                                                 \
        if (fRulesLog) {                                                \
            if (m_fDatFile && !fPrintSORulesReport && !m_reader_regime) \
                ++m_cerr_dat;                                           \
            else                                                        \
                m_p_filter_logger->splog (TLOG_ERR, args);                                           \
        }                                                               \
        goto SkipRule;                                                  \
    }


/// Loads every rule data file found in the given folders.
///
/// Processing order:
///   1. count *.rul / *.dlv / *.roll files to size m_vFileRules;
///   2. per folder: load Lua scripts, then read *.rul files (STATE_RULE);
///   3. per folder: read *.dlv files (STATE_DLV);
///   4. per folder: read *.roll files (STATE_ROLL), then m_listRuler.Prepare();
///   5. per folder: read *.ban files (STATE_SRC_BAN);
///   6. throw if any unknown field names were collected, otherwise run
///      CheckExpr() and log the accumulated checksum.
///
/// @param rulesFolders     directories to scan; each must exist and be a
///                         directory (enforced with Y_VERIFY).
/// @param pSpFilterLogger  logger used for all diagnostics.
/// @param pcreSettings     stored in PcreSettings and later passed to SetPcre.
/// @throws yexception when unknown field names were met while parsing.
TSetRules::TSetRules(const TVector<TFsPath>& rulesFolders, TSpLogger *pSpFilterLogger, NRegexp::TSettings pcreSettings) :
        m_p_filter_logger(pSpFilterLogger),
        PcreSettings(std::move(pcreSettings)),
        m_listRuler(pSpFilterLogger)
{
    is_spk = m_listRuler.IsSPK();

    m_ec = ecOK;
    m_cRules = 0;
    m_cRulesAll = 0;
    m_cFiles = 0;
    m_cBoolFault = 0;
    m_cArFault = 0;
#ifdef SP_FILTER_CLIENTS
    m_spdat = 0;
#endif
    m_fDeliveryFile = false;
    //#^    m_fFiltrInfo = false;
    m_max_files = 0;
    m_cs = 0;

    InitStrings();

    m_mapLvKeys.reserve(MAX_LV_KEYS);

//    m_mapFields.Resize(FD_MAX_COUNT + 1);
    m_mapDomens.reserve(10000);

    m_mapRules.reserve(20000);

    const char *pRFile = nullptr;
    TFileEntitiesList fileList(TFileEntitiesList::EM_FILES_SLINKS);

    // Pass 1: only count the files so that m_vFileRules can be sized up front.
    // NOTE(review): the counting assumes every Fill() call replaces the
    // previous contents of fileList - confirm against TFileEntitiesList.
    for(const TFsPath& rulesFolder: rulesFolders) {
        Y_VERIFY(rulesFolder.Exists(), "%s doesn't exist", rulesFolder.c_str());
        Y_VERIFY(rulesFolder.IsDirectory(), "%s is not dir", rulesFolder.c_str());

        fileList.Fill(rulesFolder, "", ".rul", 1);
        m_cFiles += fileList.Size();

        fileList.Fill(rulesFolder, "", ".dlv", 1);
        m_cFiles += fileList.Size();

        fileList.Fill(rulesFolder, "", ".roll", 1);
        m_cFiles += fileList.Size();
    }

    // Keep a few spare slots; m_cFiles is reset and re-used as the index of
    // the file currently being read (see ReadRulesDir).
    m_max_files = m_cFiles + 10;
    m_vFileRules.resize(m_max_files);
    m_cFiles = 0;
#ifdef SP_FILTER_CLIENTS
    // NOTE(review): m_regime, dnRules and pPrepare are not declared in this
    // constructor - this branch looks stale; confirm it still compiles.
    if (m_regime != spRegimePackRule)
            ReadClientsRules(dnRules, pPrepare);
#endif

    // Pass 2: Lua scripts and ordinary rule files.
    for(const TFsPath& rulesFolder: rulesFolders) {
        m_rstate = STATE_RULE;

        // A folder with entry.lua loads only that script; otherwise every
        // *.lua file in the folder is loaded.
        if (const TFsPath entry = rulesFolder / "entry.lua"; entry.Exists()) {
            LuaRulesRunner.Load(entry);
        } else {
            fileList.Fill(rulesFolder, "", ".lua", 1, true);
            while ((pRFile = fileList.Next())) {
                if (pRFile == "entry.lua"sv)
                    continue;
                const auto fullPath = rulesFolder / pRFile;
                LuaRulesRunner.Load(fullPath);
            }
        }

        fileList.Fill(rulesFolder, "", ".rul", 1, true);
        while ((pRFile = fileList.Next())) {
            ReadRulesDir(rulesFolder / pRFile);
        }
    }

    // Pass 3: delivery files.
    for(const TFsPath& rulesFolder: rulesFolders) {
        m_rstate = STATE_DLV;

        fileList.Fill(rulesFolder, "", ".dlv", 1, true);
        while ((pRFile = fileList.Next())) {
            ReadRulesDir(rulesFolder / pRFile);
        }
    }
    // Pass 4: roll files; their lines are forwarded to m_listRuler.
    for(const TFsPath& rulesFolder: rulesFolders) {
        m_rstate = STATE_ROLL;

        fileList.Fill(rulesFolder, "", ".roll", 1, true);
        while ((pRFile = fileList.Next())) {
            ReadRulesDir(rulesFolder / pRFile);
        }
    }

    m_listRuler.Prepare();

    // Pass 5: sender ban lists.
    m_rstate = STATE_SRC_BAN;
    for(const TFsPath& rulesFolder: rulesFolders) {
        fileList.Fill(rulesFolder, "", ".ban", 1, true);
        while ((pRFile = fileList.Next())) {
            ReadRulesDir(rulesFolder / pRFile);
        }
    }

    // AddField() collects names it could not map to TSpFields; any such name
    // makes the whole rule set invalid.
    if (!unknownFields.empty())
        ythrow yexception() << "unknown fields: " << JoinSeq(",", unknownFields);

    if (!m_cRulesAll) {
        m_p_filter_logger->splog(TLOG_ERR, "SpamStop data not defined!");
        return;
    }

    CheckExpr();
    {
        // NOTE(review): this checksum line is logged at TLOG_ERR although it
        // reads like an informational message - confirm the level is intended.
        m_p_filter_logger->splog(TLOG_ERR, " Rules cs: %" PRIu64, m_cs);
    }
}

/// Reads a single data file in the mode selected by m_rstate.
///
/// The file name is stored in the current m_vFileRules slot, the file is
/// parsed by Read(), and - except for sender ban lists - the file and rule
/// counters are advanced afterwards.
///
/// @param fn  path of the file to read.
void TSetRules::ReadRulesDir(const TString & fn) {
    if (m_cFiles >= m_max_files) {
        m_ec = ecInternal;
        m_p_filter_logger->splog(TLOG_ERR, "%s", "File reading internal error!");
        // m_vFileRules holds exactly m_max_files slots; going on would index
        // past its end below (and again in Set()), so the file is skipped.
        return;
    }

    m_fDeliveryFile = (m_rstate == STATE_DLV);
    const bool fRollFile =(m_rstate == STATE_ROLL);

    const bool fSrsBanList = (m_rstate == STATE_SRC_BAN);

    if (m_fDeliveryFile) {
        m_cSources = 0;
        m_cInvalidSources = 0;
    }

    m_vFileRules[m_cFiles].szFileName = fn;

    {
        // Scoped so that the file is closed before the summary is logged.
        TFileInput input(fn);

        Read(input, fn.c_str(), fRollFile, fSrsBanList);
    }
    if (fSrsBanList) {
        // Ban lists do not occupy a file slot and contribute no rules.
        m_p_filter_logger->splog(TLOG_ERR, "*** Data file: %s", fn.c_str());
        if (!m_pmapBanListSender.empty())
            m_p_filter_logger->splog(TLOG_ERR, " Dlist items: %zu", m_pmapBanListSender.size());
        return;
    }
    m_cFiles++;
    m_cRulesAll += m_cRules;
}

/// Parses one data file line by line.
///
/// Lines starting with '#' are comments, except for the "#@..." directives
/// handled below. Every other non-empty line is folded into the m_cs
/// checksum and then interpreted according to the file mode:
///   - roll file: the line is handed to m_listRuler.Add();
///   - list mode (ban list or "#@rule_host" section): the first word is
///     lower-cased and stored as a list entry;
///   - otherwise the first word is a TKeyWordId keyword and the stripped
///     line is stored in the matching m_sz* buffer; a KW_DESCRIBE line
///     completes the rule and calls Set().
///
/// @param fp           stream to read from.
/// @param fn           file name, passed to m_listRuler.Add().
/// @param fRollFile    true when the file is a roll file.
/// @param fSrsBanList  true when the file is a sender ban list.
/// @throws yexception when the file ends while a rule opened by KW_RULE has
///         no KW_DESCRIBE line; exceptions raised by Set() propagate as well.
void TSetRules::Read(IInputStream& fp, const char* fn, const bool fRollFile, const bool fSrsBanList) {
    //    int cRules = 0;

    std::vector<TString> vstrock;
    //    bool fRulOperators = true;
    double default_weight = 0.;
    bool fruleopen = false;
    bool fListType = fSrsBanList;
    bool fHostsList = false;
    float DatVersion;
    bool fSkipByVersion = false; // or bad file for roll

    m_cRules = 0;

    InitStrings();
    m_listRuler.InitFile();

    TRulesType rule_type = RT_UNKNOWN;

    TString str;
    while(fp.ReadLine(str)) {
        const char* pstr = str.c_str();
        SkipSpaces(&pstr);

        if (*pstr == '#') // commentary
        {
            //            if (!fRulOperators)
            //                continue;
            // Version markers are processed even while a section is skipped,
            // so that "#@VersionEnd" can switch parsing back on.
            if (!STRNCMP(pstr, SP_VERSION_NUM)) {
                if (!GetDataVersion(pstr, &DatVersion, m_p_filter_logger) || DatVersion > spVersion)
                    fSkipByVersion = true;
                if (fRollFile)
                    m_listRuler.InitFile();
                continue;
            } else if (!STRNCMP(pstr, SP_VERSION_END)) {
                fSkipByVersion = false;
                fHostsList = false;
                fListType = false;
                if (fRollFile)
                    m_listRuler.InitFile();
                continue;
            }

            if (fSkipByVersion) {
                continue;
            }

            // Roll files pass their comment lines to the list ruler too.
            if (fRollFile) {
                if (!m_listRuler.Add(pstr, fn))
                    fSkipByVersion = true; // bad file
                continue;
            }

            //            if ((m_regime == spRegimeCheckClientRule || m_regime == spRegimePackRule) && !STRNCMP(str, "#@skip_package"))
            //                break;
            // "#@rule_weight <value>" sets the default weight for host list
            // entries; "#@rule_host" switches to host list mode.
            if (!STRNCMP(pstr, "#@rule_weight"))
                GetNextDouble(&pstr, 0, &default_weight);
            else if (!STRNCMP(pstr, "#@rule_host")) {
                fHostsList = true;
                fListType = true;
            }

            continue;
        } else if (End_Of_Str(*pstr)) // empty line
            continue;

        if (fSkipByVersion) {
            continue;
        }

        // Chain the line into the running checksum of all processed data.
        m_cs = FnvHash<ui64>(pstr, strlen(pstr), m_cs);

        if (fRollFile) {
            if (!m_listRuler.Add(pstr, fn))
                fSkipByVersion = true; // bad file
            continue;
        }

        //        fRulOperators = false;

        const char* pkw = pstr;
        size_t len = GetFirstWord(&pkw, 0);

        if (!len)
            continue;

        if (fListType) {
            TString sTmp = to_lower(TString{TStringBuf(pkw, len)});
            if (fSrsBanList)
                vstrock.push_back(std::move(sTmp));
            else if (fHostsList) {
                // The word after the host is an optional weight.
                double rule_weight = 0;
                if (!GetNextDouble(&pstr, 0, &rule_weight))
                    rule_weight = default_weight;

                // A duplicate is only reported; emplace() below keeps the
                // entry that is already stored.
                if(m_pmapBanListHost.contains(sTmp)) {
                    Syslog(TLOG_WARNING) << "duplicated ban rule:" << sTmp;
                }

                m_pmapBanListHost.emplace(std::move(sTmp), rule_weight);
            }
            continue;
        }

        TKeyWordId kwid;

        if(!TryFromString<TKeyWordId>(TStringBuf{pkw, size_t(len)}, kwid)) {
            m_p_filter_logger->splog(TLOG_ERR, "Undefined string type: %s", pstr);
            continue;
        }

        // Each keyword stores the whole stripped line (keyword included) in
        // its buffer; Set() skips the keyword when it parses the buffer.
        // Keyword ids without a case label are ignored.
        switch (kwid) {
            case KW_RULE:
                // Starts a new rule: drop whatever was accumulated before.
                fruleopen = true;
                rule_type = RT_UNKNOWN;
                InitStrings();
                m_szBody = StripString(TStringBuf(pstr));
                break;

            case KW_REFIRST: // first message only
            case KW_RE: // regular expression
            case KW_RE_NAME:
                m_szRegExpr = StripString(TStringBuf(pstr));
                break;

            case KW_SCRIPT:
                m_szScript = StripString(TStringBuf(pstr));
                break;

            case KW_RCVDNUM: // regular expression received
                m_szReceivedNum = StripString(TStringBuf(pstr));
                break;

            case KW_BEXPR: // boolean expression
            case KW_BEXPR_NAME:
                m_szBExpr = StripString(TStringBuf(pstr));
                break;

            case KW_ARITHMETIC: // arithmetic expression
                m_szArithmetic = StripString(TStringBuf(pstr));
                break;

            case KW_FIELD: // fields
                m_szField = StripString(TStringBuf(pstr));
                break;

            case KW_DESCRIBE: // commentary
                // Closes the rule: build it from the accumulated strings.
                fruleopen = false;
                m_szComment = StripString(TStringBuf(pstr));
                Set(rule_type);
                rule_type = RT_UNKNOWN;
                InitStrings();
                break;

            case KW_TEST: // reg expr test
                m_szTest = StripString(TStringBuf(pstr));
                break;

            case KW_LVOPT: //  lv option
                m_szOption = StripString(TStringBuf(pstr));
                break;

            case KW_KEY: // lv key
                m_szKey = StripString(TStringBuf(pstr));
                break;
            case KW_SRC:
            case KW_LIST:
            case KW_INBOX:
                m_szList = StripString(TStringBuf(pstr));
                break;
            case KW_DOMEN:
                m_szDomen = StripString(TStringBuf(pstr));
                break;
            case KW_RELAY:
                m_szRelay = StripString(TStringBuf(pstr));
                break;
            case KW_RANGE:
                rule_type = RT_RANGE;
                m_szRange = StripString(TStringBuf(pstr));
                break;

            case KW_RANGE_DATE:
                rule_type = RT_RANGE_DATE;
                m_szRange = StripString(TStringBuf(pstr));
                break;

            case KW_LEVENSTEIN:
                rule_type = RT_LEVENSTEIN;
                m_szRange = StripString(TStringBuf(pstr));
                break;

            case KW_YASM: {
                // Tokens after the keyword are tag names; the first token
                // (the keyword itself) is skipped with Next().
                const auto splitter = StringSplitter(pstr);
                auto parts = splitter.SplitBySet(" ,;\t").SkipEmpty();
                parts.Next();
                for (const auto tok : parts) {
                    YasmTags |= TKWYasmTagsBitset(FromString<TKWYasmTags>(tok.Token()));
                }
                break;
            }
            case KW_SOLOMON: {
                // Same format as KW_YASM, accumulated into SolomonTags.
                const auto splitter = StringSplitter(pstr);
                auto parts = splitter.SplitBySet(" ,;\t").SkipEmpty();
                parts.Next();
                for (const auto tok : parts) {
                    SolomonTags |= TKWYasmTagsBitset(FromString<TKWYasmTags>(tok.Token()));
                }
                break;
            }
        }
    }

    if (fListType) {
        if (fSrsBanList) {
            // Only the first ban list file populates the sender set; later
            // files are reported and dropped. The entries were already
            // lower-cased when they were collected above.
            if (m_pmapBanListSender.empty()) {
                for (const auto & v : vstrock) {
                    m_pmapBanListSender.emplace(to_lower(v));
                }
            } else {
                m_p_filter_logger->splog(TLOG_ERR, "%s", "duplicated information about banlist senders");
            }
        }
    }

    // A rule that was opened but never closed by a describe line is an error.
    if (fruleopen && m_szBody) {
        ythrow TWithBackTrace<yexception>() << "Rule does not contain field describe " << m_szBody;
    }
}

/// Advances *ppstr past any run of spaces and horizontal tabs.
void TSetRules::SkipSpaces(const char** ppstr) {
    const char* cursor = *ppstr;
    for (; *cursor == ' ' || *cursor == '\t'; ++cursor) {
        // nothing to do: the loop header performs the scan
    }
    *ppstr = cursor;
}

/// Tells whether ch can be part of a word, i.e. it is neither a space, a tab,
/// a line break character nor the terminating zero.
bool TSetRules::IsWordSym(char ch) {
    switch (ch) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\0':
            return false;
        default:
            return true;
    }
}
/// Advances *ppstr to the first character that is not a word symbol.
void TSetRules::SkipWord(const char** ppstr) {
    const char* cursor = *ppstr;
    while (IsWordSym(*cursor))
        ++cursor;
    *ppstr = cursor;
}

/// Tells whether ch terminates a line: CR, LF or the terminating zero.
bool TSetRules::End_Of_Str(char ch) {
    return ch == '\r' || ch == '\n' || ch == '\0';
}

/// Moves *ppstr to the start of the next word and returns that word's length.
///
/// The cursor is first advanced by len characters, then past the rest of the
/// current word and the blanks that follow it.
///
/// @param ppstr  in/out cursor into a zero-terminated line.
/// @param len    number of characters of the current word to step over.
/// @return length of the word now under the cursor, 0 at the end of the line.
int TSetRules::GetNextWord(const char** ppstr, int len) {
    *ppstr += len;
    SkipWord(ppstr);
    SkipSpaces(ppstr);

    int wordLen = 0;
    if (!End_Of_Str(**ppstr)) {
        for (const char* cursor = *ppstr; IsWordSym(*cursor); ++cursor)
            ++wordLen;
    }
    return wordLen;
}

/// Splits str into words, skipping the first one, and records each word's
/// start and length in m_ptokens / m_tokenlens (at most TokensSize entries).
///
/// @return number of tokens stored.
int TSetRules::GetTokens(const TString& str) {
    const char* cursor = str.c_str();
    int count = 0;

    for (int wordLen = GetNextWord(&cursor, 0);
         wordLen > 0 && count < TokensSize;
         wordLen = GetNextWord(&cursor, wordLen))
    {
        m_ptokens[count] = cursor;
        m_tokenlens[count] = wordLen;
        ++count;
    }
    return count;
}

/// Moves *ppstr by offset characters and past leading blanks, then measures
/// the word that starts there.
///
/// @return length of the word under the cursor, 0 at the end of the line.
size_t TSetRules::GetFirstWord(const char** ppstr, size_t offset) {
    *ppstr += offset;
    SkipSpaces(ppstr);

    size_t wordLen = 0;
    if (!End_Of_Str(**ppstr)) {
        for (const char* cursor = *ppstr; IsWordSym(*cursor); ++cursor)
            ++wordLen;
    }
    return wordLen;
}

/// Reads the next word as an integer.
///
/// *pval is reset to 0 first. On success the cursor is left just behind the
/// parsed word; on failure it stays at the start of the word that was found.
///
/// @return true when the next word exists and IsInt() accepts it.
bool TSetRules::GetNextInt(const char** ppstr, int len, int* pval) {
    *pval = 0;

    const int wordLen = GetNextWord(ppstr, len);
    if (!wordLen || !IsInt(*ppstr, wordLen))
        return false;

    *pval = atoi(*ppstr);
    *ppstr += wordLen;
    return true;
}

/// Reads the next word as a floating point number.
///
/// *pval is reset to 0 first. On success the cursor is left just behind the
/// parsed word.
///
/// @return true when the next word exists and GetDouble() accepts it.
bool TSetRules::GetNextDouble(const char** ppstr, int len, double* pval) {
    *pval = 0.;

    const int wordLen = GetNextWord(ppstr, len);
    if (!wordLen || !GetDouble(*ppstr, wordLen, pval))
        return false;

    *ppstr += wordLen;
    return true;
}

/// Logs dest as a faulty string when it is not empty.
///
/// @return true when dest is empty (nothing to report), false otherwise.
bool TSetRules::PrintBadStrings(const TString& dest) {
    if (!dest)
        return true;

    m_p_filter_logger->splog(TLOG_ERR, "Fault string: %s", dest.c_str());
    return false;
}

/// Resets every per-rule accumulation buffer so that the next rule starts
/// from a clean state.
void TSetRules::InitStrings() {
    // Rule header and its description.
    m_szBody.clear();
    m_szComment.clear();

    // Expression sources.
    m_szRegExpr.clear();
    m_szReceivedNum.clear();
    m_szBExpr.clear();
    m_szArithmetic.clear();
    m_szScript.clear();
    m_szTest.clear();

    // Remaining keyword payloads.
    m_szField.clear();
    m_szOption.clear();
    m_szKey.clear();
    m_szList.clear();
    m_szDomen.clear();
    m_szRelay.clear();
    m_szRange.clear();

    // Monitoring tag sets.
    SolomonTags = {};
    YasmTags = {};
}

/// Maps a field name (case-insensitively) to its TSpFields id.
///
/// A name that cannot be mapped is remembered in unknownFields, which the
/// constructor reports as an error once all files are read.
///
/// @param pfield  field name as written in the rule file.
/// @return the parsed id, or a value-initialized TSpFields for unknown names.
TSpFields TSetRules::AddField(const TStringBuf & pfield) {
    const auto lowered = to_lower(TString{pfield});

    // Value-initialized so that a failed parse never returns an
    // indeterminate value to the caller.
    TSpFields fid{};

    if(!TryFromString<TSpFields>(lowered, fid))
        unknownFields.emplace(pfield);

    return fid;
}


/// Copies the text that follows the keyword of the stored describe line into
/// pCurRule->comment; an empty description is reported to the log.
void TSetRules::GetComment(TRuleDef* pCurRule) {
    if (!m_szComment)
        return;

    const char* text = m_szComment.c_str();
    const int textLen = GetNextWord(&text, 0); // steps over the keyword
    pCurRule->comment = text;

    if (textLen == 0) {
        m_p_filter_logger->splog(TLOG_ERR, "Empty commentary for rule: %s", m_szBody.c_str());
    }
}

/// Builds a rule from the strings accumulated by Read() and registers it.
///
/// The rule line (m_szBody) supplies the name, an optional type keyword
/// (R_PRESENT, R_ABSENT, R_ALG, R_ANTI, R_LV, R_LUA, R_HS), an optional field
/// and the score. The remaining buffers (regular expression, boolean or
/// arithmetic expression, range, script, fields, test, received number)
/// refine the rule type and fill the type specific definition.
///
/// @param rule_type  type selected by a range / date range / levenstein line,
///                   RT_UNKNOWN otherwise.
/// @return the new rule; it is stored in m_vFileRules[m_cFiles].vrules and
///         indexed by name in m_mapRules.
/// @throws yexception on any definition error; the partially built rule is
///         released in that case.
TRuleDef* TSetRules::Set(TRulesType rule_type) {
    const char* pstr;
    int len = 0;

    // Held by a smart pointer until the rule is stored, so that none of the
    // error paths below leaks it.
    std::unique_ptr<TRuleDef> ruleOwner(new TRuleDef); // m_pengine->GetSpTop()->GetFlagPrintPerformance());
    TRuleDef* pCurRule = ruleOwner.get();

    pCurRule->fDelivery = m_fDeliveryFile;

    if (!(m_szBody)) {
        ythrow TWithBackTrace<yexception>() << "Rule Name not defined!";
    }
    else {
        // First word after the keyword is the rule name.
        pstr = m_szBody.c_str();
        len = GetNextWord(&pstr, 0);
        if (SetRuleName(pCurRule, pstr, len) != ecOK)
            ythrow TWithBackTrace<yexception>() << "Rule Name not defined!";

        // The next word is either a type keyword or already the field/score.
        // fNextWord tells whether that word was consumed as a type keyword.
        // NOTE(review): strncmp with the word length accepts any prefix of a
        // keyword (e.g. "R_") - confirm this is intended.
        len = GetNextWord(&pstr, len);
        bool fNextWord = true;
        if (len) {
            if (!strncmp(pstr, "R_PRESENT", len))
                pCurRule->rt = RT_PRESENT;
            else if (!strncmp(pstr, "R_ABSENT", len))
                pCurRule->rt = RT_ABSENT;
            else if (!strncmp(pstr, "R_ALG", len))
                pCurRule->rt = RT_ALG;
            else if (!strncmp(pstr, "R_ANTI", len)) {
                if (!GetAntiRule(pCurRule, pstr + len)) {
                    ythrow TWithBackTrace<yexception>() << "Rule definition error " << m_szBody;
                }
            } else if (!strncmp(pstr, "R_LV", len)) {
                if (GetLvRule(pCurRule) != ecOK) {
                    ythrow TWithBackTrace<yexception>() << "Rule definition error " << m_szBody;
                }
            }
            else if (!strncmp(pstr, "R_SRC", len)) {
                ythrow TWithBackTrace<yexception>() << "Rule definition error " << m_szBody;
            }
            else if(!strncmp(pstr, "R_LUA", len)) {
                pCurRule->rt = RT_LUA;
            }
            else if(!strncmp(pstr, "R_HS", len)) {
                pCurRule->rt = RT_HS;
            } else
                fNextWord = false;
        }
        else {
            ythrow TWithBackTrace<yexception>() << "Rule definition error " << m_szBody;
        }

        // NOTE(review): the Solomon tags are derived from the Yasm tags here;
        // the SolomonTags member filled by Read() is not used - confirm.
        pCurRule->YasmTags = std::move(YasmTags);
        pCurRule->SolomonTags = pCurRule->YasmTags;
        pCurRule->SolomonTags |= TKWYasmTags::HAM;
        pCurRule->SolomonTags |= TKWYasmTags::SPAM;
        pCurRule->SolomonTags |= TKWYasmTags::MALIC;

        if (pCurRule->rt != RT_ANTI) {
            if (fNextWord)
                len = GetNextWord(&pstr, len);

            // A non-numeric word before the score is a field name.
            if (!IsNumber(pstr, len)) {
                pCurRule->pfields.emplace_back(AddField({ pstr, size_t(len) }));
                len = GetNextWord(&pstr, len);
            }

            if (!GetScore(pCurRule, pstr, len)) {
                ythrow TWithBackTrace<yexception>() << "Rule score error " << m_szBody;
            }
        }
    }

    // Pick the type specific definition from the first buffer that is set.
    if(m_szRegExpr && pCurRule->rt == RT_HS) {
        pCurRule->rules.emplace<THsDef>(m_szRegExpr);
    } else if (m_szRegExpr && pCurRule->rt != RT_LV) {
        if (pCurRule->rt != RT_UNKNOWN) {
            ythrow TWithBackTrace<yexception>() << "Rule type error " << pCurRule->rt;
        }
        if (m_szKey) {
            ythrow TWithBackTrace<yexception>() << "Rule type error! Regular expression type contain field key ...: " << m_szBody;
        }
        if (m_szOption) {
            ythrow TWithBackTrace<yexception>() << "Rule type error! Regular expression type contain field lvopt ...: " << m_szBody;
        }
        pCurRule->rt = RT_RGEX;
        pCurRule->rules.emplace<TReDef>(m_szRegExpr);
    } else if (m_szBExpr) {
        if (pCurRule->rt != RT_UNKNOWN) {
            ythrow TWithBackTrace<yexception>() << "Rule type error: " << pCurRule->rt;
        }
        pCurRule->rt = RT_BF;
        pstr = m_szBExpr.c_str();
        len = GetNextWord(&pstr, 0);
        if (!len) {
            ythrow TWithBackTrace<yexception>() << "Empty boolean expression: " << m_szBody;
        }

        pCurRule->rules.emplace<TBfDef>();
        std::get<TBfDef>(pCurRule->rules).text = pstr;
    } else if (m_szArithmetic) {
        if (pCurRule->rt != RT_UNKNOWN) {
            ythrow TWithBackTrace<yexception>() << "Rule type error!: " << pCurRule->rt;
        }

        pCurRule->rt = RT_ARITHMETIC;
        pstr = m_szArithmetic.c_str();
        len = GetNextWord(&pstr, 0);
        if (!len) {
            ythrow TWithBackTrace<yexception>() << "Empty arithmetic expression: " << m_szBody;
        }

        pCurRule->rules.emplace<TArDef>();
        std::get<TArDef>(pCurRule->rules).text = pstr;
    } else if (rule_type == RT_RANGE || rule_type == RT_RANGE_DATE) {
        if (GetRangeRule(pCurRule, rule_type) != ecOK) {
            ythrow TWithBackTrace<yexception>() << "Range rule error" << m_szBody;
        }
    } else if (rule_type == RT_LEVENSTEIN) {
        if (GetLevensteinRule(pCurRule, rule_type) != ecOK) {
            ythrow TWithBackTrace<yexception>() << "Levenstein range rule error " << m_szBody;
        }
    } else if(m_szScript) {
        // The function name is the text after '=' on the script line.
        TStringBuf view(m_szScript);
        view = StripString(view.After('='));
        if(!LuaRulesRunner.FunctionExists(TString(view)))
            ythrow TWithBackTrace<yexception>() << "in rule " << pCurRule->pRuleName << " unknown function " << view;
        pCurRule->rules.emplace<TLuaRuleDef>(TString{view});
    }

    if (m_szField) {
        // Fields may come either from the rule line or from a field line,
        // not from both.
        if (!pCurRule->pfields.empty()) {
            ythrow TWithBackTrace<yexception>() << "Repeated field definition! " << m_szBody;
        }

        pstr = m_szField.c_str();
        len = 0;
        while ((len = GetNextWord(&pstr, len)))
            pCurRule->pfields.emplace_back(AddField({ pstr, size_t(len) }));
    }

    if (m_szTest && pCurRule->rt == RT_RGEX) {
        pstr = m_szTest.c_str();
        len = GetNextWord(&pstr, 0);
        if (len)
            std::get<TReDef>(pCurRule->rules).test = pstr;
    }

    if (m_szReceivedNum && pCurRule->rt == RT_RGEX) {
        pCurRule->rt = RT_RECEIVED_NUM;
        pstr = m_szReceivedNum.c_str();
        len = GetNextWord(&pstr, 0);
        std::get<TReDef>(pCurRule->rules).test.clear(); // sorry, test must be deleted
        if (len)
            std::get<TReDef>(pCurRule->rules).test = pstr;
        TReceivedNum(std::get<TReDef>(pCurRule->rules).test.c_str(), 0, *pCurRule);
    }

    if (pCurRule->rt == RT_UNKNOWN) {
        ythrow TWithBackTrace<yexception>() << "Unknown rule type " << m_szBody;
    }

    GetComment(pCurRule);

    if (pCurRule->pfields.empty() && (pCurRule->rt == RT_RGEX || pCurRule->rt == RT_PRESENT || pCurRule->rt == RT_ABSENT)) {
        ythrow TWithBackTrace<yexception>() << "Fields not defined for rule " << m_szBody;
    }

    // Rules that carry a regular expression get it prepared here.
    if ((pCurRule->rt == RT_RGEX ||
         pCurRule->rt == RT_RECEIVED_NUM ||
         pCurRule->rt == RT_LV && std::get<TLvDef>(pCurRule->rules).text))
        SetPcre(((pCurRule->rt == RT_LV) ? std::get<TLvDef>(pCurRule->rules) : std::get<TReDef>(pCurRule->rules)), PcreSettings);


    m_vFileRules[m_cFiles].vrules.push_back(pCurRule);
    // The per-file rule list refers to the rule from now on.
    (void)ruleOwner.release();
    m_mapRules.emplace(pCurRule->pRuleName, std::make_pair(m_cFiles, m_cRules));
    ++m_cRules;

    return pCurRule;
}

/// Parses the score word into prule->score.
///
/// @return false when the word is empty or GetDouble() rejects it.
bool TSetRules::GetScore(TRuleDef* prule, const char* score, int scorelen) {
    if (!scorelen)
        return false;
    return GetDouble(score, scorelen, &prule->score);
}

/// Checks that the first len characters of str are acceptable symbols:
/// no control characters or blanks (code < 33) and no bytes above 127.
///
/// The first offending character is reported to the log.
///
/// @return true when every character passes the check.
bool TSetRules::CheckSymbols(const char* str, int len) {
    for (int i = 0; i < len; i++) {
        // Compare as unsigned so that the "> 127" part of the check does not
        // depend on whether plain char is signed on the target platform.
        const unsigned char sym = static_cast<unsigned char>(str[i]);
        if (sym < 33 || sym > 127) {
            m_p_filter_logger->splog(TLOG_ERR, "symbol error [%c] (symbol > 127 || < 33)", str[i]);
            return false;
        }
    }
    return true;
}

/// Validates a rule name and stores it in pCurRule->pRuleName.
///
/// The name must be non-empty, pass CheckSymbols(), be shorter than
/// SP_MAX_RULE_NAME and not be registered in m_mapRules yet. Every violation
/// is reported to the log.
///
/// @param pCurRule   rule that receives the name.
/// @param pRuleName  start of the name inside the rule line.
/// @param NameLen    length of the name.
/// @return ecOK on success, ecFormat otherwise.
ecRet TSetRules::SetRuleName(TRuleDef* pCurRule, const char* pRuleName, int NameLen) {
    const size_t nameSize = NameLen;
    const TStringBuf ruleName{pRuleName, nameSize};

    if (!nameSize) {
        m_p_filter_logger->splog(TLOG_ERR, "%s", "Rule name not defined!");
        return ecFormat;
    }

    if (!CheckSymbols(pRuleName, nameSize)) {
        m_p_filter_logger->splog(TLOG_ERR, "Rule name error: %s", pRuleName);
        return ecFormat;
    }

    const decltype(m_mapRules)::const_iterator found = m_mapRules.find(ruleName);

    if (nameSize >= SP_MAX_RULE_NAME) {
        m_p_filter_logger->splog(TLOG_ERR, "Rule name too long: %.*s", int(ruleName.size()), ruleName.data());
        return ecFormat;
    }

    if (cend(m_mapRules) != found) {
        // The stored pair is (file index, rule index); report the file.
        const std::pair<int, int> location = found->second;
        m_p_filter_logger->splog(TLOG_ERR, "Rule name exists already: %.*s (File: %s)", int(ruleName.size()), ruleName.data(), m_vFileRules[location.first].szFileName.c_str());
        return ecFormat;
    }

    pCurRule->pRuleName = ruleName;
    return ecOK;
}



/// Fills a TRangeDef for a range or date range rule.
///
/// The key comes from the key line (m_szKey), the bounds from the range line
/// (m_szRange). For RT_RANGE_DATE the range line starts with a unit
/// (sec / hour / day / month / year) and a reference point ("today" or a
/// date), followed by the bounds. Each bound is either the word min / max
/// or a number; numeric bounds are multiplied by the unit factor.
///
/// @param pCurRule   rule to fill; its rt is set to rule_type.
/// @param rule_type  RT_RANGE or RT_RANGE_DATE.
/// @return ecOK on success, ecFormat when the key or the range is malformed
///         (the reason is logged).
ecRet TSetRules::GetRangeRule(TRuleDef* pCurRule, TRulesType rule_type) {
    ecRet ret = ecOK;
    // NOTE(review): i_min / i_max / u64_min / u64_max are assigned below but
    // never stored into `range` in this function - confirm they are leftovers.
    int i_min = 0;
    int i_max = 0;
    ui64 u64_min = 0;
    ui64 u64_max = 0;
    double d_min = 0.;
    double d_max = 0.;
    int factor = 1;
    char* pkey = 0;
    const char* pstr = m_szKey.c_str();
    int len = GetNextWord(&pstr, 0);

    pCurRule->rt = rule_type;

    auto& range = pCurRule->rules.emplace<TRangeDef>();

    if (!len || !CheckSymbols(pstr, len)) {
        m_p_filter_logger->splog(TLOG_ERR, "Range rule key error: %s", m_szKey.c_str());
        ret = ecFormat;
        goto SkipRule;
    }
    // presumably allocates a lower-cased copy of the key into pkey, released
    // by DELETE_ARR at the end - confirm against the macro definitions.
    STRDUPLWR(&pkey, pstr, len);

    range.key.assign(pkey);

    pstr = m_szRange.c_str();
    len = GetNextWord(&pstr, 0);
    if (!len) {
        m_p_filter_logger->splog(TLOG_ERR, "Range field error: %s", m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }

    if (rule_type == RT_RANGE_DATE) {
        // Unit of the bounds, converted to a factor in seconds.
        if (!STRNICMP(pstr, "sec"))
            factor = 1;
        else if (!STRNICMP(pstr, "hour"))
            factor = 3600;
        else if (!STRNICMP(pstr, "day"))
            factor = 3600 * 24;
        else if (!STRNICMP(pstr, "month"))
            factor = 3600 * 24 * 30;
        else if (!STRNICMP(pstr, "year"))
            factor = 3600 * 24 * 365;
        else {
            m_p_filter_logger->splog(TLOG_ERR, "Date range error: %s", m_szRange.c_str());
            ret = ecFormat;
            goto SkipRule;
        }

        len = GetNextWord(&pstr, 0);
        if (!len) {
            m_p_filter_logger->splog(TLOG_ERR, "Date range error: %s", m_szRange.c_str());
            ret = ecFormat;
            goto SkipRule;
        }

        // Reference point: the current time for "today", otherwise the date
        // parsed by GetDayMonthYear (a zero result is treated as an error).
        range.time_point = time(nullptr);
        if (!STRNICMP(pstr, "today"))
            range.fToday = true;
        else {
            range.fToday = false;
            if (!(range.time_point = GetDayMonthYear(pstr, len))) {
                m_p_filter_logger->splog(TLOG_ERR, "Date range error: %s", m_szRange.c_str());
                ret = ecFormat;
                goto SkipRule;
            }
        }
        len = GetNextWord(&pstr, 0);
        if (!len) {
            m_p_filter_logger->splog(TLOG_ERR, "Range field error: %s", m_szRange.c_str());
            ret = ecFormat;
            goto SkipRule;
        }
    }

    // Lower bound: the word "min" or a number.
    if (!STRNICMP(pstr, "min"))
        range.fMin = true;
    else if (IsInt(pstr, len)) {
        GetInt(pstr, len, &i_min);
        d_min = i_min;
        u64_min = i_min < 0 ? 0 : i_min;
    }
    else if (GetDouble(pstr, len, &d_min))
        i_min = (int)d_min;
    else {
        m_p_filter_logger->splog(TLOG_ERR, "Range error: [%s] %s", pstr, m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }

    // Upper bound: the word "max" or a number.
    len = GetNextWord(&pstr, 0);
    if (!len) {
        m_p_filter_logger->splog(TLOG_ERR, "Range field error: %s", m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }
    if (!STRNICMP(pstr, "max"))
        range.fMax = true;
    else if (IsInt(pstr, len)) {
        GetInt(pstr, len, &i_max);
        d_max = i_max;
        u64_max = i_max;
    }
    else if (GetDouble(pstr, len, &d_max))
        i_max = (int)d_max;
    else {
        m_p_filter_logger->splog(TLOG_ERR, "Range error: [%s] %s", pstr, m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }
    range.d_min = d_min * factor;
    range.d_max = d_max * factor;

SkipRule:
    // Common exit: release the temporary key copy on every path.
    DELETE_ARR(pkey);
    return ret;
}

/// Fills a TRangeDef for a levenstein rule.
///
/// The key comes from the key line (m_szKey); the range line (m_szRange)
/// supplies the two bounds, each being the word min / max or a number.
///
/// @param pCurRule   rule to fill; its rt is set to rule_type once the key
///                   has been accepted.
/// @param rule_type  type to assign (RT_LEVENSTEIN at the visible call site).
/// @return ecOK on success, ecFormat when the key or the range is malformed
///         (the reason is logged).
ecRet TSetRules::GetLevensteinRule(TRuleDef* pCurRule, TRulesType rule_type) {
    ecRet ret = ecOK;
    // NOTE(review): i_min / i_max / u64_min / u64_max are assigned below but
    // never stored into `range`, and factor is always 1 here - confirm these
    // are leftovers from GetRangeRule.
    int i_min = 0;
    int i_max = 0;
    ui64 u64_min = 0;
    ui64 u64_max = 0;
    double d_min = 0.;
    double d_max = 0.;
    int factor = 1;
    char* pkey = 0;
    const char* pstr = m_szKey.c_str();
    int len = GetNextWord(&pstr, 0);

    auto& range = pCurRule->rules.emplace<TRangeDef>();

    if (!len || !CheckSymbols(pstr, len)) {
        m_p_filter_logger->splog(TLOG_ERR, "Lenevstein rule key error: %s", m_szKey.c_str());
        ret = ecFormat;
        goto SkipRule;
    }
    // presumably allocates a lower-cased copy of the key into pkey, released
    // by DELETE_ARR at the end - confirm against the macro definitions.
    STRDUPLWR(&pkey, pstr, len);

    pCurRule->rt = rule_type;
    range.key.assign(pkey);

    pstr = m_szRange.c_str();
    len = GetNextWord(&pstr, 0);
    if (!len) {
        m_p_filter_logger->splog(TLOG_ERR, "Levenstein field error: %s", m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }

    // Lower bound: the word "min" or a number.
    if (!STRNICMP(pstr, "min"))
        range.fMin = true;
    else if (IsInt(pstr, len)) {
        GetInt(pstr, len, &i_min);
        d_min = i_min;
        u64_min = i_min < 0 ? 0 : i_min;
    }
    else if (GetDouble(pstr, len, &d_min))
        i_min = (int)d_min;
    else {
        m_p_filter_logger->splog(TLOG_ERR, "Levenstein error: [%s] %s", pstr, m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }

    // Upper bound: the word "max" or a number.
    len = GetNextWord(&pstr, 0);
    if (!len) {
        m_p_filter_logger->splog(TLOG_ERR, "Levenstein field error: %s", m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }
    if (!STRNICMP(pstr, "max"))
        range.fMax = true;
    else if (IsInt(pstr, len)) {
        GetInt(pstr, len, &i_max);
        d_max = i_max;
        u64_max = i_max;
    }
    else if (GetDouble(pstr, len, &d_max))
        i_max = (int)d_max;
    else {
        m_p_filter_logger->splog(TLOG_ERR, "Levenstein error: [%s] %s", pstr, m_szRange.c_str());
        ret = ecFormat;
        goto SkipRule;
    }

    range.d_min = d_min * factor;
    range.d_max = d_max * factor;

SkipRule:
    // Common exit: release the temporary key copy on every path.
    DELETE_ARR(pkey);
    return ret;
}

/// Parses the tail of an R_ANTI rule line into a TAntiDef.
///
/// The first word is stored as the anti rule name; the following words, up
/// to the first number and at most 500 of them, are stored as the names of
/// the rules to cancel.
///
/// @param pCurRule  rule to fill; its rt is set to RT_ANTI.
/// @param pstr      cursor positioned right behind the R_ANTI keyword.
/// @return false when no anti rule name follows the keyword.
bool TSetRules::GetAntiRule(TRuleDef* pCurRule, const char* pstr) {
    pCurRule->rt = RT_ANTI;
    auto& anti = pCurRule->rules.emplace<TAntiDef>();

    int wordLen = GetNextWord(&pstr, 0);
    if (!wordLen)
        return false;
    anti.pAntiRuleName.assign(pstr, wordLen);

    for (int canceled = 0; (wordLen = GetNextWord(&pstr, wordLen)) && canceled < 500; ++canceled) {
        // A number ends the list of rule names.
        if (IsNumber(pstr, wordLen))
            break;
        anti.vCancelRulesNames.emplace_back(pstr, wordLen);
    }

    return true;
}

// Macro table handed to TRulePcre::SetRe() by GetLvRule() when it prepares the
// regular expression of an Lv rule. Each entry pairs a macro name ("$BG",
// "$EN") with a regular-expression fragment; presumably SetRe() substitutes
// the fragment for the macro — confirm against TRulePcre.
// NOTE(review): the meaning of the third field (`true`) is defined by
// TPcreUnit, which is not visible here.
static const TPcreUnit rgLvMacroRe[] = {
        {"$BG", "(?:^|\\W)", true},
        {"$EN", "(?:\\W|$)", true}};

// Fills `pCurRule` as an Lv rule from the currently parsed fields
// (m_szRegExpr, m_szKey, m_szOption).
//
// * The regular expression (after macro processing by TRulePcre::SetRe, or the
//   raw text when SetRe returns an empty result) becomes the TLvDef argument;
//   an empty string is used when no expression was given.
// * The key is resolved through GetKey().
// * Options are optional. When present they must contain at least two integer
//   parameters (stored in `to` and `after`), optionally followed by the flags
//   NOT_UPPER, INSERT_KEY and COMPENSATORY.
//
// Returns ecOK on success and ecFormat on any format error (each error is
// logged).
ecRet TSetRules::GetLvRule(TRuleDef* pCurRule) {
    pCurRule->rt = RT_LV;

    if (m_szRegExpr) {
        auto pretext = TRulePcre::SetRe(m_szRegExpr, MakeArrayRef(rgLvMacroRe, Y_ARRAY_SIZE(rgLvMacroRe)));
        pCurRule->rules.emplace<TLvDef>(pretext ? pretext.c_str() : m_szRegExpr.c_str());
    } else {
        pCurRule->rules.emplace<TLvDef>("");
    }

    TLvDef& lv = std::get<TLvDef>(pCurRule->rules);

    if (!(m_szKey) || GetKey(&lv) != ecOK) {
        m_p_filter_logger->splog(TLOG_ERR, "%s", "Key error in Lv rule!");
        return ecFormat;
    }

    if (m_szOption.empty())
        return ecOK; // default options - all 0

    const int cTokens = GetTokens(m_szOption);

    if (cTokens < 2) {
        m_p_filter_logger->splog(TLOG_ERR, "Options error for Linguistic rule: %s", m_szOption.c_str());
        // Stop here: m_ptokens[0] / m_ptokens[1] were not (both) produced by
        // this parse, so reading them below would use stale token data.
        return ecFormat;
    }

    // Both numeric parameters are checked so that every bad one gets reported.
    ecRet ret = ecOK;

    if (!GetInt(m_ptokens[0], m_tokenlens[0], &lv.to)) {
        m_p_filter_logger->splog(TLOG_ERR, "Options (parameter 1) error: %s", m_szOption.c_str());
        ret = ecFormat;
    }

    if (!GetInt(m_ptokens[1], m_tokenlens[1], &lv.after)) {
        m_p_filter_logger->splog(TLOG_ERR, "Options (parameter 2) error: %s", m_szOption.c_str());
        ret = ecFormat;
    }

    // Remaining tokens are flags. The comparison covers only the token's own
    // length, so a prefix of a flag name is accepted as that flag.
    for (int ind = 2; ind < cTokens; ind++) {
        if (!strncmp(m_ptokens[ind], "NOT_UPPER", m_tokenlens[ind]))
            lv.option |= LVO_NU;
        else if (!strncmp(m_ptokens[ind], "INSERT_KEY", m_tokenlens[ind]))
            lv.option |= LVO_INSERT_KEY;
        else if (!strncmp(m_ptokens[ind], "COMPENSATORY", m_tokenlens[ind]))
            lv.option |= LVO_COMPENSATORY;
        else {
            m_p_filter_logger->splog(TLOG_ERR, "Options (parameter %d ) error: %s", ind + 1, m_szOption.c_str());
            return ecFormat;
        }
    }

    return ret;
}

// Parses m_szKey for a linguistic (Lv) rule and stores the resulting key id in
// plv->kid.
//
// Expected layout of m_szKey: a one-letter option ('N', 'L', 'R' or 'E')
// followed by the key text. The key text (trailing spaces/tabs removed,
// lower-cased via STRDUPLWR) is looked up in m_mapLvKeys:
//   * found with the same option  -> its id is reused;
//   * found with another option   -> error;
//   * not found                   -> registered in m_pvLvKeys / m_mapLvKeys,
//                                    as long as the id stays below
//                                    MAX_LV_KEYS - 1.
//
// Returns ecOK on success, ecFormat (after logging) on any error.
ecRet TSetRules::GetKey(TLvDef* plv) {
    if (m_szKey.empty()) {
        m_p_filter_logger->splog(TLOG_ERR, "%s", "Key not defined for Linguistic rule");
        return ecFormat;
    }

    TLvKey candidate = {int(m_pvLvKeys.size()), 0, 0};
    const char* cursor = m_szKey.c_str();

    // First word: the single-letter key option.
    const int optionLen = GetNextWord(&cursor, 0);
    if (optionLen != 1) {
        m_p_filter_logger->splog(TLOG_ERR, "Linguistic rule key option error: %s", m_szKey.c_str());
        return ecFormat;
    }

    switch (cursor[0]) {
        case 'N':
            candidate.option = 0;
            break;
        case 'L':
            candidate.option = LVO_L;
            break;
        case 'R':
            candidate.option = LVO_R;
            break;
        case 'E': // entirely
            candidate.option = LVO_LR;
            break;
        default:
            m_p_filter_logger->splog(TLOG_ERR, "Linguistic rule key option error: %s", m_szKey.c_str());
            return ecFormat;
    }

    // Everything from the second word up to the end of the string is the key.
    if (!GetNextWord(&cursor, 0)) {
        m_p_filter_logger->splog(TLOG_ERR, "Linguistic rule key error: %s", m_szKey.c_str());
        return ecFormat;
    }

    int keyLen = strlen(cursor);
    while (keyLen > 0 && (cursor[keyLen - 1] == ' ' || cursor[keyLen - 1] == '\t'))
        --keyLen;

    char* loweredKey = 0;
    STRDUPLWR(&loweredKey, cursor, keyLen);

    ecRet ret = ecOK;
    const decltype(m_mapLvKeys)::const_iterator known = m_mapLvKeys.find(TStringBuf{loweredKey, size_t(keyLen)});

    if (known != cend(m_mapLvKeys)) {
        int kid = known->second;
        if (candidate.option != m_pvLvKeys[kid].option) {
            m_p_filter_logger->splog(TLOG_ERR, "Same key exists with other option : %s", m_szKey.c_str());
            ret = ecFormat;
        } else {
            plv->kid = kid;
        }
    } else if (candidate.id < (MAX_LV_KEYS - 1)) {
        int kid = candidate.id;
        candidate.key = loweredKey;
        m_pvLvKeys.push_back(candidate);
        m_mapLvKeys.emplace(loweredKey, kid);
        plv->kid = kid;
    } else {
        m_p_filter_logger->splog(TLOG_ERR, "Too much Keys is used.MAX_LV_KEYS must be increased: %s", m_szKey.c_str());
        ret = ecFormat;
    }

    // The temporary lower-cased copy is released on every path past its creation.
    DELETE_ARR(loweredKey);
    return ret;
}

// Returns the rule stored at position `iRule` of rules file `iFile`.
//
// Returns nullptr when either index is out of range (including negative
// values) or when the rule at that position has type RT_UNKNOWN.
TRuleDef* TSetRules::GetRule(int iFile, int iRule) {
    // Negative indices would pass the upper-bound checks and index out of range.
    if (iFile < 0 || iRule < 0 || iFile >= m_cFiles)
        return nullptr;

    const auto& fileRules = m_vFileRules[iFile].vrules;
    if (iRule >= (int)(fileRules.size()))
        return nullptr;

    TRuleDef* prule = fileRules[iRule];
    if (prule->rt == RT_UNKNOWN)
        return nullptr;

    return prule;
}
// Looks a rule up by name (`pRuleName`, `len` characters) in m_mapRules and
// returns it through GetRule(). Returns nullptr when the name is unknown or
// GetRule() rejects the stored position.
TRuleDef* TSetRules::GetRuleByName(const char* pRuleName, int len) {
    const TStringBuf name{pRuleName, size_t(len)};
    const decltype(m_mapRules)::const_iterator found = m_mapRules.find(name);

    if (found == cend(m_mapRules))
        return nullptr;

    // The mapped value holds the (file index, rule index) pair.
    const auto& position = found->second;
    return GetRule(position.first, position.second);
}

// Resolves the boolean expression of rule `prule` (which must hold a TBfDef).
//
// The expression text is processed by TBForm::Request(); every operand name it
// reports is resolved with GetRuleByName() and linked both ways
// (prule.Dependencies / operand.Masters). The computed result table is copied
// into bf.pResult.
//
// Throws yexception when the expression text is empty or an operand is not a
// defined rule.
void TSetRules::SetBf(TRuleDef& prule) {
    TBfDef& bf = std::get<TBfDef>(prule.rules);

    if (!bf.text) {
        ythrow TWithBackTrace<yexception>() << "Empty boolean expression: " << prule;
    }

    TBForm bexpr;
    TBF_Result exres;
    bexpr.Request(bf.text.c_str(), bf.text.size(), &exres);

    prule.Dependencies.reserve(exres.cWords);

    for (int i = 0; i < exres.cWords; i++) {
        const auto operandName = exres.rgpWords[i];
        const auto operandLen = exres.rgWordsLen[i];

        TRuleDef* operand = GetRuleByName(operandName, operandLen);
        if (!operand) {
            ythrow TWithBackTrace<yexception>() << prule << ' ' << TStringBuf(operandName, operandLen) << " not defined";
        }

        prule.Dependencies.emplace_back(*operand);
        operand->Masters.emplace_back(prule);
    }

    // Number of result elements to copy: 2^(cWords - 3), but at least one.
    // NOTE(review): presumably the result table is bit-packed — confirm
    // against TBForm.
    int resultSize = 1;
    if (exres.cWords > 3)
        resultSize = 1 << (exres.cWords - 3);

    bf.pResult.assign(exres.rgResult, exres.rgResult + resultSize);
}

// Resolves the arithmetic expression of rule `prule` (which must hold a
// TArDef).
//
// The expression text is parsed by TArForm::Parse(); every operand name it
// reports is resolved with GetRuleByName() and linked both ways
// (prule.Dependencies / operand.Masters). Operand signs, the comparison and
// the comparison value are copied from the parse result into the TArDef.
//
// Throws yexception when the expression text is empty, parsing fails, or an
// operand is not a defined rule.
void TSetRules::SetAr(TRuleDef& prule) {
    TArDef& ar = std::get<TArDef>(prule.rules);

    if (!ar.text) {
        ythrow TWithBackTrace<yexception>() << prule << " Empty arithmetic expression";
    }

    TArForm arexpr;
    TAr_Result exres;
    const bool parsed = arexpr.Parse(ar.text.c_str(), ar.text.size(), &exres) == ecOK;
    if (!parsed) {
        ythrow TWithBackTrace<yexception>() << prule << ' ' << arexpr.GetError();
    }

    prule.Dependencies.reserve(exres.cWords);

    for (int i = 0; i < exres.cWords; i++) {
        const auto operandName = exres.rgpWords[i];
        const auto operandLen = exres.rgWordsLen[i];

        TRuleDef* operand = GetRuleByName(operandName, operandLen);
        if (!operand) {
            ythrow TWithBackTrace<yexception>() << prule << ' ' << TStringBuf(operandName, operandLen) << " not defined";
        }

        prule.Dependencies.emplace_back(*operand);
        operand->Masters.emplace_back(prule);
    }

    // One sign per operand.
    ar.pSignes.assign(exres.rgSigns, exres.rgSigns + exres.cWords);
    ar.comp = exres.comp;
    ar.value = exres.value;
}

void TSetRules::CheckExpr() {

    m_p_filter_logger->spprint("*** Boolean & arithmetic expressions checking");
    std::vector<TRuleDef*> vExprRules;

    for (const auto& fileRule : m_vFileRules)
        for (auto prule : fileRule.vrules) {
            if (prule->rt == RT_BF) {
                SetBf(*prule);
                vExprRules.push_back(prule);
            } else if (prule->rt == RT_ARITHMETIC) {
                SetAr(*prule);
                vExprRules.push_back(prule);
            } else if (prule->rt != RT_UNKNOWN)
                prule->id = 1;
        }
    //  get rules so that all operands was defined
    bool fInsert = true;
    while (fInsert) {
        fInsert = false;
        for (auto& prule : vExprRules) {
            if (prule->id)
                continue;

            const bool fAllDefined = AllOf(prule->Dependencies, [](const TRuleDef& rule) {
                return rule.id != 0;
            });

            if (fAllDefined) {
                m_vExprRules.push_back(prule);
                fInsert = true;
                prule->id = 1;
            }
        }
    }

    int cBool = 0;
    int cAr = 0;

    for (auto& prule : vExprRules) {
        if (prule->id) {
            if (prule->rt == RT_BF)
                cBool++;
            else if (prule->rt == RT_ARITHMETIC)
                cAr++;
        } else {
            ythrow TWithBackTrace<yexception>() << *prule << " Could not define all rule operands";
        }
    }

    m_p_filter_logger->spprint("*** Success boolean rules: %d", cBool);
    m_p_filter_logger->spprint("*** Fault boolean rules: %d", m_cBoolFault);

    m_p_filter_logger->spprint("*** Success arithmetic rules: %d", cAr);
    m_p_filter_logger->spprint("*** Fault arithmetic rules: %d", m_cArFault);
}

// Collects all valid rules into `ppprules` and assigns each rule its final id
// (its index in `ppprules`).
//
// Order: first every rule that is neither RT_UNKNOWN nor an expression, in
// file order; then the expression rules in the dependency order prepared in
// m_vExprRules. Hyperscan databases are prepared (PrepareHsRules) after the
// plain rules have received their ids. Rules of type RT_UNKNOWN are deleted.
//
// Throws yexception when m_cBoolFault or m_cArFault is non-zero.
// Returns the number of collected rules.
//
// NOTE(review): the raw pointers stay in m_vFileRules[..].vrules after this
// call — both the ones handed to `ppprules` and the deleted RT_UNKNOWN ones —
// so vrules must not be dereferenced afterwards; confirm no caller does.
int TSetRules::GetRules(TVector<THolder<TRuleDef>>& ppprules, const TMaybe<TFsPath>& hsRulesCache) {

    ppprules.clear();
    // add all not boolean valid rules
    for (const auto& fileRule : m_vFileRules)
        for (auto prule : fileRule.vrules) {
            if (prule->rt != RT_UNKNOWN &&
                prule->rt != RT_BF &&
                prule->rt != RT_ARITHMETIC) {
                prule->id = ppprules.size();
                ppprules.emplace_back(prule);
            }
        }

    PrepareHsRules(hsRulesCache);

    // add boolean rules
    for (TRuleDef* prule : m_vExprRules) {
        prule->id = ppprules.size();
        ppprules.emplace_back(prule);
    }

    // delete fault rules
    for (const auto& fileRule : m_vFileRules)
        for (auto prule : fileRule.vrules) {
            if (prule->rt == RT_UNKNOWN)
                delete prule;
        }

    if (m_cBoolFault || m_cArFault) {
        ythrow TWithBackTrace<yexception>() << LabeledOutput(m_cBoolFault, m_cArFault);
    }

    // "%lu" expects unsigned long; size() returns size_t, which is a different
    // type on some platforms, so convert explicitly.
    const unsigned long cSuccess = static_cast<unsigned long>(ppprules.size());
    const int cFault = m_cBoolFault + m_cArFault;

    m_p_filter_logger->splog(TLOG_ERR, "*** All success rules: %lu", cSuccess);
    m_p_filter_logger->splog(TLOG_ERR, "*** All fault rules: %d", cFault);

    m_p_filter_logger->splog(TLOG_NOTICE, "All success rules: %lu; All fault rules: %d", cSuccess, cFault);

    return static_cast<int>(ppprules.size());
}

// Builds one Hyperscan database per field from all RT_HS rules and stores it
// in HsDbsByField, using an optional on-disk cache of serialized databases.
//
// The cache (file `hsRulesCache`, if given) maps a hash to a serialized
// database. The hash of a field's database covers the field, and for every
// rule its id, name and pattern text. The id is part of the key because the
// ids are compiled into the database: a database cached under different ids
// would report matches for the wrong rules.
//
// When at least one database had to be compiled, the cache (old entries plus
// new ones) is written to a temporary file and renamed over `hsRulesCache`.
void TSetRules::PrepareHsRules(const TMaybe<TFsPath>& hsRulesCache) {
    THashMap<TSpFields, TVector<const TRuleDef*>> rulesByField;
    for (const auto& fileRule : m_vFileRules) {
        for (auto prule : fileRule.vrules) {
            if (prule->rt == RT_HS) {
                for(const auto field : prule->pfields) {
                    rulesByField[field].emplace_back(prule);
                }
            }
        }
    }

    using THsCache = THashMap<size_t, TString>;
    const auto hsCache = [&hsRulesCache]() -> THsCache{
        if(hsRulesCache && hsRulesCache->Exists()) {
            THsCache cache;
            {
                TIFStream s(*hsRulesCache);
                ::Load(&s,cache);
                Syslog(TLOG_INFO) << "load cache from " << hsRulesCache;
            }
            return cache;
        } else {
            Syslog(TLOG_INFO) << "invalid cache " << hsRulesCache;
        }
        return {};
    }();

    // Lookups go to the loaded cache; additions go to the copy, so comparing
    // the two at the end tells whether anything has to be saved.
    THsCache mutableCache = hsCache;

    for(const auto& [field, rules]: rulesByField) {
        const auto start = Now();
        size_t hash = field;
        for(const auto& rule : rules) {
            // rule->id is embedded into the compiled database (see `ids`
            // below), so it has to take part in the cache key.
            hash = MultiHash(hash, rule->id, rule->pRuleName, std::get<THsDef>(rule->rules).Text);
        }

        if(auto cached = MapFindPtr(hsCache, hash)) {
            HsDbsByField.emplace(field, NHyperscan::Deserialize(*cached));
            Syslog(TLOG_INFO) << "load for " << field << " " << hash << ' ' << rules.size() << " rules from cache for " << (Now() - start);
            continue;
        }

        TVector<const char*> regexs(Reserve(rules.size()));
        TVector<unsigned int> flags(rules.size(), HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
        TVector<unsigned int> ids(Reserve(rules.size()));
        for (const TRuleDef *rule : rules) {
            regexs.emplace_back(std::get<THsDef>(rule->rules).Text.data());
            ids.emplace_back(rule->id);
        }

        const auto& emplaced = HsDbsByField.emplace(field, NHyperscan::CompileMulti(regexs, flags, ids)).first->second;
        mutableCache.emplace(hash, NHyperscan::Serialize(emplaced));
        Syslog(TLOG_INFO) << "load for " << field << " for " << rules.size() << " rules from raw for " << (Now() - start);
    }

    if(hsRulesCache && mutableCache != hsCache) {
        // Write to a uniquely named temporary file first, then rename it over
        // the cache file.
        const TString tempName = hsRulesCache->GetPath() + "_" + CreateGuidAsString() + ".tmp";
        {
            TOFStream s(tempName);
            ::Save(&s, mutableCache);
        }
        Y_VERIFY(NFs::Rename(tempName, *hsRulesCache));
        Syslog(TLOG_INFO) << "saved cache " << *hsRulesCache;
    }
}

// Extracts the rule data version from a line starting with SP_VERSION_NUM.
//
// `*pVersion` is reset to 0 first. Returns false when `str` does not start
// with the SP_VERSION_NUM marker, or when the marker is not followed (after
// optional characters with codes 1..32) by a digit; the latter case is logged.
// Otherwise the number is parsed with atof(), stored in `*pVersion` and true
// is returned. Versions below 3.0 are additionally logged.
bool TSetRules::GetDataVersion(const char* str, float* pVersion, TSpLogger *pLogger) {
    *pVersion = 0.;

    if (STRNCMP(str, SP_VERSION_NUM))
        return false;

    // Continue right after the marker, then skip blanks / control characters.
    const char* cursor = str + (sizeof(SP_VERSION_NUM) - 1);
    while (*cursor > 0 && *cursor <= 32)
        ++cursor;

    const bool startsWithDigit = !(*cursor < '0' || *cursor > '9');
    if (!startsWithDigit) {
        pLogger->splog(TLOG_ERR, "strange data version: %s", str);
        return false;
    }

    *pVersion = static_cast<float>(atof(cursor));

    if (*pVersion < 3.0)
        pLogger->splog(TLOG_ERR, "rule data version: %f, %s", *pVersion, str);

    return true;
}
