#include "stopwords_list.h"

#include "utils.h"

#include <passport/infra/libs/cpp/dbpool/handle.h>
#include <passport/infra/libs/cpp/dbpool/util.h>
#include <passport/infra/libs/cpp/dbpool/value.h>
#include <passport/infra/libs/cpp/utils/log/global.h>
#include <passport/infra/libs/cpp/utils/string/format.h>
#include <passport/infra/libs/cpp/utils/string/split.h>
#include <passport/infra/libs/cpp/utils/string/string_utils.h>

namespace NPassport::NBb {
    /// Builds the stop-word checker.
    ///
    /// @param db               database pool used for the initial load and later reloads.
    /// @param defaultStopWords built-in stop words separated by ',' or ';'; stored lowercased.
    /// @throws std::exception  rethrown (after a warning is logged) when the initial load fails.
    TStopWordsList::TStopWordsList(NDbPool::TDbPool& db, const TString& defaultStopWords)
        : Db_(db)
    {
        // The built-in words are lowercased once here and merged into every loaded set.
        const TString loweredDefaults = NUtils::TolowerCopy(defaultStopWords);
        const std::vector<TString> defaults = NUtils::ToVector(loweredDefaults, ",;");
        DefaultStopWords_.insert(defaults.begin(), defaults.end());

        try {
            // Initial load runs inside the constructor, so no mutex is needed yet.
            std::shared_ptr<std::set<TString>> initialWords = LoadWords();
            const auto wordsCount = initialWords->size();
            StopWords_.Set(std::move(initialWords));

            StopSubwords_.Set(LoadSubwords());

            TLog::Info("Blackbox: StopWordsList: loaded stop words list: <%lu> words (<%lu> default), <%d> subwords",
                       wordsCount,
                       DefaultStopWords_.size(),
                       SubwordsCount_);
        } catch (const std::exception& e) {
            // A failed startup load is fatal for this object: log and propagate.
            TLog::Warning("Blackbox: StopWordsList: failed to load stop words list at startup. <%s>", e.what());
            throw;
        }
    }

    // Defaulted destructor, defined out of line in this translation unit.
    TStopWordsList::~TStopWordsList() = default;

    /// Reports whether @p word is present in the currently published stop-word set.
    /// The lookup is an exact match; no normalization of @p word is done here.
    /// Returns false when no set has been published.
    bool TStopWordsList::IsStopWord(const TString& word) const {
        const std::shared_ptr<std::set<TString>> current = StopWords_.Get();
        if (!current) {
            return false;
        }
        return current->count(word) != 0;
    }

    /// Reports whether any stop subword occurs anywhere inside @p substr.
    /// Returns false when no subword pattern has been published.
    bool TStopWordsList::IsStopSubstr(const TString& substr) const {
        const std::shared_ptr<std::regex> pattern = StopSubwords_.Get();
        if (!pattern) {
            return false;
        }
        return std::regex_search(substr.cbegin(), substr.cend(), *pattern);
    }

    static const TString WORDS_QUERY("SELECT word FROM stop_words");

    /// Reads the stop words from the database and returns a fresh set made of
    /// the default stop words plus every fetched word passed through
    /// TUtils::NormalizeLogin. Does not publish the set; the caller does that.
    std::shared_ptr<std::set<TString>> TStopWordsList::LoadWords() {
        std::unique_ptr<NDbPool::TResult> fetched =
            NDbPool::NUtils::DoQueryTries(Db_, WORDS_QUERY).Result;

        // Seed the new set with the built-in defaults.
        auto merged = std::make_shared<std::set<TString>>(
            DefaultStopWords_.begin(), DefaultStopWords_.end());

        for (const NDbPool::TRow& row : fetched->Table()) {
            // Store the normalized form so later comparisons are case insensitive.
            TString normalized = TUtils::NormalizeLogin(row[0].AsString());
            merged->insert(std::move(normalized));
        }

        return merged;
    }

    static const TString SUBWORDS_QUERY("SELECT subword FROM stop_subwords");

    std::shared_ptr<std::regex> TStopWordsList::LoadSubwords() {
        std::unique_ptr<NDbPool::TResult> result =
            NDbPool::NUtils::DoQueryTries(Db_, SUBWORDS_QUERY).Result;

        unsigned count = 0;
        TString regex;
        regex.reserve(512);

        if (!result->size()) {
            SubwordsCount_ = 0;
            return std::shared_ptr<std::regex>();
        }

        for (const NDbPool::TRow& row : result->Table()) {
            // add all subwords as lowercase for future case insensitive compare
            NUtils::AppendSeparated(regex, '|', TUtils::NormalizeLogin(row[0].AsString()));
            ++count;
        }
        SubwordsCount_ = count;

        return std::make_shared<std::regex>(regex.cbegin(), regex.cend());
    }

    void TStopWordsList::Run() {
        try {
            std::shared_ptr<std::set<TString>> words = LoadWords();
            std::shared_ptr<std::regex> subwords = LoadSubwords();

            StopWords_.Set(words);
            StopSubwords_.Set(std::move(subwords));

            TLog::Info("Blackbox: StopWordsList: loaded stop words list: <%lu> words (<%lu> default), <%d> subwords",
                       words->size(),
                       DefaultStopWords_.size(),
                       SubwordsCount_);
        } catch (const std::exception& e) {
            TLog::Warning("Blackbox: StopWordsList: failed to refresh stop words list. <%s>", e.what());
        }
    }

}
