#include <util/draft/date.h>
#include <util/digest/fnv.h>
#include <util/generic/hash_set.h>
#include <util/generic/set.h>
#include <util/generic/size_literals.h>
#include <util/string/join.h>
#include <util/string/printf.h>
#include <library/cpp/string_utils/url/url.h>

#include <library/cpp/getopt/modchooser.h>
#include <library/cpp/getopt/last_getopt.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/wmcutil/log.h>

#include <wmconsole/version3/processors/seocheck/tables.pb.h>

#include "config.h"
#include "searcher.h"
#include "text_processor.h"

using namespace NJupiter;

namespace NWebmaster {

// Map job that scans source rows for search-rule hits and emits NProto::TParsed records.
//
// Input rows are NYT::TNode maps; the columns read are "text", "full_name",
// "candidate_id", "attachment_id", "mds", "mime_type", "uploader" and "responsibles".
// Rows whose "text" is null, or whose text produces no hits, are skipped.
//
// Output: every row with at least one hit is written to output table 0; rows with more
// than one distinct hit string are additionally written to output table 1.
struct TMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NProto::TParsed>> {
    Y_SAVELOAD_JOB(Searcher, TableConfig)

    TMapper() = default;

    // searcher    - rule index used to find hits in the row text (copied into the job state).
    // tableConfig - input table index -> source table name, used to fill SourceTable.
    TMapper(const TSearcher<ERuleSource> &searcher, const THashMap<ui32, TString> &tableConfig)
        : Searcher(searcher)
        , TableConfig(tableConfig)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        const ui32 TABLENO_SNAPSHOT = 0;
        const ui32 TABLENO_HITS     = 1;
        TTextProcessor textProcessor;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();

            // Look the column up once and reuse it for the null check, the search and the tokenizer.
            const NYT::TNode &textNode = row["text"];
            if (NYTUtils::IsNodeNull(textNode)) {
                continue;
            }
            const TString &text = textNode.AsString();

            TSearcher<ERuleSource>::TRules hits;
            Searcher.Search(text, hits);
            if (hits.empty()) {
                continue;
            }

            NProto::TParsed dstMsg;
            dstMsg.SetTextTokens(textProcessor.Convert(text));
            dstMsg.SetFullName(row["full_name"].AsString());
            dstMsg.SetCandidateId(row["candidate_id"].AsInt64());
            dstMsg.SetAttachmentId(row["attachment_id"].AsInt64());
            dstMsg.SetMDSLink(row["mds"].AsString());
            dstMsg.SetMDSMimeType(row["mime_type"].AsString());

            // Use the same null check as for "text". A const TNode lookup of an absent key
            // yields an undefined node, for which TNode::IsNull() is false and AsString() throws.
            // NOTE(review): NYTUtils::IsNodeNull is defined outside this file - confirm it
            // treats an absent column as null as well.
            const NYT::TNode &uploaderNode = row["uploader"];
            if (!NYTUtils::IsNodeNull(uploaderNode)) {
                dstMsg.SetUploader(uploaderNode.AsString());
            }

            // The table index of the current row selects the source table name.
            dstMsg.SetSourceTable(TableConfig.at(input->GetTableIndex()));

            // "responsibles" is read unconditionally as a list of strings.
            TDeque<TString> resps;
            for (const auto &node : row["responsibles"].AsList()) {
                resps.push_back(node.AsString());
            }
            dstMsg.SetResponsibles(JoinSeq(",", resps));

            // Group hit strings by rule source; hitsTotal collects the distinct hit strings.
            THashSet<TString> hitsTotal;
            TDeque<TString> hitsWho, hitsWhat, hitsWhere, hitsHow;
            for (const auto &rule : hits) {
                if (rule.second == E_RULE_WHO) {
                    hitsWho.push_back(rule.first);
                } else if (rule.second == E_RULE_WHAT) {
                    hitsWhat.push_back(rule.first);
                } else if (rule.second == E_RULE_WHERE) {
                    hitsWhere.push_back(rule.first);
                } else if (rule.second == E_RULE_HOW) {
                    hitsHow.push_back(rule.first);
                }
                hitsTotal.insert(rule.first);
            }

            dstMsg.SetHitsHow(JoinSeq(",", hitsHow));
            dstMsg.SetHitsWhat(JoinSeq(",", hitsWhat));
            dstMsg.SetHitsWhere(JoinSeq(",", hitsWhere));
            dstMsg.SetHitsWho(JoinSeq(",", hitsWho));
            dstMsg.SetHitsTotal(hitsTotal.size());

            output->AddRow(dstMsg, TABLENO_SNAPSHOT);
            if (hitsTotal.size() > 1) {
                output->AddRow(dstMsg, TABLENO_HITS);
            }
        }
    }
public:
    TSearcher<ERuleSource> Searcher;      // rule index, serialized with the job
    THashMap<ui32, TString> TableConfig;  // input table index -> source table name
};

// Registers TMapper through the REGISTER_MAPPER macro (defined outside this file);
// presumably this is what lets the YT job runtime instantiate the mapper - TODO confirm.
REGISTER_MAPPER(TMapper)

void UpdateConfig(const TString &filePath, const TString &ytPath, ERuleSource &src) {
    NYT::IClientPtr client = NYT::CreateClient(TConfig::CInstance().MR_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();
    auto writer = TTable<NProto::TSearchRule>(tx, ytPath).GetWriter();
    TSearcher<ERuleSource> searcher;
    NProto::TSearchRule dstMsg;
    TString rule;
    TFileInput input(filePath);
    THashSet<TString> usedRules;
    while (input.ReadLine(rule)) {
        if (usedRules.contains(rule)) {
            continue;
        }
        TDeque<TSearcher<ERuleSource>::TKey> appliedKeys;
        searcher.AddRule(rule, src, appliedKeys);
        TDeque<TString> keys;
        for (const auto &key : appliedKeys) {
            keys.push_back(key.first);
        }
        dstMsg.SetRule(rule);
        dstMsg.SetKeys(JoinSeq(" | ", keys));
        dstMsg.SetTag(src);
        writer->AddRow(dstMsg);
        usedRules.insert(rule);
    }

    writer->Finish();
    tx->Commit();
}

int UpdateConfig(int argc, const char **argv) {
    TString configType;
    TString filePath;
    TString ytPath;

    NLastGetopt::TOpts opts;

    opts
        .AddLongOption("config", "who|what|how|where")
        .StoreResult(&configType)
        .Required()
    ;

    opts
        .AddLongOption("file")
        .StoreResult(&filePath)
        .Required()
    ;

    opts
        .AddLongOption("yt-path")
        .StoreResult(&ytPath)
    ;

    NLastGetopt::TOptsParseResult(&opts, argc, argv);

    ERuleSource src;
    if (configType == "who") {
        src = E_RULE_WHO;
    } else if (configType == "what") {
        src = E_RULE_WHAT;
    } else if (configType == "how") {
        src = E_RULE_HOW;
    } else if (configType == "where") {
        src = E_RULE_WHERE;
    } else {
        ythrow yexception() << "unknown config type";
    }

    if (ytPath.empty()) {
        if (src == E_RULE_WHO) {
            ytPath = TConfig::CInstance().TABLE_SEOCHECK_CONFIG_WHO;
        } else if (src == E_RULE_WHAT) {
            ytPath = TConfig::CInstance().TABLE_SEOCHECK_CONFIG_WHAT;
        } else if (src == E_RULE_HOW) {
            ytPath = TConfig::CInstance().TABLE_SEOCHECK_CONFIG_HOW;
        } else if (src == E_RULE_WHERE) {
            ytPath = TConfig::CInstance().TABLE_SEOCHECK_CONFIG_WHERE;
        }
    }

    LOG_INFO("update config, %s: %s", configType.c_str(), ytPath.c_str());
    UpdateConfig(filePath, ytPath, src);
    LOG_INFO("update config, %s: %s - done", configType.c_str(), ytPath.c_str());
    return 0;
}

// Fills searcher with the rules stored in the four configured rule tables
// (who, what, how, where - read in that order). Each row contributes its rule text,
// tagged with the row's Tag value cast to ERuleSource.
void LoadSearcherRules(NYT::IClientBasePtr client, TSearcher<ERuleSource> &searcher) {
    const auto &config = TConfig::CInstance();
    const TString ruleTables[] = {
        config.TABLE_SEOCHECK_CONFIG_WHO,
        config.TABLE_SEOCHECK_CONFIG_WHAT,
        config.TABLE_SEOCHECK_CONFIG_HOW,
        config.TABLE_SEOCHECK_CONFIG_WHERE,
    };

    for (const TString &tablePath : ruleTables) {
        auto reader = TTable<NProto::TSearchRule>(client, tablePath).GetReader();
        while (reader->IsValid()) {
            const NProto::TSearchRule &stored = reader->GetRow();
            searcher.AddRule(stored.GetRule(), static_cast<ERuleSource>(stored.GetTag()));
            reader->Next();
        }
    }
}

// Writes the hits that have not been seen before into a per-run update table.
//
// tx            - client/transaction all reads and writes go through
// hitsIntmTable - table holding the hits produced by the current run
// dateStrNow    - name of the update table created under TABLE_SEOCHECK_UPDATES_ROOT
//
// A hit counts as already seen when its (CandidateId, AttachmentId) pair is present in
// TABLE_SEOCHECK_PROCESSED_HITS. If the run produced no new hits, the update table is
// dropped again.
void CreateHitsUpdate(NYT::IClientBasePtr tx, const TString &hitsIntmTable, const TString &dateStrNow) {
    using THitsKey = std::pair<i64, i64>;
    const auto &config = TConfig::CInstance();

    // Collect the keys of every hit processed so far.
    auto reader = TTable<NProto::TParsed>(tx, config.TABLE_SEOCHECK_PROCESSED_HITS)
        .PreCreate()
        .GetReader()
    ;
    THashSet<THitsKey> knownHits;
    while (reader->IsValid()) {
        const auto &known = reader->GetRow();
        knownHits.emplace(known.GetCandidateId(), known.GetAttachmentId());
        reader->Next();
    }

    auto updateTable = TTable<NProto::TParsed>(
        tx, NYTUtils::JoinPath(config.TABLE_SEOCHECK_UPDATES_ROOT, dateStrNow)
    );
    auto updateWriter = updateTable.GetWriter();

    // Copy over only the hits whose key has not been seen yet.
    size_t freshHits = 0;
    reader = TTable<NProto::TParsed>(tx, hitsIntmTable).GetReader();
    while (reader->IsValid()) {
        const auto &candidate = reader->GetRow();
        const THitsKey key(candidate.GetCandidateId(), candidate.GetAttachmentId());
        if (!knownHits.contains(key)) {
            updateWriter->AddRow(candidate);
            ++freshHits;
        }
        reader->Next();
    }
    updateWriter->Finish();

    // Do not leave an empty update table behind.
    if (freshHits == 0) {
        updateTable.Drop();
    }
}

// Reads TABLE_SEOCHECK_PROCESSED_TABLES into processedTables as table name -> timestamp.
// Entries already present in the map are overwritten by rows with the same table name.
void LoadProcessedTables(NYT::IClientBasePtr tx, TMap<TString, TString> &processedTables) {
    const auto &config = TConfig::CInstance();
    auto reader = TTable<NProto::TProcessedTable>(tx, config.TABLE_SEOCHECK_PROCESSED_TABLES)
        .PreCreate()
        .GetReader()
    ;

    while (reader->IsValid()) {
        const auto &entry = reader->GetRow();
        processedTables[entry.GetTableName()] = entry.GetTimestamp();
        reader->Next();
    }
}

// Writes processedTables (table name -> timestamp) to TABLE_SEOCHECK_PROCESSED_TABLES.
// The output is declared as sorted by "TableName"; rows are emitted in the TMap's
// iteration order, i.e. ordered by table name.
void SaveProcessedTables(NYT::IClientBasePtr tx, const TMap<TString, TString> &processedTables) {
    const auto &config = TConfig::CInstance();
    auto writer = TTable<NProto::TProcessedTable>(tx, config.TABLE_SEOCHECK_PROCESSED_TABLES)
        .AsSortedOutput({"TableName"})
        .GetWriter()
    ;

    NProto::TProcessedTable record;
    for (const auto &entry : processedTables) {
        const TString &tableName = entry.first;
        const TString &timestamp = entry.second;
        record.SetTableName(tableName);
        record.SetTimestamp(timestamp);
        writer->AddRow(record);
    }

    writer->Finish();
}

int SeoCheck(int, const char **) {
    NYT::IClientPtr client = NYT::CreateClient(TConfig::CInstance().MR_SERVER_HOST);

    NYTUtils::CreatePath(client, TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_ROOT);
    NYTUtils::CreatePath(client, TConfig::CInstance().TABLE_SEOCHECK_UPDATES_ROOT);

    TSearcher<ERuleSource> searcher;
    LoadSearcherRules(client, searcher);

    NYT::ITransactionPtr tx = client->StartTransaction();

    TMap<TString, TString> processedTables;
    LoadProcessedTables(tx, processedTables);

    const TString dateStrNow = Now().ToStringUpToSeconds();
    THashMap<ui32, TString> tableConfig;
    for (const auto &table : tx->List(TConfig::CInstance().TABLE_SOURCE_FEMIDA_ROOT)) {
        const TString tableName = table.AsString();
        if (!processedTables.contains(tableName)) {
            LOG_INFO("seocheck, input %s", tableName.c_str());
            tableConfig[tableConfig.size()] = tableName;
            processedTables[tableName] = dateStrNow;
        }

        if (tableConfig.size() == 30) {
            break;
        }
    }

    TMapCmd<TMapper> cmd(tx, new TMapper(searcher, tableConfig));
    for (const auto &obj : tableConfig) {
        cmd.Input<NYT::TNode>(NYTUtils::JoinPath(TConfig::CInstance().TABLE_SOURCE_FEMIDA_ROOT, obj.second));
    }

    if (!tableConfig.empty()) {
        const TString TABLE_SEOCHECK_PROCESSED_SNAPSHOT_INTM = TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_SNAPSHOT + ".intm";
        const TString TABLE_SEOCHECK_PROCESSED_HITS_INTM = TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_HITS + ".intm";

        LOG_INFO("seocheck, output %s", TABLE_SEOCHECK_PROCESSED_SNAPSHOT_INTM.c_str());
        LOG_INFO("seocheck, output %s", TABLE_SEOCHECK_PROCESSED_HITS_INTM.c_str());

        cmd
            .Output(TTable<NProto::TParsed>(tx, TABLE_SEOCHECK_PROCESSED_SNAPSHOT_INTM))
            .Output(TTable<NProto::TParsed>(tx, TABLE_SEOCHECK_PROCESSED_HITS_INTM))
            .Do()
        ;

        TSortCmd<NProto::TParsed>(tx)
            .Input(TTable<NProto::TParsed>(tx, TABLE_SEOCHECK_PROCESSED_SNAPSHOT_INTM))
            .Input(TTable<NProto::TParsed>(tx, TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_SNAPSHOT))
            .Output(TTable<NProto::TParsed>(tx, TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_SNAPSHOT))
            .By({"CandidateId", "AttachmentId"})
            .Do()
        ;

        tx->Remove(TABLE_SEOCHECK_PROCESSED_SNAPSHOT_INTM);

        SaveProcessedTables(tx, processedTables);
        CreateHitsUpdate(tx, TABLE_SEOCHECK_PROCESSED_HITS_INTM, dateStrNow);

        TSortCmd<NProto::TParsed>(tx)
            .Input(TTable<NProto::TParsed>(tx, TABLE_SEOCHECK_PROCESSED_HITS_INTM))
            .Input(TTable<NProto::TParsed>(tx, TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_HITS))
            .Output(TTable<NProto::TParsed>(tx, TConfig::CInstance().TABLE_SEOCHECK_PROCESSED_HITS))
            .By({"CandidateId", "AttachmentId"})
            .Do()
        ;

        tx->Remove(TABLE_SEOCHECK_PROCESSED_HITS_INTM);
    }

    tx->Commit();
    LOG_INFO("seocheck, done");

    return 0;
}

} //namespace NWebmaster

int main(int argc, const char **argv) {
    NYT::Initialize(argc, argv);
    using namespace NWebmaster;

    TModChooser modChooser;
    modChooser.AddMode("UpdateConfig", UpdateConfig, "Update tokens config");
    modChooser.AddMode("SeoCheck", SeoCheck, "Do SEO check");
    return modChooser.Run(argc, argv);
}
