#include <util/draft/date.h>
#include <util/string/printf.h>
#include <util/generic/size_literals.h>

#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/string_utils/url/url.h>

#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/acceptance/conf/config.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>

#include "jupiter_acceptance.h"

namespace NWebmaster::NAcceptance {

using namespace NJupiter;

// Column names shared by the reducers and the statistics tables in this file.
// Declared constexpr so the pointers themselves cannot be reassigned at runtime.
static constexpr const char *F_HOST = "Host";                   // host column of the compared tables
static constexpr const char *F_CHANGED_FIELD = "ChangedField";  // name of a compared field
static constexpr const char *F_COUNT = "Count";                 // number of keys where that field differs

// Collapses all rows sharing one `ChangedField` key into a single row whose
// `Count` is the sum of the `Count` values of the group.
struct TJoinChangedFieldsStatisticsReducer
        : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {

public:

    void Do(TReader *input, TWriter *output) override {
        // The key value is taken from the first row of the group.
        const TString fieldName = input->GetRow()[F_CHANGED_FIELD].AsString();

        long total = 0;
        for (; input->IsValid(); input->Next()) {
            total += input->GetRow()[F_COUNT].AsInt64();
        }

        NYT::TNode aggregated;
        aggregated[F_CHANGED_FIELD] = fieldName;
        aggregated[F_COUNT] = total;
        output->AddRow(aggregated);
    }
};

REGISTER_REDUCER(TJoinChangedFieldsStatisticsReducer)

// Reducer over two input tables: index 0 is the current table, index 1 is the
// acceptance table. For every reduce key whose `Host` is in WebmasterHosts it
// compares the listed fields of the two rows and counts, per field name, the
// number of keys where the values differ.
//
// Counters are accumulated in `Statistics` across Do() calls on this instance
// and written out as (`ChangedField`, `Count`) rows from Finish().
struct TJupiterAcceptanceComputeChangesReducer
        : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
private:
    // Input table indices; static so they take no per-instance storage.
    static constexpr ui32 TABLENO_CURRENT_TABLE = 0;
    static constexpr ui32 TABLENO_ACCEPTANCE_TABLE = 1;
public:
    // Only the configuration is serialized; Statistics is not.
    Y_SAVELOAD_JOB(FieldNames, WebmasterHosts)

    TJupiterAcceptanceComputeChangesReducer() = default;

    // fieldNames     - (name, type) pairs of the fields to compare; only the name is used here.
    // webmasterHosts - hosts whose rows take part in the comparison.
    TJupiterAcceptanceComputeChangesReducer(const TVector<std::pair<TString, NYT::EValueType>> &fieldNames,
                                            const THashSet<TString> &webmasterHosts)
            : FieldNames(fieldNames), WebmasterHosts(webmasterHosts) {
    }

    // Processes one reduce key. Throws yexception if a row arrives from a
    // table index other than 0 or 1.
    void Do(TReader *input, TWriter *output) override {
        // Nothing is written per key; rows are emitted from Finish().
        Y_UNUSED(output);
        // Stand-in value for a field that is absent from a row (or for a missing row).
        static NYT::TNode nullNode = NYT::TNode::CreateEntity();
        NYT::TNode prevState;
        NYT::TNode newState;
        const TString host = input->GetRow()[F_HOST].AsString();
        if (!WebmasterHosts.contains(host)) {
            return;
        }

        // If a table holds several rows for the key, the last one read wins.
        for (const auto &cursor : *input) {
            if (cursor.GetTableIndex() == TABLENO_CURRENT_TABLE) {
                prevState = cursor.GetRow();
            } else if (cursor.GetTableIndex() == TABLENO_ACCEPTANCE_TABLE) {
                newState = cursor.GetRow();
            } else {
                ythrow yexception() << "expected reduce on 2 tables, got table index " << cursor.GetTableIndex();
            }
        }

        for (const auto &pair: FieldNames) {
            const auto &fieldName = pair.first;
            // A state that was never assigned is not a map, so all its fields resolve to nullNode.
            const NYT::TNode &prevValue =
                    prevState.IsMap() && prevState.HasKey(fieldName) ? prevState[fieldName] : nullNode;
            const NYT::TNode &newValue =
                    newState.IsMap() && newState.HasKey(fieldName) ? newState[fieldName] : nullNode;
            if (prevValue != newValue) {
                Statistics[fieldName]++;
            }
        }
    }

    // Emits one (`ChangedField`, `Count`) row per field that changed at least once.
    void Finish(TWriter *writer) override {
        for (const auto &stats: Statistics) {
            writer->AddRow(NYT::TNode()
                                   (F_CHANGED_FIELD, stats.first)
                                   (F_COUNT, stats.second));
        }
    }

public:
    TVector<std::pair<TString, NYT::EValueType>> FieldNames;
    THashSet<TString> WebmasterHosts;
    // field name -> number of keys where the field differs between the two tables
    THashMap<TString, long> Statistics;

};

REGISTER_REDUCER(TJupiterAcceptanceComputeChangesReducer)

// Reads (`ChangedField`, `Count`) rows from `tableName` and returns the summed
// count per field. Every field listed in `fields` is present in the result,
// with 0 when the table has no row for it.
THashMap<TString, long>
MonitorJupiterChangesShare(const NYT::IClientBasePtr &client,
                           const TString &tableName,
                           const TVector<std::pair<TString, NYT::EValueType>> &fields) {

    THashMap<TString, long> totals;
    // Pre-seed with zeros so unchanged fields are still reported.
    for (const auto &field : fields) {
        totals[field.first] = 0;
    }

    auto tableReader = client->CreateTableReader<NYT::TNode>(tableName);
    for (; tableReader->IsValid(); tableReader->Next()) {
        const NYT::TNode &record = tableReader->GetRow();
        totals[record[F_CHANGED_FIELD].AsString()] += record[F_COUNT].AsInt64();
    }

    return totals;
}

// Wraps a table name into a rich path. Uncomment one of the ranges below to
// restrict a run to a single host key while debugging.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://khaliullin.info"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://petinaprokopova.rajce.idnes.cz"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey({"https://cwetochki.ru"}))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://www.drive2.ru"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://meshok.net"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://petskuafor.com"))));
    return richPath;
}

// Compares `tableCurrentName` with `tablePriemkaName` field by field and appends
// one summary row to `<outputRootName>/statistics`. All work goes through `tx`.
//
//   1. If the statistics table already has a row for the
//      (currentState, priemkaState) pair, logs that, aborts `tx` and returns.
//   2. Reduces both tables by `keys` into `<outputRootName>/temp_statistics`,
//      then sorts and merges the partial counters by `ChangedField`.
//   3. Reads the merged counters, removes the temp table, appends the summary
//      row and re-sorts the statistics table by `Timestamp`.
//
// Throws yexception if `keys` has no `Host` column or if the webmaster hosts
// table cannot be loaded. Always returns 0.
//
// NOTE(review): in the "already compared" branch the transaction is aborted,
// yet the callers in this file go on to use `tx` (SetAttr / Commit) after the
// zero return - confirm this is intended.
int AcceptanceJupiter(const NYT::ITransactionPtr &tx,
                      const TString &currentState,
                      const TString &priemkaState,
                      const TString &tableCurrentName,
                      const TString &tablePriemkaName,
                      const TString &outputRootName,
                      const TVector<TString> &keys, // must include the Host column, because rows are filtered by webmaster hosts
                      const TVector<std::pair<TString, NYT::EValueType>> &fields) {

    const bool hasHostKey = Find(keys, F_HOST) != keys.end();
    if (!hasHostKey) {
        ythrow yexception() << "Expected key column `Host`";
    }

    const TString tableTempStatisticsName = NYTUtils::JoinPath(outputRootName, "temp_statistics");
    const TString tableStatisticsName = NYTUtils::JoinPath(outputRootName, "statistics");

    if (tx->Exists(tableStatisticsName)) {
        bool seenBefore = false;

        auto statisticsReader = tx->CreateTableReader<NYT::TNode>(tableStatisticsName);
        for (; statisticsReader->IsValid(); statisticsReader->Next()) {
            const NYT::TNode &row = statisticsReader->GetRow();
            if (row["CurrentTableState"].AsString() == currentState
                && row["NextTableState"].AsString() == priemkaState) {
                seenBefore = true;
            }
        }

        if (seenBefore) {
            LOG_ERROR("current state - %s and priemka state - %s already compared", currentState.c_str(),
                      priemkaState.c_str());
            LOG_ERROR("statistics info for this state you can find in - %s", tableStatisticsName.c_str());
            tx->Abort();
            return 0;
        }
    }

    // Intermediate table holding per-field change counters.
    NYT::TRichYPath tempStatisticsTable(tableTempStatisticsName);
    tempStatisticsTable.Schema(NYT::TTableSchema()
                                       .AddColumn(F_CHANGED_FIELD, NYT::EValueType::VT_STRING)
                                       .AddColumn(F_COUNT, NYT::EValueType::VT_INT64)
    );

    LOG_INFO("Current: %s", tableCurrentName.c_str());
    LOG_INFO("Next: %s", tablePriemkaName.c_str());
    LOG_INFO("Statistics: %s", tableStatisticsName.c_str());

    {
        // The host set lives only inside this scope.
        LOG_INFO("Start read webmasterHosts at %s", ToString(TInstant::Now().TimeT()).c_str());
        THashSet<TString> webmasterHosts(6000000);
        const bool hostsLoaded = NYTUtils::LoadWebmastersHosts(tx, TConfig::CInstance().TABLE_WEBMATER_HOSTS,
                                                               webmasterHosts, 4000000);
        if (!hostsLoaded) {
            ythrow yexception() << "there is problem with webmaster hosts table";
        }

        LOG_INFO("Start MapReduce at %s", ToString(TInstant::Now().TimeT()).c_str());
        // Compute partial statistics.
        TReduceCmd<TJupiterAcceptanceComputeChangesReducer>
                (tx, new TJupiterAcceptanceComputeChangesReducer(fields, webmasterHosts))
                .Input<NYT::TNode>(DebugPath(tableCurrentName))
                .Input<NYT::TNode>(DebugPath(tablePriemkaName))
                .Output<NYT::TNode>(tempStatisticsTable)
                .MemoryLimit(4_GBs)
                .ReduceBy(keys)
                .Do();

        TSortCmd<NYT::TNode>(tx)
                .Input<NYT::TNode>(tempStatisticsTable)
                .Output<NYT::TNode>(tempStatisticsTable)
                .By(F_CHANGED_FIELD)
                .OperationWeight(TConfig::CInstance().OPERATION_WEIGHT)
                .Do();

        // Merge the partial counters into one row per field.
        TReduceCmd<TJoinChangedFieldsStatisticsReducer>
                (tx)
                .Input<NYT::TNode>(tempStatisticsTable)
                .Output<NYT::TNode>(tempStatisticsTable)
                .ReduceBy(F_CHANGED_FIELD)
                .Do();
    }

    LOG_INFO("Start process statistics at %s", ToString(TInstant::Now().TimeT()).c_str());
    THashMap<TString, long> changedCounters = MonitorJupiterChangesShare(tx, tableTempStatisticsName, fields);
    LOG_INFO("Finish process statistics at %s", ToString(TInstant::Now().TimeT()).c_str());

    // Append to the statistics table instead of overwriting it.
    NYT::TRichYPath statisticsTable(NYT::TRichYPath(tableStatisticsName).Append(true));
    auto statisticsTableWriter = tx->CreateTableWriter<NYT::TNode>(statisticsTable);

    tx->Remove(tableTempStatisticsName);

    NYT::TNode summaryRow;
    summaryRow["CurrentTableState"] = currentState;
    summaryRow["NextTableState"] = priemkaState;
    summaryRow["Timestamp"] = TInstant::Now().MilliSeconds();
    summaryRow["RowsCountInAcceptanceTable"] = TTable<NYT::TNode>(tx, tablePriemkaName).GetRecordsCount();
    for (const auto &[fieldName, changedCount] : changedCounters) {
        summaryRow[fieldName] = changedCount;
    }

    statisticsTableWriter->AddRow(summaryRow);
    statisticsTableWriter->Finish();

    TSortCmd<NYT::TNode>(tx)
            .Input<NYT::TNode>(tableStatisticsName)
            .Output<NYT::TNode>(tableStatisticsName)
            .By("Timestamp")
            .OperationWeight(TConfig::CInstance().OPERATION_WEIGHT)
            .Do();

    return 0;
}

// Fills the comparison settings for the `spread_export_urldat` mode: path
// template, output root, reduce keys and compared fields. Then reads the last
// processed state from the root's attribute (falling back to `defaultState`)
// and calls LoadSpreadTables with that state's timestamp + 1.
void GetSpreadExportUrldatData(const NYT::ITransactionPtr &tx, const TString &defaultState, TString &curSpreadState,
                               TString &pathSpreadTemplate, TVector<std::pair<TString, NYT::EValueType>> &fields,
                               TString &rootPath, TVector<TString> &keys, TDeque<NYTUtils::TTableInfo> &spreadTables) {
    const auto &config = TConfig::CInstance();

    pathSpreadTemplate = "//home/jupiter/spread_export/%s/urldat";
    rootPath = config.TABLE_ACCEPTANCE_SPREAD_EXPORT_URLDAT_ROOT;
    keys = {"Host", "Path"};
    fields = {
            {"LastAccess", NYT::VT_UINT64},
            {"HttpCode", NYT::VT_UINT64},
            {"MimeType", NYT::VT_UINT64},
            {"Encoding", NYT::VT_INT64},
            {"RelCanonicalTarget", NYT::VT_STRING},
            {"IsRedirect", NYT::VT_BOOLEAN},
            {"RedirTarget", NYT::VT_STRING},
            {"FetchTime", NYT::VT_UINT64},
            {"SourceId", NYT::VT_UINT64},
            {"SourceName", NYT::VT_STRING},
    };

    curSpreadState = NYTUtils::GetAttrOrDefault<TString>(tx, rootPath, config.ATTR_LAST_PROCESSED_TABLES,
                                                         defaultState);
    const auto firstUnprocessedTs = GetTsTZFromJupiterState(curSpreadState) + 1;
    LoadSpreadTables(tx, firstUnprocessedTs, spreadTables);
}

// Fills the comparison settings for the `spread_export_hostdat` mode: path
// template, output root, reduce keys and compared fields. Then reads the last
// processed state from the root's attribute (falling back to `defaultState`)
// and calls LoadHostSpreadTables with that state's timestamp + 1.
void GetSpreadExportHostdatData(const NYT::ITransactionPtr &tx, const TString &defaultState, TString &curSpreadState,
                                TString &pathSpreadTemplate, TVector<std::pair<TString, NYT::EValueType>> &fields,
                                TString &rootPath, TVector<TString> &keys, TDeque<NYTUtils::TTableInfo> &spreadTables) {
    const auto &config = TConfig::CInstance();

    pathSpreadTemplate = "//home/jupiter/spread_export/%s/hostdat";
    rootPath = config.TABLE_ACCEPTANCE_SPREAD_EXPORT_HOSTDAT_ROOT;
    keys = {"Host"};
    fields = {
            {"LastAccess", NYT::VT_UINT64},
            {"ExportTime", NYT::VT_UINT64},
            {"Robots", NYT::VT_STRING},
    };

    curSpreadState = NYTUtils::GetAttrOrDefault<TString>(tx, rootPath, config.ATTR_LAST_PROCESSED_TABLES,
                                                         defaultState);
    const auto firstUnprocessedTs = GetTsTZFromJupiterState(curSpreadState) + 1;
    LoadHostSpreadTables(tx, firstUnprocessedTs, spreadTables);
}

// Runs the acceptance comparison for one of the `spread_export_*` modes.
// Compares the table of the last processed state with the first table returned
// by the mode's loader, then stores the new state in the root's attribute and
// commits `tx`. If there is no table to process, logs that, aborts `tx` and
// returns. Throws yexception on an unknown mode. Always returns 0.
int ProcessSpreadMode(NYT::ITransactionPtr &tx, const TString &acceptanceMode) {
    TString curSpreadState;
    TString pathSpreadTemplate;
    TVector<std::pair<TString, NYT::EValueType>> fields;
    TString rootPath;
    TVector<TString> keys;
    TDeque<NYTUtils::TTableInfo> spreadTables;

    // State used for the very first comparison, when no attribute is stored yet.
    TString defaultState = "20210204-000543";

    const bool isUrldat = acceptanceMode == "spread_export_urldat";
    const bool isHostdat = !isUrldat && acceptanceMode == "spread_export_hostdat";
    if (isUrldat) {
        GetSpreadExportUrldatData(tx, defaultState, curSpreadState, pathSpreadTemplate,
                                  fields, rootPath, keys, spreadTables);
    } else if (isHostdat) {
        GetSpreadExportHostdatData(tx, defaultState, curSpreadState, pathSpreadTemplate,
                                   fields, rootPath, keys, spreadTables);
    } else {
        ythrow yexception() << "UNKNOWN SPREAD MODE: " << acceptanceMode;
    }

    if (spreadTables.empty()) {
        LOG_ERROR("No table for process");
        tx->Abort();
        return 0;
    }

    // Only the first pending table is processed per run.
    const NYTUtils::TTableInfo nextTable = spreadTables.front();
    const TString spreadPriemkaName = nextTable.Name;
    const TString priemkaSpreadState = GetJupiterStateFromPath(spreadPriemkaName);
    const TString spreadCurrentName = Sprintf(pathSpreadTemplate.c_str(), curSpreadState.c_str());

    AcceptanceJupiter(tx, curSpreadState, priemkaSpreadState, spreadCurrentName, spreadPriemkaName,
                      rootPath, keys, fields);

    NYTUtils::SetAttr(tx, rootPath, TConfig::CInstance().ATTR_LAST_PROCESSED_TABLES, priemkaSpreadState);
    tx->Commit();
    return 0;
}

// Fills the comparison settings for the `urls_for_webmaster_simple` table:
// path template, output root, reduce keys and the list of compared fields.
void GetWebmasterSimpleData(TString &pathJupiterTemplate, TVector<std::pair<TString, NYT::EValueType>> &fields,
                            TString &rootPath, TVector<TString> &keys) {
    pathJupiterTemplate = "//home/jupiter/acceptance/%s/urls_for_webmaster_simple";
    rootPath = TConfig::CInstance().TABLE_ACCEPTANCE_URLS_FOR_WEBMASTER_SIMPLE_ROOT;
    keys = {"Host", "Path"};
    fields = {
            {"UrlStatus", NYT::VT_INT64},
            {"BeautyUrl", NYT::VT_STRING},
            {"LastAccess", NYT::VT_INT64},
            {"AddTime", NYT::VT_INT64},
            {"MainHost", NYT::VT_STRING},
            {"MainPath", NYT::VT_STRING},
            {"MainMirrorHost", NYT::VT_STRING},
            {"RedirTarget", NYT::VT_STRING},
            {"HttpCode", NYT::VT_INT64},
            {"MimeType", NYT::VT_INT64},
            {"RelCanonicalTarget", NYT::VT_STRING},
            {"IsIndexed", NYT::VT_BOOLEAN},
            {"IsFake", NYT::VT_BOOLEAN},
            {"IsSearchable", NYT::VT_BOOLEAN},
            {"SourceId", NYT::VT_INT64},
            {"SourceName", NYT::VT_STRING},
            {"WebTier", NYT::VT_INT64},
            {"FromSitemap", NYT::VT_BOOLEAN},
            {"LastWatchLogCounterId", NYT::VT_INT64},
    };
}

// Fills the comparison settings for the `content_attrs` table: path template,
// output root, reduce keys and the list of compared fields.
void GetJupiterContentAttrsData(TString &pathJupiterTemplate, TVector<std::pair<TString, NYT::EValueType>> &fields,
                                TString &rootPath, TVector<TString> &keys) {
    pathJupiterTemplate = "//home/jupiter/export/%s/webmaster/content_attrs";
    rootPath = TConfig::CInstance().TABLE_ACCEPTANCE_CONTENT_ATTRS_ROOT;
    keys = {"Host", "Path"};
    fields = {
            {"Title", NYT::VT_STRING},
            {"TitleRawUTF8", NYT::VT_STRING},
            {"MetaDescription", NYT::VT_STRING},
    };
}

// Fills the comparison settings for the `hosttable` table: path template,
// output root, reduce keys and the list of compared fields.
void GetJupiterHosttableData(TString &pathJupiterTemplate, TVector<std::pair<TString, NYT::EValueType>> &fields,
                             TString &rootPath, TVector<TString> &keys) {
    pathJupiterTemplate = "//home/jupiter/acceptance/%s/hosttable";
    rootPath = TConfig::CInstance().TABLE_ACCEPTANCE_HOSTTABLE_ROOT;
    keys = {"Host"};
    fields = {
            {"BannedByRobotsTxtSince", NYT::VT_INT64},
            {"HostStatus", NYT::VT_INT64},
            {"LastAccess", NYT::VT_INT64},
            {"Robots", NYT::VT_STRING},
            {"RobotsHTTPCode", NYT::VT_INT64},
    };
}

// Runs the acceptance comparison for one of the `jupiter_*` modes.
//
// Fetches the production and the acceptance Jupiter states; if they are equal,
// logs that, aborts `tx` and returns. Otherwise picks the table settings for
// `acceptanceMode`, compares the two tables via AcceptanceJupiter and commits `tx`.
//
// Throws yexception when a state cannot be obtained, when the mode is unknown,
// or for `jupiter_urls_for_webmaster_simple`, which is rejected with a message
// saying it moved to task_basediff. Always returns 0.
int processJupiterMode(NYT::ITransactionPtr &tx, const TString &acceptanceMode) {
    TString curJupyterState;
    TString priemkaJupyterState;
    TString error;

    if (!GetJupiterProductionState(tx, curJupyterState, error)) {
        ythrow yexception() << error;
    }

    if (!GetJupiterDessertAcceptanceState(tx, priemkaJupyterState, error)) {
        ythrow yexception() << error;
    }

    if (curJupyterState == priemkaJupyterState) {
        LOG_ERROR("Priemka and production have same states");
        tx->Abort();
        return 0;
    }

    // Settings filled in by the mode-specific helper below.
    TString pathJupiterTemplate;
    TVector<std::pair<TString, NYT::EValueType>> fields;
    TString rootPath;
    TVector<TString> keys;

    if (acceptanceMode == "jupiter_hosttable") {
        GetJupiterHosttableData(pathJupiterTemplate, fields, rootPath, keys);
    } else if (acceptanceMode == "jupiter_content_attrs") {
        GetJupiterContentAttrsData(pathJupiterTemplate, fields, rootPath, keys);
    } else if (acceptanceMode == "jupiter_urls_for_webmaster_simple") {
        // This mode is no longer handled here, so no settings are loaded for it.
        ythrow yexception() << "JUPYTER MODE jupiter_urls_for_webmaster_simple moved to task_basediff: " << acceptanceMode;
    } else {
        ythrow yexception() << "UNKNOWN JUPYTER MODE: " << acceptanceMode;
    }

    const TString jupiterCurrentName = Sprintf(pathJupiterTemplate.c_str(), curJupyterState.c_str());
    const TString jupiterPriemkaName = Sprintf(pathJupiterTemplate.c_str(), priemkaJupyterState.c_str());
    AcceptanceJupiter(tx,
                      curJupyterState,
                      priemkaJupyterState,
                      jupiterCurrentName,
                      jupiterPriemkaName,
                      rootPath,
                      keys,
                      fields
    );
    tx->Commit();
    return 0;
}

int AcceptanceJupiter(int argc, const char **argv) {
    TString acceptanceMode;

    NLastGetopt::TOpts opts;
    opts.AddLongOption("mode", "mode for choose kind of acceptance")
            .StoreResult(&acceptanceMode)
            .Required();

    const auto parseResult = NLastGetopt::TOptsParseResult(&opts, argc, argv);
    NYT::IClientPtr client = NYT::CreateClient(TConfig::CInstance().MR_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    if (acceptanceMode.StartsWith("jupiter")) {
        return processJupiterMode(tx, acceptanceMode);
    }
    if (acceptanceMode.StartsWith("spread")) {
        return ProcessSpreadMode(tx, acceptanceMode);
    }

    ythrow yexception() << "UNKNOWN MODE: " << acceptanceMode;
}
} // namespace NWebmaster::NAcceptance
