#include <util/datetime/cputimer.h>
#include <util/generic/vector.h>
#include <util/generic/size_literals.h>
#include <util/generic/ymath.h>
#include <util/thread/pool.h>

#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/indexing/sitetree/protos/searchbase.pb.h>
#include <wmconsole/version3/protos/common.pb.h>
#include <wmconsole/version3/protos/exported.pb.h>
#include <wmconsole/version3/wmcutil/http_client.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>

#include "config.h"
#include "monitor.h"
#include "monsters.h"
#include "reduce_build_trees.h"
#include "reduce_urls.h"
#include "task_sitetree.h"

namespace NWebmaster {

using namespace NJupiter;

// Sanity check on the size of the Jupiter source table.
//
// info         - table info whose RecordCount is compared against the threshold
// goodRowCount - minimal acceptable number of rows
//
// Returns true when info.RecordCount is not below goodRowCount. Otherwise the
// reason is reported through LOG_ERROR and false is returned; the exception
// raised inside never leaves this function.
bool IsJupiterSrcRowCountGood(const NYTUtils::TTableInfo &info, size_t goodRowCount) {
    try {
        const bool tooFewRows = info.RecordCount < goodRowCount;
        if (tooFewRows) {
            ythrow yexception() << "row count " << info.RecordCount << " is below threshold " << goodRowCount;
        }
        return true;
    } catch (const yexception &error) {
        LOG_ERROR("jupiter source sanity check (row count): %s", error.what());
    }

    return false;
}

// Sanity check on the freshness of the Jupiter source table.
//
// info - table info; info.Time is interpreted as seconds (it is passed to
//        TInstant::Seconds) and compared against the current time.
//
// Returns true when the table is at most three weeks (21 days) old. Otherwise
// the age in whole days is reported through LOG_ERROR and false is returned;
// the exception raised inside never leaves this function.
bool IsJupiterSrcModifyTimeGood(const NYTUtils::TTableInfo &info) {
    try {
        const size_t maxAgeDays = 3 * 7;
        const auto elapsed = Now() - TInstant::Seconds(info.Time);
        const size_t ageDays = elapsed.Hours() / 24;

        if (ageDays > maxAgeDays) {
            ythrow yexception() << "age " << ageDays;
        }
        return true;
    } catch (const yexception &error) {
        LOG_ERROR("jupiter source sanity check (table age): %s", error.what());
    }

    return false;
}

// Runs all sanity checks on the Jupiter source table.
//
// client       - YT client used to fetch the table info
// jupiterTable - path of the table to inspect
// goodRowCount - minimal acceptable number of rows
//
// Returns true only when the table info could be fetched and both the row
// count check and the modification time check pass. Every failure is logged.
bool IsJupiterSrcGood(NYT::IClientBasePtr client, const TString &jupiterTable, size_t goodRowCount) {
    NYTUtils::TTableInfo info;
    if (!NYTUtils::GetTableInfo(client, jupiterTable, info)) {
        LOG_ERROR("unable to get table info %s", jupiterTable.data());
        // Without table info there is nothing meaningful to validate: bail out
        // instead of running the checks against a never-populated structure.
        return false;
    }

    return IsJupiterSrcRowCountGood(info, goodRowCount)
        && IsJupiterSrcModifyTimeGood(info);
}

// Pushes sitetree counters to the monitor via MonitorPushSitetrees.
//
// client            - YT client used to fetch table info
// totalDocsInSearch - forwarded to the monitor as is
//
// The push is best effort: a failure to read the hosts table info is logged
// and the push still happens with whatever `hosts` holds.
void UpdateMonitor(NYT::IClientBasePtr client, i64 totalDocsInSearch) {
    const auto &cfg = TConfig::CInstance();
    NYTUtils::TTableInfo urls, hosts;

    if (!NYTUtils::GetTableInfo(client, cfg.TABLE_SOURCE_WEBMASTER_HOSTS, hosts)) {
        LOG_WARN("sitetree, unable to get table info %s", cfg.TABLE_SOURCE_WEBMASTER_HOSTS.data());
    }

    // NOTE(review): `urls` is never filled in this function, so the value of
    // urls.RecordCount pushed below does not reflect any table. The table it
    // should be read from is not visible here - TODO confirm and load it.
    MonitorPushSitetrees(cfg.MONITOR_PERFORMANCE_SUFFIX, urls.RecordCount, totalDocsInSearch, hosts.RecordCount);
}

// Reads TConfig::TABLE_SITETREE_HOSTS_MONSTERS row by row and adds the key of
// every row to `monsterHosts`. Each key is also written to the info log.
//
// client       - YT client used to open the table reader
// monsterHosts - output set; existing content is kept, new keys are added
void LoadMonsterHosts(NYT::IClientBasePtr client, THashSet<TString> &monsterHosts) {
    const auto &cfg = TConfig::CInstance();
    auto tableReader = client->CreateTableReader<NYT::TYamr>(cfg.TABLE_SITETREE_HOSTS_MONSTERS);

    while (tableReader->IsValid()) {
        const NYT::TYamr &record = tableReader->GetRow();
        const auto &host = record.GetKey();

        monsterHosts.insert(host);
        LOG_INFO("sitetree, monster host [%s]", host.c_str());

        tableReader->Next();
    }
}

// Wraps a table name into NYT::TRichYPath.
//
// With all range filters below commented out the path is returned unchanged.
// The commented-out lines are a debugging aid: each one adds an exact-key read
// range for a single host (presumably to run the pipeline on one host only).
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath path(table);
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://khaliullin.info"))));
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://auto.ru"))));
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://01center.ru"))));
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://100realt.ru"))));
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://www.avito.ru"))));
    //path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://nefel.trgf.forcaboli1987.mhsite.us"))));
    return path;
}

// Entry point of the sitetree task: rebuilds site trees from the current
// Jupiter acceptance table and publishes them into a timestamped table.
//
// Both parameters (argc/argv-like) are unused.
//
// Returns 0 when the acceptance source was already processed or when the
// rebuild was committed; returns 1 when the Jupiter source sanity check fails
// or the webmaster hosts cannot be loaded. Exceptions raised by the YT calls
// outside the explicit try block below are not caught here.
int TaskSiteTrees(int, const char **) {
    const auto &cfg = TConfig::CInstance();

    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST);

    // The acceptance table carries an attribute naming the source it was built
    // from; the timestamp is derived from that source path.
    const TString jupiterAcceptanceTable    = cfg.TABLE_SEARCH_STORED_ACCEPTANCE;
    const TString jupiterAcceptanceSource   = GetYtAttr(client, jupiterAcceptanceTable, TAttrName::AcceptanceSource).AsString();
    const time_t jupiterAcceptanceTs        = GetJupiterTsTZFromPath(jupiterAcceptanceSource);

    // Intermediate tables (all four are removed before commit) and the final
    // destination, which is suffixed with the run timestamp.
    const TString outputProcessedTmp        = NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "sitetree", "sitetree-processed");
    const TString outputProcessedMonsterTmp = NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "sitetree", "monster-sitetree-processed");
    const TString reducedUrlsTable          = cfg.TABLE_SITETREE_REDUCED_URLS;
    const TString reducedUrlsMonsterTable   = cfg.TABLE_SITETREE_REDUCED_URLS_MONSTER;
    const TString destinationTable          = cfg.TABLE_SITETREE_READY_PREFIX + cfg.GetRunTimestamp();

    // Skip the whole run when the sitetree root is already marked with the
    // current acceptance source. A yexception from the attribute lookup is
    // only logged as a warning and the rebuild proceeds.
    try {
        if (GetYtAttr(client, cfg.TABLE_SITETREE_ROOT, TAttrName::AcceptanceSource).AsString() == jupiterAcceptanceSource) {
            LOG_INFO("sitetree, acceptance source is already processed: %s", jupiterAcceptanceSource.data());
            return 0;
        }
    } catch (yexception &e) {
        LOG_WARN("sitetree, %s", e.what());
    }

    LOG_INFO("sitetree, acceptance source: %s", jupiterAcceptanceSource.data());

    // Refuse to build trees from a source that is too small or too old.
    if (!IsJupiterSrcGood(client, jupiterAcceptanceTable, cfg.TABLE_SOURCE_JUPITER_ROW_COUNT)) {
        return 1;
    }

    THashSet<TString> webmasterHosts;
    if (!NYTUtils::LoadWebmastersHosts(client, cfg.TABLE_SOURCE_WEBMASTER_HOSTS, webmasterHosts, cfg.TABLE_SOURCE_WEBMASTER_HOSTS_ROW_COUNT)) {
        return 1;
    }

    THashSet<TString> monsterHosts;
    LoadMonsterHosts(client, monsterHosts);

    // Every operation below runs inside this transaction; nothing is visible
    // outside of it until tx->Commit() at the end.
    NYT::ITransactionPtr tx = client->StartTransaction();

    LOG_INFO("sitetree, uniq and reduce urls");
    LOG_INFO("sitetree, input %s", cfg.TABLE_SOURCE_WEBMASTER_HOSTS.c_str());
    LOG_INFO("sitetree, input %s", cfg.TABLE_SITETREE_USER_SETTINGS.c_str());
    LOG_INFO("sitetree, input %s", jupiterAcceptanceTable.c_str());
    LOG_INFO("sitetree, input %s", cfg.TABLE_SEARCH_DIFF.c_str());
    LOG_INFO("sitetree, input %s", cfg.TABLE_TURBO_PAGES.c_str());
    LOG_INFO("sitetree, output %s", reducedUrlsTable.c_str());
    LOG_INFO("sitetree, output %s", reducedUrlsMonsterTable.c_str());

    // Step 1: reduce the five inputs by (Host, Path) into two tables of
    // prepared urls - a regular one and one for monster hosts.
    // NOTE(review): both outputs are registered with the same
    // PreparedUrlOutputTag; whether the second one should use a dedicated
    // "monster" tag cannot be told from this file - TODO confirm.
    TReduceCmd<TReduceUrls>(tx, new TReduceUrls(monsterHosts, webmasterHosts))
        .Input(TTable<NProto::TWebmasterHost>(tx, DebugPath(cfg.TABLE_SOURCE_WEBMASTER_HOSTS)), WebmasterHostInputTag)
        .Input(TTable<NProto::TUserSettings>(tx, DebugPath(cfg.TABLE_SITETREE_USER_SETTINGS)), UserSettingsInputTag)
        .Input(TTable<NJupiter::TAcceptanceUrlForWebMasterRecord>(tx, DebugPath(jupiterAcceptanceTable)), SearchBaseInputTag)
        .Input(TTable<NProto::TSearchBaseDiffRecord>(tx, DebugPath(cfg.TABLE_SEARCH_DIFF)), SearchBaseDiffInputTag)
        .Input(TTable<NProto::TTurboPageInfo>(tx, DebugPath(cfg.TABLE_TURBO_PAGES)), TurboPageInputTag)
        //.Input(TTable<NYT::TYamr>(tx, cfg.TABLE_SOURCE_TEST_HOSTS))
        .Output(TTable<NProto::TPreparedUrl>(tx, reducedUrlsTable), PreparedUrlOutputTag)
        .Output(TTable<NProto::TPreparedUrl>(tx, reducedUrlsMonsterTable), PreparedUrlOutputTag)
        .MemoryLimit(4_GBs)
        .ReduceBy({"Host", "Path"})
        .Do()
    ;

    LOG_INFO("sitetree, uniq and reduce urls - done");
    LOG_INFO("sitetree, building trees");

    // Step 2: build trees for regular and monster hosts; the two commands are
    // handed to DoParallel together (presumably executed concurrently).
    // NOTE(review): jupiterAcceptanceTs is passed twice to both reducer
    // constructors; the meaning of the two arguments is not visible here -
    // TODO confirm that the same timestamp is intended for both.
    DoParallel(
        TCombineReduceCmd<TReduceBuildTrees, TReduceBuildTrees>(tx, nullptr, new TReduceBuildTrees(jupiterAcceptanceTs, jupiterAcceptanceTs))
            .Input(TTable<NProto::TPreparedUrl>(tx, reducedUrlsTable))
            .Output(TTable<NYT::TYamr>(tx, outputProcessedTmp))
            .ReducerMemoryLimit(10_GBs)
            .ReduceBy({"Host"})
            .SortBy({"Host", "Path"}),
        TMapReduceCmd<TMapMonsterSitetree, TReduceMonsterSitetree>(tx, new TMapMonsterSitetree, new TReduceMonsterSitetree(jupiterAcceptanceTs, jupiterAcceptanceTs))
            .Input(TTable<NProto::TPreparedUrl>(tx, reducedUrlsMonsterTable))
            .Output(TTable<NYT::TYamr>(tx, outputProcessedMonsterTmp))
            .ReduceBy({"key"})
            .SortBy({"key", "subkey"})
    );

    // Step 3: merge both intermediate results into the destination table,
    // sorted by (key, subkey).
    TSortCmd<NYT::TYamr>(tx)
        .Input(TTable<NYT::TYamr>(tx, outputProcessedTmp))
        .Input(TTable<NYT::TYamr>(tx, outputProcessedMonsterTmp))
        .Output(TTable<NYT::TYamr>(tx, destinationTable))
        .By({"key", "subkey"})
        .Do()
    ;

    // Mark both the sitetree root and the new table with the processed source;
    // the root attribute is what the early-exit check above compares against.
    SetYtAttr(tx, cfg.TABLE_SITETREE_ROOT, TAttrName::AcceptanceSource, jupiterAcceptanceSource);
    SetYtAttr(tx, destinationTable, TAttrName::AcceptanceSource, jupiterAcceptanceSource);

    // Drop all intermediate tables within the same transaction.
    tx->Remove(reducedUrlsTable);
    tx->Remove(reducedUrlsMonsterTable);
    tx->Remove(outputProcessedTmp);
    tx->Remove(outputProcessedMonsterTmp);

    // Note: this message is logged before the commit below has happened.
    LOG_INFO("sitetree, building trees - done");

    tx->Commit();

    return 0;
}

} //namespace NWebmaster
