#include <util/datetime/base.h>
#include <util/digest/fnv.h>
#include <util/generic/size_literals.h>
#include <util/string/cast.h>
#include <util/string/escape.h>
#include <util/string/split.h>
#include <util/string/vector.h>

#include <library/cpp/charset/doccodes.h>
#include <library/cpp/mime/types/mime.h>

#include <mapreduce/lib/all.h>

#include <robot/jupiter/protos/acceptance.pb.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/jupiter/protos/external/host_mirror.pb.h>
#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/indexing/hostinfo/conf/config.h>
#include <wmconsole/version3/processors/indexing/hostinfo/protos/hostinfo.pb.h>

#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "task_host_statistics.h"

namespace NWebmaster {

using namespace NJupiter;

namespace {
// Column names of the THostRegions tables built by this task; they are passed
// as sort / reduce keys to the YT operations below.
const char *F_HOST = "Host";
const char *F_MAIN_HOST = "MainHost";
// Column names read from the rows of the webmaster regions source table
// (see TConvertWebmasterRegions).
const char *F_HOST_WM_REGIONS = "host";
const char *F_REGION_WM_REGIONS = "region";

// Input tags of the tagged reducers: THostRegions rows (prepared dictionary /
// webmaster regions and the merged result) and THostMirror rows (mirrors table).
TInputTag <NProto::THostRegions> HostRegionsInputTag(1);
TInputTag <NJupiter::THostMirror> HostMirrorsInputTag(2);

// Output tag under which both reducers in this file write their THostRegions rows.
TOutputTag <NProto::THostRegions> HostRegionsOutputTag(10);
} // namespace

using namespace NJupiter;

/// Mapper that converts raw webmaster region rows into NProto::THostRegions records.
///
/// For every input row the value of the "host" column is passed to
/// NUtils::SplitUrl. When the split succeeds, one record is emitted whose
/// MainHost is the host part produced by the split and whose WebmasterRegions
/// holds the single value of the "region" column. Rows for which the split
/// fails are dropped.
struct TConvertWebmasterRegions : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NProto::THostRegions>> {
public:
    void Do(TReader *input, TWriter *output) override {
        // Reused across rows so the string buffers are not reallocated per row.
        TString host, path;
        for (; input->IsValid(); input->Next()) {
            // Bind by reference: the row is only read here, so copying the whole
            // TNode for every input row is unnecessary work. The reference is
            // not used after input->Next().
            const NYT::TNode &row = input->GetRow();
            if (NUtils::SplitUrl(row[F_HOST_WM_REGIONS].AsString(), host, path)) {
                NProto::THostRegions result;
                result.SetMainHost(host);
                result.AddWebmasterRegions(row[F_REGION_WM_REGIONS].AsInt64());
                output->AddRow(result);
            }
        }
    }
};
REGISTER_MAPPER(TConvertWebmasterRegions)

struct TMergeHostRegions : public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        if (!reader.IsCurrentTable(HostRegionsInputTag)) {
            reader.SkipAllRows();
            return;
        }
        NProto::THostRegions hostRegions = reader.GetRow(HostRegionsInputTag);
        reader.Next();

        for (const NProto::THostRegions& row : reader.GetRows(HostRegionsInputTag)) {
            hostRegions.MutableDictionaryRegions()->MergeFrom(row.GetDictionaryRegions());
            hostRegions.MutableWebmasterRegions()->MergeFrom(row.GetWebmasterRegions());
        }

        // propagate for all mirrors
        bool hasMainMirror = false;
        for (const NJupiter::THostMirror& hostMirror : reader.GetRows(HostMirrorsInputTag)) {
            hasMainMirror |= hostMirror.GetMainHost() == hostMirror.GetHost();
            hostRegions.SetHost(hostMirror.GetHost());
            writer.AddRow(hostRegions, HostRegionsOutputTag);
        }
        if (!hasMainMirror) {
            hostRegions.SetHost(hostRegions.GetMainHost());
            hostRegions.ClearMainHost();
            writer.AddRow(hostRegions, HostRegionsOutputTag);
        }
    }
};
REGISTER_REDUCER(TMergeHostRegions)

struct TClearDuplicateHostRegions : public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        NProto::THostRegions hostRegions;
        for (; reader.IsValid(); reader.Next()) {
            hostRegions = reader.GetRow(HostRegionsInputTag);
            if (hostRegions.HasMainHost()) {
                break; // explicit mirror is preferable
            }
        }
        writer.AddRow(hostRegions, HostRegionsOutputTag);
    }
};
REGISTER_REDUCER(TClearDuplicateHostRegions)

int TaskUpdateHostRegions(int argc, const char **argv) {
    NYT::Initialize(argc, argv);
    const auto &cfg = NHostInfo::TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST_JUPITER);
    NYT::IClientPtr dictionaryClient = NYT::CreateClient(cfg.MR_SERVER_HOST_ALTAY);

    TYtModificationTimeTrigger trigger(cfg.TABLE_EXPORT_HOST_REGIONS);
    if (!(trigger.NeedUpdate(dictionaryClient, client, "dictionary_regions", cfg.FILE_SOURCE_DICTIONARY_REGIONS) ||
          trigger.NeedUpdate(client, "webmaster_regions", cfg.TABLE_SOURCE_WEBMASTER_REGIONS))) {
        LOG_INFO("No update needed");
        return 0;
    }

    NYT::ITransactionPtr tx = client->StartTransaction();
    // preparing regions from dictionary (directory)
    auto dictionaryReader = dictionaryClient->CreateFileReader(cfg.FILE_SOURCE_DICTIONARY_REGIONS);
    const auto tableSchema = NYT::CreateTableSchema<NProto::THostRegions>();
    TTable<NProto::THostRegions> preparedDictionaryRegions(tx,
        NYT::TRichYPath(NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "dictionary-regions")).Schema(tableSchema));
    TTable<NProto::THostRegions> preparedWebmasterRegions(tx,
        NYT::TRichYPath(NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "webmaster-regions")).Schema(tableSchema));
    TTable<NJupiter::THostMirror> mirrorsTable(tx, GetJupiterMirrorsInProdTable(tx));
    auto dictionaryWriter = preparedDictionaryRegions.GetWriter();
    LOG_INFO("Processing dictionary regions");
    try {
        for (TString line = dictionaryReader->ReadLine(); true; line = dictionaryReader->ReadLine()) {
            const TVector <TString> parts = SplitString(line, " ");
            if (parts.size() < 2) {
                continue;
            }
            TString host, path, query;
            if (!NUtils::SplitUrl(parts[0], host, path, query)) {
                continue;
            }
            if (path != "/") {
                continue;
            }
            NProto::THostRegions hostRegions;
            hostRegions.SetMainHost(host);
            for (size_t i = 1; i < parts.size(); i++) {
                hostRegions.AddDictionaryRegions(FromString<i64>(parts[i]));
            }
            dictionaryWriter->AddRow(hostRegions);
        }
    } catch (yexception &ignored) {
    }
    dictionaryWriter->Finish();
    // webmaster regions
    LOG_INFO("Processing webmaster regions");
    TMapCmd<TConvertWebmasterRegions>(tx)
        .Input<NYT::TNode>(cfg.TABLE_SOURCE_WEBMASTER_REGIONS)
        .Output<NProto::THostRegions>(preparedWebmasterRegions)
        .Do();
    // sort and reduce
    DoParallel(
        TSortCmd<NProto::THostRegions>(tx, preparedDictionaryRegions).By(F_MAIN_HOST),
        TSortCmd<NProto::THostRegions>(tx, preparedWebmasterRegions).By(F_MAIN_HOST)
    );
    TTable<NProto::THostRegions> result(tx, NYT::TRichYPath(cfg.TABLE_EXPORT_HOST_REGIONS).Schema(tableSchema));
    LOG_INFO("Merging regions");
    // join data
    TReduceCmd<TMergeHostRegions>(tx)
        .Input(preparedDictionaryRegions, HostRegionsInputTag)
        .Input(preparedWebmasterRegions, HostRegionsInputTag)
        .Input(mirrorsTable, HostMirrorsInputTag)
        .Output(result, HostRegionsOutputTag)
        .ReduceBy(F_MAIN_HOST)
        .Do();
    TSortCmd<NProto::THostRegions>(tx, result).By(F_HOST).Do();
    LOG_INFO("Clearing duplicate host regions");
    TReduceCmd<TClearDuplicateHostRegions>(tx)
        .Input(result, HostRegionsInputTag)
        .Output(result, HostRegionsOutputTag)
        .ReduceBy(F_HOST)
        .Do();
    TSortCmd<NProto::THostRegions>(tx, result).By(F_HOST).Do();

    trigger.Update(dictionaryClient, tx, "dictionary_regions", cfg.FILE_SOURCE_DICTIONARY_REGIONS);
    trigger.Update(tx, "webmaster_regions", cfg.TABLE_SOURCE_WEBMASTER_REGIONS);
    tx->Commit();
    LOG_INFO("Finished successfully");
    return 0;
}
} // namespace NWebmaster
