#include <util/draft/date.h>
#include <util/digest/fnv.h>
#include <util/generic/size_literals.h>
#include <util/string/printf.h>

#include <kernel/yt/utils/yt_utils.h>
#include <kernel/search_query/search_query.h>
#include <library/cpp/getopt/modchooser.h>
#include <library/cpp/compute_graph/compute_graph.h>
#include <library/cpp/json/json_writer.h>
#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <quality/user_sessions/request_aggregate_lib/all.h>

#include <robot/jupiter/protos/export.pb.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/args.h>
#include <wmconsole/version3/wmcutil/owners.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include <wmconsole/version3/processors/links/broken_visits/tables.pb.h>

#include "config.h"

using namespace NJupiter;

// Weight passed to .OperationWeight() of every reduce/sort command launched in this file.
#define OPERATION_WEIGHT 1.0f

namespace NWebmaster {

// Date pattern (presumably strftime-style, i.e. YYYY-MM-DD) used by GetBlockstatInfo()
// to build the dated blockstat dictionary path.
const char *FORMAT = "%Y-%m-%d";

//ReduceBy key
//SortBy key, subkey
// Reducer that feeds all YAMR records of one key into an NRA logs parser and
// writes one NProto::TPageView row per page view (and per transit endpoint)
// that passes the filters in WritePageView().
// Only BlockStatInfo is listed in Y_SAVELOAD_JOB; Owners is not serialized.
struct TParseSpyLogReducer : public NYT::IReducer<NYT::TTableReader<NYT::TYamr>, NYT::TTableWriter<NProto::TPageView>> {
    Y_SAVELOAD_JOB(BlockStatInfo)

public:
    TParseSpyLogReducer() = default;
    TParseSpyLogReducer(const TBlockStatInfo &blockStatInfo)
        : BlockStatInfo(blockStatInfo)
    {
    }

    // Splits `url` into `host` and `path` and normalizes both.
    // The URL is cut to 2048 characters before splitting, the host is
    // lower-cased and gets an "http://" prefix when it contains no "://",
    // and an empty path becomes "/".
    // Returns false when the split yields an empty host.
    bool SplitAndFixUrl(TString url, TString &host, TString &path) {
        const int urlLengthLimit = 2048;

        if (url.size() > urlLengthLimit) {
            url.resize(urlLengthLimit);
        }

        SplitUrlToHostAndPath(url, host, path);
        if (host.empty()) {
            return false;
        }

        host.to_lower();
        const bool hasScheme = host.find("://") != TString::npos;
        if (!hasScheme) {
            host = "http://" + host;
        }

        if (path.empty()) {
            path = "/";
        }

        return true;
    }

    // Converts one parsed page view into an output row tagged with `source`
    // and `uid`. Nothing is written when:
    //  - the HTTP status is undefined or outside [200, 600);
    //  - the URL or the previous URL is empty, or the two are equal;
    //  - either URL cannot be split into host/path;
    //  - the owner of either host cannot be resolved.
    void WritePageView(const TString &source, const TString &uid, const NRA::TPageView &view, TWriter *output) {
        // An undefined status stays 0 and is rejected by the range check.
        ui32 status = 0;
        if (view.GetHttpStatus().Defined()) {
            status = view.GetHttpStatus().GetRef();
        }
        const bool statusInRange = status >= 200 && status < 600;
        if (!statusInRange) {
            return;
        }

        TString prevUrl;
        if (view.GetPreviousUrl().Defined()) {
            prevUrl = view.GetPreviousUrl().GetRef();
        }
        if (view.GetURL().empty() || prevUrl.empty() || view.GetURL() == prevUrl) {
            return;
        }

        TString host, path;
        TString prevHost, prevPath;
        if (!SplitAndFixUrl(view.GetURL(), host, path) || !SplitAndFixUrl(prevUrl, prevHost, prevPath)) {
            return;
        }

        TString owner, prevOwner, error;
        const bool ownersResolved = Owners.GetOwner(host, owner, error)
            && Owners.GetOwner(prevHost, prevOwner, error);
        if (!ownersResolved) {
            return;
        }

        NProto::TPageView row;
        row.SetHost(host);
        row.SetPath(path);
        row.SetPreviousHost(prevHost);
        row.SetPreviousPath(prevPath);
        row.SetSource(source);
        row.SetTimestamp(view.GetTimestamp());
        row.SetHttpStatus(status);
        row.SetUID(uid);
        // The transition is internal when both hosts resolve to the same owner.
        row.SetInternal(owner == prevOwner);
        output->AddRow(row);
    }

    // Parses every record of the current key and writes the resulting page
    // views. The key of the first row is used as the UID of all output rows.
    void Do(TReader *input, TWriter *output) override {
        const TString uid = input->GetRow().GetKey();

        NRA::TLogsParserParams lpParams(BlockStatInfo);
        lpParams.SetErrorHandler(new NRA::TCerrLogsParserErrorHandler(true, false));
        NRA::TEntitiesManager entManager;
        // Only page-view entities are requested from the parser.
        entManager.AddEntityByClass<NRA::TPageView>();
        lpParams.SetEntitiesManager(entManager);
        NRA::TLogsParser lp(lpParams);

        while (input->IsValid()) {
            const auto &rec = input->GetRow();
            lp.AddRec(rec.GetKey(), rec.GetSubkey(), rec.GetValue());
            input->Next();
        }

        lp.Join();

        NRA::TRequestsContainer rcont(lp.GetRequestsContainer());
        for (const auto &pageView : rcont.GetPageViews()) {
            WritePageView("VIEW", uid, *pageView, output);
            // Both endpoints of every transit are written as separate rows.
            for (const auto &transit : pageView->GetTransits()) {
                WritePageView("TRANSIT_FROM", uid, *transit->GetFrom(), output);
                WritePageView("TRANSIT_TO", uid, *transit->GetTo(), output);
            }
        }
    }

public:
    TBlockStatInfo BlockStatInfo;
    TAnsipamOwnerCanonizer Owners;
};

REGISTER_REDUCER(TParseSpyLogReducer);

// As launched by TaskSnapshot():
//ReduceBy Host, Path, PreviousHost, PreviousPath
//SortBy Host, Path, PreviousHost, PreviousPath, Timestamp
// Collapses all visits of one (Host, Path, PreviousHost, PreviousPath) group
// into at most one row. A row is written only when the last visit in the
// group has HTTP status >= 400 and at least three distinct UIDs were seen.
// The written row is a copy of the group's first row with HttpStatus
// replaced by the status of the last row.
struct TSnapshotReducer : public NYT::IReducer<NYT::TTableReader<NProto::TPageView>, NYT::TTableWriter<NProto::TPageView>> {

public:
    void Do(TReader *input, TWriter *output) override {
        const size_t requiredUids = 3;

        // Copy of the first row of the group; used as the output template.
        auto result = input->GetRow();
        int finalStatus = 0;
        bool sawBrokenVisit = false;
        THashSet<TString> distinctUids;

        while (input->IsValid()) {
            const auto &visit = input->GetRow();
            sawBrokenVisit = sawBrokenVisit || visit.GetHttpStatus() >= 400;
            finalStatus = visit.GetHttpStatus();
            // Stop collecting once the threshold is reached to bound memory.
            if (distinctUids.size() < requiredUids) {
                distinctUids.insert(visit.GetUID());
            }
            input->Next();
        }

        const bool shouldEmit = finalStatus >= 400
            && sawBrokenVisit
            && distinctUids.size() >= requiredUids;
        if (!shouldEmit) {
            return;
        }

        result.SetHttpStatus(finalStatus);
        output->AddRow(result);
    }
};

REGISTER_REDUCER(TSnapshotReducer);

// Loads the most recent blockstat dictionary available on the cluster.
// Starting from today and walking back one day at a time (180 days at most),
// returns a TBlockStatInfo built from the first dated dictionary file that
// exists. Throws yexception when none of the probed paths exists.
TBlockStatInfo GetBlockstatInfo(NYT::IClientBasePtr client) {
    const TString dictRoot = "//statbox/statbox-dict-by-name/blockstat.dict";
    const TDate newestDay = TDate(Now().TimeT());
    const TDate stopDay = newestDay - 180; // exclusive lower bound

    TDate day = newestDay;
    while (day > stopDay) {
        const TString candidate = NYTUtils::JoinPath(dictRoot, day.ToStroka(FORMAT));
        if (client->Exists(candidate)) {
            auto reader = client->CreateFileReader(candidate);
            return TBlockStatInfo(*reader.Get());
        }
        --day;
    }

    ythrow yexception() << "blockstat.dict " << "not found";
}

// Parses the spy_log tables of the last 7 days into per-day source tables and
// then enforces the retention period on the source directory.
//
// For every day in (today - 7, today] that has a spy_log directory and no
// source table yet, a job is queued that, inside one transaction, reduces the
// "clean", "robots" and "frauds" spy_log tables with TParseSpyLogReducer into
// the day's source table and sorts the result. After all jobs finish, source
// tables beyond the cfg.STORE_PERIOD_DAYS first ones (in TNameGreater order)
// are removed.
//
// Always returns 0; failures surface as exceptions.
int TaskParse(int, const char **) {
    const TConfig &cfg = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST_SPYLOG);

    NYTUtils::CreatePath(client, cfg.TABLE_VISITS_ROOT);
    NYTUtils::CreatePath(client, cfg.TABLE_VISITS_SOURCE_ROOT);

    TBlockStatInfo bsi = GetBlockstatInfo(client);

    const TDate startDate(Now().TimeT());
    const TDate endDate = startDate - 7;

    // NOTE(review): the argument is presumably the number of parallel jobs - confirm.
    NComputeGraph::TJobRunner runner(4);

    for (TDate curDate = startDate; curDate > endDate; --curDate) {
        const TString dateStr = curDate.ToStroka(TConfig::FORMAT);
        const TString sourceTable = NYTUtils::JoinPath(cfg.TABLE_VISITS_SOURCE_ROOT, dateStr);
        const TString spylogRoot = NYTUtils::JoinPath("//user_sessions/pub/spy_log_v2/daily", dateStr);
        if (client->Exists(sourceTable)) {
            LOG_INFO("broken_visits, source, %s is already processed", sourceTable.c_str());
            continue;
        }

        if (!client->Exists(spylogRoot)) {
            LOG_INFO("broken_visits, spy_log, %s does not exist", spylogRoot.c_str());
            continue;
        }

        // bsi and client are captured by reference: both outlive runner.Run()
        // below, which is where the queued jobs are executed.
        runner.AddJob([=, &bsi, &client]() {
            LOG_INFO("broken_visits, spy_log, parse %s", spylogRoot.c_str());
            LOG_INFO("broken_visits, spy_log, output %s", sourceTable.c_str());
            NYT::ITransactionPtr tx = client->StartTransaction();

            TReduceCmd<TParseSpyLogReducer>(tx, new TParseSpyLogReducer(bsi))
                .Input(TTable<NYT::TYamr>(tx, NYTUtils::JoinPath(spylogRoot, "clean")))
                .Input(TTable<NYT::TYamr>(tx, NYTUtils::JoinPath(spylogRoot, "robots")))
                .Input(TTable<NYT::TYamr>(tx, NYTUtils::JoinPath(spylogRoot, "frauds")))
                .Output(TTable<NProto::TPageView>(tx, sourceTable))
                .MemoryLimit(5_GBs)
                .ReduceBy({"key"})
                .SortBy({"key", "subkey"})
                .OperationWeight(OPERATION_WEIGHT)
                .Do()
            ;

            // Sort order expected by TaskSnapshot's reduce over these tables.
            TSortCmd<NProto::TPageView>(tx, TTable<NProto::TPageView>(tx, sourceTable))
                .By({"Host", "Path", "PreviousHost", "PreviousPath", "Timestamp"})
                .OperationWeight(OPERATION_WEIGHT)
                .Do()
            ;

            tx->Commit();
            LOG_INFO("broken_visits, spy_log, parse %s - done", spylogRoot.c_str());
        });

    }
    runner.Run();

    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(client, cfg.TABLE_VISITS_SOURCE_ROOT, tables);
    LOG_INFO("broken_visits, sources %zu", tables.size());

    if (tables.size() > cfg.STORE_PERIOD_DAYS) {
        // Keep the first STORE_PERIOD_DAYS tables in TNameGreater order
        // (presumably the newest, since table names are dates) and drop the rest.
        std::sort(tables.begin(), tables.end(), NYTUtils::TTableInfo::TNameGreater());
        for (auto it = tables.begin() + cfg.STORE_PERIOD_DAYS; it != tables.end(); ++it) {
            LOG_INFO("broken_visits, remove %s", it->Name.c_str());
            // Previously the table was only logged and never deleted, so
            // expired source tables accumulated indefinitely.
            client->Remove(it->Name);
        }
    }

    return 0;
}

int TaskSnapshot(int, const char **) {
    const TConfig &cfg = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST_SPYLOG);


    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(client, cfg.TABLE_VISITS_SOURCE_ROOT, tables);
    if (tables.size() > cfg.STORE_PERIOD_DAYS) {
        std::sort(tables.begin(), tables.end(), NYTUtils::TTableInfo::TNameGreater());
        tables.resize(cfg.STORE_PERIOD_DAYS);
    } else if (tables.size() < cfg.STORE_PERIOD_DAYS) {
        ythrow yexception() << "broken_visitis, not enought source tables";
    }

    NYT::ITransactionPtr tx = client->StartTransaction();

    TReduceCmd<TSnapshotReducer> cmd(tx);
    for (const auto &table : tables) {
        cmd.Input(TTable<NProto::TPageView>(tx, table.Name));
    }

    const auto KEYS = {"Host", "Path", "PreviousHost", "PreviousPath"};

    cmd
        .Output(TTable<NProto::TPageView>(tx, cfg.TABLE_VISITS_SNAPSHOT).AsSortedOutput(KEYS))
        .ReduceBy(KEYS)
        .SortBy({"Host", "Path", "PreviousHost", "PreviousPath", "Timestamp"})
        .OperationWeight(OPERATION_WEIGHT)
        .Do()
    ;

    tx->Commit();

    return 0;
}

} //namespace NWebmaster

int main(int argc, const char **argv) {
    NYT::Initialize(argc, argv);
    using namespace NWebmaster;

    NLastGetopt::TOpts opts;
    TString envRoot;

    opts.AddCharOption('L', "Log path").StoreResult(&TArgs::Instance().LogPath).DefaultValue("");

    opts
        .AddCharOption('E', "Environment root")
        .StoreResult(&envRoot)
        .DefaultValue("prod")
    ;

    TModChooser modChooser;
    modChooser.AddMode("TaskParse",     TaskParse,      "TaskParse");
    modChooser.AddMode("TaskSnapshot",  TaskSnapshot,   "TaskSnapshot");

    opts.SetFreeArgDefaultTitle("Task");
    opts.SetFreeArgsMin(1);
    NLastGetopt::TOptsParseResult res(&opts, argc, argv);
    TVector<TString> modeArgs = { res.GetProgramName() };
    TVector<TString> freeArgs = res.GetFreeArgs();
    modeArgs.insert(modeArgs.end(), freeArgs.begin(), freeArgs.end());

    TCustomYTEnvironment::Instance().Init(envRoot);

    TLogger::Instance();
    NYTUtils::DisableLogger();

    return modChooser.Run(modeArgs);
}
