#include "build_ca_job.h"

#include "build_ca_mapper.h"
#include "ca_parent.h"
#include "decrease_size_reducer.h"
#include "filter_ca_parents_mapper.h"

#include <crypta/lab/lib/native/encoded_user_data.h>
#include <crypta/lib/native/proto_serializer/proto_serializer.h>
#include <crypta/lib/native/yt/utils/helpers.h>
#include <crypta/lib/native/yt/utils/timed_yt_path_generator.h>
#include <crypta/lib/proto/user_data/user_data.pb.h>
#include <crypta/siberia/bin/custom_audience/ca_builder/proto/ca_binding.pb.h>
#include <crypta/siberia/bin/custom_audience/ca_builder/proto/error.pb.h>
#include <crypta/siberia/bin/custom_audience/common/proto/ca_rule.pb.h>
#include <crypta/siberia/bin/custom_audience/common/rule/encoded_ca_rule.h>

#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/util/temp_table.h>
#include <mapreduce/yt/util/ypath_join.h>

#include <util/string/builder.h>

using namespace NCrypta;
using namespace NCrypta::NSiberia::NCustomAudience;
using namespace NCrypta::NSiberia::NCustomAudience::NCaBuilder;

namespace {
    using namespace NLab::NEncodedUserData;

    void WriteError(NYT::TTableWriterPtr<TError> errorWriter, ui64 caParentId, const TString& message) {
        TError error;
        error.SetCaParentId(caParentId);
        error.SetMessage(message);
        errorWriter->AddRow(error);
    }

    // Reads the rows of `table` and turns each one into a TCaParent whose rule
    // is encoded with the word/host/app dictionaries.
    //
    // tx            - transaction used for every YT read, write and create here.
    // table         - table of TLalParent rows; each row's rule is parsed as JSON.
    // wordDictTable - table loaded through ReadStringToIdDict for word encoding.
    // hostDictTable - table loaded through ReadStringToIdDict for host encoding.
    // appDictTable  - table loaded through ReadStringToIdDict for app encoding.
    // errorsDir     - directory (created if missing) that receives a fresh errors table.
    // errorsTtl     - TTL passed to SetTtl for that errors table.
    //
    // Returns the parents whose rule was parsed and encoded without a yexception.
    // A row that raises a yexception is recorded in the errors table and skipped;
    // any other exception type propagates to the caller.
    TVector<TCaParent> GetCaParents(NYT::ITransactionPtr tx, const TString& table, const TString& wordDictTable, const TString& hostDictTable, const TString& appDictTable, const TString& errorsDir, const TDuration& errorsTtl) {
        // Held by value: the path comes from a call on a temporary generator, so a
        // reference would only be safe if GetPath happens to return by value.
        const auto errorsTable = TTimedYtPathGenerator(TShiftedClock::Now()).GetPath(errorsDir);
        tx->Create(errorsDir, NYT::NT_MAP, NYT::TCreateOptions().Recursive(true).IgnoreExisting(true));
        auto errorWriter = tx->CreateTableWriter<TError>(errorsTable);

        const auto& wordDict = ReadStringToIdDict(tx, wordDictTable);
        const auto& hostDict = ReadStringToIdDict(tx, hostDictTable);
        const auto& appDict = ReadStringToIdDict(tx, appDictTable);

        TVector<TCaParent> res;

        for (auto reader = tx->CreateTableReader<NCrypta::NLookalike::TLalParent>(table); reader->IsValid(); reader->Next()) {
            const auto& lalParent = reader->GetRow();

            try {
                // A fresh message per row: nothing parsed for an earlier parent
                // (including a parse that failed half-way) can leak into this one,
                // regardless of whether FromJson clears its target first.
                TCaRule rule;
                NProtoSerializer::FromJson(rule, lalParent.GetRule());

                res.push_back({
                    .Id = lalParent.GetId(),
                    .EncodedRule = TEncodedCaRule(rule, wordDict, hostDict, appDict)
                });
            } catch (const yexception& e) {
                WriteError(errorWriter, lalParent.GetId(), TStringBuilder() << "Can't parse CA rule. Message: " << e.what());
            }
        }

        // Flush the error rows before the TTL is applied to the table.
        errorWriter->Finish();

        SetTtl(tx, errorsTable, errorsTtl, ESetTtlMode::RemoveIfEmpty);

        return res;
    }
}

// Runs the custom-audience build pipeline inside a single YT transaction:
//   1. map:    filter CA parents out of the lal state table into a temp table;
//   2. read:   load and encode those parents (parse failures go to the errors table);
//   3. map:    build CA bindings from the user data table;
//   4. sort:   sort the bindings table in place by CaParentId;
//   5. reduce: run TDecreaseSizeReducer over the bindings, again in place.
//
// config - job configuration (YT proxy/pool, table paths, filter threshold, errors TTL).
// log    - logger used for progress messages.
//
// Returns 0 both when the pipeline completes and when no CA parents were built.
int NCaBuilder::BuildCaJob(TBuildCaJobConfig config, NLog::TLogPtr log) {
    NYT::TConfig::Get()->Pool = config.GetYt().GetPool();
    auto client = NYT::CreateClient(config.GetYt().GetProxy());

    auto tx = client->StartTransaction();
    NYT::TTempTable caParentsTable(tx);

    log->info("Start filter CA parents map");
    auto filterCaParentsMapSpec = NYT::TMapOperationSpec()
        .AddInput<NLookalike::TLalStateKeyValue>(config.GetLalStateTable())
        .AddOutput<NLookalike::TLalParent>(caParentsTable.Name());

    tx->Map(filterCaParentsMapSpec, new TFilterCaParentsMapper());

    log->info("Read CA parents table {}", caParentsTable.Name());
    const auto& caParents = GetCaParents(tx, caParentsTable.Name(), config.GetWordDictTable(), config.GetHostDictTable(), config.GetAppDictTable(), config.GetErrorsDir(), TDuration::Days(config.GetErrorsTtlDays()));
    log->info("Found {} CA parents", caParents.size());
    if (caParents.empty()) {
        log->info("No CA parents found");
        // The errors table was written inside `tx`. Leaving without a commit
        // would drop it (an uncommitted YT transaction is aborted on release by
        // default), hiding the parse errors exactly when every rule failed.
        tx->Commit();
        return 0;
    }

    const auto filterThreshold = config.GetFilterThreshold();
    const auto& caBindingsTable = config.GetCaBindingsTable();
    const auto& caParentIdYtField = YT_FIELD(TCaBinding, CaParentId);
    const auto caBindingTableSchema = NYT::CreateTableSchema<TCaBinding>();

    log->info("Start build CA map");
    auto mapSpec = NYT::TMapOperationSpec()
        .AddInput<NLab::TUserData>(config.GetUserDataTable())
        .AddOutput<TCaBinding>(NYT::TRichYPath(caBindingsTable).Schema(caBindingTableSchema).OptimizeFor(NYT::OF_SCAN_ATTR));

    tx->Map(mapSpec, new TBuildCaMapper(filterThreshold, caParents));

    // Sort and reduce both read from and write to the bindings table.
    log->info("Start build CA sort");
    auto sortSpec = NYT::TSortOperationSpec()
        .AddInput(caBindingsTable)
        .Output(caBindingsTable)
        .SortBy(caParentIdYtField);

    tx->Sort(sortSpec);

    log->info("Start build CA reduce");
    auto reduceSpec = NYT::TReduceOperationSpec()
        .AddInput<TCaBinding>(caBindingsTable)
        .AddOutput<TCaBinding>(caBindingsTable)
        .ReduceBy({caParentIdYtField})
        .SortBy(caParentIdYtField);

    tx->Reduce(reduceSpec, new TDecreaseSizeReducer(filterThreshold));

    tx->Commit();
    log->info("End");

    return 0;
}
