#include "check_duplicate_bindings_reducer.h"
#include "parse_bindings_mapper.h"
#include "parse_segments_job.h"
#include "raw_segment_parser.h"
#include "raw_segment_validator.h"

#include <crypta/dmp/common/data/bindings_id_fields.h>
#include <crypta/dmp/common/data/bindings_schema.h>
#include <crypta/dmp/common/data/meta_schema.h>
#include <crypta/dmp/common/data/segment_id.h>
#include <crypta/dmp/common/data/segment_fields.h>
#include <crypta/dmp/common/helpers/meta.h>
#include <crypta/dmp/common/serializers/segment_serializer.h>
#include <crypta/dmp/yandex/bin/common/errors_schema.h>
#include <crypta/dmp/yandex/bin/common/helpers.h>
#include <crypta/lib/native/yt/utils/helpers.h>
#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/util/temp_table.h>
#include <mapreduce/yt/util/ypath_join.h>
#include <util/generic/algorithm.h>
#include <util/string/join.h>

using namespace NCrypta::NDmp;

namespace {
    // Tariff value that ReadMeta writes into every raw meta row when an owner login
    // is configured; whatever tariff the input carried is overwritten in that case.
    constexpr i64 DEFAULT_TARIFF = 1;

    struct TSegmentComparator {
        bool operator()(const TSegment& l, const TSegment& r) const {
            return l.Id < r.Id;
        }
    };

    // Makes sure `login` is present in the ACL of `segment`.
    //
    // A null ACL is replaced by a one-element list holding `login`. Otherwise the ACL
    // is accessed as a list and `login` is appended unless some element already has
    // the same string value. Each element is read with AsString(), so the ACL is
    // expected to be either null or a list of strings.
    void AddLoginToAcl(TSegment& segment, const TString& login) {
        auto& acl = segment.Acl;

        if (acl.IsNull()) {
            acl = NYT::TNode::CreateList({ login });
            return;
        }

        auto& entries = acl.AsList();
        const bool alreadyListed = std::any_of(entries.begin(), entries.end(), [&login](const auto& entry) {
            return entry.AsString() == login;
        });

        if (!alreadyListed) {
            entries.push_back(login);
        }
    }

    // Set of segments ordered by TSegmentComparator, i.e. by TSegment::Id only:
    // inserting a segment whose Id is already present does not add a new element.
    using TMeta = TSet<TSegment, TSegmentComparator>;

    // Writes all segments of `meta` to the table `path` within transaction `tx`.
    //
    // The table is written with the GetMetaSchema() schema and the OF_SCAN_ATTR
    // optimize-for attribute. Each segment becomes one row produced by
    // NSegmentSerializer::Serialize; rows follow the iteration order of `meta`.
    void WriteMeta(NYT::ITransactionPtr tx, const NYT::TYPath& path, const TMeta& meta) {
        const auto destination = NYT::TRichYPath(path)
            .Schema(GetMetaSchema())
            .OptimizeFor(NYT::OF_SCAN_ATTR);

        auto metaWriter = tx->CreateTableWriter<NYT::TNode>(destination);
        for (const auto& item : meta) {
            metaWriter->AddRow(NSegmentSerializer::Serialize(item));
        }
        metaWriter->Finish();
    }

    // Reads the raw meta table `rawMeta` within `tx` and builds the set of parsed segments.
    //
    // For every row the "raw" column is checked with ValidateRawSegment and, if it
    // passes, parsed with ParseRawSegment(raw, timestamp). When `ownerLogin` is defined,
    // the raw tariff is overwritten with DEFAULT_TARIFF before validation and the login
    // is added to the parsed segment's ACL.
    //
    // Validation failures and repeated segment ids are reported as rows of the `errors`
    // table (GetErrorsSchema(), OF_SCAN_ATTR). The errors writer is opened and finished
    // even when nothing is reported. Processing does not stop at the first problem, so
    // all rows are inspected.
    //
    // Returns the collected segments, or Nothing() if at least one error row was written.
    TMaybe<TMeta> ReadMeta(NYT::ITransactionPtr tx, const NYT::TYPath& rawMeta, ui64 timestamp, const NYT::TYPath& errors, const TMaybe<TString>& ownerLogin) {
        auto metaReader = tx->CreateTableReader<NYT::TNode>(rawMeta);
        const auto errorsTable = NYT::TRichYPath(errors)
            .Schema(GetErrorsSchema())
            .OptimizeFor(NYT::OF_SCAN_ATTR);
        auto errorsWriter = tx->CreateTableWriter<NYT::TNode>(errorsTable);

        const bool hasOwner = ownerLogin.Defined();

        TMeta segments;
        bool hasErrors = false;

        for (; metaReader->IsValid(); metaReader->Next()) {
            auto row = metaReader->MoveRow();
            auto& rawSegment = row.At("raw");

            if (hasOwner) {
                // Tariff is ignored for private-only DMPs; some value is still set
                // for backward compatibility.
                rawSegment[NSegmentFields::TARIFF] = DEFAULT_TARIFF;
            }

            const auto& problems = ValidateRawSegment(rawSegment);
            if (!problems.Empty()) {
                errorsWriter->AddRow(CreateError("meta", rawSegment, problems));
                hasErrors = true;
                continue;
            }

            auto segment = ParseRawSegment(rawSegment, timestamp);
            if (hasOwner) {
                AddLoginToAcl(segment, *ownerLogin);
            }

            // The set is keyed by segment id, so a failed insert means a repeated id.
            const bool isNewId = segments.insert(segment).second;
            if (!isNewId) {
                errorsWriter->AddRow(CreateError("meta", NSegmentSerializer::Serialize(segment), "meta file contains at least two entries with the same segment id"));
                hasErrors = true;
            }
        }

        errorsWriter->Finish();

        if (hasErrors) {
            return Nothing();
        }
        return MakeMaybe(std::move(segments));
    }
}

// Initializes the Config member from `config` and the Log member from `log`.
// A null `log` is rejected by the Y_ENSURE check in the body; the check runs after
// the members have been initialized.
TParseSegmentsJob::TParseSegmentsJob(const TConfig& config, NLog::TLogPtr log)
    : Config(config)
    , Log(log)
{
    // The rest of the class dereferences Log unconditionally, so refuse a null logger up front.
    Y_ENSURE(log != nullptr);
}

// Job entry point.
//
// Sets the global YT pool from the config, creates a client for Config.YtProxy and
// lists fresh segment directories under Config.InputDir (directory names are converted
// to ui64 with FromString). Every listed entry is handled in its own transaction:
// ParseSegments is run, then, if Config.ShouldDropInput is set, the input directory is
// removed, and finally the transaction is committed. ParseSegments returns nothing,
// so the removal does not depend on whether the input was accepted.
//
// Returns 0 after all entries have been processed.
int TParseSegmentsJob::Do() {
    Log->info("================ Start ================");

    NYT::TConfig::Get()->Pool = Config.YtPool;
    auto client = NYT::CreateClient(Config.YtProxy);

    Log->info("Look for fresh segments in {}", Config.InputDir);
    const auto dirnameToTimestamp = [](const TString& dirname) {
        return FromString<ui64>(dirname);
    };
    const auto pending = TFreshSegments::List(client, Config.InputDir, dirnameToTimestamp);
    Log->info("List of fresh segments [\n{}\n]", JoinSeq(",\n", pending));

    for (const auto& batch : pending) {
        Log->info("Parse {}", ToString(batch));

        // One transaction per input directory: parsing and input removal are committed together.
        auto tx = client->StartTransaction();
        ParseSegments(tx, batch);

        if (Config.ShouldDropInput) {
            Log->info("Remove {} with content", batch.Dir);
            batch.Remove(tx);
        }

        tx->Commit();
    }

    Log->info("================ Finish ================");
    return 0;
}

// Parses one fresh segments directory inside `tx` and publishes the result.
//
// Steps, each of which aborts the whole function (without publishing) on failure:
//   1. meta is parsed into a temporary table (ParseMeta);
//   2. if the input has bindings, they are parsed into a second temporary table
//      against the segment ids taken from the parsed meta (ParseBindings);
//   3. the parsed bindings are checked for duplicates (CheckDuplicateBindings).
// On success a directory named after the timestamp (via TimedSuffixGenerator) is
// created under Config.OutputDir, and the temporary tables are copied there as
// "meta" and, when present, "bindings".
void TParseSegmentsJob::ParseSegments(NYT::ITransactionPtr tx, const TFreshSegments& freshSegments) {
    const auto& timestamp = freshSegments.Timestamp;

    NYT::TTempTable parsedMeta(tx);
    if (!ParseMeta(tx, timestamp, freshSegments.Meta, parsedMeta.Name())) {
        return;
    }

    TMaybe<NYT::TTempTable> parsedBindings;
    if (freshSegments.Bindings.Defined()) {
        parsedBindings = NYT::TTempTable(tx);
        const auto knownIds = GetSegmentsIds(tx, parsedMeta.Name());

        if (!ParseBindings(tx, timestamp, *freshSegments.Bindings, parsedBindings->Name(), knownIds)) {
            return;
        }

        if (!CheckDuplicateBindings(tx, parsedBindings->Name(), timestamp)) {
            return;
        }
    }

    const NYT::TYPath outputDir = NYT::JoinYPaths(Config.OutputDir, TimedSuffixGenerator.AppendSuffix(timestamp));
    Log->info("Put parsed segments to {}", outputDir);
    tx->Create(outputDir, NYT::NT_MAP, NYT::TCreateOptions().Recursive(true));

    const auto metaTarget = NYT::JoinYPaths(outputDir, "meta");
    tx->Copy(parsedMeta.Name(), metaTarget);

    if (parsedBindings.Defined()) {
        const auto bindingsTarget = NYT::JoinYPaths(outputDir, "bindings");
        tx->Copy(parsedBindings->Name(), bindingsTarget);
    }
}

// Validates and parses the raw meta table `input`, writing the result to `output`.
//
// Errors found by ReadMeta are collected in a temporary table. If the meta is
// rejected, that table is handed to MoveToQuarantine (Config.QuarantineDir, timestamp),
// nothing is written to `output` and false is returned. Otherwise the parsed segments
// are written with WriteMeta and true is returned.
bool TParseSegmentsJob::ParseMeta(NYT::ITransactionPtr tx, ui64 timestamp, const NYT::TYPath& input, const NYT::TYPath& output) {
    Log->info("Parse meta");
    NYT::TTempTable errors(tx);

    const auto parsed = ReadMeta(tx, input, timestamp, errors.Name(), Config.OwnerLogin);

    if (parsed.Defined()) {
        WriteMeta(tx, output, *parsed);
        Log->info("Meta is valid");
        return true;
    }

    MoveToQuarantine(tx, errors.Name(), Config.QuarantineDir, timestamp);
    Log->warn("Meta is invalid");
    return false;
}

// Runs a map operation with TParseBindingsMapper over the raw bindings table `input`.
//
// The mapper receives `timestamp` and `segmentsIds` and has two outputs: the parsed
// bindings table `output` and a temporary errors table (GetErrorsSchema(), OF_SCAN_ATTR).
// If the errors table is not empty after the operation, it is handed to
// MoveToQuarantine and false is returned; otherwise true is returned.
bool TParseSegmentsJob::ParseBindings(NYT::ITransactionPtr tx, ui64 timestamp, const NYT::TYPath& input, const NYT::TYPath& output, const THashSet<ui64>& segmentsIds) {
    Log->info("Parse bindings");
    NYT::TTempTable errors(tx);

    TParseBindingsMapper::TOutputIndexes::TBuilder outputs;
    outputs.Add(output, TParseBindingsMapper::EOutputTables::Bindings);
    outputs.Add(
        NYT::TRichYPath(errors.Name()).Schema(GetErrorsSchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
        TParseBindingsMapper::EOutputTables::Errors
    );

    NYT::TMapOperationSpec spec;
    spec.AddInput<NYT::TNode>(input);
    AddOutputs<NYT::TNode>(spec, outputs.GetTables());

    tx->Map(spec, new TParseBindingsMapper(outputs.GetIndexes(), timestamp, segmentsIds));

    // Any row in the errors table means the input is rejected as a whole.
    if (EmptyTable(tx, errors.Name())) {
        Log->info("Bindings is valid");
        return true;
    }

    MoveToQuarantine(tx, errors.Name(), Config.QuarantineDir, timestamp);
    Log->warn("Bindings is invalid");
    return false;
}

// Checks the parsed `bindings` table for duplicates.
//
// The table is first sorted in place by NBindingsIdFields::EXT_ID, then reduced by the
// same key with TCheckDuplicateBindingsReducer. The reduce reads `bindings` and writes
// its Bindings output back to the same path (GetExtIdBindingsSchema(), OF_SCAN_ATTR),
// and its Errors output to a temporary table (GetErrorsSchema(), OF_SCAN_ATTR).
// NOTE(review): what exactly the reducer treats as a duplicate is defined in
// TCheckDuplicateBindingsReducer, not here.
//
// If the errors table is not empty, it is handed to MoveToQuarantine and false is
// returned; otherwise true is returned.
bool TParseSegmentsJob::CheckDuplicateBindings(NYT::ITransactionPtr tx, const NYT::TYPath& bindings, ui64 timestamp) const {
    Log->info("Check that duplicate bindings don't exist");
    NYT::TTempTable errors(tx);

    TCheckDuplicateBindingsReducer::TOutputIndexes::TBuilder outputs;
    outputs.Add(
        NYT::TRichYPath(bindings).Schema(GetExtIdBindingsSchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
        TCheckDuplicateBindingsReducer::EOutputTables::Bindings
    );
    outputs.Add(
        NYT::TRichYPath(errors.Name()).Schema(GetErrorsSchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
        TCheckDuplicateBindingsReducer::EOutputTables::Errors
    );

    NYT::TReduceOperationSpec reduceSpec;
    reduceSpec.ReduceBy(NBindingsIdFields::EXT_ID);
    reduceSpec.AddInput<NYT::TNode>(bindings);
    AddOutputs<NYT::TNode>(reduceSpec, outputs.GetTables());

    // The reduce below is keyed by EXT_ID, so the table is sorted by that key first.
    const auto sortSpec = NYT::TSortOperationSpec()
        .AddInput(bindings)
        .Output(bindings)
        .SortBy(NBindingsIdFields::EXT_ID);
    tx->Sort(sortSpec);

    tx->Reduce(reduceSpec, new TCheckDuplicateBindingsReducer(outputs.GetIndexes()));

    if (EmptyTable(tx, errors.Name())) {
        Log->info("Duplicate bindings don't exists");
        return true;
    }

    MoveToQuarantine(tx, errors.Name(), Config.QuarantineDir, timestamp);
    Log->warn("Bindings is invalid because there are duplicate bindings");
    return false;
}
