#include <util/datetime/parser.h>
#include <util/string/join.h>
#include <util/string/split.h>

#include <library/cpp/binsaver/util_stream_io.h>
#include <library/cpp/geobase/lookup.hpp>
#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/logger/global/global.h>

#include <kernel/geo/utils.h>

#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/interface/operation.h>
#include <mapreduce/yt/library/operation_tracker/operation_tracker.h>
#include <mapreduce/yt/util/temp_table.h>

#include <quality/logs/parse_lib/parse_lib.h>
#include <quality/logs/parse_lib/parsing_rules.h>

#include <dict/recognize/queryrec/standalone/queryrec_standalone.h>

namespace NOptions {

    struct TOptions {
        TString Service;

        TString ReqAnsTable;

        TString OutputDir;
        TString OutputTable;

        TString IPRegPath;

        TString Cluster;
        TString Token;
    };

    TOptions ParseOptions(int argc, const char** argv) {
        TOptions res;
        NLastGetopt::TOpts opts;

        opts
            .AddLongOption("service", "Service name in logs")
            .Required()
            .StoreResult(&res.Service);
        opts
            .AddLongOption("reqans_table", "Path to reqans table")
            .Required()
            .StoreResult(&res.ReqAnsTable);
        opts
            .AddLongOption("output_table", "Output tables name")
            .Required()
            .StoreResult(&res.OutputTable);
        opts
            .AddLongOption("output_dir", "Output directory name")
            .Required()
            .StoreResult(&res.OutputDir);
        opts
            .AddLongOption("ipreg", "Path to ipreg dir")
            .Required()
            .StoreResult(&res.IPRegPath);
        opts
            .AddLongOption("cluster", "Yt cluster")
            .DefaultValue("hahn.yt.yandex.net")
            .StoreResult(&res.Cluster);
        opts
            .AddLongOption("token", "Yt token")
            .Optional()
            .StoreResult(&res.Token);

        NLastGetopt::TOptsParseResult parseRes(&opts, argc, argv);

        return res;
    }

}

// Reqans parsing rules restricted to a single SaaS service: records whose
// SAAS_SERVICE field differs from the configured service name yield no item.
class TSaasYdoReqansParsingRules : public TSaasReqansParsingRules {
public:
    // service - service name a record must carry to be parsed.
    TSaasYdoReqansParsingRules(const TString& service)
        : Service(service)
    {
    }

    // Parses one tab-separated record taken from `value` (key and subkey are
    // ignored). Returns the created item when the record belongs to the
    // configured service and nullptr otherwise.
    const TActionItem* ParseMRData(NMR::TValue /*key*/, NMR::TValue /*subkey*/, NMR::TValue value) override {
        CopyData(value);
        Fields.SetData(DataCopy.begin(), DataCopy.end(), "\t");

        const char* const loggedService = ParseFieldUnsafe(SAAS_SERVICE);
        if (strcmp(loggedService, Service.data()) != 0) {
            // Foreign service: drop whatever item was held before.
            Item.Reset(nullptr);
        } else {
            Item.Reset(CreateItem());
        }
        return Item.Get();
    }

private:
    const TString Service;
};

// Mapper that parses raw reqans rows (column "value") and writes:
//   * one row per accepted request      -> output table RequestInfoTableIndex,
//   * one row per answered document     -> output table DocsInfoTableIndex,
//   * one row per problem / exception   -> output table ErrorsTableIndex.
// The indices follow the order in which main() adds outputs to the map spec.
class TParseReqAnsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TParseReqAnsMapper() = default;

    // service   - service name handed to the parsing rules;
    // geoHelper - copied into the mapper and later handed to the parsing
    //             rules via SetGeoHelper.
    TParseReqAnsMapper(const TString& service, const TGeoHelper& geoHelper)
        : Service(service)
        , GeoHelper(geoHelper)
    {
    }

    // Writes the mapper state (service name, then geo helper) to `stream`.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, Service);
        // Serialization is done through a local copy because Save is const;
        // presumably the serializer needs a mutable object - TODO confirm.
        TGeoHelper helperCopy = GeoHelper;
        SerializeToArcadiaStream(stream, helperCopy);
    }

    // Restores the state in the same order Save wrote it.
    void Load(IInputStream& stream) override {
        ::Load(&stream, Service);
        SerializeFromStream(stream, GeoHelper);
    }

    // Builds the parsing rules for the configured service before rows are processed.
    void Start(NYT::TTableWriter<NYT::TNode>* /*output*/) override {
        SaasParsingRules = MakeHolder<TSaasYdoReqansParsingRules>(Service);
        SaasParsingRules->SetGeoHelper(&GeoHelper);
    }

    // Processes every input row. Any exception raised while handling a row is
    // caught and turned into an error row, so one bad record does not stop
    // the loop.
    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TNode>* output) override {
        const auto& recognizer = GetStandAloneQueryRecognizer();

        // Emits a row into the errors table.
        const auto emitError = [output](const TString& rid, const TString& payload, const auto& message) {
            output->AddRow(
                NYT::TNode()("reqid", rid)("value", payload)("message", message),
                ErrorsTableIndex);
        };

        const auto byTestId = [](const NYT::TNode& lhs, const NYT::TNode& rhs) {
            return lhs.AsUint64() < rhs.AsUint64();
        };

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode& row = input->GetRow();
            const TString& rawValue = row["value"].AsString();

            // Lives outside the try block because the catch handler reports it.
            TString reqid;

            try {
                const TActionItem* parsed = SaasParsingRules->ParseData(rawValue);
                const bool accepted = parsed && parsed->IsA(AT_SAAS_REQUEST) && IsNotHamster(row);
                if (!accepted) {
                    continue;
                }
                const auto* request = static_cast<const TSaasRequestItem*>(parsed);

                reqid = request->GetReqID();

                // A request without a parent is treated as its own parent.
                TString parentReqid;
                if (auto parentMaybe = request->GetParentReqID()) {
                    parentReqid = *parentMaybe;
                } else {
                    parentReqid = reqid;
                }

                const TStringBuf searchProps = request->GetSearchProps();
                const TString query(request->GetQuery());
                const int region = request->HasUserRegion() ? request->GetUserRegion() : 0;

                NYT::TNode requestInfoRow = NYT::TNode()
                    ("reqid", reqid)
                    ("parent_reqid", parentReqid)
                    ("query", query)
                    ("search_props", searchProps)
                    ("uid", request->GetUID())
                    ("region", region)
                    ("timestamp", static_cast<ui64>(request->GetTimestamp()));

                auto langs = recognizer.RecognizeParsedQueryLanguage(UTF8ToWide(query));
                requestInfoRow["lang"] = langs.GetMainLang();

                // Test ids are stored as an ascending list; the list stays
                // empty when the request carries no test info.
                NYT::TNode testIds = NYT::TNode::CreateList();
                const auto testInfo = request->GetTestInfo();
                if (testInfo) {
                    for (const auto& testItem : testInfo.GetRef()) {
                        testIds.Add(FromString<uint>(testItem.GetTestID()));
                    }
                    auto& idList = testIds.AsList();
                    Sort(idList.begin(), idList.end(), byTestId);
                }
                requestInfoRow["test-ids"] = testIds;

                const TVector<TString>& factorNames = request->GetRankingFactorNames();

                for (const TSaasReportItem& answer : request->GetAnswers()) {
                    const TVector<float>& factorValues = answer.RankingFactors;

                    if (factorValues.empty()) {
                        emitError(reqid, rawValue, answer.Url + " doesn`t have ranking factors");
                        continue;
                    }
                    if (factorNames.size() != factorValues.size()) {
                        emitError(reqid, rawValue, answer.Url + " has different factor names and factor values");
                        continue;
                    }

                    // Factor values are keyed by the request-level factor names.
                    NYT::TNode factorsMap = NYT::TNode::CreateMap();
                    for (size_t idx = 0, total = factorNames.size(); idx < total; ++idx) {
                        factorsMap[factorNames[idx]] = factorValues[idx];
                    }

                    output->AddRow(
                        NYT::TNode()
                            ("query", query)
                            ("region", region)
                            ("reqid", reqid)
                            ("parent_reqid", parentReqid)
                            ("url", answer.Url)
                            ("position", answer.Num)
                            ("factors", factorsMap),
                        DocsInfoTableIndex);
                }

                // The request row is written last, after all of its documents.
                output->AddRow(requestInfoRow, RequestInfoTableIndex);
            } catch (...) {
                // The raw record is attached only when no reqid was extracted.
                emitError(reqid, reqid.empty() ? rawValue : TString(), CurrentExceptionMessage());
            }
        }
    }

private:
    // Output table indices used by Do.
    static constexpr size_t RequestInfoTableIndex = 0;
    static constexpr size_t DocsInfoTableIndex = 1;
    static constexpr size_t ErrorsTableIndex = 2;

    // Row filter placeholder: currently accepts every row.
    bool IsNotHamster(const NYT::TNode& row) const {
        Y_UNUSED(row);
        //const TString key = SplitString(SplitString(SplitString(row["key"].AsString().data(), " ")[0], "@")[1], ".")[0];
        //return key.StartsWith("ydo-searchproxy-") && IsNumber(SubstGlobalCopy(key, "ydo-searchproxy-", ""));
        //TODO: think about this filter
        return true;
    }

private:
    TString Service;
    TGeoHelper GeoHelper;
    THolder<TSaasYdoReqansParsingRules> SaasParsingRules;
};

REGISTER_MAPPER(TParseReqAnsMapper);

int main(int argc, const char** argv) {
    InitGlobalLog2Console(TLOG_DEBUG);

    NYT::Initialize(argc, argv);

    const auto opts = NOptions::ParseOptions(argc, argv);

    NYT::TCreateClientOptions clientOptions;
    if (!opts.Token.empty()) {
        clientOptions.Token(opts.Token);
    }

    auto client = NYT::CreateClient(opts.Cluster, clientOptions);

    TString docsInfoTable = JoinSeq("", {opts.OutputDir, "/docs_info/", opts.OutputTable});
    TString requestInfoTable = JoinSeq("", {opts.OutputDir, "/request_info/", opts.OutputTable});
    TString errorsTable = JoinSeq("", {opts.OutputDir, "/errors/", opts.OutputTable});

    auto schemaDocsInfo = NYT::TNode::CreateList()
        .Add(NYT::TNode()("name", "factors")("type", "any"))
        .Add(NYT::TNode()("name", "position")("type", "uint32"))
        .Add(NYT::TNode()("name", "reqid")("type", "string"))
        .Add(NYT::TNode()("name", "parent_reqid")("type", "string"))
        .Add(NYT::TNode()("name", "url")("type", "string"))
        .Add(NYT::TNode()("name", "query")("type", "string"))
        .Add(NYT::TNode()("name", "region")("type", "int32"));

    auto schemaRequestInfo = NYT::TNode::CreateList()
        .Add(NYT::TNode()("name", "lang")("type", "int32"))
        .Add(NYT::TNode()("name", "query")("type", "string"))
        .Add(NYT::TNode()("name", "region")("type", "int32"))
        .Add(NYT::TNode()("name", "reqid")("type", "string"))
        .Add(NYT::TNode()("name", "parent_reqid")("type", "string"))
        .Add(NYT::TNode()("name", "search_props")("type", "string"))
        .Add(NYT::TNode()("name", "test-ids")("type", "any"))
        .Add(NYT::TNode()("name", "timestamp")("type", "uint32"))
        .Add(NYT::TNode()("name", "uid")("type", "string"));

    auto schemaErrorsTable = NYT::TNode::CreateList()
        .Add(NYT::TNode()("name", "message")("type", "string"))
        .Add(NYT::TNode()("name", "reqid")("type", "string"))
        .Add(NYT::TNode()("name", "value")("type", "string"));

    client->Create(
        docsInfoTable,
        NYT::NT_TABLE,
        NYT::TCreateOptions()
        .Attributes(NYT::TNode()("schema", schemaDocsInfo)("compression_codec", "zstd_5"))
        .Recursive(true)
        .Force(true)
    );

    client->Create(
        requestInfoTable,
        NYT::NT_TABLE,
        NYT::TCreateOptions()
        .Attributes(NYT::TNode()("schema", schemaRequestInfo)("compression_codec", "zstd_5"))
        .Recursive(true)
        .Force(true)
    );

    client->Create(
        errorsTable,
        NYT::NT_TABLE,
        NYT::TCreateOptions()
        .Attributes(NYT::TNode()("schema", schemaErrorsTable)("compression_codec", "zstd_5"))
        .Recursive(true)
        .Force(true)
    );

    TGeoHelper geoHelper(opts.IPRegPath, TRelevRegionResolver());
    client->Map(
        NYT::TMapOperationSpec()
            .AddInput<NYT::TNode>(opts.ReqAnsTable)
            .AddOutput<NYT::TNode>(requestInfoTable)
            .AddOutput<NYT::TNode>(docsInfoTable)
            .AddOutput<NYT::TNode>(errorsTable),
        new TParseReqAnsMapper(opts.Service, geoHelper),
        NYT::TOperationOptions().Spec(NYT::TNode()("job_count", 4000)("memory_limit", 2147483648)("mapper", NYT::TNode()("memory_limit", 2147483648)))
    );

    NYT::TOperationTracker tracker;

    for (const auto& table : {requestInfoTable, docsInfoTable, errorsTable}) {
        tracker.AddOperation(
            client->Merge(
                NYT::TMergeOperationSpec()
                .AddInput(table)
                .Output(table)
                .CombineChunks(true)
                .ForceTransform(true),
                NYT::TOperationOptions().Wait(false)));
    }

    tracker.WaitAllCompleted();

    return 0;
}
