#include <util/datetime/cputimer.h>
#include <util/draft/datetime.h>
#include <util/generic/deque.h>
#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>
#include <util/generic/vector.h>

#include <kernel/search_query/search_query.h>
#include <library/cpp/containers/comptrie/comptrie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>
#include <library/cpp/l2_distance/l2_distance.h>
#include <library/cpp/string_utils/url/url.h>

#include <util/generic/strbuf.h>

#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/dssm/utils/title_normalization.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/jupiter/protos/external/host_mirror.pb.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/library/dssm/dssm_utils.h>
#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <robot/library/dssm/utils/title_normalization.h>
#include <wmconsole/version3/processors/user_sessions/exports/catalogia/protos/catalogia.pb.h>
#include <wmconsole/version3/processors/user_sessions/library/utils.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/math.h>
#include <wmconsole/version3/wmcutil/owners.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "config.h"
#include "utils.h"

#include "task_xmlsearch.h"

namespace NWebmaster {
namespace NCatalogia {

using namespace NJupiter;

// Weight handed to OperationWeight() for every map and sort operation
// launched by TaskXMLSearch() in this file.
const float OPERATION_WEIGHT = 5.0;

// Mapper that converts raw XML-search log rows (NYT::TNode) into
// NUserSessions::NProto::TQuery records.
//
// For every input row the "Parsed" sub-node is inspected:
//   * rows whose "NumItems" equals "0" are skipped;
//   * rows whose "search_text" is rejected by CheckAndFixQueryStringUTF8() are skipped;
//   * otherwise one output record is written per key of "Parsed" that starts with "ans".
//
// Any yexception raised while handling a row is printed to Cerr and the row
// (including its not-yet-processed "ans*" entries) is abandoned.
struct TXMLConvertMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NUserSessions::NProto::TQuery>> {
public:
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            try {
                ConvertRow(input->GetRow()["Parsed"], output);
            } catch (const yexception &error) {
                Cerr << error.what() << Endl;
            }
        }
    }

private:
    // Emits the records derived from a single "Parsed" node.
    // Throws whatever the node accessors / FromString throw; the caller handles it.
    static void ConvertRow(const NYT::TNode &parsed, TWriter *output) {
        // Nothing was returned for this request, so there is nothing to export.
        if (parsed["NumItems"].AsString() == "0") {
            return;
        }

        const ui32 region = FromString<ui32>(parsed["reg"].AsString());
        const time_t requestTime = FromString<time_t>(parsed["timestamp"].AsString());
        const TString requestId = parsed["reqid"].AsString();

        TString fixedQuery;
        const bool queryIsUsable = CheckAndFixQueryStringUTF8(parsed["search_text"].AsString(), fixedQuery);
        if (!queryIsUsable) {
            return;
        }

        // Fields shared by every answer of this request; Host/Path/Position
        // are overwritten per answer below.
        NUserSessions::NProto::TQuery record;
        record.SetRegionId(region);
        record.SetQuery(fixedQuery);
        record.SetTimestamp(requestTime);
        record.SetClicks(0);
        record.SetShows(1);
        record.SetReqID(requestId);

        for (const auto &field : parsed.AsMap()) {
            if (!field.first.StartsWith("ans")) {
                continue;
            }

            // The answer payload is scanned as key=value pairs; note that the
            // pair delimiter is the two-character sequence backslash + 't'.
            THashMap<TString, TString> attrs;
            NUtils::ScanKeyValue(field.second.AsString(), "\\t", "=", [&attrs] (const auto& name, const auto& content) {
                attrs[name] = content;
            });

            TString answerHost;
            TString answerPath;
            SplitUrlToHostAndPath(attrs["url"], answerHost, answerPath);
            const int answerPosition = FromString<int>(attrs["num"]);

            record.SetHost(answerHost);
            record.SetPath(answerPath);
            record.SetPosition(answerPosition);
            output->AddRow(record);
        }
    }
};

// Registers TXMLConvertMapper via the REGISTER_MAPPER macro
// (presumably required so that YT jobs can instantiate the mapper — confirm against the YT C++ API).
REGISTER_MAPPER(TXMLConvertMapper)

int TaskXMLSearch(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    TSimpleTimer timer;

    NYT::IClientPtr clientXml = NYT::CreateClient(cfg.MR_SERVER_HOST_XMLSEARCH);
    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(clientXml, cfg.TABLE_CATALOGIA_SOURCE_XML_RAW_ROOT, tables);
    std::sort(tables.rbegin(), tables.rend(), NYTUtils::TTableInfo::TNameLess());
    if (tables.size() > cfg.INPUT_DAYS) {
        tables.resize(cfg.INPUT_DAYS);
    }

    THolder<IThreadPool> queue(CreateThreadPool(4));

    const auto KEYS = {"Host", "CorrectedQuery", "Path", "RegionId", "IsMobile", "IsPad", "Position", "RequestSource", "ResultSource"};
    for (const auto &inputTable : tables) {
        const TString dateStr = NYTUtils::GetTableName(inputTable.Name);
        const TString outputTable = NYTUtils::JoinPath(cfg.TABLE_CATALOGIA_SOURCE_XML_PARSED_ROOT, dateStr);

        queue->SafeAddFunc([=, &clientXml]() {
            try {
                LOG_INFO("user_sessions, xmlsearch, logs %s", dateStr.c_str());
                NYT::ITransactionPtr tx = clientXml->StartTransaction();

                TMapCmd<TXMLConvertMapper>(tx)
                    .OperationWeight(OPERATION_WEIGHT)
                    .Input<NYT::TNode>(inputTable.Name)
                    .Output(TTable<NUserSessions::NProto::TQuery>(tx, outputTable))
                    //.MapperMemoryLimit(1_GBs)
                    .Do()
                ;

                TSortCmd<NUserSessions::NProto::TQuery>(tx)
                    .OperationWeight(OPERATION_WEIGHT)
                    .Input(TTable<NUserSessions::NProto::TQuery>(tx, outputTable))
                    .Output(TTable<NUserSessions::NProto::TQuery>(tx, outputTable)
                        .SetCompressionCodec(ECompressionCodec::BROTLI_6)
                        .SetErasureCodec(EErasureCodec::LRC_12_2_2)
                    )
                    .By(KEYS)
                    .Do()
                ;

                tx->Remove(inputTable.Name);
                tx->Commit();
                LOG_INFO("user_sessions, xmlsearch, logs %s - done", dateStr.c_str());
            } catch (yexception &e) {
                LOG_ERROR("user_sessions, xmlsearch, logs %s: %s", dateStr.c_str(), e.what());
            }
        });
    }

    queue->Stop();

    return 0;
}

} //namespace NCatalogia
} //namespace NWebmaster
