#include <util/generic/set.h>
#include <util/generic/size_literals.h>
#include <util/string/printf.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/wmcutil/compress.h>
#include <wmconsole/version3/wmcutil/math.h>
#include <wmconsole/version3/wmcutil/periodic.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>

#include <wmconsole/version3/processors/tools/protos/user_urls.pb.h>
#include <wmconsole/version3/processors/user_sessions/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/protos/reports.pb.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/protos/querygroups.pb.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "task_archive.h"

namespace NWebmaster {
namespace NReports {

using namespace NJupiter;

// Input tags used by the reducers in this file to select rows by message type.
// ArchiveSourceInputTag is read by all three reducers; UserUrlInputTag only by
// TUserUrlsFilterReducer.
static const TInputTag<NProto::TArchiveSource> ArchiveSourceInputTag    (1);
static const TInputTag<NWebmaster::NProto::TUserUrl> UserUrlInputTag    (4);

// Output tags. YamrOutputTag is registered for several output tables at once
// (see TaskArchives); TArchiveReducer picks the table via AddRowWithOffset.
static const TOutputTag<NProto::TArchiveSource> ArchiveSourceOutputTag  (1);
static const TOutputTag<NYT::TYamr> YamrOutputTag                       (4);

//ReduceBy Host
struct TArchiveReducer : public TTaggedReducer {
    TArchiveReducer() = default;
    TArchiveReducer(const TVector<time_t> &timestamps)
        : Timestamps(timestamps)
    {
    }

    void Save(IOutputStream& stream) const override {
        ::Save(&stream, Timestamps);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, Timestamps);
        TTaggedReducer::Load(stream);
    }

public:
    template<class T>
    inline static TString IndicatorWrap(T indicator) {
        if (indicator == 0) {
            return "\"\"";
        }
        return ToString(indicator).Quote();
    }

    struct TClicksGetter {
        inline int operator()(const NProto::TArchiveRecord &record) const {
            return record.GetClicks();
        }
    };

    struct TShowsGetter {
        inline int operator()(const NProto::TArchiveRecord &record) const {
            return record.GetShows();
        }
    };

    struct TShowsAvgPositionGetter {
        inline double operator()(const NProto::TArchiveRecord &record) const {
            const static auto ROUND_D2 = NUtils::Round<-2, double>;
            if (record.GetShows() > 0) {
                return ROUND_D2(static_cast<double>(record.GetShowsPositionProduct()) / static_cast<double>(record.GetShows()));
            }
            return 0.0;
        }
    };

    template<class TGetter>
    struct TArchive {
        TArchive(const TVector<time_t> &timestamps, ui32 tableNo)
            : Timestamps(timestamps)
            , TableNo(tableNo)
        {
            AddHeader();
        }

        static TString EscapeText(const TString &s) {
            const static TWtringBuf WHAT(u"\"");
            const static TWtringBuf WITH(u"\\\"");
            TUtf16String wide = UTF8ToWide(s);
            SubstGlobal(wide, WHAT, WITH);
            return "\"" + WideToUTF8(wide) + "\"";
        }

        void AddHeader() {
            const char *FORMAT = "%Y-%m-%d";
            TStringBuilder header;
            header << "\"Query\"";
            for (time_t ts : Timestamps) {
                header << "," << TInstant::Seconds(ts).FormatLocalTime(FORMAT).Quote();
            }
            header << Endl;
            Chunk.Write(header.data(), header.size());
        }

        void Add(const NProto::TArchiveSource &row, TTagedWriter &writer) try {
            TStringBuilder str;
            str << EscapeText(row.GetQuery());
            for (const auto &record : row.GetRecords().GetRecords()) {
                str << "," << IndicatorWrap(Getter(record));
            }
            str << Endl;

            Chunk.Write(str.data(), str.size());
            Records++;

            if (Chunk.Overflow()) {
                NYT::TYamr dstMsg;
                dstMsg.SetKey(row.GetHost());
                dstMsg.SetSubkey(Sprintf("%05d-%lu", Chunk.No++, Records));
                dstMsg.SetValue(TString(Chunk.Data(), Chunk.Size()));
                writer.AddRowWithOffset(dstMsg, YamrOutputTag, TableNo);
                Chunk.Clear();
                Records = 0;
            }
        } catch (yexception &e) {
            Cerr << e.what() << Endl;
        }

        void Finish(const NProto::TArchiveSource &row, TTagedWriter &writer) {
            Chunk.Finish();
            if (Chunk.Size() > 0) {
                NYT::TYamr dstMsg;
                dstMsg.SetKey(row.GetHost());
                dstMsg.SetSubkey(Sprintf("%05d-%lu", Chunk.No++, Records));
                dstMsg.SetValue(TString(Chunk.Data(), Chunk.Size()));
                writer.AddRowWithOffset(dstMsg, YamrOutputTag, TableNo);
            }
        }

    public:
        NUtils::TChunk Chunk;
        size_t Records = 0;
        TGetter Getter;
        const TVector<time_t> &Timestamps;
        ui32 TableNo = 0;
    };

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        const ui32 TABLENO_CLICKS       = 0;
        const ui32 TABLENO_SHOWS        = 1;
        const ui32 TABLENO_AVG_POSITION = 2;

        TMaybe<NProto::TArchiveSource> firstRow = reader.GetRowMaybe(ArchiveSourceInputTag);
        const TString host = firstRow.GetRef().GetHost();
        const TString path = firstRow.GetRef().GetPath();
        const TString query = firstRow.GetRef().GetQuery();

        TArchive<TClicksGetter> arcClicks(Timestamps, TABLENO_CLICKS);
        TArchive<TShowsGetter> arcShows(Timestamps, TABLENO_SHOWS);
        TArchive<TShowsAvgPositionGetter> arcPosition(Timestamps, TABLENO_AVG_POSITION);
        for (const auto &row : reader.GetRows(ArchiveSourceInputTag)) {
            arcClicks.Add(row, writer);
            arcShows.Add(row, writer);
            arcPosition.Add(row, writer);
        }
        arcClicks.Finish(firstRow.GetRef(), writer);
        arcShows.Finish(firstRow.GetRef(), writer);
        arcPosition.Finish(firstRow.GetRef(), writer);
    }

public:
    TVector<time_t> Timestamps;

}; //TArchiveReducer

REGISTER_REDUCER(TArchiveReducer)

//ReduceBy Host, Query
// Collapses all TArchiveSource rows of one reduce group into a single row:
// records are summed per timestamp via TArchiveCounter and written out in
// ascending timestamp order. The output row carries only Host and Query
// (taken from the first row of the group) plus the merged records.
struct TArchiveRemoveUrlsReducer : public TTaggedReducer {

public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TMaybe<NProto::TArchiveSource> firstRow = reader.GetRowMaybe(ArchiveSourceInputTag);
        const TString host = firstRow.GetRef().GetHost();
        const TString query = firstRow.GetRef().GetQuery();

        // Accumulate straight into an ordered map: the records have to be emitted
        // sorted by timestamp anyway, so no separate hash map plus copy is needed.
        TMap<time_t, TArchiveCounter> archiveCounters;
        for (const auto &row : reader.GetRows(ArchiveSourceInputTag)) {
            for (const auto &record : row.GetRecords().GetRecords()) {
                archiveCounters[record.GetTimestamp()].Add(record);
            }
        }

        NProto::TArchiveSource dstMsg;
        dstMsg.SetHost(host);
        dstMsg.SetQuery(query);
        for (const auto &obj : archiveCounters) {
            NProto::TArchiveRecord *record = dstMsg.MutableRecords()->AddRecords();
            const time_t timestamp = obj.first;
            const auto &counter = obj.second;
            record->SetTimestamp(timestamp);
            counter.Serialize(*record);
        }
        writer.AddRow(dstMsg, ArchiveSourceOutputTag);
    }

}; //TArchiveRemoveUrlsReducer

REGISTER_REDUCER(TArchiveRemoveUrlsReducer)

//ReduceBy Host, Path
struct TUserUrlsFilterReducer : public TTaggedReducer {

public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TMaybe<NWebmaster::NProto::TUserUrl> mbUserUrl = reader.GetRowMaybe(UserUrlInputTag);
        if (!mbUserUrl.Defined()) {
            return;
        }
        reader.SkipRows(UserUrlInputTag);
        for (const auto &row : reader.GetRows(ArchiveSourceInputTag)) {
            writer.AddRow(row, ArchiveSourceOutputTag);
        }
    }

}; //TUserUrlsFilterReducer

REGISTER_REDUCER(TUserUrlsFilterReducer)

// Wraps a table name into a TRichYPath. Debugging hook: uncomment the AddRange
// line below to restrict the input to a single key.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
    //richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://www.drive2.ru"))));
    return richPath;
}

// Rebuilds the clicks / shows / average-position archive tables.
// Does nothing when the archive trigger reports that the intermediate source has
// not changed. Otherwise, inside one transaction:
//   1. keeps only archive rows whose (Host, Path) group has a user url
//      (the intermediate table is rewritten in place),
//   2. merges rows per (Host, Query) into the queries table and sorts it,
//   3. renders the per-host CSV chunks into the three archive tables,
//   4. sorts the archive tables by (key, subkey) and updates the trigger.
// Both arguments are unused. Always returns 0; failures surface as exceptions
// from the called APIs.
int TaskArchives(int, const char **) {
    const auto &cfg = TConfig::CInstance();

    NYT::IClientBasePtr client = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    TYtSourceTrigger triggerIntm(client, cfg.TABLE_ARCHIVE_INTM_FULL);
    TYtSourceTrigger triggerArchive(client, cfg.TABLE_ARCHIVE_CLICKS);
    if (!triggerArchive.NeedUpdate(triggerIntm.Source)) {
        return 0;
    }

    LOG_INFO("reports, archives");

    NYTUtils::CreatePath(client, cfg.TABLE_ARCHIVE_ROOT);
    NYT::ITransactionPtr tx = client->StartTransaction();

    // The timestamps become the CSV header columns of every archive.
    const auto timestampsNodes = GetYtAttr(tx, cfg.TABLE_ARCHIVE_INTM_FULL, TAttrName::Timestamps).AsList();
    TVector<time_t> timestamps;
    timestamps.reserve(timestampsNodes.size());
    for (const auto &node : timestampsNodes) {
        timestamps.push_back(node.AsInt64());
    }

    LOG_INFO("reports, archive, input %s", cfg.TABLE_ARCHIVE_INTM_FULL.data());
    LOG_INFO("reports, archive, output %s", cfg.TABLE_ARCHIVE_INTM_QUERIES.data());

    // Step 1: drop archive rows for (Host, Path) groups without a user url.
    // Input and output are the same intermediate table.
    TReduceCmd<TUserUrlsFilterReducer>(tx)
        .Input(TTable<NWebmaster::NProto::TUserUrl>(tx, DebugPath(cfg.TABLE_SOURCE_USER_URLS)), UserUrlInputTag)
        .Input(TTable<NProto::TArchiveSource>(tx, DebugPath(cfg.TABLE_ARCHIVE_INTM_FULL)), ArchiveSourceInputTag)
        .Output(TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_FULL).AsSortedOutput({"Host", "Path", "Query"}), ArchiveSourceOutputTag)
        .MaxRowWeight(128_MBs)
        .ReduceBy({"Host", "Path"})
        .Do()
    ;

    // Step 2: merge the per-path rows into one row per (Host, Query).
    TCombineReduceCmd<void, TArchiveRemoveUrlsReducer>(tx, nullptr, new TArchiveRemoveUrlsReducer)
        .Input(TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_FULL), ArchiveSourceInputTag)
        .Output(TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_QUERIES), ArchiveSourceOutputTag)
        .MaxRowWeight(128_MBs)
        .IntermediateReduceTag(ArchiveSourceInputTag)
        .ReduceBy({"Host", "Query"})
        .Do()
    ;

    TSortCmd<NProto::TArchiveSource>(tx,
        TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_QUERIES)
    )
        .By({"Host", "Query"})
        .MaxRowWeight(128_MBs)
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    // The final reduce reads the merged queries table, so that is what gets logged
    // as its input.
    LOG_INFO("reports, archive, input %s", cfg.TABLE_ARCHIVE_INTM_QUERIES.data());
    LOG_INFO("reports, archive, output %s", cfg.TABLE_ARCHIVE_CLICKS.data());
    LOG_INFO("reports, archive, output %s", cfg.TABLE_ARCHIVE_SHOWS.data());
    LOG_INFO("reports, archive, output %s", cfg.TABLE_ARCHIVE_AVG_POSITION.data());

    // Step 3: render the CSV chunks. The order of the Yamr outputs must match the
    // table offsets used by TArchiveReducer (clicks, shows, average position).
    TReduceCmd<TArchiveReducer>(tx, new TArchiveReducer(timestamps))
        .Input(TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_QUERIES), ArchiveSourceInputTag)
        .Output(TTable<NYT::TYamr>(tx, cfg.TABLE_ARCHIVE_CLICKS).AsSortedOutput({"key"}), YamrOutputTag)
        .Output(TTable<NYT::TYamr>(tx, cfg.TABLE_ARCHIVE_SHOWS).AsSortedOutput({"key"}), YamrOutputTag)
        .Output(TTable<NYT::TYamr>(tx, cfg.TABLE_ARCHIVE_AVG_POSITION).AsSortedOutput({"key"}), YamrOutputTag)
        .MaxRowWeight(128_MBs)
        .ReduceBy({"Host"})
        .MemoryLimit(1_GBs)
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    // Step 4: order the chunks of every host by subkey (chunk number first).
    DoParallel(
        TSortCmd<NYT::TYamr>(tx, TTable<NYT::TYamr>(tx, cfg.TABLE_ARCHIVE_CLICKS))
            .By({"key", "subkey"})
            .MaxRowWeight(128_MBs)
            .OperationWeight(cfg.OPERATION_WEIGHT),
        TSortCmd<NYT::TYamr>(tx, TTable<NYT::TYamr>(tx, cfg.TABLE_ARCHIVE_SHOWS))
            .By({"key", "subkey"})
            .MaxRowWeight(128_MBs)
            .OperationWeight(cfg.OPERATION_WEIGHT),
        TSortCmd<NYT::TYamr>(tx, TTable<NYT::TYamr>(tx, cfg.TABLE_ARCHIVE_AVG_POSITION))
            .By({"key", "subkey"})
            .MaxRowWeight(128_MBs)
            .OperationWeight(cfg.OPERATION_WEIGHT)
    );

    triggerArchive.Update(tx, triggerIntm.Source);
    tx->Commit();
    LOG_INFO("reports, archives, done");
    return 0;
}

} //namespace NReports
} //namespace NWebmaster
