#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/table.h>
#include <robot/samovar/protos/export.pb.h>
#include <robot/samovar/protos/export_formats.pb.h>

#include <wmconsole/version3/processors/favicons/favicons.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include <util/string/vector.h>

#include "config.h"
#include "update_samovar_snapshot.h"

namespace NWebmaster {

using namespace NJupiter;

// Input tag (index 0) under which the snapshot reducer reads TFaviconIndexingRecord rows;
// UpdateSamovarSnapshot attaches both the existing snapshot and the freshly mapped table to it.
TInputTag<NProto::TFaviconIndexingRecord>          SamovarSnapshotInputTag  (0);

// Output tag (index 0) under which the snapshot reducer writes the surviving records.
TOutputTag<NProto::TFaviconIndexingRecord>         SamovarSnapshotOutputTag (0);

// Upper bound on the number of source tables picked up by a single UpdateSamovarSnapshot run.
const size_t MAX_TABLES_PER_RUN = 100;
// 365 days expressed in seconds; subtracted from the current time to get the minimal
// table timestamp a record must carry to be kept by the reducer.
const ui64 MAX_TABLE_AGE = 86400 * 365;

// Mapper converting TFaviconBadExport rows into TFaviconIndexingRecord rows.
// Every output row is stamped with the timestamp registered for the index of the input
// table the source row was read from. A source row produces one output row per entry
// of its host table, or exactly one row without a source host when that table is empty.
struct TSamovarSnapshotMapper : public NYT::IMapper<NYT::TTableReader<NSamovarExportSchema::TFaviconBadExport>, NYT::TTableWriter<NProto::TFaviconIndexingRecord>> {
public:
    Y_SAVELOAD_JOB(TableTimestamps)

    TSamovarSnapshotMapper() = default;
    TSamovarSnapshotMapper(const THashMap<size_t, ui64> &tableTimestamps)
        : TableTimestamps(tableTimestamps)
    {
    }

    void Do(TReader *input, TWriter *output) final {
        NProto::TFaviconIndexingRecord outRow;
        while (input->IsValid()) {
            const NSamovarExportSchema::TFaviconBadExport &src = input->GetRow();

            // Start from a clean record so no field leaks over from the previous row.
            outRow.Clear();
            outRow.SetUrl(src.GetUrl());
            outRow.SetRedirTarget(src.GetRedirTarget());
            outRow.SetLastAccess(src.GetLastAccess());
            outRow.SetAddTime(src.GetAddTime());
            outRow.SetHttpCode(src.GetHTTPCode());
            outRow.SetMimeType(src.GetMimeType());
            outRow.SetFilterCode(src.GetFilterCode());
            outRow.SetSourceName(src.GetSourceName());
            // at() fails for a table index that has no registered timestamp.
            outRow.SetTableTimestamp(TableTimestamps.at(input->GetTableIndex()));

            // Fan the record out: one copy per source host.
            bool emitted = false;
            for (const auto &hostEntry : src.GetHostTable().GetHosts()) {
                outRow.SetSourceHost(hostEntry.GetHost());
                output->AddRow(outRow);
                emitted = true;
            }

            // No hosts at all: still keep the record, with the source host left unset.
            if (!emitted) {
                output->AddRow(outRow);
            }

            input->Next();
        }
    }
private:
    // Input table index -> timestamp written into every record read from that table.
    THashMap<size_t, ui64> TableTimestamps;
};

REGISTER_MAPPER(TSamovarSnapshotMapper)

// Reduced by URL with rows sorted by LastAccess: the last row of each group is the one kept
// Reducer that collapses every reduce group to its last row and writes that row only if
// its table timestamp is not older than MinTableTimestamp.
// UpdateSamovarSnapshot runs it with ReduceBy({F_URL}) and SortBy({F_URL, F_LAST_ACCESS}),
// so the row kept for a URL is the last one in LastAccess order.
struct TSamovarSnapshotReducer : public TTaggedReducer {
public:
    TSamovarSnapshotReducer() = default;
    // minTableTimestamp: records whose table timestamp is below this value are dropped.
    TSamovarSnapshotReducer(ui64 minTableTimestamp) : MinTableTimestamp(minTableTimestamp) {
    }

    // Serializes the threshold first, then the base-class state; Load mirrors this order.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, MinTableTimestamp);
        TTaggedReducer::Save(stream);
    }

    // Restores the state in exactly the order Save wrote it.
    void Load(IInputStream& stream) override {
        ::Load(&stream, MinTableTimestamp);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override final {
        // Each row is copied into lastRecord, so after the loop it holds the final row of the group.
        NProto::TFaviconIndexingRecord lastRecord;
        for (; reader.IsValid(); reader.Next()) {
            lastRecord = reader.GetRow(SamovarSnapshotInputTag);
        }
        // Drop records that originate from tables older than the threshold.
        if (lastRecord.GetTableTimestamp() >= MinTableTimestamp) {
            writer.AddRow(lastRecord, SamovarSnapshotOutputTag);
        }
    }

private:
    // Initialized in-class so that a default-constructed reducer never carries an
    // indeterminate value (reading it before Load would be undefined behaviour).
    ui64 MinTableTimestamp = 0;
};

REGISTER_REDUCER(TSamovarSnapshotReducer)

int UpdateSamovarSnapshot(int, const char **) {
    const TConfig& config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_SERVER_HOST);
    NYTUtils::CreatePath(client, config.TABLE_ROOT);

    NYT::ITransactionPtr tx = client->StartTransaction();

    // check for snapshot table
    TString lastProcessedTable, newLastProcessedTable;
    size_t tableIndex = 0;
    if (tx->Exists(config.TABLE_SAMOVAR_SNAPSHOT)) {
        lastProcessedTable = NYTUtils::GetAttr(tx, config.TABLE_SAMOVAR_SNAPSHOT, config.ATTR_LAST_PROCESSED_TABLE).AsString();
    }
    TDeque<NYTUtils::TTableInfo> sourceTables;
    NYTUtils::GetTableList(tx, config.TABLE_SAMOVAR_ROOT, sourceTables);
    std::sort(sourceTables.rbegin(), sourceTables.rend(), NYTUtils::TTableInfo::TNameGreater());

    THashMap<size_t, ui64> tableTimestamps;
    TVector<NYT::TRichYPath> sourcesForProcessing;

    for (const NYTUtils::TTableInfo &tableInfo : sourceTables) {
        const TString& tableName = NYTUtils::GetTableName(tableInfo.Name);
        if (tableName <= lastProcessedTable) {
            continue;
        }
        ui64 tableTimestamp = FromString<ui64>(tableName);
        tableTimestamps[tableIndex++] = tableTimestamp;
        sourcesForProcessing.emplace_back(NYT::TRichYPath(tableInfo.Name));

        newLastProcessedTable = tableName;
        if (tableTimestamps.size() >= MAX_TABLES_PER_RUN) {
            break;
        }
    }

    if (!sourcesForProcessing.empty()) {
        ui64 minTableTimestamp = Now().Seconds() - MAX_TABLE_AGE;
        TTable<NProto::TFaviconIndexingRecord> tempSnapshot(tx, NYTUtils::JoinPath(config.TABLE_ROOT, "temp-snapshot"));
        TTable<NProto::TFaviconIndexingRecord> snapshotTable(tx, config.TABLE_SAMOVAR_SNAPSHOT);

        TMapCmd<TSamovarSnapshotMapper> mapCmd(tx, new TSamovarSnapshotMapper(tableTimestamps));
        for (const NYT::TRichYPath &source : sourcesForProcessing) {
            mapCmd.Input<NSamovarExportSchema::TFaviconBadExport>(source);
        }
        mapCmd
            .Output(tempSnapshot)
            .Do();

        TSortCmd<NProto::TFaviconIndexingRecord>(tx, tempSnapshot)
            .By({F_URL, F_LAST_ACCESS})
            .Do();

        TReduceCmd<TSamovarSnapshotReducer> reduceCmd(tx, new TSamovarSnapshotReducer(minTableTimestamp));
        if (snapshotTable.Exists()) {
            reduceCmd.Input(snapshotTable, SamovarSnapshotInputTag);
        }
        reduceCmd
            .Input(tempSnapshot, SamovarSnapshotInputTag)
            .Output(snapshotTable, SamovarSnapshotOutputTag)
            .ReduceBy({F_URL})
            .SortBy({F_URL, F_LAST_ACCESS})
            .Do();

        TSortCmd<NProto::TFaviconIndexingRecord>(tx, snapshotTable)
            .By({F_URL, F_LAST_ACCESS})
            .Do();

        tempSnapshot.Drop();

        for (const auto& source : sourcesForProcessing) {
            tx->Remove(source.Path_);
        }

        NYTUtils::SetAttr(tx, config.TABLE_SAMOVAR_SNAPSHOT, config.ATTR_LAST_PROCESSED_TABLE, newLastProcessedTable);
    }
    tx->Commit();

    return 0;
}

} //namespace NWebmaster
