#include "yt.h"

#include <mapreduce/yt/interface/operation.h>
#include <maps/libs/common/include/exception.h>
#include <maps/libs/common/include/retry.h>
#include <maps/libs/http/include/http.h>
#include <maps/libs/http/include/url.h>
#include <maps/libs/log8/include/log8.h>
#include <maps/libs/tile/include/geometry.h>
#include <maps/wikimap/mapspro/services/mrc/libs/yt/include/serialization.h>

#include <boost/lexical_cast.hpp>

#include <optional>
#include <string_view>

namespace maps::mrc::taxi_stat {

namespace {

/// Tells whether an events-table row is an "mrc_capture" event whose video
/// URL, creation time, GNSS coordinates and device id columns all hold
/// values of the expected YT node type.
bool isEventRelevant(const NYT::TNode& row)
{
    const auto& eventType = row[events::EVENT_TYPE];
    const bool isCapture =
        eventType.IsString() && eventType.AsString() == "mrc_capture";
    if (!isCapture) {
        return false;
    }
    if (!row[events::PRESIGNED_EXTERNAL_VIDEO_URL].IsString()) {
        return false;
    }
    if (!row[events::CREATED_AT].IsDouble()) {
        return false;
    }
    const bool hasPosition = row[events::GNSS_LONGITUDE].IsDouble() &&
                             row[events::GNSS_LATITUDE].IsDouble();
    return hasPosition && row[events::DEVICE_ID].IsInt64();
}

/// YT mapper that copies to the output, unchanged, exactly those input rows
/// accepted by isEventRelevant().
class GrepEventsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>,
                                             NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(NYT::TTableReader<NYT::TNode>* input,
            NYT::TTableWriter<NYT::TNode>* output) override
    {
        while (input->IsValid()) {
            const auto& row = input->GetRow();
            if (isEventRelevant(row)) {
                output->AddRow(row);
            }
            input->Next();
        }
    }
};

REGISTER_MAPPER(GrepEventsMapper);

/// Formats a diagnostic message carrying the event id and the HTTP status
/// of an unexpected response.
std::string errorMessage(EventId eventId, const http::Response& response)
{
    const auto status = response.status();
    return concat(
        "unexpected response: .eventId=", eventId, ", .status=", status);
}

/// Returns the byte size of the resource behind @p url, read from the
/// Content-Length header of the response to an HTTP HEAD request.
///
/// A 403 or 404 response is logged as a warning and answered with a fixed
/// substitute size. Any other status than 200 fails the REQUIRE check. The
/// attempt is run through maps::common::retry with a policy of 5 tries,
/// a 1 second initial cooldown and a cooldown backoff of 2.
///
/// @param eventId  event the URL belongs to; used only in diagnostics
/// @param url      URL to probe
size_t contentLength(EventId eventId, std::string_view url)
{
    // Size reported when the real one cannot be obtained (403/404).
    constexpr size_t REAL_EXAMPLE_BYTES = 28'773'364;

    static auto httpClient = http::Client{};
    static auto retryPolicy = maps::common::RetryPolicy{}
                                  .setTryNumber(5)
                                  .setInitialCooldown(std::chrono::seconds(1))
                                  .setCooldownBackoff(2);

    return maps::common::retry(
        [&]() -> size_t {
            auto request = http::Request{httpClient, http::HEAD, url};
            auto response = request.perform();
            const auto status = response.status();
            const bool rateLimited = (status == 403);
            const bool expired = (status == 404);
            if (rateLimited || expired) {
                WARN() << errorMessage(eventId, response);
                return REAL_EXAMPLE_BYTES;
            }
            REQUIRE(response.status() == 200,
                    errorMessage(eventId, response));
            return boost::lexical_cast<size_t>(
                response.header("Content-Length"));
        },
        retryPolicy);
}

/// Description of one table column: its name and its type.
struct Column {
    /// Column name, passed to NYT::TColumnSchema::Name().
    TString name;
    /// Column type, passed to NYT::TColumnSchema::TypeV3().
    NTi::TTypePtr type;
};

/// Ordered list of column descriptions for one table.
using Columns = std::vector<Column>;

/// Builds a YT table schema containing one column per entry of @p columns,
/// added in the order given.
NYT::TTableSchema tableSchema(const Columns& columns)
{
    NYT::TTableSchema schema{};
    for (const auto& column : columns) {
        schema.AddColumn(
            NYT::TColumnSchema().Name(column.name).TypeV3(column.type));
    }
    return schema;
}

/// Schema of the log table: date, region id, accumulated byte count,
/// largest event id and the list of device ids.
NYT::TTableSchema logTableSchema()
{
    return tableSchema(Columns{
        {log::DATE, NTi::Int64()},
        {log::GEO_ID, NTi::Int64()},
        {log::BYTES_NUMBER, NTi::Uint64()},
        {log::MAX_EVENT_ID, NTi::Int64()},
        {log::DEVICE_IDS, NTi::List(NTi::Int64())},
    });
}

/// Schema of the bans table: date, disqualification type and count.
NYT::TTableSchema bansTableSchema()
{
    return tableSchema(Columns{
        {bans::DATE, NTi::Int64()},
        {bans::DISQ_TYPE, NTi::Int64()},
        {bans::COUNT, NTi::Uint64()},
    });
}

/// Schema of the activity table: date, region id and count.
NYT::TTableSchema activityTableSchema()
{
    return tableSchema(Columns{
        {activity::DATE, NTi::Int64()},
        {activity::GEO_ID, NTi::Int64()},
        {activity::COUNT, NTi::Uint64()},
    });
}

/// One tile request extracted from an access-log row by LogfellerMapper and
/// passed between the map-reduce stages via yt::serialize / yt::deserialize.
struct TileRequest {
    /// Date parsed from the log row's timestamp column.
    chrono::TimePoint date;
    /// Value of the "deviceid" URL parameter.
    std::string deviceId;
    /// Values of the "x", "y" and "z" URL parameters.
    size_t x;
    size_t y;
    size_t z;

    // Column names, one per field; also used as the reduce key in
    // grepTileRequestsFromDir().
    inline static const auto DATE = "date";
    inline static const auto DEVICE_ID = "device_id";
    inline static const auto X = "x";
    inline static const auto Y = "y";
    inline static const auto Z = "z";

    /// Ties the fields of @p t into a tuple of references, listed in the
    /// same order as the names returned by columns().
    /// NOTE(review): presumably the hook the yt serialization helpers look
    /// for - confirm against serialization.h.
    static auto introspect(auto& t)
    {
        return std::tie(t.date, t.deviceId, t.x, t.y, t.z);
    }

    /// Column names, in the same order as the fields tied by introspect().
    static auto columns() { return std::make_tuple(DATE, DEVICE_ID, X, Y, Z); }
};

/// Tells whether an access-log row is a taxi tile request: the virtual host
/// equals AGENT_PROXY_HOST, the request starts with TAXI_REQUEST_PATH, the
/// user agent contains TAXI_USER_AGENT and the timestamp is a string.
///
/// The request and user agent are inspected through std::string_view, so no
/// per-row string copies are made; the views refer to strings owned by
/// @p row and are not kept after the call.
bool isTileRequest(const NYT::TNode& row)
{
    const auto& vhost = row[logfeller::VHOST];
    if (!vhost.IsString() || !(vhost.AsString() == AGENT_PROXY_HOST)) {
        return false;
    }

    const auto& request = row[logfeller::REQUEST];
    if (!request.IsString()) {
        return false;
    }
    const std::string_view requestPath = request.AsString();
    if (!requestPath.starts_with(TAXI_REQUEST_PATH)) {
        return false;
    }

    const auto& userAgent = row[logfeller::USER_AGENT];
    if (!userAgent.IsString()) {
        return false;
    }
    const std::string_view agent = userAgent.AsString();
    if (agent.find(TAXI_USER_AGENT) == std::string_view::npos) {
        return false;
    }

    return row[logfeller::TIMESTAMP].IsString();
}

/// YT mapper that turns access-log rows accepted by isTileRequest() into
/// serialized TileRequest rows. Rows lacking any of the "deviceid", "x",
/// "y", "z" URL parameters, or with non-numeric tile coordinates, are
/// skipped.
class LogfellerMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>,
                                            NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(NYT::TTableReader<NYT::TNode>* input,
            NYT::TTableWriter<NYT::TNode>* output) override
    {
        for (; input->IsValid(); input->Next()) {
            if (const auto& row = input->GetRow(); isTileRequest(row)) {
                emitTileRequest(row, output);
            }
        }
    }

private:
    /// Parses one tile-request row and writes it to @p output, or writes
    /// nothing when the request parameters are missing or malformed.
    static void emitTileRequest(const NYT::TNode& row,
                                NYT::TTableWriter<NYT::TNode>* output)
    {
        auto day = chrono::parseIsoDate(row[logfeller::TIMESTAMP].AsString());
        // The log stores only the path and query; a dummy origin is
        // prepended so that it can be parsed as a URL.
        auto url = http::URL(
            concat("http://localhost", row[logfeller::REQUEST].AsString()));
        auto device = url.optParam("deviceid");
        auto tileX = url.optParam("x");
        auto tileY = url.optParam("y");
        auto tileZ = url.optParam("z");
        const bool complete = device && tileX && tileY && tileZ;
        if (!complete) {
            return;
        }
        try {
            auto tileRequest =
                TileRequest{.date = day,
                            .deviceId = *device,
                            .x = boost::lexical_cast<size_t>(*tileX),
                            .y = boost::lexical_cast<size_t>(*tileY),
                            .z = boost::lexical_cast<size_t>(*tileZ)};
            output->AddRow(yt::serialize(tileRequest));
        }
        catch (const boost::bad_lexical_cast&) {
            // Non-numeric tile coordinates: the row is deliberately dropped.
        }
    }
};

REGISTER_MAPPER(LogfellerMapper);

/// YT reducer that writes only the first row it is handed, round-tripped
/// through TileRequest, and ignores the rest. With the reduce key set in
/// grepTileRequestsFromDir() (date, device id, x, y, z) this collapses
/// duplicate tile requests.
class LogfellerReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>,
                                              NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(NYT::TTableReader<NYT::TNode>* input,
            NYT::TTableWriter<NYT::TNode>* output) override
    {
        if (!input->IsValid()) {
            return;
        }
        auto firstRequest = yt::deserialize<TileRequest>(input->GetRow());
        output->AddRow(yt::serialize(firstRequest));
    }
};

REGISTER_REDUCER(LogfellerReducer);

}  // namespace

/// Runs a YT map operation (GrepEventsMapper) that filters @p inputTable
/// into @p outputTable.
///
/// The input is read with a lower read limit keyed by @p startEventId and
/// restricted to the columns the mapper inspects. Any existing output table
/// is force-removed before the operation starts.
void grepEvents(NYT::ITransaction& txn,
                std::string_view inputTable,
                std::string_view outputTable,
                EventId startEventId)
{
    const auto destination = TString(outputTable);

    const auto fromStartEvent =
        NYT::TReadRange().LowerLimit(NYT::TReadLimit().Key(startEventId));
    const auto source = NYT::TRichYPath(TString(inputTable))
                            .AddRange(fromStartEvent)
                            .Columns({
                                events::ID,
                                events::CREATED_AT,
                                events::GNSS_LONGITUDE,
                                events::GNSS_LATITUDE,
                                events::EVENT_TYPE,
                                events::PRESIGNED_EXTERNAL_VIDEO_URL,
                                events::DEVICE_ID,
                            });
    const auto operationSpec = NYT::TMapOperationSpec{}
                                   .AddInput<NYT::TNode>(source)
                                   .AddOutput<NYT::TNode>(destination);

    txn.Remove(destination, NYT::TRemoveOptions().Force(true));
    txn.Map(operationSpec, new GrepEventsMapper);
}

/// Reads relevant events from @p eventsTable and folds them into @p result.
///
/// For every region id that @p geoEvaluator reports for the event's GNSS
/// point, the (date, region) aggregate gets the video size added to its
/// byte count, its maximum event id raised and the device id recorded.
///
/// The video size comes from contentLength(), i.e. an HTTP HEAD request
/// with retries. It is requested lazily, at most once per event, and only
/// when the event maps to at least one region, so events outside every
/// region cost no network round-trip.
///
/// @param txn           transaction used to read the table
/// @param geoEvaluator  maps a point to region ids
/// @param eventsTable   path of the table to read
/// @param result        aggregates to update in place
void loadEvents(NYT::ITransaction& txn,
                GeoEvaluator& geoEvaluator,
                std::string_view eventsTable,
                Log& result)
{
    for (auto reader = txn.CreateTableReader<NYT::TNode>(TString(eventsTable));
         reader->IsValid();
         reader->Next()) {
        const auto& row = reader->GetRow();
        if (!isEventRelevant(row)) {
            continue;
        }
        auto eventId = row[events::ID].AsInt64();
        auto date = epochToDay(row[events::CREATED_AT].AsDouble());
        auto lon = row[events::GNSS_LONGITUDE].AsDouble();
        auto lat = row[events::GNSS_LATITUDE].AsDouble();
        auto url = row[events::PRESIGNED_EXTERNAL_VIDEO_URL].AsString();
        auto deviceId = row[events::DEVICE_ID].AsInt64();

        // Filled on the first region. Fetched before any aggregate is
        // touched, so a failed request leaves `result` unchanged for this
        // event.
        auto bytesNumber = std::optional<size_t>{};
        for (auto geoId : geoEvaluator.evalGeoIds(geolib3::Point2(lon, lat))) {
            if (!bytesNumber) {
                bytesNumber = contentLength(eventId, url);
            }
            auto& aggregate = result[{date, geoId}];
            aggregate.bytesNumber += *bytesNumber;
            aggregate.maxEventId = std::max(aggregate.maxEventId, eventId);
            aggregate.deviceIdSet.insert(deviceId);
        }
    }
}

/// Loads previously saved aggregates from @p logTable.
///
/// Returns an empty Log when the table does not exist. Rows sharing the
/// same (date, region) key are merged: byte counts are summed, the largest
/// event id is kept and device ids are united.
Log loadLog(NYT::ITransaction& txn, std::string_view logTable)
{
    const auto path = TString(logTable);
    auto restored = Log{};
    if (!txn.Exists(path)) {
        return restored;
    }
    for (auto reader = txn.CreateTableReader<NYT::TNode>(path);
         reader->IsValid();
         reader->Next()) {
        const auto& row = reader->GetRow();
        const auto day = epochToDay(row[log::DATE].AsInt64());
        const auto region = static_cast<GeoId>(row[log::GEO_ID].AsInt64());

        auto& entry = restored[DateRegion{.date = day, .geoId = region}];
        entry.bytesNumber += row[log::BYTES_NUMBER].AsUint64();
        entry.maxEventId =
            std::max(entry.maxEventId, row[log::MAX_EVENT_ID].AsInt64());
        for (const auto& idNode : row[log::DEVICE_IDS].AsList()) {
            entry.deviceIdSet.insert(idNode.AsInt64());
        }
    }
    return restored;
}

/// Replaces @p outputTable with the contents of @p log, one row per
/// (date, region) aggregate, using logTableSchema().
void saveLog(NYT::ITransaction& txn,
             const Log& log,
             std::string_view outputTable)
{
    const auto path = TString(outputTable);
    txn.Remove(path, NYT::TRemoveOptions().Force(true));

    auto writer = txn.CreateTableWriter<NYT::TNode>(
        NYT::TRichYPath(path).Schema(logTableSchema()));
    for (const auto& [key, totals] : log) {
        const auto& deviceIds = totals.deviceIdSet;

        NYT::TNode row;
        row[log::DATE] = toEpoch(key.date);
        row[log::GEO_ID] = key.geoId;
        row[log::BYTES_NUMBER] = totals.bytesNumber;
        row[log::MAX_EVENT_ID] = totals.maxEventId;
        row[log::DEVICE_IDS] = NYT::TNode::CreateList(
            NYT::TNode::TListType(deviceIds.begin(), deviceIds.end()));
        writer->AddRow(row);
    }
    writer->Finish();
}

/// Replaces @p outputTable with the contents of @p bans, one row per
/// (date, disqualification type) key, using bansTableSchema().
void saveBans(NYT::ITransaction& txn,
              const Bans& bans,
              std::string_view outputTable)
{
    const auto path = TString(outputTable);
    txn.Remove(path, NYT::TRemoveOptions().Force(true));

    auto writer = txn.CreateTableWriter<NYT::TNode>(
        NYT::TRichYPath(path).Schema(bansTableSchema()));
    for (const auto& [key, banCount] : bans) {
        NYT::TNode row;
        row[bans::DATE] = toEpoch(key.date);
        row[bans::DISQ_TYPE] = db::toIntegral(key.disqType);
        row[bans::COUNT] = banCount;
        writer->AddRow(row);
    }
    writer->Finish();
}

/// Runs a YT map-reduce (LogfellerMapper / LogfellerReducer) over the
/// tables listed in @p inputDir and writes the tile requests to
/// @p outputTable.
///
/// Each node name in the directory is parsed as an ISO date. When
/// @p startDate is set, tables dated before it are left out. Every input is
/// restricted to the AGENT_PROXY_HOST key range and to the columns the
/// mapper reads. Any existing output table is force-removed first.
void grepTileRequestsFromDir(NYT::ITransaction& txn,
                             std::string_view inputDir,
                             std::string_view outputTable,
                             std::optional<chrono::TimePoint> startDate)
{
    txn.Remove(TString(outputTable), NYT::TRemoveOptions().Force(true));
    auto spec = NYT::TMapReduceOperationSpec{}
                    .ReduceBy({TileRequest::DATE,
                               TileRequest::DEVICE_ID,
                               TileRequest::X,
                               TileRequest::Y,
                               TileRequest::Z})
                    .AddOutput<NYT::TNode>(TString(outputTable));

    // The same key range applies to every input table.
    const auto proxyHostRange = NYT::TReadRange::FromKeys(
        TString(AGENT_PROXY_HOST), TString(concat(AGENT_PROXY_HOST, "-1")));

    for (const auto& node : txn.List(NYT::TYPath(inputDir))) {
        const auto tableDate = chrono::parseIsoDate(node.AsString());
        const bool tooOld = startDate.has_value() && *startDate > tableDate;
        if (tooOld) {
            continue;
        }
        auto tablePath = concat(inputDir, "/", node.AsString());
        auto source = NYT::TRichYPath(TString(tablePath))
                          .AddRange(proxyHostRange)
                          .Columns({
                              logfeller::REQUEST,
                              logfeller::TIMESTAMP,
                              logfeller::USER_AGENT,
                              logfeller::VHOST,
                          });
        spec.AddInput<NYT::TNode>(source);
    }
    txn.MapReduce(spec, new LogfellerMapper, new LogfellerReducer);
}

/// Counts distinct devices per (date, region) from @p tileRequestsTable and
/// stores the counts in @p result, overwriting entries with the same key.
///
/// Each tile request is attributed to the region ids that @p geoEvaluator
/// reports for the centre of the tile's geodetic bounding box.
void loadTileRequests(NYT::ITransaction& txn,
                      GeoEvaluator& geoEvaluator,
                      std::string_view tileRequestsTable,
                      Activity& result)
{
    using DeviceSet = boost::container::flat_set<std::string>;

    auto devicesByDateRegion = std::map<DateRegion, DeviceSet>{};
    for (auto reader =
             txn.CreateTableReader<NYT::TNode>(TString(tileRequestsTable));
         reader->IsValid();
         reader->Next()) {
        auto request = yt::deserialize<TileRequest>(reader->GetRow());
        auto requestedTile = tile::Tile(request.x, request.y, request.z);
        auto geoBBox = geolib3::convertMercatorToGeodetic(
            tile::mercatorBBox(requestedTile));
        for (auto geoId : geoEvaluator.evalGeoIds(geoBBox.center())) {
            auto& devices = devicesByDateRegion[{request.date, geoId}];
            devices.insert(request.deviceId);
        }
    }

    for (const auto& [dateRegion, devices] : devicesByDateRegion) {
        result[dateRegion] = devices.size();
    }
}

/// Loads per-(date, region) counts from @p activityTable.
///
/// Returns an empty Activity when the table does not exist. A later row
/// with the same key replaces an earlier one.
Activity loadActivity(NYT::ITransaction& txn, std::string_view activityTable)
{
    const auto path = TString(activityTable);
    auto stored = Activity{};
    if (txn.Exists(path)) {
        for (auto reader = txn.CreateTableReader<NYT::TNode>(path);
             reader->IsValid();
             reader->Next()) {
            const auto& row = reader->GetRow();
            const auto day = epochToDay(row[activity::DATE].AsInt64());
            const auto region =
                static_cast<GeoId>(row[activity::GEO_ID].AsInt64());
            const auto deviceCount = row[activity::COUNT].AsUint64();
            stored[{day, region}] = deviceCount;
        }
    }
    return stored;
}

/// Replaces @p outputTable with the contents of @p activity, one row per
/// (date, region) key, using activityTableSchema().
void saveActivity(NYT::ITransaction& txn,
                  const Activity& activity,
                  std::string_view outputTable)
{
    const auto path = TString(outputTable);
    txn.Remove(path, NYT::TRemoveOptions().Force(true));

    auto writer = txn.CreateTableWriter<NYT::TNode>(
        NYT::TRichYPath(path).Schema(activityTableSchema()));
    for (const auto& [key, deviceCount] : activity) {
        NYT::TNode row;
        row[activity::DATE] = toEpoch(key.date);
        row[activity::GEO_ID] = key.geoId;
        row[activity::COUNT] = deviceCount;
        writer->AddRow(row);
    }
    writer->Finish();
}

}  // namespace maps::mrc::taxi_stat
