#include <util/digest/fnv.h>
#include <util/digest/city.h>
#include <util/string/builder.h>
#include <util/generic/hash_set.h>

#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/string_utils/url/url.h>

#include <mapreduce/yt/interface/client.h>

#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/hostid.h>

#include "partition.h"
#include "run_config.h"
#include "task_queries_top_urls.h"

namespace NWebmaster {

// Writes one output row carrying the statistics of a single
// (date, host, path, region, device type) combination.
//
// Columns of the emitted row:
//   data         - tab-separated text line terminated by "\n": date, hostId,
//                  pathId, regionId, the TQueryStat counters in the order
//                  streamed below, deviceType.
//   TableId      - tableId exactly as passed by the caller.
//   PartitionKey - CityHash64(hostId) % 128 rendered as a decimal string.
//   OrderKey     - hostId followed by regionId (8 hex digits) and pathId
//                  (16 hex digits), each with its sign bit flipped.
//   IsPacked     - always false.
void TMapQueriesTopUrls::WriteQueryStat(size_t tableId, const TString &date, const TString &hostId, int64_t pathId,
    const TQueryStat &queryStat, int32_t regionId, const TString &deviceType, TWriter* writer) {
    // Adding 2^(N-1) modulo 2^N flips the sign bit, which maps signed values to
    // unsigned values in the same numeric order; their fixed-width hex
    // renderings then compare lexicographically in that order as well.
    // The 64-bit bias has to be the full 16-digit 2^63: a 15-digit literal
    // (2^59) does not preserve the ordering of pathId.
    constexpr ui32 REGION_ID_BIAS = 0x80000000U;
    constexpr ui64 PATH_ID_BIAS = 0x8000000000000000ULL;

    TStringBuilder data;
    data << date << "\t"
         << hostId << "\t"
         << ToString(pathId) << "\t"
         << ToString(regionId) << "\t"
         << ToString(queryStat.ShowsTotal) << "\t"
         << ToString(queryStat.ClicksTotal) << "\t"
         << ToString(queryStat.Shows_1) << "\t"
         << ToString(queryStat.Clicks_1) << "\t"
         << ToString(queryStat.Shows_2_3) << "\t"
         << ToString(queryStat.Clicks_2_3) << "\t"
         << ToString(queryStat.AgrPosShows_2_3) << "\t"
         << ToString(queryStat.AgrPosClicks_2_3) << "\t"
         << ToString(queryStat.Shows_4_10) << "\t"
         << ToString(queryStat.Clicks_4_10) << "\t"
         << ToString(queryStat.AgrPosShows_4_10) << "\t"
         << ToString(queryStat.AgrPosClicks_4_10) << "\t"
         << ToString(queryStat.Shows_11_50) << "\t"
         << ToString(queryStat.Clicks_11_50) << "\t"
         << ToString(queryStat.AgrPosShows_11_50) << "\t"
         << ToString(queryStat.AgrPosClicks_11_50) << "\t"
         << ToString(queryStat.Shows_1_50) << "\t"
         << ToString(queryStat.Clicks_1_50) << "\t"
         << ToString(queryStat.AgrPosShows_1_50) << "\t"
         << ToString(queryStat.AgrPosClicks_1_50) << "\t"
         << ToString(queryStat.SerpsTotal) << "\t"
         << deviceType << "\n";

    NYT::TNode row;
    row["data"] = data;
    row["TableId"] = tableId;
    row["PartitionKey"] = ToString(CityHash64(hostId) % 128);
    row["OrderKey"] = Sprintf("%s%08X%016lX", hostId.c_str(),
        static_cast<ui32>(regionId) + REGION_ID_BIAS,
        static_cast<ui64>(pathId) + PATH_ID_BIAS);
    row["IsPacked"] = false;
    writer->AddRow(row);
}

// Emits the statistics rows for one parsed QueryMessage.
//
// Every per-region report whose timestamp passes the configured
// [DateFrom, DateTo] filter (a bound equal to 0 is not applied) produces one
// row with its own region id. The same reports are also accumulated per
// (date, device type); after the loop each accumulated total is written as an
// extra row with region id -1.
void TMapQueriesTopUrls::WriteQueryValues(size_t tableId, const TString &hostId, int64_t pathId,
    const proto::queries2::QueryMessage &message, TWriter* writer) {
    const TRunConfig &config = TRunConfig::CInstance();

    // date -> device type -> accumulated totals over all regions
    THashMap<TString, THashMap<TString, TQueryStat>> totals;

    for (const proto::queries2::QueryRegionInfo &report : message.reports_by_region()) {
        const bool beforeRange = config.DateFrom != 0 && report.timestamp() < config.DateFrom;
        const bool afterRange = config.DateTo != 0 && report.timestamp() > config.DateTo;
        if (beforeRange || afterRange) {
            continue;
        }

        const TString &reportDate = LocalDateFromTime(report.timestamp());
        const TString &reportDevice = PrintDeviceType(report.is_mobile(), report.is_pad());

        totals[reportDate][reportDevice].Add(report);

        const TQueryStat regionStat(report);
        WriteQueryStat(tableId, reportDate, hostId, pathId, regionStat, report.region_id(), reportDevice, writer);
    }

    // "total" region: one row per (date, device type) with region id -1
    for (const auto &dateEntry : totals) {
        const TString &totalDate = dateEntry.first;
        for (const auto &deviceEntry : dateEntry.second) {
            const TString &totalDevice = deviceEntry.first;
            const TQueryStat &totalStat = deviceEntry.second;
            WriteQueryStat(tableId, totalDate, hostId, pathId, totalStat, -1, totalDevice, writer);
        }
    }
}

// Mapper entry point. For every input row:
//   * derives the host id from the "key" column and skips hosts present in
//     the stop list;
//   * parses the "value" column into a QueryMessage and splits its url into
//     host and path; pathId is JavaQueryHash of the path;
//   * when config.ExportQueryText is set, writes a text row
//     (hostId, pathId, encoded path) to table FnvHash(hostId) % TextsTablesCount;
//   * writes the statistics rows through WriteQueryValues to table
//     FnvHash(hostId) % TablesCount, shifted by TextsTablesCount when text
//     export is enabled.
void TMapQueriesTopUrls::Do(TReader *reader, TWriter *writer) {
    // Adding 2^63 modulo 2^64 flips the sign bit of pathId, so its 16-digit hex
    // rendering sorts lexicographically in signed numeric order. The bias has
    // to be the full 16-digit value: a 15-digit literal (2^59) does not
    // preserve that ordering.
    constexpr ui64 PATH_ID_BIAS = 0x8000000000000000ULL;

    const THashSet<TString> &stopHosts = GetStopHostsList();
    const TRunConfig &config = TRunConfig::CInstance();
    proto::queries2::QueryMessage message;
    // Statistics tables are numbered after the text tables when both are exported.
    const size_t valuesTableIdxOffset = config.ExportQueryText ? config.TextsTablesCount : 0;

    for (; reader->IsValid(); reader->Next()) {
        const auto &inputRow = reader->GetRow();
        const TString &hostId = GetHostId(inputRow["key"].AsString());
        if (stopHosts.contains(hostId)) {
            continue;
        }

        message.Clear();
        // The parse result is discarded, so a value that fails to parse is not skipped.
        // NOTE(review): confirm whether such rows should be dropped instead.
        Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(inputRow["value"].AsString());

        TString urlHost, urlPath;
        SplitUrlToHostAndPath(message.url(), urlHost, urlPath);
        const int64_t pathId = JavaQueryHash(urlPath);
        const size_t hostHash = FnvHash<size_t>(hostId);
        const size_t tableId = hostHash % config.TablesCount + valuesTableIdxOffset;

        if (config.ExportQueryText) {
            TStringBuilder data;
            data << hostId << "\t"
                 << ToString(pathId) << "\t"
                 << encode(urlPath) << "\n";
            NYT::TNode textRow;
            textRow["data"] = data;
            textRow["TableId"] = hostHash % config.TextsTablesCount;
            textRow["PartitionKey"] = ToString(CityHash64(hostId) % 128);
            textRow["OrderKey"] = Sprintf("%s%016lX", hostId.c_str(), static_cast<ui64>(pathId) + PATH_ID_BIAS);
            textRow["IsPacked"] = false;
            writer->AddRow(textRow);
        }
        WriteQueryValues(tableId, hostId, pathId, message, writer);
    }
}


} //namespace NWebmaster

