#include <util/digest/fnv.h>
#include <util/string/builder.h>
#include <library/cpp/string_utils/quote/quote.h>

#include <util/generic/hash_set.h>
#include <library/cpp/getopt/last_getopt.h>
#include <mapreduce/yt/interface/client.h>
#include <wmconsole/version3/protos/links.pb.h>
#include <wmconsole/version3/wmcutil/hostid.h>
#include <wmconsole/version3/wmcutil/url.h>

#include "partition.h"
#include "run_config.h"
#include "task_links.h"
#include "util.h"

namespace NWebmaster {

// Appends one tab-separated, newline-terminated record describing a single
// link to `data`.
//
// Columns, in output order:
//   date, hostId,
//   target path (encoded), unescaped target path (encoded),
//   target HTTP code, target last access,
//   source host id, source path (encoded),
//   UTF-8 source host immediately followed by the unescaped source path
//   (encoded) -- the host prefix is present for non-internal links only,
//   source last access, source tci, link date, link text (encoded),
//   source iks, timestamp.
//
// date    - value written as the first column.
// hostId  - value written as the second column.
// message - link record the remaining columns are read from.
// data    - output builder; the record is appended, existing content is kept.
void TMapLinks::PackQueryData(const TString &date, const TString &hostId,
                                    const proto::links::RawLinkInfo &message, TStringBuilder &data) {
    // Stays empty for internal links, so those rows carry only the path in
    // the "UTF-8 source" column.
    // NOTE(review): IDNToUtf8 presumably writes the decoded host into its
    // second argument -- confirm against NUtils.
    TString sourceHostUtf8;
    if (!message.internal()) {
        NUtils::IDNToUtf8(message.source_host(), sourceHostUtf8);
    }

    // Row prefix.
    data << date << "\t" << hostId << "\t";

    // Target side of the link.
    data << encode(message.target_path()) << "\t";
    data << encode(UrlUnescapeRet(message.target_path())) << "\t";
    data << ToString(message.target_http_code()) << "\t";
    data << ToString(message.target_last_access()) << "\t";

    // Source side of the link. The UTF-8 host and the unescaped path are
    // written back to back, without a separator between them.
    data << TWebmasterHostId::FromHostname(message.source_host()).Str() << "\t";
    data << encode(message.source_path()) << "\t";
    data << sourceHostUtf8 << encode(UrlUnescapeRet(message.source_path())) << "\t";
    data << ToString(message.source_last_access()) << "\t";
    data << ToString(message.source_tci()) << "\t";

    // Attributes of the link itself; the last column terminates the line.
    data << ToString(message.link_date()) << "\t";
    data << encode(ToString(message.text())) << "\t";
    data << ToString(message.source_iks()) << "\t";
    data << ToString(message.timestamp()) << "\n";
}

// Map stage: turns every input row into a packed text record tagged with a
// partition id, a line id and the input row index.
//
// For each row the host id is derived from the "key" column; rows whose host
// id is in the stop-host list are skipped. The "value" column is parsed as a
// proto::links::RawLinkInfo message and serialized with PackQueryData().
//
// reader - input rows; "key" and "value" columns are read as strings.
// writer - receives one output row per non-skipped input row.
void TMapLinks::Do(TReader *reader, TWriter *writer) {
    const THashSet<TString> &stopHosts = GetStopHostsList();

    const TRunConfig &config = TRunConfig::CInstance();
    const TString &date = config.DateFieldValue;

    // Reused across rows; explicitly cleared before every parse.
    proto::links::RawLinkInfo message;

    for (; reader->IsValid(); reader->Next()) {
        const auto &row = reader->GetRow();

        const TString hostId = GetHostId(row["key"].AsString());
        if (stopHosts.contains(hostId)) {
            continue;
        }

        message.Clear();
        // The parse result is intentionally discarded (nodiscard suppressed),
        // so the row is processed with whatever fields the message holds
        // after the call.
        Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(row["value"].AsString());

        TStringBuilder data;
        PackQueryData(date, hostId, message, data);
        if (data.empty()) {
            continue;
        }

        // Partition is chosen by host; line id by the (target, source) path pair.
        const size_t partId = FnvHash<size_t>(hostId) % config.TablesCount;
        const size_t lineId = FnvHash<size_t>(message.target_path() + message.source_path()) % config.LinesCount;

        writer->AddRow(NYT::TNode()
                       (TReduceCompressPartition::F_PARTITION_ID, partId)
                       (TReduceCompressPartition::F_LINE_ID, lineId)
                       (TReduceCompressPartition::F_ROW_ID, reader->GetRowIndex())
                       (TReduceCompressPartition::F_DATA, data)
                       );
    }
}

} //namespace NWebmaster
