#include <mapreduce/yt/interface/client.h>

#include <util/charset/wide.h>
#include <util/datetime/base.h>
#include <util/string/cast.h>
#include <util/string/join.h>

#include <wmconsole/version3/wmcutil/siphash.h>
#include <wmconsole/version3/wmcutil/string.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>

#include "query_utils.h"

namespace NWebmaster {

// Builds an empty digest; all state initialisation is delegated to Reset()
// so that construction and re-use share a single code path.
TQueryDigest::TQueryDigest() {
    this->Reset();
}

// Returns the digest to its initial state: no regions recorded, zero shows,
// zero clicks and a zero accumulated query hash.
void TQueryDigest::Reset() {
    Regions.clear();
    QueriesHash = 0;
    Clicks = 0;
    Shows = 0;
}

// Folds one query message into the digest.
//
// For every per-region report in `msg` the region id is recorded in Regions,
// and the shows/clicks of each of its position entries are added to the
// running Shows/Clicks totals. Afterwards the low 32 bits of
// SipHash64(corrected_query) are added to QueriesHash (hashes are combined by
// plain addition).
void TQueryDigest::Add(const proto::queries2::QueryMessage &msg) {
    for (const proto::queries2::QueryRegionInfo &report : msg.reports_by_region()) {
        Regions.insert(report.region_id());

        for (const auto &slot : report.position_info()) {
            Shows += slot.shows_count();
            Clicks += slot.clicks_count();
        }
    }

    // Only the lower 32 bits of the 64-bit hash take part in the sum.
    const auto queryHash32 = SipHash64(msg.corrected_query()) & 0xFFFFFFFFul;
    QueriesHash += queryHash32;
}

// Renders the digest as "<QueriesHash>;<Shows>;<Clicks>;<number of regions>".
TString TQueryDigest::Signature() const {
    TString signature = ToString(QueriesHash);
    signature += ";";
    signature += ToString(Shows);
    signature += ";";
    signature += ToString(Clicks);
    signature += ";";
    signature += ToString(Regions.size());
    return signature;
}

// Reducer body: collapses all rows of one key into a single digest row.
//
// The key of the first row is taken as the host name. Each row's value is
// parsed as a serialized proto::queries2::QueryMessage and accumulated into a
// TQueryDigest. One output row is emitted with the columns
// host / signature / shows / clicks / regions.
void TReduceSignHosts::Do(TReader* input, TWriter* output) {
    const TString host = TString{input->GetRow().Key};

    TQueryDigest digest;
    while (input->IsValid()) {
        const NYT::TYaMRRow &row = input->GetRow();

        proto::queries2::QueryMessage msg;
        // The parse result is intentionally discarded: a row that fails to
        // parse is not reported and whatever ended up in `msg` is still added.
        Y_PROTOBUF_SUPPRESS_NODISCARD msg.ParseFromArray(row.Value.data(), row.Value.size());
        digest.Add(msg);

        input->Next();
    }

    const NYT::TNode summary = NYT::TNode()
        ("host", host)
        ("signature", digest.QueriesHash)
        ("shows", digest.Shows)
        ("clicks", digest.Clicks)
        ("regions", digest.Regions.size());
    output->AddRow(summary);
}

// Registers TReduceSignHosts through the REGISTER_REDUCER macro; this is the
// reducer that SignQueryTable() passes to Reduce().
// NOTE(review): presumably required so the job side can instantiate the
// reducer by name — exact semantics are defined by the macro, confirm there.
REGISTER_REDUCER(TReduceSignHosts);

static int AgeDays(time_t timestamp) {
    static TInstant now = Now();
    return (now - TInstant::Seconds(timestamp)).Hours() / 24;
}

// Copies the message-level scalar fields (timestamp, cm2, url,
// corrected_query, query) from `src` to `dst`.
//
// A field is copied only when it is set in `src`; fields that are unset in
// `src` are left untouched in `dst` (they are not cleared). The per-region
// reports are not copied here.
void CopyQueryMessagePart(const proto::queries2::QueryMessage &src, proto::queries2::QueryMessage &dst) {
    if (src.has_timestamp()) {
        dst.set_timestamp(src.timestamp());
    }

    if (src.has_cm2()) {
        dst.set_cm2(src.cm2());
    }

    if (src.has_url()) {
        dst.set_url(src.url());
    }

    if (src.has_corrected_query()) {
        dst.set_corrected_query(src.corrected_query());
    }

    if (src.has_query()) {
        dst.set_query(src.query());
    }
}

// Copies the region-level scalar fields from `src` to `dst`.
//
// region_id is always copied. timestamp, is_pad, is_mobile, direct_avg_bid
// and shown_serps are copied only when set in `src`; otherwise the
// corresponding field of `dst` is left as it was. position_info entries are
// not copied by this helper.
static void CopyRegionParams(const proto::queries2::QueryRegionInfo &src, proto::queries2::QueryRegionInfo &dst) {
    if (src.has_timestamp()) {
        dst.set_timestamp(src.timestamp());
    }

    if (src.has_is_pad()) {
        dst.set_is_pad(src.is_pad());
    }

    if (src.has_is_mobile()) {
        dst.set_is_mobile(src.is_mobile());
    }

    if (src.has_direct_avg_bid()) {
        dst.set_direct_avg_bid(src.direct_avg_bid());
    }

    if (src.has_shown_serps()) {
        dst.set_shown_serps(src.shown_serps());
    }

    dst.set_region_id(src.region_id());
}

// Normalises a search query for auto-campaign use.
//
// Processing steps:
//   1. The query is converted to UTF-16 and every code unit that appears in
//      the ignore list is replaced with a space (sets DFQF_REMOVED_SYMBOLS).
//   2. The result is converted back to UTF-8 and split on ' '. Words whose
//      UTF-8 size is MAX_WORD_LENGTH or more are dropped
//      (sets DFQF_REMOVED_LONG_WORDS).
//   3. If more than MAX_WORDS_COUNT words remain, only the first
//      MAX_WORDS_COUNT are kept (sets DFQF_QUERY_RESIZED).
//   4. The surviving words are joined with single spaces.
//
// Parameters:
//   query         - source query, UTF-8.
//   filteredQuery - receives the normalised query; written only on success.
//   flags         - receives the DFQF_* bit mask describing what was changed;
//                   overwritten (not OR-ed) and written only on success.
//
// Returns false, leaving both output parameters untouched, when no word
// survives the filtering; true otherwise.
//
// NOTE(review): the length check uses TStringBuf::size() on UTF-8 data, so
// MAX_WORD_LENGTH is measured in bytes rather than characters — confirm this
// is intended for non-ASCII queries.
// NOTE(review): whether runs of spaces yield empty words depends on
// NUtils::FastSplit — confirm against its implementation.
bool FilterAutoCampaignQuery(const TString &query, TString &filteredQuery, int &flags) {
    const static TUtf16String IGNORE_CHARS_STR = u"/?_;:*&%$№@{}[]±+-§<>\"'|\\`()^#.,=";
    const static THashSet<wchar16> IGNORE_CHARS_SET(IGNORE_CHARS_STR.begin(), IGNORE_CHARS_STR.end());
    constexpr size_t MAX_WORD_LENGTH = 35;
    constexpr size_t MAX_WORDS_COUNT = 7;

    TUtf16String wQuery = UTF8ToWide(query);
    int newFlags = 0;

    for (size_t i = 0; i < wQuery.size(); i++) {
        if (IGNORE_CHARS_SET.contains(wQuery[i])) {
            wQuery[i] = 0x20; //space
            newFlags |= DFQF_REMOVED_SYMBOLS;
        }
    }

    // `words` and `shortWords` hold views into newQuery, which must therefore
    // stay alive until the join below.
    TString newQuery = WideToUTF8(wQuery);
    TVector<TStringBuf> words, shortWords;
    NUtils::FastSplit(TStringBuf(newQuery), ' ', words);

    for (const TStringBuf &word : words) {
        if (word.size() < MAX_WORD_LENGTH) {
            shortWords.push_back(word);
        } else {
            newFlags |= DFQF_REMOVED_LONG_WORDS;
        }
    }

    if (shortWords.empty()) {
        return false;
    }

    // Compare against the named limit rather than a literal so that the guard
    // and the truncation can never disagree (a mismatch would let resize()
    // grow the vector with empty words).
    if (shortWords.size() > MAX_WORDS_COUNT) {
        shortWords.resize(MAX_WORDS_COUNT);
        newFlags |= DFQF_QUERY_RESIZED;
    }

    filteredQuery = JoinSeq(" ", shortWords);
    flags = newFlags;
    return true;
}

// Builds in `dst` a copy of `src` restricted to recent region reports.
//
// The message-level fields are copied via CopyQueryMessagePart(); a region
// report is then appended to `dst` (in full, including its position entries)
// only when AgeDays() of its timestamp is at most `days`.
void SplitQueryMessageByTime(const proto::queries2::QueryMessage &src, proto::queries2::QueryMessage &dst, int days) {
    CopyQueryMessagePart(src, dst);

    for (const proto::queries2::QueryRegionInfo &report : src.reports_by_region()) {
        if (AgeDays(report.timestamp()) > days) {
            continue; // too old
        }

        dst.add_reports_by_region()->CopyFrom(report);
    }
}

// Builds in `dst` a copy of `src` that keeps only position entries whose
// position is strictly below `position`.
//
// The message-level fields are copied via CopyQueryMessagePart(). For each
// source region report, a destination report is created lazily — on the first
// entry that passes the filter — and initialised with CopyRegionParams(), so
// regions with no matching entries do not appear in `dst` at all.
//
// Returns the total number of position entries copied.
size_t SplitQueryMessageByPosition(const proto::queries2::QueryMessage &src, proto::queries2::QueryMessage &dst, ui32 position) {
    CopyQueryMessagePart(src, dst);

    size_t copied = 0;
    for (const proto::queries2::QueryRegionInfo &report : src.reports_by_region()) {
        proto::queries2::QueryRegionInfo *target = nullptr;

        for (const proto::queries2::QueryPositionInfo &entry : report.position_info()) {
            if (entry.position() >= position) {
                continue;
            }

            if (!target) {
                target = dst.add_reports_by_region();
                CopyRegionParams(report, *target);
            }

            target->add_position_info()->CopyFrom(entry);
            ++copied;
        }
    }

    return copied;
}

// Computes per-key query digests for a YaMR-format table.
//
// Runs, through a single TOpRunner chain on `client`:
//   1. a Reduce over `input`, grouped by "key", using TReduceSignHosts with
//      the MEMORY_LIMIT_4GB memory limit; rows are written to `output`, whose
//      path is declared as sorted by "host";
//   2. a Merge that takes `output` as both its input and its output.
// NOTE(review): the in-place Merge presumably compacts/finalises the sorted
// output table — confirm against TOpRunner::Merge.
void SignQueryTable(NYT::IClientBasePtr client, const TString &input, const TString &output) {
    TOpRunner(client)
        .InputYaMR(input)
        .OutputNode(NYT::TRichYPath(output).SortedBy("host"))
        .ReduceBy("key")
        .MemoryLimit(MEMORY_LIMIT_4GB)
        .Reduce(new TReduceSignHosts)
        .Input(output)
        .Output(output)
        .Merge()
    ;
}

} //namespace NWebmaster
