#include <util/datetime/cputimer.h>
#include <util/draft/datetime.h>
#include <util/generic/vector.h>
#include <util/generic/deque.h>
#include <util/generic/hash_set.h>
#include <util/string/reverse.h>

#include <library/cpp/containers/comptrie/comptrie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>
#include <library/cpp/string_utils/url/url.h>

#include <mapreduce/yt/interface/client.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/owners.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "config.h"
#include "utils.h"

#include "task_user_sessions.h"

namespace NWebmaster {
namespace NCatalogia {

using namespace NJupiter;

namespace {
// Column names used in the TNode rows read and written by the jobs in this file.

// Columns identifying what was shown and where.
const char *F_DOMAIN = "domain";
const char *F_HOST = "host";
const char *F_QUERY = "query";
const char *F_URL = "url";
const char *F_REGION_ID = "region_id";
const char *F_POSITION = "position";

// Numeric identifiers computed for the query and url.
const char *F_QUERY_ID = "query_id";
const char *F_URL_ID = "url_id";

// Aggregated counters.
const char *F_SHOWS = "shows";
const char *F_CLICKS = "clicks";

// Page texts joined onto query rows.
const char *F_TITLE = "title";
const char *F_METADESCR = "metaDescription";
}

// Mapper over YaMR rows whose Key is a host and whose Value is a serialized
// proto::queries2::QueryMessage.
//
// A row is kept only if its host is a subdomain of at least one owner found in the
// domains trie (the trie stores reversed names, so the host is reversed before lookup).
// For every kept row the message url is mapped to its main mirror, its path is
// canonized, and one output row is emitted per (region, position, matching owner).
//
// MirrorsTrieFile, TrieStream and RobotsMap are serialized with the job
// (Y_SAVELOAD_JOB); Trie is rebuilt from TrieStream in Start().
struct TMapQueries : public NYT::IMapper<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(MirrorsTrieFile, TrieStream, RobotsMap)

public:
    TMapQueries() = default;

    // mirrorsTrieFile - name of the mirrors trie file passed to TMirrors.
    // trieStream      - raw bytes of the compact trie of reversed owner domains.
    // robotsMap       - data handed to every TRobotsCanonizer created by the job.
    // All arguments are copied into the job state.
    TMapQueries(const TString &mirrorsTrieFile, const TVector<char> &trieStream, const THashMap<TString, TString> &robotsMap)
        : MirrorsTrieFile(mirrorsTrieFile)
        , TrieStream(trieStream)
        , RobotsMap(robotsMap)
    {
    }

public:
    void Start(TWriter* /*writer*/) override {
        // data() rather than &TrieStream[0]: indexing an empty vector is undefined behaviour.
        Trie.Init(TrieStream.data(), TrieStream.size());
    }

    void Do(TReader *input, TWriter *output) override {
        // Urls whose Metrika owner is one of these are dropped.
        static const THashSet<TString> OWNERS_SKIP_LIST = {
            "yandex.by",
            "yandex.com",
            "yandex.com.tr",
            "yandex.kz",
            "yandex.ru",
            "yandex.ua",
        };

        THolder<TRobotsCanonizer> canonizer;
        THolder<TMirrors> mirrors(new TMirrors(MirrorsTrieFile));
        TString canonizingHost;

        for (; input->IsValid(); input->Next()) {
            const NYT::TYaMRRow &row = input->GetRow();

            const TString host = TString{row.Key};
            TString rhost = host;
            ReverseInPlace(rhost);

            // Every trie prefix of the reversed host is a candidate owner (a suffix of
            // the host); keep only those the host is really a subdomain of.
            THashSet<TString> owners;
            for (auto it = MakePrefixIterator(Trie, rhost.data(), rhost.size()); it; ++it) {
                const TString candidate = host.substr(host.size() - it.GetPrefixLen());

                if (NUtils::IsSubdomain(host, candidate)) {
                    owners.insert(candidate);
                }
            }

            if (owners.empty()) {
                continue;
            }

            proto::queries2::QueryMessage msg;
            // The parse result is deliberately ignored (as before): whatever was parsed is used.
            Y_PROTOBUF_SUPPRESS_NODISCARD msg.ParseFromArray(row.Value.data(), row.Value.length());

            TString urlHost, urlPath;
            if (!NUtils::SplitUrl(msg.url(), urlHost, urlPath)) {
                continue;
            }

            const TStringBuf urlOwner = NUtils::GetMetrikaOwner(urlHost);
            if (OWNERS_SKIP_LIST.contains(urlOwner)) {
                continue;
            }

            TString mainMirror;
            try {
                mainMirror = mirrors->GetMainMirror(urlHost);
            } catch (const yexception &e) {
                Cerr << "Unable to get main mirror for " << urlHost << ": " << e.what() << Endl;
                continue;
            }

            const TString mainUrl = TString::Join(mainMirror, urlPath);
            // A fresh canonizer is created whenever the main mirror changes.
            // The explicit null check covers the first row when mainMirror happens to
            // equal the initial (empty) canonizingHost, which previously dereferenced null.
            if (!canonizer || mainMirror != canonizingHost) {
                canonizer.Reset(new TRobotsCanonizer(RobotsMap));
                canonizingHost = mainMirror;
            }

            const TString canonizedPath = canonizer->GetCanonizedPath(mainUrl);
            const TString canonizedUrl = TString::Join(mainMirror, canonizedPath);
            const TUrlId urlId = GetUrlId(mainMirror, canonizedPath);
            const TQueryId queryId = GetQueryId(msg.corrected_query());

            for (int i = 0; i < msg.reports_by_region_size(); i++) {
                const auto &region = msg.reports_by_region(i);
                for (int p = 0; p < region.position_info_size(); p++) {
                    const auto &position = region.position_info(p);
                    for (const TString &owner : owners) {
                        output->AddRow(NYT::TNode()
                           (F_DOMAIN, owner)
                           (F_HOST, host)
                           (F_QUERY, msg.corrected_query())
                           (F_QUERY_ID, queryId)
                           (F_URL, canonizedUrl)
                           (F_URL_ID, urlId)
                           (F_REGION_ID, region.region_id())
                           (F_POSITION, position.position())
                           (F_SHOWS, position.shows_count())
                           (F_CLICKS, position.clicks_count())
                        );
                    }
                }
            }

        }
    }

public:
    TString MirrorsTrieFile;
    TVector<char> TrieStream;
    TCompactTrie<char> Trie;    // not serialized; initialized over TrieStream in Start()
    THashMap<TString, TString> RobotsMap;
};

REGISTER_MAPPER(TMapQueries)

//ReduceBy Domain, Host, Query, Url, RegionId, Position
// Collapses all rows of one key into a single row: the first row of the group
// supplies every column, and F_SHOWS / F_CLICKS are replaced by the sums over
// the whole group. Input and output rows have the same shape.
struct TReduceQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        // Copy of the first row; it is also counted by the loop below because the
        // reader has not been advanced yet.
        NYT::TNode result = input->GetRow();

        ui64 shows = 0;
        ui64 clicks = 0;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &current = input->GetRow();
            shows += current[F_SHOWS].AsUint64();
            clicks += current[F_CLICKS].AsUint64();
        }

        output->AddRow(result
           (F_SHOWS, shows)
           (F_CLICKS, clicks)
       );
    }
};

REGISTER_REDUCER(TReduceQueries)

//ReduceBy Url
// Joins page texts onto query rows that share the same url.
// Rows from table index 0 carry F_TITLE / F_METADESCR; every row from any other
// table is emitted with those two columns set to the most recently seen texts
// (empty strings if no texts row was seen for the key yet).
// NOTE(review): this relies on rows of table 0 arriving before the other rows
// within a key - confirm against the reduce input ordering.
struct TReduceEnrichQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        const int TABLENO_TEXTS = 0;

        TString title, metaDescription;
        for (; input->IsValid(); input->Next()) {
            if (input->GetTableIndex() == TABLENO_TEXTS) {
                // Texts rows are only read, so no copy of the node is needed.
                const NYT::TNode &texts = input->GetRow();
                title = texts[F_TITLE].AsString();
                metaDescription = texts[F_METADESCR].AsString();
            } else {
                NYT::TNode row = input->GetRow();
                row
                    (F_TITLE, title)
                    (F_METADESCR, metaDescription)
                ;
                output->AddRow(row);
            }
        }
    }
};

REGISTER_REDUCER(TReduceEnrichQueries)

// Collects the tables under `prefix` whose names match the period covering the
// last `days` days.
//
// client        - YT client used to list the tables.
// prefix        - path prefix passed to NYTUtils::GetTableList.
// days          - length of the look-back window, in days, ending now.
// tablesDest    - out: matching tables, ordered by name (previous content is dropped).
// periodsConfig - out: the period MatchPeriod reported for each table in tablesDest,
//                 index-aligned with it (previous content is dropped).
//
// Throws yexception if the table list cannot be obtained; in that case the
// output containers are left untouched.
void LoadSourceQueriesTables(NYT::IClientBasePtr client, const TString &prefix, int days, TDeque<NYTUtils::TTableInfo> &tablesDest, TDeque<time_t> &periodsConfig) {
    // Sample the clock once so both bounds are derived from the same instant.
    const TInstant now = Now();
    time_t periodBegin = (now - TDuration::Days(days)).Seconds();
    time_t periodEnd = now.Seconds();

    TDeque<NYTUtils::TTableInfo> tmpTables;
    if (!NYTUtils::GetTableList(client, prefix, tmpTables)) {
        ythrow yexception() << "No input tables found";
    }

    std::sort(tmpTables.begin(), tmpTables.end(), [](const NYTUtils::TTableInfo &lhs, const NYTUtils::TTableInfo &rhs) -> bool {
        return lhs.Name < rhs.Name;
    });

    periodsConfig.clear();
    tablesDest.clear();
    for (const NYTUtils::TTableInfo& table : tmpTables) {
        time_t matchedPeriod = 0;
        if (MatchPeriod(table.Name, periodBegin, periodEnd, matchedPeriod)) {
            tablesDest.push_back(table);
            periodsConfig.push_back(matchedPeriod);
        }
    }
}

int TaskUserSessions(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    TSimpleTimer timer;

    NYT::IClientPtr clientMain = NYT::CreateClient(cfg.MR_SERVER_HOST_MAIN);
    NYT::IClientPtr clientCatalogia = NYT::CreateClient(cfg.MR_SERVER_HOST_CATALOGIA);

    NYTUtils::CreatePath(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROOT);

    THashSet<TString> domains;
    TVector<char> domainsTrieStream;
    LoadCatalogiaDomains(clientCatalogia, cfg.TABLE_SOURCE_CATALOGIA_DOMAINS, domains, domainsTrieStream);

    THashMap<TString, TString> robotsMap;
    LoadCatalogiaRobots(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS_FLT, robotsMap);

    NYT::ITransactionPtr tx = clientMain->StartTransaction();
    NWebmaster::TOpRunner runner(tx);

    TDeque<NYTUtils::TTableInfo> sourceTables;
    TDeque<time_t> periodsConfig;
    LoadSourceQueriesTables(clientMain, cfg.TABLE_SOURCE_QUERIES_CONVERTED_PREFIX, 91, sourceTables, periodsConfig);

    for (const NYTUtils::TTableInfo &table : sourceTables) {
        runner.InputYaMR(table.Name);
    }

    const TString mirrorsPath = GetJupiterMirrorsTrieInProdFile(clientMain);

    runner
        .OutputNode(cfg.TABLE_CATALOGIA_SOURCE_QUERIES)
        .File(mirrorsPath)
        .JobCount(10000)
        .MemoryLimit(MEMORY_LIMIT_12GB)
        .ReduceBy(F_DOMAIN, F_HOST, F_QUERY, F_URL, F_REGION_ID, F_POSITION)
        .MapReduce(
            new TMapQueries(NYTUtils::GetObjectName(mirrorsPath), domainsTrieStream, robotsMap),
            new TReduceQueries,
            new TReduceQueries
        )
        .SortBy(F_URL)
        .Sort(cfg.TABLE_CATALOGIA_SOURCE_QUERIES)

        .InputNode(cfg.TABLE_CATALOGIA_SOURCE_CONTENT)
        .InputNode(cfg.TABLE_CATALOGIA_SOURCE_QUERIES)
        .OutputNode(NYT::TRichYPath(cfg.TABLE_CATALOGIA_EXPORT_QUERIES).SortedBy(NYT::TSortColumns(F_URL)))
        .ReduceBy(F_URL)
        .Reduce(new TReduceEnrichQueries)
    ;

    SetYtAttr(tx, cfg.TABLE_CATALOGIA_EXPORT_QUERIES, TAttrName::UploadTime, Now().ToStringLocalUpToSeconds());

    tx->Commit();

    return 0;
}

} //namespace NCatalogia
} //namespace NWebmaster
