#include <atomic>

#include <util/generic/size_literals.h>
#include <util/thread/pool.h>

#include <mapreduce/yt/interface/client.h>

#include <wmconsole/version3/processors/user_sessions/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/library/batch_matcher.h>
#include <wmconsole/version3/processors/user_sessions/library/query_utils.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/protos/querygroups.pb.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>

#include "composite_filter.h"
#include "task_groups.h"

namespace NWebmaster {
namespace NReports {

namespace {
// Column names used in the rows read and written by the reducers in this file.
// The pointers themselves are const so that the names cannot be re-pointed by accident.
const char *const F_GROUP_ID              = "group_id";
const char *const F_QUERY                 = "query";
const char *const F_YAMR_KEY              = "key";
const char *const F_YAMR_SUBKEY           = "subkey";
const char *const F_YAMR_VALUE            = "value";

// Well-known group identifiers. GROUP_ID_ALL_QUERIES is the default group
// that every query row is reported under.
const char *const GROUP_ID_ALL_QUERIES    = "13816710-1dd2-11b2-8080-808080808080";
const char *const GROUP_ID_FAVORITES      = "1381b530-1dd2-11b2-8080-808080808080";
}

// Per-group query filter: a batch matcher built from the group description,
// plus an optional expression filter when the group carries a filter expression.
struct TTextFilter {
    using Ptr = TSimpleSharedPtr<TTextFilter>;

public:
    // groupInfo - group description; always used to build the batch matcher.
    // full      - selects BuildFullFilter (true) or BuildTextFilter (false)
    //             for the group's filter expression, if it has one.
    TTextFilter(const NWebmaster::proto::querygroups::HostGroupInfo &groupInfo, bool full)
        : Matcher(groupInfo)
    {
        if (!groupInfo.has_filter_expression()) {
            return; // no expression: Filter stays empty
        }

        if (!full) {
            Filter = BuildTextFilter(groupInfo.filter_expression());
        } else {
            Filter = BuildFullFilter(groupInfo.filter_expression());
        }
    }

    // True when the matcher matches the query text of data, or when the
    // expression filter exists and accepts data.
    bool Accepted(const TData &data) {
        if (Matcher.Matches(data.GetQuery())) {
            return true;
        }
        return Filter && Filter->Accepted(data);
    }

    // True when an expression filter exists and reports float constraints.
    bool HasFloatFilter() const {
        if (!Filter) {
            return false;
        }
        return Filter->HasFloatConstraints();
    }

public:
    TBatchMatcher Matcher;
    TFilter::Ptr Filter;
};

// Stage 1: for one host, combines the host's group settings (table 0) with its
// query rows (table 1). Every query row that survives SplitQueryMessageByPosition
// is written out together with the ids of the groups it may belong to.
//
// Output row: key = host, query = corrected query text,
//             value = serialized QueryBatchMessage (one query + matched group ids).
struct TReduceGroupsStage1 : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //ReduceBy: Host (key)
    void Do(TReader *input, TWriter *output) override {
        using namespace NWebmaster::proto::queries2;
        using namespace NWebmaster::proto::querygroups;

        const int TABLENO_GROUP_SETTINGS    = 0;
        const int TABLENO_QUERIES           = 1;

        // Hosts that are never processed.
        const THashSet<TString> IGNORE_HOSTS = {
            "http://yandex.ru",
            "https://yandex.ru",
        };

        // groupId -> successfully built text filter. Entries are never null.
        THashMap<TString, TTextFilter::Ptr> groupIdFilters;

        const TString host = input->GetRow()[F_YAMR_KEY].AsString();
        bool hostHaveGroups = false;

        if (IGNORE_HOSTS.contains(host)) {
            return;
        }

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const int tableNo = input->GetTableIndex();

            if (tableNo == TABLENO_QUERIES) { //query
                // A query row seen before any settings row means the host has no groups;
                // NOTE(review): this relies on settings rows preceding query rows - confirm.
                if (!hostHaveGroups) {
                    return;
                }

                QueryMessage queryTmp, query;
                Y_PROTOBUF_SUPPRESS_NODISCARD queryTmp.ParseFromString(row[F_YAMR_VALUE].AsString());
                queryTmp.clear_query(); //will use corrected_query
                if (SplitQueryMessageByPosition(queryTmp, query) == 0) {
                    continue;
                }

                TData data(query);

                THashSet<TString> matchedGroups = {
                    GROUP_ID_ALL_QUERIES, //all queries
                };

                for (const auto &obj : groupIdFilters) {
                    const TString &groupId = obj.first;
                    try {
                        TTextFilter &filter = *obj.second;

                        // Groups with float constraints are kept unconditionally here;
                        // presumably they are re-checked later on aggregated data - TODO confirm.
                        if (filter.Accepted(data) || filter.HasFloatFilter()) {
                            matchedGroups.insert(groupId);
                        }
                    } catch (const yexception &e) {
                        Cerr << "Host: " << host << " " << "GroupId:" << groupId << " " << e.what() << Endl;
                    }
                }

                //if (matchedGroups.empty()) { //fixme
                    //continue;
                //}

                QueryBatchMessage dstMsg;
                *dstMsg.add_queries() = query;
                for (const TString &groupId : matchedGroups) {
                    *dstMsg.add_matched_groups() = groupId;
                }

                TString stream;
                Y_PROTOBUF_SUPPRESS_NODISCARD dstMsg.SerializeToString(&stream);
                output->AddRow(NYT::TNode()
                    (F_YAMR_KEY, host)
                    (F_QUERY, query.corrected_query())
                    (F_YAMR_VALUE, stream)
                    //("Groups", JoinSeq(" ", matchedGroups))
                );
            } else if (tableNo == TABLENO_GROUP_SETTINGS) { //setting
                hostHaveGroups = true;
                const TString &groupId = row[F_YAMR_SUBKEY].AsString();

                //if (groupId != GROUP_ID_FAVORITES) { //fixme
                    //continue;
                //}

                HostGroupInfo groupInfo;
                Y_PROTOBUF_SUPPRESS_NODISCARD groupInfo.ParseFromString(row[F_YAMR_VALUE].AsString());
                try {
                    // Build the filter before touching the map: if construction throws,
                    // no null entry is left behind to be dereferenced for query rows.
                    TTextFilter::Ptr built;
                    built.Reset(new TTextFilter(groupInfo, false));
                    groupIdFilters[groupId] = built;
                } catch (const yexception &e) {
                    Cerr << "Host: " << host << " " << "GroupId:" << groupId << " " << e.what() << Endl;
                }
            } else {
                ythrow yexception() << "unknown table passed";
            }
        }
    }
}; //TReduceGroupsStage1;

REGISTER_REDUCER(TReduceGroupsStage1)

// Stage 2: merges all rows sharing the same (host, query) into a single
// QueryBatchMessage that carries every query message and the union of the
// matched group ids. The output row holds only key and value.
struct TReduceGroupsStage2 : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //ReduceBy: Host (key), Query
    void Do(TReader *input, TWriter *output) override {
        using namespace NWebmaster::proto::queries2;

        // Copied up front: the reader is exhausted by the time they are needed.
        const NYT::TNode hostNode = input->GetRow()[F_YAMR_KEY];
        const NYT::TNode queryNode = input->GetRow()[F_QUERY];

        QueryBatchMessage merged;
        THashSet<TString> groupIds;
        while (input->IsValid()) {
            QueryBatchMessage part;
            Y_PROTOBUF_SUPPRESS_NODISCARD part.ParseFromString(input->GetRow()[F_YAMR_VALUE].AsString());

            for (const auto &queryMsg : part.queries()) {
                *merged.add_queries() = queryMsg;
            }

            for (const auto &groupId : part.matched_groups()) {
                groupIds.insert(groupId);
            }

            input->Next();
        }

        for (const TString &groupId : groupIds) {
            *merged.add_matched_groups() = groupId;
        }

        try {
            TString serialized;
            Y_PROTOBUF_SUPPRESS_NODISCARD merged.SerializeToString(&serialized);

            NYT::TNode outRow;
            outRow
                (F_YAMR_KEY, hostNode)
                (F_YAMR_VALUE, serialized)
                //("Groups", JoinSeq(" ", groupIds))
            ;
            output->AddRow(outRow);
        } catch (yexception &e) {
            // A failed write is logged and the (host, query) pair is dropped.
            Cerr << "Host: " << hostNode.AsString() << " " << "Query:" << queryNode.AsString() << " " << e.what() << Endl;
        }
    }
}; //TReduceGroupsStage2;

REGISTER_REDUCER(TReduceGroupsStage2)

// Stage 3: for one host, applies the full group filters (table 0: group settings)
// to the per-query batches produced by stage 2 (table 1).
//
// Output rows (key = host, group_id, value):
//   * one row per (batch, accepting group) with the batch payload;
//   * one row per batch for the default GROUP_ID_ALL_QUERIES group;
//   * one "fake" row with an empty value for every configured group that
//     accepted no batch at all.
struct TReduceGroupsStage3 : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //ReduceBy: Host(key)
    void Do(TReader *input, TWriter *output) override {
        const int TABLENO_GROUP_SETTINGS    = 0;
        const int TABLENO_PREPARED_GROUPS   = 1;

        // GROUP_ID_FAVORITES is only referenced by commented-out debugging code in
        // this file; touch it here so the constant is not reported as unused.
        static_cast<void>(GROUP_ID_FAVORITES);

        // groupId -> full filter. The pointer is null when the filter failed to build;
        // such groups match nothing but still receive the fake row below.
        THashMap<TString, TTextFilter::Ptr> groupIdFilters;

        const NYT::TNode host = input->GetRow()[F_YAMR_KEY];
        THashSet<TString> acceptedGroups;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const int tableNo = input->GetTableIndex();

            if (tableNo == TABLENO_PREPARED_GROUPS) {
                NWebmaster::proto::queries2::QueryBatchMessage batch;
                Y_PROTOBUF_SUPPRESS_NODISCARD batch.ParseFromString(row[F_YAMR_VALUE].AsString());

                THashSet<TString> matchedGroups;
                for (int i = 0; i < batch.matched_groups_size(); i++) {
                    matchedGroups.insert(batch.matched_groups(i));
                }

                TData data(batch);

                for (const auto &obj : groupIdFilters) {
                    const TString &groupId = obj.first;
                    if (!obj.second) { //filter construction failed earlier, nothing to evaluate
                        continue;
                    }
                    TTextFilter &filter = *obj.second;

                    // A filter with text constraints must already have been matched by stage 1.
                    if (filter.Filter && filter.Filter->HasTextConstraints() && !matchedGroups.contains(groupId)) { //something strange happened
                        continue;
                    }

                    try {
                        if (filter.Accepted(data)) {
                            output->AddRow(NYT::TNode()
                                (F_YAMR_KEY, host)
                                (F_GROUP_ID, groupId)
                                (F_YAMR_VALUE, row[F_YAMR_VALUE])
                            );
                            acceptedGroups.insert(groupId);
                        }
                    } catch (const yexception &e) {
                        // Report the group that actually failed.
                        Cerr << "Host: " << host.AsString() << " " << "GroupId:" << groupId << " " << e.what() << Endl;
                    }
                }

                output->AddRow(NYT::TNode() //default group
                    (F_YAMR_KEY, host)
                    (F_GROUP_ID, GROUP_ID_ALL_QUERIES)
                    (F_YAMR_VALUE, row[F_YAMR_VALUE])
                );
            } else if (tableNo == TABLENO_GROUP_SETTINGS) {
                const TString &groupId = row[F_YAMR_SUBKEY].AsString();
                try {
                    NWebmaster::proto::querygroups::HostGroupInfo groupInfo;
                    Y_PROTOBUF_SUPPRESS_NODISCARD groupInfo.ParseFromString(row[F_YAMR_VALUE].AsString());

                    if (groupId == GROUP_ID_ALL_QUERIES) { //skip default group filter creation
                        continue;
                    } else {
                        // Register the group first so that it still gets a fake row if the
                        // filter cannot be built; the slot then simply stays null.
                        TTextFilter::Ptr &slot = groupIdFilters[groupId];
                        slot.Reset(new TTextFilter(groupInfo, true));
                    }
                } catch (const yexception &e) {
                    Cerr << "Host: " << host.AsString() << " " << "GroupId:" << groupId << " " << e.what() << Endl;
                }
            } else {
                ythrow yexception() << "unknown table passed";
            }
        }

        for (const auto& groupObj : groupIdFilters) {
            if (!acceptedGroups.contains(groupObj.first)) {
                output->AddRow(NYT::TNode() //fake group
                    (F_YAMR_KEY, host)
                    (F_GROUP_ID, groupObj.first)
                    (F_YAMR_VALUE, "")
                );
            }
        }
    }
}; //TReduceGroupsStage3

REGISTER_REDUCER(TReduceGroupsStage3)

// Accumulates src into dst: shows and clicks are summed, and dst's merged
// counter is incremented by one for this merge.
inline void SumQueryRegionReports(proto::queries2::QueryPositionInfo &dst, const proto::queries2::QueryPositionInfo &src) {
    const auto totalShows = dst.shows_count() + src.shows_count();
    const auto totalClicks = dst.clicks_count() + src.clicks_count();
    const auto mergedSoFar = dst.merged_count() + 1;

    dst.set_shows_count(totalShows);
    dst.set_clicks_count(totalClicks);
    dst.set_merged_count(mergedSoFar);
}

// Stage 4: collapses all batches of one (host, group) into a single QueryMessage.
// Region reports are aggregated per (is_mobile, is_pad, region_id); inside each
// region the position infos are summed per position.
//
// Output row: key = host, subkey = group id, value = serialized QueryMessage.
struct TReduceGroupsStage4 : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //ReduceBy: Host(key), GroupId
    void Do(TReader *input, TWriter *output) override {
        using namespace NWebmaster::proto::queries2;

        const NYT::TNode host = input->GetRow()[F_YAMR_KEY];
        const TString groupId = input->GetRow()[F_GROUP_ID].AsString();

        // Aggregate for a single region.
        struct TRegionInfo {
            THashMap<ui32, QueryPositionInfo> PositionMap; // position -> summed info
            long ShownSerps = 0;
        };

        using TRegionMap = THashMap<ui32, TRegionInfo>;        // region id -> aggregate
        using TDeviceKey = std::pair<bool, bool>;              // (is_mobile, is_pad)
        THashMap<TDeviceKey, TRegionMap> aggregated;

        while (input->IsValid()) {
            QueryBatchMessage batch;
            Y_PROTOBUF_SUPPRESS_NODISCARD batch.ParseFromString(input->GetRow()[F_YAMR_VALUE].AsString());

            for (const auto &queryMsg : batch.queries()) {
                for (const auto &report : queryMsg.reports_by_region()) {
                    const TDeviceKey device = std::make_pair(report.is_mobile(), report.is_pad());
                    TRegionInfo &region = aggregated[device][report.region_id()];
                    region.ShownSerps += report.shown_serps();

                    for (const auto &posInfo : report.position_info()) {
                        SumQueryRegionReports(region.PositionMap[posInfo.position()], posInfo);
                    }
                }
            }

            input->Next();
        }

        QueryMessage result;
        for (const auto &deviceEntry : aggregated) {
            const TDeviceKey &device = deviceEntry.first;
            const TRegionMap &regions = deviceEntry.second;

            for (const auto &regionEntry : regions) {
                const TRegionInfo &region = regionEntry.second;

                QueryRegionInfo *outRegion = result.add_reports_by_region();
                outRegion->set_region_id(regionEntry.first);
                outRegion->set_is_mobile(device.first);
                outRegion->set_is_pad(device.second);
                outRegion->set_shown_serps(region.ShownSerps);

                for (const auto &posEntry : region.PositionMap) {
                    QueryPositionInfo *outPos = outRegion->add_position_info();
                    *outPos = posEntry.second;
                    // The map key is authoritative for the position value.
                    outPos->set_position(posEntry.first);
                }
            }
        }

        TString serialized;
        Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&serialized);

        NYT::TNode outRow;
        outRow
            (F_YAMR_KEY, host)
            (F_YAMR_SUBKEY, groupId)
            (F_YAMR_VALUE, serialized)
        ;
        output->AddRow(outRow);
    }
}; //TReduceGroupsStage4

REGISTER_REDUCER(TReduceGroupsStage4)

// Wraps a table name into a rich path. This is the single place where a
// debugging read range can be switched on for every input that goes through it.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
    // Debug hook: uncomment to restrict processing to a single host.
    //richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
    return richPath;
}

//https://wiki.yandex-team.ru/jandekspoisk/interfejjsy/centrvebmastera/poiskovye-zaprosy/filters/
int TaskQueryGroups(int, const char **) {
    const auto &cfg = TConfig::CInstance();

    NYT::IClientPtr client = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    NYTUtils::CreatePath(client, cfg.TABLE_REPORTS_GROUPS_ROOT);
    NYTUtils::CreatePath(client, NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "groups"));

    TDeque<NYTUtils::TTableInfo> sourceTables;
    NYTUtils::GetTableList(client, cfg.TABLE_REPORTS_GROUPS_ROOT, sourceTables);

    TSet<TString> processed;
    for (const NYTUtils::TTableInfo &table : sourceTables) {
        const TString tableName = NYTUtils::GetTableName(table.Name);
        processed.insert(tableName);
    }

    NYTUtils::GetTableList(client, cfg.TABLE_SOURCE_QUERIES_CONVERTED_PREFIX, sourceTables, 14);
    std::sort(sourceTables.begin(), sourceTables.end(), [](const NYTUtils::TTableInfo &lhs, const NYTUtils::TTableInfo &rhs) -> bool {
        return lhs.Name > rhs.Name;
    });

    TVector<TString> unprocessedTables;
    for (const NYTUtils::TTableInfo &table : sourceTables) {
        const TString tableName = NYTUtils::GetTableName(table.Name);
        if (!processed.contains(tableName)) {
            unprocessedTables.push_back(table.Name);
        }
    }

    if (unprocessedTables.empty()) {
        LOG_INFO("groups, there are no unprocessed tables");
        return 0;
    }

    const TString TABLE_SOURCE_GROUPS_SETTINGS_SORTED = NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "query_groups." + ToString(Now().MicroSeconds()));
    TOpRunner(client)
        .SortBy(F_YAMR_KEY, F_YAMR_SUBKEY)
        .Sort(cfg.TABLE_SOURCE_GROUPS_SETTINGS, TABLE_SOURCE_GROUPS_SETTINGS_SORTED)
    ;

    bool valid = true;
    THolder<IThreadPool> processQueue(CreateThreadPool(8));
    for (const TString &tablePath : unprocessedTables) {
        const TString tableName = NYTUtils::GetTableName(tablePath);
        processQueue->SafeAddFunc([=, &client, &cfg, &valid]() {
            try {
                NYT::ITransactionPtr tx = client->StartTransaction();
                const TString groupsPath = NYTUtils::JoinPath(cfg.TABLE_REPORTS_GROUPS_ROOT, tableName);
                const TString groupsPathTmp = NYTUtils::JoinPath(cfg.TABLE_TEMP_ROOT, "groups", tableName);

                TOpRunner(tx)
                    .Comment("calc queries groups stage1")
                    .InputNode(DebugPath(TABLE_SOURCE_GROUPS_SETTINGS_SORTED).Foreign(true))
                    .InputNode(DebugPath(tablePath).Primary(true))
                    .OutputNode(groupsPathTmp)
                    .MaxRowWeight(128_MBs)
                    .MemoryLimit(10_GBs)
                    .JoinBy(F_YAMR_KEY)
                    .JoinReduce(new TReduceGroupsStage1)

                    .Comment("calc queries groups stage2")
                    .SortBy(F_YAMR_KEY, F_QUERY)
                    .Sort(groupsPathTmp)
                    .InputNode(groupsPathTmp)
                    .OutputNode(NYT::TRichYPath(groupsPathTmp).SortedBy(NYT::TSortColumns(F_YAMR_KEY)))
                    .MaxRowWeight(128_MBs)
                    .MemoryLimit(10_GBs)
                    .ReduceBy(F_YAMR_KEY, F_QUERY)
                    .Reduce(new TReduceGroupsStage2)

                    .Comment("calc queries groups stage3")
                    .InputNode(DebugPath(TABLE_SOURCE_GROUPS_SETTINGS_SORTED))
                    .InputNode(groupsPathTmp)
                    .OutputNode(groupsPathTmp)
                    .MaxRowWeight(128_MBs)
                    .MemoryLimit(10_GBs)
                    .ReduceBy(F_YAMR_KEY)
                    .Reduce(new TReduceGroupsStage3)

                    .Comment("calc queries groups stage4")
                    .SortBy(F_YAMR_KEY, F_GROUP_ID)
                    .Sort(groupsPathTmp)
                    .InputNode(groupsPathTmp)
                    .OutputNode(groupsPathTmp)
                    .MaxRowWeight(128_MBs)
                    .MemoryLimit(10_GBs)
                    .ReduceBy(F_YAMR_KEY, F_GROUP_ID)
                    .Reduce(new TReduceGroupsStage4)

                    .MaxRowWeight(128_MBs)
                    .SortBy(F_YAMR_KEY, F_YAMR_SUBKEY)
                    .Sort(groupsPathTmp, groupsPath)
                ;

                tx->Commit();
            } catch (yexception &e) {
                LOG_ERROR("groups, table %s process error: %s", tablePath.c_str(), e.what());
                valid = false;
            }
        });
    }

    if (!valid) {
        ythrow yexception() << "something went wrong";
    }

    processQueue->Stop();
    client->Remove(TABLE_SOURCE_GROUPS_SETTINGS_SORTED);

    return 0;
}

} //namespace NReports
} //namespace NWebmaster
