#include <util/stream/file.h>
#include <util/generic/deque.h>
#include <util/string/join.h>
#include <util/string/printf.h>
#include <util/string/subst.h>
#include <util/system/user.h>

#include <library/cpp/getopt/last_getopt.h>
#include <mapreduce/yt/interface/client.h>

#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include <wmconsole/version3/searchqueries-mr/batch_matcher.cpp>
#include <wmconsole/version3/searchqueries-mr/tools/sqgrep_dbg/querystats.pb.h>

namespace NWebmaster {

// Accumulator for query statistics: number of query records folded in, total
// shows, total clicks and the set of region ids that were encountered.
struct TQueryStats {
    // Starts empty: all counters are zero and no regions are known.
    TQueryStats() = default;

    // Restores the accumulator from its serialized protobuf form.
    TQueryStats(const proto::QueryStats &msg) {
        *this += msg;
    }

    // Folds one query record into the accumulator: every region of the record
    // is remembered, shows/clicks of all its positions are summed up, and the
    // record itself is counted once.
    void Add(const proto::queries2::QueryMessage &msg) {
        for (const proto::queries2::QueryRegionInfo &regionInfo : msg.reports_by_region()) {
            Regions.insert(regionInfo.region_id());

            for (const auto &position : regionInfo.position_info()) {
                Shows += position.shows_count();
                Clicks += position.clicks_count();
            }
        }

        ++Queries;
    }

    // Merges another accumulator into this one.
    TQueryStats& operator+=(const TQueryStats &rhs) {
        Regions.insert(rhs.Regions.begin(), rhs.Regions.end());
        Clicks += rhs.Clicks;
        Shows += rhs.Shows;
        Queries += rhs.Queries;
        return *this;
    }

    // Merges a serialized accumulator into this one.
    TQueryStats& operator+=(const proto::QueryStats &rhs) {
        Regions.insert(rhs.regions().begin(), rhs.regions().end());
        Clicks += rhs.clicks();
        Shows += rhs.shows();
        Queries += rhs.queries();
        return *this;
    }

    // Writes the current state into msg; regions already stored in msg are
    // discarded first.
    void Save(proto::QueryStats &msg) {
        msg.set_queries(Queries);
        msg.set_shows(Shows);
        msg.set_clicks(Clicks);

        msg.clear_regions();
        for (const int regionId : Regions) {
            msg.add_regions(regionId);
        }
    }

    size_t Queries = 0;   // number of query records added
    size_t Shows = 0;     // sum of shows_count over all positions
    size_t Clicks = 0;    // sum of clicks_count over all positions
    TSet<int> Regions;    // distinct region ids seen so far
};

// Map stage of the grep: keeps the rows whose key equals Host (or all rows
// when Host is "*") and whose query text is accepted by Matcher, and emits one
// serialized proto::QueryStats per kept row, keyed by the query text.
struct TMapQueryExtractor : public NYT::IMapper<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TYaMRRow>> {
    Y_SAVELOAD_JOB(Queries, Host)

public:
    TMapQueryExtractor() = default;

    // host    - exact row key to select, or "*" to accept every key.
    // queries - patterns handed to TBatchMatcher; an empty list disables
    //           filtering by query text.
    TMapQueryExtractor(const TString &host, const TDeque<TString> &queries)
        : Host(host)
        , Queries(queries)
    {
    }

    // Processes every input row of the job. Rows that do not pass the host or
    // query filter are skipped; they must not stop the loop, otherwise all
    // remaining rows of the job would be silently dropped.
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TYaMRRow &row = input->GetRow();

            if (Host != "*" && TString{row.Key} != Host) {
                continue;
            }

            // The parse result is deliberately ignored (best effort).
            proto::queries2::QueryMessage msg;
            Y_PROTOBUF_SUPPRESS_NODISCARD msg.ParseFromArray(row.Value.data(), row.Value.length());

            if (!Queries.empty() && !Matcher->Matches(msg.query())) {
                continue;
            }

            // Per-row statistics; the reduce stage sums them up per query.
            TQueryStats stat;
            stat.Add(msg);

            proto::QueryStats dstMsg;
            stat.Save(dstMsg);
            TString stream;
            Y_PROTOBUF_SUPPRESS_NODISCARD dstMsg.SerializeToString(&stream);

            // `stream` and `msg` outlive the AddRow call, so the row may
            // safely refer to their data.
            NYT::TYaMRRow dstRow;
            dstRow.Key = msg.query();
            dstRow.Value = stream;
            output->AddRow(dstRow);
        }
    }

    // Builds the matcher from Queries. Matcher is not listed in
    // Y_SAVELOAD_JOB, so it is created here rather than restored.
    void Start(TWriter* /*writer*/) override {
        Matcher.Reset(new TBatchMatcher(Queries));
    }

public:
    TString Host;                    // row key to select, or "*" for any
    TDeque<TString> Queries;         // query patterns; empty means "match all"
    THolder<TBatchMatcher> Matcher;  // created in Start()
};

REGISTER_MAPPER(TMapQueryExtractor)

// Reduce stage: sums every proto::QueryStats value supplied by the reader and
// writes a single row holding a textual report under the key of the first row.
struct TReduceCalcSum : public NYT::IReducer<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TYaMRRow>> {
    void Do(TReader *input, TWriter *output) override {
        // The key is taken from the first row before the loop consumes the
        // reader. NOTE(review): assumes the reader holds at least one row
        // when Do is called - confirm against the YT reducer contract.
        const TString key = TString{input->GetRow().Key};

        TQueryStats total;
        while (input->IsValid()) {
            const NYT::TYaMRRow &row = input->GetRow();

            // The parse result is deliberately ignored (best effort).
            proto::QueryStats part;
            Y_PROTOBUF_SUPPRESS_NODISCARD part.ParseFromArray(row.Value.data(), row.Value.length());
            total += part;

            input->Next();
        }

        // Region ids are rendered as a space separated list.
        TDeque<TString> regionNames;
        for (const int regionId : total.Regions) {
            regionNames.push_back(ToString(regionId));
        }
        const TString joinedRegions = JoinSeq(" ", regionNames);

        const TString report = Sprintf(
            "queries:%zu shows:%zu clicks:%zu regions:[%s]",
            total.Queries,
            total.Shows,
            total.Clicks,
            joinedRegions.data());

        NYT::TYaMRRow result;
        result.Key = key;
        result.Value = report;
        output->AddRow(result);
    }
};

REGISTER_REDUCER(TReduceCalcSum)

} //namespace NWebmaster

// Entry point. Parses the command line, gathers the queries to grep for (from
// a file and/or a single command line value), normalizes the host name and
// derives the output table name. The MapReduce launch itself is commented out
// below, so at the moment the tool only prints what it would search for.
int main(int argc, const char **argv) {
    using namespace NWebmaster;

    NYT::Initialize(argc, argv);

    NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();

    TString mrServer;
    TString queriesPrefix;
    TString queriesFile;
    TString queryText;
    TString host;
    // Default output table: per-user directory, name stamped with the current time.
    TString output = "tmp/webmaster/sqgrep/" + GetUsername() + "/report_" + ToString(Now().Seconds());

    opts.AddLongOption('S', "server", "MR server")
        .StoreResult(&mrServer)
        .DefaultValue("banach.yt.yandex.net");
    opts.AddLongOption('p', "prefix", "converted_v2 queries tables prefix")
        .StoreResult(&queriesPrefix)
        .DefaultValue("home/webmaster/test/searchqueries/converted_v2/");
    opts.AddLongOption('o', "output", "Table with results")
        .StoreResult(&output)
        .DefaultValue(output);
    opts.AddLongOption('q', "query-text", "Query text to grep")
        .StoreResult(&queryText);
    opts.AddLongOption('Q', "query-file", "File with queries to grep divided by \\n")
        .StoreResult(&queriesFile);
    opts.AddLongOption('h', "hostname", "Hostname")
        .Required()
        .StoreResult(&host);

    // Constructing the result object performs the actual parsing and fills
    // the variables registered via StoreResult above.
    const NLastGetopt::TOptsParseResult parsedOpts(&opts, argc, argv);

    TDeque<TString> queries;

    // One query per line; empty lines are ignored.
    if (!queriesFile.empty()) {
        TUnbufferedFileInput input(queriesFile);

        for (TString line; input.ReadLine(line);) {
            if (!line.empty()) {
                queries.push_back(line);
            }
        }
    }

    if (!queryText.empty()) {
        queries.push_back(queryText);
    }

    // Hosts given without a scheme are treated as http.
    // NOTE(review): a "*" host becomes "http://*" here, while
    // TMapQueryExtractor treats only the exact string "*" as a wildcard -
    // confirm whether the wildcard is meant to be reachable from the CLI.
    const bool hasScheme = host.find("://") != TString::npos;
    if (!hasScheme) {
        host = "http://" + host;
    }

    host.to_lower();

    // The host becomes a suffix of the output table name.
    output += "_" + host;
    SubstGlobal(output, "://", "_");

    Cerr << "Host " << host << Endl;
    Cerr << "Loaded " << queries.size() << " queries" << Endl;

    if (queries.empty()) {
        Cerr << "  All queries will be extracted" << Endl;
    }
/*
    NYT::IClientPtr client = NYT::CreateClient(mrServer);
    NWebmaster::TOpRunner(client)
        .InputByPrefix(queriesPrefix)
        .Output(output)
        .MemoryLimit(MEMORY_LIMIT_10GB)
        .MapReduce(new TMapQueryExtractor(host, queries), new TReduceCalcSum)
        .Sort(output);
*/
}
