#include <util/generic/deque.h>
#include <util/generic/set.h>
#include <util/generic/hash_set.h>
#include <util/string/printf.h>
#include <util/string/subst.h>
#include <util/system/user.h>

#include <kernel/geo/utils.h>
#include <library/cpp/getopt/last_getopt.h>
#include <mapreduce/yt/interface/client.h>

#include <wmconsole/version3/protos/queries2.pb.h>

#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>
#include <wmconsole/version3/wmcutil/url.h>

#include <wmconsole/version3/searchqueries-mr/batch_matcher.cpp>

namespace NWebmaster {

// Returns how many days lie between the UNIX timestamp `ts` (in seconds) and the
// current moment. The value is derived from the elapsed hour count divided by 24
// and is returned as an int.
static int AgeDays(time_t ts) {
    const auto elapsed = Now() - TInstant::Seconds(ts);
    return elapsed.Hours() / 24;
}

// Mapper that flattens serialized proto::queries2::QueryMessage values read from a
// YaMR table into plain rows.
//
// For every input row the YaMR key is emitted as "Host", and one output row is
// produced per position_info entry of every reports_by_region entry of the message.
struct TMapQueries : public NYT::IMapper<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        // NB: there are no commas between the literals below, so the compiler glues
        // them into ONE string; the deque therefore holds a single '|'-separated
        // pattern (assumed to be a regex-style alternation for TBatchMatcher — confirm).
        static TDeque<TString> junkPatterns = {
            "<<"
            "|\\&\\&"
            "|\\~\\~"
            "|/\\+"
            "|/-"
            "|title:"
            "|url:"
            "|site:"
            "|inurl:"
            "|host:"
            "|rhost:"
            "|domain:"
            "|mime:"
            "|lang:"
            "|date:"
            "|cat:"
            "|^\\s*!"
        };

        TBatchMatcher matcher(junkPatterns);

        for (; input->IsValid(); input->Next()) {
            const NYT::TYaMRRow &row = input->GetRow();
            const TString host{row.Key};

            proto::queries2::QueryMessage report;
            // The return value of ParseFromArray is discarded on purpose: a value that
            // fails to parse is not skipped, whatever ended up in `report` is processed.
            Y_PROTOBUF_SUPPRESS_NODISCARD report.ParseFromArray(row.Value.data(), row.Value.length());

            // Junk-query filtering is currently switched off: the matcher is still
            // consulted, but a match does not skip the message.
            if (matcher.Matches(report.query())) {
                //continue;
            }

            for (const auto &region : report.reports_by_region()) {
                // The 31-day age cut-off is disabled in the same way.
                if (AgeDays(region.timestamp()) > 31) {
                    //continue;
                }

                for (const auto &position : region.position_info()) {
                    // No click-count filter: every position entry yields a row.
                    NYT::TNode record;
                    record
                        ("Host", host)
                        ("Url", report.url())
                        ("Query", report.query())
                        ("RegionId", region.region_id())
                        ("Position", position.position())
                        ("Shows", position.shows_count())
                        ("Clicks", position.clicks_count())
                        //("Period", static_cast<ui64>(region.timestamp()))
                    ;
                    output->AddRow(record);
                }
            }
        }
    }
};

REGISTER_MAPPER(TMapQueries)

// Mapper that flattens serialized proto::queries2::QueryWeightedAggrInfo values read
// from a YaMR table into plain rows.
//
// One output row is written per per_group_info entry. The message-level fields
// (Url, Query, Shows, Clicks) and the YaMR key ("Host") are repeated on each of
// those rows; a message without per_group_info entries produces no rows at all.
struct TMapQueriesLQ : public NYT::IMapper<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TYaMRRow &row = input->GetRow();
            const TString host{row.Key};

            proto::queries2::QueryWeightedAggrInfo aggr;
            // The return value of ParseFromArray is discarded on purpose: a value that
            // fails to parse is not skipped, whatever ended up in `aggr` is processed.
            Y_PROTOBUF_SUPPRESS_NODISCARD aggr.ParseFromArray(row.Value.data(), row.Value.length());

            for (const auto &group : aggr.per_group_info()) {
                NYT::TNode record;
                record
                    // Message-level totals.
                    ("Host", host)
                    ("Url", aggr.url())
                    ("Query", aggr.query())
                    ("Shows", aggr.shows_count())
                    ("Clicks", aggr.clicks_count())
                    //("Period", static_cast<ui64>(region.timestamp()))

                    // Per-group breakdown.
                    ("PGI_Group", group.group())
                    ("PGI_ShowsCount", group.shows_count())
                    ("PGI_ClicksCount", group.clicks_count())
                    ("PGI_ShowsPositionProduct", group.shows_position_product())
                    ("PGI_ClicksPositionProduct", group.clicks_position_product())
                ;
                output->AddRow(record);
            }
        }
    }
};

REGISTER_MAPPER(TMapQueriesLQ)

} //namespace NWebmaster

// Builds a rich path for `table` whose read range is limited to rows with exactly
// the given key, so that a debug run touches a single host instead of the whole table.
//
// table - path of the table to read.
// host  - key to select; defaults to the host that used to be hard-coded here, so
//         existing one-argument callers behave exactly as before.
//
// NOTE(review): an exact-key range presumably requires the table to be sorted by
// that key — confirm for the tables this is used with.
NYT::TRichYPath DebugPath(const TString &table, const TString &host = "http://lesnoymarket.ru") {
    NYT::TRichYPath path(table);
    path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey(host))));
    return path;
}

int main(int argc, const char **argv) {
    using namespace NWebmaster;
    NYT::Initialize(argc, argv);
    NYTUtils::DisableLogger();

    TString output = "//tmp/webmaster/extracted_queries/" + GetUsername() + "/report_" + ToString(Now().Seconds());
    TString input = "//home/webmaster/prod/searchqueries/reports_v3/last_queries";
    TString mrServer = "hahn.yt.yandex.net";

    NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();

    opts
        .AddLongOption('s', "server", "MR server")
        .StoreResult(&mrServer)
        .DefaultValue(mrServer);

    opts
        .AddLongOption('i', "input", "Table with results")
        .StoreResult(&input)
        .DefaultValue(input);

    opts
        .AddLongOption('o', "output", "Table with results")
        .StoreResult(&output)
        .DefaultValue(output);

    THolder<NLastGetopt::TOptsParseResult> parsedOpts(new NLastGetopt::TOptsParseResult(&opts, argc, argv));

    NYT::IClientPtr client = NYT::CreateClient(mrServer);

    NYT::TTableSchema tableSchema;
    tableSchema.AddColumn(NYT::TColumnSchema().Name("Host").Type(NYT::VT_STRING));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("Url").Type(NYT::VT_STRING));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("Query").Type(NYT::VT_STRING));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("RegionId").Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("Position").Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("Shows").Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("Clicks").Type(NYT::VT_UINT64));
    //tableSchema.AddColumn(NYT::TColumnSchema().Name("Period").Type(NYT::VT_UINT64));

    tableSchema.AddColumn(NYT::TColumnSchema().Name("PGI_Group").Type(NYT::VT_INT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("PGI_ShowsCount").Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("PGI_ClicksCount").Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("PGI_ShowsPositionProduct").Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name("PGI_ClicksPositionProduct").Type(NYT::VT_UINT64));

    tableSchema.Strict(true);

    NWebmaster::TOpRunner(client)
        .InputYaMR(DebugPath(input))
        .OutputNode(NYT::TRichYPath(output).Schema(tableSchema))
        .MemoryLimit(MEMORY_LIMIT_1GB)
        .Map(new TMapQueriesLQ)
        //.SortBy("Host", "Query")
        .SortBy("Query", "RegionId")
        .Sort(output)
    ;
}
