#include <util/draft/date.h>
#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/string_utils/url/url.h>


#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/table.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/acceptance/conf/config.h>
#include <wmconsole/version3/protos/sitemap.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/monitor.h>


#include "task_sitemap.h"

namespace NWebmaster {
namespace NAcceptance {

using namespace NJupiter;

// Name prefixes of the export tables looked up under config.TABLE_SITEMAP_ROOT.
// ProcessTables parses the part of the table name after the prefix as the table timestamp.
static const char *HOST_DATA_TABLES_PREFIX = "HostDataToWebmasterExport_";
static const char *SITEMAP_DATA_TABLES_PREFIX = "SitemapDataToWebmasterExport_";


// Column name (and constant value) that every mapper row is emitted under; the map-reduce
// is reduced by this column, so all mapper rows share a single key.
static const char *F_KEY = "Key";

// Fields whose name starts with this prefix hold a map of counters instead of a single
// integer; the reducer sums such fields entry by entry.
static const char *F_MAP_PREFIX = "Map_";

// Counter columns written by THostDataAcceptanceMapper.
// SourceId0..SourceId4 count related sitemaps by source id, in this order:
// ROBOTS, WMCLOG, SITEMAPINDEX, UNKNOWN, WMC_ROBOTS.
static const char *F_HOSTS_WITHOUT_SITEMAPS = "HostsWithoutSitemaps";
static const char *F_SITEMAP_COUNT = "SitemapCount";
static const char *F_SOURCE_ID_0 = "SourceId0";
static const char *F_SOURCE_ID_1 = "SourceId1";
static const char *F_SOURCE_ID_2 = "SourceId2";
static const char *F_SOURCE_ID_3 = "SourceId3";
static const char *F_SOURCE_ID_4 = "SourceId4";
// Field list handed to the reducer and to monitoring in "hostrelated" mode.
const TVector<TString> HOST_DATA_FIELDS{F_HOSTS_WITHOUT_SITEMAPS, F_SITEMAP_COUNT, F_SOURCE_ID_0, F_SOURCE_ID_1,
                                               F_SOURCE_ID_2, F_SOURCE_ID_3, F_SOURCE_ID_4};


// Counter columns written by TSitemapDataAcceptanceMapper (F_SITEMAP_COUNT is shared with
// the host-data mapper).
static const char *F_KEY_WITHOUT_SITEMAPS = "KeyWithoutSitemaps";
static const char *F_SITEMAP_NO_ERRORS = "SitemapNoErrors";
static const char *F_SITEMAP_NO_URLS = "SitemapNoUrls";
static const char *F_URLS = "Urls";
static const char *F_SITEMAP_PARSED = "SitemapParsed";
static const char *F_SITEMAP_REDIRECTED = "SitemapRedirected";
static const char *F_SITEMAP_NO_SITEMAP_URL = "SitemapNoSitemapUrl";
static const char *F_SITEMAP_SITEMAP_URL_EQ_KEY = "SitemapSitemapUrlEqKey";

// Field list handed to the reducer and to monitoring in "sitemapdata" mode.
// The literal names must match the keys the mapper builds at runtime:
//   "HttpCode" + (http_code / 100) * 100, with every other class folded into "HttpCode0";
//   "Type" + sitemap type; "LastAccess" + age bucket index (0..7);
//   "Map_SourceId" is a map field (see F_MAP_PREFIX) keyed by the source id.
const TVector<TString> SITEMAP_DATA_FIELDS{
        F_SITEMAP_COUNT, F_SITEMAP_NO_ERRORS,
        F_SITEMAP_NO_URLS, F_KEY_WITHOUT_SITEMAPS,
        F_URLS, F_SITEMAP_PARSED,
        F_SITEMAP_REDIRECTED, F_SITEMAP_NO_SITEMAP_URL,
        F_SITEMAP_SITEMAP_URL_EQ_KEY, "HttpCode200",
        "HttpCode300", "HttpCode400",
        "HttpCode500", "HttpCode1000",
        "HttpCode2000", "HttpCode0",
        "Type0", "Type1",
        "Type2", "LastAccess0",
        "LastAccess1", "LastAccess2",
        "LastAccess3", "LastAccess4",
        "LastAccess5", "LastAccess6",
        "LastAccess7", "Map_SourceId"
};

// Number of seconds in one day; width of a last-access age bucket.
static const i64 ONE_DAY_SECONDS = 60 * 60 * 24;

// Last-access age buckets used by TSitemapDataAcceptanceMapper. The numeric value is
// appended to "LastAccess" to form the output column name, so the values must stay in
// sync with the "LastAccess0".."LastAccess7" entries of SITEMAP_DATA_FIELDS.
// Bucket N (N < 7) holds sitemaps whose age is below (N + 1) days and not below N days;
// OTHER holds everything that is seven days old or older.
enum {
    ONE_DAY = 0,
    TWO_DAY = 1,
    THREE_DAY = 2,
    FOUR_DAY = 3,
    FIVE_DAY = 4,
    SIX_DAY = 5,
    SEVEN_DAY = 6,
    OTHER = 7,
};

// Mapper over a host-data export table.
// Every input row carries a serialized proto::sitemap::HostRelatedSitemapMessage in its
// "value" column. The mapper folds all rows of its input into one output row of counters,
// written under the constant key F_KEY so that a single reducer can sum all mapper rows:
//   F_HOSTS_WITHOUT_SITEMAPS - rows whose message has no related sitemaps;
//   F_SITEMAP_COUNT          - total number of related sitemaps;
//   F_SOURCE_ID_0..4         - related sitemaps split by source id.
struct THostDataAcceptanceMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    Y_SAVELOAD_JOB(TableTs)

    THostDataAcceptanceMapper() = default;

    // tableTs is stored and serialized with the job; Do() itself does not read it.
    THostDataAcceptanceMapper(const long &tableTs)
            : TableTs(tableTs) {
    }

    void Do(TReader *input, TWriter *output) override {
        int hostsWithoutSitemaps = 0;
        int totalSitemaps = 0;
        int fromRobots = 0;
        int fromWmcLog = 0;
        int fromSitemapIndex = 0;
        int fromUnknown = 0;
        int fromWmcRobots = 0;

        for (const auto &cursor : *input) {
            proto::sitemap::HostRelatedSitemapMessage message;
            // The parse result is intentionally ignored.
            Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(cursor.GetRow()["value"].AsString());

            const auto &relatedSitemaps = message.Getrelated_sitemaps();
            const int relatedCount = relatedSitemaps.size();
            if (relatedCount <= 0) {
                ++hostsWithoutSitemaps;
                continue;
            }

            totalSitemaps += relatedCount;
            for (const auto &related: relatedSitemaps) {
                switch (related.Getsource_id()) {
                    case proto::sitemap::SitemapSourceId::ROBOTS_SOURCE_ID:
                        ++fromRobots;
                        break;
                    case proto::sitemap::SitemapSourceId::WMCLOG_SOURCE_ID:
                        ++fromWmcLog;
                        break;
                    case proto::sitemap::SitemapSourceId::SITEMAPINDEX_SOURCE_ID:
                        ++fromSitemapIndex;
                        break;
                    case proto::sitemap::SitemapSourceId::UNKNOWN_SOURCE_ID:
                        ++fromUnknown;
                        break;
                    case proto::sitemap::SitemapSourceId::WMC_ROBOTS_SOURCE_ID:
                        ++fromWmcRobots;
                        break;
                }
            }
        }

        // A single aggregated row per Do() call.
        NYT::TNode stats = NYT::TNode()
                (F_KEY, F_KEY)
                (F_HOSTS_WITHOUT_SITEMAPS, hostsWithoutSitemaps)
                (F_SITEMAP_COUNT, totalSitemaps)
                (F_SOURCE_ID_0, fromRobots)
                (F_SOURCE_ID_1, fromWmcLog)
                (F_SOURCE_ID_2, fromSitemapIndex)
                (F_SOURCE_ID_3, fromUnknown)
                (F_SOURCE_ID_4, fromWmcRobots);
        output->AddRow(stats);
    }

public:
    long TableTs = 0;
};

REGISTER_MAPPER(THostDataAcceptanceMapper)

// Mapper over a sitemap-data export table.
// Every input row has a "key" column and a "value" column holding a serialized
// proto::sitemap::LemurSitemapExportMessage. The mapper folds all rows of its input into
// one output row of counters, written under the constant key F_KEY so that a single
// reducer can sum all mapper rows. Besides the fixed counters it emits:
//   "HttpCode<N>"   - sitemaps per HTTP code class (http_code / 100 * 100); classes other
//                     than 2, 3, 4, 5, 10 and 20 are folded into "HttpCode0";
//   "Type<N>"       - sitemaps per type value;
//   "LastAccess<N>" - sitemaps per last-access age bucket (see the bucket enum);
//   "Map_SourceId"  - map from source id (as a string) to the number of sitemaps.
// All counters are 64-bit: URL counts are summed over every sitemap of the input and can
// exceed the range of a 32-bit int.
struct TSitemapDataAcceptanceMapper
        : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    Y_SAVELOAD_JOB(TableTs)

    TSitemapDataAcceptanceMapper() = default;

    // tableTs: timestamp (seconds) that sitemap last-access times are compared against.
    TSitemapDataAcceptanceMapper(const long &tableTs)
            : TableTs(tableTs) {
    }

    void Do(TReader *input, TWriter *output) override {
        i64 countKeyWithoutSitemaps = 0;
        i64 countSitemap = 0;
        i64 countSitemapNoErrors = 0;
        i64 countSitemapNoUrls = 0;
        i64 countUrls = 0;
        i64 countSitemapParsed = 0;
        i64 countSitemapRedirected = 0;
        i64 countSitemapNoSitemapUrl = 0;
        i64 countSitemapSitemapUrlEqKey = 0;

        THashMap<int, i64> countHttpCodes;
        THashMap<int, i64> countSourceIds;
        THashMap<int, i64> countTypes;
        THashMap<int, i64> countLastAccesses;

        for (const auto &cursor: *input) {
            proto::sitemap::LemurSitemapExportMessage importMsg;
            // Bind by reference: the row is only read, there is no need to copy it.
            const NYT::TNode &row = cursor.GetRow();
            const TString &key = row["key"].AsString();
            // The parse result is intentionally ignored.
            Y_PROTOBUF_SUPPRESS_NODISCARD importMsg.ParseFromString(row["value"].AsString());

            if (importMsg.Getsitemaps().empty()) {
                countKeyWithoutSitemaps++;
                continue;
            }

            countSitemap += importMsg.Getsitemaps().size();
            for (const auto &sitemap: importMsg.Getsitemaps()) {
                if (sitemap.Geterror_count() == 0) {
                    countSitemapNoErrors++;
                }
                if (sitemap.Geturl_count() == 0) {
                    countSitemapNoUrls++;
                }
                countUrls += sitemap.Geturl_count();
                countSitemapParsed += sitemap.Getis_sitemap_parsed() ? 1 : 0;
                countSitemapRedirected += sitemap.Getredir_target().empty() ? 0 : 1;
                countSitemapNoSitemapUrl += sitemap.Getsitemap_url().empty() ? 1 : 0;
                countSitemapSitemapUrlEqKey += sitemap.Getsitemap_url() == key ? 1 : 0;

                const int httpCode = sitemap.Gethttp_code() / 100;
                switch (httpCode) {
                    case 2:
                    case 3:
                    case 4:
                    case 5:
                    case 10:
                    case 20:
                        countHttpCodes[httpCode]++;
                        break;
                    default:
                        // Every other code class is reported as "HttpCode0".
                        countHttpCodes[0]++;
                }
                countSourceIds[sitemap.Getsource_id()]++;
                countTypes[sitemap.Gettype()]++;

                // Compute the age in 64 bits; narrowing it to int could wrap around and
                // put the sitemap into the wrong bucket.
                const i64 diff = static_cast<i64>(TableTs) - static_cast<i64>(sitemap.Getlast_access());

                // Buckets ONE_DAY..SEVEN_DAY are numbered 0..6, so for ages from one day
                // up to (but excluding) seven days the bucket index is the number of whole
                // days. Ages below one day (including negative ones) go to ONE_DAY,
                // everything else to OTHER.
                int ageBucket = OTHER;
                if (diff < ONE_DAY_SECONDS) {
                    ageBucket = ONE_DAY;
                } else if (diff < 7 * ONE_DAY_SECONDS) {
                    ageBucket = static_cast<int>(diff / ONE_DAY_SECONDS);
                }
                countLastAccesses[ageBucket]++;
            }
        }

        NYT::TNode node = NYT::TNode()
                (F_KEY, F_KEY)
                (F_SITEMAP_COUNT, countSitemap)
                (F_SITEMAP_NO_ERRORS, countSitemapNoErrors)
                (F_SITEMAP_NO_URLS, countSitemapNoUrls)
                (F_KEY_WITHOUT_SITEMAPS, countKeyWithoutSitemaps)
                (F_URLS, countUrls)
                (F_SITEMAP_PARSED, countSitemapParsed)
                (F_SITEMAP_REDIRECTED, countSitemapRedirected)
                (F_SITEMAP_NO_SITEMAP_URL, countSitemapNoSitemapUrl)
                (F_SITEMAP_SITEMAP_URL_EQ_KEY, countSitemapSitemapUrlEqKey);

        for (const auto &p: countHttpCodes) {
            node["HttpCode" + ToString(p.first * 100)] = p.second;
        }

        for (const auto &p: countTypes) {
            node["Type" + ToString(p.first)] = p.second;
        }

        for (const auto &p: countLastAccesses) {
            node["LastAccess" + ToString(p.first)] = p.second;
        }

        // Start from an explicit (possibly empty) map: a default-constructed TNode is
        // Undefined, and the reducer reads this field with AsMap().
        NYT::TNode sourceIdCounts = NYT::TNode::CreateMap();
        for (const auto &p: countSourceIds) {
            sourceIdCounts[ToString(p.first)] = p.second;
        }
        node["Map_SourceId"] = sourceIdCounts;

        output->AddRow(node);
    }

public:
    // Defaults to the construction time when no table timestamp is supplied.
    long TableTs = TInstant::Now().Seconds();
};

REGISTER_MAPPER(TSitemapDataAcceptanceMapper)


// Reducer that sums the counter rows produced by the acceptance mappers.
// For every name in Fields:
//   - names starting with F_MAP_PREFIX are treated as maps of counters and are summed
//     entry by entry;
//   - all other names are treated as integer counters and are summed.
// One row with the totals is written per Do() call. Sums are kept in 64 bits because the
// inputs are read with AsInt64() and totals over a whole table may not fit into an int.
struct TSitemapAcceptanceReducer
        : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {

public:
    Y_SAVELOAD_JOB(Fields)

    TSitemapAcceptanceReducer() = default;

    // fields: names of the columns to aggregate.
    TSitemapAcceptanceReducer(const TVector<TString> &fields)
            : Fields(fields) {
    }

    void Do(TReader *input, TWriter *output) override {
        THashMap<TString, i64> scalarSums;
        THashMap<TString, THashMap<TString, i64>> mapSums;

        for (const auto &cursor : *input) {
            // Bind by reference: the row is only read, there is no need to copy it.
            const NYT::TNode &row = cursor.GetRow();
            for (const auto &fieldName: Fields) {
                // A mapper row does not have to contain every field (e.g. a HttpCode/Type
                // column that never occurred in its input), so absent fields are skipped.
                if (!row.HasKey(fieldName)) {
                    continue;
                }
                const NYT::TNode &value = row[fieldName];
                if (fieldName.StartsWith(F_MAP_PREFIX)) {
                    // Skip a map field that is not actually a map (e.g. an entity written
                    // for an empty input) instead of failing the whole job in AsMap().
                    if (!value.IsMap()) {
                        continue;
                    }
                    auto &fieldSums = mapSums[fieldName];
                    for (const auto &p: value.AsMap()) {
                        fieldSums[p.first] += p.second.AsInt64();
                    }
                } else {
                    scalarSums[fieldName] += value.AsInt64();
                }
            }
        }

        // Every requested field is always present in the output: scalar fields default to
        // 0 and map fields default to an empty map, so readers see a stable set of columns.
        NYT::TNode node;
        for (const auto &fieldName: Fields) {
            if (fieldName.StartsWith(F_MAP_PREFIX)) {
                NYT::TNode mapNode = NYT::TNode::CreateMap();
                const auto found = mapSums.find(fieldName);
                if (found != mapSums.end()) {
                    for (const auto &entry: found->second) {
                        mapNode[entry.first] = entry.second;
                    }
                }
                node[fieldName] = mapNode;
            } else {
                node[fieldName] = scalarSums[fieldName];
            }
        }

        output->AddRow(node);
    }

public:
    TVector<TString> Fields;
    // Not read or written by this reducer and not part of the serialized job state.
    TVector<TString> MapFields;

};

REGISTER_REDUCER(TSitemapAcceptanceReducer)

// Reads the first row of the aggregated table and pushes its counters to monitoring.
//
// client    - YT client or transaction used to read the table;
// tableName - table holding the reducer output (only its first row is read);
// fields    - names of the fields to copy into the result; fields that do not start with
//             F_MAP_PREFIX are additionally pushed to monitoring;
// rows      - number of rows of the source table, used as the denominator of the means;
// prefix    - table prefix selecting the monitoring sink: HOST_DATA_TABLES_PREFIX or
//             SITEMAP_DATA_TABLES_PREFIX.
//
// For every scalar field three signals are pushed: "<field>_cnt", "<field>_mean"
// (divided by rows) and "<field>_mean_by_hosts_with_sitemap" (divided by
// rows - HostsWithoutSitemaps + 1).
//
// Returns a node with the values of all requested fields. If the table has no rows the
// returned node is empty; if the prefix is unknown the node is still filled, but nothing
// is pushed to monitoring. Both situations are logged.
NYT::TNode MonitorSitemapsChangesShare(const NYT::IClientBasePtr &client,
                                       const TString &tableName,
                                       const TVector<TString> &fields,
                                       const int &rows,
                                       const TString &prefix) {
    // 64-bit counters: the values are read with AsInt64() and may not fit into an int.
    THashMap<TString, i64> counters;
    NYT::TNode node;

    auto reader = client->CreateTableReader<NYT::TNode>(tableName);
    if (!reader->IsValid()) {
        // Nothing was aggregated, so there is no row to read.
        LOG_ERROR("Table %s has no rows, monitoring is skipped", tableName.c_str());
        return node;
    }

    const NYT::TNode &row = reader->GetRow();
    for (const auto &fieldName: fields) {
        if (!fieldName.StartsWith(F_MAP_PREFIX)) {
            counters[fieldName] = row[fieldName].AsInt64();
        }
        node[fieldName] = row[fieldName];
    }

    // Initialize explicitly: calling through an unset function pointer is undefined behaviour.
    void (*monitoringFunction)(const TString &, double) = nullptr;
    if (prefix == HOST_DATA_TABLES_PREFIX) {
        monitoringFunction = MonitorPushSitemapHostrelatedChangesShare;
    } else if (prefix == SITEMAP_DATA_TABLES_PREFIX) {
        monitoringFunction = MonitorPushSitemapSitemapdataChangesShare;
    }
    if (monitoringFunction == nullptr) {
        LOG_ERROR("Unknown table prefix %s, monitoring is skipped", prefix.c_str());
        return node;
    }

    for (const auto &p: counters) {
        monitoringFunction(p.first + "_cnt", static_cast<double>(p.second));
    }

    for (const auto &p: counters) {
        monitoringFunction(p.first + "_mean", static_cast<double>(p.second) / static_cast<double>(rows));
    }

    // Look the counter up without operator[]: operator[] would insert a zero entry for
    // tables that have no such field, and that entry would then be pushed as a metric.
    const auto hostsWithoutIt = counters.find(F_HOSTS_WITHOUT_SITEMAPS);
    const i64 hostsWithoutSitemaps = hostsWithoutIt == counters.end() ? 0 : hostsWithoutIt->second;
    // The +1 keeps the denominator non-zero when every host lacks sitemaps.
    const double cntHostsWithSitemaps = static_cast<double>(rows - hostsWithoutSitemaps + 1);
    for (const auto &p: counters) {
        monitoringFunction(p.first + "_mean_by_hosts_with_sitemap",
                           static_cast<double>(p.second) / cntHostsWithSitemaps);
    }

    return node;
}

template<class TMapper>
int ProcessTables(const TString &acceptanceRootTableName,
                  const TString &tablePrefix,
                  const TVector<TString> &fieldsNames) {
    const TConfig &config = TConfig::CInstance();

    NYT::IClientPtr client = NYT::CreateClient(TConfig::CInstance().MR_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    const time_t lastProcessedTs = FromString<time_t>(NYTUtils::GetAttrOrDefault<TString>(tx, acceptanceRootTableName,
                                                                                          config.ATTR_LAST_PROCESSED_TABLES,
                                                                                          "0"));

    TString tmpTableName = JoinYtPath(acceptanceRootTableName, "tmp");
    TString statisticsTableName = JoinYtPath(acceptanceRootTableName, "statistics");
    TDeque<NYTUtils::TTableInfo> allTables;
    NYTUtils::GetTableList(tx, config.TABLE_SITEMAP_ROOT, allTables, 1000);
    TVector<TString> hostDataTables;
    time_t maxTableTs = 0;

    TString prefix = JoinYtPath(config.TABLE_SITEMAP_ROOT, tablePrefix);
    for (const auto &table: allTables) {
        if (table.Name.StartsWith(prefix)) {
            const time_t tableTs = FromString<time_t>(table.Name.substr(prefix.size()));

            maxTableTs = std::max(maxTableTs, tableTs);
            if (tableTs > lastProcessedTs) {
                hostDataTables.push_back(table.Name);
            }
        }
    }

    if (hostDataTables.empty()) {
        LOG_ERROR("No new tables for process");
        tx->Abort();
        return 0;
    }

    std::sort(hostDataTables.begin(), hostDataTables.end(), NYTUtils::TTableInfo::TNameLess());
    NYT::TRichYPath statisticsTable(NYT::TRichYPath(statisticsTableName).Append(true));

    NYTUtils::TTableInfo tableInfo;

    int countProcessedTables = 0;
    for (const auto &table: hostDataTables) {
        countProcessedTables++;
        LOG_INFO("Process table %s", table.c_str());
        NYTUtils::GetTableInfo(client, table, tableInfo);

        TMapReduceCmd<TMapper, TSitemapAcceptanceReducer>
                (tx, new TMapper(FromString<long>(table.substr(prefix.size()))),
                 new TSitemapAcceptanceReducer(fieldsNames))
                .template Input<NYT::TNode>(table)
                .template Output<NYT::TNode>(tmpTableName)
                .ReduceBy(F_KEY)
                .Do();
        auto node = MonitorSitemapsChangesShare(tx, tmpTableName, fieldsNames, tableInfo.RecordCount, tablePrefix);


        NYTUtils::SetAttr(tx, table, config.ATTR_ACCEPTED, true);


        node["Timestamp"] = FromString<time_t>(table.substr(prefix.size()));
        node["Rows"] = tableInfo.RecordCount;
        auto statisticsTableWriter = tx->CreateTableWriter<NYT::TNode>(statisticsTable);
        statisticsTableWriter->AddRow(node);
        statisticsTableWriter->Finish();
        if (countProcessedTables % 10 == 0) {
            LOG_INFO("Commit statistics");
            // если много таблиц чтобы не сбросился прогресс
            // тут не выполняем сортировку, но это не критично
            NYTUtils::SetAttr(tx, acceptanceRootTableName, config.ATTR_LAST_PROCESSED_TABLES,
                              ToString(FromString<time_t>(table.substr(prefix.size()))));
            tx->Commit();
            tx = client->StartTransaction();
        }
    }

    TSortCmd<NYT::TNode>(tx)
            .Input<NYT::TNode>(statisticsTableName)
            .Output<NYT::TNode>(statisticsTableName)
            .By("Timestamp")
            .OperationWeight(TConfig::CInstance().OPERATION_WEIGHT)
            .Do();
    NYTUtils::SetAttr(tx, acceptanceRootTableName, config.ATTR_LAST_PROCESSED_TABLES, ToString(maxTableTs));
    tx->Remove(tmpTableName);
    tx->Commit();
    return 0;
}

int AcceptanceSitemap(int argc, const char **argv) {
    const TConfig &config = TConfig::CInstance();

    TString acceptanceMode;

    NLastGetopt::TOpts opts;
    opts.AddLongOption("mode", "mode for choose kind of acceptance")
            .StoreResult(&acceptanceMode)
            .Required();

    const auto parseResult = NLastGetopt::TOptsParseResult(&opts, argc, argv);


    if (acceptanceMode == "sitemapdata") {
        ProcessTables<TSitemapDataAcceptanceMapper>(config.TABLE_ACCEPTANCE_SITEMAP_DATA_WEBMASTER_ROOT,
                                                    SITEMAP_DATA_TABLES_PREFIX,
                                                    SITEMAP_DATA_FIELDS);
    }
    if (acceptanceMode == "hostrelated") {
        ProcessTables<THostDataAcceptanceMapper>(config.TABLE_ACCEPTANCE_HOST_DATA_WEBMASTER_ROOT,
                                                 HOST_DATA_TABLES_PREFIX,
                                                 HOST_DATA_FIELDS);
    }

    return 0;
}

}
} //namespace NWebmaster
