#include <limits>

#include <util/charset/wide.h>
#include <util/string/escape.h>

#include <robot/jupiter/protos/acceptance.pb.h>
#include <robot/library/yt/static/table.h>

#include <wmconsole/version3/processors/indexing/checkurl/conf/config.h>
#include <wmconsole/version3/processors/indexing/important_urls/conf/config.h>
#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/library/jupiter/search_url_status.h>
#include <wmconsole/version3/wmcutil/hostid.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/string.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "fields.h"
#include "monitor.h"
#include "schemes.h"
#include "task_history.h"

namespace NWebmaster {
namespace NImportantUrls {

// Brings a UTF-8 text to a canonical form used for comparing titles/descriptions:
//  - every UTF-16 code unit that is <= ' ' is treated as whitespace;
//  - each run of whitespace is replaced by a single ' ' (a leading run is kept as one space);
//  - a trailing space is dropped;
//  - a result of 1024 or more UTF-16 code units is cut so that, together with the "..." suffix,
//    it is exactly 1024 code units long.
// An empty input is returned as is.
// NOTE(review): UTF8ToWide presumably throws on malformed UTF-8 - callers in this file wrap
// the call in try/catch; confirm against util/charset/wide.h.
TString ToCanonical(const TString &s) {
    static const TUtf16String ELLIPSIS = u"...";
    static const size_t WIDTH_LIMIT = 1024;
    static const wchar16 BLANK = ' ';

    if (s.Empty()) {
        return s;
    }

    const TUtf16String decoded = UTF8ToWide(s);

    TUtf16String squeezed;
    squeezed.reserve(s.size());

    // true when the previously processed code unit was whitespace
    bool lastWasBlank = false;
    for (const wchar16 symbol : decoded) {
        const bool blank = !(symbol > BLANK);
        if (!blank) {
            squeezed.append(symbol);
        } else if (!lastWasBlank) {
            // first whitespace of a run - keep exactly one space for the whole run
            squeezed.append(BLANK);
        }
        lastWasBlank = blank;
    }

    // the text ended with whitespace, so the last stored code unit is a space - remove it
    if (lastWasBlank) {
        squeezed.resize(squeezed.size() - 1);
    }

    if (squeezed.size() >= WIDTH_LIMIT) {
        squeezed.resize(WIDTH_LIMIT - ELLIPSIS.size());
        squeezed += ELLIPSIS;
    }

    return WideToUTF8(squeezed);
}

// Reducer that compresses the observation rows of one (Host, Path) group into a history of changes.
//
// Input: the first row of a group must come from input table 0 (the "source url" marker row);
// it is only checked and skipped. All following rows are processed in the order they arrive
// (TaskHistory sorts them by Host, Path, TableTimestamp).
//
// Output table 0 (history): the first row that carries spread or search data, then one row
// for every detected change of the http codes / url status / title / description / rel canonical.
// Output table 1 (last state): the last row written to the history, with the last-access fields
// replaced by the maximum values seen in the group.
//
// BaseDates maps a search base collection date (compared against F_JUPITER_TIMESTAMP) to the
// switch date of that base; it is serialized into the job via Y_SAVELOAD_JOB.
struct TImportantUrlsCompressReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(BaseDates)

public:
    TImportantUrlsCompressReducer() = default;
    TImportantUrlsCompressReducer(const THashMap<ui64, ui64> &baseDates) :
        BaseDates(baseDates)
    {
    }

    // Reduce by Host, Path.
    // Processes one group as described in the struct comment.
    void Do(TReader *input, TWriter *output) override {
        // stand-in for numeric fields that are absent in a row
        const ui64 UNKNOWN_VALUE = std::numeric_limits<ui64>::max();
        const TString CHANGED_PREFIX_STR = CHANGED_PREFIX;
        // input table index of the source url marker row
        const size_t TABLENO_SOURCE_URLS = 0;
        // output table indexes
        const size_t TABLENO_HISTORY = 0;
        const size_t TABLENO_LAST_STATE = 1;

        // fields for comparison
        // prev* variables hold the values of the last row that was taken into account,
        // max* variables hold the largest last-access values seen so far in the group
        ui64 jupiterTimestamp = 0, prevJupiterTimestamp = 0;
        ui64 spreadLastAccess = 0, searchLastAccess = 0, maxSpreadLastAccess = 0, maxSearchLastAccess = 0;
        ui64 prevHttpCode = 0, httpCode = 0, searchHttpCode = 0, prevSearchHttpCode = 0;
        NSearchUrlStatus::ESearchUrlStatus prevUrlStatus = NSearchUrlStatus::NOTHING_FOUND, urlStatus;
        bool first = true;
        bool isSearchable;
        TString prevTitle, title;
        TString prevDescription, description;
        TString prevRelCanonical, relCanonical;

        // prevRow - last row that had spread or search data (source for forward-filling)
        // resultRow - last row written to the history table
        NYT::TNode prevRow;
        NYT::TNode resultRow;

        if (!input->IsValid() || input->GetTableIndex() != TABLENO_SOURCE_URLS) {
            return; // no source url
        }
        input->Next(); // reading source url

        for (; input->IsValid(); input->Next()) {
            // current values
            NYT::TNode row = input->GetRow();
            // copy spread or search data from prev row, if needed
            if (!prevRow.IsUndefined()) {
                if (row[F_SPREAD_LAST_ACCESS].IsNull() && !prevRow[F_SPREAD_LAST_ACCESS].IsNull()) {
                    // copy spread fields
                    for (const TString &field : SPREAD_FIELDS()) {
                        row[field] = prevRow[field];
                    }
                }
                if (row[F_JUPITER_TIMESTAMP].IsNull() && !prevRow[F_JUPITER_TIMESTAMP].IsNull()) {
                    // copy search fields
                    for (const TString &field : JUPITER_FIELDS()) {
                        row[field] = prevRow[field];
                    }
                    row[F_JUPITER_TIMESTAMP] = prevRow[F_JUPITER_TIMESTAMP];
                }
            }

            jupiterTimestamp = NYTUtils::GetNodeFieldOrDefault<ui64>(row, F_JUPITER_TIMESTAMP, 0);
            searchLastAccess = NYTUtils::GetNodeFieldOrDefault<ui64>(row, F_LAST_ACCESS, 0);
            spreadLastAccess = NYTUtils::GetNodeFieldOrDefault<ui64>(row, F_SPREAD_LAST_ACCESS, 0);

            // search data is fresher than spread data - use the search values as the spread ones
            if (searchLastAccess > spreadLastAccess) {
                spreadLastAccess = searchLastAccess;
                row[F_SPREAD_LAST_ACCESS] = spreadLastAccess;
                row[F_SPREAD_HTTP_CODE] = row[F_HTTP_CODE];
                row[F_SPREAD_MIME_TYPE] = row[F_MIME_TYPE];
            }

            // a row whose last access goes back in time relative to the already seen maximum is skipped
            if ((jupiterTimestamp > 0 && searchLastAccess < maxSearchLastAccess) || spreadLastAccess < maxSpreadLastAccess) {
                // inconsistent data - ignoring
                continue;
            }

            maxSpreadLastAccess = std::max(maxSpreadLastAccess, spreadLastAccess);
            maxSearchLastAccess = std::max(maxSearchLastAccess, searchLastAccess);

            httpCode = NYTUtils::GetNodeFieldOrDefault<ui64>(row, F_SPREAD_HTTP_CODE, UNKNOWN_VALUE);
            searchHttpCode = NYTUtils::GetNodeFieldOrDefault<ui64>(row, F_HTTP_CODE, UNKNOWN_VALUE);
            urlStatus = NSearchUrlStatus::RawToView(
                NYTUtils::GetNodeFieldOrDefault<ui64>(row, F_URL_STATUS, UNKNOWN_VALUE),
                NYTUtils::GetNodeFieldOrDefault<bool>(row, F_IS_SEARCHABLE, false));
            // NOTE(review): when the conversion throws, the error is only logged and `title`
            // (same for `description` below) keeps the value left from the previous iteration.
            try {
                title = NUtils::Abbreviate(ToCanonical(NYTUtils::GetNodeFieldOrDefault<TString>(row, F_TITLE, "")), 1024);
            } catch (yexception &e) {
                Cerr << "TITLE_ERROR: [" << NYTUtils::GetNodeFieldOrDefault<TString>(row, F_TITLE, "") << "] --- " << e.what() << Endl;
            }
            try {
                description = NUtils::Abbreviate(ToCanonical(NYTUtils::GetNodeFieldOrDefault<TString>(row, F_DESCRIPTION, "")), 1024);
            } catch (yexception &e) {
                Cerr << "DESCRIPTION_ERROR: [" << NYTUtils::GetNodeFieldOrDefault<TString>(row, F_DESCRIPTION, "") << "] --- " << e.what() << Endl;
            }
            relCanonical = NYTUtils::GetNodeFieldOrDefault<TString>(row, F_REL_CANONICAL, "");
            isSearchable = NYTUtils::GetNodeFieldOrDefault<bool>(row, F_IS_SEARCHABLE, false);

            // for a non-searchable row an empty text value inherits the previously remembered one,
            // so that it is not reported as a change
            if (!isSearchable && title.Empty() && !prevTitle.Empty()) {
                title = prevTitle;
            }
            if (!isSearchable && description.Empty() && !prevDescription.Empty()) {
                description = prevDescription;
            }
            if (!isSearchable && relCanonical.Empty() && !prevRelCanonical.Empty()) {
                relCanonical = prevRelCanonical;
            }
            // foundSpread - the row has a non-zero (possibly forward-filled) spread last access
            // foundSearch - the row's jupiter timestamp is one of the known search bases
            bool foundSpread = spreadLastAccess > 0;
            bool foundSearch = BaseDates.contains(jupiterTimestamp);

            if (first && (foundSpread || foundSearch)) {
                // first row
                // written unconditionally, without CHANGED_PREFIX flags; initializes all prev* values
                resultRow = NYT::TNode();
                CopyRow(row, resultRow, urlStatus, title, description, relCanonical);
                resultRow[F_TABLE_TIMESTAMP] = row[F_TABLE_TIMESTAMP];
                /*resultRow["Debug"] = NYT::TNode()
                    ("First", true)
                ;*/
                output->AddRow(resultRow, TABLENO_HISTORY);
                first = false;

                prevHttpCode = httpCode;
                prevUrlStatus = urlStatus;
                prevTitle = title;
                prevDescription = description;
                prevRelCanonical = relCanonical;
                prevJupiterTimestamp = jupiterTimestamp;
                prevSearchHttpCode = searchHttpCode;
            } else {
                // spread change
                // a history row is written when the spread or the search http code differs
                // from the remembered one; the row keeps its own TableTimestamp
                if (foundSpread) {
                    bool httpCodeChanged = httpCode != prevHttpCode || searchHttpCode != prevSearchHttpCode;
                    if (httpCodeChanged) {
                        resultRow = NYT::TNode();
                        CopyRow(row, resultRow, urlStatus, title, description, relCanonical);
                        // changes
                        resultRow[CHANGED_PREFIX_STR + F_SPREAD_HTTP_CODE] = httpCodeChanged;
                        resultRow[F_TABLE_TIMESTAMP] = row[F_TABLE_TIMESTAMP];
                        /*resultRow["Debug"] = NYT::TNode()
                            ("HttpCode", httpCode)
                            ("PrevHttpCode", prevHttpCode)
                        ;*/
                        output->AddRow(resultRow, TABLENO_HISTORY);
                    }
                    prevHttpCode = httpCode;
                    prevSearchHttpCode = searchHttpCode;
                }
                // jupiter change
                // evaluated only when the row belongs to a different known search base than the previous one
                if (foundSearch && jupiterTimestamp != prevJupiterTimestamp) {
                    ui64 switchDate = BaseDates[jupiterTimestamp];
                    bool urlStatusChanged = urlStatus != prevUrlStatus;
                    bool titleChanged = title != prevTitle;
                    bool descriptionChanged = description != prevDescription; // && jupiterTimestamp > DESCRIPTION_THRESHOLD_TS;
                    bool relCanonicalChanged = relCanonical != prevRelCanonical; // && jupiterTimestamp > DESCRIPTION_THRESHOLD_TS;
                    // NOTE(review): when foundSpread is true, prevSearchHttpCode has just been overwritten
                    // in the spread branch above, so this flag is always false in that case.
                    bool httpCodeChanged = searchHttpCode != prevSearchHttpCode;

                    // save row if anything is changed
                    if (urlStatusChanged || titleChanged || descriptionChanged || relCanonicalChanged || httpCodeChanged) {
                        resultRow = NYT::TNode();
                        CopyRow(row, resultRow, urlStatus, title, description, relCanonical);
                        // changes
                        resultRow[CHANGED_PREFIX_STR + F_URL_STATUS] = urlStatusChanged;
                        resultRow[CHANGED_PREFIX_STR + F_TITLE] = titleChanged;
                        resultRow[CHANGED_PREFIX_STR + F_DESCRIPTION] = descriptionChanged;
                        resultRow[CHANGED_PREFIX_STR + F_REL_CANONICAL] = relCanonicalChanged;
                        // update time
                        // for indexing changes its TableTimestamp, otherwise - BaseSwitchDate
                        resultRow[F_TABLE_TIMESTAMP] = switchDate;

                        /*resultRow["Debug"] = NYT::TNode()
                            ("UrlStatus", ToString(urlStatus))
                            ("PrevUrlStatus", ToString(prevUrlStatus))
                            ("Title", title)
                            ("PrevTitle", prevTitle)
                            ("Description", description)
                            ("PrevDescription", prevDescription)
                            ("RelCanonical", relCanonical)
                            ("PrevRelCanonical", prevRelCanonical)
                            ("JupiterTimestamp", jupiterTimestamp)
                            ("PrevJupiterTimestamp", prevJupiterTimestamp)
                        ;*/

                        output->AddRow(resultRow, TABLENO_HISTORY);
                    }
                    // remembered even when nothing was written, so the next base is compared with this one
                    prevUrlStatus = urlStatus;
                    prevTitle = title;
                    prevDescription = description;
                    prevRelCanonical = relCanonical;
                    prevJupiterTimestamp = jupiterTimestamp;
                    prevSearchHttpCode = searchHttpCode;
                }
            }
            // only rows that carry data become the source for forward-filling the next row
            if (foundSpread || foundSearch) {
                prevRow = std::move(row);
            }
        }

        // last row
        // resultRow is the last row written to the history (undefined if nothing was written);
        // it becomes the "last state" of the url with the freshest last-access values
        if (!resultRow.IsUndefined()) {
            if (maxSearchLastAccess != 0) {
                resultRow[F_LAST_ACCESS] = maxSearchLastAccess;
            }
            if (maxSpreadLastAccess != 0) {
                resultRow[F_SPREAD_LAST_ACCESS] = maxSpreadLastAccess;
            }
            output->AddRow(resultRow, TABLENO_LAST_STATE);
        }
    }

    // Fills `result` from `row`: copies every field listed in JUPITER_FIELDS() and SPREAD_FIELDS()
    // plus F_JUPITER_TIMESTAMP, then stores the string form of `urlStatus` in F_WMC_URL_STATUS and
    // overrides F_TITLE, F_DESCRIPTION and F_REL_CANONICAL with the already normalized values
    // passed by the caller.
    void CopyRow(const NYT::TNode &row, NYT::TNode &result,
        const NSearchUrlStatus::ESearchUrlStatus &urlStatus, const TString &title, const TString &description,
        const TString &relCanonicalTarget) {
        for (const TString& field : JUPITER_FIELDS()) {
            result[field] = row[field];
        }
        for (const TString& field : SPREAD_FIELDS()) {
            result[field] = row[field];
        }
        result[F_JUPITER_TIMESTAMP] = row[F_JUPITER_TIMESTAMP];
        result[F_WMC_URL_STATUS] = ToString(urlStatus);
        result[F_TITLE] = title;
        result[F_DESCRIPTION] = description;
        result[F_REL_CANONICAL] = relCanonicalTarget;
    }

public:
    // search base collection date -> search base switch date
    THashMap<ui64, ui64> BaseDates;
};

REGISTER_REDUCER(TImportantUrlsCompressReducer)

// Reducer that compares the previous and the new "last state" of every path of a host and
// turns the differences into notification rows.
//
// Input (TaskHistory runs it with ReduceBy(Host) and SortBy(Host, Path, TableTimestamp)):
//   - the first row of a group must come from table 0 and carry the notification settings of
//     the host in F_USER_SETTINGS (a map: notification type -> list of user settings rows);
//     groups without it are skipped;
//   - rows from table 1 are previous states of a path;
//   - rows from any other table are current states of a path.
// Output: for every notification type, each user settings row that collected at least one
// change, extended with F_US_NOTIFICATION_TYPE and the F_CHANGES list.
struct TImportantUrlsLastChangesDiffReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {

public:
    TImportantUrlsLastChangesDiffReducer() = default;

    // Reduce by Host.
    // For each host + notification_type + user generates a row with notification data.
    void Do(TReader *input, TWriter *output) override {
        const size_t TABLENO_SETTINGS = 0;
        const size_t TABLENO_PREV_STATE = 1;

        if (!input->IsValid() || input->GetTableIndex() != TABLENO_SETTINGS) {
            return; // no user notification settings
        }
        // The row is copied once because the reader's row is replaced by Next(); the settings
        // are then edited in place inside that copy instead of being deep-copied a second time.
        NYT::TNode userSettingsRow = input->GetRow();
        NYT::TNode &userSettings = userSettingsRow[F_USER_SETTINGS];
        input->Next();

        // States of the path that is currently being collected. A state stays undefined
        // when the corresponding table has no row for the path.
        NYT::TNode prevState, currState;
        TString prevPath;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString path = row[F_PATH].AsString();
            if (prevPath != path) {
                // a new path started - flush the pair collected for the previous one
                StoreChanges(prevState, currState, userSettings);
                prevState = NYT::TNode();
                currState = NYT::TNode();
            }
            if (input->GetTableIndex() == TABLENO_PREV_STATE) {
                prevState = row;
            } else {
                currState = row;
            }
            prevPath = path;
        }
        // flush the last path of the group
        StoreChanges(prevState, currState, userSettings);

        // writing result
        for (const auto &pair : userSettings.AsMap()) {
            const TString &notificationType = pair.first;
            const NYT::TNode &subscribers = pair.second;
            // StoreChanges may have inserted undefined placeholders for notification types
            // nobody is subscribed to; Null is skipped as well, the same way StoreChanges
            // treats it, instead of failing in AsList().
            if (subscribers.IsUndefined() || subscribers.IsNull()) {
                continue;
            }

            // each element is copied because the output row is extended with the notification type
            for (NYT::TNode userNotification : subscribers.AsList()) {
                if (userNotification[F_CHANGES].IsUndefined()) {
                    continue; // nothing changed for this user
                }
                userNotification[F_US_NOTIFICATION_TYPE] = notificationType;
                output->AddRow(userNotification);
            }
        }
    }

    // Compares one path's previous and current state and records the differences.
    //
    // Does nothing unless both states are defined. Otherwise, for every field listed in
    // CHANGES_FIELDS() whose value differs between the states (and was not Null before),
    // appends a {F_CURRENT, F_PREVIOUS} pair with the full states to F_CHANGES of every user
    // settings row subscribed to the notification type mapped to that field.
    //
    // prevState, currState - states of the path (read via the non-const TNode accessors)
    // userSettings - map: notification type -> list of user settings rows; modified in place
    void StoreChanges(NYT::TNode &prevState, NYT::TNode &currState, NYT::TNode &userSettings) const {
        if (prevState.IsUndefined() || currState.IsUndefined()) {
            return;
        }
        // search for changes
        for (const auto &field : CHANGES_FIELDS()) {
            const TString &fieldName = field.first;
            const TString &notificationType = CHANGES_TO_NOTIFICATION_TYPES().at(fieldName);
            NYT::TNode &userNotifications = userSettings[notificationType];
            if (userNotifications.IsNull() || userNotifications.IsUndefined()) {
                // no one interested
                continue;
            }
            const bool changed = prevState[fieldName] != currState[fieldName] && !prevState[fieldName].IsNull();
            if (!changed) {
                continue;
            }
            // the change does not depend on the user - build it once and hand a copy to each user
            NYT::TNode change;
            change[F_CURRENT] = currState;
            change[F_PREVIOUS] = prevState;
            // send to each user
            for (NYT::TNode &userNotification : userNotifications.AsList()) {
                if (userNotification[F_CHANGES].IsUndefined()) {
                    userNotification[F_CHANGES] = NYT::TNode::CreateList();
                }
                userNotification[F_CHANGES].Add(change);
            }
        }
    }
};

REGISTER_REDUCER(TImportantUrlsLastChangesDiffReducer)

// Reducer that gathers all notification settings rows of one host into a single row.
struct TUserNoficationSettingsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TUserNoficationSettingsReducer() = default;

    // Reduce by host_id.
    //
    // Keeps only the rows whose notification type belongs to the important urls notifications
    // and which have at least one delivery channel (service or email) switched on.
    // If anything is kept, writes one row:
    //   Host           - host name restored from the host id of the first row of the group
    //   Path           - only for sorting, always ""
    //   TableTimestamp - only for sorting, always 0
    //   UserSettings   - map: notification type -> list of the kept input rows
    //                    (uid (+fio,login,email), notification_type, channels)
    void Do(TReader *input, TWriter *output) override {
        const THashSet<TString> supportedTypes = {
            CHANGE_TITLE, CHANGE_DESCRIPTION, CHANGE_REL_CANONICAL_TARGET, CHANGE_SEARCH_STATUS,
            CHANGE_SEARCH_LAST_ACCESS, CHANGE_INDEXING_STATUS, CHANGE_INDEXING_LAST_ACCESS};

        TString hostName;
        THashMap<TString, NYT::TNode> settingsByType;

        for (; input->IsValid(); input->Next()) {
            NYT::TNode settingsRow = input->GetRow();

            // the host name is resolved before any filtering, i.e. from the first row of the group
            if (hostName.Empty()) {
                hostName = TWebmasterHostId::FromHostId(settingsRow[F_US_HOST_ID].AsString()).ToHostName();
            }

            const TString type = settingsRow[F_US_NOTIFICATION_TYPE].AsString();
            if (!supportedTypes.contains(type)) {
                continue;
            }

            const bool viaService = settingsRow[F_US_CHANNEL_SERVICE].AsBool();
            const bool viaEmail = settingsRow[F_US_CHANNEL_EMAIL].AsBool();
            if (!(viaService || viaEmail)) {
                // the user switched every channel off
                continue;
            }

            auto slot = settingsByType.find(type);
            if (slot == settingsByType.end()) {
                slot = settingsByType.emplace(type, NYT::TNode::CreateList()).first;
            }
            slot->second.Add(settingsRow);
        }

        if (settingsByType.empty()) {
            return;
        }

        NYT::TNode userSettings;
        for (const auto &entry : settingsByType) {
            userSettings[entry.first] = entry.second;
        }

        NYT::TNode resultRow;
        resultRow[F_HOST] = hostName;
        resultRow[F_PATH] = "";
        resultRow[F_TABLE_TIMESTAMP] = 0;
        resultRow[F_USER_SETTINGS] = userSettings;
        output->AddRow(resultRow);
    }
};

REGISTER_REDUCER(TUserNoficationSettingsReducer)

// Wraps a table name into NYT::TRichYPath.
// This is the hook for debugging runs: enable the ranges below to limit the input
// to the listed keys.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
    // richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
    // richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://www.pleer.ru"))));
    // richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://odobri.ru"))));
    // richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://www.vedomosti.ru"))));
    // richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("http://www.bloknotov.ru"))));
    return richPath;
}

// Entry point of the "history" task; the command line arguments are not used.
//
// Inside a single YT transaction:
//   1. reads the search bases table into a map collection date -> switch date;
//   2. compresses the source urls, checkurl outcome and merged important urls tables into the
//      history table and a new "last state" table (TImportantUrlsCompressReducer), sorts both;
//   3. if a previous last state exists - prepares per-host user notification settings, diffs
//      the previous and the new last state into a changes table named after the current
//      timestamp, and moves the previous last state to "<last state>.old";
//   4. moves the new last state into place, stamps the last state and history tables with the
//      ProcessedTimestamp attribute and commits the transaction.
// Returns 0; failures are expected to surface as exceptions from the called APIs
// (NOTE(review): nothing is caught here - confirm the caller handles them).
int TaskHistory(int, const char **) {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();
    // reading actual search base dates
    LOG_INFO("Reading search base collection dates");
    THashMap<ui64, ui64> searchBaseDates;
    auto reader = tx->CreateTableReader<NYT::TNode>(config.TABLE_SOURCE_SEARCH_BASES);
    for (; reader->IsValid(); reader->Next()) {
        const NYT::TNode &row = reader->GetRow();
        searchBaseDates[row[F_COLLECTION_DATE].AsUint64()] = row[F_SWITCH_DATE].AsUint64();
    }
    LOG_INFO("Found %lu search bases", searchBaseDates.size());
    // used both as the name of the changes table and as the ProcessedTimestamp attribute value
    ui64 timestamp = Now().MilliSeconds();

    // Table schemas
    // NOTE(review): updateSchema is not referenced again in this function.
    NYT::TTableSchema snapshotSchema = CreateSnapshotSchema(tx);
    NYT::TTableSchema updateSchema = CreateUpdateSchema(snapshotSchema);
    NYT::TTableSchema historySchema = CreateHistorySchema(CreateMergeSchema(snapshotSchema));

    // the new last state is built next to the current one and moved into place at the very end
    NYT::TRichYPath newLastState = NYT::TRichYPath(config.TABLE_IMPORTANT_URLS_LAST_STATE + ".new").Schema(historySchema);
    NYT::TRichYPath userSettingsTable = NYT::TRichYPath(
        NYTUtils::JoinPath(config.TABLE_IMPORTANT_URLS_ROOT, "user-settings")).Schema(CreateUserSettingsSchema());
    TString sortedUserSettings = NYTUtils::JoinPath(config.TABLE_IMPORTANT_URLS_ROOT, "user-settings-sorted");

    NYT::TRichYPath changesTable = NYT::TRichYPath(
        NYTUtils::JoinPath(config.TABLE_IMPORTANT_URLS_CHANGES, ToString(timestamp))).Schema(CreateChangesSchema());

    LOG_INFO("Compressing important urls history");
    // merging checkurl and important-urls results
    // input order matters: the reducer expects the source urls table to be input table 0;
    // outputs: table 0 - history, table 1 - new last state
    TOpRunner opRunner(tx);
    opRunner
        .InputNode(DebugPath(config.TABLE_IMPORTANT_URLS_SOURCE_URLS_PREPARED))
        .InputNode(DebugPath(NCheckurl::TConfig::CInstance().TABLE_CHECKURL_OUTCOME_MERGED))
        .InputNode(DebugPath(config.TABLE_IMPORTANT_URLS_MERGED))
        .OutputNode(NYT::TRichYPath(config.TABLE_IMPORTANT_URLS_HISTORY).Schema(historySchema))
        .OutputNode(newLastState)
        .ReduceBy(F_HOST, F_PATH)
        .SortBy(F_HOST, F_PATH, F_TABLE_TIMESTAMP)
        .MapReduce(new TImportantUrlsCompressReducer(searchBaseDates))

        // both results are sorted under the same ASYNC_CTX0 context and then waited for
        // (presumably the two sorts run concurrently - confirm against TOpRunner)
        .Comment("Sorting result")
        .SortBy(F_HOST, F_PATH, F_TABLE_TIMESTAMP)
        .Sort(config.TABLE_IMPORTANT_URLS_HISTORY, ASYNC_CTX0)

        .SortBy(F_HOST, F_PATH, F_TABLE_TIMESTAMP)
        .Sort(newLastState.Path_, ASYNC_CTX0)
        .Wait(ASYNC_CTX0);

    // changes can be computed only when there is a previous last state to compare with
    if (tx->Exists(config.TABLE_IMPORTANT_URLS_LAST_STATE)) {
        opRunner
            .Comment("Preparing user notifications settings")
            .SortBy(F_US_HOST_ID)
            .Sort(config.TABLE_USER_NOTIFICATION_SETTINGS, sortedUserSettings)

            .InputNode(sortedUserSettings)
            .OutputNode(userSettingsTable)
            .ReduceBy(F_US_HOST_ID)
            .Reduce(new TUserNoficationSettingsReducer)

            // sorted the same way as the last state tables so the three can be reduced together
            .SortBy(F_HOST, F_PATH, F_TABLE_TIMESTAMP)
            .Sort(userSettingsTable.Path_)

            // input order matters: table 0 - settings, table 1 - previous state, table 2 - new state
            .Comment("Last states diff")
            .InputNode(userSettingsTable)
            .InputNode(config.TABLE_IMPORTANT_URLS_LAST_STATE)
            .InputNode(newLastState.Path_)
            .OutputNode(changesTable)
            .ReduceBy(F_HOST)
            .SortBy(F_HOST, F_PATH, F_TABLE_TIMESTAMP)
            .Reduce(new TImportantUrlsLastChangesDiffReducer)

            .SortBy(F_US_HOST_ID)
            .Sort(changesTable.Path_)

            // keep the previous last state as a backup
            .Move(config.TABLE_IMPORTANT_URLS_LAST_STATE, config.TABLE_IMPORTANT_URLS_LAST_STATE + ".old");
    }

    // publish the new last state and mark both tables with the processing time
    opRunner
        .Move(newLastState.Path_, config.TABLE_IMPORTANT_URLS_LAST_STATE);
    NYTUtils::SetAttr(tx, config.TABLE_IMPORTANT_URLS_LAST_STATE, TAttrName::ProcessedTimestamp, timestamp);
    NYTUtils::SetAttr(tx, config.TABLE_IMPORTANT_URLS_HISTORY, TAttrName::ProcessedTimestamp, timestamp);

    tx->Commit();

    return 0;
}

} //namespace NImportantUrls
} //namespace NWebmaster
