#include <library/cpp/mime/types/mime.h>

#include <robot/favicon/protos/tables.pb.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/table.h>

#include <wmconsole/version3/processors/favicons/favicons.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>
#include <wmconsole/version3/library/jupiter/jupiter.h>

#include <util/generic/hash_set.h>
#include <util/digest/fnv.h>

#include "collect_host_favicons.h"
#include "config.h"
#include <iostream>

namespace NWebmaster {

using namespace NJupiter;

// Typed input tags: each one pairs an input table of the operations below with
// the protobuf row type read from it. Every tag is constructed with a distinct
// integer (0-7) — presumably the tag id NJupiter uses to tell inputs apart; confirm
// against the NJupiter tag implementation before renumbering.
TInputTag<NProto::TWebmasterHost>  WebmasterHostsInputTag                    (0);
TInputTag<NFavicon::TSupportRecord>  PreparedWebmasterHostsInputTag          (1);
TInputTag<NFavicon::TSupportRecord>  SupportTableInputTag                    (2);
TInputTag<NFavicon::TBlobRecord>     EnrichedTableInputTag                   (3);
TInputTag<NProto::TFaviconRecord> FaviconSearchInputTag                      (4);
TInputTag<NProto::TFaviconIndexingRecord> FaviconIndexingRecordInputTag      (5);
TInputTag<NProto::TFaviconRecord> FaviconMergedInputTag                      (6);
TInputTag<NProto::THostWithFaviconsAndProblems> PrevHostSnapshotInputTag     (7);

// Typed output tags: same idea for the tables written by the operations below.
TOutputTag<NFavicon::TSupportRecord> PreparedWebmasterHostsOutputTag         (0);
TOutputTag<NProto::TFaviconRecord> PreparedFaviconsOutputTag                 (1);
TOutputTag<NProto::TFaviconRecord> FaviconMergedOutputTag                    (2);
TOutputTag<NProto::THostWithFaviconsAndProblems> HostSnapshotOutputTag       (3);
TOutputTag<NProto::THostWithFaviconsAndProblems> HostSnapshotChangesOutputTag(4);

// First four bytes of the PNG file signature (0x89 'P' 'N' 'G'). Used by
// TPrepareFaviconsReducer to recognise PNG blobs before reading their dimensions.
const TStringBuf PNG_HEADER = "\x89\x50\x4E\x47";

struct TPrepareWebmasterHostsMapper : public TTaggedMapper {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override final {
        for (; reader.IsValid(); reader.Next()) {
            const NProto::TWebmasterHost &in = reader.GetRow(WebmasterHostsInputTag);
            NFavicon::TSupportRecord out;
            out.SetKey(in.GetHost() + "/");
            out.SetTargetSize(0);
            out.SetUrl("");
            writer.AddRow(out, PreparedWebmasterHostsOutputTag);
        }
    }
};
REGISTER_MAPPER(TPrepareWebmasterHostsMapper)

struct TPrepareFaviconsReducer : public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override final {
        if (!reader.SkipRows(PreparedWebmasterHostsInputTag)) {
            return;
        }
        // join data from support-table and enriched
        for (; reader.IsValid(); ) {
            NFavicon::TSupportRecord supportRecord = reader.GetSingleRow(SupportTableInputTag);
            NFavicon::TBlobRecord blobRecord = reader.GetSingleRow(EnrichedTableInputTag);
            NProto::TFaviconRecord out;
            out.SetHost(supportRecord.GetHost());
            out.SetUrl(supportRecord.GetUrl());
            out.SetIsFromMorda(supportRecord.GetIsFromMorda());
            out.SetIsOnSearch(supportRecord.GetIsOnSearch());
            out.SetTargetSize(supportRecord.GetTargetSize());
            // get image size
            TStringBuf bytes = blobRecord.GetImageDoc();
            // 89 50 4E 47 - PNG-header
            if (bytes.size() >= 24 && bytes.StartsWith(PNG_HEADER)) {
                i32 width = (ui8) bytes[16] * 0x1000000 + (ui8) bytes[17] * 0x10000 +
                    (ui8) bytes[18] * 0x100 + (ui8) bytes[19];
                i32 height = (ui8) bytes[20] * 0x1000000 + (ui8) bytes[21] * 0x10000 +
                    (ui8) bytes[22] * 0x100 + (ui8) bytes[23];
                out.SetWidth(width);
                out.SetHeight(height);
            }
            writer.AddRow(out, PreparedFaviconsOutputTag);
        }
    }
};
REGISTER_REDUCER(TPrepareFaviconsReducer)

struct TSearchAndIndexingFaviconMerger : public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override final {
        NProto::TFaviconRecord result;
        TMaybe<NProto::TFaviconIndexingRecord> indexingRecord = reader.GetSingleRowMaybe(FaviconIndexingRecordInputTag);
        if (indexingRecord.Defined()) {
            result.SetUrl(indexingRecord->GetUrl());
            result.SetLastAccess(indexingRecord->GetLastAccess());
            result.SetAddTime(indexingRecord->GetAddTime());
            result.SetHttpCode(indexingRecord->GetHttpCode());
            result.SetFilterCode(indexingRecord->GetFilterCode());
            result.SetMimeType(indexingRecord->GetMimeType());
            result.SetRedirTarget(indexingRecord->GetRedirTarget());
            result.SetSourceName(indexingRecord->GetSourceName());
            result.SetHost(indexingRecord->GetSourceHost());
        }
        bool hasSearchRecords = false;
        for (; reader.IsValid(); reader.Next()) {
            hasSearchRecords = true;
            const NProto::TFaviconRecord &searchRecord = reader.GetRow(FaviconSearchInputTag);
            result.CopyFrom(searchRecord);
            writer.AddRow(result, FaviconMergedOutputTag);
        }
        if (!hasSearchRecords) {
            writer.AddRow(result, FaviconMergedOutputTag);
        }
    }
};
REGISTER_REDUCER(TSearchAndIndexingFaviconMerger)

struct TCollectHostFaviconsAndProblems : public TTaggedReducer {
public:
    TCollectHostFaviconsAndProblems() = default;
    TCollectHostFaviconsAndProblems(double share) : Share(share) {
    }

    bool IsInExperiment(const TString &host) {
        double value = static_cast<double>(FnvHash<ui64>(host));
        double res = value / static_cast<double>(Max<ui64>());
        return res < Share;
    }

    void Save(IOutputStream& stream) const override {
        ::Save(&stream, Share);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, Share);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override final {
        static const THashSet <i32> VALID_MIME_TYPES = {-1, MIME_UNKNOWN, MIME_IMAGE_JPG, MIME_IMAGE_PNG,
                                                        MIME_IMAGE_GIF,
                                                        MIME_IMAGE_BMP, MIME_IMAGE_SVG};
        static const i32 BIG_FAVICON_SIZE = 120;

        TMaybe <NProto::TWebmasterHost> webmasterHost = reader.GetRowMaybe(WebmasterHostsInputTag);
        if (webmasterHost.Empty()) {
            return; // nothing to do here
        }
        reader.SkipRows(WebmasterHostsInputTag); // possible duplicates
        TMaybe <NProto::THostWithFaviconsAndProblems> prevRecord = reader.GetSingleRowMaybe(PrevHostSnapshotInputTag);
        NProto::THostWithFaviconsAndProblems newRecord, changesRecord;
        newRecord.SetHost(webmasterHost->GetHost());
        changesRecord.SetHost(webmasterHost->GetHost());
        NProto::THostFaviconsAndProblems *faviconsAndProblems = newRecord.MutableFaviconsAndProblems();

        ui64 now = Now().MilliSeconds();
        bool hasBigFavicon = false;
        bool hasValidFaviconOnSearch = false;
        TVector <NProto::TFaviconSample> badFavicons;
        for (; reader.IsValid(); reader.Next()) {
            NProto::TFaviconRecord faviconRecord = reader.GetRow(FaviconMergedInputTag);
            NProto::TFaviconSample *sample = faviconsAndProblems->add_favicons();
            // TODO fix protobufs
            sample->SetHost(faviconRecord.GetHost());
            sample->SetUrl(faviconRecord.GetUrl());
            sample->SetLastAccess(faviconRecord.GetLastAccess());
            sample->SetAddTime(faviconRecord.GetAddTime());
            sample->SetHttpCode(faviconRecord.GetHttpCode());
            sample->SetFilterCode(faviconRecord.GetFilterCode());
            sample->SetMimeType(faviconRecord.GetMimeType());
            sample->SetRedirTarget(faviconRecord.GetRedirTarget());
            sample->SetSourceName(faviconRecord.GetSourceName());
            sample->SetIsFromMorda(faviconRecord.GetIsFromMorda());
            sample->SetTargetSize(faviconRecord.GetTargetSize());
            sample->SetHeight(faviconRecord.GetHeight());
            sample->SetWidth(faviconRecord.GetWidth());

            if (faviconRecord.GetHttpCode() >= 300 && !VALID_MIME_TYPES.contains(faviconRecord.GetMimeType())) {
                badFavicons.push_back(*sample);
            } else if (faviconRecord.GetIsOnSearch()) {
                hasBigFavicon |=
                    faviconRecord.GetHeight() >= BIG_FAVICON_SIZE && faviconRecord.GetWidth() >= BIG_FAVICON_SIZE;
                TString url = faviconRecord.GetUrl();
                bool isSvg = GetPathExtension(url) == "svg" || faviconRecord.GetMimeType() == MIME_IMAGE_SVG;
                hasBigFavicon |= isSvg;
                hasValidFaviconOnSearch = true;
            }
        }
        // generate problems
        if (faviconsAndProblems->FaviconsSize() == 0) {
            faviconsAndProblems->add_problems()->SetType(NProto::FaviconProblemType::MISSING_FAVICON);
        } else {
            if (IsInExperiment(webmasterHost->GetHost()) && !hasBigFavicon ) {
                faviconsAndProblems->add_problems()->SetType(NProto::FaviconProblemType::BIG_FAVICON_ABSENT);
            }
            if (!badFavicons.empty() && !hasValidFaviconOnSearch) {
                NProto::TFaviconProblem *faviconError = faviconsAndProblems->add_problems();
                faviconError->SetType(NProto::FaviconProblemType::FAVICON_ERROR);
                for (const auto &badFavicon : badFavicons) {
                    *faviconError->add_samples() = badFavicon;
                }
            }
        }
        // compare with problems from prev snapshot
        THashMap <NProto::FaviconProblemType, NProto::TFaviconProblem> oldProblems;
        if (prevRecord.Defined()) {
            for (const NProto::TFaviconProblem &oldProblem : prevRecord->GetFaviconsAndProblems().problems()) {
                oldProblems[oldProblem.GetType()] = oldProblem;
            }
        }

        bool hasChanges = false;
        for (int i = 0; i < faviconsAndProblems->problems_size(); i++) {
            NProto::TFaviconProblem *newProblem = faviconsAndProblems->mutable_problems(i);
            newProblem->SetLastUpdate(now);
            if (oldProblems.contains(newProblem->GetType())) { // copy actual since if present
                newProblem->SetActualSince(oldProblems.at(newProblem->GetType()).GetActualSince());
            } else {
                newProblem->SetActualSince(now);
                // store changes
                *changesRecord.MutableFaviconsAndProblems()->add_problems() = *newProblem;
                hasChanges = true;
            }
        }

        writer.AddRow(newRecord, HostSnapshotOutputTag);
        if (hasChanges) {
            writer.AddRow(changesRecord, HostSnapshotChangesOutputTag);
        }
    }

private:
    double Share = 0;
};
REGISTER_REDUCER(TCollectHostFaviconsAndProblems)

int CollectHostFaviconsAndProblems(int, const char **) {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_SERVER_HOST);

    TString newProdTag = NYTUtils::GetAttr(client, config.TABLE_FAVICONS_ROOT, config.ATTR_PROD_TAG).AsString();
    if (client->Exists(config.TABLE_HOST_FAVICONS2)) {
        try {
            NYT::TNode prodTag = NYTUtils::GetAttr(client, config.TABLE_HOST_FAVICONS2, config.ATTR_LAST_PROCESSED_TABLE);
            if (!prodTag.IsNull() && newProdTag == prodTag.AsString()) {
                // no new data
                return 0;
            }
        } catch (yexception &e) {
            LOG_ERROR("No attribute %s at table", config.ATTR_LAST_PROCESSED_TABLE.c_str());
        }
    }

    NYT::ITransactionPtr tx = client->StartTransaction();
    // prepare webmaster hosts for support_table/_enriched format (Host + / => Key)
    TTable<NProto::TWebmasterHost> webmasterHosts(tx, config.TABLE_WEBMASTER_HOSTS);
    TTable<NFavicon::TSupportRecord> intmWebmasterHosts(tx, NYTUtils::JoinPath(config.TABLE_ROOT, "intm-webmaster-hosts"));

    TMapCmd<TPrepareWebmasterHostsMapper>(tx)
        .Input(webmasterHosts, WebmasterHostsInputTag)
        .Output(intmWebmasterHosts, PreparedWebmasterHostsOutputTag)
        .Do();
    TSortCmd<NFavicon::TSupportRecord>(tx, intmWebmasterHosts)
        .By({"Key", "TargetSize", "Url"})
        .Do();

    TTable<NProto::TFaviconRecord> preparedFavicons(tx, NYTUtils::JoinPath(config.TABLE_ROOT, "intm-favicons"));
    // reduce support-table, _enriched, and webmaster-hosts
    TReduceCmd<TPrepareFaviconsReducer>(tx)
        .Input(intmWebmasterHosts, PreparedWebmasterHostsInputTag)
        .Input(TTable<NFavicon::TSupportRecord>(tx, config.TABLE_SUPPORT_DATA), SupportTableInputTag)
        .Input(TTable<NFavicon::TBlobRecord>(tx, config.TABLE_ENRICHED), EnrichedTableInputTag)
        .Output(preparedFavicons, PreparedFaviconsOutputTag)
        .ReduceBy({"Key"})
        .SortBy({"Key", "TargetSize", "Url"})
        .Do();

    TSortCmd<NProto::TFaviconRecord>(tx, preparedFavicons)
        .By({"Url"})
        .Do();

    TTable<NProto::TFaviconRecord> preparedFavicons2(tx, NYTUtils::JoinPath(config.TABLE_ROOT, "intm-favicons2"));
    // merging with lemur/samovar snapshot
    TReduceCmd<TSearchAndIndexingFaviconMerger>(tx)
        .Input(TTable<NProto::TFaviconIndexingRecord>(tx, config.TABLE_SAMOVAR_SNAPSHOT), FaviconIndexingRecordInputTag)
        .Input(preparedFavicons, FaviconSearchInputTag)
        .Output(preparedFavicons2, FaviconMergedOutputTag)
        .ReduceBy("Url")
        .Do();

    TSortCmd<NProto::TFaviconRecord>(tx, preparedFavicons2)
        .By("Host")
        .Do();

    //tx->Commit();
    TTable<NProto::THostWithFaviconsAndProblems> hostFavicons(tx, config.TABLE_HOST_FAVICONS2);
    if (hostFavicons.Exists()) {
        tx->Move(config.TABLE_HOST_FAVICONS2, config.TABLE_PREV_HOST_FAVICONS, NYT::TMoveOptions().Force(true));
    }
    TTable<NProto::THostWithFaviconsAndProblems> prevHostFavicons(tx, config.TABLE_PREV_HOST_FAVICONS);
    const TString changesTableName = NYTUtils::JoinPath(config.TABLE_CHANGES_ROOT, ToString(Now().MilliSeconds()));
    TTable<NProto::THostWithFaviconsAndProblems> changesTable(tx, changesTableName);
    // collecting host favicons, problems and compare problems with prev snapshot
    double shareExp = NYTUtils::GetAttrOrDefault<double>(client, config.TABLE_CHANGES_ROOT, config.ATTR_BIG_FAVICON_EXP,0.0);
    TReduceCmd<TCollectHostFaviconsAndProblems>(tx, new TCollectHostFaviconsAndProblems(shareExp))
        .Input(webmasterHosts, WebmasterHostsInputTag)
        .Input(prevHostFavicons.IfExists(), PrevHostSnapshotInputTag)
        .Input(preparedFavicons2, FaviconMergedInputTag)
        .Output(hostFavicons, HostSnapshotOutputTag)
        .Output(changesTable, HostSnapshotChangesOutputTag)
        .ReduceBy("Host")
        .Do();

    TSortCmd<NProto::THostWithFaviconsAndProblems>(tx, hostFavicons)
        .By("Host")
        .Do();

    TSortCmd<NProto::THostWithFaviconsAndProblems>(tx, changesTable)
        .By("Host")
        .Do();

    NYTUtils::SetAttr(tx, config.TABLE_HOST_FAVICONS2, config.ATTR_LAST_PROCESSED_TABLE, newProdTag);
    tx->Commit();

    return 0;
}

} // namespace NWebmaster
