#include "parsers.h"

#include <functional>
#include <utility>

#include <google/protobuf/descriptor.h>
#include <google/protobuf/message.h>

#include <library/cpp/digest/md5/md5.h>
#include <library/cpp/geobase/lookup.hpp>
#include <library/cpp/string_utils/base64/base64.h>
#include <library/cpp/string_utils/tskv_format/tskv_map.h>

#include <util/digest/murmur.h>
#include <util/generic/hash.h>
#include <util/generic/hash_set.h>
#include <util/generic/yexception.h>
#include <util/random/random.h>
#include <util/string/cast.h>
#include <util/string/strip.h>

#include <crypta/graph/rt/events/events.h>
#include <crypta/graph/rt/events/proto/fp.pb.h>
#include <crypta/graph/rt/lib/debounce_cache/debounce_cache.h>
#include <crypta/lib/native/identifiers/lib/id_types/all.h>

namespace {
    using namespace NResharder;
    using TParserConfig = TRowsProcessorConfig::TParser;
    using namespace NCrypta::NEvent;

    // ISP names whose traffic is rejected by the parser. Entries are lower-case
    // literals and are matched exactly (via contains()) against the ISP name
    // reported by the geobase lookup.
    const THashSet<TString> kIgnoredIsps{
        "icloud private relay",
        "maximatelecom jsc",
        "yandex llc",
        "yandex oy",
    };

    // @brief IRowParser implementation for TBsRtbLog format
    //
    // Takes one "tskv" log line, extracts the client IP / user agent and the
    // identifiers (yandexuid, idfa, gaid) found on it, and appends four TFpEvent
    // rows to the output batch: for each of the NAIVE and MODEL_IP_UA_V0
    // fingerprint versions, one row keyed by (IP, UA hash) and one keyed by IP only.
    //
    // Lines are rejected by throwing: TInvalidFormatError (no "tskv" prefix),
    // TObsoleteError (too old), TSamplingError (dropped by the sampler),
    // TNoFpsError (no IP / user agent), TIspError (ignored ISP) and
    // TNoIdsError (no usable identifiers).
    class TBsRtbLog: public NRtSklejka::IRowParser {
    public:
        // @param config  supplies the geobase path, the debounce cache settings,
        //                the maximum accepted line age in seconds (0 disables the
        //                age check) and the sampling threshold (see Preparse).
        TBsRtbLog(const NProtoBuf::Descriptor* /* schema */,
                  const TParserConfig::TFormat& config)
            : GeoBase(config.GetGeoBasePath())
            , Debounce("deb", config.GetDebounceCfg())
            , MessageLifetime(config.GetLifetime())
            , Sampling(config.GetSampling()) {
        }

        // Parses `text` into `row` and appends the resulting fingerprint rows to `rows`.
        // `row.Message` must hold a TFpEvent; it is used as the template that every
        // emitted row is copied from.
        void Parse(TStringBuf text, NBigRT::TRow& row, NBigRT::TRowsBatch& rows, [[maybe_unused]] NBigRT::TParseContext& ctx) const override {
            auto tskvMap{Preparse(text, row)};

            auto* const msg = dynamic_cast<TFpEvent*>(row.Message.Get());
            // A failed cast used to be dereferenced unconditionally.
            Y_ENSURE(msg != nullptr, "Row message is not a TFpEvent!");

            TString userIp{};
            TString userAgent{};

            if (!FillFprintsFromLine(tskvMap, userAgent, userIp)) {
                ythrow TNoFpsError() << "no fps on log line! skip";
            }

            if (!FillIdsFromLine(tskvMap, row, userIp, userAgent)) {
                ythrow TNoIdsError() << "no ids on log line! skip";
            }

            msg->SetUserIP(userIp);
            msg->SetUserAgent(userAgent);

            // Builds a fingerprint from the given parts, stores it (and the hash of
            // its serialized form) in `msg`, then appends a copy of `msg` as a new row.
            // An empty `fpUserAgent` / `fpUserIp` leaves the matching field unset.
            const auto emitFingerprintRow = [&](const TString& fpUserAgent, const TString& fpUserIp, EHerschelVersion version) {
                TFingerprint fp{};

                if (!fpUserAgent.empty()) {
                    fp.SetUAHash(MD5::CalcHalfMix(fpUserAgent));
                }
                if (!fpUserIp.empty()) {
                    fp.SetUserIP(fpUserIp);
                }
                fp.SetVersion(version);

                TString key{};
                if (!fp.SerializeToString(&key)) {
                    ythrow TNoFpsError() << "unserialize pb key";
                }

                const auto hash{TMurmurHash<ui64>{}(key.data(), key.size())};
                msg->MutableFingerprint()->Swap(&fp);
                msg->SetFingerprintHash(hash);
                Y_ENSURE(msg->IsInitialized(), "Not all of required fields are initialized!");

                NBigRT::TRow localRow{
                    .Meta = row.Meta,
                    .TimeStamp = row.TimeStamp,
                    .MessageType = row.MessageType,
                    .Message = NCrypta::NEvent::MakeMessage(row.MessageType),
                };
                localRow.Message.Get()->CopyFrom(*msg);

                // localRow is not used afterwards, so hand it over instead of copying.
                rows.push_back(std::move(localRow));
            };

            emitFingerprintRow(userAgent, userIp, EHerschelVersion::NAIVE);
            emitFingerprintRow({}, userIp, EHerschelVersion::NAIVE);
            emitFingerprintRow(userAgent, userIp, EHerschelVersion::MODEL_IP_UA_V0);
            emitFingerprintRow({}, userIp, EHerschelVersion::MODEL_IP_UA_V0);
        }

    private:
        // Validates the "tskv\t" prefix, deserializes the key/value pairs and sets
        // row.TimeStamp from the "unixtime" field.
        //
        // Throws TInvalidFormatError on a missing prefix, TObsoleteError when the
        // line is older than MessageLifetime seconds, and TSamplingError when a
        // random number in [0, 1000) falls below Sampling. A zero timestamp fails
        // the Y_ENSURE; an absent / non-numeric "unixtime" makes FromString throw.
        THashMap<TString, TString> Preparse(TStringBuf text, NBigRT::TRow& row) const {
            if (!text.SkipPrefix("tskv\t")) {
                ythrow TInvalidFormatError() << "wrong format of log line! " << text;
            }

            THashMap<TString, TString> tskvMap;
            NTskvFormat::DeserializeMap(text, tskvMap);

            row.TimeStamp = TInstant::Seconds(::FromString<ui64>(tskvMap["unixtime"]));
            Y_ENSURE(TInstant::Zero() != row.TimeStamp, "Missing TimeStamp!");

            if (MessageLifetime > 0) {
                // The lifetime is a duration: subtracting it from Now() yields the
                // cutoff instant. (Subtracting a TInstant would yield a TDuration.)
                const TInstant limitTimestamp = TInstant::Now() - TDuration::Seconds(MessageLifetime);

                if (row.TimeStamp.Seconds() < limitTimestamp.Seconds()) {
                    ythrow TObsoleteError() << "skip old line";
                }
            }
            if (RandomNumber<ui64>(1000) < Sampling) {
                ythrow TSamplingError() << "skip by hit sampler limiter";
            }

            return tskvMap;
        }

        // Adds to the row's TFpEvent every identifier from the line that is
        // significant and passes the debounce check for this (id, ip, ua) triple.
        // "uniqid" is only considered when "uniqidsource" resolves to YANDEXUID.
        //
        // @return true when at least one identifier was added.
        bool FillIdsFromLine(THashMap<TString, TString>& tskvMap, NBigRT::TRow& row,
                             const TString& ip, const TString& ua) const {
            auto* const msg = dynamic_cast<TFpEvent*>(row.Message.Get());
            Y_ENSURE(msg != nullptr, "Row message is not a TFpEvent!");

            bool hasAnyIds{false};

            // Shared tail of all three id kinds; significance is checked before the
            // debounce cache is touched, exactly as in the per-kind conditions.
            const auto addIfFresh = [&](const auto& id) {
                if (id.IsSignificant() && NoDeb(id.Normalize(), ip, ua)) {
                    (*msg->AddIds()) = id.ToProto();
                    hasAnyIds = true;
                }
            };

            const auto idType{NIdentifiers::GetIdentifierTypeByNameOrDefault(tskvMap["uniqidsource"])};
            const auto yandexuid{NIdentifiers::TYandexuid(tskvMap["uniqid"])};
            if (idType == NCrypta::NIdentifiersProto::NIdType::YANDEXUID) {
                addIfFresh(yandexuid);
            }

            addIfFresh(NIdentifiers::TIdfa(tskvMap["idfa"]));
            addIfFresh(NIdentifiers::TGaid(tskvMap["gaid"]));

            return hasAnyIds;
        }

        // Reads "useragent" and "clientip6" (with an IPv4-mapped prefix stripped)
        // into the output parameters.
        //
        // @return false when either value is empty.
        // Throws TIspError when the geobase ISP name for the IP is in kIgnoredIsps.
        bool FillFprintsFromLine(THashMap<TString, TString>& tskvMap, TString& userAgent, TString& userIp) const {
            userAgent = tskvMap["useragent"];
            userIp = StripIpv4MappedPrefix(tskvMap["clientip6"]);

            if (userAgent.empty() || userIp.empty()) {
                return false;
            }

            if (kIgnoredIsps.contains(GeoBase.GetTraitsByIp(userIp).IspName)) {
                ythrow TIspError();
            }
            return true;
        }

        // Returns `userIp` without a leading "::ffff:" (IPv4-mapped IPv6 prefix);
        // any other address is returned unchanged.
        static TString StripIpv4MappedPrefix(const TString& userIp) {
            constexpr TStringBuf ipv4embedded{"::ffff:"};
            if (userIp.StartsWith(ipv4embedded)) {
                return userIp.substr(ipv4embedded.size());
            }
            return userIp;
        }

        // Debounce check for an (id, ip, ua) triple, keyed by their plain concatenation.
        //
        // @return false when Debounce.Pull() reports the key; otherwise the key is
        //         pushed into the cache and true is returned.
        bool NoDeb(const TString& id, const TString& ip, const TString& ua) const {
            const TString key{id + ip + ua};
            if (Debounce.Pull(key)) {
                return false;
            }
            Debounce.Push(key);
            return true;
        }

        NGeobase::TLookup GeoBase;
        // mutable: the cache is updated from the const Parse() path.
        mutable NRtCrypta::TDebounceCache Debounce;
        // Maximum accepted line age in seconds; 0 disables the check.
        const ui64 MessageLifetime;
        // Threshold compared against a random number in [0, 1000); see Preparse().
        const ui64 Sampling;
    };
}

namespace NResharder {
    // Creates the row parser selected by `config`.
    // Only the BsRtbLog format is handled here; a config without it is rejected
    // with a yexception.
    NRtSklejka::IRowParserPtr MakeParser(const NProtoBuf::Descriptor* schema, const TRowsProcessorConfig::TParser& config) {
        if (!config.HasFromBsRtbLog()) {
            ythrow yexception() << "there is no row format in config!";
        }
        return MakeHolder<TBsRtbLog>(schema, config.GetFromBsRtbLog());
    }
}
