#include <quality/ab_testing/cost_join_lib/all.h>
#include <quality/ab_testing/paths_to_tables_lib/paths.h>
#include <quality/ab_testing/stat_collector_lib/metrics.h>
#include <quality/ab_testing/stat_collector_lib/common/gmv/common.h>
#include <quality/ab_testing/stat_collector_sources_lib/mr_sources.h>

#include <quality/user_metrics/surplus_metrics/lib/all.h>
#include <quality/user_sessions/request_aggregate_lib/all.h>

#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/streams/factory/factory.h>
#include <library/cpp/string_utils/url/url.h>

#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/util/temp_table.h>

#include <yweb/webdaemons/icookiedaemon/icookie_lib/utils/uuid.h>

#include <util/draft/date.h>
#include <util/string/builder.h>
#include <util/string/split.h>
#include <util/string/subst.h>

using namespace NYT;
using namespace NRA;
using namespace NStatCollector;

/// Mapper that turns e-commerce order rows into YaMR rows:
///   key    = "y" + icookie (or "y" + yandexuid when icookie is not a string),
///   subkey = " ecom",
///   value  = "<timestamp>\trevenue=...\tsource=...\t..." (tab-separated key=value pairs).
/// Rows without a non-zero double "price", without a parsable order date/time,
/// or without any string user identifier are dropped.
class TMapEComRequest
    : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TYaMRRow>>
{
public:
    TMapEComRequest() = default;

    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TYaMRRow>* writer) override {
        for (; input->IsValid(); input->Next()) {
            const auto& order = input->GetRow();

            // Value of a string column, or an empty string when the column has any other type.
            const auto stringOrEmpty = [&order](const char* column) -> TString {
                const auto& cell = order[column];
                return cell.GetType() == TNode::String ? cell.AsString() : TString();
            };

            // Orders with a missing or zero price carry no revenue and are skipped.
            const auto& price = order["price"];
            const double revenue = price.GetType() == TNode::Double ? price.AsDouble() : 0.0;
            if (revenue == 0.0) {
                continue;
            }

            // Date and time are stored separately; they are glued into an ISO 8601
            // string with a fixed "+03:00" offset before parsing.
            const auto& orderDate = order["order_date"];
            const auto& orderTime = order["order_time"];
            if (orderDate.GetType() != TNode::String || orderTime.GetType() != TNode::String) {
                continue;
            }
            time_t timeEvent = 0;
            const TString datetime = orderDate.AsString() + "T" + orderTime.AsString() + "+03:00";
            if (!ParseISO8601DateTime(datetime.data(), datetime.size(), timeEvent)) {
                continue;
            }

            // icookie takes precedence over yandexuid; a row with neither is useless.
            const auto& icookie = order["icookie"];
            const auto& yandexuid = order["yandexuid"];
            TString uid;
            if (icookie.GetType() == TNode::String) {
                uid = "y" + icookie.AsString();
            } else if (yandexuid.GetType() == TNode::String) {
                uid = "y" + yandexuid.AsString();
            } else {
                continue;
            }

            const TString orderId = stringOrEmpty("order_id");
            const TString source = stringOrEmpty("source");
            const TString service = stringOrEmpty("domain");
            const TString title = stringOrEmpty("shop_title");

            // The multiplier defaults to 1 when the column is not a double.
            const auto& multiplier = order["multiplier"];
            const double mult = multiplier.GetType() == TNode::Double ? multiplier.AsDouble() : 1.;

            TStringStream value;
            value << timeEvent;
            value << "\trevenue=" << revenue;
            value << "\tsource=" << source;
            value << "\tshop_title=" << title;
            value << "\tdomain=" << service;
            value << "\tmultiplier=" << mult;
            value << "\torder_id=" << orderId;

            // Optional attributes are appended only when present with the expected type.
            const auto& extra = order["additional_attributes"];
            if (extra.GetType() == TNode::Map) {
                const auto& reqId = extra["ReqID"];
                if (reqId.GetType() == TNode::String) {
                    value << "\tReqID=" << reqId.AsString();
                }
                const auto& searchEngineId = extra["SearchEngineID"];
                if (searchEngineId.GetType() == TNode::Uint64) {
                    value << "\tSearchEngineID=" << searchEngineId.AsUint64();
                }
                const auto& advEngineId = extra["AdvEngineID"];
                if (advEngineId.GetType() == TNode::Uint64) {
                    value << "\tAdvEngineID=" << advEngineId.AsUint64();
                }
            }

            writer->AddRow({uid, " ecom", value.Str()});
        }
    }
};

REGISTER_MAPPER(TMapEComRequest);


/// Mapper that turns cube-cost rows into YaMR rows:
///   key    = "y" + user identifier,
///   subkey = " cube_cost",
///   value  = "<timestamp>[\tservice=...][\tservice_comdep=...][\tintents=a,b,...]"
///            "\trevenue=<revenue - payments>\trevenue_cube=<revenue>\tpayments=<payments>".
/// Rows where both revenue and payments are zero, rows without an Int64 event/show time,
/// and rows without a resolvable user identifier are dropped.
class TMapCubeCostRequest
    : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TYaMRRow>>
{
    /// Picks the user identifier for a row and returns it with the "y" prefix.
    /// Returns an empty string when the row must be skipped.
    /// Priority: non-zero Uint64 "icookie"; then, for browser "YandexSearch" with a string
    /// "uuid", an icookie generated from the uuid (a failed generation skips the row);
    /// then string "yandexuid".
    static TString ResolveUid(const NYT::TNode& row) {
        const auto& icookie = row["icookie"];
        if (icookie.GetType() == TNode::Uint64) {
            const ui64 id = icookie.AsUint64();
            if (id) {
                return "y" + ToString(id);
            }
        }

        const auto& browser = row["browser"];
        const auto& uuid = row["uuid"];
        const bool isSearchApp = browser.GetType() == TNode::String
            && uuid.GetType() == TNode::String
            && browser.AsString() == "YandexSearch";
        if (isSearchApp) {
            TMaybe<TString> generated = NIcookie::GenerateIcookieFromUuid(uuid.AsString());
            if (!generated.Defined()) {
                return TString();
            }
            return "y" + (*generated.Get());
        }

        const auto& yandexuid = row["yandexuid"];
        if (yandexuid.GetType() == TNode::String) {
            return "y" + yandexuid.AsString();
        }
        return TString();
    }

public:
    TMapCubeCostRequest() = default;

    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TYaMRRow>* writer) override {
        for (; input->IsValid(); input->Next()) {
            const auto& record = input->GetRow();

            const auto& revenueCell = record["revenue"];
            const double revenue = revenueCell.GetType() == TNode::Double ? revenueCell.AsDouble() : 0.0;

            const auto& paymentsCell = record["payments"];
            const double payments = paymentsCell.GetType() == TNode::Double ? paymentsCell.AsDouble() : 0.0;

            // Nothing to account for when neither amount is set.
            if (revenue == 0.0 && payments == 0.0) {
                continue;
            }

            // "eventtime" is preferred; "showtime" is the fallback.
            const auto& eventTime = record["eventtime"];
            const auto& showTime = record["showtime"];
            time_t timeEvent = 0;
            if (eventTime.GetType() == TNode::Int64) {
                timeEvent = eventTime.AsInt64();
            } else if (showTime.GetType() == TNode::Int64) {
                timeEvent = showTime.AsInt64();
            } else {
                continue;
            }

            const TString uid = ResolveUid(record);
            if (uid.empty()) {
                continue;
            }

            TStringStream value;
            value << timeEvent;

            // The service name also drives the intents lookup below; it stays empty
            // when the column is not a string.
            TString service;
            const auto& serviceCell = record["service"];
            if (serviceCell.GetType() == TNode::String) {
                service = serviceCell.AsString();
                value << "\tservice=" << service;
            }

            const auto& comdepCell = record["service_comdep"];
            if (comdepCell.GetType() == TNode::String) {
                value << "\tservice_comdep=" << comdepCell.AsString();
            }

            // Comma-separated list of wizard intents mapped to this service, if any.
            const auto mapped = CUBE_COST_SERVICE_TO_WIZ_INTENTS.find(service);
            if (mapped != CUBE_COST_SERVICE_TO_WIZ_INTENTS.end() && !mapped->second.empty()) {
                value << "\tintents=";
                auto intent = mapped->second.begin();
                value << *intent;
                for (++intent; intent != mapped->second.end(); ++intent) {
                    value << "," << *intent;
                }
            }

            value << "\trevenue=" << (revenue - payments);
            value << "\trevenue_cube=" << revenue;
            value << "\tpayments=" << payments;

            writer->AddRow({uid, " cube_cost", value.Str()});
        }
    }
};

REGISTER_MAPPER(TMapCubeCostRequest);

/// Comparator ordering items by their `Prgg` member in descending order
/// (the item with the larger `Prgg` goes first).
template <class T>
struct TPrggSort {
    bool operator()(const T& lhs, const T& rhs) const {
        return rhs.Prgg < lhs.Prgg;
    }
};

/// Reducer over Metrika visit rows of a single key.
///
/// For each key it reads at most 1000 rows, keeps those with a Uint64 "ICookie",
/// sorts the visits by PrggGoodModelScore (descending) and marks a visit as deleted
/// when it overlaps in time and shares the URL path with a better-scored surviving visit.
/// Surviving visits are written to output table 0 (unless MeanOnly is set) and
/// accumulated per CounterID; the per-counter (count, sum) pairs are flushed to
/// output table 1 once more than 10000 counters are buffered and at Finish().
class TReduceMetrikaPrgg : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TYaMRRow>>
{
    /// One visit extracted from an input row.
    /// Every numeric field has a default so that a row with a missing or
    /// differently typed column never leaves an indeterminate value behind:
    /// these fields are read unconditionally by sorting, Intersect() and output.
    struct TPrggEvent {
        TPrggEvent() = default;

        explicit TPrggEvent(const NYT::TNode& row) {
            if (row["StartURL"].GetType() == TNode::String) {
                Url = row["StartURL"].AsString();
            }

            if (row["VisitID"].GetType() == TNode::Uint64) {
                VisitID = row["VisitID"].AsUint64();
            }

            if (row["ReqID"].GetType() == TNode::String) {
                ReqID = row["ReqID"].AsString();
            }

            if (row["Referer"].GetType() == TNode::String) {
                Referer = row["Referer"].AsString();
            }

            if (row["StartURLDomain"].GetType() == TNode::String) {
                StartUrlDomain = row["StartURLDomain"].AsString();
            }

            if (row["PrggGoodModelScore"].GetType() == TNode::Double) {
                Prgg = row["PrggGoodModelScore"].AsDouble();
            }

            if (row["UTCStartTime"].GetType() == TNode::Uint64) {
                Start = row["UTCStartTime"].AsUint64();
            }

            // Without a duration the visit is treated as a zero-length interval
            // starting (and ending) at Start.
            End = Start;
            if (row["Duration"].GetType() == TNode::Uint64) {
                End = Start + row["Duration"].AsUint64();
            }

            if (row["CounterID"].IsUint64()) {
                CounterId = row["CounterID"].AsUint64();
            }
        }

        /// True when the [Start, End] intervals overlap and both visits share the same URL path.
        bool Intersect(const TPrggEvent& p) const {
            if ((p.Start <= Start && Start <= p.End) || (Start <= p.Start && p.Start <= End)) {
                return p.GetPath() == GetPath();
            }
            return false;
        }

        /// URL with everything from the first '?' (and then the first '#') cut off.
        TStringBuf GetPath() const {
            return TStringBuf(Url).Before('?').Before('#');
        }

        double Prgg = 0.0;
        time_t Start = 0;
        time_t End = 0;
        TString Url;
        TString ReqID;
        TString Referer;
        TString StartUrlDomain;
        ui64 CounterId = 0;
        ui64 VisitID = 0;
        bool Deleted = false;
    };

    // CounterID -> (number of surviving visits, sum of their Prgg); flushed by SaveCounter().
    THashMap<ui64, std::pair<double, double>> Dom2Prgg;
    // CounterID -> mean Prgg supplied by the caller; used for the MeanPrgg/CorrPrgg fields.
    THashMap<ui64, double> Counter2Means;
    // When true, per-visit rows are not written; only the per-counter aggregates are.
    bool MeanOnly = false;

public:
    /// Writes the buffered per-counter aggregates to output table 1 and clears the buffer.
    /// The flush happens only when more than 10000 counters are buffered or `last` is true.
    void SaveCounter(TTableWriter<TYaMRRow>* writer, bool last) {
        if (Dom2Prgg.size() > 10000 || last) {
            for (const auto& el : Dom2Prgg) {
                writer->AddRow({ToString(el.first), ToString(el.second.first), ToString(el.second.second)}, 1);
            }
            Dom2Prgg.clear();
        }
    }

    /// Flushes whatever aggregates are still buffered when the job ends.
    void Finish(TTableWriter<TYaMRRow>* output) override {
        SaveCounter(output, true);
    }

    TReduceMetrikaPrgg() = default;
    TReduceMetrikaPrgg(const THashMap<ui64, double>& counter2Means, bool meanOnly)
        : Counter2Means(counter2Means)
        , MeanOnly(meanOnly)
    {
    }

    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TYaMRRow>* writer) override {
        // Upper bound on rows read per key; it also bounds the quadratic pass below.
        const size_t MAX_VISIT_ON_UID = 1000;
        size_t uid = 0;
        TVector<TPrggEvent> visits;
        for (size_t count = 0; input->IsValid() && count < MAX_VISIT_ON_UID; input->Next(), ++count) {
            auto& row = input->GetRow();
            if (row["ICookie"].GetType() == TNode::Uint64) {
                uid = row["ICookie"].AsUint64();
                visits.emplace_back(row);
            }
        }
        Sort(visits, TPrggSort<TPrggEvent>());

        // Visits are ordered best-first, so a visit is dropped as soon as any
        // better-scored, still surviving visit overlaps it on the same path.
        for (size_t i = 1; i < visits.size(); ++i) {
            for (size_t j = 0; j < i; ++j) {
                if (!visits[j].Deleted && visits[j].Intersect(visits[i])) {
                    visits[i].Deleted = true;
                    break;
                }
            }
        }

        for (const auto& visit : visits) {
            if (visit.Deleted) {
                continue;
            }
            if (!MeanOnly) {
                TStringStream value;
                value << visit.Start;
                value << "\tservice=" << visit.GetPath();
                value << "\trevenue=" << visit.Prgg;
                value << "\tCounterID=" << visit.CounterId;
                value << "\tReqID=" << visit.ReqID;
                value << "\tReferer=" << visit.Referer;
                value << "\tStartUrlDomain=" << visit.StartUrlDomain;
                value << "\tVisitID=" << visit.VisitID;

                // Mean-corrected score is emitted only for counters with a known mean.
                const auto itFind = Counter2Means.find(visit.CounterId);
                if (itFind != Counter2Means.end()) {
                    value << "\tMeanPrgg=" << itFind->second;
                    value << "\tCorrPrgg=" << (visit.Prgg - itFind->second);
                }

                writer->AddRow({"y" + ToString(uid), " prgg", value.Str()}, 0);
            }
            auto& dom2 = Dom2Prgg[visit.CounterId];
            ++dom2.first;
            dom2.second += visit.Prgg;
        }
        SaveCounter(writer, false);
    }

    Y_SAVELOAD_JOB(Counter2Means, MeanOnly);

};

REGISTER_REDUCER(TReduceMetrikaPrgg);

/// Reducer that sums, for one key, the counts stored in SubKey and the values
/// stored in Value, and emits a single row {key, total count, total sum}.
/// Nothing is written when the input is empty.
class TSumPrgg : public NYT::IReducer<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TYaMRRow>> {
public:
    TSumPrgg() = default;

    void Do(NYT::TTableReader<NYT::TYaMRRow>* input, NYT::TTableWriter<NYT::TYaMRRow>* writer) override {
        if (!input->IsValid()) {
            return;
        }

        // The key is taken from the first row and reused for the aggregated output row.
        const TString key{input->GetRow().Key};

        size_t total = 0;
        double accumulated = 0.;
        while (input->IsValid()) {
            const auto& record = input->GetRow();
            total += FromString<size_t>(record.SubKey);
            accumulated += FromString<double>(record.Value);
            input->Next();
        }

        writer->AddRow({key, ToString(total), ToString(accumulated)});
    }
};

REGISTER_REDUCER(TSumPrgg);


/// Reducer that joins a user's e-commerce revenue events (" ecom" rows) with the
/// user-session records of the same key and writes one TNode per joined request,
/// carrying the attributed GMV and a set of request properties.
/// Keys whose first row is not an " ecom" row are skipped entirely.
class TGMVReduce : public NYT::IReducer<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>>
{
    TString BDict;
    TSimpleSharedPtr<TBlockStatInfo> BlockStatInfo;

    /// Sets node[column] to values[key], or to the string "0" when the key is absent.
    /// A single lookup is performed.
    template <class TValues>
    static void SetValueOrZero(TNode& node, const char* column, const TValues& values, const char* key) {
        const auto found = values.find(key);
        if (found == values.end()) {
            node[column] = "0";
        } else {
            node[column] = found->second;
        }
    }

    /// Fills "qcpv2"/"qcpv3" from the request's search props under the given keys;
    /// both become "0" when the request exposes no search props.
    static void FillCommercialProbs(TNode& node, const TRequest* request, const char* keyV2, const char* keyV3) {
        if (const auto* searchProps = dynamic_cast<const TSearchPropsRequestProperties*>(request)) {
            // Bound by const reference so the whole map is not copied for two lookups.
            const auto& values = searchProps->GetSearchPropsValues();
            SetValueOrZero(node, "qcpv2", values, keyV2);
            SetValueOrZero(node, "qcpv3", values, keyV3);
        } else {
            node["qcpv2"] = "0";
            node["qcpv3"] = "0";
        }
    }

public:
    TGMVReduce() = default;
    TGMVReduce(const TString& bdict)
        : BDict(bdict)
    {}

    /// Builds the block-stat dictionary from the serialized BDict before any row is processed.
    void Start(NYT::TTableWriter<NYT::TNode>* /*output*/) override {
        TMemoryInput blockStatInput{BDict};
        BlockStatInfo.Reset(new TBlockStatInfo(blockStatInput));
    }

    /// Returns map[a] when present, otherwise b.
    /// The result is a reference: when `b` is a temporary, the result must be
    /// copied within the same full expression.
    const TString& Get(const THashMap<TString, TString>& map, const TString& a, const TString& b) {
        auto it = map.find(a);
        if (it != map.end()) {
            return it->second;
        }
        return b;
    }

    /// Writes one output node per reqid whose joined events sum to a GMV above 0.1.
    ///
    /// @param uid              user key written to the "uid" column.
    /// @param reqid2JoinEvents reqid -> joined revenue events; events with an empty ClickType are ignored.
    /// @param id2req           reqid -> request; reqids missing from this map are skipped.
    /// @param out              destination writer.
    void InsertNodes(const TString& uid,
                     const THashMap<TString, TVector<TWebJoinRevenueEvent>>& reqid2JoinEvents,
                     const THashMap<TString, const TRequest*>& id2req,
                     NYT::TTableWriter<NYT::TNode>* out) {
        for (const auto& reqid2Join : reqid2JoinEvents) {
            double mult = 0;
            double gmv = 0;
            time_t ecom_ts = 0;
            time_t us_ts = 0;
            double dt = 0;
            TString type;
            TString dom, url, ct, seid, aeid;

            // GMV and the max multiplier accumulate over all events; the remaining
            // fields end up holding the values of the last event with a click type.
            for (const auto& webJoinRevenueEvent : reqid2Join.second) {
                if (!webJoinRevenueEvent.ClickType) {
                    continue;
                }
                mult = Max(mult, webJoinRevenueEvent.Multiplier);
                gmv += webJoinRevenueEvent.Revenue / webJoinRevenueEvent.Multiplier;
                type = webJoinRevenueEvent.Type;
                dom = webJoinRevenueEvent.Domain;
                url = webJoinRevenueEvent.PWevent->Url;
                ct = webJoinRevenueEvent.PWevent->ClickType;
                seid = Get(webJoinRevenueEvent.Pevent->Opts, "SearchEngineID", "");
                aeid = Get(webJoinRevenueEvent.Pevent->Opts, "AdvEngineID", "");
                ecom_ts = webJoinRevenueEvent.Timestamp;
                us_ts = webJoinRevenueEvent.PWevent->Timestamp;
                dt = webJoinRevenueEvent.DwellTimeOnService;
            }
            if (!(gmv > 0.1)) {
                continue;
            }

            // id2req holds only a filtered subset of requests, so the lookup can miss;
            // dereferencing end() would be undefined behaviour, hence the explicit skip.
            const auto it = id2req.find(reqid2Join.first);
            if (it == id2req.end() || it->second == nullptr) {
                continue;
            }
            const TRequest* request = it->second;

            TNode node;

            node["type"] = type;
            node["uid"] = uid;
            node["reqid"] = reqid2Join.first;
            node["gmv"] = gmv;
            node["dom"] = GetCanonDom(dom);
            node["url_dom"] = GetCanonDom(url);
            node["url"] = url;
            node["ct"] = ct;
            node["max_mult"] = mult;
            node["query"] = request->GetQuery();
            node["seid"] = seid;
            node["aeid"] = aeid;
            node["ecom_ts"] = ecom_ts;
            node["us_ts"] = us_ts;
            node["dt"] = dt;
            // TODO
            const auto& maybeParentReqid = request->GetWebParentReqID();
            if (!maybeParentReqid.Defined()) {
                node["parentReqid"] = "none";
            } else {
                node["parentReqid"] = maybeParentReqid.GetRef();
            }

            if (const TGeoInfoRequestProperties* geoProps = dynamic_cast<const TGeoInfoRequestProperties*>(request)) {
                node["region"] = geoProps->GetUserRegion();
            } else {
                node["region"] = 0;
            }

            if (auto yaProps = dynamic_cast<const TYandexRequestProperties*>(request)) {
                node["tld"] = yaProps->GetServiceDomRegion();
            } else {
                node["tld"] = "none";
            }

            // Service type plus commercial-probability factors; the factor keys
            // differ between images and web, and are not available for cbir/video.
            if (dynamic_cast<const TImagesRequestProperties*>(request)) {
                node["service"] = "images";
                FillCommercialProbs(node, request,
                                    "IMAGES.ImgQueryFactors.QCommercialProbV2",
                                    "IMAGES.ImgQueryFactors.QCommercialProbV3");
            } else if (dynamic_cast<const TWebRequestProperties*>(request)) {
                node["service"] = "web";
                FillCommercialProbs(node, request,
                                    "UPPER.ImgSerpData.QCommercialProbV2",
                                    "UPPER.ImgSerpData.QCommercialProbV3");
            } else if (dynamic_cast<const TCbirRequestProperties*>(request)) {
                node["service"] = "cbir";
                node["qcpv2"] = "0";
                node["qcpv3"] = "0";
            } else if (dynamic_cast<const TVideoRequestProperties*>(request)) {
                node["service"] = "video";
                node["qcpv2"] = "0";
                node["qcpv3"] = "0";
            }

            if (dynamic_cast<const TDesktopUIProperties*>(request)) {
                node["ui"] = "desktop";
            } else if (dynamic_cast<const TTouchUIProperties*>(request)) {
                node["ui"] = "touch";
            } else if (dynamic_cast<const TPadUIProperties*>(request)) {
                node["ui"] = "pad";
            } else if (dynamic_cast<const TMobileAppUIProperties*>(request)) {
                node["ui"] = "mobileapp";
            } else {
                node["ui"] = "other";
            }

            if (auto relProps = dynamic_cast<const TRelevRequestProperties*>(request)) {
                const auto& relev = relProps->GetRelevValues();
                SetValueOrZero(node, "cm2", relev, "cm2");
                SetValueOrZero(node, "vcomm", relev, "vcomm");
            } else {
                node["cm2"] = "0";
                node["vcomm"] = "0";
            }

            out->AddRow(node);
        }
    }

    /// Schema listing the columns produced by InsertNodes().
    /// NOTE(review): "ecom_ts"/"us_ts" are declared VT_UINT64 while InsertNodes()
    /// assigns time_t values to them — confirm the resulting node type matches.
    static NYT::TTableSchema OutputSchema() {
        return NYT::TTableSchema()
            .AddColumn(NYT::TColumnSchema().Name("qcpv2").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("qcpv3").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("cm2").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("vcomm").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("service").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("type").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("uid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("reqid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("query").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("region").Type(NYT::VT_INT64))
            .AddColumn(NYT::TColumnSchema().Name("tld").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("dom").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("url_dom").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("url").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("ct").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("seid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("aeid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("ui").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("max_mult").Type(NYT::VT_DOUBLE))
            .AddColumn(NYT::TColumnSchema().Name("parentReqid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("ecom_ts").Type(NYT::VT_UINT64))
            .AddColumn(NYT::TColumnSchema().Name("us_ts").Type(NYT::VT_UINT64))
            .AddColumn(NYT::TColumnSchema().Name("dt").Type(NYT::VT_DOUBLE))
            .AddColumn(NYT::TColumnSchema().Name("gmv").Type(NYT::VT_DOUBLE));
    }

    void Do(NYT::TTableReader<NYT::TYaMRRow>* input, NYT::TTableWriter<NYT::TNode>* out) override {
        // A key is processed only when its first row is an " ecom" row.
        if (!input->IsValid()) {
            return;
        }
        if (input->GetRow().SubKey != " ecom") {
            return;
        }

        TLogsParserParams parserParams(*BlockStatInfo);
        parserParams.SetErrorHandler(new TDefaultLogsParserErrorHandler());
        TVector<TTimeRevenueEvent> ecomEvents;

        TLogsParser parser(parserParams);
        TString uid;
        // Best effort: a parsing failure is logged and the join proceeds with
        // whatever was collected before the exception.
        try {
            for (; input->IsValid(); input->Next()) {
                const auto& row = input->GetRow();
                uid = TString{row.Key};
                if (row.SubKey == " ecom") {
                    ecomEvents.push_back(ParseRevenueRec(row.Value));
                } else {
                    parser.AddRec(row.Key, row.SubKey, row.Value);
                }
            }

            parser.Join();
        } catch (const std::exception& ex) {
            Cerr << ex.what() << Endl;
        } catch (...) {
            Cerr << "Unknown error" << Endl;
        }
        Sort(ecomEvents, TSortTime<TTimeRevenueEvent>());
        const TRequestsContainer& cont = parser.GetRequestsContainer();
        TContWebEvents container(cont, ETypeExtracted::ETE_PROFIT);
        TContMediaEvents contMediaEvents(cont, container.GetWebEvents());

        // Only web, images, cbir and video requests are eligible for the output.
        THashMap<TString, const TRequest*> id2req;
        for (const auto& request : cont.GetRequests()) {
            const TRequest* raw = &*request;
            const bool supported = dynamic_cast<const NRA::TWebRequestProperties*>(raw)
                || dynamic_cast<const NRA::TImagesRequestProperties*>(raw)
                || dynamic_cast<const NRA::TCbirRequestProperties*>(raw)
                || dynamic_cast<const NRA::TVideoRequestProperties*>(raw);
            if (!supported) {
                continue;
            }

            id2req[request->GetReqID()] = raw;
        }

        {
            TVector<TTypeTimeJoin> types = {
                {"ecom_80", 80 * 60, false},
            };
            THashMap<TString, double> unJoinRevenue;
            THashMap<TString, TVector<TWebJoinRevenueEvent>> reqid2JoinEvents;
            contMediaEvents.JoinCubeCost(ecomEvents, types, reqid2JoinEvents, unJoinRevenue);
            InsertNodes(uid, reqid2JoinEvents, id2req, out);
        }
        {
            THashMap<TString, TVector<TWebJoinRevenueEvent>> reqid2JoinEvents;
            contMediaEvents.JoinDomainTr("domtr_80", 80 * 60, ecomEvents, reqid2JoinEvents);
            InsertNodes(uid, reqid2JoinEvents, id2req, out);
        }
       /* {
            THashMap<TString, TVector<TWebJoinRevenueEvent>> reqid2JoinEvents;
            container.JoinReqidDomain(ecomEvents, reqid2JoinEvents);
            InsertNodes(uid, reqid2JoinEvents, id2req, out);
        }*/

    }
    Y_SAVELOAD_JOB(BDict);
};

REGISTER_REDUCER(TGMVReduce);


/// Reducer that joins revenue events with the requests parsed from the session
/// log of a single reduce key and writes one TNode row per joined event.
///
/// Input rows are YaMR (key, subkey, value) triples. Rows whose subkey is
/// " cube_cost", " prgg" or " ecom" (note the leading space) are parsed as
/// revenue events; every other row is fed to the session logs parser.
///
/// When IsLearnMode is set, the "ecom_30" join is skipped and an additional
/// "ecom_180" join is executed whose per-click learn data is written instead.
class TColReduce : public NYT::IReducer<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>>
{
    TString BDict;                                   // serialized blockstat dictionary, shipped with the job
    TSimpleSharedPtr<TBlockStatInfo> BlockStatInfo;  // built from BDict in Start()
    bool IsLearnMode = false;                        // initialized so a default-constructed instance is well-defined

public:
    TColReduce() = default;
    TColReduce(const TString& bdict, const bool isLearnMode)
        : BDict(bdict)
        , IsLearnMode(isLearnMode)
    {}

    /// Builds BlockStatInfo from the serialized dictionary before any Do() call.
    void Start(NYT::TTableWriter<NYT::TNode>* /*output*/) override {
        TMemoryInput blockStatInput{BDict};
        BlockStatInfo.Reset(new TBlockStatInfo(blockStatInput));
    }

    /// Writes one output row per joined revenue event.
    ///
    /// @param uid               value stored in the "uid" column of every row
    /// @param reqid2JoinEvents  joined events grouped by request id
    /// @param out               table writer receiving the rows
    void InsertNodes(const TString& uid, const THashMap<TString, TVector<TWebJoinRevenueEvent>>& reqid2JoinEvents, NYT::TTableWriter<NYT::TNode>* out) {
        for (const auto& reqid2Join: reqid2JoinEvents) {
            for (const auto& webJoinRevenueEvent: reqid2Join.second) {
                TNode node;
                node["uid"] = uid;
                node["reqid"] = reqid2Join.first;
                node["type"] = webJoinRevenueEvent.Type;
                node["revenue"] = webJoinRevenueEvent.Revenue;
                node["multiplier"] = webJoinRevenueEvent.Multiplier;
                node["delta"] = webJoinRevenueEvent.Delta;
                node["event_reqid"] = webJoinRevenueEvent.Pevent->Reqid;
                node["event_domain"] = webJoinRevenueEvent.Pevent->Domain;
                node["wevent_url"] = webJoinRevenueEvent.PWevent->Url;
                node["wevent_dwell_time"] = webJoinRevenueEvent.DwellTimeOnService;
                node["wevent_timestamp"] = webJoinRevenueEvent.Timestamp;
                node["order_id"] = webJoinRevenueEvent.OrderId;

                out->AddRow(node);
            }
        }
    }

    /// Schema of the reducer output table. It covers both the columns written by
    /// InsertNodes() and the learn-mode columns written by Do().
    static NYT::TTableSchema OutputSchema() {
        return NYT::TTableSchema()
            .AddColumn(NYT::TColumnSchema().Name("uid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("reqid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("type").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("event_reqid").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("event_domain").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("delta").Type(NYT::VT_INT64))
            .AddColumn(NYT::TColumnSchema().Name("wevent_url").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("multiplier").Type(NYT::VT_DOUBLE))
            .AddColumn(NYT::TColumnSchema().Name("revenue").Type(NYT::VT_DOUBLE))
            .AddColumn(NYT::TColumnSchema().Name("wevent_dwell_time").Type(NYT::VT_UINT64))
            .AddColumn(NYT::TColumnSchema().Name("wevent_timestamp").Type(NYT::VT_UINT64))
            .AddColumn(NYT::TColumnSchema().Name("order_id").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("ui").Type(NYT::VT_STRING))
            .AddColumn(NYT::TColumnSchema().Name("click_ecom").Type(NYT::VT_ANY))
            .AddColumn(NYT::TColumnSchema().Name("purchase_total_predict").Type(NYT::VT_DOUBLE))
            .AddColumn(NYT::TColumnSchema().Name("query_conversion_detector_predict").Type(NYT::VT_DOUBLE));
    }

    /// Processes the rows of one reduce key.
    ///
    /// Revenue rows are collected into per-source vectors, all other rows go to
    /// the logs parser. If the parser reports a "fat user" the key is dropped
    /// without producing output. Exceptions raised while reading or parsing are
    /// logged to Cerr and swallowed; the joins then run on whatever was collected.
    void Do(NYT::TTableReader<NYT::TYaMRRow>* input, NYT::TTableWriter<NYT::TNode>* out) override {

        TLogsParserParams parserParams(*BlockStatInfo);
        parserParams.SetErrorHandler(new TDefaultLogsParserErrorHandler());
        TVector<TTimeRevenueEvent> cubeRevenueEvents;
        TVector<TTimeRevenueEvent> prggEvents;
        TVector<TTimeRevenueEvent> corrPrggEvents;
        TVector<TTimeRevenueEvent> ecomEvents;

        TLogsParser parser(parserParams);
        TString uid;
        try {
            for (; input->IsValid(); input->Next()) {
                const auto& row = input->GetRow();
                uid = TString{row.Key};
                if (row.SubKey == " cube_cost") {
                    cubeRevenueEvents.push_back(ParseRevenueRec(row.Value));
                } else if (row.SubKey == " prgg") {
                    // The same record yields two events: one parsed with the
                    // "CorrPrgg=" argument and one parsed with the default.
                    corrPrggEvents.push_back(ParseRevenueRec(row.Value, "CorrPrgg="));
                    prggEvents.push_back(ParseRevenueRec(row.Value));
                } else if (row.SubKey == " ecom") {
                    ecomEvents.push_back(ParseRevenueRec(row.Value));
                } else {
                    parser.AddRec(row.Key, row.SubKey, row.Value);
                    if (parser.IsFatUser()) {
                        // Drop the whole key: nothing is written for fat users.
                        return;
                    }
                }
            }

            parser.Join();
        } catch (const std::exception& ex) {
            Cerr << ex.what() << Endl;
        } catch (...) {
            Cerr << "Unknown error" << Endl;
        }
        Sort(cubeRevenueEvents, TSortTime<TTimeRevenueEvent>());
        Sort(prggEvents, TSortTime<TTimeRevenueEvent>());
        Sort(corrPrggEvents, TSortTime<TTimeRevenueEvent>());
        Sort(ecomEvents, TSortTime<TTimeRevenueEvent>());
        // Shared across the joins below; cleared before each non-learn join.
        THashMap<TString, TVector<TWebJoinRevenueEvent>> reqid2JoinEvents;
        {
            TContWebEvents container(parser.GetRequestsContainer(), ETypeExtracted::ETE_PROFIT);
            if (!cubeRevenueEvents.empty()) {
                reqid2JoinEvents.clear();

                // 5 min, 10 min, 20 min, 30 min, 45 min, 60 min (only 30 min is enabled)
                TVector<TTypeTimeJoin> types = {
                    /*{"revenue_05", 5 * 60, false},
                    {"revenue_10", 10 * 60, false},
                    {"revenue_20", 20 * 60, false},*/
                    {"revenue_30", 30 * 60, false},
                    /*{"revenue_45", 45 * 60, false},
                    {"revenue_60", 60 * 60, false}*/
                };
                container.JoinCubeCost(cubeRevenueEvents, types, reqid2JoinEvents);
                InsertNodes(uid, reqid2JoinEvents, out);
                // Disabled: per-type surplus feature extraction.
                /*for (const auto& type: types) {
                    THashMap<TString, THashMap<TString, double>> revenueScores = CalcRevenueScore(type.Name, reqid2JoinEvents);
                    const NSurplus::TSurplusWebRequestsContainer webRequestsContainerV6BaobabWithRevenue(
                        parser.GetRequestsContainer(),
                        NSurplus::TSurplusRequestsSettings(true, false, true, true, false, false, 0., 1.),
                        &revenueScores);
                    const NSurplus::TSurplusConfigManager configV6BaobabWithRevenue(NSurplus::ESurplusConfigFile::SURPLUS_DEFAULT_CONFIG, "v6.b");
                    ExtractSessionSurplusSufixFeatures(webRequestsContainerV6BaobabWithRevenue,
                        configV6BaobabWithRevenue, SESSION_V5_SURPLUS_METRIC_NAMES, uid, type.Name, out);
                }*/
            }

            if (!prggEvents.empty()) {
                reqid2JoinEvents.clear();
                // 5 min, 10 min, 20 min, 30 min, 45 min, 60 min (only 30 min is enabled)
                TVector<TTypeTimeJoin> types = {
                    //{"prgg_05", 5 * 60, false},
                    //{"prgg_10", 10 * 60, false},
                    //{"prgg_20", 20 * 60, false},
                    {"prgg_30", 30 * 60, false},

                    //{"prgg_45", 45 * 60, false},
                    //{"prgg_60", 60 * 60, false}
                };
                container.JoinCubeCost(prggEvents, types, reqid2JoinEvents);
                InsertNodes(uid, reqid2JoinEvents, out);
                // Disabled: the same per-type surplus feature extraction as in
                // the cube-cost branch above.
            }
            if (!corrPrggEvents.empty()) {
                reqid2JoinEvents.clear();
                // 5 min, 10 min, 20 min, 30 min, 45 min, 60 min (only 30 min is enabled)
                TVector<TTypeTimeJoin> types = {
                    //{"prgg_05", 5 * 60, false},
                    //{"prgg_10", 10 * 60, false},
                    //{"prgg_20", 20 * 60, false},
                    {"corr_prgg_30", 30 * 60, false},

                    //{"prgg_45", 45 * 60, false},
                    //{"prgg_60", 60 * 60, false}
                };
                container.JoinCubeCost(corrPrggEvents, types, reqid2JoinEvents);
                InsertNodes(uid, reqid2JoinEvents, out);
                // Disabled: the same per-type surplus feature extraction as in
                // the cube-cost branch above.
            }

            if (!ecomEvents.empty() && !IsLearnMode) {
                reqid2JoinEvents.clear();
                // Only the 30 min window is used for e-commerce events.
                TVector<TTypeTimeJoin> types = {
                    {"ecom_30", 30 * 60, false},
                };
                THashMap<TString, double> unJoinRevenue;
                container.JoinCubeCost(ecomEvents, types, reqid2JoinEvents, unJoinRevenue);
                InsertNodes(uid, reqid2JoinEvents, out);
            }

            if (IsLearnMode) {
                TVector<TWebEvent> learnData;
                TVector<TTypeTimeJoin> types = {
                    {"ecom_180", 180 * 60, false},
                };
                THashMap<TString, double> unJoinRevenue;
                // NOTE(review): reqid2JoinEvents is not cleared here and its content
                // is not written afterwards; only learnData is consumed. Confirm that
                // JoinCubeCost does not depend on the map being empty.
                container.JoinCubeCost(ecomEvents, types, reqid2JoinEvents, unJoinRevenue, &learnData);

                for (const auto& event : learnData) {
                    if (!event.ClickType) {
                        continue;
                    }

                    TNode node;
                    node["uid"] = uid;
                    node["reqid"] = event.Reqid;
                    node["type"] = event.ClickType;
                    const auto hosts = GetComplexCanonHosts(event.Url);
                    node["wevent_url"] = JoinSeq("\t", hosts);
                    node["wevent_dwell_time"] = event.DwellTimeOnService;
                    node["wevent_timestamp"] = event.Timestamp;

                    // Convert the three-level OrderId2EcomData mapping into nested
                    // map nodes. Iterate by const reference: by-value iteration
                    // would deep-copy every nested map on each step.
                    THashMap<TString, TNode> clickEcomData;
                    for (const auto& order : event.OrderId2EcomData) {
                        THashMap<TString, TNode> orderIdEcomData;
                        for (const auto& section : order.second) {
                            THashMap<TString, TNode> ecomData;
                            for (const auto& field : section.second) {
                                ecomData[field.first] = TNode(field.second);
                            }
                            orderIdEcomData[section.first] = TNode(ecomData);
                        }
                        clickEcomData[order.first] = TNode(orderIdEcomData);
                    }

                    // The column is left unset when there is no e-commerce data.
                    if (clickEcomData) {
                        node["click_ecom"] = TNode(clickEcomData);
                    }

                    node["purchase_total_predict"] = event.PurchaseTotalPredict;
                    node["query_conversion_detector_predict"] = event.QueryConversionDetectorPredict;

                    out->AddRow(node);
                }
            }
        }
    }

    Y_SAVELOAD_JOB(BDict, IsLearnMode);
};

// Registers TColReduce through the YT REGISTER_REDUCER macro.
// NOTE(review): presumably required so the job side can instantiate the reducer by type name — confirm against the YT C++ API docs.
REGISTER_REDUCER(TColReduce);

/// Builds the operation spec node shared by the reduce operations in this file:
/// job sizing, pool trees (main and tentative) and columnar statistics.
NYT::TNode CreateOperationSpec() {
    // 20 * 128 MiB = 2.5 GiB of input per job.
    const auto dataSizePerJob = 20ul * 1024 * 1024 * 128;

    NYT::TNode spec;
    spec["use_columnar_statistics"] = true;
    spec["data_size_per_job"] = dataSizePerJob;
    spec["pool_trees"] = NYT::TNode::CreateList().Add("physical");
    spec["tentative_pool_trees"] = NYT::TNode::CreateList().Add("cloud");
    spec["tentative_tree_eligibility"] = NYT::TNode()("ignore_missing_pool_trees", true);
    return spec;
}

/// Returns the host of `val` as produced by GetOnlyHost, passed through
/// CutWWWPrefix, with one trailing '/' removed if present.
/// NOTE(review): the result is a TStringBuf view — presumably into the caller's
/// buffer, so it must not outlive it; confirm against the url helpers.
TStringBuf GetNormalizedHostname(const TStringBuf& val) {
    const TStringBuf hostOnly = ::GetOnlyHost(val);
    TStringBuf normalized = ::CutWWWPrefix(hostOnly);

    if (!normalized.EndsWith('/')) {
        return normalized;
    }

    normalized.Chop(1);
    return normalized;
}

/// Runs `reducer` over the session source tables for `time` plus the given
/// additional daily tables, then sorts the result into `output`.
///
/// @param server           YT cluster to create the client for
/// @param paths            resolver of daily table paths
/// @param time             day being processed (passed to the path resolver and sources)
/// @param additionalPaths  logical names of extra daily tables added as YaMR inputs
/// @param reducer          reducer instance handed to client->Reduce
/// @param output           destination table, sorted by ("uid", "type")
///
/// The reduce output goes to a temporary table with TColReduce::OutputSchema()
/// and is sorted into `output` afterwards.
void DoTColReduceTask(const TString server,
                      const NABPaths::TPathsToTables& paths,
                      const TString time,
                      const TVector<TString>& additionalPaths,
                      TColReduce* reducer,
                      const TString output) {
    TReduceOperationSpec mrspec;
    // 20 GiB. `ull` keeps the product 64-bit even where `unsigned long` is
    // 32 bits wide (there `20ul * 2^30` would wrap to 0).
    const uint64_t memoryLimit = 20ull * 1024 * 1024 * 1024;
    const auto opts = TOperationOptions{}.Spec(CreateOperationSpec());
    mrspec.ReducerSpec(TUserJobSpec{}.MemoryLimit(memoryLimit));

    auto client = NYT::CreateClient(server);

    NStatCollector::TMRSources sources(
        time,
        true,
        client,
        false
    );
    sources.Init("", paths, {}, "user_sessions_clean_splitted_ksv");
    sources.PrintSources();

    // Session tables first, then the caller-supplied daily tables.
    for (size_t i = 0; i < sources.GetCountTable(); ++i) {
        mrspec.AddInput<NYT::TYaMRRow>(sources.GetTableName(i));
    }

    for (const auto& path : additionalPaths) {
        mrspec.AddInput<NYT::TYaMRRow>(paths.GetDailyPath(path, time));
    }

    TTempTable tmp(client, "calc_cube_money_");
    mrspec.AddOutput<TNode>(NYT::TRichYPath(tmp.Name()).Schema(TColReduce::OutputSchema()));

    mrspec.ReduceBy("key");
    mrspec.SortBy({"key", "subkey"});
    client->Reduce(mrspec, reducer, opts);

    client->Sort(NYT::TSortOperationSpec()
            .AddInput(tmp.Name())
            .Output(output)
            .SortBy({"uid", "type"}));
}

// Thresholds applied in CalcPrepareData when building the per-counter mean map:
// a counter is kept only if its mean exceeds MIN_MEAN_PRGG and its accumulated
// visit count exceeds MIN_VISITS.
constexpr double MIN_MEAN_PRGG = 0.001;
constexpr double MIN_VISITS = 100;

/// Runs the preparation stages selected in `tasks` for the day `time`.
///
/// Recognized task names:
///   - "prgg"            build abt_prgg / abt_prgg_hosts from the visit logs
///                       ("mean_only_prgg" additionally skips the history read
///                       and the abt_prgg sort);
///   - "cube"            map cost_cube into abt_cost_cube;
///   - "ecom"            map ecom_log into abt_ecom;
///   - "gmv_prior_learn" run TColReduce in learn mode into abt_gmv_prior_learn;
///   - "join"            run TColReduce into `ojoin` (requires non-empty `ojoin`);
///   - "qjoin"           run TGMVReduce into `ojoin` (requires non-empty `ojoin`).
///
/// @param server  YT cluster name
/// @param paths   resolver of daily table paths
/// @param time    day being processed
/// @param tasks   set of task names to execute
/// @param ojoin   output table for the "join"/"qjoin" tasks
/// @param bdict   serialized blockstat dictionary passed to the reducers
void CalcPrepareData(const TString& server,
                     const NABPaths::TPathsToTables& paths,
                     const TString& time,
                     const THashSet<TString>& tasks,
                     const TString& ojoin,
                     const TString& bdict
                     ) {
    auto client = NYT::CreateClient(server);
    // Temporary tables are reused by the stages below, which run sequentially.
    TTempTable tmp(client, "calc_cube_money_");
    TTempTable tmpCounters(client, "cost_cube_");
    NYT::TNode specNode;
    //specNode["mapper"]["copy_files"] = true;
    //specNode["mapper"]["tmpfs_path"] = ".";
    specNode["mapper"]["memory_limit"] = 20ull * 1024 * 1024 * 1024;
    //specNode["mapper"]["tmpfs_size"] = 500ull * 1024 * 1024;
    specNode["data_size_per_job"] = 1024 * 1024 * 1024;
    const auto operOpts = NYT::TOperationOptions{}.Spec(specNode);
    TVector<TString> cols = {"ICookie", "StartURL", "PrggGoodModelScore", "UTCStartTime", "Duration", "CounterID", "ReqID", "Referer", "StartURLDomain", "VisitID"};

    if (tasks.contains("prgg")) {
        const bool meanOnly = tasks.contains("mean_only_prgg");

        THashMap<ui64, double> counter2Means;
        if (!meanOnly) {
            // Per counter: first = accumulated subkey values (compared against
            // MIN_VISITS), second = accumulated row values (numerator of the mean).
            THashMap<ui64, std::pair<double, double>> counters;
            TDate date(time);
            TDate end = date - 7;
            --date;
            // NOTE(review): the loop covers time-1 .. time-6 (six days) because
            // `end` itself is excluded — confirm whether seven days were intended.
            for (; date > end; --date) {
                auto reader = client->CreateTableReader<NYT::TYaMRRow>(paths.GetDailyPath("abt_prgg_hosts", date.ToStroka()));
                for (; reader->IsValid(); reader->Next()) {
                    const auto& row = reader->GetRow();
                    ui64 domain = FromString<ui64>(row.Key);
                    auto& el = counters[domain];
                    el.first += FromString<double>(row.SubKey);
                    el.second += FromString<double>(row.Value);
                }
            }

            for (const auto& el: counters) {
                const double visits = el.second.first;
                // Check the visit count first so the mean is never computed
                // with a zero (or too small) denominator.
                if (!(visits > MIN_VISITS)) {
                    continue;
                }
                const double mean = el.second.second / visits;
                if (mean > MIN_MEAN_PRGG) {
                    counter2Means[el.first] = mean;
                }
            }
        }

        TMapReduceOperationSpec spec;
        {
            TRichYPath path(paths.GetDailyPath("visit-cooked-private-log", time));
            path.Columns(cols);
            spec.AddInput<TNode>(path);
        }
        {
            TRichYPath path(paths.GetDailyPath("visit-cooked-log", time));
            path.Columns(cols);
            spec.AddInput<TNode>(path);
        }
        spec.AddOutput<NYT::TYaMRRow>(tmp.Name());
        spec.AddOutput<NYT::TYaMRRow>(tmpCounters.Name());
        spec.ReduceBy({"ICookie"});
        spec.SortBy({"ICookie"});

        client->MapReduce(spec, nullptr, new TReduceMetrikaPrgg(counter2Means, meanOnly), operOpts);
        if (!meanOnly) {
            client->Sort(NYT::TSortOperationSpec()
                .AddInput(tmp.Name())
                .Output(paths.GetDailyPath("abt_prgg", time))
                .SortBy({"key", "subkey"}));
        }

        // Aggregate the per-counter output into today's abt_prgg_hosts table.
        TMapReduceOperationSpec specCounter;
        specCounter.AddInput<NYT::TYaMRRow>(tmpCounters.Name());
        specCounter.AddOutput<NYT::TYaMRRow>(paths.GetDailyPath("abt_prgg_hosts", time));
        specCounter.ReduceBy({"key"});
        specCounter.SortBy({"key", "subkey"});
        client->MapReduce(specCounter, nullptr, new TSumPrgg(), operOpts);
    }

    if (tasks.contains("cube")) {
        auto mapSpec = NYT::TMapOperationSpec();

        mapSpec.AddInput<NYT::TNode>(paths.GetDailyPath("cost_cube", time));
        mapSpec.AddOutput<NYT::TYaMRRow>(tmp.Name());

        client->Map(
            mapSpec,
            new TMapCubeCostRequest(),
            operOpts
            );

        client->Sort(NYT::TSortOperationSpec()
                .AddInput(tmp.Name())
                .Output(paths.GetDailyPath("abt_cost_cube", time))
                .SortBy({"key", "subkey"}));
    }

    if (tasks.contains("ecom")) {
        auto mapSpec = NYT::TMapOperationSpec();

        mapSpec.AddInput<NYT::TNode>(paths.GetDailyPath("ecom_log", time));
        mapSpec.AddOutput<NYT::TYaMRRow>(tmp.Name());

        client->Map(
            mapSpec,
            new TMapEComRequest(),
            operOpts
            );

        client->Sort(NYT::TSortOperationSpec()
                .AddInput(tmp.Name())
                .Output(paths.GetDailyPath("abt_ecom", time))
                .SortBy({"key", "subkey"}));
    }

    if (tasks.contains("gmv_prior_learn")) {
        DoTColReduceTask(server,
                         paths,
                         time,
                         {"abt_ecom", "user_sessions_direct_urls"},
                         new TColReduce(bdict, true),
                         paths.GetDailyPath("abt_gmv_prior_learn", time));
    }

    if (ojoin && tasks.contains("join")) {
        DoTColReduceTask(server,
                         paths,
                         time,
                         {"abt_cost_cube", "abt_prgg", "abt_ecom"},
                         new TColReduce(bdict, false),
                         ojoin);
    }

    if (ojoin && tasks.contains("qjoin")) {
        TReduceOperationSpec mrspec;

        // 20 GiB; `ull` matches the mapper memory_limit above and keeps the
        // product 64-bit even where `unsigned long` is 32 bits wide.
        const uint64_t memoryLimit = 20ull * 1024 * 1024 * 1024;
        const auto opts = TOperationOptions{}.Spec(CreateOperationSpec());
        mrspec.ReducerSpec(TUserJobSpec{}.MemoryLimit(memoryLimit));

        NStatCollector::TMRSources sources(
            time,
            true,
            client,
            false
        );
        sources.Init("", paths, {}, "user_sessions_clean_splitted_ksv");
        sources.PrintSources();

        for (size_t i = 0; i < sources.GetCountTable(); ++i) {
            mrspec.AddInput<NYT::TYaMRRow>(sources.GetTableName(i));
        }
        mrspec.AddInput<NYT::TYaMRRow>(paths.GetDailyPath("user_sessions_direct_urls", time));
        mrspec.AddInput<NYT::TYaMRRow>(paths.GetDailyPath("abt_ecom", time));

        mrspec.AddOutput<TNode>(NYT::TRichYPath(tmp.Name()).Schema(TGMVReduce::OutputSchema()));

        mrspec.ReduceBy("key");
        mrspec.SortBy({"key", "subkey"});
        client->Reduce(mrspec, new TGMVReduce(bdict), opts);

        client->Sort(NYT::TSortOperationSpec()
                .AddInput(tmp.Name())
                .Output(ojoin)
                .SortBy({"uid"}));
    }

}


int main(int argc, const char *argv[]) {
    NYT::Initialize(argc, argv);

    TString server, time;
    TString yConfig;
    TString blockstat;
    TString output;

    using namespace NLastGetopt;
    TOpts opts = NLastGetopt::TOpts::Default();
    opts.AddHelpOption();
    opts.AddLongOption('s', "server").StoreResult(&server);
    opts.AddLongOption('t', "time").StoreResult(&time);
    opts.AddLongOption('y', "paths").StoreResult(&yConfig);
    opts.AddLongOption('b', "blockstat").StoreResult(&blockstat);
    opts.AddLongOption('j', "join").StoreResult(&output);
    TOptsParseResult res(&opts, argc, argv);

    THashSet<TString> tasks;
    for (const auto& task: res.GetFreeArgs()) {
        tasks.insert(task);
    }

    NABPaths::TPathsToTables paths(yConfig, true);
    if (server.empty()) {
        server = paths.GetServer();
    } else if (paths.GetServer().empty()) {
        paths.SetServer(server);
    }

    CalcPrepareData(server, paths, time, tasks, output, TFileInput(blockstat).ReadAll());

    return 0;
}
