#include "database.h"

#include <crypta/lib/native/time/scope_timer.h>
#include <crypta/lib/native/yt/utils/helpers.h>
#include <crypta/siberia/bin/custom_audience/suggester/bin/service/lib/proto/host.pb.h>
#include <crypta/siberia/bin/custom_audience/suggester/bin/service/lib/proto/segment.pb.h>

#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/library/parallel_io/parallel_reader.h>

#include <util/charset/utf8.h>

using namespace NCrypta::NSiberia::NCustomAudience::NSuggester;
using namespace NCrypta::NLog;

namespace {
    // Downloads the segments table and converts every NRawData::TSegment row into a
    // TDatabaseState::TSegmentItem.
    //
    // client   - YT client used for the "row_count" attribute lookup and for the table read.
    // srcTable - table to read.
    // log      - logger for progress messages.
    // stats    - stats holder; stats.Count is handed to the "update.segments.timing" scope timer
    //            and receives the "count.segments" value.
    //
    // Returns the converted items. The reader is created with Ordered(false), so the item order
    // is whatever order the reader yields rows in.
    TDatabaseState::TSegments ReadSegments(NYT::IClientPtr client, const NYT::TRichYPath& srcTable, TLogPtr log, TStats& stats) {
        NCrypta::TScopeTimer scopeTimer(stats.Count, "update.segments.timing");
        log->info("[Segments] Start to download table '{}'", srcTable.Path_);

        // Used for pre-allocation and reporting only; the loop below is driven by the reader itself.
        const ui64 rowsCount = NCrypta::GetAttribute(client, srcTable.Path_, "row_count").AsInt64();

        auto reader = NYT::CreateParallelTableReader<NRawData::TSegment>(
            client, srcTable, NYT::TParallelTableReaderOptions().Ordered(false)
        );

        log->info("[Segments] Records total count: {}", rowsCount);

        TVector<TDatabaseState::TSegmentItem> results;
        results.reserve(rowsCount);

        for (size_t i = 0; reader->IsValid(); reader->Next(), ++i) {
            const auto& segment = reader->GetRow();

            TDatabaseState::TSegmentItem item;

            // Per-language name and description, keyed by language.
            for (const auto& tankerInfo : segment.GetTankerNames()) {
                item.LangInfo[tankerInfo.GetLanguage()] = TDatabaseState::TSegmentItem::TLangSpecificInfo(tankerInfo.GetName(), tankerInfo.GetDescription());
            }

            // Only the segment id and keyword id of every export are carried over.
            for (const auto& exp : segment.GetExports()) {
                auto* newExport = item.Exports.Add();
                newExport->SetSegmentId(exp.GetSegmentId());
                newExport->SetKeywordId(exp.GetKeywordId());
            }

            item.Type = segment.GetType();
            for (const auto& campaignType: segment.GetCampaignTypes()) {
                item.CampaignTypes.emplace(static_cast<NRawData::TExport::ECampaignType>(campaignType));
            }

            // The item is not used after this point, so hand it over instead of copying its containers.
            results.push_back(std::move(item));

            if (i != 0 && i % 100 == 0) {
                log->info("[Segments] Records loaded: {}", i);
            }
        }

        // Reports the table's "row_count" attribute, not the number of rows actually iterated.
        stats.Count->Add("count.segments", rowsCount);
        log->info("[Segments] Successfully read table '{}'", srcTable.Path_);

        return results;
    }

    // Downloads the hosts table and builds two lookups from it: a compact trie keyed by host
    // name and a host-id -> host-name map.
    //
    // client   - YT client used for the "row_count" attribute lookup and for the table read.
    // srcTable - table to read.
    // log      - logger for progress messages.
    // stats    - stats holder; stats.Count is handed to the "update.hosts.timing" scope timer
    //            and receives the "count.hosts" value.
    //
    // Returns a heap-allocated THosts whose Trie is constructed over the bytes stored in its own
    // TrieStream buffer.
    THolder<TDatabaseState::THosts> ReadHosts(NYT::IClientPtr client, const NYT::TRichYPath& srcTable, TLogPtr log, TStats& stats) {
        NCrypta::TScopeTimer scopeTimer(stats.Count, "update.hosts.timing");
        auto result = MakeHolder<TDatabaseState::THosts>();

        log->info("[Hosts] Start to download table '{}'", srcTable.Path_);

        // Used for reporting only; the loop below is driven by the reader itself.
        const ui64 rowsCount = NCrypta::GetAttribute(client, srcTable.Path_, "row_count").AsInt64();

        auto reader = NYT::CreateParallelTableReader<NRawData::THost>(
            client, srcTable, NYT::TParallelTableReaderOptions().Ordered(false)
        );

        log->info("[Hosts] Records total count: {}", rowsCount);

        TCompactTrieBuilder<char, TDatabaseState::THostData> builder;

        for (size_t i = 0; reader->IsValid(); reader->Next(), ++i) {
            const auto& host = reader->GetRow();
            const auto& text = host.GetHost();
            // TODO(CRYPTA-15560) remove when such sites are removed
            // Skipped rows still advance `i`, but reach neither the trie nor the ById map.
            if (text.EndsWith("https")) {
                continue;
            }
            // Trie value is the pair (count, host id).
            builder.Add(text.data(), text.size(), std::make_pair(host.GetCount(), host.GetHostId()));

            if (i != 0 && i % 100000 == 0) {
                log->info("[Hosts] Records loaded: {}", i);
            }

            result->ById.emplace(host.GetHostId(), text);
        }

        // Serialize the trie into the stream owned by the result, then construct the trie over
        // exactly the bytes that were written.
        size_t len = builder.Save(result->TrieStream);

        result->Trie = TDatabaseState::TTrie(result->TrieStream.Buffer().Data(), len);

        // Reports the table's "row_count" attribute, which also counts the rows skipped above.
        stats.Count->Add("count.hosts", rowsCount);

        log->info("[Hosts] Successfully read table '{}'", srcTable.Path_);

        return result;
    }

    // Downloads the apps table, stores every app together with lower-cased copies of its bundle
    // id and titles, orders the apps by descending count and indexes them by app id.
    //
    // client   - YT client used for the "row_count" attribute lookup and for the table read.
    // srcTable - table to read.
    // log      - logger for progress messages.
    // stats    - stats holder; stats.Count is handed to the "update.apps.timing" scope timer
    //            and receives the "count.apps" value.
    //
    // Returns TApps where ById maps an app id to a pointer to an element of SortedByCount.
    // NOTE(review): those pointers stay valid only while the SortedByCount storage is moved
    // rather than copied by whoever takes the result - confirm against TDatabaseState.
    TDatabaseState::TApps ReadApps(NYT::IClientPtr client, const NYT::TRichYPath& srcTable, TLogPtr log, TStats& stats) {
        NCrypta::TScopeTimer scopeTimer(stats.Count, "update.apps.timing");
        TDatabaseState::TApps result;

        log->info("[Apps] Start to download table '{}'", srcTable.Path_);

        // Used for pre-allocation and reporting only; the loop below is driven by the reader itself.
        const ui64 rowsCount = NCrypta::GetAttribute(client, srcTable.Path_, "row_count").AsInt64();

        auto reader = NYT::CreateParallelTableReader<NRawData::TApp>(
            client, srcTable, NYT::TParallelTableReaderOptions().Ordered(false)
        );

        log->info("[Apps] Records total count: {}", rowsCount);
        result.SortedByCount.reserve(rowsCount);

        for (size_t i = 0; reader->IsValid(); reader->Next(), ++i) {
            // Own copy of the row: it is moved into the stored TAppInfo below.
            auto app = reader->GetRow();

            // Everything derived from `app` is computed before `app` is moved from, so nothing
            // reads the row through a reference after the move.
            auto normalizedBundleId = ToLowerUTF8(app.GetBundleId());
            THashMap<TString, TString> normalizedTitles;
            for (const auto& [lang, title] : app.GetTitles()) {
                normalizedTitles[lang] = ToLowerUTF8(title);
            }

            result.SortedByCount.push_back(TDatabaseState::TApps::TAppInfo{
                .Raw=std::move(app),
                .NormalizedBundleId=std::move(normalizedBundleId),
                .NormalizedTitles=std::move(normalizedTitles),
            });

            if (i != 0 && i % 100000 == 0) {
                log->info("[Apps] Records loaded: {}", i);
            }
        }

        // Sorting through reverse iterators yields descending order by count.
        SortBy(result.SortedByCount.rbegin(), result.SortedByCount.rend(), [](const auto& appInfo) {
            return appInfo.Raw.GetCount();
        });

        // Index only after sorting: the vector is not modified past this point, so element
        // addresses are final.
        for (const auto& appInfo : result.SortedByCount) {
            result.ById.emplace(appInfo.Raw.GetAppId(), &appInfo);
        }

        // Reports the table's "row_count" attribute, not the number of rows actually iterated.
        stats.Count->Add("count.apps", rowsCount);

        log->info("[Apps] Successfully read table '{}'", srcTable.Path_);

        return result;
    }
}

// Constructs the database and registers Read() as the callback of the "db-reader" periodic task.
// The task period is config.GetUpdatePeriodSeconds() seconds; the logger is obtained under the
// name "database" and the stats object is a tagged singleton created from statsSettings.
// NOTE(review): the callback captures `this` while the TPeriodicTask base is being constructed,
// i.e. before Config, Log and Stats are initialized. That is safe only if the base class does
// not invoke the callback from its own constructor - confirm against TPeriodicTask.
// NOTE(review): the meaning of the trailing `true` argument is defined by TPeriodicTask and is
// not visible here - confirm.
TDatabase::TDatabase(const TDbConfig& config, const TStats::TSettings& statsSettings)
    : TPeriodicTask(
        TDuration::Seconds(config.GetUpdatePeriodSeconds()),
        [this](){ Read(); },
        "db-reader",
        true
    )
    , Config(config)
    , Log(NLog::GetLog("database"))
    , Stats(TaggedSingleton<TStats, decltype(*this)>("database", statsSettings))
{
}

void TDatabase::Read() {
    Log->info("Reading database...");

    for (const auto& ytConfig: Config.GetYt()) {
        const auto& proxy = ytConfig.GetProxy();
        try {
            Log->info("Reading from {}", proxy);
            auto client = NYT::CreateClient(proxy);

            auto newState = MakeIntrusive<TDatabaseState>(
                ReadSegments(client, Config.GetSegmentsTable(), Log, Stats),
                ReadHosts(client, Config.GetHostsTable(), Log, Stats),
                ReadApps(client, Config.GetAppsTable(), Log, Stats)
            );

            State.AtomicStore(newState);
            Stats.Count->Add("update");
            Log->info("Finished");
            return;
        } catch (const std::exception& e) {
            Log->error("Failed to load base from {}: {}", proxy, e.what());
        }
    }

    Stats.Count->Add("update.failed");
    ythrow yexception() << "Failed to load tables from all clusters";
}

// Returns true when the currently stored state pointer is non-null.
bool TDatabase::IsReady() const {
    const auto current = State.AtomicLoad();
    return current != nullptr;
}

// Returns the currently stored state pointer; it is null when nothing has been stored yet.
TDatabaseStatePtr TDatabase::GetState() const {
    TDatabaseStatePtr snapshot = State.AtomicLoad();
    return snapshot;
}
