#include <util/string/builder.h>
#include <util/string/join.h>
#include <util/string/vector.h>
#include <util/stream/file.h>

#include <wmconsole/version3/wmcutil/log.h>

#include "monitor.h"
#include "service.h"

namespace NWebmaster {

// Decodes a packed region code: the quotient of division by 1000000 is stored
// as Category, the remainder as Id. SourceId is then derived from Category;
// categories not listed below map to AttributeSourceType::UNKNOWN.
TRegion::TRegion(i32 regionCode)
    : Category(regionCode / 1000000)
    , Id(regionCode - Category * 1000000)
{
    if (Category == 11 || Category == 51 || Category == 61) {
        SourceId = proto::geodata::AttributeSourceType::CATALOG;
    } else if (Category == 21) {
        SourceId = proto::geodata::AttributeSourceType::AUTOCLASSIFIER;
    } else if (Category == 31) {
        SourceId = proto::geodata::AttributeSourceType::WEBMASTER;
    } else if (Category == 81) {
        SourceId = proto::geodata::AttributeSourceType::DICTIONARY;
    } else {
        SourceId = proto::geodata::AttributeSourceType::UNKNOWN;
    }
}

// Stores the supplied configuration and binds the monitor obtained from
// TMonitor::Instance(). No data files are loaded here; that happens in
// TryUpdateDataFiles().
TGeoService::TGeoService(const TConfig &config)
    : Config(config)
    , Monitor(TMonitor::Instance())
{}

// Routes an incoming request by its method path:
//   /getGeoData -> MethodGetGeoData()
//   /ping       -> fixed 200 reply with a service marker
//   otherwise   -> 404
// Returns the handler's result for /getGeoData and true in all other cases.
bool TGeoService::Reply(THttpServer::TRequest &request) {
    const auto &method = request.Method;

    if (method == "/getGeoData") {
        return MethodGetGeoData(request);
    }

    if (method == "/ping") {
        request.Output() << "HTTP/1.1 200 Ok\r\n\r\n" << "<source>webmaster-geodata</source>";
        return true;
    }

    request.Die(404, "Not found");
    return true;
}

// Returns hostName with an explicit scheme.
//
// If hostName contains no "://" separator, "http://" is prepended. Otherwise
// the name is returned unchanged; a warning is logged when the scheme part is
// neither "http" nor "https" (the comparison is case-sensitive).
static TString GetSchemedHostName(const TString &hostName) {
    const TString::size_type pos = hostName.find("://");

    if (pos == TString::npos) {
        TString fullHostName = "http://";
        fullHostName += hostName;
        return fullHostName;
    }

    // "http" is the scheme this function adds by default, so it must not be
    // reported as unknown; only genuinely unexpected schemes are logged.
    const bool isHttp = hostName.compare(0, pos, "http") == 0;
    const bool isHttps = hostName.compare(0, pos, "https") == 0;

    if (!isHttp && !isHttps) {
        LOG_WARN("unknown protocol: %s", hostName.data());
    }

    return hostName;
}

// Extracts the scheme-qualified host from one line of input.
//
// Only the text before the first tab is treated as the URL. The URL is
// lower-cased, everything from the first '/' after an optional "http://" or
// "https://" prefix is dropped, and the remainder is passed through
// GetSchemedHostName().
static TString ExtractHostFromUrl(const TString &line) {
    const size_t tabPos = line.find('\t');
    TString url = (tabPos != TString::npos) ? line.substr(0, tabPos) : line;

    url.to_lower();

    // Skip the scheme so that its own slashes are not mistaken for the start
    // of the path. Only a leading prefix counts: "://" later in the string
    // (e.g. inside a query) must not move the search start.
    size_t hostStart = 0;
    if (url.compare(0, 8, "https://") == 0) {
        hostStart = 8;
    } else if (url.compare(0, 7, "http://") == 0) {
        hostStart = 7;
    }

    const size_t pathPos = url.find('/', hostStart);
    const TString host = (pathPos != TString::npos) ? url.substr(0, pathPos) : url;

    return GetSchemedHostName(host);
}

void TGeoService::TryUpdateDataFiles() {
    TFileStat currentStatFilterTrie(Config.DATA_FILTER_TRIE.data());
    TFileStat currentStatMirrorsTrie(Config.DATA_MIRRORS_TRIE.data());
    TFileStat currentStatResourceTxt(Config.DATA_CATALOG_VISIBLE.data());

    if (!currentStatFilterTrie.IsFile()) {
        ythrow yexception() << "DataSource " << Config.DATA_FILTER_TRIE << " not found";
    }

    if (!currentStatMirrorsTrie.IsFile()) {
        ythrow yexception() << "DataSource " << Config.DATA_MIRRORS_TRIE << " not found";
    }

    if (currentStatFilterTrie.MTime != StatFilterTrie.MTime || currentStatMirrorsTrie.MTime != StatMirrorsTrie.MTime) {
        StatFilterTrie = currentStatFilterTrie;
        StatMirrorsTrie = currentStatMirrorsTrie;

        with_lock(UpdateDataMutex) {
            TAtomicSharedPtr<TMirrorsMappedTrie> tmp = MirrorsTrie;
            MirrorsTrie.Reset(new TMirrorsMappedTrie(Config.DATA_MIRRORS_TRIE.data()));
            CatFilter.Reset(GetCatFilter(Config.DATA_FILTER_TRIE.data(), true, MirrorsTrie.Get()));
        }

        if (!MirrorsTrie) {
            ythrow yexception() << "unable to load DataSource " << Config.DATA_MIRRORS_TRIE;
        }

        if (!CatFilter) {
            ythrow yexception() << "unable to load DataSource " << Config.DATA_FILTER_TRIE;
        }

        LOG_INFO("DataSource %s reloaded", Config.DATA_MIRRORS_TRIE.data());
        LOG_INFO("DataSource %s reloaded", Config.DATA_FILTER_TRIE.data());
    }

    if (currentStatResourceTxt.MTime != StatResourceTxt.MTime) {
        StatResourceTxt = currentStatResourceTxt;

        TFileInput input(Config.DATA_CATALOG_VISIBLE);
        TString line;

        TSet<TString> hosts;

        while (input.ReadLine(line)) {
            TString host(ExtractHostFromUrl(line));
            hosts.insert(host);
        }

        VisibleHosts.swap(hosts);

        LOG_INFO("DataSource %s reloaded (%zu hosts)", Config.DATA_CATALOG_VISIBLE.data(), VisibleHosts.size());
    }
}

// Returns true when host is present in the currently loaded VisibleHosts set.
// The lookup is an exact string match.
bool TGeoService::IsVisibleHost(const TString &host) {
    const bool known = VisibleHosts.count(host) > 0;
    return known;
}

void TGeoService::LoadGeoDataForHost(const TString &_host, proto::geodata::HostInfo *hostInfo) {
    TString host = GetSchemedHostName(_host);

    hostInfo->set_visible(IsVisibleHost(host));

    TAtomicSharedPtr<ICatFilter> catFilter(CatFilter);

    const TCatAttrsPtr attrs = catFilter->Find(host);

    if (!attrs) {
        LOG_INFO("host %s not found in the %s", host.data(), Config.DATA_CATALOG_VISIBLE.data());
        return;
    }

    TDeque<TString> regionsLog;

    for (TCatAttrs::const_iterator it = attrs->begin(); it != attrs->end(); ++it) {
        TRegion region(*it);

        if (region.SourceId == proto::geodata::AttributeSourceType::UNKNOWN) {
            continue;
        }

        proto::geodata::AttributeInfo *attributeInfo = hostInfo->add_attributes();

        attributeInfo->set_region_id(region.Id);
        attributeInfo->set_source_type(region.SourceId);
        regionsLog.push_back(TStringBuilder() << int(region.SourceId) << ":" << int(region.Id));
    }

    LOG_INFO("for host %s successfully loaded %u regions [%s]", host.data(), hostInfo->attributes_size(), JoinSeq(" ", regionsLog).data());
}

// Handles /getGeoData.
//
// The request body is a ';'-separated list of host names. Data files are
// refreshed if needed, geo attributes are collected for every host and the
// result is sent back as a serialized proto::geodata::GeoDataMessage.
// A failure for one host is stored in that host's `error` field and does not
// abort the request.
//
// Always returns true. Failures are answered with HTTP 500 and counted via
// Monitor.FailedRequests().
bool TGeoService::MethodGetGeoData(THttpServer::TRequest &request) try {
    LOG_INFO("requested %s - [%s]", request.Method.data(), request.GetRemoteAddr().data());

    TString content = request.Input().ReadAll();

    try {
        TryUpdateDataFiles();
    } catch (const yexception &e) {
        const TString error = TStringBuilder() << "unable to reload datafiles " << e.what();
        LOG_ERROR("%s", error.data());
        // Reply first and count afterwards: if Die() throws, the outer handler
        // does the counting, so the failure is never counted twice.
        request.Die(500, error);
        Monitor.FailedRequests();
        return true;
    }

    TVector<TString> hosts = SplitString(content, ";");

    if (hosts.empty()) {
        Monitor.SuccessRequests();
        request.Die(200, "");
        return true;
    }

    proto::geodata::GeoDataMessage geoData;
    geoData.mutable_application_info()->set_name(TConfig::MSG_APPLICATION_NAME);
    geoData.mutable_application_info()->set_version(TConfig::MSG_APPLICATION_VERSION);
    geoData.mutable_message_info()->set_data_timestamp(Now().MilliSeconds());

    for (const TString &host : hosts) {
        proto::geodata::HostInfo *hostInfo = geoData.add_hosts();
        *hostInfo->mutable_hostname() = host;

        try {
            LoadGeoDataForHost(host, hostInfo);
        } catch (const yexception &e) {
            LOG_WARN("unable to complete attribute extraction for host %s", host.data());
            *hostInfo->mutable_error() = e.what();
        }
    }

    Monitor.ProcessedHosts(hosts.size());

    TString data;

    try {
        // Do not send a truncated payload with a 200 status.
        if (!geoData.SerializeToString(&data)) {
            ythrow yexception() << "protobuf serialization failed";
        }

        request.Output() << "HTTP/1.1 200 Ok\r\n\r\n" << data;
        Monitor.SuccessRequests();
        LOG_INFO("sent protobuf reply with %u hosts in %s", geoData.hosts_size(), request.GetTimerString().data());
    } catch (const yexception &e) {
        LOG_ERROR("unable to complete protobuf answer: %s", e.what());
        request.Die(500, e.what());
        Monitor.FailedRequests();
    }

    return true;
} catch (const yexception &e) {
    LOG_ERROR("unable to process request: %s", e.what());
    Monitor.FailedRequests();

    // Best effort: the connection may already be unusable, in which case the
    // error reply cannot be delivered either.
    try {
        request.Die(500, e.what());
    } catch (const yexception &replyError) {
        LOG_ERROR("unable to send error reply: %s", replyError.what());
    }

    return true;
}

} //namespace NWebmaster
