#include <library/cpp/archive/yarchive.h>
#include <util/generic/strbuf.h>
#include <util/generic/string.h>
#include <util/generic/yexception.h>
#include <util/memory/blob.h>

#include "owners.h"
#include "url.h"

namespace NWebmaster {

// Builds the owner extractor from the rule archive compiled into the binary.
// Every line of every archive entry is collected into one list, which is then
// handed to TOwnerExtractor; TOwnerForms is created on top of that extractor.
TAnsipamOwnerCanonizer::TAnsipamOwnerCanonizer() {
    // Archive bytes are generated into urlrules.inc and embedded here.
    static const unsigned char URLRULES_DATA[] = {
        #include "urlrules.inc"
    };

    // NoCopy: the reader works directly on the static array above.
    TArchiveReader archive(TBlob::NoCopy(URLRULES_DATA, Y_ARRAY_SIZE(URLRULES_DATA)));

    TVector<TString> areas;

    for (ui32 index = 0; index < archive.Count(); ++index) {
        const TBlob entry = archive.ObjectBlobByKey(archive.KeyByIndex(index));
        TMemoryInput input(entry.Data(), entry.Length());

        // Append the entry line by line, in archive order.
        TString line;
        while (input.ReadLine(line)) {
            areas.push_back(line);
        }
    }

    Extractor.Reset(new TOwnerExtractor(areas));
    OwnerForms.Reset(new TOwnerForms(*Extractor));
}

bool TAnsipamOwnerCanonizer::GetOwner(const TString &url, TString &owner, TString &error, bool hostOnly) {
    try {
        Parser.ProcessUrl(url);
        OwnerForms->ProcessUrl(Parser);
        owner = OwnerForms->NormalizedOwner();
    } catch (THostIdnaConvError& e) {
        error = TString("bad url ") + Parser.OriginalUrl() + " : " + e.what();
        return false;
    } catch (TUrlDescendingParserError& e) {
        error = TString("bad url ") + Parser.OriginalUrl() + " : " + e.what();
        return false;
    }

    size_t del = owner.find('/');
    if (hostOnly && del != TString::npos) {
        owner = owner.substr(0, del);
    }

    return true;
}

namespace NUtils {

// Returns the owner part of a host name.
//
// The scheme is stripped with RemoveScheme() first. Then:
//   - an empty result, or one with fewer than two dots, is returned as is;
//   - if the last two labels form one of the suffixes in SPEC_2LD
//     (e.g. "com.ru", "co.uk"), the last three labels are returned, or the
//     whole domain when it has only three labels;
//   - otherwise the last two labels are returned.
//
// The result is a view produced by SubStr() on the value returned by
// RemoveScheme(); presumably it points into the caller's `hostname` buffer,
// so that buffer should outlive the result (confirm against RemoveScheme).
TStringBuf GetMetrikaOwner(const TStringBuf &hostname) {
    const static THashSet<TString> SPEC_2LD = {
        "com.ru", "com.ua", "com.tr", "net.ru", "org.ru", "com.by", "org.ua", "co.ua", "net.ua", "co.il", "com.kz", "co.ru", "co.uk", "com.ge", "com.kg",
        "com.pl", "org.by", "net.tr", "com.cy", "org.il", "org.tr", "com.br", "co.ke", "co.in", "com.ee", "org.cn", "net.kg", "co.th", "co.kr", "com.ni",
        "com.am", "co.id", "com.es", "org.kg", "co.za", "org.nz", "com.eg", "net.by", "com.md", "co.jp", "com.ar", "com.de", "com.sg", "com.mx", "com.au",
        "org.pl", "org.uk", "org.kz", "com.co", "com.se", "co.no", "com.cn", "com.tm",
    };

    const TStringBuf domain = RemoveScheme(hostname);

    if (domain.empty()) {
        return domain;
    }

    // Only the last three dots can influence the result, so they are located
    // from the right instead of recording every dot position in a container.
    const size_t lastDot = domain.rfind('.');
    if (lastDot == TStringBuf::npos) {
        return domain;
    }

    // Each further search runs on the prefix that ends just before the dot
    // already found.
    const size_t secondDot = domain.SubStr(0, lastDot).rfind('.');
    if (secondDot == TStringBuf::npos) {
        return domain;
    }

    const TStringBuf ld2Domain = domain.SubStr(secondDot + 1);

    if (SPEC_2LD.find(ld2Domain) == SPEC_2LD.end()) {
        // Ordinary TLD: the owner is the last two labels.
        return ld2Domain;
    }

    // Special two-label suffix: the owner needs one more label on the left.
    const size_t thirdDot = domain.SubStr(0, secondDot).rfind('.');
    if (thirdDot == TStringBuf::npos) {
        return domain;
    }

    return domain.SubStr(thirdDot + 1);
}

} //namespace NUtils

} //namespace NWebmaster
