#include "address_normalizer.h"
#include "address.h"

#include <maps/search/geocoder/segmenter/interface/splitter.h>
#include <maps/libs/stringutils/include/case.h>

#include <unicode/unistr.h>

namespace maps::wiki::schedule_feedback {

using segmenter::AddressSplit;
using segmenter::AddressSplitter;
using search::common::Kind;

namespace {

// Lowercase words and abbreviations that mark a string as containing a
// street-like toponym (street, lane, avenue, square, ...), grouped by
// language. Each entry is turned into a whole-word regex by the
// AddressNormalizer constructor, so entries must be complete tokens:
// one string literal per marker, separated by commas *outside* the quotes
// (adjacent literals without a comma are silently concatenated).
const std::vector<std::string> streetMarkers = {
    // TODO: add support for other countries, or
    // solve this problem more elegantly

    "переулок", "пер", "пр-к", "пер-к", "проезд", "пр-д", "пр-зд",
    "пр", "тракт", "тр", "тр-т", "линия", "линии", "проулок",
    "тупик", "туп", "просек", "просека", "улица", "ул", "въезд",     /*rus*/
    "съезд", "подъем", "спуск", "бульвар", "бульв", "бул", "б-р",
    "проспект", "просп", "пр-т", "пр-кт", "п-т", "прт", "площадь",
    "пл-дь", "набережная", "наб", "шоссе", "ш", "аллея", "ал",

    "вуліца", "дарога", "завулак", "кальцо", "набярэжная", "праезд",   /*bel*/
    "праспект", "тупік", "шаша", "плошча",

    "шосе", "набережна", "лінія", "площа", "майдан", "алея", "узвіз",    /*ukr*/
    "вулиця", "вул", "проїзд", "переїзд", "провулок", "пров", "завулок",

    "asfaltı", "asfalti", "asf", "sokak", "sokağı", "sokagi",
    "sok", "sk", "cadde", "caddesi", "cad", "cd", "geçiş", "gecis",
    "geçişi", "gecisi", "geçit", "gecit", "geçidi", "gecidi", "ara sokak",
    "ara sokağı", "ara sokagi", "arasokağ", "arasokagi", "arasokak",
    "ara sok", "ara sk", "çıkmaz sokak", "çıkmaz", "cikmaz", "çıkmazı",
    "cikmazi", "cıkmazı", "çıkmazı sokak", "çk", "otoyol", "otoyolu",       /*tr*/
    "otoban", "otobanı", "otobani", "anayol", "anayolu", "asfaltı cad",
    "şose", "sose", "kıyı bölgesi", "kiyi bolgesi", "meydan", "meydanı",
    "meydani", "ağaçlıklı yol", "agaclikli yol", "aralık", "ara", "aralik",
    "aralığı", "araligi", "ar", "park yolu", "bulvar", "bulvarı",
    "bulvari", "bulv", "bul", "blv", "büyük yol", "buyuk yol", "iniş",
    "inis", "orman yolu", "giriş yeri", "giris yeri", "yol", "yolu",
    "yollar", "yolları", "yollari", "yokuş", "yokus", "yokuşu", "yokusu",
    "kavşağı", "kavsagi", "kavşak", "kavsak", "yanyol", "karayolu",
    "karayol", "kara yol", "kara yolu"
};

// Builds a regex pattern that matches `word` only when it occurs as a
// standalone word rather than as a part of a longer one: the word is
// wrapped in '\b' (word boundary) anchors on both sides.
// The word is inserted verbatim, without any regex escaping.
//
icu::UnicodeString createMatchWordRegex(const std::string& word)
{
    const std::string wordBoundary = "\\b";
    const std::string pattern = wordBoundary + word + wordBoundary;
    return icu::UnicodeString::fromUTF8(pattern);
}

bool containsStreetMarker(
    const std::string& strUTF8,
    const std::vector<std::unique_ptr<icu::RegexMatcher>>& streetMarkersMatchers)
{
    icu::UnicodeString str = icu::UnicodeString::fromUTF8(strUTF8);

    for (auto& matcher : streetMarkersMatchers) {
        matcher->reset(str);

        UErrorCode status = U_ZERO_ERROR;
        bool contains = matcher->find(status);
        if (U_FAILURE(status)) {
            throw RuntimeError() << "Regex matcher 'find' internal error";
        }

        if (contains) {
            return true;
        }
    }

    return false;
}

// Prepends the default street marker ("улица" followed by a space)
// to the given string and returns the result.
std::string addDefaultStreetMarker(const std::string& str)
{
    std::string withMarker{"улица "};
    withMarker += str;
    return withMarker;
}

AddressSplit normalizeAddressToAddressParts(
    const std::string& address,
    const AddressSplitter& splitter)
{
    try {
        const auto splits = splitter.splitAddress(address);
        REQUIRE(!splits.empty(), "Splits are empty");
        return splits.front();
    } catch (...) {
        throw RuntimeError("Address splits convertion segmenter error");
    }
}

// Verifies that `collection` holds exactly one element.
// Throws RuntimeError mentioning `entityName` and the actual size otherwise.
void assertCollectionUnique(
    const std::vector<std::string>& collection,
    const std::string& entityName)
{
    const auto count = collection.size();
    if (count == 1) {
        return;
    }

    throw RuntimeError()
        << entityName << " size is " << count
        << ", but should be 1";
}

// Collects, in their original order, the names of all parts of
// `addressSplit` whose kinds intersect with `kind`.
std::vector<std::string> extractOneKindNames(
    const AddressSplit& addressSplit,
    Kind kind)
{
    std::vector<std::string> matchedNames;

    for (const auto& component : addressSplit) {
        // Skip parts that do not carry the requested kind.
        if (!(component.kinds() & kind)) {
            continue;
        }
        matchedNames.push_back(component.name());
    }

    return matchedNames;
}

}

// Creates the grammar-based address splitter from `grammarPath` and
// precompiles one whole-word regex matcher per known street marker.
// Throws RuntimeError if any marker regex fails to initialize.
AddressNormalizer::AddressNormalizer(const std::string& grammarPath) :
    splitter_(segmenter::createGrammarSplitter(grammarPath))
{
    REQUIRE(splitter_, "Failed to create address splitter");

    for (const auto& markerWord : streetMarkers) {
        const auto wordPattern = createMatchWordRegex(markerWord);

        UErrorCode errorCode = U_ZERO_ERROR;
        auto compiledMatcher =
            std::make_unique<icu::RegexMatcher>(wordPattern, 0, errorCode);
        if (U_FAILURE(errorCode)) {
            throw RuntimeError() << "Regex matcher initialization failed";
        }

        streetMarkersMatchers_.push_back(std::move(compiledMatcher));
    }
}

// When an address is already separated into street and house,
// normalization works better if it is applied to the street
// and to the house separately.
//
// An additional problem solved here: the user may not mention
// any street marker in the 'street' field. In that case the segmenter
// can't do anything, so we detect such cases and
// add a default street marker (it doesn't matter which one).
//

std::optional<std::string>
AddressNormalizer::normalizeStreet(const std::string& street) const try
{
    auto corrected = stringutils::lowercase(street);
    if (!containsStreetMarker(corrected, streetMarkersMatchers_)) {
        corrected = addDefaultStreetMarker(corrected);
    }

    auto addressParts = normalizeAddressToAddressParts(corrected, *splitter_);
    auto streets = extractOneKindNames(addressParts, Kind::Street);
    assertCollectionUnique(streets, "Street");
    return streets.front();

} catch (RuntimeError&) {
    return std::nullopt;
}

std::optional<std::string>
AddressNormalizer::normalizeHouse(const std::string& house) const try
{
    auto corrected = stringutils::lowercase(house);

    auto addressParts = normalizeAddressToAddressParts(corrected, *splitter_);
    auto houses = extractOneKindNames(addressParts, Kind::House);
    assertCollectionUnique(houses, "House");
    return houses.front();

} catch (RuntimeError&) {
    return std::nullopt;
}

std::optional<NormalizedAddress>
AddressNormalizer::normalize(const RawAddress& address) const
{
    auto street = normalizeStreet(address.street);
    auto house  = normalizeHouse(address.house);
    if (!(street && house)) {
        return std::nullopt;
    }
    return NormalizedAddress{*street, *house};
}

std::optional<NormalizedAddress>
AddressNormalizer::normalize(const std::string& address) const try
{
    auto corrected = stringutils::lowercase(address);
    if (!containsStreetMarker(corrected, streetMarkersMatchers_)) {
        corrected = addDefaultStreetMarker(corrected);
    }

    auto addressParts = normalizeAddressToAddressParts(corrected, *splitter_);

    auto streets = extractOneKindNames(addressParts, Kind::Street);
    assertCollectionUnique(streets, "Street");

    auto houses  = extractOneKindNames(addressParts, Kind::House);
    assertCollectionUnique(houses, "House");

    return NormalizedAddress{streets.front(), houses.front()};

} catch (RuntimeError&) {
    return std::nullopt;
}

} // namespace maps::wiki::schedule_feedback
