#include "names.h"
#include "misc.h"

#include <maps/libs/locale/include/convert.h>
#include <yandex/maps/wiki/common/icu_helpers.h>

#include <boost/locale.hpp>
#include <boost/locale/boundary/index.hpp>

#include <memory>
#include <vector>

namespace maps {
namespace wiki {
namespace validator {
namespace utils {

namespace {

const std::locale& LOCALE_RU()
{
    static const auto locale = boost::locale::generator()("ru_RU.UTF-8");
    return locale;
}

} // namespace

// Definitions of the precompiled ICU patterns declared as static members of
// Regexps. Each one is built by compilePattern() with its `flags` argument
// omitted, so the default declared outside this file applies
// (presumably in names.h — TODO confirm the default value).

/// Matches a whitespace character located at a position where `^` matches
/// (the start of the input, assuming the default flags do not enable
/// multi-line mode — TODO confirm).
const icu::RegexPattern Regexps::BEGINS_WITH_SPACE_REGEX =
    compilePattern(R"(^\s)");
/// Matches two consecutive whitespace characters.
const icu::RegexPattern Regexps::DOUBLE_SPACE_REGEX =
    compilePattern(R"(\s\s)");
/// Matches a single character that has the Letter property but is not in the
/// Cyrillic script (`--` is set difference inside the bracket expression).
const icu::RegexPattern Regexps::NONCYRILLIC_LETTER_REGEX =
    compilePattern(R"([\p{Letter}--\p{script=cyrillic}])");

/// @brief Counts the code points of a UTF-8 encoded string.
/// @param utf8string bytes to inspect; they are not validated as UTF-8
/// @return number of bytes that are not UTF-8 continuation bytes
size_t countCodePoints(const std::string& utf8string)
{
    size_t codePoints = 0;
    for (const char byte : utf8string) {
        // Continuation bytes have the form 10xxxxxx; any other byte begins
        // a new code point.
        const bool isContinuation = (byte & 0xC0) == 0x80;
        if (!isContinuation) {
            ++codePoints;
        }
    }
    return codePoints;
}

/// @brief Checks if a utf8 string contains only upper case characters
/// and non-alphabetic characters
/// @return true if there is at least one alphabetic character
/// and all alphabetic characters are uppercase
bool isUpperCase(const std::string& utf8string)
{
    icu::UnicodeString unicodeString = icu::UnicodeString::fromUTF8(utf8string);
    auto wstr = common::icuToUtf32(unicodeString);

    bool hasLetter = false;
    for (auto wch : wstr) {
        if (::u_islower(wch))
            return false;
        if (::u_isupper(wch))
            hasLetter = true;
    }
    return hasLetter;
}

std::vector<std::string> splitIntoWords(const std::string& line)
{
    namespace bound = boost::locale::boundary;
    std::vector<std::string> words;

    bound::ssegment_index map(bound::word, line.begin(), line.end(), LOCALE_RU());
    map.rule(bound::word_any);
    words.insert(words.end(), map.begin(), map.end());
    return words;
}

bool isNameEmpty(const std::string& name)
{
    icu::UnicodeString unicodeString = icu::UnicodeString::fromUTF8(name);
    auto str = common::icuToUtf32(unicodeString);
    return std::none_of(str.begin(), str.end(), [](uint32_t c) { return u_isgraph(c); });
}

/// @brief Compiles a utf8 regular expression into an ICU pattern.
/// @param regex pattern text, utf8 encoded
/// @param flags flags forwarded unchanged to icu::RegexPattern::compile
/// @return a copy of the compiled pattern
/// Fails via REQUIRE with "bad regex: <regex>" when ICU reports an error.
icu::RegexPattern compilePattern(const std::string& regex, uint32_t flags)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    const icu::UnicodeString patternText = icu::UnicodeString::fromUTF8(regex);

    // compile() hands back a heap-allocated pattern; own it for the duration
    // of this call and return a copy.
    const std::unique_ptr<icu::RegexPattern> compiled(
        icu::RegexPattern::compile(patternText, flags, errorCode));

    // Check the status before touching the pointer.
    REQUIRE(U_SUCCESS(errorCode), "bad regex: " << regex);
    return *compiled;
}

/// @brief Checks whether a utf8 string contains a match for an ICU pattern.
/// @param string utf8 text to search
/// @param pattern compiled pattern to look for
/// @return true if the pattern is found somewhere in the string
/// Fails via REQUIRE when a matcher for the pattern cannot be created.
bool matchesPattern(const std::string& string, const icu::RegexPattern& pattern)
{
    UErrorCode status = U_ZERO_ERROR;
    icu::UnicodeString unicodeString = icu::UnicodeString::fromUTF8(string);
    std::unique_ptr<icu::RegexMatcher> matcher(pattern.matcher(unicodeString, status));

    // matcher() signals failure through `status` and returns no object in
    // that case, so the result must be validated before it is dereferenced.
    REQUIRE(matcher && U_SUCCESS(status),
            "error while matching string: '" << string << "': " << u_errorName(status));

    return matcher->find();
}

/// @brief Checks whether a string can be parsed as a locale::Language.
/// @return false if the conversion throws locale::LocaleParsingError,
/// true if it completes; other exceptions propagate to the caller
bool isValidLang(const std::string& lang)
{
    bool parsedSuccessfully = true;
    try {
        // Only the success or failure of the conversion matters; the parsed
        // value itself is discarded.
        locale::to<locale::Language>(lang);
    } catch (const locale::LocaleParsingError&) {
        parsedSuccessfully = false;
    }
    return parsedSuccessfully;
}

} // namespace utils
} // namespace validator
} // namespace wiki
} // namespace maps
