# -*- coding: utf-8 -*-
from itertools import product
import logging
import re
import string

from passport.backend.core.conf import settings
from passport.backend.core.lazy_loader import (
    lazy_loadable,
    LazyLoader,
)
from passport.backend.utils import file
from passport.backend.utils.math import sequence_multiply


# Logger used by the transliteration helpers in this module.
log = logging.getLogger('passport.transliterations')

# Meta symbols of the pseudo-regex pattern syntax used in transliteration
# files; stripping them from a pattern leaves only its literal characters.
META_SYMBOLS = re.compile(r'[%@$^.]')

# Characters that are copied to the output unchanged when no rule matches:
# hyphen, dot, ASCII digits and ASCII letters.
TRANSLITERATE_AS_IS = ''.join(('-.', string.digits, string.ascii_letters))

# When a rule pattern starts with one of these characters, matching starts
# one character before the current position (see find_replacements).
META_FIRST_SYMBOLS = '[^'


@lazy_loadable()
class Transliterations(object):
    """
    Per-language transliteration rules and helpers that apply them.

    ``self.rules`` maps a language code to the rule table parsed from that
    language's transliteration file (see ``_parse_file`` for the layout).
    """

    def __init__(self):
        self.rules = self._load_rules()

    def _load_rules(self):
        """
        Parse the transliteration file of every supported language.

        Languages are taken from ``settings.SUGGEST_SUPPORTED_LANGUAGES`` and
        file paths from ``settings.LANG_TO_TRANSLITERATIONS_FILE``.

        :rtype: dict
        """
        return {
            lang: self._parse_file(settings.LANG_TO_TRANSLITERATIONS_FILE[lang])
            for lang in settings.SUGGEST_SUPPORTED_LANGUAGES
        }

    def _parse_file(self, full_path):
        """
        Parse a file of lines formatted as: pattern replacement replacement

        Blank lines and lines starting with ``#`` are ignored.
        ``pattern`` may be:
            - a single letter - matches that one letter
            - a pseudo-regular expression - an ordinary regex that may also
              contain these meta symbols:
            - ^ - start of the string
            - $ - end of the string
            - @ - a vowel (expanded to ``settings.VOWELS``)
            - % - a consonant (expanded to ``settings.CONSONANTS``)
            - . - any single character (regex dot)

        Replacements are weighted in file order: the first one gets
        ``settings.REPLACEMENTS_INITIAL_FACTOR`` and every following one is
        ``settings.REPLACEMENTS_FACTOR_STEP`` lower. Rules are grouped by the
        first literal (non-meta) character of the pattern, in file order.

        If the file does not exist, a warning is logged and an empty table is
        returned. A pattern that consists only of meta symbols has no literal
        character to index it by; such a line is skipped with a warning
        instead of failing the whole load.

        Input:
            дж j dj dzh
            др$ der dr
            д d

        Output (assuming an initial factor of 1.0 and a step of 0.02):
            {
                u'д': [
                    {
                        'replacements': [
                            {'replacement': 'j', 'factor': 1.0},
                            {'replacement': 'dj', 'factor': 0.98},
                            {'replacement': 'dzh', 'factor': 0.96},
                        ],
                        'pattern': u'дж',
                        'characters': 'дж',
                    },
                    {
                        'replacements': [
                            {'replacement': 'der', 'factor': 1.0},
                            {'replacement': 'dr', 'factor': 0.98},
                        ],
                        'pattern': u'др$',
                        'characters': 'др',
                    },
                    {
                        'replacements': [
                            {'replacement': 'd', 'factor': 1.0},
                        ],
                        'pattern': u'д',
                        'characters': 'д',
                    },
                ],
            }

        :param full_path: path to the transliteration file
        :rtype: dict
        """
        rules = {}

        if not file.path_exists(full_path):
            log.warning('Transliterations file not found: %s', full_path)
            return rules

        content = file.read_file(full_path, encoding='utf8')

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line or line.startswith(u'#'):
                continue

            tokens = line.split()
            pattern_to_match = tokens[0]

            # Literal characters of the pattern, i.e. the pattern without
            # meta symbols. The first of them is the lookup key of the rule.
            characters = META_SYMBOLS.sub('', pattern_to_match)
            if not characters:
                log.warning(
                    'Skipping transliteration rule without literal characters: %r (%s)',
                    line,
                    full_path,
                )
                continue

            replacements = [
                {
                    'replacement': replacement,
                    'factor': settings.REPLACEMENTS_INITIAL_FACTOR - i * settings.REPLACEMENTS_FACTOR_STEP,
                }
                for i, replacement in enumerate(tokens[1:])
            ]

            if len(pattern_to_match) == 1:
                # The pattern is a single letter
                pattern = pattern_to_match
            else:
                # The pattern is a regex with special symbols from the config
                pattern = pattern_to_match\
                    .replace(u'%', settings.CONSONANTS)\
                    .replace(u'@', settings.VOWELS)

            rules.setdefault(characters[0], []).append({
                'replacements': replacements,
                'characters': characters,
                'pattern': pattern,
            })
        return rules

    @classmethod
    def find_replacements(cls, word, current_char_ind, rules_source):
        """
        Find the replacements for the character at ``current_char_ind``.

        Rules registered for that character are tried in order and the first
        one whose pattern matches wins. If nothing matches, the character is
        kept as is when it belongs to ``TRANSLITERATE_AS_IS``; otherwise no
        replacement is produced for it.

        :type word: unicode
        :type current_char_ind: int
        :type rules_source: dict
        :return: a pair ``(replacements, replacement_len)`` where
            ``replacements`` is a list with at most one
            ``{'replacements': [...]}`` item and ``replacement_len`` is the
            number of characters of ``word`` covered by it
        :rtype: tuple
        """
        char = word[current_char_ind]

        # The first matching pattern wins
        for rule in rules_source.get(char) or ():
            pattern = rule['pattern']
            start_ind = current_char_ind
            # Patterns starting with a meta symbol are matched starting from
            # the previous character (when there is one).
            # NOTE(review): since this look-back is done by slicing, a
            # '^'-anchored pattern can also match at a non-zero index when
            # the previous character is the same letter - confirm whether
            # that is intended.
            if current_char_ind and pattern[0] in META_FIRST_SYMBOLS:
                start_ind -= 1
            if re.match(pattern, word[start_ind:]):
                # Advance by the length of the matched letter sequence
                return [{'replacements': rule['replacements']}], len(rule['characters'])

        # Either there are no rules for the letter, or none of them matched:
        # e.g. the letter occurs in a combination but has no rule of its own
        # (ь, ъ)
        if char in TRANSLITERATE_AS_IS:
            as_is = {'replacement': char, 'factor': 1.0}
            return [{'replacements': [as_is]}], 1
        return [], 1

    def collect_rules(self, word, lang=None, rules_source=None):
        """
        Collect all transliteration rules applicable to the given word.

        ``rules_source`` is used when it is given and non-empty; otherwise
        the rules loaded for ``lang`` are used. Without any rules an empty
        list is returned.

        :type word: unicode
        :type lang: basestring
        :type rules_source: dict
        :rtype: list
        """
        rules = []
        rules_source = rules_source or self.rules.get(lang, {})
        if not rules_source:
            return rules

        position = 0
        word_length = len(word)
        while position < word_length:
            matched_rules, replacement_len = self.find_replacements(
                word,
                position,
                rules_source,
            )
            rules.extend(matched_rules)
            # Always move forward, even if a rule reports a zero-length
            # match, so that the loop cannot get stuck on one position.
            position += max(replacement_len, 1)
        return rules

    @classmethod
    def apply_rules(cls, source_rules, ignore_as_is=False, threshold=None):
        """
        Build transliterations from every combination of rule replacements.

        Combinations are generated in ``itertools.product`` order, i.e. the
        first one consists of the first replacement of every rule. Rules
        without replacements are ignored. ``source_rules`` is not modified.

        Note: ``ignore_as_is`` drops the first combination, so it only works
        as intended when the transliteration sources follow the convention
        that the first replacement is the character itself (as in
        LETTER_TO_NUMBER_REPLACEMENTS).

        :param source_rules: list of ``{'replacements': [...]}`` dicts, e.g.
            the result of ``collect_rules``
        :param ignore_as_is: skip the first combination
        :param threshold: index of the last combination to return, so at most
            ``threshold + 1`` items are produced; ``None`` means
            ``settings.MAX_TRANSLITERATIONS - 1``. An explicit ``0`` is
            honoured and yields a single transliteration.
        :return: list of ``{'transliteration': ..., 'factor': ...}`` dicts
        :rtype: list
        """
        if threshold is None:
            threshold = settings.MAX_TRANSLITERATIONS - 1

        # Rules without replacements are left out, otherwise the product
        # would be empty. The caller's list is filtered into a new one
        # rather than modified in place.
        replacement_lists = [
            rule['replacements']
            for rule in source_rules
            if rule['replacements']
        ]
        combinations = product(*replacement_lists)

        if ignore_as_is:
            next(combinations, None)

        transliterations = []
        for i, combination in enumerate(combinations):
            transliterations.append({
                'transliteration': ''.join([item['replacement'] for item in combination]),
                'factor': round(
                    sequence_multiply([item['factor'] for item in combination]),
                    2,
                ),
            })
            if i >= threshold:
                break
        return transliterations


def get_transliteration_rules():
    """
    Return the object that ``LazyLoader`` provides under the name
    'Transliterations'.
    """
    transliterations = LazyLoader.get_instance('Transliterations')
    return transliterations
