# -*- coding: utf-8 -*-
from copy import deepcopy
import logging
import re

from passport.backend.core.conf import settings
from passport.backend.core.lazy_loader import (
    lazy_loadable,
    LazyLoader,
)
from passport.backend.utils import file


# Module-level logger; FirstNames._parse_file uses it to warn about missing name files.
log = logging.getLogger('passport.first_names')


@lazy_loadable()
class FirstNames(object):
    """
    Per-gender, per-language dictionary of first names with their synonyms,
    English synonyms and chunks, built from the files listed in settings.
    """

    def __init__(self):
        # Mapping: gender -> language -> name -> entry (see _parse_file).
        self.names = self._load_names()

    def _load_names(self):
        """
        Parse the female and male name files of every language listed in
        settings.SUGGEST_SUPPORTED_LANGUAGES.

        Returns a dict keyed by gender. The undefined gender maps to an empty
        dict, so lookups for it always fall back to the getters' defaults.
        """
        female_names = {}
        male_names = {}

        for lang in settings.SUGGEST_SUPPORTED_LANGUAGES:
            female_names[lang] = self._parse_file(settings.LANG_TO_FEMALE_NAMES_FILE[lang])
            male_names[lang] = self._parse_file(settings.LANG_TO_MALE_NAMES_FILE[lang])

        return {
            settings.FEMALE_GENDER: female_names,
            settings.MALE_GENDER: male_names,
            settings.UNDEFINED_GENDER: {},
        }

    @staticmethod
    def _new_entry(synonyms_en=()):
        """Build an empty name entry, optionally seeded with a copy of English synonyms."""
        return {
            'chunks': [],
            'synonyms': set(),
            'synonyms_en': set(synonyms_en),
        }

    def _parse_file(self, full_path):
        """
        Parse a names file with synonyms and abbreviations. Line format:
        <name>,<space-delimited native synonym list>[,<space-delimited english synonym list>][,<space-delimited chunk list>]

        Blank lines and lines starting with '#' are skipped. A missing file is
        logged as a warning and yields an empty result.

        Returns a dict: name -> {'chunks': list, 'synonyms': set, 'synonyms_en': set}.
        Every native and English synonym gets an entry of its own as well.
        """
        result = {}

        if not file.path_exists(full_path):
            log.warning('FirstNames file not found: %s', full_path)
            return result

        for line in file.read_file(full_path, encoding='utf8').split('\n'):
            line = line.strip()
            if not line or line.startswith(u'#'):
                continue

            blocks = line.split(u',')
            # Strip the name field the same way the other fields are stripped,
            # so "name ,synonym" does not produce a key with a trailing space.
            original_name = blocks[0].strip()
            # Replace 'ё' with 'е' and keep the original spelling as a synonym.
            name = original_name.replace(u'ё', u'е')

            entry = result.setdefault(name, self._new_entry())
            if original_name != name:
                entry['synonyms'].add(original_name)

            len_blocks = len(blocks)
            if len_blocks == 1:
                continue

            # All synonyms carry the same weight.
            current_synonyms = set(blocks[1].split())
            entry['synonyms'].update(current_synonyms)

            current_synonyms_en = set()
            if len_blocks > 2:
                current_synonyms_en = set(blocks[2].split())
                entry['synonyms_en'].update(current_synonyms_en)
            # Chunks are taken only from lines with exactly four fields.
            if len_blocks == 4:
                entry['chunks'].extend(blocks[3].split())

            # Synonyms must be present in the name list too;
            # synonyms of different names may coincide.
            for synonym in current_synonyms:
                # English synonyms are attached only when the entry is created here;
                # an already existing entry keeps its own 'synonyms_en'.
                synonym_entry = result.setdefault(synonym, self._new_entry(current_synonyms_en))
                synonym_entry['synonyms'].update(current_synonyms)
                synonym_entry['synonyms'].add(name)
                synonym_entry['synonyms'].discard(synonym)

            for synonym in current_synonyms_en:
                synonym_entry = result.setdefault(synonym, self._new_entry())
                synonym_entry['synonyms'].update(entry['synonyms'])
                synonym_entry['synonyms'].add(name)
                # An English synonym may coincide with the name or with one of its
                # native synonyms; an entry must never list itself as a synonym.
                synonym_entry['synonyms'].discard(synonym)
                synonym_entry['synonyms_en'].update(current_synonyms_en)
                synonym_entry['synonyms_en'].discard(synonym)
        return result

    def _get_entry(self, gender, lang, name):
        """Return the stored entry for the name, or an empty dict if there is none."""
        return self.names.get(gender, {}).get(lang, {}).get(name, {})

    def get_name_synonyms(self, gender, lang, name):
        """
        Return the set of native synonyms of the name, or an empty set if the
        name is unknown. A found set is the stored object, not a copy.
        """
        return self._get_entry(gender, lang, name).get('synonyms', set())

    def get_name_synonyms_en(self, gender, lang, name):
        """
        Return the set of English synonyms of the name, or an empty set if the
        name is unknown. A found set is the stored object, not a copy.
        """
        return self._get_entry(gender, lang, name).get('synonyms_en', set())

    def get_name_chunks(self, gender, lang, name):
        """
        Return the list of chunks of the name, or an empty list if the name is
        unknown. A found list is the stored object, not a copy.
        """
        return self._get_entry(gender, lang, name).get('chunks', [])

    def get_auto_chunks(self, gender, name):
        """
        Collect chunks of the name produced by the regular expressions from
        settings.GENDER_TO_CHUNKS_REGEXP for the given gender.

        Each expression is matched at the start of the name and its first
        group is taken as the chunk. Chunks equal to the whole name and
        duplicates are skipped.

        A list is returned to preserve the order,
        which is later used to compute weights.
        """
        chunks = []
        seen = set()
        for auto_chunk_re in settings.GENDER_TO_CHUNKS_REGEXP.get(gender, []):
            matched_chunk = re.match(auto_chunk_re, name)
            if not matched_chunk:
                continue
            chunk = matched_chunk.group(1)
            if chunk == name or chunk in seen:
                continue
            seen.add(chunk)
            chunks.append(chunk)
        return chunks


def get_first_names():
    """Fetch the 'FirstNames' instance via LazyLoader.get_instance and return it."""
    first_names = LazyLoader.get_instance('FirstNames')
    return first_names
