# -*- coding: utf-8 -*-
import json
from Stemmer import Stemmer
from collections import OrderedDict
from datetime import date, timedelta
from uuid import UUID

import re
from httpgeobase import Lookup as HttpGeobaseLookuper
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from nile.api.v1 import extended_schema
from nile.api.v1 import with_hints
from nile.processing.record import Record
from yt.wrapper import is_inside_job

from jafar_yt.utils.translit import transliterate as util_transliterate

# Stemmer instances keyed by language code. They are created once at import
# time and looked up by stemmize() and transliterate().
stemmers = {
    'en': Stemmer('en'),
    'ru': Stemmer('ru'),
}

# Language code used when the detected language has no entry in `stemmers`.
DEFAULT_SEARCH_LANG = 'ru'


def parsed_google_play_unpacker(rows):
    """
    Unpack rows of the parsed Google Play tables into records.

    Each row carries a JSON string in its 'value' column. Every value that
    decodes to a JSON object is emitted as a Record whose fields are the
    object's keys; values that decode to anything else are skipped.
    An example of the source data can be found here:
    https://yt.yandex-team.ru/hahn/#page=navigation&path=//home/extdata/mobile/google_play/latest/ru
    """
    votes_sanity_limit = 1e11
    for row in rows:
        payload = json.loads(row.value)
        if not isinstance(payload, dict):
            continue
        # workaround for https://st.yandex-team.ru/EXTDATA-1673 bug:
        # a vote count above the sanity limit is reset to zero
        if 'rating' in payload:
            rating = payload['rating']
            if rating.get('votes', 0) > votes_sanity_limit:
                rating['votes'] = 0
        yield Record(**payload)


class EventValueMapper(object):
    """
    Mapper that turns the JSON object stored in 'EventValue' into a record.

    The names passed to the constructor are extra columns that are copied
    from the source row into every produced record (overriding a key of the
    same name coming from the JSON payload).
    """

    def __init__(self, *extra_columns):
        # Tuple of column names to carry over from the source row.
        self.extra_columns = extra_columns

    def __call__(self, rows):
        """
        Yield one Record per row whose 'EventValue' holds a JSON object.

        Rows are skipped when 'EventValue' is empty, is not valid JSON, or
        decodes to something other than a JSON object (list, number, null...),
        because only an object can be extended with the extra columns.
        """
        for row in rows.fields(*(self.extra_columns + ('EventValue',))):
            raw_value = row['EventValue']
            if not raw_value:
                continue
            try:
                event_value = json.loads(raw_value)
            except ValueError:
                continue
            # Valid JSON is not necessarily an object; anything else cannot
            # be turned into a record, so it is dropped like malformed input.
            if not isinstance(event_value, dict):
                continue
            for column in self.extra_columns:
                event_value[column] = row[column]
            yield Record.from_dict(event_value)


def mapper_wrapper(cls):
    """
    Class decorator that replaces a mapper class with a factory function.

    The factory instantiates `cls` with the given arguments and returns the
    instance wrapped by `with_hints`, using the instance's `schema` attribute
    as the hint.
    """
    def wrapped(*args, **kwargs):
        mapper = cls(*args, **kwargs)
        apply_hints = with_hints(mapper.schema)
        return apply_hints(mapper)
    return wrapped


@mapper_wrapper
class EventValueMapperSelective(object):
    """
    Mapper that extends each row with selected keys of its 'EventValue' JSON.

    `schema` is a dict whose keys name the fields to pull out of the JSON
    object; the whole dict is also passed to `extended_schema` to build the
    output schema hint.
    """

    def __init__(self, schema):
        self.schema_dict = schema
        self.schema = extended_schema(**schema)

    def __call__(self, rows):
        """
        Yield a Record built from the source row plus the selected fields.

        Fields missing from the JSON object are set to None. Rows are skipped
        when 'EventValue' is empty, is not valid JSON, or decodes to something
        other than a JSON object (only an object supports key lookup).
        """
        for row in rows:
            raw_value = row['EventValue']
            if not raw_value:
                continue
            try:
                event_value = json.loads(raw_value)
            except ValueError:
                continue
            # Lists, numbers, strings and null are valid JSON but have no
            # keys to select from; treat them like malformed input.
            if not isinstance(event_value, dict):
                continue

            event_values_selected = {
                column: event_value.get(column) for column in self.schema_dict
            }
            yield Record(row, **event_values_selected)


def get_lookuper():
    """
    Return a geobase lookup object suitable for the current environment.

    Outside of a YT job the HTTP geobase client is used; inside a job the
    locally installed geobase5 library is loaded with the on-disk data file.
    """
    if not is_inside_job():
        return HttpGeobaseLookuper()

    # Use geobase5 installed on YT instances
    import geobase5

    return geobase5.Lookup('/var/cache/geobase/geodata5.bin')


def tokenize(text):
    """Return the lowercased runs of word characters of `text`, in order."""
    lowered = ensure_unicode(text).lower()
    return re.findall(r'\w+', lowered, flags=re.UNICODE)


def normalize_russian(text):
    """
    Fold the Russian letters 'ё' and 'й' into 'е' and 'и'.

    Both lowercase and uppercase forms are folded (case is preserved), so the
    result does not depend on whether the text is lowercased before or after
    normalization. All other characters are returned unchanged.
    """
    replacements = (
        (u'ё', u'е'),
        (u'й', u'и'),
        # Uppercase forms: callers lowercase only after normalization, so
        # capitalized words would otherwise keep their 'ё'/'й'.
        (u'Ё', u'Е'),
        (u'Й', u'И'),
    )
    for source, target in replacements:
        text = text.replace(source, target)
    return text


def ensure_unicode(text):
    """Return `text` as unicode, decoding it as UTF-8 when it is not unicode yet."""
    if isinstance(text, unicode):
        return text
    return text.decode('utf8')


def lang_detect(text):
    """Return the language reported by `detect` for `text`, or None when detection fails."""
    try:
        language = detect(text)
    except LangDetectException:
        language = None
    return language


def stemmize(text):
    """
    Return the stems of the distinct tokens of `text`.

    The text is normalized with normalize_russian and tokenized; duplicate
    tokens are dropped, keeping the order of first occurrence. The stemmer is
    chosen by the detected language of the text, falling back to
    DEFAULT_SEARCH_LANG when the language has no stemmer. An empty list is
    returned when the text has no tokens.
    """
    text = ensure_unicode(text)
    unique_tokens = list(OrderedDict.fromkeys(tokenize(normalize_russian(text))))
    if not unique_tokens:
        return []

    detected = lang_detect(text)
    if detected in stemmers:
        stemmer = stemmers[detected]
    else:
        stemmer = stemmers[DEFAULT_SEARCH_LANG]
    return stemmer.stemWords(unique_tokens)


def transliterate(text, language=None, make_stem=True):
    """
    Transliterate the distinct tokens of `text` and optionally stem them.

    language: 'ru'/'en' or None for autodetect by text. An autodetected
        language without a stemmer is replaced by DEFAULT_SEARCH_LANG.
    make_stem: when true, the transliterated tokens are stemmed with the
        stemmer of `language` (KeyError if an explicitly passed language has
        no entry in `stemmers`).

    Tokens are transliterated 'lat-cyr' for 'en' and 'cyr-lat' otherwise.
    Returns an empty list when the text has no tokens.
    """
    text = ensure_unicode(text)
    unique_tokens = list(OrderedDict.fromkeys(tokenize(normalize_russian(text))))
    if not unique_tokens:
        return []

    if language is None:
        language = lang_detect(text)
        if language not in stemmers:
            language = DEFAULT_SEARCH_LANG

    if language == 'en':
        direction = 'lat-cyr'
    else:
        direction = 'cyr-lat'

    converted = [util_transliterate(token, direction) for token in unique_tokens]
    if not make_stem:
        return converted
    return stemmers[language].stemWords(converted)


def parse_workspace_config(rows):
    """
    Yield one Record per titled app shortcut found in workspace configs.

    Every row is expected to have 'screens' (a list of screens, each with an
    optional 'items' list) and 'lang'. Items of type 'favorite' that have both
    a title and a package name produce a record; items of type 'folder' are
    searched recursively through their own 'items'. Each record holds
    'package_name', 'short_title' and 'language' (the part of 'lang' before
    the first underscore).

    config example:

    {
        u'action': u'config',
        u'height': 5,
        u'lang': u'ru_RU',
        u'screens': [{
            u'items': [{
                    u'className': u'com.android.camera.CameraLauncher',
                    u'container': u'desktop',
                    u'packageName': u'com.android.camera',
                    u'title': u'\xd0\x9a\xd0\xb0\xd0\xbc\xd0\xb5\xd1\x80\xd0\xb0',
                    u'type': u'favorite',
                    u'x': 0,
                    u'y': 5
                },
                ...
            ],
            u'screenId': 1,
            u'screenRank': 0
            },
            ...
        ],
        u'width': 6
    }
    """

    def parse_items(items):
        # Depth-first walk that keeps the original item order.
        for entry in items:
            kind = entry.get('type')
            if kind == 'folder':
                for nested in parse_items(entry.get('items', [])):
                    yield nested
            elif kind == 'favorite' and entry.get('title') and entry.get('packageName'):
                yield {
                    'package_name': entry['packageName'],
                    'short_title': entry['title'],
                }

    for row in rows:
        for screen in row['screens']:
            screen_items = screen.get('items', [])
            for shortcut in parse_items(screen_items):
                language = row['lang'].split('_')[0]
                yield Record(language=language, **shortcut)


def force_ascii(value):
    """
    Drop every non-ASCII character from a unicode value.

    Values of any other type are returned unchanged.
    """
    if not isinstance(value, unicode):
        return value
    utf8_bytes = value.encode('utf-8')
    return utf8_bytes.decode('ascii', errors='ignore')


def is_valid_uuid(value):
    """
    Return True when `value` can be parsed by `uuid.UUID`, False otherwise.

    Never raises for bad input: malformed strings (ValueError), None
    (TypeError) and non-string values such as integers (AttributeError inside
    the UUID constructor) all yield False.
    """
    try:
        UUID(value)
    except (ValueError, TypeError, AttributeError):
        return False
    return True


def html_to_plain_text(html_markup):
    """
    Return the text content of an HTML fragment.

    Every <br> tag is replaced with a newline before the text is extracted.
    """
    # Imported lazily so the module can be loaded without bs4 installed.
    from bs4 import BeautifulSoup

    document = BeautifulSoup(html_markup, 'html.parser', from_encoding='UTF-8')
    line_breaks = document.find_all("br")
    for tag in line_breaks:
        tag.replace_with("\n")
    return document.get_text()


def day_before(n_days):
    """Return the local date that lies `n_days` days before today."""
    offset = timedelta(days=n_days)
    today = date.today()
    return today - offset


def place_launched(place_dict):
    """
    Split a nested "place" dict into a (primary, secondary) pair of strings.

    `primary` is the first key of `place_dict`. `secondary` is the value
    stored under that key, or its first key when the value is a non-empty
    dict. Both are converted with `str`.

    Returns ("", "") when `place_dict` is not a dict or is empty.
    """
    if not isinstance(place_dict, dict) or not place_dict:
        # An empty dict has no first key; treat it like a missing place.
        return "", ""

    # next(iter(...)) gives the same key as keys()[0] without building a list.
    primary = next(iter(place_dict))
    secondary = place_dict[primary]

    if secondary and isinstance(secondary, dict):
        secondary = next(iter(secondary))

    return str(primary), str(secondary)


def unify_uuid(device_id):
    """
    Return `device_id` in canonical UUID form, or None when it is not a UUID.

    The canonical form is the lowercase, hyphenated string produced by
    `str(uuid.UUID(...))`. Malformed strings, None and non-string values
    all yield None, for the same inputs that is_valid_uuid rejects.
    """
    try:
        return str(UUID(device_id))
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string; TypeError: None;
        # AttributeError: non-string value such as an int.
        return None
