
import time
import re
import uuid
import logging
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

import requests
from django.utils.encoding import smart_str

logger = logging.getLogger(__name__)


# Separator placed between strings that are translated as one text and used to
# split the translation back into a list. It only has to be unique, so a random
# UUID is used: uuid1() would embed this host's network address and a timestamp
# in a string that can end up in a request to the external translation service.
BOUNDARY_MARKER = '<!-- boundary %s -->' % uuid.uuid4()


class Translator:
    """
    Callable wrapper around `translate` that keeps a list of escape patterns.

    Fragments of the text matched by the escape patterns are hidden from the
    translation and put back into the translated text afterwards.
    """

    # Default patterns. This class-level list is never mutated through an
    # instance: the first registration switches the instance to a private copy.
    _escape_patterns = [
        re.compile(r'<[^>]+>'),  # escape html from translating
    ]
    APPEND = 'append'
    INSERT = 'insert'

    def __call__(self, text, lang='en-ru', escape=True, **query_params):
        """
        Translate `text` and return the result.

        `text` is either a single string or a list/tuple of strings. A
        sequence is joined with `BOUNDARY_MARKER`, translated as one text and
        split back on the marker, so a sequence yields a list and a string
        yields a string.

        `lang` is the translation direction (for example 'en-ru'), `escape`
        switches the escape patterns on or off, and `query_params` are passed
        through to `translate` unchanged.
        """
        escape_patterns = self._escape_patterns if escape else None

        if isinstance(text, (list, tuple)):
            if not text:
                # Nothing to translate: skip the request and keep
                # len(result) == len(text).
                return []
            concatenation = BOUNDARY_MARKER.join(smart_str(u) for u in text)
            translation = translate(
                text=concatenation, lang=lang, escape_patterns=escape_patterns, **query_params
            )
            return translation.split(BOUNDARY_MARKER)

        return translate(text=text, lang=lang, escape_patterns=escape_patterns, **query_params)

    def _add_to_patterns(self, pattern, method=None):
        """
        Register `pattern` on this instance.

        `method` is `INSERT` (put the pattern first) or `APPEND` (put it
        last, the default). Any other value raises ValueError.
        """
        if method is None:
            method = self.APPEND
        if method not in (self.INSERT, self.APPEND):
            raise ValueError('Unknown method: %r' % (method,))

        # Copy-on-write: without this the shared class-level list would be
        # mutated and the pattern would leak into every other instance.
        if '_escape_patterns' not in vars(self):
            self._escape_patterns = list(self._escape_patterns)

        if method == self.INSERT:
            self._escape_patterns.insert(0, pattern)
        else:
            self._escape_patterns.append(pattern)

    def prepend_escape_pattern(self, rgx):
        """
        Put one pattern, or a list/tuple of patterns, in front of the existing ones.

        A sequence keeps its own order in the resulting list. Each item may be
        a pattern string or a compiled pattern. Returns `self` for chaining.
        """
        if isinstance(rgx, (list, tuple)):
            # Insert back to front so the items end up in their original order.
            for item in reversed(rgx):
                self._add_to_patterns(_pattern(item), method=self.INSERT)
        else:
            self._add_to_patterns(_pattern(rgx), method=self.INSERT)
        return self

    def append_escape_pattern(self, rgx):
        """
        Add one pattern, or a list/tuple of patterns, after the existing ones.

        Each item may be a pattern string or a compiled pattern. Returns
        `self` for chaining.
        """
        if isinstance(rgx, (list, tuple)):
            for item in rgx:
                self._add_to_patterns(_pattern(item), method=self.APPEND)
        else:
            self._add_to_patterns(_pattern(rgx), method=self.APPEND)
        return self


# Upper bound on the length of one piece of text produced by _split_to_chunks.
CHUNK_SIZE = 2000

# Endpoint that _translate_chunk sends its POST requests to.
TRANSLATE_URL = 'https://translate.yandex.net/api/v1/tr.json/translate'

# Number of worker threads in the pool created by _multi_query.
QUERY_THREADS_COUNT = 16

# Leftover note with a sample Google AJAX translate URL:
# 'https://ajax.googleapis.com/ajax/services/language/translate?' +
# 'v=1.0&q=Hello,%20my%20friend!&langpair=en%7Ces&key=INSERT-YOUR-KEY&userip=INSERT-USER-IP')

# Translation directions accepted by translate(); any other value is rejected.
languages = (
    'ru-en', 'en-ru',
    'ru-uk', 'uk-ru',
    'ru-tr', 'tr-ru',
    'pl-ru', 'ru-pl',
    'de-ru', 'ru-de',
    'fr-ru', 'ru-fr',
    'it-ru',  # no reverse 'ru-it' direction is listed
    'es-ru', 'ru-es',
)

# Deprecated! Use wiki.legacy.langs module. (oh ironic)
# NOTE(review): presumably maps a language code to the language to fall back
# to; not used anywhere in the visible part of this module, so confirm.
langs_degradation = {
    'ru': 'ru', 'en': 'en',
    'tr': 'en',
    'uk': 'ru', 'kk': 'ru',
}


class UnknownTranslateDirection(ValueError):
    """Raised by `translate` when the requested direction is not in `languages`."""


class TranslationException(Exception):
    """Raised by `_multi_query` in strict mode when some chunks were not translated."""


def translate(text, lang='en-ru', escape_patterns=None, **query_params):
    """
    Return the translated text.

    The text is optionally escaped, split into chunks, translated chunk by
    chunk and, if it was escaped, unescaped again.

    @param text: text to translate
    @param lang: translation direction, one of `languages`
    @param escape_patterns: compiled patterns whose matches must not be
        translated; None or an empty sequence disables escaping
    @param query_params: passed through to `_multi_query`
        (total_timeout, query_timeout, strict, srv)
    @rtype: str
    @raise UnknownTranslateDirection: if `lang` is not in `languages`
    """
    if lang not in languages:
        # Name the rejected direction so the failure can be diagnosed.
        raise UnknownTranslateDirection('Unknown translate direction: %r' % (lang,))

    replaces = None
    if escape_patterns:
        text, replaces = _escape(text, escape_patterns)

    chunks = _split_to_chunks(text)
    text = _multi_query(chunks, lang=lang, **query_params)

    if replaces is not None:
        text = _unescape(text, replaces)
    return text


def _multi_query(chunks, lang, total_timeout=60, query_timeout=5, strict=True, srv=None):
    """
    Translate all chunks concurrently and return the joined result.

    @param chunks: list of text pieces, translated independently
    @param lang: translation direction passed to every request
    @param total_timeout: time budget in seconds for the whole call; a chunk
        whose turn comes after the budget is spent is not requested at all
    @param query_timeout: timeout in seconds of a single HTTP request
    @param strict: raise instead of returning a partial result
    @param srv: optional value of the `srv` query parameter
    @rtype: str
    @raise TranslationException: in strict mode, if any chunk was not handled

    With `strict=False` the chunks that were not handled are left out of the
    returned text.
    """
    start_time = time.time()
    timeout_info = start_time, total_timeout, query_timeout
    translate_func = partial(_translate_chunk, lang=lang, timeout_info=timeout_info, srv=srv)

    chunks_count = len(chunks)
    # No point in starting more threads than there are chunks.
    pool = ThreadPool(max(1, min(QUERY_THREADS_COUNT, chunks_count)))
    try:
        # map() keeps the results in the order of `chunks`.
        results = pool.map(translate_func, chunks)
    finally:
        # Shut the workers down even if one of them raised.
        pool.close()
        pool.join()

    handled_chunks = [item for item in results if item is not None]
    handled_chunks_count = len(handled_chunks)

    if handled_chunks_count < chunks_count:
        logger.warning(
            'Translate interrupted by timeout. %s of %s chunks was not handled',
            chunks_count - handled_chunks_count,
            chunks_count,
        )
        if strict:
            raise TranslationException('Translation Failed')

    return ''.join(handled_chunks)


def _translate_chunk(chunk, lang, timeout_info, srv):
    """
    Translate one chunk with a single POST request to `TRANSLATE_URL`.

    @param chunk: text to translate
    @param lang: translation direction
    @param timeout_info: tuple `(start_time, total_timeout, query_timeout)`;
        `start_time` is a `time.time()` value, the timeouts are in seconds
    @param srv: optional value of the `srv` query parameter
    @return: the translated text; None if the total timeout is already
        exceeded or the request failed; the untranslated `chunk` if the
        response body could not be interpreted
    """
    start_time, total_timeout, query_timeout = timeout_info

    # do nothing if timeout is exceeded
    time_elapsed = time.time() - start_time
    if total_timeout and time_elapsed > total_timeout:
        logger.warning(
            'Translate failed. Timeout %ss exceeded (elapsed %ss)',
            total_timeout,
            time_elapsed,
        )
        return None

    url = TRANSLATE_URL if not srv else '{url}?srv={srv}'.format(url=TRANSLATE_URL, srv=srv)
    encoded_chunk = chunk.encode('utf-8')
    try:
        response = requests.post(
            url=url,
            data={
                'text': encoded_chunk,
                'lang': lang,
            },
            headers={
                'Connection': 'close',
            },
            timeout=query_timeout,
        )
    except requests.RequestException:
        logger.exception('Translate failed')
        return None

    try:
        return ''.join(response.json()['text'])
    except (ValueError, KeyError, TypeError):
        # ValueError: the body is not JSON. KeyError/TypeError: it is JSON but
        # has no usable 'text' list. All are handled the same way so that an
        # unexpected response cannot escape the worker with an unrelated
        # exception type.
        logger.exception('Translate failed. Got %s', response.text)
        return chunk


def _split_to_chunks(text, chunk_size=None):
    """
    Split `text` into consecutive pieces of at most `chunk_size` characters.

    Each piece is cut, in order of preference, right after the last '. '
    inside the window, right after the last space inside the window, or
    exactly at the window boundary. Joining the pieces gives back `text`.

    @param text: text to split
    @param chunk_size: maximum piece length; defaults to `CHUNK_SIZE`
    @return: list with at least one string (`['']` for an empty text)
    @raise ValueError: if `chunk_size` is less than 1
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if chunk_size < 1:
        # A non-positive window would never make progress.
        raise ValueError('chunk_size must be positive, got %r' % (chunk_size,))

    chunks = []
    total = len(text)
    start = 0
    # Walk the text with an offset instead of re-slicing the remainder on
    # every step.
    while total - start > chunk_size:
        end = start + chunk_size
        cut = text.rfind('. ', start, end)
        if cut > start:
            cut += 2  # keep the '. ' with the piece it ends
        else:
            cut = text.rfind(' ', start, end)
            # A separator at the very start of the window is not a usable cut
            # point, fall back to a hard cut at the window boundary.
            cut = cut + 1 if cut > start else end
        chunks.append(text[start:cut])
        start = cut
    chunks.append(text[start:])
    return chunks


# Placeholder put in place of escaped fragments: nine '=' and a trailing space.
_ESCAPE_MARKER = '=' * 9 + ' '


def _pattern(str_or_rgx):
    """
    Return `str_or_rgx` as a compiled pattern.

    An object that already has a `match` attribute is returned as is,
    anything else is passed to `re.compile`.
    """
    already_compiled = hasattr(str_or_rgx, 'match')
    return str_or_rgx if already_compiled else re.compile(str_or_rgx)


def _escape(text, escape_patterns):
    """
    Replace everything matched by `escape_patterns` with markers.

    The patterns are applied one after another, each to the result of the
    previous one, and each with its own marker: `_ESCAPE_MARKER` for the
    first pattern, then the previous marker with '<>' inserted before its
    trailing space.

    @param text: text to escape
    @param escape_patterns: iterable of compiled patterns
    @return: tuple `(escaped_text, replaces)`; `replaces` holds one dict per
        pattern, `{'marker': <marker>, 'replaces': [<matched text>, ...]}`,
        with the matched texts in order of appearance
    """
    replaces = []
    marker = _ESCAPE_MARKER
    # Break up literal occurrences of the base marker already present in the
    # text so they are not mistaken for placeholders. This alteration is not
    # recorded in `replaces`.
    text = text.replace(marker, marker[0] + ' ' + marker[1:])
    for rgx in escape_patterns:
        fragments = []
        pieces = []
        cursor = 0
        # Collect the untouched stretches and the markers, then join once,
        # instead of rebuilding the whole text on every match.
        for match in rgx.finditer(text):
            fragments.append(match.group(0))
            pieces.append(text[cursor:match.start()])
            pieces.append(marker)
            cursor = match.end()
        pieces.append(text[cursor:])
        text = ''.join(pieces)
        replaces.append({'marker': marker, 'replaces': fragments})
        # the space between the marker and the text is needed for correct translation
        marker = marker[:-1] + '<> '
    return text, replaces


def _unescape(text, replaces):
    """
    Put the escaped fragments back in place of their markers.

    `replaces` is the list built by `_escape`. Its entries are processed
    last to first; within an entry every stored fragment replaces the first
    marker still present in the text. A fragment whose marker is no longer
    found is skipped.
    """
    for entry in reversed(replaces):
        placeholder = entry['marker']
        for fragment in entry['replaces']:
            # Only the leftmost marker is replaced; a missing marker leaves the text as is.
            text = text.replace(placeholder, fragment, 1)
    return text
