# coding=utf-8
from __future__ import absolute_import, unicode_literals, print_function

import logging
import tempfile
from copy import deepcopy

from sandbox.projects.direct_internal_analytics.laborer_base.exceptions import ExecutionException, \
    TargetAlreadyProcessedException, TargetValidationException, \
    TargetDependenciesNotReadyException
from sandbox.projects.direct_internal_analytics.laborer_base.processing import get_data_path, get_expiration_time
from ..target_types.base import YqlSelectTarget, ClickHouseSelectTarget
from ..util.templating import smart_apply_template

logger = logging.getLogger(__name__)


class _TargetList(object):
    """Output paths of a list of targets, addressable by position or by target title.

    ``tl[i]`` returns the path of the i-th target (in the order passed to the
    constructor), ``tl.<title>`` returns the path of the target with that title,
    and ``len(tl)`` is the number of targets.
    """

    def __init__(self, targets, executor):
        """
        :param targets: iterable of targets, each exposing a ``title`` attribute
        :param executor: object providing ``get_output_path(target)``; it is only
            touched when ``targets`` is non-empty
        """
        self._order = []
        self._data = {}

        for target in targets:
            path = executor.get_output_path(target)

            self._order.append(path)
            self._data[target.title] = path

    def __getattr__(self, item):
        """Return the path stored under title ``item``.

        :raises AttributeError: if there is no target with such a title
        """
        # __getattr__ is only invoked after regular lookup failed. Go through
        # __dict__ so that a missing ``_data`` (an instance created without
        # __init__, e.g. while being copied or unpickled) does not recurse
        # endlessly, and translate the miss into AttributeError so hasattr()
        # and getattr(obj, name, default) behave as the protocol requires.
        try:
            return self.__dict__['_data'][item]
        except KeyError:
            raise AttributeError(
                '{!r} object has no attribute {!r}'.format(type(self).__name__, item))

    def __getitem__(self, item):
        """Return the path at position ``item`` (an index or a slice of the ordered paths)."""
        return self._order[item]

    def __len__(self):
        return len(self._order)

    def __deepcopy__(self, memodict=None):
        """Return an independent copy that holds copies of the stored paths."""
        tl = _TargetList([], None)
        # Hand the memo down so nested copies participate in the same deepcopy pass.
        tl._order = deepcopy(self._order, memodict)
        tl._data = deepcopy(self._data, memodict)
        return tl


class BaseExecutor(object):
    """Base executor class. Subclasses must override .execute() and, if needed, .is_valid().

    In its base version the class checks that:
    - .is_valid() returns true
    - the force flag is set or the table the results are written to does not exist yet
    - the tables with the results of the dependencies exist

    After that .execute() is run.
    """
    # Base target class. All of its subclasses are handled by this executor
    target_class = None

    def __init__(self, target, config, context, force=False):
        """
        :type target: laborer.target.base.BaseTarget
        :type config: dict[basestring]
        :type context: dict[basestring]
        :type force: bool
        """
        assert issubclass(target, self.target_class), "Target should be a subclass of executors target_class"

        self._config = config
        self._target = target
        self._force = force

        # Cache of output paths keyed by target, filled lazily by get_output_path()
        self._paths = {}

        self._context = None
        self._prepare_local_context(context)

    def _prepare_local_context(self, context):
        """Build the template context of this executor.

        Starts from ``title`` and ``home``, lets the caller's ``context`` override
        them, then puts ``output_path`` / ``insert_target`` on top.
        """
        self._context = {
            'title': self.title,
            'home': self._config['home'],
        }
        self._context.update(context)
        # output_path is resolved against the context built so far, so this
        # update has to come after the caller's values were merged in.
        self._context.update({
            'output_path': self.output_path,
            'insert_target': self.output_path,
        })
        if self._target.dependencies:
            self._context['dependencies'] = _TargetList(self._target.dependencies, self)

    def apply_template(self, template):
        """Render ``template`` with this executor's context."""
        return smart_apply_template(template, self._context)

    def get_paths_to_delete(self):
        """Return the list of paths removed by delete_data(); by default only the output path."""
        return [self.output_path]

    def delete_data(self):
        """Remove every existing path reported by get_paths_to_delete()."""
        for path in self.get_paths_to_delete():
            if self.yt.exists(path):
                logger.info('Deleting %s', path)
                self.yt.remove(path)

    def get_output_path(self, target):
        """Return the path where the result of the given target lives in this executor's execution context"""
        if target not in self._paths:
            self._paths[target] = get_data_path(target, self._context)

        return self._paths[target]

    def get_expiration_time(self, target):
        """Return the time after which the result of the target is removed from Cypress"""
        return get_expiration_time(target, self._context)

    def dependencies_ready(self):
        """Return true if the tables of all dependencies exist"""
        # all() stops at the first missing dependency, so no extra YT requests are made
        ready = all(self.yt_node_exists(target) for target in self._target.dependencies)

        logger.info('Result all dependencies checking: %s', ready)
        return ready

    def yt_node_exists(self, target=None):
        """Return true if the table with the result of the given target exists

        When ``target`` is omitted the executor's own target is checked.
        """
        path = self.get_output_path(target or self._target)

        exists = self.yt.exists(path)
        logger.info('Checking if yt_node "%s" exists: %s', path, exists)
        return exists

    def yt_write_table(self, path, data_iterator=None):
        """Write rows from ``data_iterator`` to the table at ``path`` inside a YT transaction.

        The table is created (recursively) when it does not exist yet; the write
        is issued with ``append=False``. Without ``data_iterator`` an empty row
        list is written.
        """
        with self.yt.Transaction():
            if not self.yt.exists(path):
                # Log the table actually being created, which is not necessarily the output path
                logger.info("Creating table %s", path)
                self.yt.create_table(path, recursive=True)

            logger.info("Writing table %s", path)
            self.yt.write_table(
                self.yt.TablePath(path, append=False),
                (data_iterator or []),
                format=self.yt.JsonFormat(attributes={"encode_utf8": False})
            )

    def execute_scope(self, transaction_id=None):
        """Run .execute() and, if the output node exists afterwards, set its attributes.

        The attributes are ``user_attributes`` plus ``expiration_time`` when one
        is defined and not already supplied by the user attributes.
        """
        self.execute(transaction_id)
        if not self.yt_node_exists():
            return

        # Work on a copy: the mapping returned by user_attributes may be owned
        # by the target, and must not pick up the expiration_time added here.
        attributes = dict(self.user_attributes)
        expiration_time = self.expiration_time
        if expiration_time and 'expiration_time' not in attributes:
            attributes['expiration_time'] = expiration_time
        for attribute, value in attributes.items():
            logger.info('Setting attribute %s = %s', attribute, value)
            self.yt.set_attribute(self.output_path, attribute, value)

    def start_execution(self):
        """Run the computation of the target with all the required checks before the start

        :raises TargetValidationException: if .is_valid() returns false
        :raises TargetAlreadyProcessedException: if the result exists and force is not set
        :raises TargetDependenciesNotReadyException: if some dependency table is missing
        """
        logger.info('Starting validation of %s', self._target.__name__)
        if not self.is_valid():
            raise TargetValidationException()

        if not self._force and self.yt_node_exists():
            logger.warning('Result already exists')
            raise TargetAlreadyProcessedException()

        if not self.dependencies_ready():
            logger.error('Dependencies not ready: %s', self._target.dependencies)
            raise TargetDependenciesNotReadyException()

        if self._force and self.yt_node_exists():
            logger.info('Result already exists, deleting')
            self.yt.remove(self.output_path)

        logger.info('Starting execution with context %s', self._context)
        if self._target.execute_in_transaction:
            with self.yt.Transaction(ping=True, timeout=30000) as tx:
                self.execute_scope(tx.transaction_id)
        else:
            self.execute_scope()

    def is_valid(self):
        """Return true if the target contains no errors and can be computed"""
        return True

    def execute(self, transaction_id=None):
        """Do the actual useful work"""
        raise NotImplementedError('Method should be implemented in child')

    @property
    def user_attributes(self):
        """Attributes to set on the output node after execution; none by default"""
        return {}

    @property
    def title(self):
        """Title of the processed target"""
        return self._target.title

    @property
    def output_path(self):
        """Path to the result of the processed target"""
        return self.get_output_path(self._target)

    @property
    def expiration_time(self):
        """Lifetime of the result of the processed target"""
        return self.get_expiration_time(self._target)

    @property
    def yt(self):
        """Configured and ready-to-use YT client"""
        return self._config['yt_client']


class YqlSelectExecutor(BaseExecutor):
    """Executor for YqlSelectTarget: renders the target's query template and runs it via the YQL client."""
    YQL_OPERATION_PATTERN = 'https://yql.yandex-team.ru/Operations/{id}'

    target_class = YqlSelectTarget
    # Id of the most recently started YQL operation; set by operation_callback()
    operation_id = None

    def get_share_url(self, operation):
        """Request a share id for ``operation`` and return the operation URL built from it."""
        from yql.client.operation import YqlOperationShareIdRequest

        request = YqlOperationShareIdRequest(operation.operation_id)
        request.run()
        share_id = request.json

        return self.YQL_OPERATION_PATTERN.format(id=share_id)

    def operation_callback(self, operation):
        """Pre-start hook of the query: remember the operation id and log its shared link."""
        self.operation_id = operation.operation_id
        logger.info('YQL shared link : %s', self.get_share_url(operation))

    def execute(self, transaction_id=None):
        """Run the rendered query and log the rows it returned.

        :raises ExecutionException: if the query did not finish successfully;
            the message is the newline-joined list of reported errors
        """
        logger.info('Executing query %s', self.title)

        rendered = self.apply_template(self.query)
        # Both pragmas go in front of the rendered query, PublishedAutoMerge first
        query_text = (
            "PRAGMA yt.PublishedAutoMerge = 'economy';\n"
            "PRAGMA yt.TemporaryAutoMerge = 'disabled';\n"
        ) + rendered
        logger.info('Will run query: %s', query_text)

        query = self.yql.query(query_text, syntax_version=self.yql_syntax_version)
        query.run(pre_start_callback=self.operation_callback, transaction_id=transaction_id)
        results = query.get_results()

        if not results.is_success:
            msg = '\n'.join(str(err) for err in results.errors)
            logger.error('Error when executing query %s: %s', self.title, msg)

            raise ExecutionException(msg)

        logger.info('Query %s successfully finished. Collecting result', self.title)

        collected = []
        for table in results:
            table.fetch_full_data()

            # table.columns yields (name, type) pairs; only the names are needed
            names = [name for name, _ in table.columns]

            for row in table.rows:
                collected.append({names[i]: value for i, value in enumerate(row)})

        logger.info('Result is %s', collected)

    @property
    def yql(self):
        """YQL client taken from the executor config"""
        return self._config['yql_client']

    @property
    def yql_syntax_version(self):
        """YQL syntax version declared by the target"""
        # noinspection PyUnresolvedReferences
        return self._target.yql_syntax_version

    @property
    def query(self):
        """Query template declared by the target"""
        # noinspection PyUnresolvedReferences
        return self._target.query

    @property
    def user_attributes(self):
        """Attributes produced by the target from a deep copy of the executor context"""
        context_copy = deepcopy(self._context)
        # noinspection PyUnresolvedReferences
        return self._target.user_attributes(context_copy)


class ClickHouseSelectExecutor(BaseExecutor):
    """Executor for ClickHouseSelectTarget.

    Runs the rendered query through the ClickHouse client, spools the
    tab-separated response to a temporary file and writes the parsed rows to the
    target's output table.
    """
    # Size of the chunks the HTTP response is read in
    READ_CHUNK_SIZE = 1024 * 1024
    # Largest tolerated share of lines that fail to decode as UTF-8
    MAX_CORRUPTED_RATIO = 0.001
    target_class = ClickHouseSelectTarget
    # Column type name (as declared in the target's columns_types) -> cast function
    _cast_funcs = {
        'string': unicode,
        'integer': int,
        'double': float,
    }

    def execute(self, transaction_id=None):
        """Run the query and store its result in the output table."""
        query_text = self.apply_template(self.query)
        logger.info('Will run query: %s', query_text)
        r = self.clickhouse.query(query_text, timeout=(10, 600))
        # 'w+b' lets the same handle be read back after writing, and the default
        # delete-on-close removes the spool file even if the upload fails.
        with tempfile.NamedTemporaryFile('w+b') as ntf:
            for data_chunk in r.iter_content(self.READ_CHUNK_SIZE):
                ntf.write(data_chunk)
            ntf.flush()
            logger.info('Saved query results to temporary file %s', ntf.name)

            ntf.seek(0)
            self.yt_write_table(self.output_path, self._content_generator(ntf.file, self.columns))

    def _content_generator(self, input_stream, schema):
        """Yield one dict per line of ``input_stream``.

        Every line is decoded as UTF-8, stripped of newline characters, split on
        tabs and paired with the column names from ``schema`` (values without a
        matching column, and columns without a value, are dropped by zip). Lines
        that fail to decode are logged and skipped.

        :param input_stream: iterable of byte lines
        :param schema: ordered column names
        :raises ExecutionException: after the last line, if the share of skipped
            lines exceeds MAX_CORRUPTED_RATIO
        """
        fails_num = 0
        total_lines = 0
        for num, line in enumerate(input_stream, start=1):
            total_lines = num
            try:
                text = line.decode('utf-8')
            except UnicodeDecodeError as e:
                fails_num += 1
                logger.info('Failed on line #%s (%r): %s', num, line, e)
                continue

            values = text.strip('\n').split('\t')
            yield {k: self._cast(k, v) for k, v in zip(schema, values)}

        if not total_lines:
            # Empty result: nothing to validate, and no ratio to divide by
            return

        # float() forces true division; int / int truncates to 0 on Python 2,
        # which would make the threshold below unreachable.
        fail_ratio = float(fails_num) / total_lines
        if fail_ratio > self.MAX_CORRUPTED_RATIO:
            raise ExecutionException("Too much corrupted lines: {} of {}".format(fails_num, total_lines))
        elif fails_num:
            logger.info('Got %s fails in total %s lines (%s ratio)', fails_num, total_lines, fail_ratio)

    def _cast(self, key, value):
        """Convert ``value`` to the type declared for column ``key``.

        :raises KeyError: if ``key`` has no entry in columns_types
        :raises ValueError: if the declared type has no cast function
        """
        type_name = self.columns_types[key]
        try:
            cast = self._cast_funcs[type_name]
        except KeyError:
            raise ValueError('Unknown cast key: {}'.format(type_name))
        return cast(value)

    @property
    def clickhouse(self):
        """ClickHouse client taken from the executor config"""
        return self._config['clickhouse_client']

    @property
    def query(self):
        """Query template declared by the target"""
        # noinspection PyUnresolvedReferences
        return self._target.query

    @property
    def columns(self):
        """Column names declared by the target, in result order"""
        # noinspection PyUnresolvedReferences
        return self._target.columns

    @property
    def columns_types(self):
        """Mapping of column name to type name declared by the target"""
        # noinspection PyUnresolvedReferences
        return self._target.columns_types

    @property
    def sort_by(self):
        """Sort specification declared by the target"""
        # noinspection PyUnresolvedReferences
        return self._target.sort_by
