# -*- coding: utf-8 -*-
import re
from functools import wraps
from yt.wrapper import with_context, create_table_switch, ypath_join

from datacloud.dev_utils.yql import yql_helpers
from datacloud.launcher.lib.util import rotate_stream_tables, rotate_history_tables, RotateLevel
from datacloud.dev_utils.yt.yt_utils import create_folders
from datacloud.dev_utils.yt import yt_utils
from datacloud.dev_utils.time.patterns import RE_DAILY_LOG_FORMAT
from datacloud.dev_utils.logging.logger import get_basic_logger

logger = get_basic_logger(__name__)


def _parallel_mapper(mapper):
    """Wrap ``mapper`` so its output is routed by the input table index.

    The returned mapper is context-aware (``with_context``): for every input
    record it first emits a table switch to ``context.table_index`` (when the
    context reports one) and then re-yields everything ``mapper(record)``
    produces.

    :param mapper: callable taking a single record and returning an iterable
        of output records.
    :return: the wrapping, context-aware mapper.
    """
    @with_context
    @wraps(mapper)
    def wrapper(record, context):
        table = context.table_index
        if table is not None:
            yield create_table_switch(table)
        for r in mapper(record):
            yield r
    # Return the wrapper: returning ``mapper`` itself would silently discard
    # the table switching implemented above.
    return wrapper


class LogFilter(object):
    """Base class with rotation logic for filtered log tables stored in YT.

    Subclasses provide the ``list_*`` / ``get_*`` hooks declared at the bottom
    of the class.  ``update_events_last_ndays`` additionally relies on
    ``list_filtered_events_stream``, ``get_events_last_ndays_path`` and
    ``get_events_history_root``, which are not declared here and must be
    supplied by the concrete class.
    """

    def grep_stream_tables(self, table_pathes):
        """Filter the given stream tables; must be implemented by subclasses."""
        raise NotImplementedError()

    def yt_list(self, path):
        """Return absolute paths of the children of ``path``, or ``[]`` if it is missing."""
        yt = yt_utils.get_yt_client()
        if yt.exists(path):
            return yt.list(path, absolute=True)
        return []

    def merge_history(self, merge_today=False, is_clean_logs=True):
        """Merge filtered stream tables into history and rotate the history.

        For every ``(history_root, stream_tables)`` pair produced by
        ``list_filtered_stream()`` the stream tables are rotated into
        ``history_root``; everything runs inside a single YT transaction.

        :param merge_today: forwarded to ``rotate_stream_tables``.
        :param is_clean_logs: when true, run ``clean_logs`` over the day
            tables returned by ``rotate_stream_tables``.
        """
        yt = yt_utils.get_yt_client()
        with yt.Transaction():
            for history_root, all_stream_tables in self.list_filtered_stream():
                all_stream_tables = list(all_stream_tables)
                logger.info('Merge stream tables into %s', history_root)
                yt.mkdir(history_root, recursive=True)
                day_tables = rotate_stream_tables(yt, all_stream_tables, history_root, merge_today)
                if is_clean_logs:
                    clean_logs(yt, day_tables.keys())
                logger.info('Rotate history tables in %s', history_root)
                rotate_history_tables(yt, history_root, last_merge_level=RotateLevel.YEARLY)

    def update_events_last_ndays(self, service, env_type, merge_today=False, is_clean_logs=True):
        """Refresh the "last n days" folder and mirror day tables into history.

        Stream tables are rotated into the last-ndays folder.  Every resulting
        day table is then written to the events history root under the name
        stored in its ``key`` attribute; an already existing history table is
        first moved aside into ``<history root>_tmp``.  Finally old tables are
        dropped from the last-ndays folder.  Everything runs inside a single
        YT transaction.

        :param service: forwarded to the path hooks of the concrete class.
        :param env_type: forwarded to the path hooks of the concrete class.
        :param merge_today: forwarded to ``rotate_stream_tables``.
        :param is_clean_logs: when true the history table is produced by a
            ``UserIdsCleaner`` map operation, otherwise by a plain copy.
        """
        yt_client = yt_utils.get_yt_client()
        with yt_client.Transaction():
            logger.info('Started update ndays folder...')
            all_service_stream_tables = self.list_filtered_events_stream()
            last_ndays_path = self.get_events_last_ndays_path(service, env_type)
            events_history_root = self.get_events_history_root(service, env_type)
            logger.info('Merge stream tables into %s', last_ndays_path)

            create_folders([last_ndays_path], yt_client)
            day_logs = rotate_stream_tables(yt_client, all_service_stream_tables, last_ndays_path, merge_today)

            for day in day_logs:
                logger.info('Copy day table [%s] to history root %s', day, events_history_root)
                # Read the attribute once; it names both the history table and
                # its backup copy.
                day_key = yt_client.get_attribute(day, 'key')
                history_day_log_table = ypath_join(events_history_root, day_key)
                if yt_client.exists(history_day_log_table):
                    tmp_service_history_folder = events_history_root + '_tmp'
                    # Implicit literal concatenation keeps the message free of
                    # the indentation that a backslash continuation embeds.
                    logger.warning(
                        'Table [%s] exists, it is going to be replaced with table [%s]. '
                        'Current table will be temporary stored in tmp folder [%s]',
                        history_day_log_table, day, tmp_service_history_folder
                    )
                    create_folders([tmp_service_history_folder], yt_client)
                    yt_client.move(
                        history_day_log_table,
                        ypath_join(tmp_service_history_folder, day_key),
                        force=True
                    )

                if is_clean_logs:
                    yt_client.run_map(
                        UserIdsCleaner(('okb', 'mkb')),
                        day,
                        history_day_log_table,
                        spec={'title': 'Clean partner logs'}
                    )
                else:
                    yt_client.copy(day, history_day_log_table, force=True)

            logger.info('Deleting old ndays tables...')
            keep_only_last_ndays(yt_client, last_ndays_path)
            logger.info('Done update ndays folder!')

    def get_yt_client(self):
        """Hook for subclasses; the methods above use ``yt_utils.get_yt_client()`` instead."""
        raise NotImplementedError()

    def get_stream_output_tables(self, time_str):
        """Hook: output tables for the stream table named ``time_str``."""
        raise NotImplementedError()

    def list_unfiltered_stream(self):
        """Hook for subclasses; not called within this class."""
        raise NotImplementedError()

    def list_filtered_stream(self):
        """Hook: iterable of ``(history_root, stream_tables)`` pairs used by ``merge_history``."""
        raise NotImplementedError()

    def get_history_root(self):
        """Hook for subclasses; not called within this class."""
        raise NotImplementedError()


class YtLogFilter(LogFilter):
    """Filters stream log tables with a YT map operation."""

    def grep_stream_tables(self, mapper, table_pathes):
        """Run ``mapper`` over the existing stream tables and drop empty outputs.

        Filters table like "//home/logfeller/logs/qloud-runtime-log/stream/5min/2017-10-18T15:20:00".
        The last path component of every input table is passed to
        ``get_stream_output_tables`` to obtain its output tables.  Missing
        input tables are skipped; if none exist nothing is run.  The whole
        operation happens inside one YT transaction.

        :param mapper: mapper passed to ``run_map``.
        :param table_pathes: list of input table paths (exactly one is
            supported at the moment).
        """
        assert not isinstance(table_pathes, str), 'list of pathes expected'
        # TODO: allow process multiple input into multiple output tables (where len(input) != len(output))
        assert len(table_pathes) == 1, 'multiple tables processing with multiple outputs not supported yet'
        yt = yt_utils.get_yt_client()
        with yt.Transaction():
            output_tables = []
            input_tables = []
            # Number of output tables per input table; must be the same for
            # every input.
            output_multiplier = None
            for table_path in table_pathes:
                if not yt.exists(table_path):
                    continue
                time_str = table_path.split('/')[-1]
                res_tables = list(self.get_stream_output_tables(time_str))
                if output_multiplier is None:
                    output_multiplier = len(res_tables)
                else:
                    # ``output_multiplier`` is an int, compare it directly.
                    assert output_multiplier == len(res_tables)

                output_tables.extend(res_tables)
                input_tables.append(table_path)

            if not input_tables:
                return
            assert len(input_tables) * output_multiplier == len(output_tables)
            if len(input_tables) > 1:
                # ``_parallel_mapper`` accepts only the mapper.
                # NOTE(review): it switches output to the input table index,
                # which presumably fits only one output per input - confirm
                # before lifting the single-table restriction above.
                mapper = _parallel_mapper(mapper)
            yt.run_map(
                mapper,
                input_tables,
                output_tables,
            )
            for table in output_tables:
                # remove empty result tables
                if yt.row_count(table) == 0:
                    yt.remove(table)


class YqlLogFilter(LogFilter):
    """Filters a stream log table by executing a YQL query over it."""

    def grep_stream_tables(self, query, table_pathes):
        """Execute ``query`` for a single stream table and drop an empty result.

        Filters table like "//home/logfeller/logs/qloud-runtime-log/stream/5min/2017-10-18T15:20:00".
        The query receives ``input_table`` and ``output_table`` parameters;
        the output table comes from ``get_stream_output_tables`` called with
        the last path component of the input table.  Everything runs inside
        one YT transaction.

        :param query: YQL query text handed to ``yql_helpers.execute_yql``.
        :param table_pathes: list holding exactly one input table path.
        """
        assert not isinstance(table_pathes, str), 'list of pathes expected'
        assert len(table_pathes) == 1, 'multiple tables processing with multiple outputs not supported yet'
        yt_client = yt_utils.get_yt_client()
        with yt_client.Transaction():
            source_table = table_pathes[0]
            timestamp = source_table.rsplit('/', 1)[-1]

            targets = list(self.get_stream_output_tables(timestamp))
            assert len(targets) == 1
            target = targets[0]

            query_params = {}
            query_params['input_table'] = source_table
            query_params['output_table'] = str(target)

            logger.info('query is:\n{}\n'.format(query))
            logger.info('params are: {}'.format(query_params))

            yql_helpers.execute_yql(
                query=query,
                params=query_params,
                yt_client=yt_client,
                syntax_version=1,
            )

            # An empty result table carries no information, so drop it.
            produced_rows = yt_client.row_count(target)
            if produced_rows == 0:
                yt_client.remove(target)


def build_ready_detector(fn):
    """Adapt a table-listing callable into a status-lib compatible ready detector.

    The returned generator function accepts ``date_time`` and ``days`` for
    signature compatibility only (both are ignored) and yields a
    ``(table, True)`` pair for every table produced by ``fn()``.
    """
    @wraps(fn)
    def detector(date_time, days=None):
        listed = fn()
        for path in listed:
            yield path, True

    return detector


def new_build_ready_detector(fn):
    """Adapt a table-listing callable into a status-lib compatible ready detector.

    Unlike ``build_ready_detector`` the second element of every yielded pair
    is a data dict ``{'table_path': table}``.  ``date_time`` and ``days`` are
    accepted but ignored.
    """
    @wraps(fn)
    def detector(date_time, days=None):
        listed = fn()
        for path in listed:
            payload = {'table_path': path}
            yield path, payload
    return detector


def build_step_processor(fn, step_name):
    """Adapt a tables-processing callable into a status-lib compatible step processor.

    The returned function takes a task, calls ``fn`` with a one-element list
    holding ``task.data['table_path']`` and returns ``[task.make_done()]``.
    ``step_name`` is accepted but not used here.
    """
    @wraps(fn)
    def process(task):
        assert 'table_path' in task.data, '`table_path` not found in `{}` data.'.format(task)
        table_path = task.data['table_path']
        fn([table_path])
        finished = task.make_done()
        return [finished]
    return process


class UserIdsCleaner(object):
    """Mapper that erases user identifiers in log records of selected partners.

    Instances are callable with a single record (a dict) and yield that same
    record after overwriting, in place, the e-mail / phone / cookie values
    (and ``id_value``) with ``'erased'`` when the record belongs to one of
    the configured partners.
    """

    _ERASE = 'erased'
    # Maps the key of an id list inside ``user_ids`` to the key holding the
    # raw value inside each entry of that list.
    _IDS_TYPES = {
        'emails': 'email',
        'phones': 'phone',
        'cookies': 'cookie',
    }

    def __init__(self, partner_id_list):
        """:param partner_id_list: container of partner ids whose records are cleaned."""
        self._partner_id_list = partner_id_list

    def _clear_context(self, rec):
        """Erase ids found under ``context`` or ``fields.context`` of ``rec``."""
        context = rec.get('context')
        if not context:
            # ``fields`` may be present but hold None, hence the ``or {}``.
            context = (rec.get('fields') or {}).get('context')
        if context and context.get('partner_id') in self._partner_id_list:
            query = context.get('query') or {}
            self._clear_user_ids(query.get('user_ids'))

    def _clear_user_ids(self, user_ids):
        """Overwrite raw id values in every known id list of ``user_ids``."""
        if not user_ids:
            return
        # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
        for ids_type, id_name in self._IDS_TYPES.items():
            for user_id in user_ids.get(ids_type) or ():
                if id_name in user_id:
                    user_id[id_name] = self._ERASE
                if 'id_value' in user_id:
                    user_id['id_value'] = self._ERASE

    def __call__(self, rec):
        """Clean ``rec`` in place when needed and yield it."""
        # For oldschool logs
        # rec['fields'] can exist and actually contain None
        fields = rec.get('fields')
        if 'context' in rec or (fields is not None and 'context' in fields):
            self._clear_context(rec)
        # For fast logs
        req_body = rec.get('req_body') or {}
        if rec.get('partner_id') in self._partner_id_list and req_body.get('user_ids'):
            self._clear_user_ids(req_body['user_ids'])
        yield rec


def clean_logs(yt, stream_tables, partner_ids=('okb', 'mkb')):
    """Erase partner user ids in every given table, rewriting it in place.

    A separate map operation (input and output are the same table) is run
    for each table inside its own YT transaction.

    :param yt: YT client used to run the operations.
    :param stream_tables: iterable of table paths to clean.
    :param partner_ids: partner ids whose records are cleaned; defaults to
        the previously hard-coded ``('okb', 'mkb')``.
    """
    cleaner = UserIdsCleaner(partner_ids)
    for table in stream_tables:
        logger.info('Clean logs in %s', table)
        with yt.Transaction():
            yt.run_map(
                cleaner,
                table,
                table,
                spec={'title': 'Clean partner logs'}
            )


def keep_only_last_ndays(yt_client, folder_path, n=20):
    """Remove old daily tables from ``folder_path``, keeping the last ``n`` nodes.

    The children of ``folder_path`` are sorted by absolute path and all but
    the last ``n`` become removal candidates.  A candidate is removed only if
    its name matches ``RE_DAILY_LOG_FORMAT``; other nodes are left alone but
    still count towards ``n``.

    :param yt_client: YT client used to list and remove nodes.
    :param folder_path: folder to clean up.
    :param n: number of trailing nodes to keep; ``0`` makes every node a
        removal candidate.
    :raises ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError('n must be non-negative, got {}'.format(n))
    nodes = sorted(yt_client.list(folder_path, absolute=True))
    # Use an explicit cut index: ``nodes[:-n]`` is empty for n == 0, which
    # would keep everything instead of nothing.
    cut = max(len(nodes) - n, 0)
    for table in nodes[:cut]:
        if re.match(RE_DAILY_LOG_FORMAT, table.rsplit('/', 1)[-1]):
            yt_client.remove(table)
