# coding: utf-8

from datetime import timedelta
from collections import OrderedDict

from nile.api.v1 import (
    extractors as ne,
    extended_schema,
    multischema,
    with_hints,
    stream,
    files,
)

from .utils import DATE_FORMAT, date_range, transpose

# Identifiers of the raw logs handled by this module. Every entry equals the
# ``log`` class attribute of one of the stream classes defined in this file.
RAW_LOGS = {
    'redir-log',
    'collections-redir-log',
    'user_sessions',
    'cmnt-api-access-log',
    'sent-messages-log',
    'push-log',
    'chevent-log',
    'watch-log',
    'all-collections-shows-log'
}


class Stream(object):
    """Common base class for the stream wrappers in this module.

    Declares no attributes or methods of its own.
    """


class TableStream(Stream):
    """Stream backed by a single table opened through ``job.table``."""

    def __init__(self, job, table):
        """Remember ``job`` and ``table`` and open the table right away.

        :param job: object exposing ``table(path)``
            (presumably a nile job - confirm against the caller).
        :param table: table identifier passed to ``job.table`` unchanged.
        """
        self.job, self.table = job, table
        self.stream = self.job.table(self.table)

    def get_stream(self):
        """Return the stream created in the constructor."""
        opened = self.stream
        return opened


class DailyLogsStream(Stream):
    """Base class for streams that are built from log tables of a date range.

    For every registered ``(plot, function)`` pair it keeps the keyword
    arguments passed at registration and a slot for the resulting nile
    stream. The slot is ``None`` until a subclass's ``fulfill`` fills it.
    """

    def __init__(self, job, date, dateend):
        """Store the job and the date range; no tables are opened here.

        :param job: job object used by subclasses to open tables.
        :param date: first day of the range (subclasses call ``strftime``
            on it, so presumably a ``date``/``datetime`` - confirm).
        :param dateend: last day of the range, same kind of object.
        """
        # (plot, function) -> fn_kwargs, in registration order.
        self.plots_data = OrderedDict()
        # (plot, function) -> nile stream, ``None`` until populated.
        self.nile_streams = OrderedDict()
        self.job = job
        self.date = date
        self.dateend = dateend

    def add_extractor(self, plot, function, fn_kwargs):
        """Register ``function`` for ``plot`` together with its kwargs.

        Registering the same ``(plot, function)`` pair again overwrites the
        stored kwargs and resets the stream slot to ``None``.
        """
        self.plots_data[(plot, function)] = fn_kwargs
        self.nile_streams[(plot, function)] = None

    def get_nile_streams(self):
        """Yield ``(plot, function, nile_stream)`` in registration order."""
        # ``items()`` instead of the Python-2-only ``iteritems()``: it works on
        # both Python 2 and 3 and matches the rest of the module.
        for (plot, function), nile_stream in self.nile_streams.items():
            yield plot, function, nile_stream


class UserSessionsStream(DailyLogsStream):
    """Stream over user-session tables, split into one output per extractor.

    Takes the ``DailyLogsStream`` constructor arguments plus an optional
    ``use_1p`` keyword (default ``False``) which switches the table paths to
    the ``sample_by_uid_1p/`` subtree.
    """
    log = 'user_sessions'

    def __init__(self, *args, **kwargs):
        # ``use_1p`` is consumed here so it never reaches the base constructor.
        self.use_1p = kwargs.pop('use_1p', False)
        super(UserSessionsStream, self).__init__(*args, **kwargs)

    def _get_table_name(self, date):
        """Return the ``clean`` user-sessions table path for ``date``."""
        sample_prefix = 'sample_by_uid_1p/' if self.use_1p else ''
        day = date.strftime(DATE_FORMAT)
        return '//user_sessions/pub/{use_1p}{{search,images}}/daily/{date}/clean'.format(
            use_1p=sample_prefix,
            date=day,
        )

    def _get_log_paths(self):
        """Return one table path per day produced by ``date_range``."""
        paths = []
        for day in date_range(self.date, self.dateend):
            paths.append(self._get_table_name(day))
        return paths

    def _split_table(self, table, schemas):
        """Reduce one table into a list of streams, one per registered extractor."""
        sessions = self.job.table(table, ignore_missing=True).groupby('key').sort('subkey')
        reducer = with_hints(
            outputs_count=len(schemas),
            output_schema=multischema(*schemas),
        )(
            UserSessionsLogsSplitter(self.plots_data)
        )
        outputs = sessions.reduce(
            reducer,
            files=[
                # libra does not have to be shipped along: it comes from
                # Arcadia and is built into the plotter.
                files.RemoteFile('//statbox/statbox-dict-last/blockstat.dict')
            ],
            memory_limit=16*1024,
        )

        # With a single output the reduce result is a plain Stream, so wrap it
        # into a list; otherwise (per the original note) it is a BatchStream
        # that can be iterated directly.
        if type(outputs) == stream.Stream:
            outputs = [outputs]

        # HACK: output streams of the user-sessions reduce need one more
        # operation, otherwise they cannot be concatenated later (STATLIBS-791).
        return [out.project(ne.all()) for out in outputs]

    def fulfill(self):
        """Populate ``self.nile_streams`` with one stream per extractor.

        Every day's table is reduced separately, then the outputs of the same
        extractor are concatenated across days.
        """
        schemas = [fn_kwargs.get('schema') for fn_kwargs in self.plots_data.values()]
        per_day = [self._split_table(table, schemas) for table in self._get_log_paths()]

        # Turn the list of lists indexed by day into a list of lists indexed
        # by output type.
        streams_by_type = transpose(per_day)

        for streams, key in zip(streams_by_type, self.plots_data.keys()):
            # Glue the outputs of one type for every day into a single nile stream.
            self.nile_streams[key] = self.job.concat(*streams)


class RedirLogBaseStream(DailyLogsStream):
    """Base for streams that split one log table with ``LogsSplitter``.

    Takes the ``DailyLogsStream`` constructor arguments plus an optional
    ``additional_days`` keyword (default ``0``) stored on the instance.
    Subclasses provide the table path through ``_get_log_path``.
    """

    def __init__(self, *args, **kwargs):
        # ``additional_days`` is consumed here so it never reaches the base
        # constructor.
        self.additional_days = kwargs.pop('additional_days', 0)
        super(RedirLogBaseStream, self).__init__(*args, **kwargs)

    def _get_log_path(self):
        """Return the table path to read; the base implementation returns ``''``."""
        return ''

    def fulfill(self):
        """Map the log table into one nile stream per registered pair.

        For every registered ``(plot, function)`` pair ``function(plot)`` is
        called to obtain the filter rules handed to ``LogsSplitter``; the
        resulting outputs are stored in ``self.nile_streams`` in the same order.
        """
        registered = list(self.nile_streams.keys())
        filters = [function(plot) for plot, function in registered]

        source = self.job.table(self._get_log_path(), ignore_missing=True, view=None)
        mapper = with_hints(
            outputs_count=len(filters),
            output_schema=extended_schema(),
        )(
            LogsSplitter(filters)
        )
        outputs = source.map(mapper)

        # With a single output the map result is a plain Stream, so wrap it
        # into a list; otherwise (per the original note) it is a BatchStream
        # that can be iterated directly.
        if type(outputs) == stream.Stream:
            outputs = [outputs]

        for nile_stream, key in zip(outputs, registered):
            self.nile_streams[key] = nile_stream


class RedirLogStream(RedirLogBaseStream):
    """Stream over the ``//logs/redir-log/1d`` tables."""
    log = 'redir-log'

    def _get_log_path(self):
        """Return a ``{date..dateend}`` range path when ``dateend`` is later
        than ``date``, otherwise the path of the single ``date`` table."""
        spans_several_days = self.dateend > self.date
        first_day = self.date.strftime(DATE_FORMAT)
        if not spans_several_days:
            return '//logs/redir-log/1d/{date}'.format(date=first_day)

        last_day = self.dateend.strftime(DATE_FORMAT)
        return '//logs/redir-log/1d/{{{date}..{dateend}}}'.format(
            date=first_day,
            dateend=last_day,
        )


class CollectionsRedirLogStream(RedirLogBaseStream):
    """Stream over the ``//logs/collections-redir-log/1d`` tables."""
    log = 'collections-redir-log'

    def _get_log_path(self):
        """Return the table path, honouring ``additional_days``.

        A ``{start..dateend}`` range path is returned when ``dateend`` is
        later than ``date`` or ``additional_days`` is positive; in the latter
        case the range starts ``additional_days`` days before ``date``.
        Otherwise the path of the single ``date`` table is returned.
        """
        if not (self.dateend > self.date or self.additional_days > 0):
            return '//logs/collections-redir-log/1d/{}'.format(self.date.strftime(DATE_FORMAT))

        first_day = self.date
        if self.additional_days > 0:
            # Extend the range backwards by the requested number of days.
            first_day = first_day - timedelta(days=self.additional_days)

        start = first_day.strftime(DATE_FORMAT)
        finish = self.dateend.strftime(DATE_FORMAT)
        return '//logs/collections-redir-log/1d/{{{}..{}}}'.format(start, finish)


class CmntAPILogStream(RedirLogBaseStream):
    """Stream over the ``//logs/cmnt-production-cmnt-api-access-log/1d`` tables."""
    log = 'cmnt-api-access-log'

    def _get_log_path(self):
        """Return a ``{date..dateend}`` range path when ``dateend`` is later
        than ``date``, otherwise the path of the single ``date`` table."""
        spans_several_days = self.dateend > self.date
        first_day = self.date.strftime(DATE_FORMAT)
        if not spans_several_days:
            return '//logs/cmnt-production-cmnt-api-access-log/1d/{date}'.format(date=first_day)

        last_day = self.dateend.strftime(DATE_FORMAT)
        return '//logs/cmnt-production-cmnt-api-access-log/1d/{{{date}..{dateend}}}'.format(
            date=first_day,
            dateend=last_day,
        )


class SentMessagesLogStream(RedirLogBaseStream):
    """Stream over the messenger ``sent_messages_all`` table."""
    log = 'sent-messages-log'

    def _get_log_path(self):
        """Return the fixed table path; ``date`` and ``dateend`` are not used."""
        # NOTE: a disabled variant (formerly kept here as commented-out code)
        # built per-day paths under //home/mssngr/squeeze/sent_messages/<date>.
        table_path = '//home/mssngr/squeeze/sent_messages_all'
        return table_path


class PushLog(RedirLogBaseStream):
    """Stream over the ``push_final`` tables under ``.../sup/push_stats``."""
    log = 'push-log'

    def _get_log_path(self):
        """Return a ``{date..dateend}`` range path when ``dateend`` is later
        than ``date``, otherwise the path of the single ``date`` table."""
        spans_several_days = self.dateend > self.date
        first_day = self.date.strftime(DATE_FORMAT)
        if not spans_several_days:
            return '//home/search-functionality/sup/push_stats/{date}/push_final'.format(date=first_day)

        last_day = self.dateend.strftime(DATE_FORMAT)
        return '//home/search-functionality/sup/push_stats/{{{date}..{dateend}}}/push_final'.format(
            date=first_day,
            dateend=last_day,
        )


class CheventLog(RedirLogBaseStream):
    """Stream over the ``bs-chevent-cooked-log/v2/1d`` tables."""
    log = 'chevent-log'

    def _get_log_path(self):
        """Return a ``{date..dateend}`` range path when ``dateend`` is later
        than ``date``, otherwise the path of the single ``date`` table."""
        spans_several_days = self.dateend > self.date
        first_day = self.date.strftime(DATE_FORMAT)
        if not spans_several_days:
            return '//statbox/cooked_logs/bs-chevent-cooked-log/v2/1d/{date}'.format(date=first_day)

        last_day = self.dateend.strftime(DATE_FORMAT)
        return '//statbox/cooked_logs/bs-chevent-cooked-log/v2/1d/{{{date}..{dateend}}}'.format(
            date=first_day,
            dateend=last_day,
        )


class WatchLog(RedirLogBaseStream):
    """Stream over the ``//home/logfeller/logs/bs-watch-log/1d`` tables."""
    log = 'watch-log'

    def _get_log_path(self):
        """Return a ``{date..dateend}`` range path when ``dateend`` is later
        than ``date``, otherwise the path of the single ``date`` table."""
        spans_several_days = self.dateend > self.date
        first_day = self.date.strftime(DATE_FORMAT)
        if not spans_several_days:
            return '//home/logfeller/logs/bs-watch-log/1d/{date}'.format(date=first_day)

        last_day = self.dateend.strftime(DATE_FORMAT)
        return '//home/logfeller/logs/bs-watch-log/1d/{{{date}..{dateend}}}'.format(
            date=first_day,
            dateend=last_day,
        )


class AllCollectionsShowsLog(RedirLogBaseStream):
    """Stream over the ``COLA-10_collections_content_shows_squeeze`` tables."""
    log = 'all-collections-shows-log'

    def _get_log_path(self):
        """Return a ``{date..dateend}`` range path when ``dateend`` is later
        than ``date``, otherwise the path of the single ``date`` table."""
        spans_several_days = self.dateend > self.date
        first_day = self.date.strftime(DATE_FORMAT)
        if not spans_several_days:
            return '//home/collections/analytics/backups/COLA-10_collections_content_shows_squeeze/{date}'.format(date=first_day)

        last_day = self.dateend.strftime(DATE_FORMAT)
        return '//home/collections/analytics/backups/COLA-10_collections_content_shows_squeeze/{{{date}..{dateend}}}'.format(
            date=first_day,
            dateend=last_day,
        )


class LogsSplitter(object):
    """
    Mapper over any redir log.
    Applies every filter from ``filters`` to each table row and emits the row
    into the output whose index equals the filter's index in ``filters``.
    """
    def __init__(self, filters):
        self.filters = filters

    def apply_rules(self, row, rules):
        """Return ``True`` when ``row.get(key)`` equals ``value`` for every rule.

        An empty ``rules`` mapping matches every row. A key missing from the
        row is read as ``None``, so it matches a rule whose value is ``None``.
        """
        # TODO: support complex filters, not only "==": "in array", "or"
        # TODO: support rules given as callback(row)
        return not any(row.get(field) != expected for field, expected in rules.items())

    def __call__(self, records, *outputs):
        """Send every record to each output whose filter it satisfies.

        A record may go to several outputs, or to none.
        """
        for record in records:
            # Lazy on purpose: rule checks and emits stay interleaved per filter.
            matching = (
                index
                for index, rules in enumerate(self.filters)
                if self.apply_rules(record, rules)
            )
            for index in matching:
                outputs[index](record)

class UserSessionsLogsSplitter(object):
    """
    Reducer over user sessions.
    Parses every session with libra and feeds the parsed container to each
    registered extractor; records yielded by the extractor at position ``i``
    in ``plots_data`` go to output ``i``.
    """
    def __init__(self, plots_data):
        self.plots_data = plots_data

    def __call__(self, sessions, *outputs):
        """Run all extractors over every ``(key, records)`` group in ``sessions``."""
        # The Arcadia build of libra is used, hence the import at call time.
        import libra

        extractors = list(self.plots_data.keys())
        for _key, records in sessions:
            try:
                parsed = libra.Parse(records, 'blockstat.dict')
            except Exception:
                # Best effort: sessions that fail to parse are skipped silently.
                continue

            for index, (plot, function) in enumerate(extractors):
                for record in function(plot, parsed):
                    outputs[index](record)
