# coding: utf-8
from collections import defaultdict

from analytics.plotter_lib.plotter import Plot, require
from analytics.plotter_lib.utils import split_sessions, AddTotalsMapper

from nile.api.v1 import (
    with_hints,
    extended_schema,
    extractors as ne,
    aggregators as na,
    filters as nf,
    Record,
    grouping
)
from qb2.api.v1 import (
    filters as qf
)

# Session flavours that time_spent_reducer reports on. filter_session() gives
# special treatment to 'access' and 'has_access'; any other value (here 'any')
# leaves the session untouched.
SESSION_TYPES = ('any', 'access', 'has_access')


def convert_ui(ui):
    """Collapse a raw ``ui`` value into a coarse platform bucket.

    ``'yandexApp'`` and ``'mobile'`` map to ``'mobile'``; ``'tablet'`` and
    ``'desktop'`` map to ``'desktop'``; any falsy value (``None``, ``''``)
    maps to ``'undefined'``. Every other value is returned unchanged.
    """
    # Falsy values never match an alias below, so handling them first is safe.
    if not ui:
        return 'undefined'

    alias_table = (
        ('mobile', ('yandexApp', 'mobile')),
        ('desktop', ('tablet', 'desktop')),
    )
    for bucket, aliases in alias_table:
        if ui in aliases:
            return bucket

    return ui


def filter_session(session, session_type):
    """Narrow the events of one session according to ``session_type``.

    :param session: sequence of events; each event must support
        ``.get('path')``.
    :param session_type: one of the session flavours:

        * ``'access'`` -- keep only the events whose ``path`` is ``'access'``;
        * ``'has_access'`` -- keep the whole session when at least one event
          has ``path == 'access'``, otherwise return an empty list;
        * anything else (e.g. ``'any'``) -- return ``session`` unchanged.
    :return: a sequence that can be truth-tested and indexed by the caller.
    """
    if session_type == 'access':
        # Build a real list instead of calling filter(): on Python 3 filter()
        # returns a lazy iterator, which is always truthy and cannot be
        # indexed, while callers test emptiness and read [0] / [-1].
        return [event for event in session if event.get('path') == 'access']

    if session_type == 'has_access':
        has_access = any(event.get('path') == 'access' for event in session)
        return session if has_access else []

    return session


@with_hints(
    output_schema=dict(
        fielddate=str,
        ui=str,
        session_type=str,
        users=int,
        sessions=int,
        time_spent=int,
        pages=int,
    )
)
def time_spent_reducer(groups):
    """Accumulate per-group session statistics for every session type.

    For each ``(key, recs)`` group the records are split into sessions with
    ``split_sessions`` and each session is narrowed with ``filter_session``
    for every entry of ``SESSION_TYPES``. Sessions whose duration (last
    timestamp minus first timestamp) falls outside ``[0, 86400]`` are ignored.

    Yields one ``Record`` per session type that had at least one accepted
    session, carrying ``key.fielddate``, ``key.ui``, ``users=1``, the number
    of accepted sessions, their summed duration and the number of events with
    ``path == 'access'``.
    """
    # A session (in seconds) must not be longer than the number of seconds in a day.
    max_duration = 60 * 60 * 24

    for key, recs in groups:
        total_duration = defaultdict(int)
        accepted_sessions = defaultdict(int)
        access_events = defaultdict(int)

        for session in split_sessions(recs):
            for session_type in SESSION_TYPES:
                events = filter_session(session, session_type)
                if not events:
                    continue

                duration = events[-1].timestamp - events[0].timestamp
                if not 0 <= duration <= max_duration:
                    continue

                total_duration[session_type] += duration
                accepted_sessions[session_type] += 1
                access_events[session_type] += sum(
                    1 for event in events if event.get('path') == 'access'
                )

        # Only session types that received at least one accepted session are
        # present in total_duration, so only those produce output.
        for session_type in total_duration:
            yield Record(
                fielddate=key.fielddate,
                ui=key.ui,
                session_type=session_type,
                users=1,
                sessions=accepted_sessions[session_type],
                time_spent=total_duration[session_type],
                pages=access_events[session_type]
            )


@with_hints(output_schema=extended_schema(time_spent=int))
def event_time_spent_reducer(groups):
    """Attach to each record the time elapsed until the next record in its group.

    Within every group, each record except the last one is re-emitted with an
    extra ``time_spent`` field equal to the next record's ``timestamp`` minus
    its own. The last record of a group has no successor and is not emitted.
    """
    for _key, records in groups:
        previous = None
        for current in records:
            if previous:
                elapsed = current.timestamp - previous.timestamp
                yield previous.transform(time_spent=elapsed)
            previous = current

@with_hints(
    output_schema=dict(
        fielddate=str,
        yandexuid=str,
        session_duration=int,
        traffic_source=str,
        ui=str,
        start_page_name=str,
        session_type=str,
    )
)
def traffic_source_time_spent_reducer(groups):
    """Summarise each group as a session keyed by its first record's traffic source.

    For every group one ``Record`` with ``session_type='any'`` is yielded. Its
    ``traffic_source`` and ``start_page_name`` come from the first record and
    its ``session_duration`` is the last timestamp minus the first timestamp.
    If the group contains records with ``path == 'access'``, a second record
    with ``session_type='access'`` is yielded whose duration spans only the
    first and last such records.
    """
    for key, records in groups:
        first_any = last_any = None
        first_access = last_access = None

        # Single pass: remember the boundary records of the whole group and of
        # its 'access' subset.
        for rec in records:
            if not first_any:
                first_any = rec
            last_any = rec

            if rec.path == 'access':
                if not first_access:
                    first_access = rec
                last_access = rec

        any_record = Record(
            ui=key.ui,
            fielddate=key.fielddate,
            yandexuid=key.yandexuid,
            traffic_source=first_any.traffic_source,
            start_page_name=first_any.page_name,
            session_type='any',
            session_duration=last_any.timestamp - first_any.timestamp,
        )
        yield any_record

        if start_access_found(first_access) if False else first_access:
            yield any_record.transform(
                session_type='access',
                session_duration=last_access.timestamp - first_access.timestamp,
            )


class TimeSpent(Plot):
    """Time-spent reports and intermediate streams built from CollectionsRedirLog."""

    @require('CollectionsRedirLog.parsed')
    def time_spent_publish(self, streams):
        """Yield one published TimeSpent report stream per user-identifier type.

        For each identifier type, records with an empty identifier are dropped,
        the rest are grouped by identifier, ``fielddate`` and coarse ``ui`` and
        reduced with ``time_spent_reducer``. The result is run through
        ``AddTotalsMapper``, aggregated by ``fielddate``/``ui``/``session_type``
        and published to the ``Collections/Metrics/sessions/TimeSpent`` report.

        Yields ``(stream_name, stream)`` pairs where ``stream_name`` is
        ``'time_spent_publish_<user_id_type>'``.
        """
        quantile_levels = (0.05, 0.25, 0.5, 0.75, 0.95)

        for user_id_type in ['icookie', 'yandexuid', 'puid', 'user_id', 'puid_yandexuid']:
            if user_id_type != 'puid_yandexuid':
                uid_filter = nf.custom(lambda x: bool(x), user_id_type)
                uid_extractor = user_id_type
            else:
                # Combined identifier: take puid when it is set, otherwise yandexuid.
                uid_filter = nf.custom(lambda x, y: bool(x or y), 'puid', 'yandexuid')
                uid_extractor = (
                    ne.custom(lambda x, y: x or y, 'puid', 'yandexuid')
                    .with_type(str)
                    .rename(user_id_type)
                )

            stream_name = 'time_spent_publish_{}'.format(user_id_type)

            per_user = (
                streams['CollectionsRedirLog.parsed']
                .filter(uid_filter)
                .project(
                    'fielddate',
                    'timestamp',
                    'path',
                    uid_extractor,
                    ui=ne.custom(convert_ui, 'ui').with_type(str),
                )
                .groupby(user_id_type, 'fielddate', 'ui')
                .sort('timestamp')
                .reduce(time_spent_reducer, memory_limit=32 * 1024)
            )

            totals_mapper = with_hints(output_schema=extended_schema())(
                AddTotalsMapper(
                    ['ui', 'session_type'],
                    ['fielddate', 'users', 'sessions', 'time_spent', 'pages']
                )
            )

            aggregated = (
                per_user
                .map(totals_mapper)
                .groupby('fielddate', 'ui', 'session_type')
                .aggregate(
                    users=na.sum('users'),
                    sessions=na.sum('sessions'),
                    sessions_avg=na.mean('sessions'),
                    sessions_q=na.quantile('sessions', list(quantile_levels)),
                    time_spent=na.sum('time_spent'),
                    time_spent_avg=na.mean('time_spent'),
                    time_spent_q=na.quantile('time_spent', list(quantile_levels)),
                    pages=na.sum('pages'),
                    pages_avg=na.mean('pages'),
                    pages_q=na.quantile('pages', list(quantile_levels)),
                )
            )

            # Each *_q field is indexed as array[i][1], where i follows the
            # order of quantile_levels.
            # NOTE(review): ne.all([...]) presumably passes through every field
            # except the listed raw quantile arrays -- confirm against nile docs.
            report = aggregated.project(
                ne.all(['sessions_q', 'time_spent_q', 'pages_q']),
                user_id_type=ne.const(user_id_type),
                sessions_five_p=ne.custom(lambda array: array[0][1], 'sessions_q').with_type(float),
                sessions_twenty_five_p=ne.custom(lambda array: array[1][1], 'sessions_q').with_type(float),
                sessions_median=ne.custom(lambda array: array[2][1], 'sessions_q').with_type(float),
                sessions_seventy_five_p=ne.custom(lambda array: array[3][1], 'sessions_q').with_type(float),
                sessions_ninety_five_p=ne.custom(lambda array: array[4][1], 'sessions_q').with_type(float),
                time_spent_five_p=ne.custom(lambda array: array[0][1], 'time_spent_q').with_type(float),
                time_spent_twenty_five_p=ne.custom(lambda array: array[1][1], 'time_spent_q').with_type(float),
                time_spent_median=ne.custom(lambda array: array[2][1], 'time_spent_q').with_type(float),
                time_spent_seventy_five_p=ne.custom(lambda array: array[3][1], 'time_spent_q').with_type(float),
                time_spent_ninety_five_p=ne.custom(lambda array: array[4][1], 'time_spent_q').with_type(float),
                pages_five_p=ne.custom(lambda array: array[0][1], 'pages_q').with_type(float),
                pages_twenty_five_p=ne.custom(lambda array: array[1][1], 'pages_q').with_type(float),
                pages_median=ne.custom(lambda array: array[2][1], 'pages_q').with_type(float),
                pages_seventy_five_p=ne.custom(lambda array: array[3][1], 'pages_q').with_type(float),
                pages_ninety_five_p=ne.custom(lambda array: array[4][1], 'pages_q').with_type(float),
            )

            yield stream_name, report.publish(
                self.get_statface_report('Collections/Metrics/sessions/TimeSpent'),
                allow_change_job=True
            )

    @require('CollectionsRedirLog.parsed')
    def page_time_spent_publish(self, streams):
        """Publish time spent per page name and coarse ``ui``.

        Keeps ``path == 'access'`` records with a non-empty ``yandexuid`` and
        ``page_name``, computes the time between consecutive events of a
        session with ``event_time_spent_reducer`` and publishes the sum and
        mean of ``time_spent`` per ``fielddate``/``ui``/``page_name`` to the
        ``Collections/Metrics/sessions/PageTimeSpent`` report.
        """
        totals_key_fields = ['ui', 'page_name']
        totals_pass_fields = ['fielddate', 'time_spent']

        page_views = streams['CollectionsRedirLog.parsed'].filter(
            nf.and_(
                qf.nonzero('yandexuid'),
                nf.equals('path', 'access'),
                nf.custom(
                    lambda page_name: not (page_name is None or page_name == ''),
                    'page_name'
                ),
            )
        )

        per_event = (
            page_views
            .project(
                'fielddate',
                'yandexuid',
                'timestamp',
                'cts',
                # NOTE(review): the filter above already rejects a None
                # page_name, so the 'empty' substitution looks unreachable.
                page_name=ne.custom(lambda x: 'empty' if x is None else x, 'page_name').with_type(str),
                # TODO: build a separate report per raw ui value, without
                # collapsing it into desktop/touch
                ui=ne.custom(convert_ui, 'ui').with_type(str)
            )
            .groupby('yandexuid', 'fielddate', 'ui')
            .sort('timestamp', 'cts')
            .groupby(grouping.sessions())
            .reduce(event_time_spent_reducer)
        )

        # NOTE(review): unlike the other AddTotalsMapper uses in this class,
        # the first argument here also contains the pass-through fields --
        # confirm this is intended.
        totals_mapper = with_hints(output_schema=extended_schema())(
            AddTotalsMapper(
                totals_key_fields + totals_pass_fields, totals_pass_fields
            )
        )

        return (
            per_event
            .map(totals_mapper)
            .groupby('fielddate', 'ui', 'page_name')
            .aggregate(
                time_spent=na.sum('time_spent'),
                avg_time_spent=na.mean('time_spent')
            )
            .publish(
                self.get_statface_report('Collections/Metrics/sessions/PageTimeSpent'),
                allow_change_job=True
            )
        )

    @require('CollectionsRedirLog.parsed')
    def traffic_source_page_name_time_spent(self, streams):
        """Publish session counts and durations per traffic source and start page.

        Sessions are reduced with ``traffic_source_time_spent_reducer``, run
        through ``AddTotalsMapper`` and aggregated by ``fielddate``, ``ui``,
        ``traffic_source``, ``start_page_name`` and ``session_type``. The
        result goes to the
        ``Collections/Metrics/sessions/TrafficSource_PageName_TimeSpent`` report.
        """
        sessions = (
            streams['CollectionsRedirLog.parsed']
            .filter(qf.nonzero('yandexuid', 'timestamp', 'ui', 'page_name'))
            .groupby('ui', 'fielddate', 'yandexuid')
            .sort('timestamp', 'cts')
            .groupby(grouping.sessions())
            .reduce(traffic_source_time_spent_reducer)
        )

        totals_mapper = with_hints(output_schema=extended_schema())(
            AddTotalsMapper(
                ['ui', 'traffic_source', 'start_page_name'],
                ['fielddate', 'yandexuid', 'session_duration', 'session_type']
            )
        )

        return (
            sessions
            .map(totals_mapper)
            .groupby('fielddate', 'ui', 'traffic_source', 'start_page_name', 'session_type')
            .aggregate(
                sessions=na.count(),
                users=na.count_distinct('yandexuid'),
                avg_session_duration=na.mean('session_duration'),
                time_spent=na.sum('session_duration')
            )
            .publish(
                self.get_statface_report('Collections/Metrics/sessions/TrafficSource_PageName_TimeSpent'),
                allow_change_job=True
            )
        )

    @require('CollectionsRedirLog.clean')
    def event_time_spent(self, streams):
        """Return clean-log events annotated with ``time_spent`` until the next event.

        Records with non-empty ``yandexuid``, ``url`` and ``timestamp`` are
        grouped by ``yandexuid``, sorted by ``timestamp`` and ``cts``, split
        with ``grouping.sessions()`` and reduced with
        ``event_time_spent_reducer``.
        """
        return (
            streams['CollectionsRedirLog.clean']
            .filter(qf.nonzero('yandexuid', 'url', 'timestamp'))
            .groupby('yandexuid')
            .sort('timestamp', 'cts')
            .groupby(grouping.sessions())
            .reduce(event_time_spent_reducer)
        )

    @require('TimeSpent.event_time_spent')
    def card_time_spent(self, streams):
        """Return total ``time_spent`` per ``(card_id, board_id)``.

        Time is first summed per ``url``; the url's ``card_id`` and
        ``board_id`` are taken from any record with ``path == 'access'`` and
        ``parsed_page_name == 'card'``. Urls without a ``card_id`` are dropped
        before the final aggregation.
        """
        per_url = (
            streams['TimeSpent.event_time_spent']
            .filter(qf.nonzero('url'))
            .groupby('url')
            .aggregate(
                time_spent=na.sum('time_spent'),
                card_id=na.any(
                    'card_id',
                    predicate=nf.and_(nf.equals('path', 'access'), nf.equals('parsed_page_name', 'card'))
                ),
                board_id=na.any(
                    'board_id',
                    predicate=nf.and_(nf.equals('path', 'access'), nf.equals('parsed_page_name', 'card'))
                )
            )
        )

        return (
            per_url
            .filter(qf.nonzero('card_id'))
            .groupby('card_id', 'board_id')
            .aggregate(
                time_spent=na.sum('time_spent')
            )
        )

    @require('TimeSpent.event_time_spent')
    def board_time_spent(self, streams):
        """Return total ``time_spent`` per ``board_id``.

        Time is first summed per ``url``; the url's ``board_id`` is taken from
        any record with ``path == 'access'`` and
        ``parsed_page_name == 'board'``. Urls without a ``board_id`` are
        dropped before the final aggregation.
        """
        per_url = (
            streams['TimeSpent.event_time_spent']
            .filter(qf.nonzero('url'))
            .groupby('url')
            .aggregate(
                time_spent=na.sum('time_spent'),
                board_id=na.any(
                    'board_id',
                    predicate=nf.and_(nf.equals('path', 'access'), nf.equals('parsed_page_name', 'board'))
                )
            )
        )

        return (
            per_url
            .filter(qf.nonzero('board_id'))
            .groupby('board_id')
            .aggregate(
                time_spent=na.sum('time_spent')
            )
        )

    @require('TimeSpent.card_time_spent', 'TimeSpent.board_time_spent')
    def board_card_time_spent(self, streams):
        """Return per-board totals combining the card and board time-spent streams.

        The two streams are concatenated and grouped by ``board_id``.
        ``time_spent`` sums every row, ``card_time_spent`` sums only rows with
        a non-empty ``card_id``, and ``card_ids`` is a histogram of ``card_id``
        over those same rows.
        """
        card_rows = streams['TimeSpent.card_time_spent']
        board_rows = streams['TimeSpent.board_time_spent']

        return (
            card_rows
            .concat(board_rows)
            .filter(qf.nonzero('board_id'))
            .groupby('board_id')
            .aggregate(
                time_spent=na.sum('time_spent'),
                card_time_spent=na.sum('time_spent', predicate=qf.nonzero('card_id')),
                card_ids=na.histogram('card_id', predicate=qf.nonzero('card_id'))
            )
        )
