# coding: utf-8

import sys
import pytz
import json
import bisect
import urllib
import itertools
from datetime import datetime, timedelta

from nile.api.v1 import (
    Record,
    filters as nf,
)

# Default strptime pattern for date strings; used as the default format
# argument of get_dts_delta and calc_retention_from_action.
DATE_FORMAT = '%Y-%m-%d'

# Default maximum gap between two consecutive events of one session in
# split_sessions. NOTE(review): 30*60 is presumably 30 minutes expressed in
# seconds, i.e. event timestamps are assumed to be unix seconds - confirm.
SESSION_SPLIT_INTERVAL = 30*60

# Separator string for flow names. NOTE(review): not referenced in the visible
# part of this file; presumably used by importing modules - confirm.
FLOW_NAME_SEPARATOR = ' > '


def transpose(l):
    """Transpose a list of lists (rows become columns).

    Rows are truncated to the length of the shortest row, as ``zip`` does.
    The result is always a real list of lists on both Python 2 and
    Python 3 (a bare ``map`` call would be a lazy iterator on Python 3).

    :param l: iterable of iterables (the rows)
    :return: list of lists (the columns)
    """
    return [list(column) for column in zip(*l)]


def date_range(start_date, end_date=None, delta=0):
    """
    Iterate over the dates of the interval [start_date, end_date], inclusive.

    When end_date is not given only start_date is produced. The interval can
    be shortened by `delta` days from the end, but start_date is always
    produced at the very least.
    """
    span = 1
    if end_date:
        span = int((end_date - start_date).days) + 1 - delta

    if span <= 1:
        # Nothing beyond the first day (missing/early end_date or large delta).
        yield start_date
        return

    offsets = (timedelta(days=shift) for shift in range(span))
    for step in offsets:
        yield start_date + step


def split_by_dates(stream, dates, yt_folder, date_field='fielddate', sort_by=None):
    """
    Split `stream` by the values of `date_field` and store one table per date.

    Generator: for every date in `dates` yields the result of `.put(...)`
    called on the (optionally sorted) sub-stream of that date. The target
    path is the plain concatenation `yt_folder + date`, written with
    allow_override=True.

    :param stream: source stream exposing filter/split/sort/put
    :param dates: list of date values compared against `date_field`
    :param yt_folder: path prefix the date is appended to
    :param date_field: name of the field holding the date
    :param sort_by: optional sequence of fields passed to `.sort(*sort_by)`
    """
    def _store(substream, date):
        # Optional sort, then write the per-date table.
        if sort_by:
            substream = substream.sort(*sort_by)
        return substream.put(yt_folder + date, allow_override=True)

    if len(dates) == 1:
        # A single date needs no split, a plain filter is enough.
        only_date = dates[0]
        yield _store(stream.filter(nf.equals(date_field, only_date)), only_date)
        return

    date_filters = [nf.equals(date_field, date) for date in dates]

    # NOTE(review): presumably every record lands in the sub-stream of the
    # first matching filter ('stop_if_true') - confirm against nile docs.
    parts = stream.split(
        *date_filters,
        multisplit=True,
        strategy='stop_if_true'
    )

    for position, date in enumerate(dates):
        yield _store(parts[position], date)


class AddTotalsMapper(object):
    """
    Mapper that fans every record out into all of its "total" combinations.

    Each key field (except the passed-through ones) appears in the output both
    with its own value and with `total_value` substituted, for every
    combination of fields: 2 ** <number of such fields> output records per
    input record. Output records contain only the passed-through keys and the
    key fields; no other input field is copied.
    """

    def __init__(self, key_fields, passed_keys=None, total_value='_total_'):
        if passed_keys is None:
            passed_keys = ['fielddate']
        # key fields for which the total value is added
        self.key_fields = key_fields
        # keys that get no totals and are simply passed through
        self.passed_keys = passed_keys
        self.total_value = total_value

    def __call__(self, recs):
        total_keys = [name for name in self.key_fields if name not in self.passed_keys]
        for rec in recs:
            # per key: (original value, total marker); the product of these
            # pairs enumerates every combination
            choices = [(getattr(rec, name), self.total_value) for name in total_keys]
            for combo in itertools.product(*choices):
                row = dict((name, getattr(rec, name)) for name in self.passed_keys)
                row.update(zip(total_keys, combo))
                yield Record.from_dict(row)


class PrecomputeTotalsMapper(object):
    """
    Mapper that collapses runs of consecutive records sharing the same key.

    For every run of adjacent records with equal values of `key_fields` one
    record is emitted, holding the key values and the run length stored under
    `count_key`. Only adjacent records are merged.
    """

    def __init__(self, key_fields, count_key='hits'):
        # name of the output field that receives the run length
        self.count_key = count_key
        # key columns whose consecutive repetitions are pre-aggregated
        self.key_fields = key_fields

    def _get_result_data(self, last_key_data, counter):
        """Build the output dict: the counter plus one entry per key field."""
        result = {self.count_key: counter}
        result.update(
            (name, last_key_data[position])
            for position, name in enumerate(self.key_fields)
        )
        return result

    def __call__(self, recs):
        current = None   # key of the most recently seen record
        run_key = None   # key that opened the run being counted
        run_length = 0

        for rec in recs:
            current = tuple(getattr(rec, name) for name in self.key_fields)
            if current != run_key:
                # key changed: flush the finished run (if any), start a new one
                if run_key is not None:
                    yield Record.from_dict(self._get_result_data(run_key, run_length))
                run_key = current
                run_length = 1
            else:
                run_length += 1

        # flush the trailing run; `current` stays None only for empty input
        if current is not None:
            yield Record.from_dict(self._get_result_data(current, run_length))


def parse_urlencoded(value):
    """Percent-decode `value`; return None when it cannot be decoded.

    Works on both Python 2 (``urllib.unquote``) and Python 3
    (``urllib.parse.unquote``). Previously the bare ``except`` hid the
    missing Python 2 name on Python 3, so every call returned None.

    :param value: url-encoded string
    :return: decoded string, or None for unsuitable input (e.g. None)
    """
    unquote = getattr(urllib, 'unquote', None)  # Python 2 location
    if unquote is None:
        from urllib.parse import unquote  # Python 3 location

    try:
        return unquote(value)
    except Exception:
        # Best-effort parsing: bad input yields None, but KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return None


def parse_urlencoded_json(value):
    """Percent-decode `value` and parse the result as JSON.

    :param value: url-encoded JSON string
    :return: the parsed JSON object, or None when either the url-decoding
        or the JSON parsing fails
    """
    value = parse_urlencoded(value)

    if value is None:
        return None

    try:
        return json.loads(value)
    except Exception:
        # Best-effort parsing: malformed JSON yields None, but
        # KeyboardInterrupt/SystemExit are no longer swallowed as they were
        # by the former bare ``except``.
        return None


def parse_vars(click_vars):
    """Convert click vars into a dict.

    Accepted inputs:

    * a list of ``(key, value)`` pairs;
    * a string of the form ``"k1=v1,k2=v2"``; an empty string gives ``{}``.

    For strings only the first ``=`` of every part separates key from value,
    so values that contain ``=`` are kept whole, and a part without ``=``
    maps the key to an empty string instead of failing with IndexError.

    :param click_vars: list of pairs or a comma-separated ``key=value`` string
    :return: dict of vars
    :raises AssertionError: for any other input type (raised explicitly, so
        the check also works under ``python -O``)
    """
    # TODO: tests
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3

    if isinstance(click_vars, list):
        return {k: v for k, v in click_vars}

    if isinstance(click_vars, string_types):
        if not click_vars:
            return {}
        result = {}
        for part in click_vars.split(','):
            pieces = part.split('=', 1)
            result[pieces[0]] = pieces[1] if len(pieces) == 2 else ''
        return result

    raise AssertionError('Принимает на вход только список или строку')


def fromtimestamp(ts):
    """Convert a POSIX timestamp into an aware datetime in the Europe/Moscow zone."""
    moscow_tz = pytz.timezone('Europe/Moscow')
    return datetime.fromtimestamp(ts, tz=moscow_tz)


def get_dts_delta(dt1, dt2, dt_format=DATE_FORMAT):
    """
    Return the difference `dt1 - dt2` between two date strings as a timedelta.

    :param dt1: date string in `dt_format` (the minuend)
    :param dt2: date string in `dt_format` (the subtrahend)
    :param dt_format: strptime pattern both strings are parsed with
    """
    later = datetime.strptime(dt1, dt_format)
    earlier = datetime.strptime(dt2, dt_format)
    return later - earlier


def extract_tskv(value):
    """
    Parse a tab-separated ``key=value`` line into a dict.

    Only the first ``=`` of every field separates key from value; a field
    without ``=`` maps to an empty string. When a key repeats, the last
    occurrence wins.
    """
    fields = {}
    for chunk in value.split('\t'):
        pieces = chunk.split('=', 1)
        fields[pieces[0]] = pieces[1] if len(pieces) > 1 else ''
    return fields


def get_element_by_path(node, path, default=None):
    """Safely walk nested dicts and lists along a dot-separated path.

    Path elements made only of digits are treated as integers: they index
    lists and are first looked up as int keys in dicts. When a dict has no
    such int key, the original string form (e.g. ``"0"`` or ``"007"``) is
    tried as well, so JSON objects with numeric-looking keys are reachable.

    Any failure (missing key, index out of range, null or scalar node in the
    middle of the path) is reported on stderr and `default` is returned.

    :param node: root object (dict / list) the walk starts from
    :param path: dot-separated keys and indices, e.g. ``"items.0.name"``
    :param default: value returned when the path cannot be resolved
    :return: the node at the given path, `default` otherwise
    """
    raw_elements = path.split(".")
    path_elements = [int(x) if x.isdigit() else x for x in raw_elements]

    delimiter = " > "
    pretty_path = delimiter.join(str(x) for x in path_elements)
    path_so_far = ""
    for raw, pe in zip(raw_elements, path_elements):
        path_so_far += str(pe)
        if node is None:
            sys.stderr.write("Node {} is null. Path: {}\n".format(path_so_far, pretty_path))
            return default

        if isinstance(node, dict):
            if pe in node:
                node = node[pe]
            elif raw in node:
                # digit-only element stored as a string key (typical for JSON)
                node = node[raw]
            else:
                sys.stderr.write("{} expected, but not found in path {}\n".format(path_so_far, pretty_path))
                return default
        elif isinstance(node, list):
            if not isinstance(pe, int):
                sys.stderr.write(
                    "list index should be an int, got {} instead in {}\n".format(path_so_far, pretty_path))
                return default
            if pe >= len(node):
                sys.stderr.write("{} expected, but not found in path {}\n".format(path_so_far, pretty_path))
                return default
            node = node[pe]
        else:
            # scalar in the middle of the path: cannot descend any further
            sys.stderr.write("Failed to go to {} in path {}\n".format(path_so_far, pretty_path))
            return default

        path_so_far += delimiter

    return node


def split_sessions(events, split_interval=SESSION_SPLIT_INTERVAL, timestamp_key='timestamp'):
    """
    Sessions generator.

    Consecutive events belong to one session while the gap between an event
    and the previous one does not exceed `split_interval`; a larger gap
    starts a new session. Empty input produces no sessions. This is handled
    explicitly rather than through a StopIteration escaping the generator,
    which is a RuntimeError under PEP 479; the Python 2 only ``.next()``
    call is gone as well.

    :param events: dicts or Records sorted by timestamp; each must support
        ``.get(timestamp_key)`` returning a value convertible with ``int``
    :param split_interval: maximum allowed gap between neighbouring events
        of one session, in the same units as the timestamps
    :param timestamp_key: name of the field holding the event timestamp
    :return: generator of sessions, each a list of events
    """
    session = []
    for event in events:
        # The first event always opens a session; its timestamp is only read
        # once there is a following event to compare it with.
        if session and int(event.get(timestamp_key)) - int(session[-1].get(timestamp_key)) > split_interval:
            yield session
            session = []
        session.append(event)

    if session:
        yield session


# Default retention periods, in days, used by calc_retention_from_action when
# its `periods` argument is empty or not given.
RETENTION_PERIODS = (7, 14, 30)


def calc_retention_from_action(points_dates, actions_dates, periods=None, date_format=DATE_FORMAT):
    """
    Compute retention into an action.

    Example: for one author we want the dates of "point" events (e.g. creating
    a board) after which the author performed the key action (e.g. adding a
    card) within the following 7 days. The input would be:

    points_dates = [2019-01-01, 2019-01-03, ...]  dates of the point events
    actions_dates = [2019-01-01, 2019-01-04, ...]  dates of the key actions
    periods = [7]
    date_format = '%Y-%m-%d'

    The function yields every point_date for which an action_date exists with
    1 <= delta(point_date, action_date) <= 7, where delta is the difference
    in days between action_date and point_date.

    Note: the nearest following action is located with bisect over the date
    strings, so `actions_dates` must be sorted in ascending order and the
    date format must sort lexicographically in chronological order.

    :param points_dates: dates of the events retention is measured from, as
        strings in `date_format`
    :param actions_dates: dates of the key events that count as returns
    :param periods: periods (in days) to compute returns for; when empty or
        None the value (7, 14, 30) is used
    :param date_format: format of the dates in points_dates and
        actions_dates, '%Y-%m-%d' by default
    :return: generator of (point_date, period) pairs for which an action_date
        exists such that
        1 <= (datetime.strptime(action_date, date_format) - datetime.strptime(point_date, date_format)).days <= period
    """
    windows = periods or RETENTION_PERIODS

    for point_date in points_dates:
        # index of the first action dated strictly after the point
        position = bisect.bisect_right(actions_dates, point_date)
        if position >= len(actions_dates):
            continue

        for window in windows:
            gap_days = get_dts_delta(actions_dates[position], point_date, date_format).days
            if 1 <= gap_days <= window:
                yield point_date, window
