import json
import time
from datetime import datetime, timedelta
from collections import namedtuple

import util


def _do_segments_intersect(lhs, rhs):
    return rhs[0] <= lhs[0] <= rhs[1] or rhs[0] <= lhs[1] <= rhs[1]


def _get_utc_timestamp(dt):
    return int((dt - datetime(1970, 1, 1)).total_seconds())


class PromoDocument(object):
    """Activity intervals of a single promo URL.

    ``segments`` is a plain list of ``(start, end)`` tuples. It is always
    kept as a real list (never a lazy ``filter``/``map`` object) so that
    it can be indexed, appended to and iterated repeatedly on both
    Python 2 and Python 3.
    """

    def __init__(self, url, start, end):
        """Create a document with one ``(start, end)`` segment.

        Raises:
            ValueError: if ``end`` is earlier than ``start``.
        """
        self.url = url

        if end < start:
            raise ValueError("Incorrect promo interval: end < start")
        self.segments = [(start, end)]

    def merge_from(self, other, same_day):
        """Merge the single segment of ``other`` into this document.

        Args:
            other: a PromoDocument for the same URL holding exactly one
                segment.
            same_day: forwarded to ``_merge_single_segment``; when false,
                a document whose latest end is at 21:00:00 is restarted
                from that end instead of being merged.

        Raises:
            KeyError: if the URLs differ.
            ValueError: if ``other`` holds anything but one segment.
        """
        if self.url != other.url:
            raise KeyError("Whoops, trying to merge different promos")

        if len(other.segments) != 1:
            raise ValueError("Merging complex segment sets has not been implemented yet")

        self._merge_single_segment(other.segments[0], same_day)

    def _get_end(self):
        """Return the latest end among all segments."""
        return max(segment[1] for segment in self.segments)

    def _ends_on_midnight(self):
        """Tell whether the latest segment end is exactly 21:00:00.000000.

        NOTE(review): 21:00 presumably is local midnight of a UTC+3 zone
        expressed in UTC (dump_promo_to_yt defaults its delta to 3 hours)
        -- confirm before relying on it with another offset.
        """
        end = self._get_end()
        return (end.hour, end.minute, end.second, end.microsecond) == (21, 0, 0, 0)

    def _merge_single_segment(self, new_segment, same_day):
        """Fold ``new_segment`` into ``self.segments``.

        When the merge crosses a day boundary (``same_day`` is false) and
        the document currently ends at 21:00:00, all existing segments
        are replaced by one that starts at that end and finishes where
        ``new_segment`` finishes.

        Otherwise every stored segment that intersects ``new_segment`` is
        fused with it into one covering segment; the remaining segments
        are kept untouched and the fused segment is appended last.
        """
        if not same_day and self._ends_on_midnight():
            self.segments = [(self._get_end(), new_segment[1])]
            return

        # Partition in a single pass so the intersection test runs once
        # per stored segment.
        overlapping = []
        untouched = []
        for segment in self.segments:
            if _do_segments_intersect(new_segment, segment):
                overlapping.append(segment)
            else:
                untouched.append(segment)

        overlapping.append(new_segment)
        untouched.append((
            min(segment[0] for segment in overlapping),
            max(segment[1] for segment in overlapping)
        ))

        self.segments = untouched

    def trim_to_date(self, date):
        """Clip the segments to the 24 hours starting at ``date``.

        Segments that do not intersect ``[date, date + 1 day]`` are
        dropped; the others are cut so that they lie inside that window.
        """
        tomorrow = date + timedelta(days=1)
        day = (date, tomorrow)

        self.segments = [
            (max(segment[0], date), min(segment[1], tomorrow))
            for segment in self.segments
            if _do_segments_intersect(segment, day)
        ]


# Hashable key that PromoDump uses to index its promos.
PromoKey = namedtuple('PromoKey', ['url', 'tag', 'request_type'])


class PromoDump(object):
    """Promo documents of one day, indexed by ``PromoKey``.

    A dump is filled either from trie lines (every promo starts at the
    given timestamp and is left open-ended) or from rows previously
    stored in a YT table (every row carries its own start and end).
    """

    def __init__(self, date, trie=None, timestamp=None, yt_table_dict=None):
        """Build a dump for ``date``.

        ``trie`` takes precedence over ``yt_table_dict``; with neither
        given the dump starts empty.
        """
        self.date = date
        self.promos = {}
        if trie is not None:
            self._init_promos_from_trie(trie, timestamp)
            return
        if yt_table_dict is not None:
            self._init_promos_from_yt_table(yt_table_dict)

    def update_from(self, other, dt):
        """Bring this dump up to date with the newer dump ``other``.

        Promos missing from ``other`` get their last segment closed at
        ``dt``; promos present in both are merged; promos only in
        ``other`` are adopted as is. Finally the dump takes the date of
        ``other`` and is trimmed to that day.
        """
        # Close the promos that are absent from the newer dump.
        for key, document in self.promos.items():
            if key in other.promos:
                continue
            opened_at = document.segments[-1][0]
            document.segments[-1] = (opened_at, dt)

        for key, incoming in other.promos.items():
            if key not in self.promos:
                self.promos[key] = incoming
                continue
            self.promos[key].merge_from(incoming, self.date == other.date)

        self.date = other.date
        self.trim_to_date()

    def get_yt_writer(self):
        """Yield one row dict per stored segment, ready to write to YT."""
        for key, document in self.promos.items():
            for start, end in document.segments:
                yield {
                    "url": key.url,
                    "tag": key.tag,
                    "request_type": key.request_type,
                    "tm_start": _get_utc_timestamp(start),
                    "tm_end": _get_utc_timestamp(end),
                }

    def trim_to_date(self):
        """Trim every promo document to the day of this dump."""
        for document in self.promos.values():
            document.trim_to_date(self.date)

    def _init_promos_from_trie(self, trie, timestamp):
        """Register every promo entry found in the trie lines."""
        for trie_line in trie:
            for promo_entry in _retrieve_single_tag_promo_entries(trie_line):
                self._init_single_promo(promo_entry, timestamp)

    def _init_promos_from_yt_table(self, yt_table_dict):
        """Register every row of a previously written dump table."""
        for row in yt_table_dict:
            self._init_single_promo(row)

    def _init_single_promo(self, entry, timestamp=None):
        """Add one promo entry, merging it into an existing document.

        With ``timestamp`` given the promo starts at that moment and has
        no known end yet; otherwise the interval is read from the
        ``tm_start`` / ``tm_end`` fields of ``entry``.
        """
        key = PromoKey(entry["url"], entry["tag"], entry["request_type"])

        if timestamp is None:
            start = datetime.utcfromtimestamp(entry["tm_start"])
            end = datetime.utcfromtimestamp(entry["tm_end"])
        else:
            start = datetime.utcfromtimestamp(timestamp)
            # Far-future stand-in for "still running": 2**64 - 1 is
            # rejected by python as out of range for time_t, so the
            # largest 32-bit value is used instead.
            end = datetime.utcfromtimestamp(2**32 - 1)

        document = PromoDocument(key.url, start, end)
        if key in self.promos:
            self.promos[key].merge_from(document, True)
        else:
            self.promos[key] = document


def dump_promo_to_yt(proxy, dump_dir, file_name, timestamp=None, delta=timedelta(hours=3)):
    """Build a promo dump from a trie file and store it in a YT directory.

    The trie file is read into a fresh dump. If ``dump_dir`` already
    holds dump tables, the latest one is loaded, updated from the fresh
    dump and written back under the fresh dump's date; otherwise the
    fresh dump is trimmed to its day and written as the first table.

    Args:
        proxy: YT proxy to connect to.
        dump_dir: YT directory (a ``map_node``) holding the dump tables.
        file_name: path of the local trie file.
        timestamp: UTC unix time of the dump. Defaults to the current
            time, taken when the function is called. (A ``time.time()``
            default in the signature would be frozen at import time.)
        delta: offset added to UTC to obtain the calendar day used for
            table names.

    Raises:
        IndexError: if ``dump_dir`` does not exist on ``proxy``.
        ValueError: if ``dump_dir`` is not a directory.
    """
    import yt.wrapper as yt

    if timestamp is None:
        timestamp = time.time()

    yt.config['proxy']['url'] = proxy
    if not yt.exists(dump_dir):
        raise IndexError(
            "Specified directory ({0}) not found on {1}".format(
                dump_dir,
                proxy
            )
        )
    if yt.get_type(dump_dir) != "map_node":
        raise ValueError("Specified yt node is not a directory")

    with open(file_name, 'r') as trie_file:
        trie = trie_file.readlines()

    new_dump = PromoDump(
        _get_start_of_day_from_utc(timestamp, delta),
        trie=trie,
        timestamp=timestamp
    )

    latest_dump_date = _get_latest_dump_date(yt, dump_dir)
    if latest_dump_date is None:
        # First dump in this directory: nothing to merge with.
        new_dump.trim_to_date()
        _write_dump_to_yt(yt, new_dump, dump_dir, delta)
        return

    latest_dump_table = _get_dump_by_date(dump_dir, latest_dump_date)

    # Table names carry the shifted date; subtract delta to get back the
    # UTC start of that day.
    latest_dump = PromoDump(
        datetime.strptime(latest_dump_date, "%Y-%m-%d") - delta,
        yt_table_dict=yt.read_table(latest_dump_table)
    )

    latest_dump.update_from(
        new_dump,
        datetime.utcfromtimestamp(timestamp)
    )
    _write_dump_to_yt(yt, latest_dump, dump_dir, delta)


def _write_dump_to_yt(client, promo_dump, dump_dir, delta):
    """Store ``promo_dump`` as a sorted table inside ``dump_dir``.

    Rows are first written to a temporary table and then sorted into the
    table named after ``promo_dump.date + delta`` (``YYYY-MM-DD``).
    """
    with client.TempTable() as staging_table:
        rows = promo_dump.get_yt_writer()
        client.write_table(staging_table, rows)

        table_name = (promo_dump.date + delta).strftime("%Y-%m-%d")
        destination = _get_dump_by_date(dump_dir, table_name)
        client.run_sort(
            staging_table,
            destination,
            sort_by=["request_type", "tag", "tm_start"]
        )


def _get_start_of_day_from_utc(timestamp, delta):
    dt = datetime.utcfromtimestamp(timestamp) + delta
    return dt.replace(hour=0, minute=0, second=0, microsecond=0) - delta


def _validate_date(text):
    try:
        datetime.strptime(text, "%Y-%m-%d")
        return True
    except ValueError:
        return False


def _is_yt_table(client, node_address):
    return client.get_type(node_address) == 'table'


def _get_latest_dump_date(client, dump_dir):
    dump_tables = client.list(dump_dir)
    dump_dates = filter(
        lambda x: _validate_date(x) and _is_yt_table(
            client,
            _get_dump_by_date(dump_dir, x)
        ),
        dump_tables
    )

    if len(dump_dates) == 0:
        return None
    return max(dump_dates)


def _get_dump_by_date(dump_dir, date):
    return dump_dir + "/" + date


def _retrieve_single_tag_promo_entries(line):
    """Extract the promo docs stored in one trie line.

    The line is split into key and value by ``util.parse_trie_line``;
    the value is JSON whose ``mixed_promo_url_data.docs`` list holds the
    docs. Every doc is annotated in place with ``tag`` and
    ``request_type`` taken from the ``;``-separated key.

    Returns:
        The list of annotated docs, or ``[]`` when the line cannot be
        parsed (``IndexError``) or carries no promo data (``KeyError``).
    """
    try:
        key, value = util.parse_trie_line(line)
    except IndexError:
        return []

    payload = json.loads(value)

    try:
        docs = payload["mixed_promo_url_data"]["docs"]
        for doc in docs:
            tag, request_type = key.split(";")
            doc["tag"] = tag
            doc["request_type"] = request_type
    except KeyError:
        return []
    return docs


if __name__ == "__main__":
    # Script entry point: dump the local trie file into the test directory.
    dump_promo_to_yt(
        proxy="hahn",
        dump_dir="//home/videoindex/recommender/promo_dumps_test",
        file_name="videohub.fast"
    )
