import argparse
import json
import logging
import time
import os
from concurrent.futures import TimeoutError
import ydb


class VisitsPaginated:
    """Page through rows of ``news_rt_vh_visits`` for source "yanews".

    Only rows whose ``last_ts`` is later than a cutoff are read. The cutoff
    is fixed at construction time: current time minus three days, in
    seconds, multiplied by 1,000,000 and bound to the ``Timestamp``-typed
    ``$timestamp`` query parameter.

    The position between pages is kept on the instance as the
    ``(video_id, event_name)`` of the last row of the previous page.

    NOTE(review): the result is ordered by (video_id, event_name, first_ts)
    but the cursor only holds (video_id, event_name), and the next-page
    query requires ``event_name > $lastEventName`` for the same video. If
    the table can contain several rows for one (video_id, event_name) pair,
    rows of a pair that is split across a page boundary look like they
    would be skipped -- confirm against the table's primary key.
    """

    def __init__(self, session, limit):
        """Store the session and page size and prepare the next-page query.

        Args:
            session: YDB table session used to prepare and execute queries.
            limit: maximum number of rows per page (bound to ``$limit``).
        """
        three_days_sec = 3600*24*3

        self.session = session
        self.limit = limit
        self.timestamp = (int(time.time()) - three_days_sec)*1000000
        # Cursor: key of the last row already handed out; None until the
        # first page has been fetched.
        self.last_video_id = None
        self.last_event_name = None

        # Two branches: the remaining events of the last seen video, and
        # every video after it; the outer SELECT re-sorts and trims.
        paged_sql = """
                DECLARE $limit AS Uint64;
                DECLARE $lastVideoId AS String;
                DECLARE $lastEventName AS String;
                DECLARE $timestamp AS Timestamp;

                $Data = (
                    SELECT video_id, event_name, count, first_ts, last_ts FROM news_rt_vh_visits
                    WHERE source = "yanews" AND video_id = $lastVideoId AND event_name > $lastEventName AND last_ts > $timestamp
                    ORDER BY video_id, event_name LIMIT $limit

                    UNION ALL

                    SELECT video_id, event_name, count, first_ts, last_ts FROM news_rt_vh_visits
                    WHERE source = "yanews" AND video_id > $lastVideoId AND last_ts > $timestamp
                    ORDER BY video_id, event_name LIMIT $limit
                );
                SELECT video_id, event_name, count, first_ts, last_ts FROM $Data ORDER BY video_id, event_name, first_ts LIMIT $limit;
            """
        self.prepared_next_page_query = self.session.prepare(paged_sql)

    def _run(self, prepared, params):
        """Execute *prepared* with *params* in its own committed transaction.

        Returns the first result set of the execution.
        """
        tx = self.session.transaction(ydb.SerializableReadWrite())
        return tx.execute(prepared, params, commit_tx=True)[0]

    def get_first_page(self):
        """Return the first ``limit`` rows past the cutoff, in key order.

        The query is prepared on every call.
        """
        first_page_sql = """
                DECLARE $limit AS Uint64;
                DECLARE $timestamp AS Timestamp;

                SELECT video_id, event_name, count, first_ts, last_ts FROM news_rt_vh_visits
                WHERE source = "yanews" AND last_ts > $timestamp
                ORDER BY video_id, event_name, first_ts LIMIT $limit;
            """
        prepared = self.session.prepare(first_page_sql)
        params = {
            "$limit": self.limit,
            "$timestamp": self.timestamp,
        }
        return self._run(prepared, params)

    def get_next_page(self):
        """Return the page that follows the stored cursor position."""
        params = {
            "$limit": self.limit,
            "$lastVideoId": self.last_video_id,
            "$lastEventName": self.last_event_name,
            "$timestamp": self.timestamp,
        }
        return self._run(self.prepared_next_page_query, params)

    def pages(self):
        """Yield result sets page by page until an empty page is returned.

        The cursor is advanced to the last row of a page before that page
        is yielded.

        NOTE(review): a None cursor component is what marks "no page fetched
        yet", so a page whose last row has a None video_id or event_name
        would make the first page be requested again.
        """
        while True:
            cursor_ready = (
                self.last_video_id is not None
                and self.last_event_name is not None
            )
            fetch = self.get_next_page if cursor_ready else self.get_first_page
            page = fetch()

            rows = page.rows
            if not rows:
                return

            tail = rows[-1]
            self.last_video_id, self.last_event_name = tail.video_id, tail.event_name
            yield page


def post_process(data):
    """Collapse each event's list of intervals into a single count, in place.

    ``data`` maps video id -> event id -> list of ``[first_ts, last_ts,
    count]`` items. After the call every inner list has been replaced by
    one number. Items are processed in list order; the merge rules assume
    they arrive sorted by start time (presumably guaranteed by the query's
    ORDER BY -- verify against the caller).

    Merge rules, applied against the running interval end ``fin``:
      * the item starts after ``fin``: its count is added and ``fin`` moves
        to the item's end;
      * the item overlaps and its end differs from ``fin`` by more than the
        tolerance: if it ends later it replaces the running end and count,
        otherwise it is ignored;
      * the item overlaps and ends within the tolerance: ignored, the
        earlier count is kept.

    An empty item list yields a count of 0.

    Args:
        data: nested dict as described above; mutated in place.

    Returns:
        None.
    """
    # Ends closer than this are treated as the same interval. Presumably
    # timestamps are in microseconds, which makes this 30 minutes --
    # TODO confirm the unit of first_ts/last_ts.
    end_tolerance = 1800 * 1000000

    for events in data.values():
        # Only values of existing keys are reassigned, so iterating the
        # dict while writing to it is safe.
        for event_id, items in events.items():
            if not items:
                events[event_id] = 0
                continue

            fin = items[0][1]
            count = items[0][2]
            # Slice iteration instead of xrange: works on Python 2 and 3.
            for item in items[1:]:
                cur_st, cur_fin, cur_count = item[0], item[1], item[2]

                if fin < cur_st:  # no intersection
                    count += cur_count
                    fin = cur_fin
                elif abs(fin - cur_fin) > end_tolerance and fin < cur_fin:
                    # different ends, keep the later one
                    fin, count = cur_fin, cur_count
                # otherwise keep the earlier answer already held in count

            events[event_id] = count


def main():
    """Export aggregated visit counts from YDB into ``data.json``.

    Command-line arguments: ``ydb_endpoint`` and ``ydb_database``. The auth
    token is read from the ``YDB_TOKEN`` environment variable (a missing
    variable raises ``KeyError``).

    All pages from ``VisitsPaginated`` are read with a page size of 1000.
    Rows with a falsy ``video_id`` or ``event_name`` are skipped. The rest
    are grouped as video id -> event name -> list of ``[first_ts, last_ts,
    count]``, reduced by ``post_process``, and written to ``data.json``
    along with the current time and the largest ``last_ts`` seen.

    Exits with status 1 if the driver is not ready within 5 seconds.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('ydb_endpoint')
    parser.add_argument('ydb_database')

    args = parser.parse_args()

    driver_config = ydb.DriverConfig(
        args.ydb_endpoint,
        args.ydb_database,
        auth_token=os.environ["YDB_TOKEN"]
    )

    with ydb.Driver(driver_config) as driver:
        try:
            driver.wait(timeout=5)
        except TimeoutError:
            # Logged at ERROR: logging is never configured in this script,
            # so INFO records would be dropped by the default WARNING level.
            logging.error("Connect failed to YDB")
            logging.error("Last reported errors by discovery:")
            logging.error(driver.discovery_debug_details())
            # SystemExit directly rather than the site-provided exit().
            raise SystemExit(1)

        session = driver.table_client.session().create()

        data = {}
        largest_last_timestamp = 0
        for page in VisitsPaginated(session, 1000).pages():
            for row in page.rows:
                if not (row.video_id and row.event_name):
                    continue
                largest_last_timestamp = max(largest_last_timestamp, row.last_ts)
                # Decode each key once per row.
                video_id = row.video_id.decode('utf-8')
                event_name = row.event_name.decode('utf-8')
                intervals = data.setdefault(video_id, {}).setdefault(event_name, [])
                intervals.append([row.first_ts, row.last_ts, row.count])

        post_process(data)

        with open("data.json", "w") as f:
            json.dump(
                {
                    'timestamp': int(time.time()),
                    'data': data,
                    'largest_last_timestamp': largest_last_timestamp,
                },
                f,
            )


# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
