#!/usr/bin/env python
import sys
import yt.wrapper as yt
import baobab
from baobab.common.error_handler import BaseErrorHandler
import baobab.common.block as bcb
from baobab.common.tree import children_iterator
from collections import defaultdict as defdict


class MyErrorHandler(BaseErrorHandler):
    """Error handler that prints baobab parsing failures to standard output."""

    def handle_event_error(self, exception):
        """Print a one-line message for a failure to parse a single event.

        :param exception: the error to report; interpolated with ``%s``.
        """
        # Parenthesised single-argument print: same output as the Python 2
        # print statement, and also valid syntax on Python 3.
        print('Error parsing event: %s' % exception)

    def handle_events_batch_error(self, exception):
        """Print a one-line message for a failure to parse a batch of events.

        :param exception: the error to report; interpolated with ``%s``.
        """
        print('Error parsing events batch: %s' % exception)


def get_blocks_from_root_list_with_type(block):
    """Return the labels of ``block`` and its ancestors, ordered root first.

    Each label is the block's ``name``; when the block has a non-empty
    ``type`` attribute the label becomes ``name@type``. Blocks named
    ``$main`` or ``breadcrumbs`` are always labelled without a type.

    :param block: object exposing ``name``, ``attrs`` (with ``get``) and
        ``parent``; ``None`` yields an empty list.
    :return: list of label strings from the topmost ancestor down to ``block``.
    """
    untyped_names = ('$main', 'breadcrumbs')
    labels = []
    node = block
    # Walk upwards through the parent links until the chain ends.
    while node is not None:
        kind = node.attrs.get('type', '')
        if node.name in untyped_names:
            kind = ''
        suffix = ('@' + kind) if kind else ''
        labels.append(node.name + suffix)
        node = node.parent
    # Labels were collected leaf-to-root; callers expect root-to-leaf.
    labels.reverse()
    return labels


class ShowAndClicks(object):
    """Mutable record holding a uid, at most one show event and a click list."""

    def __init__(self):
        # Nothing is known yet: both the uid and the show start out unset.
        self.uid = self.show_maybe = None
        # Every instance gets its own fresh list of clicks.
        self.clicks = list()


class ShowsClicksVisitor(baobab.BaseVisitor):
    """Visitor that groups show-like events and clicks by request id.

    ``req_data`` maps a request id to a ``ShowAndClicks`` record. Events
    whose context has no request id (``req_id is None``) are ignored.
    """

    def __init__(self):
        # Records are created lazily on first access for a request id.
        self.req_data = defdict(ShowAndClicks)

    def handle_show(self, show):
        """Record a show event."""
        self.__do_handle_show_like_event(show)

    def handle_append(self, append):
        """Record an append event; treated exactly like a show."""
        self.__do_handle_show_like_event(append)

    def handle_replace(self, replace):
        """Record a replace event; treated exactly like a show."""
        self.__do_handle_show_like_event(replace)

    def handle_overlay(self, overlay):
        """Record an overlay event; treated exactly like a show."""
        self.__do_handle_show_like_event(overlay)

    def __do_handle_show_like_event(self, show):
        """Store ``show`` and its uid under the event's request id.

        :raises Exception: when a show-like event was already stored for the
            same request id.
        """
        reqid = show.context.req_id
        if reqid is None:
            # Bail out before touching req_data: indexing the defaultdict
            # would otherwise create a spurious record keyed by None.
            return
        record = self.req_data[reqid]
        record.uid = show.context.uid
        if record.show_maybe is not None:
            raise Exception("Show duplicate for same reqid!")
        record.show_maybe = show

    def handle_click(self, click):
        """Append ``click`` to the record of its request id, if it has one."""
        reqid = click.context.req_id
        if reqid is not None:
            self.req_data[reqid].clicks.append(click)


def myreduce(key, rows):
    """Reduce the rows of one key into one output row per request id.

    Every input row is turned into a transport-level context, the contexts
    are merged, and the merged contexts are parsed with a
    ``ShowsClicksVisitor``. For each request id collected by the visitor the
    show and its clicks are joined and the block paths are extracted.

    :param key: reduce key passed by the caller; not read by this function.
    :param rows: iterable of rows providing the ``'key'``, ``'subkey'`` and
        ``'value'`` fields.
    :return: generator of dicts with the keys ``'shows'`` (paths of all
        blocks in the show tree), ``'clicks'`` (paths of blocks, one entry
        per click event found on them), ``'uid'``, ``'errors'`` (messages of
        exceptions raised while joining), ``'reqid'`` and ``'ui'``.
    """
    visitor = ShowsClicksVisitor()
    parser = baobab.EventsParser(baobab.ParserConfig(BaseErrorHandler()))
    trees_merger = baobab.TreesAccumulateMerger()
    tlp = baobab.TransportLevelParser()
    for row in rows:
        yuid, ts, value = row['key'], row['subkey'], row['value']
        ctx = tlp.create_from_sessions(yuid, ts, value)

        # Disabled: would copy the 'builder-version' field of the raw value
        # onto the context.
        # parts = row['value'].strip().split("\t")
        # for p in parts:
        #     equals_parts = p.split("=")
        #     if equals_parts[0] == 'builder-version':
        #         ctx.builder_version = equals_parts[1]

        trees_merger.add(ctx)
    ctxs_and_merge_info = trees_merger.join()

    for ctx in ctxs_and_merge_info.ctxs:
        parser.parse(ctx, visitor)

    # items() instead of iteritems(): identical iteration on Python 2 and
    # also available on Python 3. An empty mapping simply yields nothing.
    for reqid, data in visitor.req_data.items():
        ui = None
        errors = []
        shows_paths = []
        clicks_paths = []
        try:
            joiner = baobab.ShowAndClicksJoiner()
            if data.show_maybe is not None:
                joiner.set_show(data.show_maybe)
            for click in data.clicks:
                joiner.add_click(click)
            joiner.join()
            show = joiner.get_show()
            root = show.tree.root if show is not None else None
            # Check the root before dereferencing it, so that a tree without
            # a root is skipped instead of being reported as an
            # AttributeError in 'errors'.
            if root is not None:
                ui = root.attrs.get('ui', None)
                for block in baobab.tree.bfs_iterator(root):
                    path = "\t".join(get_blocks_from_root_list_with_type(block))
                    shows_paths.append(path)
                    for event in joiner.get_events_by_block(block):
                        if isinstance(event, baobab.common.event.Click):
                            clicks_paths.append(path)
        except Exception as e:
            # Best effort: a failing request is still emitted, with the
            # failure message attached and whatever was collected so far.
            errors.append(str(e))
        yield {'shows': shows_paths, 'clicks': clicks_paths, 'uid': data.uid, 'errors': errors, 'reqid': reqid, 'ui': ui}


if __name__ == '__main__':
    # Command line: <script> <source table> <destination table>
    job_memory_limit = 8 * 1024**3  # presumably bytes (8 GiB) - confirm against the yt config reference

    yt.config["read_retries"]["enable"] = True
    yt.config['memory_limit'] = job_memory_limit
    # The filter returns False for every module whose __name__ contains
    # 'hashlib'; presumably this keeps such modules out of the pickled job
    # environment - confirm against the yt pickling documentation.
    yt.config['pickling']['module_filter'] = (
        lambda module: 'hashlib' not in getattr(module, '__name__', '')
    )

    source_table, destination_table = sys.argv[1], sys.argv[2]
    yt.run_reduce(
        myreduce,
        source_table,
        destination_table,
        reduce_by=["key"],
        format=yt.JsonFormat(),
    )
