# coding: utf-8

from collections import defaultdict
from urlparse import urlparse, parse_qsl

import libra
import baobab
import tamus


class Record(object):
    """One tab-separated line split into ``key``, ``subkey`` and ``value``.

    Only the first two tabs act as separators, so any further tabs stay
    inside ``value``.  A line with fewer than three fields raises
    ``IndexError``.
    """

    def __init__(self, s):
        parts = s.split('\t', 2)
        self.key, self.subkey, self.value = parts[0], parts[1], parts[2]


def parse_file(filename):
    """Read ``filename`` and return a list with one ``Record`` per line.

    A single trailing newline is removed from each line before parsing.
    Lines that are empty after that are skipped.  The emptiness check has to
    come after the newline is stripped: a line yielded by file iteration is
    never zero-length, so checking first would let blank lines through to
    ``Record`` and fail there.
    """
    records = []
    with open(filename, 'r') as f:
        for line in f:
            if line.endswith('\n'):
                line = line[:-1]
            if not line:
                # blank line: nothing to parse
                continue
            records.append(Record(line))
    return records


def parse_cgi_params(url):
    """Return the query-string parameters of ``url`` as a dict.

    Parameters with an empty value are kept (mapped to ``''``).  When a
    parameter is repeated, the last occurrence wins.
    """
    query = urlparse(url).query
    pairs = parse_qsl(query, keep_blank_values=True)
    return dict(pairs)


def merge_and_sum_features(x, y):
    """Return a new dict holding, for every key of ``x`` or ``y``, the sum of both values.

    A key missing from one of the dicts contributes 0.  Neither input is
    modified.
    """
    summed = {}
    for name in set(x) | set(y):
        summed[name] = x.get(name, 0) + y.get(name, 0)
    return summed


def get_parent_reqid(sess):
    """Return the ``parent-reqid`` CGI parameter of the first request that has one.

    Requests are scanned in the order ``sess.GetRequests()`` yields them and
    each request's ``PageUrl`` is inspected.  Returns ``None`` when no
    request carries the parameter.  The value may be an empty string if the
    parameter is present but blank.
    """
    for request in sess.GetRequests():
        reqid = parse_cgi_params(request.PageUrl).get('parent-reqid')
        if reqid is not None:
            return reqid
    return None


def extract_collections_features_for_session(sess):
    """Compute per-session counters from the session's requests and events.

    Every event counter is the number of events whose ``Path`` contains at
    least one of the given marker strings (checked with the ``in`` operator).
    The event list is fetched once and reused for all counters.

    Returns a dict mapping feature name to an integer count.
    """
    # events, see https://stat.yandex-team.ru/-/CKERyDp9
    paths = [event.Path for event in sess.GetEvents()]

    def count_matching(*markers):
        # number of events whose path contains any of the markers
        return sum(1 for path in paths if any(marker in path for marker in markers))

    return {
        'pages_shown_count': len(sess.GetRequests()),  # number of pages viewed
        'total_events_count': len(paths),  # every user and technical event
        'total_clicks_count': count_matching('click'),  # regular + dynamic clicks
        'total_actions_count': count_matching('finish'),  # successfully completed actions

        # negative signal from the user
        'bad_events_count': count_matching('complain', 'dislike'),

        # various useful event types:
        # NOTE(review): if Path is a string, 'like' also matches 'dislike', so
        # dislikes are counted here as well — confirm whether that is intended.
        'likes_count': count_matching('like'),
        'greenurls_count': count_matching('link_external'),
        'create_content_count': count_matching('create', 'unlock'),
        'share_content_count': count_matching('share'),
        'comment_count': count_matching('comment'),
        'subscribe_count': count_matching('subscribe'),
    }


def extract_collections_features_by_parent_reqids_v1(cont):
    """Compute features only for sessions that were entered with &parent-reqid=NNN.

    Returns a dict mapping parent reqid to that session's feature dict.
    Sessions without a (non-empty) parent reqid are ignored.  If several
    sessions share a parent reqid, the last one overwrites the earlier ones.
    """
    by_reqid = {}
    for session in cont.GetYandexCollectionsEvents():
        # see https://a.yandex-team.ru/arc/trunk/arcadia/quality/ab_testing/stat_collector_lib/abt_features_collections.cpp?rev=6257032#L819
        reqid = get_parent_reqid(session)
        if reqid:
            by_reqid[reqid] = extract_collections_features_for_session(session)
    return by_reqid


def extract_collections_features_by_parent_reqids_v2(cont, max_gap_seconds=60 * 30):
    """Sum features over sessions that follow an entry with &parent-reqid=NNN.

    Sessions are skipped until one carries a non-empty parent reqid.  From
    then on each session is added to that reqid's totals, as long as it
    starts no later than ``max_gap_seconds`` (30 minutes by default) after
    the last request of the previously accepted session.

    The parent reqid is picked once and never reset, so only the first one
    found gets features.  A session rejected for the gap does not advance
    the reference timestamp.

    Returns a dict mapping parent reqid to the summed feature dict.
    """
    features = defaultdict(dict)
    parent_reqid = None
    last_timestamp = None
    for sess in cont.GetYandexCollectionsEvents():
        # see https://a.yandex-team.ru/arc/trunk/arcadia/quality/ab_testing/stat_collector_lib/abt_features_collections.cpp?rev=6257032#L819
        requests = sess.GetRequests()
        if len(requests) == 0:
            # odd session without requests: ignore it
            continue

        if parent_reqid is None:
            # A blank "parent-reqid=" yields '', which must not be mistaken
            # for a found id: normalise every falsy value back to None so the
            # lookup is retried on the next session.
            parent_reqid = get_parent_reqid(sess) or None
            if parent_reqid is None:
                continue
            last_timestamp = requests[-1].Timestamp

        if requests[0].Timestamp - last_timestamp > max_gap_seconds:
            # this session starts too long after the last accepted event
            continue
        last_timestamp = requests[-1].Timestamp

        features[parent_reqid] = merge_and_sum_features(
            features[parent_reqid],
            extract_collections_features_for_session(sess)
        )
    return dict(features)


def extract_collections_features_by_parent_reqids_v3(cont):
    """Sum features over ALL sessions from the first &parent-reqid=NNN entry onwards.

    Sessions are skipped until one carries a non-empty parent reqid.  That
    session and every later one (with no time limit) are summed under that
    reqid.  The parent reqid is picked once and never reset.

    Returns a dict mapping parent reqid to the summed feature dict.
    """
    features = defaultdict(dict)
    parent_reqid = None
    for sess in cont.GetYandexCollectionsEvents():
        # see https://a.yandex-team.ru/arc/trunk/arcadia/quality/ab_testing/stat_collector_lib/abt_features_collections.cpp?rev=6257032#L819
        if parent_reqid is None:
            # A blank "parent-reqid=" yields '', which must not become the
            # aggregation key: normalise every falsy value back to None so
            # the lookup is retried on the next session.
            parent_reqid = get_parent_reqid(sess) or None
            if parent_reqid is None:
                continue

        features[parent_reqid] = merge_and_sum_features(
            features[parent_reqid],
            extract_collections_features_for_session(sess)
        )
    return dict(features)


def get_block_total_clicks(block, joiners):
    """Collect the ids of click events on ``block`` and on the blocks below it.

    Despite the name, the result is a set of event ids rather than a number.
    Using a set means a click seen through several joiners or several
    traversal paths is counted once.
    """
    click_ids = set()
    for joiner in joiners:
        for event in joiner.get_events_by_block(block):
            if isinstance(event, baobab.common.Click):
                click_ids.add(event.event_id)

    # NOTE(review): bfs_iterator presumably yields the block itself (hence the
    # id check) and its whole subtree; if so, the recursion revisits deeper
    # blocks more than once, which the set makes harmless — confirm.
    for descendant in baobab.common.bfs_iterator(block):
        if descendant.id != block.id:
            click_ids |= get_block_total_clicks(descendant, joiners)
    return click_ids


def extract_collections_wizard_viewer_features(r):
    """Compute features of interaction with the Collections viewer on desktop.

    Returns an empty dict when the request has no Baobab trees or no
    Collections wizard block was matched.  Otherwise returns a dict of
    integer counters.
    """
    joiners = r.BaobabAllTrees()
    if not joiners:
        return {}

    rules = {
        'wizard_collections': '$result [@type = "wizard" and @wizard_name = "collections"]',

        'wizard_collections_viewer': '$subresult // collection-viewer-root // collection-viewer',
        'wizard_collections_viewer_sidebar': '#wizard_collections_viewer // sidebar',
        'wizard_collections_viewer_sidebar_link': '#wizard_collections_viewer_sidebar / link',
        'wizard_collections_viewer_sidebar_save': '#wizard_collections_viewer_sidebar / save',
        'wizard_collections_viewer_sidebar_like': '#wizard_collections_viewer_sidebar / like',
        'wizard_collections_viewer_sidebar_share': '#wizard_collections_viewer_sidebar / share',
    }

    marks = tamus.check_rules_multiple_joiners(rules, joiners)
    wizard_blocks = marks.get_blocks('wizard_collections')
    if len(wizard_blocks) == 0:
        return {}

    def clicks_in(blocks):
        # total number of distinct click ids, summed block by block
        total = 0
        for blk in blocks:
            total += len(get_block_total_clicks(blk, joiners))
        return total

    features = {}

    # clicks on the wizard, including opening the viewer; per the original
    # author's note this does NOT include clicks inside the viewer
    features['wizard_total_clicks_count'] = clicks_in(wizard_blocks)

    # all clicks inside the viewer
    features['viewer_total_clicks_count'] = clicks_in(marks.get_blocks('wizard_collections_viewer'))

    # number of cards viewed / scrolled through
    features['cards_shown_count'] = len(marks.get_blocks('wizard_collections_viewer_sidebar'))

    # useful actions in the viewer's right-hand sidebar on the SERP
    sidebar_actions = (
        ('greenurls_count', 'wizard_collections_viewer_sidebar_link'),
        ('create_content_count', 'wizard_collections_viewer_sidebar_save'),
        ('likes_count', 'wizard_collections_viewer_sidebar_like'),
        ('share_content_count', 'wizard_collections_viewer_sidebar_share'),
    )
    for feature_name, rule_name in sidebar_actions:
        features[feature_name] = clicks_in(marks.get_blocks(rule_name))

    return features


def extract_collections_features_from_sessions_file(path):
    records = parse_file(path)
    cont = libra.Parse(records, 'blockstat.dict')

    features1 = extract_collections_features_by_parent_reqids_v1(cont)
    features2 = extract_collections_features_by_parent_reqids_v2(cont)
    features3 = extract_collections_features_by_parent_reqids_v3(cont)

    for r in cont.GetRequests():
        if not r.IsA('TWebRequestProperties'):
            continue
        print '\nWeb request', 'ReqID:', r.ReqID
        print '    wizard viewer features: {}'.format(extract_collections_wizard_viewer_features(r))
        print '    service features1: {}'.format(features1.get(r.ReqID, {}))
        print '    service features2: {}'.format(features2.get(r.ReqID, {}))
        print '    service features3: {}'.format(features3.get(r.ReqID, {}))


def main():
    """Print Collections features for two hard-coded sample session files."""
    session_files = (
        # session with a transition to Collections and actions there
        'session_2020-01-26__09-36-24-324081.tsv',
        # session with activity in the viewer
        'session_2019-12-30__15-57-17-531855.tsv',
    )
    for session_file in session_files:
        extract_collections_features_from_sessions_file(session_file)


# Call main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
