#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
from pytils import sane_dir, stringify_dict_keys
import scarab.main as scarab
import base64
import nile.config
from urlparse import urlparse, parse_qs
import nile
from nile.api.v1 import (
    clusters,
    Record
)


def humanize_map(records):
    """Map raw log records to humanized request/answer records.

    For every input record the last tab-separated field of ``record['text']``
    is deserialized with ``scarab.deserialize_event_from_str``. Rows that
    fail to deserialize are skipped. Events of type
    ``ATOM_FRONT_REQUEST_EVENT`` and ``ATOM_FRONT_ANSWER_EVENT`` are turned
    into ``Record`` objects; any other event type produces no output.

    :param records: iterable of records exposing a ``'text'`` item.
    :return: generator of ``Record`` objects.
    """
    for record in records:
        text = record['text'].split('\t')[-1]
        try:
            # NOTE(review): meaning of the second positional argument (True)
            # is defined by scarab and not visible here.
            parsed = scarab.deserialize_event_from_str(text, True)
        except Exception:
            # Best effort: drop rows that cannot be deserialized. Catching
            # Exception (rather than a bare except) keeps KeyboardInterrupt
            # and SystemExit propagating.
            continue
        if parsed.type == 'ATOM_FRONT_REQUEST_EVENT':
            yield Record(**humanize_request(parsed))
        elif parsed.type == 'ATOM_FRONT_ANSWER_EVENT':
            yield Record(**humanize_answer(parsed))


def crop_timestamp(ts):
    """Return the first ten characters of a timestamp as an ``int``.

    Integral inputs are converted to their decimal string form first; any
    other input is sliced as-is and must therefore support ``[:10]``.
    Presumably this truncates a millisecond-or-finer epoch value to
    seconds -- confirm against the event schema.

    :param ts: timestamp as an integral number or a string of digits.
    :return: ``int`` built from the first ten characters.
    :raises ValueError: if the leading characters are not a valid integer.
    """
    # Function-scope import keeps the module's import block untouched; the
    # module is cached after the first call.
    import numbers

    # numbers.Integral also covers Python 2 ``long`` (and other registered
    # integer types), which a plain ``isinstance(ts, int)`` check misses.
    if isinstance(ts, numbers.Integral):
        ts = str(ts)
    return int(ts[:10])


def humanize_request(e):
    """Build the output fields for a request event.

    :param e: event exposing ``request_id.value``, ``timestamp`` and
        ``request_url``.
    :return: dict with ``request_id``, ``timestamp_r`` (cropped timestamp)
        and ``request`` (query parameters of the request URL).
    """
    request_id = e.request_id.value
    cropped = crop_timestamp(e.timestamp)
    params = request_params(e.request_url)
    return {
        'request_id': request_id,
        'timestamp_r': cropped,
        'request': params
    }


def request_params(url):
    """Extract query-string parameters from a URL.

    When a parameter is repeated, only its first value is kept.

    :param url: URL string.
    :return: dict mapping each parameter name to its first value.
    """
    query = urlparse(url).query
    params = {}
    for name, values in parse_qs(query).items():
        if not values:
            continue
        params[name] = values[0]
    return params


def humanize_answer(e):
    """Build the output fields for an answer event.

    :param e: event exposing ``request_id.value``, ``timestamp``,
        ``user_id`` and ``answer``.
    :return: dict with ``request_id``, ``timestamp_a`` (cropped timestamp),
        ``user_ids`` and ``answer``.
    """
    request_id = e.request_id.value
    cropped = crop_timestamp(e.timestamp)
    user_ids = get_userids(e.user_id)
    answer = parse_answer(e.answer)
    return {
        'request_id': request_id,
        'timestamp_a': cropped,
        'user_ids': user_ids,
        'answer': answer
    }


def get_userids(uid):
    """Collect the non-empty ``value`` of each attribute listed by ``sane_dir``.

    :param uid: object whose attributes (as enumerated by
        ``pytils.sane_dir``) may carry a ``value`` attribute.
    :return: dict mapping attribute name to its truthy ``value``; attributes
        without a ``value`` or with a falsy one are omitted.
    """
    found = {}
    for name in sane_dir(uid):
        field = getattr(uid, name)
        value = getattr(field, 'value', '')
        if not value:
            continue
        found[name] = value
    return found


def parse_answer(a):
    """Index parsed answers by their name.

    Entries with a missing or empty ``name`` are ignored, as are entries
    whose parsed form is falsy.

    :param a: iterable of answer objects.
    :return: dict mapping answer name to the result of
        ``parse_single_answer``.
    """
    by_name = {}
    for item in a:
        name = getattr(item, 'name', '')
        if name:
            parsed = parse_single_answer(item)
            if parsed:
                by_name[name] = parsed
    return by_name


def parse_single_answer(a):
    """Convert one answer object into a plain dict.

    :param a: answer exposing ``aux_info.props.json``, ``rerank_success``,
        ``collect_pool_mode``, ``docs`` and ``aux_info.candidate_infos``.
    :return: dict with ``props`` and ``rerank_success``; ``collect_pool_mode``
        is added (as ``True``) only when set, and ``candidate`` only when
        there are documents to report.
    """
    aux = a.aux_info
    parsed = {
        'props': wrap_props(aux.props.json),
        'rerank_success': a.rerank_success,
    }
    if a.collect_pool_mode:
        parsed['collect_pool_mode'] = True

    # Prefer the answer's own docs; fall back to the auxiliary candidates.
    candidates = a.docs or aux.candidate_infos
    if candidates:
        parsed['candidate'] = [parse_doc(item) for item in candidates]
    return parsed


def parse_doc(d):
    """Convert one document/candidate object into a plain dict.

    :param d: object exposing ``banner_id``, ``link``, ``score``,
        ``beta_params``, ``weighted_score`` and ``featuredump``.
    :return: dict with ``banner_id``, ``host`` and ``score``; optionally
        ``beta_params`` (``[alpha, beta]``), ``weighted_score`` (only when it
        differs from ``score``) and ``featuredump``.
    """
    # NOTE(review): 'host' is whatever precedes the first '/', so this
    # presumably expects scheme-less links -- confirm against the data.
    host = d.link.split('/')[0]
    doc = {
        'banner_id': d.banner_id,
        'host': host,
        'score': d.score
    }

    if d.beta_params:
        params = d.beta_params
        doc['beta_params'] = [params.alpha, params.beta]

    if d.weighted_score != d.score:
        doc['weighted_score'] = d.weighted_score

    if d.featuredump:
        # featuredump is a flat sequence; even positions become keys and
        # odd positions become the corresponding values.
        names = d.featuredump[::2]
        values = d.featuredump[1::2]
        doc['featuredump'] = stringify_dict_keys(dict(zip(names, values)))
    return doc


def wrap_props(props):
    """Turn the ``'filtering-results'`` list of ``props`` into a dict.

    Each entry of ``props['filtering-results']`` is split on ``':'``; the
    first piece becomes the key and the second piece, converted to ``int``,
    the value. The resulting dict is passed through
    ``pytils.stringify_dict_keys`` and stored back under the same key.

    ``props`` is modified in place and also returned. When ``props`` is
    empty/``None`` or has no ``'filtering-results'`` key it is returned
    unchanged instead of raising.

    :param props: mapping of answer properties (or a falsy value).
    :return: the same ``props`` object.
    :raises ValueError: if the second piece of an entry is not an integer.
    :raises IndexError: if an entry contains no ``':'``.
    """
    if not props or 'filtering-results' not in props:
        return props

    counts = {}
    for entry in props['filtering-results']:
        # Split once per entry instead of once per tuple element.
        pieces = entry.split(':')
        counts[pieces[0]] = int(pieces[1])
    props['filtering-results'] = stringify_dict_keys(counts)
    return props


def humanize_reduce(groups):
    """Merge all records of each group into a single record.

    The attributes of every record in a group (as returned by ``vars``) are
    folded into one dict, later records overriding earlier ones on key
    clashes, and emitted as one ``Record`` per group.

    :param groups: iterable of ``(key, records)`` pairs.
    :return: generator of merged ``Record`` objects.
    """
    for _key, group in groups:
        merged = {}
        for item in group:
            merged.update(vars(item))
        yield Record(**merged)


def main():
    """Build and run the humanizing job for the date given on the command line.

    Reads the ``statbox/atomfront-answer-log/<date>`` table, maps it through
    ``humanize_map``, groups by ``request_id``, reduces with
    ``humanize_reduce`` and writes the result to ``$job_root/<date>``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('date')
    args = arg_parser.parse_args()

    hahn = clusters.Hahn(pool='search-research_pecheny')
    cluster = hahn.env(
        templates=dict(
            job_root='home/atom/humanized'
        )
    )

    # Register the wheel directory and packages with nile, skipping any
    # entry that is already present.
    wheels_dir = '/usr/share/nile/wheels'
    if wheels_dir not in nile.config.WHEEL_PATH:
        nile.config.WHEEL_PATH.append(wheels_dir)
    for package in ('scarab', 'rstr'):
        if package not in nile.config.PACKAGES:
            nile.config.PACKAGES.append(package)

    job = cluster.job()

    source = job.table('statbox/atomfront-answer-log/{}'.format(args.date))
    # pytils.py is attached as a job file for the map step.
    pytils_file = nile.files.LocalFile('/home/pecheny/analytics/atom/pytils.py')
    mapped = source.map(humanize_map, files=[pytils_file])
    merged = mapped.groupby('request_id').reduce(humanize_reduce)
    merged.put('$job_root/{}'.format(args.date))

    job.run()


# Script entry point: run the job only when this file is executed directly.
if __name__ == "__main__":
    main()
