#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
import random

from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    Record
)
from collections import Counter
import getpass


def query_reduce(groups):
    """Collapse every group of records into one summary record.

    ``groups`` is an iterable of ``(key, records)`` pairs. For each pair
    one ``Record`` is yielded; it carries the attributes of ``key`` plus:

    * ``lr`` -- a plain dict mapping ``str(rec.lr)`` to the number of
      records in the group that had that value;
    * ``ts`` -- one of the non-empty ``ts`` values seen in the group, or
      ``""`` when there was none. The first non-empty value is taken
      unconditionally; every later non-empty value replaces the current
      choice with probability 1/10, so the result is not deterministic.

    The output fields are written into ``vars(key)`` itself, i.e. into the
    attribute dict of the key object, not into a copy of it.
    """
    for group_key, rows in groups:
        lr_counts = Counter()
        chosen_ts = ""
        for row in rows:
            lr_counts[str(row.lr)] += 1
            candidate = row.ts
            if not candidate:
                # Rows without a ts never influence the choice.
                continue
            # Short-circuit matters: the RNG is consulted only once a ts
            # has already been chosen.
            if not chosen_ts or random.randint(1, 10) == 1:
                chosen_ts = candidate
        fields = vars(group_key)
        fields['ts'] = chosen_ts
        fields['lr'] = dict(lr_counts)
        yield Record(**fields)


def main():
    """Build and run the job that aggregates queries per key.

    Reads the source table, keeps rows for which ``bool(query)`` is true,
    groups them by country / platform / service / query, reduces each group
    with ``query_reduce`` and puts the result to ``$job_root/table``.
    """
    input_path = (
        '//home/search-research/ensuetina/QUERIES_MINING/queries_with_country'
    )
    key_fields = ('country', 'platform', 'service', 'query')

    # The pool name is derived from the name of the user running the script.
    pool_name = 'search-research_{}'.format(getpass.getuser())
    path_templates = dict(
        job_root='home/videolog/new_baskets_lrs_ts',
    )
    hahn = clusters.yt.Hahn(pool=pool_name).env(templates=path_templates)

    job = hahn.job()

    non_empty = job.table(input_path).filter(
        nf.custom(lambda value: bool(value), 'query')
    )
    reduced = non_empty.groupby(*key_fields).reduce(query_reduce)
    stored = reduced.put('$job_root/table')
    # NOTE(review): the sort is chained after put() and its result is not
    # written anywhere -- confirm whether a sorted output table was intended.
    stored.sort(*key_fields)

    job.run()


# Run the job only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
