#-*-coding: utf8 -*-

from nile import (
    clusters,
    Record,
    filters as nf,
    aggregators as na
)
import nile
import uatraits, libra

import urlparse

def Reduce(groups):
    """Yield web queries whose search props flag 'misspell_keyboard_layout'.

    For every group the records are parsed into a session with
    ``libra.ParseSession(records, 'blockstat.dict')``. Each request in the
    session that is a ``TYandexWebRequest`` is inspected: for every key of
    its ``SearchPropsValues`` that contains ``'misspell_keyboard_layout'``
    and whose value stringifies to ``'1'``, one record is emitted.

    Args:
        groups: iterable of ``(key, records)`` pairs; ``key.key`` is the
            user id the records were grouped by.

    Yields:
        ``Record(q=<request query>, k=<prop name>, k_val=<prop value>)``.
    """
    for key, records in groups:
        uid = key.key
        # Only ids starting with 'y' are processed (presumably yandexuid
        # keys -- TODO confirm). An empty id is skipped rather than raising
        # IndexError on uid[0].
        if not uid or uid[0] != 'y':
            continue

        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: drop the session that failed to parse but keep
            # going. A `return` here would end the generator and silently
            # discard every remaining group handed to this reducer call.
            continue

        for request in session:
            if not request.IsA('TYandexWebRequest'):
                continue

            query = request.Query
            props = request.SearchPropsValues
            for name in props.keys():
                if 'misspell_keyboard_layout' not in name:
                    continue
                value = props[name]
                if str(value) == '1':
                    yield Record(q=query, k=name, k_val=value)

# Owner of the output directory; used to build the job root below.
username = 'ensuetina'

# Every '$job_root/...' path used below resolves under this directory.
cluster = clusters.Plato().env(
    job_root='home/search-research/' + username + '/' + 'nile/TOP_MISSPELL_QUERIES',
)

# Single-day range: start and end date are the same.
dates = nile.api.path.DateRange('20160112', '20160112')
for date in dates:
    job = cluster.job().env(date=date)

    # Input table for the current date ('$date' comes from the job env).
    log = job.table('userdata/user_sessions/$date')

    # Group by 'key', order each group by 'subkey', then run the reducer.
    sessions_by_key = log.groupby('key').sort('subkey')
    reducer_files = [
        nile.files.LocalFile('blockstat.dict'),
        nile.files.LocalFile('libra.so'),
    ]
    reduced = sessions_by_key.reduce(Reduce, files=reducer_files)
    queries = reduced.put('$job_root/result')

    # Count the emitted records per query text 'q'.
    per_query = queries.groupby('q')
    per_query.aggregate(freq=na.count()).put('$job_root/aggr_result')

    # Disabled experiments, kept for reference:
    #   aggr_queries = queries.groupby('query').aggregate(freq=aggregators.count()).put('$job_root/queries')
    #   filtered_queries = aggr_queries.filter(
    #       nf.custom(lambda x: x > 1000, 'freq')
    #   ).put('$job_root/queries_freq_more_1K')  # keep only freq > 1000
    #   join1 = tr.join(filtered_queries, by='query').filter(nf.custom(bool, 'freq')).put('$job_root/join1')  # join hosts table with queries table

    job.run()

