#!/usr/bin/env python

import argparse
import datetime
import urlparse
import time

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record,
)
import nile
import libra
import pandas as pd
import yt.wrapper as yt
import logging

# Root logger setup: only ERROR and above is emitted; every line carries the
# time, source file name, line number and level.
logging.basicConfig(format='[%(asctime)s] %(filename)s[LINE:%(lineno)d] %(levelname)-8s %(message)s',
                    level=logging.ERROR)

# Prefix of the name given to the job created in main().
JOB_NAME = 'TURBO METRICS'
# Directory whose children main() lists and matches against 'YYYY-MM-DD' date strings.
LOG_ROOT = '//user_sessions/pub/search/daily'
# Maximum number of attempts main() makes to build and run the job.
RETRY_NUM = 48
# Seconds main() waits after a failed attempt before trying again (30 minutes).
SLEEP_TIME = 60 * 30


def is_dynamic_click(click):
    """Tell whether a click carries the variable ``action=dynamic_click``.

    Args:
        click: object exposing ``GetVars()``, which is iterated as
            ``(key, value)`` pairs.

    Returns:
        True if one of the pairs is ``('action', 'dynamic_click')``,
        False otherwise. Any error while reading or unpacking the variables
        is treated as "not a dynamic click" and also yields False.
    """
    try:
        for key, value in click.GetVars():
            if key == 'action' and value == 'dynamic_click':
                return True
    except Exception:
        # Best effort: a click whose variables cannot be read is not dynamic.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still propagate.
        return False
    # Always return a real bool instead of falling through to None.
    return False


def firstreduce(groups):
    """Reduce user sessions into one turbo/chat metrics record per request.

    For every group the records are parsed with ``libra.ParseSession``; a
    session that fails to parse is skipped. A record is yielded only for
    requests that:

    * are one of the four recognised request types (mapped to the ``ui``
      values 'desktop', 'app', 'touch', 'pad'),
    * have ``PageNo == 0``,
    * are ``TWebRequestProperties``,
    * and have a non-empty ``GetMainBlocks()``.

    Args:
        groups: iterable of ``(key, recs)`` pairs; ``key.key`` is used as the
            user id and ``recs`` is handed to ``libra.ParseSession``.

    Yields:
        ``Record`` with request identification (uid, reqid, time, req_day,
        query, browser, ui), click counters (``*_click``), 0/1 indicators
        derived from the counters (``with_*_click``) and 0/1 indicators of
        what was present on the page (``with_*``).
    """
    wiki_hosts = (
        'ru.wikipedia.org',
        'ru.m.wikipedia.org',
        'en.wikipedia.org',
        'en.m.wikipedia.org',
    )
    # Variable pair that marks a block or click as a turbo ("preview") one.
    turbo_var = ['preview', 'true']

    for key, recs in groups:
        uid = key.key
        try:
            requests = libra.ParseSession(recs, 'blockstat.dict')
        except Exception:
            # Best effort: sessions that cannot be parsed are dropped.
            continue

        for request in requests:
            # The order of the type checks is kept as is on purpose.
            if request.IsA('TYandexWebRequest'):
                ui = 'desktop'
            elif request.IsA('TMobileAppYandexWebRequest'):
                ui = 'app'
            elif request.IsA('TTouchYandexWebRequest'):
                ui = 'touch'
            elif request.IsA('TPadYandexWebRequest'):
                ui = 'pad'
            else:
                continue
            if request.PageNo != 0:
                continue
            if not request.IsA('TWebRequestProperties'):
                continue

            # Lower-case the query as unicode, then store it back as UTF-8 bytes.
            query = request.Query.decode('utf-8').lower().encode('utf-8')
            rearr = request.RearrValues
            browser = rearr['browser_name'] if 'browser_name' in rearr else ''

            # Click counters.
            total_click = 0
            total_turbo_click = 0
            misc_turbo_click = 0
            wiki_turbo_click = 0
            snippet_nonwiki_turbo_click = 0
            entity_turbo_click = 0
            news_turbo_click = 0
            instructions_turbo_click = 0
            chats_click = 0

            # Request-level "was shown" indicators (0/1).
            with_turbo = 0
            with_wiki_turbo = 0
            with_snippet_nonwiki_turbo = 0
            with_entity_turbo = 0
            with_news_turbo = 0
            with_instructions_turbo = 0
            with_chats = 0

            for click in request.GetMiscClicks():
                converted_path = click.ConvertedPath
                if turbo_var in click.GetVars() and 'sideblock' not in converted_path:
                    misc_turbo_click += 1
                    total_turbo_click += 1
                # The two chat paths are checked independently; a path that
                # contains both substrings is counted twice.
                if 'web/item/chat' in converted_path:
                    chats_click += 1
                if '/snippet/companies/company/chat' in converted_path:
                    chats_click += 1

            if not request.GetMainBlocks():
                # Requests without main blocks produce no record.
                continue

            props = request.SearchPropsValues
            for bs_block in request.GetBSBlocks():
                path = bs_block.Path
                if turbo_var in bs_block.GetVars():
                    # NOTE: an earlier variant of this check used the property
                    # name 'UPPER.Facts.Source_fact_instruction'.
                    if path == '/snippet/suggest_fact/button' and \
                            ('Facts.Source_fact_instruction' in props) and \
                            (props['Facts.Source_fact_instruction'] == '1'):
                        with_instructions_turbo = 1
                        with_turbo = 1
                    if path == '/snippet/entity_search/object-badge/description/title':
                        with_entity_turbo = 1
                        with_turbo = 1
                    if path == '/snippet/news/doc':
                        with_news_turbo = 1
                        with_turbo = 1
                    if path == '/web/item/showcase/item/thumb':
                        with_turbo = 1
                if path == '/snippet/companies/company/chat':
                    with_chats = 1

            for block in request.GetMainBlocks():
                main_result = block.GetMainResult()
                if not main_result.IsA('TOrganicResultProperties'):
                    continue

                # Per-block flags: they describe the block currently being
                # processed and drive the attribution of its clicks.
                wizard_news = 0
                wizard_entity = 0
                wizard_suggest_fact = 0
                wiki_block = 0
                snippet_nonwiki_block = 0
                if main_result.IsA('TWizardResult') or main_result.IsA('TBlenderWizardResult'):
                    if main_result.Name == 'news':
                        wizard_news = 1
                    if main_result.Name == 'entity_search':
                        wizard_entity = 1
                    if main_result.Name == 'suggest_fact':
                        wizard_suggest_fact = 1

                adapters = block.Adapters
                url_block = main_result.Url
                try:
                    host_block = urlparse.urlparse(url_block).hostname.replace('www.', '')
                except Exception:
                    # E.g. no hostname could be extracted: compare the raw
                    # URL value against the wiki hosts instead.
                    host_block = url_block
                if host_block in wiki_hosts:
                    wiki_block = 1

                if 'snippet-with-extended-preview' in adapters:
                    with_turbo = 1
                    if wiki_block:
                        with_wiki_turbo = 1
                    else:
                        snippet_nonwiki_block = 1
                        with_snippet_nonwiki_turbo = 1
                if 'chat-with-org' in adapters:
                    with_chats = 1

                for child in block.GetChildren():
                    for click in child.GetClicks():
                        converted_path = click.ConvertedPath
                        if not is_dynamic_click(click):
                            total_click += 1
                        if 'web/item/chat' in converted_path:
                            chats_click += 1
                        if '/snippet/companies/company/chat' in converted_path:
                            chats_click += 1
                        if turbo_var not in click.GetVars():
                            continue
                        if 'sideblock' in converted_path:
                            continue
                        total_turbo_click += 1
                        if wiki_block:
                            wiki_turbo_click += 1
                        # Use the per-block flag here. The request-level
                        # with_snippet_nonwiki_turbo stays set once any block
                        # had such a snippet and would wrongly attribute
                        # clicks of all later blocks to this category.
                        if snippet_nonwiki_block:
                            snippet_nonwiki_turbo_click += 1
                        if wizard_entity:
                            entity_turbo_click += 1
                        if wizard_news:
                            news_turbo_click += 1
                        if wizard_suggest_fact:
                            instructions_turbo_click += 1

            yield Record(
                uid=uid,
                reqid=request.ReqID,
                time=request.Timestamp,
                req_day=datetime.datetime.fromtimestamp(request.Timestamp).strftime('%Y-%m-%d'),
                query=query,
                browser=browser,
                ui=ui,
                total_click=total_click,
                total_turbo_click=total_turbo_click,
                misc_turbo_click=misc_turbo_click,
                wiki_turbo_click=wiki_turbo_click,
                snippet_nonwiki_turbo_click=snippet_nonwiki_turbo_click,
                entity_turbo_click=entity_turbo_click,
                news_turbo_click=news_turbo_click,
                instructions_turbo_click=instructions_turbo_click,
                chats_click=chats_click,
                with_turbo_click=int(total_turbo_click > 0),
                with_misc_turbo_click=int(misc_turbo_click > 0),
                with_wiki_turbo_click=int(wiki_turbo_click > 0),
                with_entity_turbo_click=int(entity_turbo_click > 0),
                with_news_turbo_click=int(news_turbo_click > 0),
                with_instructions_turbo_click=int(instructions_turbo_click > 0),
                with_chats_click=int(chats_click > 0),
                with_turbo=with_turbo,
                with_wiki_turbo=with_wiki_turbo,
                with_snippet_nonwiki_turbo=with_snippet_nonwiki_turbo,
                with_entity_turbo=with_entity_turbo,
                with_news_turbo=with_news_turbo,
                with_instructions_turbo=with_instructions_turbo,
                with_chats=with_chats,
            )


def argument_parser():
    """Parse the command-line options of the script from ``sys.argv``.

    Options:
        -p  pool name (string, optional; stored as ``pool``)
        -r  result path (string, optional; stored as ``rpath``)
        -t  timestamp (int, required; stored as ``timestamp``)
        -n  number of days (int, required; stored as ``ndays``)

    ``-t`` and ``-n`` are required because main() feeds them straight into
    ``datetime.fromtimestamp`` / ``datetime.timedelta``; without them the
    script would fail later with an unrelated-looking TypeError instead of a
    usage message.

    Returns:
        argparse.Namespace with attributes ``pool``, ``rpath``, ``timestamp``
        and ``ndays``.

    Raises:
        SystemExit: raised by argparse on invalid or missing arguments.
    """
    parser = argparse.ArgumentParser(description='Get parameters')
    parser.add_argument(
        '-p',
        dest='pool',
        type=str,
        help='you hahn pool',
    )
    parser.add_argument(
        '-r',
        dest='rpath',
        type=str,
        help='result path',
    )
    parser.add_argument(
        '-t',
        dest='timestamp',
        type=int,
        required=True,
        help='timestamp',
    )
    parser.add_argument(
        '-n',
        dest='ndays',
        type=int,
        required=True,
        help='num of days',
    )
    return parser.parse_args()


def main():
    """Build and run the job that aggregates turbo metrics per ui and day.

    Steps:
      1. Read the options (timestamp, result path, number of days, pool).
      2. Build the list of dates from ``timestamp - ndays`` days to
         ``timestamp - 1`` day (inclusive) and keep only those present in the
         listing of LOG_ROOT.
      3. Run ``firstreduce`` over the ``<date>/clean`` tables grouped by
         ``key`` and sorted by ``subkey``, aggregate the result by
         ``(ui, req_day)`` and store it under ``<job_root>/<current_date>``.

    The job is attempted up to RETRY_NUM times, sleeping SLEEP_TIME seconds
    after each failed attempt except the last one.

    Raises:
        nile.nodes.table.MissingSourceTablesError: re-raised when the last
            attempt fails too, so that the failure is not reported as a
            success to whatever launched the script.
    """
    args = argument_parser()
    timestamp = args.timestamp
    job_root = args.rpath
    n_days = args.ndays

    # Values this large are reduced by a factor of 1000
    # (presumably milliseconds -> seconds; confirm with the caller).
    if timestamp > 100000000000:
        timestamp = timestamp // 1000

    reference_time = datetime.datetime.fromtimestamp(timestamp)
    current_date = (reference_time - datetime.timedelta(1)).strftime('%Y-%m-%d')
    last_needed_date = (reference_time - datetime.timedelta(n_days)).strftime('%Y-%m-%d')

    dates = [day.strftime('%Y-%m-%d') for day in pd.date_range(last_needed_date, current_date)]

    yt.config.set_proxy("hahn")
    available_dates = set(dates).intersection(yt.list(LOG_ROOT))

    # Brace-enclosed, comma-separated list of '<date>/clean' entries for the
    # requested dates found under LOG_ROOT, in chronological order.
    dates_str = '{' + ','.join('{}/clean'.format(x) for x in dates if x in available_dates) + '}'

    cluster = clusters.Hahn(pool=args.pool).env(
        templates=dict(
            log_root=LOG_ROOT,
            job_root=job_root,
        )
    )
    external_files = [
        nile.files.RemoteFile('//statbox/resources/libra.so'),
        nile.files.RemoteFile('//statbox/statbox-dict-last/blockstat.dict')
    ]

    for retry in range(RETRY_NUM):
        try:
            job = cluster.job(JOB_NAME + ': filter_logs_{}'.format(current_date)).env(
                templates=dict(
                    dates=dates_str,
                )
            )
            all_log = job.table('$log_root/@dates')
            result_logs = all_log.groupby('key').sort('subkey').reduce(
                firstreduce,
                files=external_files,
                memory_limit=16 * 1024,
                intensity='large_data',
            )
            result_logs.groupby('ui', 'req_day').aggregate(
                timestamp=na.max('time'),
                count=na.count(),
                total_click=na.sum('total_click'),
                total_turbo_click=na.sum('total_turbo_click'),
                misc_turbo_click=na.sum('misc_turbo_click'),
                wiki_turbo_click=na.sum('wiki_turbo_click'),
                snippet_nonwiki_turbo_click=na.sum('snippet_nonwiki_turbo_click'),
                entity_turbo_click=na.sum('entity_turbo_click'),
                news_turbo_click=na.sum('news_turbo_click'),
                instructions_turbo_click=na.sum('instructions_turbo_click'),
                chats_click=na.sum('chats_click'),
                with_turbo_click=na.sum('with_turbo_click'),
                with_misc_turbo_click=na.sum('with_misc_turbo_click'),
                with_wiki_turbo_click=na.sum('with_wiki_turbo_click'),
                with_entity_turbo_click=na.sum('with_entity_turbo_click'),
                with_news_turbo_click=na.sum('with_news_turbo_click'),
                with_instructions_turbo_click=na.sum('with_instructions_turbo_click'),
                with_chats_click=na.sum('with_chats_click'),
                with_turbo=na.sum('with_turbo'),
                with_wiki_turbo=na.sum('with_wiki_turbo'),
                with_snippet_nonwiki_turbo=na.sum('with_snippet_nonwiki_turbo'),
                with_entity_turbo=na.sum('with_entity_turbo'),
                with_news_turbo=na.sum('with_news_turbo'),
                with_instructions_turbo=na.sum('with_instructions_turbo'),
                with_chats=na.sum('with_chats'),
            ).put('$job_root/{}'.format(current_date))
            job.run()
            break
        except nile.nodes.table.MissingSourceTablesError:
            if retry == RETRY_NUM - 1:
                # Out of attempts: surface the failure instead of sleeping
                # once more and then exiting as if the job had succeeded.
                logging.error('Error with try {}, no tries left out of {}'.format(retry, RETRY_NUM))
                raise
            logging.error('Error with try {}, go to sleep for {} second'.format(retry, SLEEP_TIME))
            time.sleep(SLEEP_TIME)


# Run the job only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
