#!/usr/bin/env python
# -*- coding: utf-8 -*-


from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    resources as sr,
    filters as qf
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import getpass #obligatory for Statface
import json
import datetime

def get_ui(r):
    """Return the device bucket label for request ``r``.

    ``r`` must expose ``IsA(type_name)``. The result is:

    * ``'desktop'`` when ``r`` is a ``TDesktopUIProperties``;
    * ``'touch+searchapp+pad'`` when it is a touch, mobile-app or pad UI;
    * ``''`` when none of the above match.

    Desktop is checked first, so it wins if several types match.
    """
    if r.IsA('TDesktopUIProperties'):
        return 'desktop'

    # Touch, mobile app and pad are reported as one combined bucket.
    handheld_kinds = (
        'TTouchUIProperties',
        'TMobileAppUIProperties',
        'TPadUIProperties',
    )
    if any(r.IsA(kind) for kind in handheld_kinds):
        return 'touch+searchapp+pad'

    return ''

def gather_request_events(request):
    """Collect the events of ``request`` that count as user actions.

    * Web requests (``TWebRequestProperties``): the clicks only.
    * Image requests (``TImagesRequestProperties``): the clicks, followed by
      own events that are either ``TImageShow`` or a ``TImageNavig`` whose
      ``ConvertedPath`` contains neither ``"show"`` nor ``"rim/more"``.
    * Any other request type: an empty list.

    Returns a new list; the events are not sorted here.
    """
    if request.IsA('TWebRequestProperties'):
        return list(request.GetClicks())

    if not request.IsA('TImagesRequestProperties'):
        return []

    collected = list(request.GetClicks())
    for own_event in request.GetOwnEvents():
        if own_event.IsA('TImageShow'):
            collected.append(own_event)
            continue
        if not own_event.IsA('TImageNavig'):
            continue
        # TODO: clarify which navigation paths should be excluded.
        converted = own_event.ConvertedPath
        if "show" in converted or "rim/more" in converted:
            continue
        collected.append(own_event)
    return collected

@with_hints(output_schema=dict(
    click=int,
    path=str,
    device=str
    )
)
def parse_user_sessions_img(recs):
    """Reducer: emit a record for every "long" click on a tracked path.

    ``recs`` yields ``(key, records)`` pairs. Each group is parsed with
    ``libra.ParseSession`` for the ``img`` and ``web`` services; groups that
    fail to parse are reported on stderr and skipped.

    While walking the requests and their events in order, the path and
    timestamp of the latest action are remembered. A record is emitted when
    that path is one of ``paths`` and the next request or event starts more
    than ``long_click_gap`` after it.

    Yields:
        ``Record(click=1, path=<tracked path>, device=<get_ui() label>)``.

    Notes:
        * An action that is the last thing in its session is never emitted,
          because nothing later exists to compare its timestamp with.
        * ``device`` is the label of the most recent request seen when the
          record is emitted, which is not necessarily the request that the
          click belongs to.
          NOTE(review): confirm that this attribution is intended.
    """
    # Imported inside the reducer; presumably the module only exists on the
    # worker where libra.so is attached -- TODO confirm.
    import libra

    # Minimum gap after an action for it to count as "long". Uses the same
    # units as Timestamp -- presumably seconds, TODO confirm.
    long_click_gap = 120

    paths = frozenset([
        "/image/new/preview/commercial/related/schemaorg",
        "/image/new/preview/commercial/related/market",
        "/image/new/preview/commercial/title/market",
        "/image/new/preview/commercial/link/market",
        "/image/new/preview/commercial/button/market",
        "/image/new/preview/commercial/thumb/market",
        "/image/touch/preview/commercial/link/market",
        "/image/touch/preview/commercial/button/market",
        "/snippet/images/commercial/related/schemaorg",
        "/snippet/images/commercial/title/market",
        "/snippet/images/commercial/path/market",
        "/snippet/images/commercial/button/market",
        "/snippet/images/commercial/thumb/market",
        "/snippet/images/commercial/related/market",
    ])

    for _, records in recs:
        try:
            session = libra.ParseSession(records, 'blockstat.dict', None, ['img', 'web'])
        except Exception as e:
            # Best effort: one broken session must not fail the whole job,
            # but the cause is logged so failures can be diagnosed.
            sys.stderr.write("libraerror: %r\n" % (e,))
            continue

        previous_action_timestamp = None
        previous_action_path = None
        ui = ''

        for r in session:
            # Request-level check: the previous action was followed by a
            # pause that lasted until this request began.
            if (previous_action_path in paths
                    and previous_action_timestamp is not None
                    and r.Timestamp > previous_action_timestamp + long_click_gap):
                yield Record(
                    click=1,
                    path=previous_action_path,
                    device=ui
                )
                # Reset so the event loop below does not emit the same
                # action a second time.
                previous_action_path = None
                previous_action_timestamp = r.Timestamp
            ui = get_ui(r)

            events = gather_request_events(r)
            events.sort(key=lambda x: x.ClientTimestamp)

            for event in events:
                # Event-level check against the previously seen action.
                if (previous_action_timestamp is not None
                        and previous_action_path in paths
                        and event.Timestamp > previous_action_timestamp + long_click_gap):
                    yield Record(
                        click=1,
                        path=previous_action_path,
                        device=ui
                    )

                previous_action_timestamp = event.Timestamp
                # Not every event type is guaranteed to expose ConvertedPath.
                previous_action_path = event.ConvertedPath if hasattr(event, 'ConvertedPath') else None

@with_hints(output_schema={'click': int, 'path': str, 'device': str})
def add_totals(recs):
    """Mapper: pass every record through and add a ``_total_`` twin.

    For each incoming record, the record itself is yielded unchanged. It is
    followed by a new record with the same ``path``, ``click`` set to the
    incoming ``path_clicks`` and ``device`` fixed to ``"_total_"``.

    NOTE(review): the pass-through record carries ``path_clicks`` while the
    declared output schema lists ``click``/``path``/``device`` -- confirm
    that the schema matches what downstream steps expect.
    """
    for row in recs:
        yield row
        total_row = Record(
            click=row.path_clicks,
            path=row.path,
            device="_total_",
        )
        yield total_row

@cli.statinfra_job
def make_job(job, nirvana, options, statface_client):
    """Build the job that publishes 'Image/Others/commercial_long_greenurls'.

    Args:
        job: job object to configure and attach the pipeline to.
        nirvana: provides ``output_tables``; the first one is the job root.
        options: provides ``dates``; the first one becomes ``fielddate``.
        statface_client: client handed to the Statface report.

    Returns:
        The configured ``job``.

    Pipeline, in order: read the daily user-sessions sample, group by 'key'
    sorted by 'subkey', reduce with ``parse_user_sessions_img``, sum 'click'
    per 'path', map with ``add_totals``, sum 'path_clicks' per 'device',
    store under ``$job_root/parsed_raw``, add ``fielddate``, sort by
    'device' and publish to the report.

    NOTE(review): the first aggregation groups by 'path' only, so the
    'device' value produced by the reducer does not appear to survive it.
    The later group-by on 'device' would then only see what ``add_totals``
    sets -- confirm this is intended.
    """
    output_root = nirvana.output_tables[0]
    report_date = options.dates[0]

    job = job.env(
        yt_spec_defaults={
            'pool_trees': ["physical"],
            'tentative_pool_trees': ["cloud"],
        },
        templates={
            'job_root': output_root,
            'tmp_root': '//tmp/anastasiiait/regular_comm_greenurls',
        },
    )

    report = ns.StatfaceReport()
    report = report.path('Image/Others/commercial_long_greenurls')
    report = report.scale('daily')
    report = report.client(statface_client)

    sessions = job.table('//user_sessions/pub/sample_by_uid_1p/search/daily/@dates/clean')

    # The reducer needs libra and the blockstat dictionary next to it.
    reducer_files = [
        files.RemoteFile('//statbox/resources/libra.so'),
        files.RemoteFile('//statbox/statbox-dict-last/blockstat.dict'),
    ]
    long_clicks = sessions.groupby('key').sort('subkey').reduce(
        parse_user_sessions_img,
        files=reducer_files,
        memory_limit=3 * 1024,
        intensity='default',
    )

    clicks_by_path = long_clicks.groupby('path').aggregate(
        path_clicks=na.sum('click'),
    )
    with_totals = clicks_by_path.map(add_totals)
    clicks_by_device = with_totals.groupby('device').aggregate(
        sum_click=na.sum('path_clicks'),
    )

    stored = clicks_by_device.put('$job_root/parsed_raw')
    dated = stored.project(ne.all(), fielddate=ne.const(report_date))
    dated.sort('device').publish(report, allow_change_job=True)

    return job


# Script entry point: when executed directly, hand control to the nile CLI
# runner imported above as `cli`.
if __name__ == '__main__':
    cli.run()

