from nile.api.v1 import (
    Record,
    files,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)


import os
import json
import datetime
from yt.wrapper import YtClient


# YT path of the exported page dictionary; make_job reads this file and hands
# its contents to get_pages().
direct_pages = '//statbox/statbox-dict-last/export_page'
# YT paths attached to the reduce operation in make_job via files.RemoteFile.
# NOTE(review): the reducer opens 'blockstat.dict' by bare name and imports
# `libra`, so these are presumably delivered under their base names — confirm.
dict_path = '//statbox/statbox-dict-last/blockstat.dict'
libra_path = '//statbox/resources/libra.so'

def get_pages(data):
    """Return the IDs of pages whose name mentions Yandex image search.

    ``data`` is a JSON document whose top-level value is an object mapping
    page-ID strings to per-page entries. An entry is selected when it has a
    ``'Name'`` key and that name contains ``'images.yandex'`` or
    ``'gorsel.yandex'``.

    Returns a list with the selected keys converted to ``int``, in the
    iteration order of the decoded object.
    """
    pages = json.loads(data)
    markers = ('images.yandex', 'gorsel.yandex')
    # ``items()`` is available on both Python 2 and Python 3 dicts, whereas
    # ``iteritems()`` exists only on Python 2.
    return [
        int(page_id)
        for page_id, info in pages.items()
        if 'Name' in info and any(marker in info['Name'] for marker in markers)
    ]


def UE2Rub(value):
    """Scale ``value`` by 30 and divide by one million, returning a float.

    NOTE(review): judging by the name this presumably converts a cost in
    micro conventional units to rubles at a fixed rate of 30 — confirm.
    """
    scaled = value * 30.0
    return scaled / 1000000.0


def CostByService(request, imgPages):
    """Sum non-fraud Direct click costs of one request, split by service.

    For a request that is a ``TWebRequestProperties``, clicks of the main and
    parallel blocks are counted when they carry ``TDirectClickProperties``,
    their ``PageID`` is in ``imgPages`` and ``FraudBits`` is 0.

    For a request that is a ``TImagesRequestProperties``, clicks of each main
    block's main result are counted when they carry
    ``TDirectClickProperties`` with ``FraudBits`` equal to 0 (no page filter).

    Returns a dict with the keys ``imgDirectCost``, ``imgDirectClicksCount``,
    ``webImgDirectCost``, ``webImgDirectClicksCount`` and ``total``. Both
    costs are passed through ``UE2Rub``; ``total`` is their sum.
    """
    img_cost = 0
    img_clicks = 0
    web_cost = 0
    web_clicks = 0

    if request.IsA('TWebRequestProperties'):
        web_blocks = request.GetMainBlocks() + request.GetParallelBlocks()
        for block in web_blocks:
            for click in block.GetClicks():
                props = click.GetProperties('TDirectClickProperties')
                if (props
                        and props.PageID in imgPages
                        and props.FraudBits == 0):
                    web_cost += props.EventCost
                    web_clicks += 1

    if request.IsA('TImagesRequestProperties'):
        for block in request.GetMainBlocks():
            for click in block.GetMainResult().GetClicks():
                props = click.GetProperties('TDirectClickProperties')
                if props and props.FraudBits == 0:
                    img_cost += props.EventCost
                    img_clicks += 1

    img_cost_converted = UE2Rub(img_cost)
    web_cost_converted = UE2Rub(web_cost)
    return {
        'imgDirectCost': img_cost_converted,
        'imgDirectClicksCount': img_clicks,
        'webImgDirectCost': web_cost_converted,
        'webImgDirectClicksCount': web_clicks,
        'total': img_cost_converted + web_cost_converted,
    }


@with_hints(output_schema=dict(
    total=float,
    cm2=float,
    vcomm=float,
    device=str,
))
def mapper(records):
    """Pass every record through and append one ``'__total__'`` summary row.

    Each incoming record is yielded unchanged while its ``cm2``, ``vcomm``
    and ``total`` fields are accumulated. After the input is exhausted, one
    extra record is yielded that holds the three sums and has ``device`` set
    to ``'__total__'``.
    """
    cm2_sum = 0
    vcomm_sum = 0
    total_sum = 0
    for rec in records:
        cm2_sum += rec.cm2
        vcomm_sum += rec.vcomm
        total_sum += rec.total
        yield rec
    yield Record.from_dict({
        'cm2': cm2_sum,
        'vcomm': vcomm_sum,
        'total': total_sum,
        'device': '__total__',
    })


class page_filter(object):
    """Reducer callable that turns grouped session rows into cost records.

    The instance stores the list of image page IDs and, when called with an
    iterable of ``(key, records)`` groups, parses each group with
    ``libra.ParseSession`` and yields one ``Record`` per parsed request.
    """

    def __init__(self, imgPages):
        # Page IDs forwarded to CostByService for every request.
        self.imgPages = imgPages

    def __call__(self, sessions):
        """Yield one record per request of every session that parses.

        Each record holds the output of ``CostByService`` plus ``cm2`` and
        ``vcomm`` (taken from ``RelevValues`` when the request has that
        attribute, otherwise 0.0) and ``device`` (``'desktop'`` for a
        ``TDesktopUIProperties`` request, ``'touch'`` otherwise).

        Sessions for which ``libra.ParseSession`` raises are skipped. A
        failure to import ``libra`` itself is not swallowed.
        """
        for key, records in sessions:
            # Kept outside the ``try`` on purpose: if the module cannot be
            # loaded, the job must fail instead of silently skipping every
            # session. Repeated imports are just a module-cache lookup.
            import libra
            try:
                session = libra.ParseSession(records, 'blockstat.dict')
            except Exception:
                # Best effort: an unparsable session is dropped.
                continue
            for r in session:
                result = CostByService(r, self.imgPages)
                result['cm2'] = float(0)
                result['vcomm'] = float(0)
                result['device'] = 'desktop' if r.IsA('TDesktopUIProperties') else 'touch'
                if hasattr(r, 'RelevValues'):
                    result['cm2'] = float(r.RelevValues.get('cm2', 0))
                    result['vcomm'] = float(r.RelevValues.get('vcomm', 0))
                yield Record.from_dict(result)


@cli.statinfra_job
def make_job(job, nirvana, options, statface_client):
    """Assemble the daily job and publish it to the Statface report.

    Steps, in order:

    1. Read the page dictionary at ``direct_pages`` through a ``YtClient``
       and extract the image page IDs with ``get_pages``.
    2. Reduce the ``search`` and ``images`` user-session logs, grouped by
       ``key`` and sorted by ``subkey``, with ``page_filter``; the result is
       stored in ``$job_root/parsed_raw``.
    3. Aggregate per ``device``: ``total`` is the sum of ``total``; ``cm2``
       and ``vcomm`` are sums of ``total`` restricted to rows where ``cm2``
       is above 0.21 and ``vcomm`` is above 0.51 respectively.
    4. Append the ``'__total__'`` row via ``mapper``, add ``fielddate``, store
       the result in ``$job_root/parsed_full`` and publish it to the report
       ``Image/Others/direct_money_by_cm2_vcomm``.

    ``job_root`` is the first Nirvana output table and the date is the first
    entry of ``options.dates``. The YT token is read from the ``YT_TOKEN``
    environment variable, so a missing variable raises ``KeyError``.

    Returns the configured job.
    """
    job_root = nirvana.output_tables[0]
    strdate = options.dates[0]
    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=['physical'],
            tentative_pool_trees=['cloud']
        ),
        templates=dict(
            job_root=job_root
        )
    )
    report = ns.StatfaceReport() \
        .path('Image/Others/direct_money_by_cm2_vcomm') \
        .scale('daily') \
        .client(statface_client)
    client = YtClient(proxy='hahn', token=os.environ['YT_TOKEN'])
    data = client.read_file(direct_pages).read()
    imgPages = get_pages(data)
    # Field names must match the keys page_filter actually emits:
    # CostByService produces 'imgDirectClicksCount' and
    # 'webImgDirectClicksCount'.
    hints = with_hints(output_schema=dict(
        imgDirectCost=float,
        webImgDirectCost=float,
        total=float,
        imgDirectClicksCount=int,
        webImgDirectClicksCount=int,
        cm2=float,
        vcomm=float,
        device=str,
    ))
    reducer = hints(page_filter(imgPages))
    log = job.table('//user_sessions/pub/{search,images}/daily/@dates/clean')
    parsed_raw = log \
        .groupby('key') \
        .sort('subkey') \
        .reduce(
            reducer,
            files=[files.RemoteFile(dict_path),
                   files.RemoteFile(libra_path)],
            memory_limit=6*1024
        ) \
        .put('$job_root/parsed_raw')
    # NOTE(review): 0.21 and 0.51 are unexplained thresholds on the cm2 and
    # vcomm values — confirm their origin with the report owner.
    parsed_full = parsed_raw \
        .groupby('device') \
        .aggregate(
            cm2=na.sum('total', predicate=nf.custom(lambda x: x > 0.21, 'cm2')),
            vcomm=na.sum('total', predicate=nf.custom(lambda x: x > 0.51, 'vcomm')),
            total=na.sum('total')
        ) \
        .map(mapper) \
        .project(ne.all(), fielddate=ne.const(strdate)) \
        .put('$job_root/parsed_full')
    parsed_full.publish(report, allow_change_job=True)
    return job


# Script entry point: hand control to the nile ``cli`` runner.
if __name__ == '__main__':
    cli.run()
