#!/usr/bin/env python
# -*- coding: utf-8 -*-

# https://st.yandex-team.ru/EXPERIMENTS-21325

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    resources as sr,
    filters as qf
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import datetime
import time
import re


# Path handed to files.RemoteFile() for the map step in make_job.
# NOTE(review): parse_chevent_img reads a JSON resource named 'export_page',
# presumably resolved from this file's basename — confirm.
EXPORT_PAGE = '//statbox/statbox-dict-last/export_page'

@with_hints(
    output_schema=dict(
        date=str,
        page_id=str,
        click=int,
        show=int,
        cost=float,
        uid=str
    )
)
def parse_chevent_img(recs):
    """Turn chevent records for image-search pages into click/show rows.

    The ``export_page`` JSON resource is scanned once for entries whose
    ``Name`` contains ``images.yandex`` or ``gorsel.yandex``; only records
    whose ``pageid`` is one of those keys are emitted.

    Args:
        recs: iterable of records exposing ``pageid``, ``countertype``,
            ``eventcost``, ``unixtime`` and ``uniqid``.

    Yields:
        Record with ``date`` (UTC day, ``YYYY-MM-DD``), ``page_id``,
        ``click``, ``show``, ``cost`` and ``uid``. Exactly one of
        ``click`` / ``show`` is 1 per emitted row.
    """
    export_page_full = sr.get(sr.json('export_page'))
    # A set gives O(1) membership checks; this lookup runs for every record.
    image_page_ids = set(
        page_id
        for page_id, page in export_page_full.iteritems()
        if "Name" in page
        and ("images.yandex" in page["Name"] or "gorsel.yandex" in page["Name"])
    )

    for rec in recs:
        pageid = rec.pageid
        if pageid not in image_page_ids:
            continue

        # countertype "2" is counted as a click, anything else as a show.
        if rec.countertype == "2":
            click, show = 1, 0
        else:
            click, show = 0, 1

        eventcost = float(rec.eventcost)
        # Only clicks contribute to cost (click is 0 for shows).
        # NOTE(review): 30 / 1000000 is presumably a unit conversion of
        # eventcost — confirm the intended units.
        cost = click * eventcost * 30 / 1000000

        date = datetime.datetime.utcfromtimestamp(
            int(rec.unixtime)
        ).strftime('%Y-%m-%d')

        yield Record(
            date=date,
            page_id=pageid,
            click=click,
            show=show,
            cost=cost,
            uid=str(rec.uniqid)
        )


def parse_from_path(s):
    """Split a slash-separated path into ``[parent, last_component]``.

    The split happens at the last ``/``. A path without any ``/`` yields
    an empty parent and the whole string as the last component.
    """
    parent, _, leaf = s.rpartition('/')
    return [parent, leaf]


@cli.statinfra_job(options=[cli.Option('test_ids', default='?')])
def make_job(job, nirvana, options):
    """Configure the job that extracts image-search rows from the chevent log.

    The first entry of ``nirvana.output_tables`` is split into a folder and
    a table name. The folder becomes the ``job_root`` template and
    ``<folder>/temporary`` becomes ``tmp_files``. The pipeline reads
    ``//logs/bs-chevent-log/1d/@dates``, keeps rows with ``placeid == '542'``
    and ``fraudbits == '0'``, maps them through ``parse_chevent_img`` and
    writes the result to ``$tmp_files/<table name>_1money``.

    Args:
        job: job object to configure.
        nirvana: object exposing ``output_tables``.
        options: parsed CLI options; ``test_ids`` is read from it.

    Returns:
        The job with environment and pipeline attached.
    """
    # Split the output path once instead of parsing it for each component.
    output_folder, table_name = parse_from_path(nirvana.output_tables[0])

    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root=output_folder,
            tmp_files=output_folder + "/temporary"
        )
    )

    # A left-at-default test_ids only produces a warning; the job still runs.
    # NOTE(review): test_ids is not used anywhere else in this function.
    if options.test_ids == "?":
        sys.stderr.write('wrong testids\n')

    job.table('//logs/bs-chevent-log/1d/@dates').filter(
        nf.and_(
            nf.equals('placeid', '542'),
            nf.equals('fraudbits', '0')
        )
    ).map(
        parse_chevent_img,
        files=[files.RemoteFile(EXPORT_PAGE)],
        memory_limit=6*1024,
        intensity='default'
    ).put("$tmp_files/" + table_name + "_1money")

    return job


# Script entry point: hand control to nile's CLI runner.
# NOTE(review): presumably cli.run() dispatches to the function registered
# with @cli.statinfra_job — confirm against the nile documentation.
if __name__ == '__main__':
    cli.run()

