#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    extractors as se
)
from qb2.api.v1.typing import Optional, Json

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import datetime
import time
import itertools

# https://hitman.yandex-team.ru/projects/edinoe_izbrannoe/EI_Stats_MMA_1923


@with_hints(
    output_schema=dict(
        page=str,
        ui=str,
        clicks=int,
        shows=int,
        uids_clicks=int,
        uids_shows=int
    )
)
def add_totals(recs):
    """Fan each record out into itself plus its '_total_' roll-up rows.

    For every input record four records are emitted, in this order:
    (page, ui), (page, '_total_'), ('_total_', ui), ('_total_', '_total_').
    The four counters are copied unchanged, so summing them per (page, ui)
    afterwards yields per-page, per-ui and grand totals.

    :param recs: iterable of records with the fields page, ui, clicks,
        shows, uids_clicks and uids_shows.
    :return: generator of Record objects with the same six fields.
    """
    for rec in recs:
        # Read every field once, before anything is emitted.
        page_variants = (rec.page, '_total_')
        ui_variants = (rec.ui, '_total_')
        clicks = rec.clicks
        shows = rec.shows
        uids_clicks = rec.uids_clicks
        uids_shows = rec.uids_shows

        # The outer loop varies slowest, matching itertools.product ordering.
        for page in page_variants:
            for ui in ui_variants:
                yield Record(
                    page=page,
                    ui=ui,
                    clicks=clicks,
                    shows=shows,
                    uids_clicks=uids_clicks,
                    uids_shows=uids_shows
                )

@with_hints(
    output_schema=dict(
        page=str,
        ui=str,
        opens=int,
        tickers=int,
        # Emitted below and summed by the caller, so it has to be declared.
        opens_with_ticker=int,
        # The value is a ratio rounded to 7 places upstream, i.e. a float.
        opens_per_ticker_on=float
    )
)
def add_totals3(recs):
    """Fan each per-user ticker record out into itself plus '_total_' rows.

    For every input record four records are emitted, in this order:
    (page, ui), (page, '_total_'), ('_total_', ui), ('_total_', '_total_').
    The metric fields are copied unchanged so that a later group-by on
    (ui, page) also produces per-page, per-ui and overall aggregates.

    :param recs: iterable of records with the fields page, ui, opens,
        tickers, opens_with_ticker and opens_per_ticker_on.
    :return: generator of Record objects carrying the same six fields.
    """
    for rec in recs:
        # Read the metrics once per input record, before emitting anything.
        metrics = dict(
            opens=rec.opens,
            tickers=rec.tickers,
            opens_with_ticker=rec.opens_with_ticker,
            opens_per_ticker_on=rec.opens_per_ticker_on
        )
        for page, ui in itertools.product((rec.page, '_total_'),
                                          (rec.ui, '_total_')):
            yield Record(page=page, ui=ui, **metrics)

def url_to_ui(r):
    """Classify a request URL as 'touch', 'app', 'pad' or 'desktop'.

    The URL is split on '?'. The first piece (the path) decides the
    platform; for touch paths, 'searchapp' in the path or 'mobileapp' in
    the second piece marks the request as coming from the app.

    :param r: request URL string.
    :return: one of 'touch', 'app', 'pad', 'desktop'.
    """
    pieces = r.split("?")
    path = pieces[0]

    if "/touch" in path:
        # Only the piece between the first and second '?' is inspected.
        query = pieces[1] if len(pieces) > 1 else ""
        if "searchapp" in path or "mobileapp" in query:
            return "app"
        return "touch"

    if "/pad" in path or "/tablet" in path:
        return "pad"

    return "desktop"


@with_hints(output_schema=dict(page=str, ui=str, fielddate=str, yandexuid=str))
def correct_page(recs):
    """Normalise the service name of blockstat records and attach the UI type.

    The platform markers 'yand', 'touch' and 'pad' are stripped from
    ``rec.service``. Records are dropped when the request contains
    'stream_active', when the stripped service contains 'tv', or when it
    equals 'instant' or 'd'. A stripped service of 'themes' or '' is
    reported as 'morda'.

    :param recs: iterable of records with the fields request, service and
        yandexuid.
    :return: generator of Record(page, ui, yandexuid).
    """
    # NOTE(review): the declared schema lists fielddate, but no fielddate is
    # set on the emitted records - confirm whether it is still needed.
    for rec in recs:
        request = rec.request

        service = rec.service
        for marker in ("yand", "touch", "pad"):
            service = service.replace(marker, "")

        skip = (
            "stream_active" in request
            or "tv" in service
            or service in ("instant", "d")
        )
        if skip:
            continue

        page = "morda" if service in ("themes", "") else service

        ui = url_to_ui(request)
        if ui is None:
            continue

        yield Record(page=page, ui=ui, yandexuid=rec.yandexuid)


# First path segment -> reported service name. Segments that are not listed
# (for example 'ugcpub', 'local' or 'global-notifications') are reported as-is.
_PATH_SEGMENT_TO_SERVICE = {
    "yandsearch": "search",
    "touchsearch": "search",
    "people": "search",
    "gorsel": "images",
    "themes": "morda",
    "m": "morda",
    "user": "ugcpub",
    "st": "tracker",
    "": "unknown",
    "d": "unknown",
}


def page_to_service(p):
    """Derive a service name from a referer of the form '<host>||<path>'.

    Rules, in order:

    * 'morda_informer' is returned unchanged.
    * 'https://' and 'www.' are removed, 'm.yandex.' becomes 'yandex.' and
      'm.zen.' becomes 'zen.'.
    * If the host has more than one dot-separated label and the first one is
      not 'yandex', that first label is the service (e.g. 'zen').
    * Otherwise the path decides: '/' is 'morda'; else the first path
      segment is used, translated through ``_PATH_SEGMENT_TO_SERVICE``.
      The segment 'instant' and any segment containing 'tv' give None.

    :param p: referer string; any other value is tolerated.
    :return: service name, or None when the value cannot be parsed (not a
        string, no '||' separator, no path segment) or is filtered out.
    """
    if p == "morda_informer":
        return p

    try:
        referer = (p.replace("https://", "")
                    .replace("www.", "")
                    .replace("m.yandex.", "yandex.")
                    .replace("m.zen.", "zen."))
        host_and_path = referer.split("||", 1)
        host_labels = host_and_path[0].split(".")

        if host_labels[0] != "yandex" and len(host_labels) > 1:
            return host_labels[0]

        path = host_and_path[1]
        if path == "/":
            return "morda"
        segment = path.split("/")[1]
    except (AttributeError, IndexError, TypeError):
        # Non-string input, missing '||' separator or a path without a
        # segment: best effort, report "no service" instead of failing.
        return None

    if segment == "instant" or "tv" in segment:
        return None
    return _PATH_SEGMENT_TO_SERVICE.get(segment, segment)


@with_hints(
    output_schema=dict(
        ui=str,
        page=str,
        clicks=int,
        shows=int,
        uids_clicks=int,
        uids_shows=int
    )
)
def replace_none(recs):
    """Fill absent counters with 0 so that every record has all four of them.

    The input is the result of a full join followed by a concat, so a record
    may lack any of clicks, shows, uids_clicks or uids_shows. A counter that
    cannot be read, or that is None, is emitted as 0.

    :param recs: iterable of records with ui and page, plus any subset of
        the four counters.
    :return: generator of Record(ui, page, clicks, shows, uids_clicks,
        uids_shows).
    """
    counter_names = ("clicks", "shows", "uids_clicks", "uids_shows")

    for rec in recs:
        counters = {}
        for name in counter_names:
            try:
                value = getattr(rec, name)
            except Exception:
                # Record is a project type; a missing field is presumably an
                # AttributeError, but the handler stays broad to keep the
                # original best-effort behaviour. Unlike a bare ``except`` it
                # no longer swallows KeyboardInterrupt / SystemExit.
                value = None
            counters[name] = 0 if value is None else value

        yield Record(ui=rec.ui, page=rec.page, **counters)


@with_hints(
    output_schema=dict(
        uid=str,
        ui=str,
        page=str,
        opens=int,
        tickers=int,
        opens_with_ticker=int,
        # Emitted below and read by the downstream mapper; it is a rounded
        # ratio, hence float.
        opens_per_ticker_on=float
    )
)
def myreduce(recs):
    """Summarise one user's notifier events into a single record.

    Records of a group are expected in timestamp order (the caller sorts by
    timestamp). Records whose service is 'st', 'wiki', 'pda', 'demo' or
    'unknown', or contains 'test', are ignored. For the remaining ones:

    * ``tickers`` counts 'notifier.ticker.show' events;
    * ``opens`` counts 'notifier.show' events;
    * ``opens_with_ticker`` counts opens that happened less than 2*60
      timestamp units after the latest ticker and on the same page;
    * ``opens_per_ticker_on`` is opens_with_ticker / tickers, smoothed with a
      small constant so the division is defined without tickers, and rounded
      to 7 places.

    ``ui`` and ``page`` are taken from the last record that was NOT ignored,
    so an excluded page can never label the summary. A group in which every
    record was ignored produces no output.

    :param recs: iterable of (key, records) pairs; key has ``uid``; records
        have path, service, timestamp and ui.
    :return: generator with at most one Record per group.
    """
    excluded_pages = ("st", "wiki", "pda", "demo", "unknown")
    ticker_window = 2 * 60  # presumably seconds - TODO confirm timestamp units
    smoothing = 0.00001

    for key, records in recs:
        opens = 0
        opens_with_ticker = 0
        tickers_count = 0
        # Only the latest ticker matters, so no per-user lists are kept.
        last_ticker_ts = None
        last_ticker_page = None
        last_ui = None
        last_page = None
        kept_any = False

        for rec in records:
            page = rec.service
            if page in excluded_pages or "test" in page:
                continue

            kept_any = True
            last_ui = rec.ui
            last_page = page

            action = rec.path
            timestamp = rec.timestamp

            if action == "notifier.ticker.show":
                tickers_count += 1
                last_ticker_ts = timestamp
                last_ticker_page = page
            elif action == "notifier.show":
                opens += 1
                if (tickers_count > 0
                        and timestamp - last_ticker_ts < ticker_window
                        and page == last_ticker_page):
                    opens_with_ticker += 1

        if not kept_any:
            # Nothing but excluded pages: there is no valid page to report.
            continue

        ratio = (opens_with_ticker + smoothing) / (tickers_count + smoothing)

        yield Record(uid=key.uid, ui=last_ui, page=last_page,
                     tickers=tickers_count, opens=opens,
                     opens_with_ticker=opens_with_ticker,
                     opens_per_ticker_on=round(ratio, 7))


# https://clubs.at.yandex-team.ru/yt/2642
@cli.statinfra_job

def make_job(job, nirvana, statface_client, options):
    """Build the job graph for the 'Notifier/Counters/HeadNotifier' report.

    For every date in ``options.dates`` three sources are combined:
    pre-aggregated app/morda counters, notifier shows extracted from the
    web and images blockstat logs, and notifier opens / ticker events.
    The joined table is stored and published to the daily Statface report.

    :param job: job object to extend; it is returned after all streams
        have been added.
    :param nirvana: provides ``directories[0]``, bound to ``$job_root``.
    :param statface_client: client handed to the Statface report.
    :param options: provides ``dates``, an iterable of date strings.
    :return: the configured job.
    """

    # Set YT spec defaults (pool trees) and bind the $job_root template.
    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root=nirvana.directories[0]
        )
    )

    # Daily report that receives the final joined table (see p3 below).
    report = ns.StatfaceReport() \
        .path('Notifier/Counters/HeadNotifier') \
        .scale('daily') \
        .client(statface_client)

    mydates = options.dates

    # NOTE(review): the output paths written inside the loop
    # ($job_root/normal_blocks3, $job_root/head_final2 and
    # //home/lego/statistics/MMA-1923/ticker/joined_rep) do not depend on
    # strdate, so with several dates they presumably collide - confirm.
    for strdate in mydates:

        # process apps and morda
        # Sum the pre-computed shows/clicks per (fielddate, ui) and label
        # them with page='morda'.
        morda = job.table('$job_root/app_data_head_' + strdate) \
            .groupby("fielddate", "ui") \
            .aggregate(shows = na.sum("shows"), clicks = na.sum("clicks")) \
            .project(ne.all(), page=ne.const("morda").add_hints(type=str))

        # process search
        # Keep blockstat records whose blocks contain '/head/notifier' or
        # '.head.notifier', normalise page/ui via correct_page, then count
        # records and distinct yandexuids per (ui, page).
        shows = job.concat(job.table('//logs/search-web-blockstat-log/1d/' + strdate),
                           job.table('//logs/images-blockstat-log/1d/' + strdate)) \
            .qb2(log='blockstat-log',
                fields=['yandexuid', 'browser', 'page', 'request', 'canonized_vhost',
                       se.custom('bl', lambda normal_blocks: normal_blocks.keys()).with_type(str),
                       se.custom('service', lambda page: page.split('/')[1] if page.count("/") > 0 else page).with_type(str)
                       ],
                filters = [sf.default_filtering('blockstat-log'),
                           sf.defined('normal_blocks'),
                           sf.or_(sf.contains('blocks', '/head/notifier'), sf.contains('blocks', '.head.notifier'))]
            ).map(correct_page) \
            .groupby("ui", "page") \
            .aggregate(shows = na.count(), uids_shows = na.count_distinct("yandexuid")) \
            .put("$job_root/normal_blocks3")

        # Notifier opens: derive the service from the referer, drop rows for
        # which no service could be derived, then count records and distinct
        # uids per (ui, page).
        opens_data = job.table('$job_root/opens/' + strdate)
        opens = opens_data.project(ne.all(), page = ne.custom(lambda x: page_to_service(x), "referer").add_hints(type=str)) \
            .filter(sf.defined('page')) \
            .groupby("ui", "page") \
            .aggregate(clicks = na.count(), uids_clicks = na.count_distinct("uid"))

        # Full join, so a (ui, page) pair seen on only one side is kept; the
        # counters missing on the other side are filled in by replace_none.
        search = opens.join(shows, by=("ui", "page"), type="full")

        # NOTE(review): replace_none and add_totals do not emit fielddate, so
        # the "fielddate" group key below presumably carries no value - confirm.
        search_morda = search.concat(morda) \
            .map(replace_none) \
            .map(add_totals) \
            .groupby("fielddate", "ui", "page") \
            .aggregate(clicks=na.sum('clicks'), shows=na.sum('shows'),
                uids_clicks=na.sum('uids_clicks'), uids_shows=na.sum('uids_shows')) \
            .put("$job_root/head_final2")#.publish(report, allow_change_job=True)

        # add new ticker data
        # Ticker events and opens are merged and reduced per uid in timestamp
        # order; myreduce emits one summary record per uid.
        ticker_data = job.table('$job_root/ticker/' + strdate + '_v2')
        ticker_data_joined = ticker_data.concat(opens_data).filter(sf.defined("service")) \
            .groupby("uid").sort("timestamp") \
            .reduce(myreduce,
                memory_limit=4000,
                intensity='data')

        # Summaries with at least one ticker: total tickers and number of
        # such summary records per (ui, page), including '_total_' rows.
        p1 = ticker_data_joined.filter(sf.custom(lambda x: x > 0, "tickers")) \
            .map(add_totals3) \
            .groupby("ui", "page") \
            .aggregate(tickers = na.sum('tickers'), uids_tickers = na.count())

        # Summaries with at least one open: mean and median of the
        # opens-per-ticker ratio plus open counts per (ui, page).
        p2 = ticker_data_joined.filter(sf.custom(lambda x: x > 0, "opens")) \
            .map(add_totals3) \
            .groupby("ui", "page") \
            .aggregate(opt_mean=na.mean("opens_per_ticker_on"),
                opt_on_50=na.quantile("opens_per_ticker_on", 0.5),
                uids_opens = na.count(),
                opens = na.sum('opens'),
                opens_with_ticker = na.sum('opens_with_ticker')
            )

        # Combine ticker, open and show/click metrics per (ui, page), stamp
        # the rows with the processed date, store and publish them.
        p3 = p1.join(p2, by=("ui", "page"), type='full') \
            .join(search_morda, by=("ui", "page"), type='full') \
            .project(ne.all(), fielddate = ne.const(strdate).add_hints(type=str)) \
            .put('//home/lego/statistics/MMA-1923/ticker/joined_rep') \
            .publish(report, allow_change_job=True)

    return job


if __name__ == '__main__':
    cli.run()

