#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    extended_schema,
    multischema,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    resources as sr
)
from qb2.api.v1.typing import Optional, Json, String

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import datetime
import time
import re
import ast
import urllib
import itertools

# https://hitman.yandex-team.ru/projects/edinoe_izbrannoe/EI_Stats_MMA_1923


@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        ui=str,
        fielddate=str,
        path=str,
        uid=str
    )
)
def add_totals(recs):
    """Fan each record out into its '_total_' roll-up combinations.

    For every input record this yields eight Records: one for each
    combination of (notif_type or '_total_'), (notif_service or '_total_')
    and (ui or '_total_'). fielddate, path and uid are copied unchanged.
    """
    total = '_total_'
    for rec in recs:
        # Each rolled-up dimension contributes its own value plus the total.
        notif_types = (rec.notif_type, total)
        notif_services = (rec.notif_service, total)
        uis = (rec.ui, total)
        fielddate, path, uid = rec.fielddate, rec.path, rec.uid

        combos = itertools.product(notif_types, notif_services, uis)
        for notif_type, notif_service, ui in combos:
            yield Record(
                notif_type=notif_type,
                notif_service=notif_service,
                ui=ui,
                fielddate=fielddate,
                path=path,
                uid=uid
            )


@with_hints(
    output_schema=dict(
        page_from=str,
        page_to=str,
        ui=str,
        notif_type=str,
        clong=int,
        overlong=int
    )
)
def add_totals2(recs):
    """Fan each record out into its '_total_' roll-up combinations.

    For every input record this yields sixteen Records: one for each
    combination of page_from, page_to, ui and notif_type, where each of
    those is either the record's own value or '_total_'. clong and
    overlong are copied unchanged.
    """
    total = '_total_'
    for rec in recs:
        # Each rolled-up dimension contributes its own value plus the total.
        pages_from = (rec.page_from, total)
        pages_to = (rec.page_to, total)
        uis = (rec.ui, total)
        notif_types = (rec.notif_type, total)
        clong, overlong = rec.clong, rec.overlong

        combos = itertools.product(pages_from, pages_to, uis, notif_types)
        for page_from, page_to, ui, notif_type in combos:
            yield Record(
                page_from=page_from,
                page_to=page_to,
                ui=ui,
                notif_type=notif_type,
                clong=clong,
                overlong=overlong
            )

def url_to_ui(r):
    """Classify a URL-like string as "touch", "app", "pad" or "desktop".

    Only the part before the first "?" is inspected for the "/touch",
    "/pad" and "/tablet" markers. A touch URL is reported as "app" when
    that part contains "searchapp", or when the text between the first and
    the second "?" contains "mobileapp".
    """
    pieces = r.split("?")
    location = pieces[0]

    if "/touch" not in location:
        if "/pad" in location or "/tablet" in location:
            return "pad"
        return "desktop"

    # Touch traffic: tell the search app apart from the mobile web.
    if "searchapp" in location:
        return "app"
    if len(pieces) > 1 and "mobileapp" in pieces[1]:
        return "app"
    return "touch"

# First path sections that are reported under a different service name.
_PAGE_SECTION_ALIASES = {
    "yandsearch": "search",
    "touchsearch": "search",
    "gorsel": "images",
    "themes": "morda",
    "": "unknown",
    "d": "unknown",
}


def page_to_service(p):
    """Derive a short service name from a "<host>||<page path>" string.

    "https://" and "www." are removed and "m.yandex." is rewritten to
    "yandex." before parsing. If the host has more than one dot-separated
    label and the first one is not "yandex", that first label is the
    service (e.g. "mail.yandex.ru||/inbox" -> "mail"). Otherwise the
    service comes from the page path: "/" is "morda", and any other path
    is identified by its first section, with a few sections renamed (see
    _PAGE_SECTION_ALIASES).

    Args:
        p: string of the form "<host>||<page path>".

    Returns:
        The service name, or None when the first section is "instant" or
        contains "tv", or when p cannot be parsed (for example it is not a
        string, has no "||" part for a yandex host, or the page has no
        "/"-separated section).
    """
    try:
        cleaned = p.replace("https://", "").replace("www.", "").replace("m.yandex.", "yandex.")
        host_and_page = cleaned.split("||", 1)
        host_labels = host_and_page[0].split(".")
        if host_labels[0] != "yandex" and len(host_labels) > 1:
            return host_labels[0]

        page = host_and_page[1]
        if page == "/":
            return "morda"
        section = page.split("/")[1]
    except Exception:
        # Best effort: unparsable input maps to "no service". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return None

    if section == "instant" or "tv" in section:
        return None
    return _PAGE_SECTION_ALIASES.get(section, section)


@with_hints(
    output_schema=extended_schema(
        uid=str,
        timestamp=int,
        referer=str,
        ui=str,
        path=str,
        vrsr=Optional[Json],
        pid=str
    )
)
def process_redir_data2(recs):
    """Project input records onto the fields used by the notifier statistics.

    For each input record yields a Record with:
      * uid       - rec.yandexuid
      * timestamp - rec.timestamp
      * path      - rec.normal_path
      * pid       - rec.pid, or "" when it cannot be read
      * vrsr      - rec.parsed_vars for pid 900 records, otherwise ""
      * referer   - "<vhost>||<page>" built from the referer_* fields, or
                    from the record's own canonized_vhost/page when pid is
                    "900"; "" when the fields cannot be combined
      * ui        - url_to_ui(referer)

    Records whose referer contains "hamster", "priemka", "clck", "-test" or
    "tracker" are dropped.
    """
    skipped_markers = ("hamster", "priemka", "clck", "-test", "tracker")

    for rec in recs:
        path = rec.normal_path

        try:
            pid = rec.pid
        except Exception:
            pid = ""

        vrsr = ""
        if pid == "900" or pid == 900:
            try:
                vrsr = rec.parsed_vars
            except Exception:
                # Best effort: keep the empty default when vars are unavailable.
                pass

        try:
            referer = rec.referer_canonized_vhost + "||" + rec.referer_page
            # NOTE(review): only the string "900" switches to the record's
            # own host/page, while parsed_vars above are also read for the
            # integer 900 - confirm whether that difference is intended.
            if pid == "900":
                referer = rec.canonized_vhost + "||" + rec.page
        except Exception:
            # Missing or None parts (string concatenation fails) end up here.
            referer = ""

        if any(marker in referer for marker in skipped_markers):
            continue

        if vrsr is None:
            vrsr = ""

        yield Record(uid=rec.yandexuid, timestamp=rec.timestamp, ui=url_to_ui(referer),
                     referer=referer, path=path, pid=pid, vrsr=vrsr)


def _strip_gender_suffix(value):
    """Remove the "_male" / "_female" markers from a notification type."""
    return value.replace("_male", "").replace("_female", "")


def _notification_record(rec, notif_type, notif_service, recordid, **extra):
    """Build an output Record from rec's common fields plus notification fields."""
    return Record(uid=rec.uid, timestamp=rec.timestamp, ui=rec.ui,
                  referer=rec.referer, path=rec.path, pid=rec.pid,
                  notif_type=notif_type, notif_service=notif_service,
                  recordid=recordid, **extra)


@with_hints(
    output_schema=dict(
        uid=str,
        timestamp=int,
        referer=str,
        ui=str,
        path=str,
        pid=str,
        notif_type=str,
        notif_service=str,
        recordid=str,
        page=Optional[String]
    )
)
def parse_vars_values2(recs):
    """Attach notification type/service/record id to every record.

    Records whose pid is not "900" are passed through with notif_type,
    notif_service and recordid set to None. For pid "900" records:

      * path "notifier.show": one record with type/service "unknown" and
        page computed by page_to_service(rec.referer);
      * vrsr contains "-notifications": the value is URL-unquoted, decoded
        as UTF-8 and parsed with ast.literal_eval; one record is yielded
        per parsed item, using its "type", "service" and "recordId" keys
        when present (the whole record is skipped if parsing fails);
      * otherwise: type/service are taken from "-type"/"-service", or from
        "-settingId"/"-recordId", falling back to "unknown".

    Notifications whose service is "tracker" are dropped, and the "_male" /
    "_female" markers are removed from notification types.

    NOTE(review): only the "notifier.show" branch sets the `page` field
    declared in the output schema; the other branches leave it unset.
    """
    for rec in recs:
        if rec.pid != "900":
            yield _notification_record(rec, None, None, None)
            continue

        vrsr = rec.vrsr

        if rec.path == "notifier.show":
            yield _notification_record(rec, 'unknown', 'unknown', None,
                                       page=page_to_service(rec.referer))
            continue

        if "-notifications" in vrsr:
            encoded = vrsr["-notifications"]
            decoded = urllib.unquote(encoded).decode('utf8')
            try:
                # literal_eval only accepts Python literals, so it is safe
                # to run on log-supplied text.
                notifications = ast.literal_eval(decoded)
            except Exception:
                continue

            for item in notifications:
                notif_type, notif_service = "unknown", "unknown"
                record_id = None

                if "type" in item:
                    notif_type = _strip_gender_suffix(item["type"])
                if "service" in item:
                    notif_service = item["service"]
                if "recordId" in item:
                    record_id = item["recordId"]

                if notif_service == "tracker":
                    continue

                yield _notification_record(rec, notif_type, notif_service, record_id)
            continue

        notif_type, notif_service = "unknown", "unknown"

        if "-service" in vrsr and "-type" in vrsr:
            notif_service = urllib.unquote(vrsr["-service"]).decode('utf8')
            notif_type = _strip_gender_suffix(urllib.unquote(vrsr["-type"]).decode('utf8'))
        elif "-settingId" in vrsr and "-recordId" in vrsr:
            notif_service = vrsr["-recordId"]
            notif_type = _strip_gender_suffix(vrsr["-settingId"])

        if notif_service == "tracker":
            continue

        yield _notification_record(rec, notif_type, notif_service, None)


@with_hints(
    files=[nfl.TableFile('$job_root/services', 'services')],
    output_schema=dict(
        uid=str,
        ui=str,
        path=str,
        notif_type=str,
        notif_service=str,
        service=str,
        timestamp=int,
        splitter=str
    )
)
def service_map(recs, **options):
    """Duplicate every record once per known notification service.

    The list of services is read from the 'services' file stream
    (`n_service` field of each row); entries containing "notifications",
    "clck", "hamster", "priemka", "test" or "tracker" are ignored. Each
    input record is then yielded once per remaining service, with that
    service stored in the `splitter` field.

    Records whose own `service` contains "hamster", "priemka" or "clck" are
    dropped. That check does not depend on the splitter, so it is done once
    per record rather than once per (record, splitter) pair.

    Args:
        recs: input records.
        **options: must contain 'file_streams' with a 'services' stream.
    """
    file_streams = options['file_streams']

    excluded_splitters = ("notifications", "clck", "hamster", "priemka", "test", "tracker")
    known_services = [row.n_service for row in file_streams['services'] if row.get('n_service')]
    splitters = [
        name for name in known_services
        if not any(marker in name for marker in excluded_splitters)
    ]

    excluded_services = ("hamster", "priemka", "clck")

    for rec in recs:
        service = rec.service
        if any(marker in service for marker in excluded_services):
            continue
        for splitter in splitters:
            yield Record(uid=rec.uid, notif_service=rec.notif_service, notif_type=rec.notif_type,
                         path=rec.path, service=service, timestamp=rec.timestamp, ui=rec.ui,
                         splitter=splitter)


@with_hints(
    output_schema=dict(
        uid=str,
        ui=str,
        notif_type=str,
        page_from=str,
        page_to=str,
        # Declared as int: every yield below emits 0/1 for clong.
        clong=int,
        overlong=int
    )
)
def myreduce(recs):
    """Measure how long users stay away after clicking a notification.

    Input is grouped by key (uid, splitter). Within a group, records whose
    `service` overlaps with the splitter (either string contains the other)
    and "mark_read" events are skipped. For each remaining event, if the
    previously kept event was a notifier click whose notif_service equals
    the splitter and whose notif_type is set, one Record is yielded with:

      * clong    = 1 when more than 30 seconds passed since the click
      * overlong = 1 when more than 120 seconds passed since the click

    If such a click is the last kept event of the group, it is yielded
    with clong=1 and overlong=1.

    Assumes the records of a group arrive ordered by timestamp and that
    timestamps are in seconds - TODO confirm against the caller.
    """
    click_paths = ("/notifier/results/click", "notifier.results.click")
    mark_read_paths = ("/notifier/results/mark_read", "notifier.results.mark_read")

    for key, records in recs:
        uid = key.uid
        splitter = key.splitter

        # State of the previously kept event; reset for every group.
        prev_path = prev_action = prev_ts = None
        notif_service = notif_type = ui = None

        for rec in records:
            current_action = rec.service
            if splitter in current_action or current_action in splitter:
                continue
            current_path = rec.path
            if current_path in mark_read_paths:
                continue
            current_ts = rec.timestamp

            if prev_path in click_paths and notif_service == splitter and notif_type is not None:
                elapsed = current_ts - prev_ts
                yield Record(uid=uid, ui=ui, notif_type=notif_type,
                             page_from=prev_action, page_to=notif_service,
                             clong=1 if elapsed > 30 else 0,
                             overlong=1 if elapsed > 120 else 0)

            prev_action = current_action
            prev_ts = current_ts
            prev_path = current_path
            notif_service = rec.notif_service
            notif_type = rec.notif_type
            ui = rec.ui

        # A click with nothing after it counts as the longest possible stay.
        if prev_path in click_paths and notif_service == splitter and notif_type is not None:
            yield Record(uid=uid, ui=ui, notif_type=notif_type,
                         page_from=prev_action, page_to=notif_service,
                         clong=1, overlong=1)


@with_hints(
    output_schema=dict(
        ui=str,
        long_clicks=int,
        shows=int
    )
)
def replace_none(recs):
    """Default missing or None `long_clicks` / `shows` counters to 0.

    Yields one Record per input record with `ui` copied as is. A counter
    becomes 0 when the field cannot be read from the record or when its
    value is None, so the output always matches the declared int schema.
    """
    for rec in recs:
        counters = {}
        for field in ('long_clicks', 'shows'):
            try:
                value = getattr(rec, field)
            except Exception:
                # Field absent from the record.
                value = None
            counters[field] = 0 if value is None else value

        yield Record(ui=rec.ui,
                     long_clicks=counters['long_clicks'],
                     shows=counters['shows'])


@with_hints(output_schema=dict(path=str, page=str, baobab=bool))
def myreduce_us_bao(sessions):
    """Extract notifier clicks and notifier baobab blocks from user sessions.

    Each group of records is parsed with libra.ParseSession using
    'blockstat.dict'; groups that fail to parse are skipped. For every
    parsed request that IsA 'TWebRequestProperties':

      * each click whose ConvertedPath is "/head/notifier" yields
        Record(path, page, baobab=False);
      * if the request also IsA 'TBaobabProperties' and has a baobab tree
        with a show, every block whose name contains "notifier" yields
        Record(path=<block attrs "oldPath">, page, baobab=True).

    `page` is the request's FullRequest up to the first "?".

    NOTE(review): a notifier block without an "oldPath" attribute would
    raise here - confirm that the attribute is always present.
    """
    # Imported inside the function; presumably these modules are only
    # available on the job side, where they are attached as files - verify.
    import libra
    import yandex_baobab_api as baobab

    for _key, records in sessions:
        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            continue

        for request in session:
            if not request.IsA('TWebRequestProperties'):
                continue

            page = request.FullRequest.split("?")[0]

            for click in request.GetClicks():
                path = click.ConvertedPath
                if path == "/head/notifier":
                    yield Record(path=path, page=page, baobab=False)

            if not request.IsA('TBaobabProperties'):
                continue

            joiner = request.BaobabTree()
            if not joiner:
                continue
            show = joiner.get_show()
            if not show:
                continue

            for block in baobab.common.tree.bfs_iterator(show.tree.root):
                if "notifier" in block.name:
                    yield Record(path=block.attrs["oldPath"], page=page, baobab=True)


# https://clubs.at.yandex-team.ru/yt/2642
@cli.statinfra_job
def make_job(job, nirvana, statface_client, options):
    """Build the job that counts notifier events in search user sessions.

    For every date in options.dates the daily 'clean' user-sessions table
    is grouped by key, sorted by subkey and reduced with myreduce_us_bao.
    The result is stored in '$job_root/serp_data', then counted per
    (path, page, baobab) and stored in '$job_root/serp_data_rep'.

    `statface_client` is accepted but not used here.

    NOTE(review): every date writes to the same two output paths - confirm
    that options.dates is expected to hold a single date.

    Returns:
        The configured job.
    """
    spec_defaults = dict(pool_trees=["physical"], tentative_pool_trees=["cloud"])
    path_templates = dict(
        job_root=nirvana.directories[0],
        tmp_root='//home/lego/tmp'
    )
    job = job.env(yt_spec_defaults=spec_defaults, templates=path_templates)

    for strdate in options.dates:
        sessions = job.table('//user_sessions/pub/search/daily/' + strdate + '/clean')
        ordered = sessions.groupby('key').sort('subkey')

        # Files attached to the reduce operation.
        attached_files = [
            files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
            files.RemoteFile('statbox/resources/libra.so'),
            files.RemoteFile('statbox/resources/yandex_baobab_api'),
        ]
        events = ordered.reduce(myreduce_us_bao, files=attached_files, memory_limit=4000)

        stored_events = events.put('$job_root/serp_data')
        counted = stored_events.groupby('path', 'page', 'baobab').aggregate(count=na.count())
        counted.put('$job_root/serp_data_rep')

    return job

# Script entry point: when executed directly, call the nile `cli` runner.
if __name__ == '__main__':
    cli.run()

