#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    extended_schema,
    multischema,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    extractors as se,
    resources as sr
)
from qb2.api.v1.typing import Optional, Json, String

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import datetime
import time
import re
import ast
import urllib
import itertools

# https://hitman.yandex-team.ru/projects/edinoe_izbrannoe/EI_Stats_MMA_1923


@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        ui=str,
        fielddate=str,
        path=str,
        uid=str,
        # NOTE(review): testid is emitted below, so it has to be declared here;
        # it is assumed to be a string in the joined table -- confirm.
        testid=str
    )
)
def add_totals(recs):
    """Fan each record out into its '_total_' roll-up combinations.

    For every input record the notif_type, notif_service and ui fields are
    each emitted both with their own value and with the literal '_total_',
    giving 2 * 2 * 2 = 8 output records. fielddate, path, uid and testid are
    copied through unchanged.

    :param recs: iterable of records exposing notif_type, notif_service, ui,
        fielddate, path, uid and testid attributes.
    :return: generator of Record objects with those seven fields.
    """
    for rec in recs:
        for combo in itertools.product(
                (rec.notif_type, '_total_'),
                (rec.notif_service, '_total_'),
                (rec.ui, '_total_'),
                (rec.fielddate, ),
                (rec.path, ),
                # The comma after this tuple matters: without it the tuple is
                # called with the next one as its argument and raises TypeError.
                (rec.uid, ),
                (rec.testid, )
                ):
            yield Record(
                notif_type=combo[0],
                notif_service=combo[1],
                ui=combo[2],
                fielddate=combo[3],
                path=combo[4],
                uid=combo[5],
                testid=combo[6]
                )


@with_hints(
    output_schema=dict(
        page_from=str,
        page_to=str,
        ui=str,
        notif_type=str,
        clong=int,
        overlong=int
    )
)
def add_totals2(recs):
    """Expand each record into its '_total_' roll-ups over four dimensions.

    page_from, page_to, ui and notif_type are each emitted with their own
    value and with the literal '_total_' (16 combinations per record), while
    clong and overlong are carried over unchanged.

    :param recs: iterable of records exposing page_from, page_to, ui,
        notif_type, clong and overlong attributes.
    :return: generator of Record objects with those six fields.
    """
    total = '_total_'
    for rec in recs:
        axes = (
            (rec.page_from, total),
            (rec.page_to, total),
            (rec.ui, total),
            (rec.notif_type, total),
            (rec.clong, ),
            (rec.overlong, ),
        )
        for page_from, page_to, ui, notif_type, clong, overlong in itertools.product(*axes):
            yield Record(
                page_from=page_from,
                page_to=page_to,
                ui=ui,
                notif_type=notif_type,
                clong=clong,
                overlong=overlong
            )

def url_to_ui(r):
    """Classify a URL-like string into a UI family.

    Only the part before the first '?' is inspected for the path markers;
    the segment between the first and second '?' is additionally checked
    for 'mobileapp' when the path is a touch one.

    :param r: URL-like string.
    :return: one of "app", "touch", "pad" or "desktop".
    """
    parts = r.split("?")
    location = parts[0]

    if "/touch" not in location:
        if "/pad" in location or "/tablet" in location:
            return "pad"
        return "desktop"

    # Touch pages opened inside the search app count as "app".
    in_app = "searchapp" in location or (len(parts) > 1 and "mobileapp" in parts[1])
    return "app" if in_app else "touch"

# Path sections that are reported under a different service name.
_PAGE_SECTION_ALIASES = {
    "yandsearch": "search",
    "touchsearch": "search",
    "gorsel": "images",
    "themes": "morda",
    "": "unknown",
    "d": "unknown",
}


def page_to_service(p):
    """Derive a service name from a "host||path" referer string.

    The "https://" and "www." substrings are removed and "m.yandex." is
    rewritten to "yandex." before parsing. If the first host label is not
    "yandex" (and the host has more than one label) that label is the
    service. Otherwise the service is taken from the first path section,
    with a few sections renamed and "/" mapped to "morda".

    :param p: string of the form "<host>||<path>".
    :return: service name, or None when the section is "instant" or contains
        "tv", or when the input cannot be parsed (not a string, no "||"
        separator, path without a section).
    """
    try:
        normalized = p.replace("https://", "").replace("www.", "").replace("m.yandex.", "yandex.")
        host, path = normalized.split("||", 1)[0], None
        pieces = normalized.split("||", 1)
        host_labels = host.split(".")
        if host_labels[0] != "yandex" and len(host_labels) > 1:
            return host_labels[0]

        # Raises IndexError when there is no "||" separator; handled below.
        path = pieces[1]
        if path == "/":
            return "morda"

        section = path.split("/")[1]
        if section == "instant" or "tv" in section:
            return None
        return _PAGE_SECTION_ALIASES.get(section, section)
    except Exception:
        # Best-effort parsing: malformed input yields None. Unlike a bare
        # ``except:`` this does not swallow KeyboardInterrupt / SystemExit.
        return None


# Two output streams: the first schema (with pid and recordid) is used for the
# records passed to ``only900``, the second for the records passed to ``actions``.
@with_hints(
    output_schema=multischema(
        dict(uid=str, timestamp=int, referer=str, ui=str, pid=str, path=str, notif_type=str, notif_service=str, recordid=str),
        dict(uid=str, timestamp=int, referer=str, ui=str, path=str, notif_type=str, notif_service=str)
    )
)

def process_redir_data4(recs, only900, actions):
    """Split redir-log records into a pid=900 notification stream and an action stream.

    For every input record a "host||page" referer is built and the UI family
    is derived from it. Records with pid "900" are additionally parsed for
    notification type/service and written to ``only900``; every record that
    is not skipped is then written to ``actions``.

    :param recs: iterable of records; the code reads normal_path, yandexuid,
        pid, timestamp, parsed_vars and the (referer_)canonized_vhost /
        (referer_)page fields.
    :param only900: callable output sink for pid=900 notification records.
    :param actions: callable output sink for the per-record action stream.

    NOTE(review): ``urllib.unquote`` and ``str.decode`` are Python 2 APIs.
    """
    for rec in recs:

        path = rec.normal_path
        uid = rec.yandexuid
        notif_type, notif_service = None, None

        try:
            # pid 900 records describe the page itself; others describe the referring page.
            if rec.pid == "900":
                referer = rec.canonized_vhost + "||" + rec.page
            else:
                referer = rec.referer_canonized_vhost + "||" + rec.referer_page
            # Drop the whole record when the referer contains one of these markers.
            if "hamster" in referer or "priemka" in referer or "clck" in referer or "-test" in referer:
                continue
        except:
            # Any failure while building the referer (e.g. a None field) falls back to "".
            referer = ""

        # ui is computed from the referer as built above, before the
        # video/images overrides applied further down.
        ui = url_to_ui(referer)

        if rec.pid == "900":
            vrsr = None
            if rec.parsed_vars is not None:
                vrsr = dict(rec.parsed_vars)
            if vrsr is not None:
                if "-notifications" in vrsr:
                    # "-notifications" holds a URL-quoted Python literal that is
                    # iterated item by item below.
                    vrs = vrsr["-notifications"]
                    vrs1 = urllib.unquote(vrs).decode('utf8')
                    try:
                        vrs2 = ast.literal_eval(vrs1)
                    except:
                        # Unparseable payload: skip the record entirely,
                        # including the ``actions`` emission below.
                        continue

                    for vrs_prs in vrs2:
                        notif_type, notif_service = "unknown", "unknown"
                        recordId = None
                        if "type" in vrs_prs:
                            # Gender suffixes are stripped from the notification type.
                            notif_type = vrs_prs["type"].replace("_male","").replace("_female","")
                        if "service" in vrs_prs:
                            notif_service = vrs_prs["service"]
                        if "recordId" in vrs_prs:
                            recordId = vrs_prs["recordId"]

                        # This ``continue`` targets the inner loop: only this item is skipped.
                        if notif_service == "tracker":
                            continue
                        #elif "global-notifications" in notif_service:
                        #    notif_service = "unknown"

                        only900( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                                pid=rec.pid, notif_type=notif_type, notif_service=notif_service, recordid=recordId) )
                    # After the loop notif_type / notif_service keep the values of
                    # the last item and are reused for the ``actions`` record below.

                else:
                    notif_type, notif_service = "unknown", "unknown"

                    # click
                    if "-service" in vrsr and "-type" in vrsr:
                        notif_service = urllib.unquote(vrsr["-service"]).decode('utf8')
                        notif_type = urllib.unquote(vrsr["-type"]).decode('utf8')

                    # open
                    elif "-settingId" in vrsr and "-recordId" in vrsr:
                        # NOTE(review): service is taken from "-recordId" and type
                        # from "-settingId" -- confirm this mapping is intended.
                        notif_service = vrsr["-recordId"]
                        notif_type = vrsr["-settingId"]

                    notif_type = notif_type.replace("_male","").replace("_female","")

                    # This ``continue`` targets the outer loop: the record is
                    # dropped from both output streams.
                    if notif_service == "tracker":
                        continue

                    only900( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                            pid=rec.pid, notif_type=notif_type, notif_service=notif_service, recordid=None) )

            else:
                # pid 900 without parsed_vars: emitted with unknown type/service.
                only900( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                            pid=rec.pid, notif_type='unknown', notif_service='unknown', recordid=None) )

        # Show events carry no notification type/service in the action stream.
        if path == "notifier.results.show":
            notif_type, notif_service = None, None

        # Video and images events get a fixed synthetic referer.
        elif "player-events." in path or "videohub" in path or rec.pid=="197":
            referer = "yandex.ru||/video/search"
        elif rec.pid=="40":
            referer = "yandex.ru||/images/search"

        actions( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                        notif_service=notif_service, notif_type=notif_type) )


@with_hints(
    files=[nfl.TableFile('$job_root/services', 'services')],
    output_schema=dict(
        uid=str,
        ui=str,
        path=str,
        notif_type=str,
        notif_service=str,
        service=str,
        timestamp=int,
        splitter=str
    )
)
def service_map(recs, **options):
    """Replicate every record once per known notification service.

    The list of services is read from the attached 'services' table; names
    containing technical markers (notifications, clck, hamster, priemka,
    test, tracker, unknown) are dropped. Each surviving name is written to
    the ``splitter`` field of a copy of the record.

    Records whose own ``service`` contains "hamster", "priemka" or "clck"
    are skipped entirely.

    :param recs: iterable of records exposing uid, ui, path, notif_type,
        notif_service, service and timestamp.
    :param options: must contain 'file_streams' with a 'services' stream
        whose rows expose ``n_service``.
    :return: generator of Record objects, one per (record, splitter) pair.
    """
    file_streams = options['file_streams']
    excluded_markers = ("notifications", "clck", "hamster", "priemka", "test", "tracker", "unknown")
    splitters = [
        row.n_service
        for row in file_streams['services']
        if row.get('n_service') and not any(marker in row.n_service for marker in excluded_markers)
    ]

    for rec in recs:
        # The exclusion check depends only on the record, so it is evaluated
        # once per record rather than once per splitter.
        service = rec.service
        if "hamster" in service or "priemka" in service or "clck" in service:
            continue
        for s in splitters:
            yield Record(uid=rec.uid, notif_service=rec.notif_service, notif_type=rec.notif_type,
                         path=rec.path, service=service, timestamp=rec.timestamp, ui=rec.ui, splitter=s)


@with_hints(
    output_schema=dict(
        uid=str,
        ui=str,
        notif_type=str,
        page_from=str,
        page_to=str,
        # clong is emitted as the integers 0/1 below, so it is declared int.
        clong=int,
        overlong=int
    )
)
def myreduce_v2(recs):
    """Classify notification clicks by the dwell time that follows them.

    Within each (uid, splitter) group the records are walked in order
    (presumably sorted by timestamp by the caller -- confirm). Records whose
    service overlaps the splitter by substring, and mark_read events, are
    ignored. Whenever the previous kept record was a notification click for
    the splitter service, the gap ``dw`` to the current record is classified:

    * dw > 30 min  -- nothing is emitted;
    * dw > 120 s   -- clong=1, overlong=1;
    * dw > 30 s    -- clong=1, overlong=0;
    * otherwise    -- clong=0, overlong=0.

    A click that is the last kept record of the group is emitted with
    clong=0, overlong=0.

    :param recs: iterable of (key, records) pairs; key exposes uid and
        splitter, records expose service, path, timestamp, notif_service,
        notif_type and ui.
    :return: generator of Record objects matching the declared schema.
    """
    click_paths = ("/notifier/results/click", "notifier.results.click")
    ignored_paths = ("/notifier/results/mark_read", "notifier.results.mark_read")

    for key, records in recs:

        uid = key.uid
        j = key.splitter

        prev_path, prev_action, prev_ts = None, None, None
        # Initialised explicitly so the state never depends on a previous group.
        notif_service, notif_type, ui = None, None, None

        for rec in records:
            currect_action = rec.service
            if j in currect_action or currect_action in j:
                continue
            current_path = rec.path

            if current_path in ignored_paths:
                continue
            currect_ts = rec.timestamp

            # prev_path is None until a record has been kept, so this is
            # False for the first kept record of the group.
            if prev_path in click_paths and notif_service == j and notif_type is not None:
                dw = currect_ts - prev_ts
                if dw <= 30*60:
                    yield Record(uid=uid, ui=ui, notif_type=notif_type,
                                 page_from=prev_action, page_to=notif_service,
                                 clong=1 if dw > 30 else 0,
                                 overlong=1 if dw > 120 else 0)

            prev_action = currect_action
            prev_ts = currect_ts
            prev_path = current_path
            notif_service = rec.notif_service
            notif_type = rec.notif_type
            ui = rec.ui

        # A trailing click has no following event, so it counts as a short click.
        if prev_path in click_paths and notif_service == j and notif_type is not None:
            yield Record(uid=uid, ui=ui, notif_type=notif_type, page_from=prev_action, page_to=notif_service, clong=0, overlong=0)


def parse_ticker_events(events):
    """Tell whether a URL-quoted events payload describes a ticker event.

    The payload is URL-unquoted and parsed as a Python literal; only its
    first element is inspected.

    :param events: URL-quoted string holding a literal sequence of dicts.
    :return: True when the first element has type 'ticker' and its "data"
        contains "notifierTickerValue"; False otherwise, including when the
        payload cannot be unquoted or parsed.
    """
    try:
        first = ast.literal_eval(urllib.unquote(events))[0]
        return (
            "type" in first
            and first["type"] == 'ticker'
            and "notifierTickerValue" in first["data"]
        )
    except Exception:
        # Best-effort parsing: malformed payloads are simply not ticker events.
        # Unlike a bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        return False


def parse_bao_events(norm_p, events):
    """Extract the event name from a URL-quoted events payload.

    The payload is URL-unquoted and parsed as a Python literal; only its
    first element is inspected.

    :param norm_p: fallback value returned when the payload cannot be
        unquoted, parsed or indexed.
    :param events: URL-quoted string holding a literal sequence of dicts.
    :return: the first element's "event" value ("click_videohub" for a
        click whose "parent-path" is "videohub"), None when the first
        element has no "event" key, or ``norm_p`` on any parsing failure.
    """
    try:
        first = ast.literal_eval(urllib.unquote(events))[0]
        if "event" not in first:
            return None
        res = first["event"]
        if res == "click" and "parent-path" in first and first["parent-path"] == "videohub":
            res = res + "_videohub"
        return res
    except Exception:
        # Best-effort parsing: fall back to the normalised path. Unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit through.
        return norm_p


@with_hints(
    output_schema=dict(
        uid=str,
        notif_type=str,
        page_from=str,
        page_to=str,
        # clong is emitted as the integers 0/1 below, so it is declared int.
        clong=int,
        overlong=int,
        ticker=int
    )
)
def myreduce_v3(recs):
    """Classify notification clicks by dwell time and flag recent ticker events.

    Within each (uid, splitter) group the records are walked in order
    (presumably sorted by timestamp by the caller -- confirm). Records whose
    service overlaps the splitter by substring, and mark_read events, are
    ignored. A record with path "ticker" sets the ticker flag and remembers
    its timestamp; the flag is cleared once a click is evaluated more than
    30 minutes after that timestamp.

    Whenever the previous kept record was a notification click for the
    splitter service, the gap ``dw`` to the current record is classified:

    * dw > 30 min  -- nothing is emitted;
    * dw > 120 s   -- clong=1, overlong=1;
    * dw > 30 s    -- clong=1, overlong=0;
    * otherwise    -- clong=0, overlong=0.

    A click that is the last kept record of the group is emitted with
    clong=0, overlong=0.

    :param recs: iterable of (key, records) pairs; key exposes uid and
        splitter, records expose service, path, timestamp, notif_service
        and notif_type.
    :return: generator of Record objects matching the declared schema.
    """
    click_paths = ("/notifier/results/click", "notifier.results.click")
    ignored_paths = ("/notifier/results/mark_read", "notifier.results.mark_read")
    ticker_ttl = 30*60

    for key, records in recs:

        uid = key.uid
        j = key.splitter

        prev_path, prev_action, prev_ts = None, None, None
        # Initialised explicitly so the state never depends on a previous group.
        notif_service, notif_type = None, None
        has_ticker, ticker_ts = 0, None

        for rec in records:
            currect_action = rec.service
            if j in currect_action or currect_action in j:
                continue
            current_path = rec.path

            if current_path in ignored_paths:
                continue
            currect_ts = rec.timestamp

            if current_path == "ticker":
                has_ticker = 1
                ticker_ts = currect_ts

            # prev_path is None until a record has been kept, so this is
            # False for the first kept record of the group.
            if prev_path in click_paths and notif_service == j and notif_type is not None:
                dw = currect_ts - prev_ts

                if ticker_ts is not None and (currect_ts - ticker_ts) > ticker_ttl:
                    has_ticker = 0

                if dw <= 30*60:
                    yield Record(uid=uid, notif_type=notif_type,
                                 page_from=prev_action, page_to=notif_service,
                                 clong=1 if dw > 30 else 0,
                                 overlong=1 if dw > 120 else 0,
                                 ticker=has_ticker)

            prev_action = currect_action
            prev_ts = currect_ts
            prev_path = current_path
            notif_service = rec.notif_service
            notif_type = rec.notif_type

        # A trailing click has no following event, so it counts as a short click.
        if prev_path in click_paths and notif_service == j and notif_type is not None:
            # prev_ts is the timestamp of the last kept record, i.e. the value
            # the loop variable held when the loop finished.
            if ticker_ts is not None and (prev_ts - ticker_ts) > ticker_ttl:
                has_ticker = 0
            yield Record(uid=uid, notif_type=notif_type, page_from=prev_action, page_to=notif_service, clong=0, overlong=0, ticker=has_ticker)


# NOTE(review): the 'test_ids' option is declared here but is not read anywhere
# in the body of make_job.
@cli.statinfra_job(options=[cli.Option('test_ids', default='?')])

def make_job(job, nirvana, options):
    """Assemble the notification statistics pipeline on ``job`` and return it.

    The pipeline reads the experiment uid table (first Nirvana input) and the
    raw data table, splits ticker events from the rest, runs
    ``process_redir_data4`` to get the pid=900 stream and the action stream,
    and writes several report tables derived from ``output_table`` and
    ``$job_root``.

    :param job: Nile job object to configure.
    :param nirvana: Nirvana context; input_tables[0], output_tables[0] and
        directories[0] are used.
    :param options: CLI options; only ``options.dates`` is read (its last
        element is used as the report date).
    :return: the configured job.
    """
    input_table = nirvana.input_tables[0]
    output_table = nirvana.output_tables[0]

    # $job_root in the table paths below resolves to the first Nirvana directory.
    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        templates=dict(
            job_root=nirvana.directories[0]
        )
    )

    mydates = options.dates
    # The last date of the requested range is used as the report date.
    strdate = mydates[-1]

    uids_exp = job.table(input_table)
    data_raw = job.table('${global.your_home}/${global.exp}/raw_data')

    # Separate ticker events from everything else; presumably split() returns
    # (non-matching, matching) streams -- confirm against the Nile API.
    standard, ticker = data_raw.split(sf.and_(sf.defined('events'), nf.custom(parse_ticker_events, 'events')))

    # Distinct uids that have a ticker event, joined with the experiment table.
    s3 = ticker.project("uid").unique("uid").join(uids_exp, by='uid')

#    s3.groupby('testid') \
#        .aggregate(uids=na.count()) \
#        .put('$job_root/ticker/rep')

    s1 = standard.filter(sf.defined('normal_path'))
    # Ticker records get the synthetic path "ticker" and lose the events column.
    s2 = ticker.project(ne.all(exclude='events'), normal_path = ne.const("ticker"))

    # Two outputs, matching the multischema of process_redir_data4.
    log900, data = s1.concat(s2).map(process_redir_data4)

    # Notifier opens by users with a ticker event, aggregated per testid.
    ops = log900.filter(sf.equals("path", "notifier.show"))
    s3.join(ops, by='uid') \
        .groupby('testid') \
        .aggregate(hits=na.count(),
            uids=na.count_distinct('uid')
        ).put('$job_root/ticker/open_' + strdate + "_rep")

#    log900.put(output_table + '_900')
#    data.put(output_table + '_data')

    # Distinct notification services; this table is the one attached to
    # service_map as the 'services' file.
    log900.filter(sf.defined('notif_service')) \
            .project(n_service = 'notif_service').unique('n_service') \
            .put('$job_root/services')

    # only pid=900: count and CTR
    not_shows_900, shows_900 = log900.split(sf.defined('recordid'))
    # Record ids whose fielddate is strictly before the report date.
    old_records = job.table('//home/lego/statistics/MMA-1923/record_id_date') \
        .filter(nf.custom(lambda a: datetime.datetime.strptime(a, '%Y-%m-%d') < datetime.datetime.strptime(strdate, '%Y-%m-%d'), 'fielddate')) \
        .unique('recordid').put(output_table + '_filtered_records')

    # left_only join: keep only records whose recordid is absent from old_records.
    filtered_show_records = shows_900.join(old_records, by='recordid', type='left_only')

    log900_mod = job.concat(not_shows_900, filtered_show_records).join(uids_exp, by='uid')

    clicks_rep2 = log900_mod.map(add_totals) \
        .groupby('testid', 'path', 'notif_service') \
        .aggregate(hits = na.count()) \
        .sort('path', 'notif_service', 'testid') \
        .put(output_table + '_rep')
    # The string literal below is a no-op expression statement holding a
    # disabled click/show CTR computation.
    """
    click2 = clicks_rep2.filter(nf.equals('path', 'notifier.results.click')).project(ne.all(exclude='hits'), clicks='hits')
    show2 = clicks_rep2.filter(nf.equals('path', 'notifier.results.show')).project(ne.all(exclude='hits'), shows='hits')
    click_show2 = show2.join(click2, by=('testid', 'notif_service', 'notif_type')) \
        .project(ne.all(), ctr=ne.custom(lambda a, b: 100.0*a/b, 'clicks', 'shows').add_hints(type=float)) \
        .put(output_table + "_click_show")
    """
    # Long-click pipeline: derive the service from the referer, replicate each
    # record per splitter service, then reduce per (uid, splitter).
    p2 = data.project(ne.all(), service=ne.custom(page_to_service, 'referer').add_hints(type=str))
    p4 = p2.filter(sf.defined('service')).map(service_map) \
        .put(output_table + "_p3") \
        .groupby('uid', 'splitter').sort('timestamp') \
        .reduce(myreduce_v3,
                memory_limit=4000,
                intensity='data'
        ).join(uids_exp, by='uid').put(output_table + "_p4")

    # Totals per testid.
    p4.groupby('testid') \
        .aggregate(hits=na.count(),
            hits_long=na.sum('clong'),
            hits_overlong=na.sum('overlong'),
            uids=na.count_distinct('uid')) \
        .put(output_table + "_longclicks_total")

    # Same metrics broken down by target service.
    p4.groupby('testid', 'page_to') \
        .aggregate(hits=na.count(),
            hits_long=na.sum('clong'),
            hits_overlong=na.sum('overlong'),
            uids=na.count_distinct('uid')) \
        .sort('page_to', 'testid') \
        .put(output_table + "_longclicks_service")

    # Long clicks per user computed per bucket, then averaged over buckets.
    # NOTE(review): 'bucket' and 'testid' presumably come from the uids_exp join -- confirm.
    p4.groupby('bucket', 'testid', 'page_to') \
        .aggregate(hits_long=na.sum('clong'), uids=na.count_distinct('uid')) \
        .project(ne.all(), share = ne.custom(lambda x, y: 1.0*x/y, 'hits_long', 'uids')) \
        .groupby('testid', 'page_to') \
        .aggregate(share=na.mean('share')) \
        .sort('page_to', 'testid') \
        .put(output_table + "_longclicks_ctr")


    return job


# Script entry point: when executed directly, hand control to cli.run().
if __name__ == '__main__':
    cli.run()

