#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    extended_schema,
    multischema,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    extractors as se,
    resources as sr
)
from qb2.api.v1.typing import Optional, Json, String

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
# import datetime
# import time
import re
import ast
import urllib
import itertools
import json
from datetime import datetime


# Scarab event types that make_job lets through (filter on ``scarab_type``)
# before the per-UUID reduce; every other event type is dropped.
safe_list = [
    "MORDA_CARD_CONTENT_HORIZONTAL_SCROLL_EVENT",
    "MOBILE_UI_TOUCH_EVENT",
    "MOBILE_URL_OPENED_EVENT",
    "MOBILE_MORDA_VERTICAL_SWITCH_EVENT",
    "SEARCH_VERTICAL_SWITCH_EVENT",
    "MORDA_CONTENT_SCROLLED_EVENT",
    "APP_LAUNCH_STATUS_EVENT",
]

def process_key(r, dt):
    """Return the value stored under key ``r`` in mapping ``dt``.

    :param r: key to look up.
    :param dt: dict-like object holding the parsed event fields.
    :return: ``dt[r]`` when the key is present (whatever its value, including
        ``None``), otherwise an empty string.
    """
    # A single definition: a second ``def`` with the same name would silently
    # replace this one at import time.
    return dt.get(r, "")


@with_hints(
    output_schema=dict(
        UUID=str,
        time=str,
        scarabOriginalTimestamp=int,
        scarab_type=str,
        method=str,
        state=str,
        actionType=str,
        scope=str,
        screen=str,
        uiUsed=str,
        extIdentifier=str,
        toVertical=str,
        cardType=str,
        cardName=str,
        scrollable=str,
        isFirstTime=str,
        sessionId=str,
        height=str,
        pageId=str,
        tabId=str,
        url=str,
        list_=str,
        eventNumber=str,
        value=Optional[Json],
        appliedTestIds=str
    )
)
def parse(recs):
    """Flatten raw log rows into one event record per row.

    Every input row must expose ``key`` (a string; each ``'uu/'`` occurrence
    is removed to obtain the UUID) and ``value`` (a JSON document).  A row is
    skipped silently when its JSON cannot be decoded or when any step of the
    field extraction fails, e.g. ``scarab:type``, ``timestamp``, ``metrika``
    or ``scarabOriginalTimestamp`` is missing.

    Yields ``Record`` objects with the fields UUID, time,
    scarabOriginalTimestamp, scarab_type, sessionId, method, state, url,
    scope, screen, uiUsed, toVertical, pageId, tabId, eventNumber and
    additionalInfo.  Optional fields default to an empty string.

    NOTE(review): ``additionalInfo`` is emitted but is not listed in
    ``output_schema``, while several declared columns (actionType, cardType,
    extIdentifier, ...) are never emitted -- confirm which side is intended.
    """
    for rec in recs:
        UUID = rec.key.replace('uu/', '')
        try:
            d = json.loads(rec.value)
        except Exception:
            # Undecodable payload: best-effort skip.
            continue

        try:
            extIdentifier, isFirstTime, sessionId, eventNumber = "", "", "", ""

            scarab_type = d['scarab:type']

            # The first 10 characters of the timestamp are taken as epoch
            # seconds, then shifted by 3 hours (presumably UTC -> Moscow
            # time -- TODO confirm).
            ts = int(str(d['timestamp'])[:10]) + 3*60*60
            timestamp = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')

            method = process_key('method', d)
            state = process_key('state', d)
            scope = process_key('scope', d)
            screen = process_key('screen', d)
            uiUsed = process_key('uiUsed', d)
            toVertical = process_key('toVertical', d)
            pageId = process_key('pageId', d)
            tabId = process_key('tabId', d)
            url = process_key('url', d)
            additionalInfo = process_key('additionalInfo', d)

            if "info" in d:
                # extIdentifier / isFirstTime are extracted but not emitted.
                # The lookups stay because any failure in this block (info is
                # not JSON, or not a container) switches to the fallback of
                # treating the raw ``info`` value as the target vertical.
                try:
                    info = json.loads(d['info'])
                    if "extIdentifier" in info:
                        extIdentifier = info['extIdentifier']
                    if "isFirstTime" in info:
                        isFirstTime = info['isFirstTime']
                except Exception:
                    toVertical = d['info']

            metrika = d['metrika']
            # ``.keys()`` is deliberate: a non-mapping ``metrika`` raises here
            # and the row is skipped.
            if "sessionId" in metrika.keys():
                sessionId = metrika['sessionId']
            if "eventNumber" in metrika:
                eventNumber = metrika['eventNumber']
            if "toVertical" in d:
                if "extIdentifier" in d['toVertical']:
                    extIdentifier = d['toVertical']['extIdentifier']

            record = Record(UUID=UUID, time=timestamp,
                            scarabOriginalTimestamp=d['scarabOriginalTimestamp'],
                            scarab_type=scarab_type, sessionId=sessionId, method=method,
                            state=state, url=url, scope=scope, screen=screen, uiUsed=uiUsed,
                            toVertical=toVertical, pageId=pageId, tabId=tabId,
                            eventNumber=eventNumber, additionalInfo=additionalInfo)
        except Exception:
            # Malformed event: best-effort skip.
            continue

        # The yield lives outside the try block so that GeneratorExit (raised
        # at the yield when the generator is closed) is never swallowed; a
        # bare ``except`` around a yield makes close() fail with RuntimeError.
        yield record


def page_to_service(p):
    """Map a page URL to a short service name.

    The scheme (``https://`` or ``http://``), ``www.`` and the mobile
    ``m.yandex.`` prefix are stripped first.  Then:

    * a host with several labels whose first label is not ``yandex`` yields
      that first label (``mail.yandex.ru`` -> ``"mail"``);
    * otherwise the first path segment is used, with a few aliases
      (``yandsearch``/``touchsearch`` -> ``"search"``, ``gorsel`` ->
      ``"images"``, ``themes``/``m`` -> ``"morda"``, ``st`` -> ``"tracker"``,
      empty or ``d`` -> ``"unknown"``);
    * ``instant`` and any segment containing ``tv`` yield ``None``;
    * ``portal`` followed by a segment containing ``video`` yields
      ``"portal_<that segment>"``.

    Any URL that does not contain ``yandex`` yields ``"not_yandex_portal"``.

    :param p: URL string.
    :return: service name, ``None`` for the ignored sections, or
        ``"unknown"`` when the URL cannot be processed (not a string, no
        path part, ``portal`` without a second segment, ...).
    """
    # First-path-segment names that are reported under another service name.
    aliases = {
        "yandsearch": "search",
        "touchsearch": "search",
        "gorsel": "images",
        "themes": "morda",
        "m": "morda",
        "st": "tracker",
        "": "unknown",
        "d": "unknown",
    }

    try:
        stripped = p.replace("https://", "").replace("http://", "")
        stripped = stripped.replace("www.", "").replace("m.yandex.", "yandex.")
        parts = stripped.split("/", 1)
        host_labels = parts[0].split(".")

        if host_labels[0] != "yandex" and len(host_labels) > 1:
            service = host_labels[0]
        elif parts[1] == "/":
            # parts[1] raises IndexError when the URL has no path at all;
            # that case is reported as "unknown" by the handler below.
            service = "morda"
        else:
            segments = parts[1].split("/")
            service = segments[0]
            if service == "instant" or "tv" in service:
                service = None
            elif service == "portal":
                # A bare ".../portal" has no second segment -> IndexError ->
                # "unknown", as before.
                if "video" in segments[1]:
                    service = service + "_" + segments[1]
            else:
                service = aliases.get(service, service)

        if "yandex" not in p:
            service = "not_yandex_portal"
        return service

    except Exception:
        # Best-effort classification; KeyboardInterrupt / SystemExit are no
        # longer swallowed.
        return "unknown"


@with_hints(
    files=[nfl.TableFile('//home/lego/statistics/MMA-1923/services', 'services')],
    output_schema=dict(uid=str,
                       clicks=int,
                       opens=int,
                       informer_bell=int,
                       menu_notifications=int))
def myReduce(recs, **options):
    """Count notifier opens and follow-up clicks per UUID.

    ``recs`` yields ``(key, records)`` groups; ``key.UUID`` identifies the
    group and the records are walked in the order given (make_job sorts them
    by ``time``).  ``options['file_streams']['services']`` provides rows with
    an ``n_service`` field; names containing "notifications", "clck",
    "hamster", "priemka", "test" or "unknown" are excluded.

    Per group the reducer counts:

    * ``informer_bell`` / ``menu_notifications`` -- MOBILE_UI_TOUCH_EVENT
      records whose ``uiUsed`` contains "bender_informer_bell" /
      "menu_notifications";
    * ``opens`` -- MOBILE_URL_OPENED_EVENT records whose url contains
      "global-notifications";
    * ``clicks`` -- distinct urls opened right after a "global-notifications"
      url whose service (see page_to_service) is in the allowed set.

    A record is emitted only for groups with at least one bell or menu touch.
    Event types other than the two above are ignored and do not reset the
    "previous url" state.
    """
    excluded_markers = ("notifications", "clck", "hamster", "priemka", "test", "unknown")
    # Built once as a set: membership is tested for every candidate click.
    allowed_services = set(
        row.n_service
        for row in options['file_streams']['services']
        if row.get('n_service')
        and not any(marker in row.n_service for marker in excluded_markers)
    )

    for key, records in recs:
        opens = 0
        clicks = 0
        bell_touches = 0
        menu_touches = 0
        seen_urls = set()
        prev_url = None

        for rec in records:
            action = rec.scarab_type
            url = None

            if action == "MOBILE_UI_TOUCH_EVENT":
                if "bender_informer_bell" in rec.uiUsed:
                    bell_touches += 1
                elif "menu_notifications" in rec.uiUsed:
                    menu_touches += 1

            elif action == "MOBILE_URL_OPENED_EVENT":
                url = rec.url
                if "global-notifications" in url:
                    opens += 1
                elif prev_url is not None and "global-notifications" in prev_url and opens > 0:
                    # Each url counts at most once per UUID.
                    if url not in seen_urls:
                        seen_urls.add(url)
                        if page_to_service(url) in allowed_services:
                            clicks += 1
            else:
                continue

            # A touch event resets prev_url to None, so only a url opened
            # directly after the notifier page counts as a click.
            prev_url = url

        if bell_touches > 0 or menu_touches > 0:
            yield Record(uid=key.UUID, clicks=clicks, opens=opens,
                         informer_bell=bell_touches,
                         menu_notifications=menu_touches)


@with_hints(
    files=[nfl.TableFile('//home/lego/statistics/MMA-1923/services', 'services')],
    output_schema=dict(uid=str,
                       clicks=int,
                       long_clicks=int,
                       long_clicks_=str,
                       opens=int,
                       informer_bell=int,
                       menu_notifications=int)
                   )
def myReduce2(recs, **options):
    """Count notifier opens, clicks and "long" clicks per UUID.

    ``recs`` yields ``(key, records)`` groups; ``key.UUID`` identifies the
    group and the records are walked in the order given (make_job sorts them
    by ``time``).  ``options['file_streams']['services']`` provides rows with
    an ``n_service`` field; names containing "notifications", "clck",
    "hamster", "priemka", "test" or "unknown" are excluded.

    Per group:

    * ``opens`` -- MOBILE_URL_OPENED_EVENT records whose url contains
      "global-notifications";
    * ``clicks`` -- urls opened right after a "global-notifications" url
      whose service (see page_to_service) is in the allowed set; the click
      time and service are remembered;
    * ``long_clicks`` -- remembered clicks for which a later event (any
      non-url event, or a url event of a different service) arrives more than
      30 seconds after the click.  ``long_clicks_`` carries the
      ``[click_ts, end_ts]`` pairs (seconds) as a JSON string so the value
      matches the declared ``str`` column.

    ``informer_bell`` and ``menu_notifications`` are always 0 here (touch
    events are not counted by this reducer).  A record is emitted only for
    groups with at least one open.
    """
    excluded_markers = ("notifications", "clck", "hamster", "priemka", "test", "unknown")
    # Built once as a set: membership is tested for every candidate click.
    allowed_services = set(
        row.n_service
        for row in options['file_streams']['services']
        if row.get('n_service')
        and not any(marker in row.n_service for marker in excluded_markers)
    )

    for key, records in recs:
        opens = 0
        clicks = 0
        long_clicks = []
        prev_url = None
        start_ts, notif_service = None, None

        for rec in records:
            action = rec.scarab_type
            # scarabOriginalTimestamp is divided by 1000 to get seconds.
            current_ts = round(rec.scarabOriginalTimestamp / 1000.0, 0)
            url = None
            # Whether this event may close a pending click as a long click.
            may_close_click = True

            if action == "MOBILE_URL_OPENED_EVENT":
                url = rec.url
                service = page_to_service(url)
                if "global-notifications" in url:
                    opens += 1
                    may_close_click = False
                elif prev_url is not None and "global-notifications" in prev_url and opens > 0:
                    if service in allowed_services:
                        clicks += 1
                        start_ts = current_ts
                        notif_service = service
                    may_close_click = False
                else:
                    # Staying inside the clicked service does not end the click.
                    may_close_click = service != notif_service

            if may_close_click and start_ts is not None and notif_service is not None:
                if current_ts - start_ts > 30:
                    long_clicks.append([start_ts, current_ts])
                    start_ts, notif_service = None, None

            prev_url = url

        if opens > 0:
            yield Record(uid=key.UUID, clicks=clicks, opens=opens,
                         long_clicks=len(long_clicks),
                         long_clicks_=json.dumps(long_clicks),
                         informer_bell=0,
                         menu_notifications=0)


# https://nirvana.yandex-team.ru/flow/8d2bdd96-57f9-435b-b8c6-c065fb6a6893/06eada43-0225-43f9-9215-61fd80976d50/graph


# https://clubs.at.yandex-team.ru/yt/2642
@cli.statinfra_job
def make_job(job, nirvana, statface_client, options):
    """Build the job that computes notifier click statistics.

    For every date in ``options.dates`` the job reads the input table
    (``nirvana.input_tables[0]`` when available, otherwise the daily
    ``mobile_app_log`` table for that date), parses it with ``parse`` and
    runs two branches:

    * events from ``safe_list`` -> ``myReduce2`` -> per-UUID table
      ``$job_root/sessions_app_reduce`` -> totals table
      ``$job_root/sessions_app_reduce_rep`` -> published to the Statface
      report ``Notifier/Counters/AppNotifierClicksStats`` (daily scale) with
      ``page_from`` / ``page_to`` set to ``"_total_"``;
    * touch and url-opened events -> ``myReduce`` -> per-UUID table
      ``$job_root/sessions_app_reduce_v0`` -> totals table
      ``$job_root/sessions_app_reduce_rep_v0`` (not published).

    ``$job_root`` is bound to ``nirvana.directories[0]``.

    :return: the configured job.
    """
    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        templates=dict(
            job_root=nirvana.directories[0]
        )
    )

    report = ns.StatfaceReport() \
        .path('Notifier/Counters/AppNotifierClicksStats') \
        .scale('daily') \
        .client(statface_client)

    mydates = options.dates

    for strdate in mydates:

        try:
            table = nirvana.input_tables[0]
        except Exception:
            # No usable Nirvana input: fall back to the daily log for the date.
            # (Exception instead of a bare except so that KeyboardInterrupt /
            # SystemExit are not swallowed.)
            table = '//user_sessions/pub/mobile_app_log/daily/'+strdate+'/clean'

        log = job.table(table)

        sessions_app_parsed = log.map(parse)

        sessions_app_parsed.filter(sf.custom(lambda x: x in safe_list, 'scarab_type')) \
            .unique('UUID', 'time', 'scarab_type', 'uiUsed', 'url') \
            .groupby('UUID').sort('time') \
            .reduce(myReduce2) \
            .sort('uid') \
            .put('$job_root/sessions_app_reduce') \
            .aggregate(clicks=na.sum('clicks'),
                long_clicks=na.sum('long_clicks'),
                opens=na.sum('opens'),
                uids=na.count()
            ) \
            .put('$job_root/sessions_app_reduce_rep') \
            .project(ne.all(), fielddate = ne.const(strdate),
                page_from = ne.const("_total_"),
                page_to = ne.const("_total_")) \
            .publish(report, allow_change_job=True)

        # NOTE(review): myReduce does not emit a ``long_clicks`` column, yet
        # the aggregation below sums it -- confirm whether this is intended.
        sessions_app_parsed.filter(sf.custom(lambda x: x in ["MOBILE_UI_TOUCH_EVENT", "MOBILE_URL_OPENED_EVENT"], 'scarab_type')) \
            .unique('UUID', 'time', 'scarab_type', 'uiUsed', 'url') \
            .groupby('UUID').sort('time') \
            .reduce(myReduce) \
            .sort('uid') \
            .put('$job_root/sessions_app_reduce_v0') \
            .aggregate(clicks=na.sum('clicks'),
                long_clicks=na.sum('long_clicks'),
                opens=na.sum('opens'),
                uids=na.count()
            ) \
            .put('$job_root/sessions_app_reduce_rep_v0')

    return job


# Script entry point: when the file is executed directly, hand control to
# ``cli.run()`` (presumably it dispatches to the job registered with
# ``@cli.statinfra_job`` -- verify against the Nile CLI documentation).
if __name__ == '__main__':
    cli.run()

