#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    resources as sr
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import getpass #obligatory for Statface
import datetime
import time
import re
import ast
import urllib
import itertools

# https://hitman.yandex-team.ru/projects/edinoe_izbrannoe/EI_Stats_MMA_1923


@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        ui=str,
        fielddate=str,
        path=str,
        uid=str
    )
)
def add_totals(recs):
    """Fan every record out into its '_total_' slices.

    For each input record, yields one record per combination of
    (real value | '_total_') over notif_type, notif_service and ui,
    i.e. 8 records per input. fielddate, path and uid are copied
    through unchanged.
    """
    total = '_total_'
    for rec in recs:
        slices = itertools.product(
            (rec.notif_type, total),
            (rec.notif_service, total),
            (rec.ui, total),
        )
        # These three fields are never replaced by a total marker.
        fielddate, path, uid = rec.fielddate, rec.path, rec.uid
        for notif_type, notif_service, ui in slices:
            yield Record(
                notif_type=notif_type,
                notif_service=notif_service,
                ui=ui,
                fielddate=fielddate,
                path=path,
                uid=uid
            )


@with_hints(
    output_schema=dict(
        page_from=str,
        page_to=str,
        ui=str,
        notif_type=str,
        clong=int,
        overlong=int
    )
)
def add_totals2(recs):
    """Fan every record out into its '_total_' slices.

    For each input record, yields one record per combination of
    (real value | '_total_') over page_from, page_to, ui and notif_type,
    i.e. 16 records per input. clong and overlong are copied through
    unchanged.
    """
    total = '_total_'
    for rec in recs:
        slices = itertools.product(
            (rec.page_from, total),
            (rec.page_to, total),
            (rec.ui, total),
            (rec.notif_type, total),
        )
        # The two flags are carried as-is into every slice.
        clong, overlong = rec.clong, rec.overlong
        for page_from, page_to, ui, notif_type in slices:
            yield Record(
                page_from=page_from,
                page_to=page_to,
                ui=ui,
                notif_type=notif_type,
                clong=clong,
                overlong=overlong
            )

def url_to_ui(r):
    """Classify a URL-like string as 'touch', 'app', 'pad' or 'desktop'.

    Only the part before the first '?' is checked for the '/touch', '/pad'
    and '/tablet' markers. A touch URL is reported as 'app' when that part
    contains 'searchapp' or when the segment between the first and the
    second '?' contains 'mobileapp'.
    """
    pieces = r.split("?")
    location = pieces[0]
    # Only the segment right after the first '?' is inspected, not the rest.
    first_query = pieces[1] if len(pieces) > 1 else ""

    if "/touch" in location:
        is_app = "searchapp" in location or "mobileapp" in first_query
        return "app" if is_app else "touch"
    if "/pad" in location or "/tablet" in location:
        return "pad"
    return "desktop"

def page_to_service(p):
    """Derive a short service name from a '<host>||<page>' string.

    The input has 'https://' and 'www.' removed and 'm.yandex.' rewritten to
    'yandex.' before it is split on the first '||'.

    * If the host has more than one label and the first one is not 'yandex',
      that first label is the service (e.g. 'mail' for 'mail.yandex.ru').
    * Otherwise the page decides: '/' is 'morda'; else the first path
      segment is used, with 'yandsearch'/'touchsearch' mapped to 'search',
      'gorsel' mapped to 'images', and an empty segment or
      'global-notifications' mapped to 'unknown'.

    Any ordinary failure while parsing (non-string input, missing '||',
    page without a '/') yields 'unknown'. Only Exception subclasses are
    handled, so KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    try:
        normalized = (
            p.replace("https://", "")
            .replace("www.", "")
            .replace("m.yandex.", "yandex.")
        )
        parts = normalized.split("||", 1)
        host_labels = parts[0].split(".")
        if host_labels[0] != "yandex" and len(host_labels) > 1:
            return host_labels[0]

        # IndexError here (no '||' in the input) is mapped to 'unknown' below.
        page = parts[1]
        if page == "/":
            return "morda"

        # IndexError here (page without '/') is mapped to 'unknown' below.
        section = page.split("/")[1]
        if section in ("", "global-notifications"):
            return "unknown"
        if section in ("yandsearch", "touchsearch"):
            return "search"
        if section == "gorsel":
            return "images"
        return section
    except Exception:
        return "unknown"

def parse_vars_values(vrsr):
    """Extract notification services, types and record ids from parsed vars.

    Three input layouts are recognised, checked in this order:

    1. '-notifications': a URL-quoted Python literal that is unquoted,
       decoded as UTF-8 and evaluated with ast.literal_eval; each entry
       contributes its 'service', 'type' and 'recordId' values, with
       'unknown' / 'unknown' / None used for missing keys.
    2. '-service' together with '-type': a single notification whose
       values are unquoted and decoded; its record id is None.
    3. '-settingId' together with '-recordId': a single notification where
       '-recordId' is stored as the service and '-settingId' as the type;
       its record id is None.

    Returns [services, types, record_ids], three lists of equal length
    (all empty when none of the layouts matches).
    """
    services, types, record_ids = [], [], []

    if "-notifications" in vrsr:
        decoded = urllib.unquote(vrsr["-notifications"]).decode('utf8')
        for entry in ast.literal_eval(decoded):
            # Explicit membership tests (not .get) keep the original
            # behaviour for entries that are not dicts.
            types.append(entry["type"] if "type" in entry else "unknown")
            services.append(
                entry["service"] if "service" in entry else "unknown"
            )
            record_ids.append(
                entry["recordId"] if "recordId" in entry else None
            )
    elif "-service" in vrsr and "-type" in vrsr:
        services.append(urllib.unquote(vrsr["-service"]).decode('utf8'))
        types.append(urllib.unquote(vrsr["-type"]).decode('utf8'))
        record_ids.append(None)
    elif "-settingId" in vrsr and "-recordId" in vrsr:
        services.append(vrsr["-recordId"])
        types.append(vrsr["-settingId"])
        record_ids.append(None)

    return [services, types, record_ids]


def _redir_record(rec, referer, path, pid, notif_type, notif_service, recordid):
    """Build one output Record for process_redir_data from a log row."""
    return Record(
        uid=rec.yandexuid,
        timestamp=rec.timestamp,
        ui=url_to_ui(referer),
        referer=referer,
        path=path,
        pid=pid,
        notif_type=notif_type,
        notif_service=notif_service,
        recordid=recordid
    )


@with_hints(
    output_schema=dict(
        uid=str,
        referer=str,
        ui=str,
        path=str,
        notif_type=str,
        notif_service=str,
        recordid=str,
        pid=str
    )
)
def process_redir_data(recs):
    """Mapper: turn redir-log rows into per-notification records.

    For every input row:

    * pid is read from the row ('' if it cannot be read).
    * referer is '<referer_canonized_vhost>||<referer_page>', or
      '<canonized_vhost>||<page>' for pid '900'; '' if any of these
      cannot be read or concatenated.
    * Rows whose referer mentions hamster/priemka/clck/-test/tracker are
      dropped.
    * Rows with pid other than '900' produce a single record with
      notif_type, notif_service and recordid set to None.
    * Rows with pid '900' produce one record per notification found by
      parse_vars_values(rec.parsed_vars). Notifications whose service
      contains 'tracker' are skipped, a service containing
      'global-notifications' becomes 'unknown', and '_male'/'_female' are
      stripped from the type. If anything fails while doing this, a single
      'unknown'/'unknown' record is emitted, but only for path
      'notifier.show'.

    Only Exception subclasses are caught. The previous bare ``except:``
    around the ``yield`` could also catch GeneratorExit and then yield
    again, which makes closing the generator raise RuntimeError.

    NOTE(review): the emitted records carry ``timestamp`` although it is not
    listed in output_schema - confirm this is intended.
    NOTE(review): make_job in this file does not request
    referer_canonized_vhost / referer_page / canonized_vhost / page from
    qb2; if those attributes are absent every referer falls back to '' and
    ui is always 'desktop' - verify against the qb2 field list.
    NOTE(review): the fallback checks path 'notifier.show', while make_job
    filters on 'notifier.results.show' - confirm which spelling is meant.
    """
    skip_markers = ("hamster", "priemka", "clck", "-test", "tracker")

    for rec in recs:
        path = rec.normal_path

        try:
            pid = rec.pid
        except Exception:
            pid = ""

        try:
            referer = rec.referer_canonized_vhost + "||" + rec.referer_page
            if pid == "900":
                referer = rec.canonized_vhost + "||" + rec.page
        except Exception:
            # Missing attribute or a None component: treat as no referer.
            referer = ""

        if any(marker in referer for marker in skip_markers):
            continue

        if pid != "900":
            yield _redir_record(rec, referer, path, pid, None, None, None)
            continue

        try:
            services, types, record_ids = parse_vars_values(rec.parsed_vars)
            # The index counts skipped 'tracker' entries too, so it stays
            # aligned with record_ids.
            for idx, (notif_service, raw_type) in enumerate(zip(services, types)):
                if "tracker" in notif_service:
                    continue
                if "global-notifications" in notif_service:
                    notif_service = "unknown"
                notif_type = raw_type.replace("_male", "").replace("_female", "")
                yield _redir_record(
                    rec, referer, path, pid,
                    notif_type, notif_service, record_ids[idx]
                )
        except Exception:
            # Best-effort fallback; records already yielded for this row stay.
            if path == "notifier.show":
                yield _redir_record(
                    rec, referer, path, pid, 'unknown', 'unknown', None
                )


@with_hints(
    files=[nfl.TableFile('$job_root/services', 'services')],
    output_schema=dict(
        uid=str,
        ui=str,
        path=str,
        notif_type=str,
        notif_service=str,
        service=str,
        timestamp=int,
        splitter=str
    )
)
def service_map(recs, **options):
    """Mapper: replicate each record once per known notification service.

    The list of splitters is read from the attached 'services' table
    (column n_service, empty values ignored) and excludes names containing
    notifications/clck/hamster/priemka/test/tracker. Records whose own
    ``service`` contains hamster/priemka/clck are dropped; every other
    record is emitted once per splitter with ``splitter`` set to it.
    """
    excluded_splitter_tokens = (
        "notifications", "clck", "hamster", "priemka", "test", "tracker"
    )
    known_services = [
        row.n_service
        for row in options['file_streams']['services']
        if row.get('n_service')
    ]
    splitters = [
        name for name in known_services
        if not any(token in name for token in excluded_splitter_tokens)
    ]

    for rec in recs:
        if not splitters:
            # Nothing to fan out to; the record is not inspected at all.
            continue
        service = rec.service
        if "hamster" in service or "priemka" in service or "clck" in service:
            continue
        for splitter in splitters:
            yield Record(
                uid=rec.uid,
                notif_service=rec.notif_service,
                notif_type=rec.notif_type,
                path=rec.path,
                service=service,
                timestamp=rec.timestamp,
                ui=rec.ui,
                splitter=splitter
            )


@with_hints(
    output_schema=dict(
        uid=str,
        ui=str,
        notif_type=str,
        page_from=str,
        page_to=str,
        clong=int,
        overlong=int
    )
)
def myreduce(recs):
    """Reducer: classify notification clicks by time until the next event.

    Input groups are keyed by (uid, splitter). Within a group, records
    whose ``service`` overlaps the splitter (either string contains the
    other) and 'mark_read' events are ignored. For every remaining record
    that follows a notifier click on the splitter's own service (with a
    non-None notif_type), one record is emitted describing that click:

    * clong = 1 if more than 30 seconds passed before the next event
    * overlong = 1 if more than 120 seconds passed

    A click that is the last remaining event of the group is emitted with
    clong = 1 and overlong = 1.

    The clong hint is ``int`` (it was ``str``) to match the 0/1 values
    actually emitted and the clong=int hint of add_totals2.

    NOTE(review): the time differences only make sense if each group
    arrives ordered by timestamp - confirm the caller sorts accordingly.
    """
    click_paths = ("/notifier/results/click", "notifier.results.click")
    mark_read_paths = ("/notifier/results/mark_read", "notifier.results.mark_read")

    for key, records in recs:
        uid = key.uid
        splitter = key.splitter

        # State of the previously kept event, reset for every group.
        prev_path = prev_action = prev_ts = None
        notif_service = notif_type = ui = None

        for rec in records:
            action = rec.service
            if splitter in action or action in splitter:
                continue
            path = rec.path
            if path in mark_read_paths:
                continue
            ts = rec.timestamp

            # prev_path is None for the first kept event, so nothing is
            # emitted until a previous event exists.
            if (prev_path in click_paths
                    and notif_service == splitter
                    and notif_type is not None):
                dwell = ts - prev_ts
                yield Record(
                    uid=uid, ui=ui, notif_type=notif_type,
                    page_from=prev_action, page_to=notif_service,
                    clong=int(dwell > 30), overlong=int(dwell > 120)
                )

            prev_action = action
            prev_ts = ts
            prev_path = path
            notif_service = rec.notif_service
            notif_type = rec.notif_type
            ui = rec.ui

        # A trailing click has no following event: counted as long and overlong.
        if (prev_path in click_paths
                and notif_service == splitter
                and notif_type is not None):
            yield Record(
                uid=uid, ui=ui, notif_type=notif_type,
                page_from=prev_action, page_to=notif_service,
                clong=1, overlong=1
            )


@with_hints(
    output_schema=dict(
        ui=str,
        long_clicks=int,
        shows=int
    )
)
def replace_none(recs):
    """Mapper: default unreadable long_clicks / shows to 0.

    Emits (ui, long_clicks, shows) for every record. A counter that cannot
    be read from the record is replaced with 0. Only Exception subclasses
    are handled, so KeyboardInterrupt/SystemExit are no longer swallowed.

    NOTE(review): a counter that is present but holds None is passed
    through as None, despite the function name - confirm whether such
    values should also become 0.
    """
    for rec in recs:
        try:
            long_clicks = rec.long_clicks
        except Exception:
            long_clicks = 0
        try:
            shows = rec.shows
        except Exception:
            shows = 0
        yield Record(ui=rec.ui, long_clicks=long_clicks, shows=shows)


@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        fielddate=str
    )
)
def add_totals_v3(recs):
    """Fan every record out into its '_total_' slices.

    For each input record, yields one record per combination of
    (real value | '_total_') over notif_type and notif_service, i.e. 4
    records per input. fielddate is copied through unchanged.
    """
    total = '_total_'
    for rec in recs:
        slices = itertools.product(
            (rec.notif_type, total),
            (rec.notif_service, total),
        )
        fielddate = rec.fielddate
        for notif_type, notif_service in slices:
            yield Record(
                notif_type=notif_type,
                notif_service=notif_service,
                fielddate=fielddate
            )

# https://clubs.at.yandex-team.ru/yt/2642
@cli.statinfra_job

def make_job(job, nirvana, statface_client, options):
    """Assemble the daily notifier click/show reports for the requested dates.

    For every date in ``options.dates`` the job reads
    ``statbox/redir-log/<date>``, maps it through ``process_redir_data``,
    keeps rows with pid '900' and publishes to Statface:

    * hit counts per (fielddate, ui, path, notif_service, notif_type),
      including '_total_' slices, and a click/show CTR derived from them;
    * the same two reports again ("Fixed") computed after part of the rows
      has been anti-joined against ``$job_root/test/record_id_date`` by
      recordid.

    Args:
        job: job object to configure and attach the operations to.
        nirvana: its ``directories[0]`` is used as the ``$job_root`` template.
        statface_client: client handed to every StatfaceReport.
        options: its ``dates`` attribute lists the dates to process.

    Returns:
        The configured job.
    """

    # https://nirvana.yandex-team.ru/flow/e1310632-a4e2-4054-b744-f7a0fda051af/9d3e53fc-1112-49dc-8c61-45d11bc1a35f/graph

    # Set YT pool trees and the path templates ($job_root, $tmp_root) used below.
    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"], tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root=nirvana.directories[0],
            tmp_root='//home/images/tmp'
        )
    )

    # One daily-scale Statface report handle per target report path.
    report = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV2') \
        .scale('daily') \
        .client(statface_client)

    # NOTE(review): report2 is built but nothing in this function publishes to it.
    report2 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV2Conversion') \
        .scale('daily') \
        .client(statface_client)

    report3 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotificationsTypesConversion') \
        .scale('daily') \
        .client(statface_client)

    # NOTE(review): report5 is built but nothing in this function publishes to it.
    report5 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotificationBlockCreated') \
        .scale('daily') \
        .client(statface_client)

    report6 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV2Fixed') \
        .scale('daily') \
        .client(statface_client)

    report7 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotificationsTypesConversionFixed') \
        .scale('daily') \
        .client(statface_client)

    mydates = options.dates

    for strdate in mydates:

        # Read the day's redir-log via qb2: normal_path, yandexuid and referer
        # must be defined, and rows whose normal_path contains 'tech' are dropped.
        log = job.table('statbox/redir-log/' + strdate)
        log2 = log.qb2(log = 'redir-log',
                      fields=['yandexuid', 'timestamp', 'normal_path', 'pid', 'url', 'referer', 'parsed_vars'],
                      filters = [sf.default_filtering('redir-log'),
                                 sf.defined('normal_path', 'yandexuid', 'referer'),
                                 sf.not_(sf.contains('normal_path', 'tech'))],
                      mode='yamr_lines'
                )
        data = log2.map(process_redir_data)#.put("$job_root/test/process_redir_data")

        # Only rows with pid '900' are used from here on.
        log900 = data.filter(nf.equals('pid', '900'))

        # Stamp the date, expand '_total_' slices, count rows per slice, publish.
        clicks_rep = log900.project(ne.all(), fielddate=ne.const(strdate)) \
            .map(add_totals) \
            .groupby('fielddate', 'ui', 'path', 'notif_service', 'notif_type') \
            .aggregate(hits = na.count()) \
            .publish(report, allow_change_job=True)

        # Split the counts into clicks and shows so they can be joined per slice.
        click = clicks_rep.filter(nf.equals('path', 'notifier.results.click')).project(ne.all(exclude='hits'), clicks='hits')
        show = clicks_rep.filter(nf.equals('path', 'notifier.results.show')).project(ne.all(exclude='hits'), shows='hits')

        # ctr = 100 * clicks / shows for each (fielddate, notif_service, notif_type, ui).
        click_show1 = show.join(click, by=('fielddate', 'notif_service', 'notif_type', 'ui')) \
            .project(ne.all(), ctr=ne.custom(lambda a, b: 100.0*a/b, 'clicks', 'shows').add_hints(type=float)) \
            .put("$job_root/test/notification_block_created_CTR0") \
            .publish(report3, allow_change_job=True)

        ## corrected shows count
        # NOTE(review): the names suggest split() returns (rows failing the
        # predicate, rows passing it), i.e. shows_900 holds rows with a defined
        # recordid - confirm against the nile documentation.
        not_shows_900, shows_900 = log900.split(sf.defined('recordid'))
        old_records = job.table('$job_root/test/record_id_date')

        # type='left_only' presumably keeps rows whose recordid is absent from
        # old_records - TODO confirm.
        filtered_show_records = shows_900.join(old_records, by='recordid', type='left_only')
        # Store the surviving record ids together with the date.
        filtered_show_records.project('recordid', fielddate=ne.const(strdate)) \
            .unique('recordid') \
            .put('$job_root/test/new_records')

        # Same counting as clicks_rep, on the rows left after the anti-join.
        log900_mod = job.concat(not_shows_900, filtered_show_records)
        clicks_rep2 = log900_mod.project(ne.all(), fielddate=ne.const(strdate)) \
            .map(add_totals) \
            .groupby('fielddate', 'ui', 'path', 'notif_service', 'notif_type') \
            .aggregate(hits = na.count()) \
            .publish(report6, allow_change_job=True)

        click2 = clicks_rep2.filter(nf.equals('path', 'notifier.results.click')).project(ne.all(exclude='hits'), clicks='hits')
        show2 = clicks_rep2.filter(nf.equals('path', 'notifier.results.show')).project(ne.all(exclude='hits'), shows='hits')

        # Same CTR computation as click_show1, on the corrected counts.
        click_show2 = show2.join(click2, by=('fielddate', 'notif_service', 'notif_type', 'ui')) \
            .project(ne.all(), ctr=ne.custom(lambda a, b: 100.0*a/b, 'clicks', 'shows').add_hints(type=float)) \
            .put("$job_root/test/notification_block_created_CTR0_v2") \
            .publish(report7, allow_change_job=True)

    return job


# Script entry point: hand control to nile's CLI runner, which presumably
# dispatches to the function decorated with @cli.statinfra_job - TODO confirm.
if __name__ == '__main__':
    cli.run()

