#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    extended_schema,
    multischema,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    extractors as se,
    resources as sr
)
from qb2.api.v1.typing import Optional, Json, String

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import datetime
import time
import re
import ast
import urllib
import itertools

# https://hitman.yandex-team.ru/projects/edinoe_izbrannoe/EI_Stats_MMA_1923


@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        ui=str,
        fielddate=str,
        path=str,
        uid=str
    )
)
def add_totals(recs):
    """Fan every record out over the '_total_' roll-ups of its dimensions.

    For each input record one Record is yielded per combination of
    (notif_type | '_total_') x (notif_service | '_total_') x (ui | '_total_'),
    eight in total. fielddate, path and uid are copied unchanged.
    """
    for rec in recs:
        rollups = itertools.product(
            (rec.notif_type, '_total_'),
            (rec.notif_service, '_total_'),
            (rec.ui, '_total_')
        )
        fielddate, path, uid = rec.fielddate, rec.path, rec.uid
        for notif_type, notif_service, ui in rollups:
            yield Record(
                notif_type=notif_type,
                notif_service=notif_service,
                ui=ui,
                fielddate=fielddate,
                path=path,
                uid=uid
            )

@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        ui=str,
        # `page` is emitted below and used by the downstream groupby, so it
        # has to be part of the declared schema as well.
        page=str,
        fielddate=str,
        path=str,
        uid=str
    )
)
def add_totals1(recs):
    """Fan every record out over the '_total_' roll-ups, including `page`.

    For each input record one Record is yielded per combination of
    (notif_type | '_total_') x (notif_service | '_total_') x (ui | '_total_')
    x (page | '_total_'), sixteen in total. fielddate, path and uid are
    copied unchanged.
    """
    for rec in recs:
        rollups = itertools.product(
            (rec.notif_type, '_total_'),
            (rec.notif_service, '_total_'),
            (rec.ui, '_total_'),
            (rec.page, '_total_')
        )
        fielddate, path, uid = rec.fielddate, rec.path, rec.uid
        for notif_type, notif_service, ui, page in rollups:
            yield Record(
                notif_type=notif_type,
                notif_service=notif_service,
                ui=ui,
                page=page,
                fielddate=fielddate,
                path=path,
                uid=uid
            )

@with_hints(
    output_schema=dict(
        page_from=str,
        page_to=str,
        ui=str,
        notif_type=str,
        clong=int,
        overlong=int
    )
)
def add_totals2(recs):
    """Fan every record out over '_total_' roll-ups of its four dimensions.

    page_from, page_to, ui and notif_type are each combined with '_total_'
    (sixteen records per input); clong and overlong are copied unchanged.
    """
    for rec in recs:
        rollups = itertools.product(
            (rec.page_from, '_total_'),
            (rec.page_to, '_total_'),
            (rec.ui, '_total_'),
            (rec.notif_type, '_total_')
        )
        clong, overlong = rec.clong, rec.overlong
        for page_from, page_to, ui, notif_type in rollups:
            yield Record(
                page_from=page_from,
                page_to=page_to,
                ui=ui,
                notif_type=notif_type,
                clong=clong,
                overlong=overlong
            )

@with_hints(
    output_schema=dict(
        page_from=str,
        page_to=str,
        ui=str,
        notif_type=str,
        hits=int,
        hits_long=int
    )
)
def add_totals3(recs):
    """Fan every record out over '_total_' roll-ups of its four dimensions.

    page_from, page_to, ui and notif_type are each combined with '_total_'
    (sixteen records per input); hits and hits_long are copied unchanged.
    """
    for rec in recs:
        rollups = itertools.product(
            (rec.page_from, '_total_'),
            (rec.page_to, '_total_'),
            (rec.ui, '_total_'),
            (rec.notif_type, '_total_')
        )
        hits, hits_long = rec.hits, rec.hits_long
        for page_from, page_to, ui, notif_type in rollups:
            yield Record(
                page_from=page_from,
                page_to=page_to,
                ui=ui,
                notif_type=notif_type,
                hits=hits,
                hits_long=hits_long
            )

def url_to_ui(r):
    """Classify a referer string into a UI kind.

    `r` is expected to look like "host||/page?query" (the format built by the
    callers in this file), but any string is accepted.

    Returns one of "pad", "touch", "app" or "desktop":
      * "pad"     - the part before "?" contains "/pad" or "/tablet";
      * "app"     - it contains "/touch" and either "searchapp" is in it or
                    "mobileapp" is in the query part;
      * "touch"   - it contains "/touch", or the host has an "m" label
                    (e.g. "m.yandex.ru");
      * "desktop" - everything else, including the empty string.
    """
    parts = r.split("?")
    location = parts[0]

    if "/pad" in location or "/tablet" in location:
        return "pad"

    if "/touch" in location:
        if "searchapp" in location or (len(parts) > 1 and "mobileapp" in parts[1]):
            return "app"
        return "touch"

    # Look for a dedicated "m" host label instead of the substring "m.":
    # the substring test also fired on "yandex.com", "yandex.com.tr" and on
    # any host with a label ending in "m", mislabelling desktop traffic.
    host = location.split("/")[0].split("||")[0]
    if "m" in host.split("."):
        return "touch"

    return "desktop"

# First path segment -> canonical service name, for pages on the main
# "yandex.*" host. Segments that are not listed are used as-is.
_SERVICE_ALIASES = {
    "yandsearch": "search",
    "touchsearch": "search",
    "people": "search",
    "gorsel": "images",
    "themes": "morda",
    "m": "morda",
    "user": "ugcpub",
    "st": "tracker",
    "": "unknown",
    "d": "unknown",
}


def page_to_service(p):
    """Map a "host||/page" referer string to a service name.

    Rules, in order:
      * "morda_informer" is returned unchanged;
      * "https://", "www." and the mobile prefixes "m.yandex." / "m.zen."
        are stripped or normalised first;
      * if the host has more than one label and the first one is not
        "yandex", that first label is the service (e.g. "zen");
      * otherwise the page decides: "/" is "morda"; a first path segment
        equal to "instant" or containing "tv" gives None; other segments go
        through the alias table and fall back to the segment itself.

    Returns the service name, or None when the input cannot be parsed
    (None input, missing "||" on a yandex host, page without "/", ...).
    """
    try:
        if p == "morda_informer":
            return p

        cleaned = (p.replace("https://", "")
                    .replace("www.", "")
                    .replace("m.yandex.", "yandex.")
                    .replace("m.zen.", "zen."))
        host_and_page = cleaned.split("||", 1)
        host_labels = host_and_page[0].split(".")

        if host_labels[0] != "yandex" and len(host_labels) > 1:
            return host_labels[0]

        page = host_and_page[1]
        if page == "/":
            return "morda"

        section = page.split("/")[1]
        if section == "instant" or "tv" in section:
            return None
        return _SERVICE_ALIASES.get(section, section)
    except Exception:
        # Deliberate best-effort: malformed referers map to "no service".
        # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return None


# NOTE(review): the first schema declares `recordId`, but the records emitted
# to `only900` below carry `recordid` (lower case) plus `referer_page_900`,
# and the records emitted to `actions` also carry notif_service / notif_type,
# which the second schema does not list. Confirm how nile treats undeclared
# fields before relying on these schemas.
@with_hints(
    output_schema=multischema(
        dict(uid=str, timestamp=int, referer=str, ui=str, pid=str, path=str, notif_type=str, notif_service=str, recordId=str, ugc_id=str),
        dict(uid=str, timestamp=int, referer=str, ui=str, path=str)
    )
)
def process_redir_data3(recs, only900, actions):
    """Split redir-log records into notifier (pid "900") events and user actions.

    recs    -- input records; the fields read are normal_path, yandexuid,
               pid, timestamp, url, parsed_vars, page, canonized_vhost,
               referer_page and referer_canonized_vhost.
    only900 -- output callable; receives one Record per notification event
               of a pid "900" record.
    actions -- output callable; receives one Record per input record that is
               not skipped.

    Records whose referer looks like a test/internal host are dropped, and so
    are pid "900" records whose "-notifications" payload cannot be parsed.
    """
    for rec in recs:

        path = rec.normal_path
        uid = rec.yandexuid
        notif_type, notif_service = None, None

        try:
            # Referers are encoded as "<host>||<page>" throughout this file.
            referer = rec.referer_canonized_vhost + "||" + rec.referer_page
            if rec.pid == "900":
                # For notifier records the page itself becomes the referer;
                # the original referer is kept separately.
                referer_page_900 = referer
                referer = rec.canonized_vhost + "||" + rec.page
            if "hamster" in referer or "priemka" in referer or "clck" in referer or "-test" in referer or "staff" in referer or "captcha" in referer:
                # Skip hosts that match these test/internal markers.
                continue
            elif "freeze/browser" in rec.url or "from=browser_android_ntp" in rec.url:
                referer = "yandex.ru||/browser/"
        except:
            # Any failure above (e.g. a missing field) resets the referer to
            # an empty string, even if it had already been built.
            referer = ""
            referer_page_900 = referer

        ui = url_to_ui(referer)
        try:
            if "browser_android_ntp" in rec.url:
                ui = "touch"
        except:
            # Best effort: rec.url may be missing or not a string.
            pass

        if rec.pid == "900":
            vrsr = rec.parsed_vars
            if vrsr is not None:
                if "-notifications" in vrsr:
                    # A url-quoted Python-literal list of notification dicts;
                    # one only900 record is emitted per list element.
                    vrs = vrsr["-notifications"]
                    vrs1 = urllib.unquote(vrs).decode('utf8')
                    try:
                        vrs2 = ast.literal_eval(vrs1)
                    except:
                        # Unparseable payload: the whole record is dropped,
                        # including its `actions` record.
                        continue

                    for vrs_prs in vrs2:
                        notif_type, notif_service = "unknown", "unknown"
                        recordId = None
                        ugc_id = None
                        if "type" in vrs_prs:
                            # Strip the gender suffix from the type.
                            notif_type = vrs_prs["type"].replace("_male","").replace("_female","")
                        if "service" in vrs_prs:
                            notif_service = vrs_prs["service"]
                        if "recordId" in vrs_prs:
                            recordId = vrs_prs["recordId"]
                        if "ugc_id" in vrs_prs:
                            ugc_id = vrs_prs["ugc_id"]

                        #if notif_service == "tracker":
                        #    continue

                        #elif "global-notifications" in notif_service:
                        #    notif_service = "unknown"

                        only900( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                                pid=rec.pid, notif_type=notif_type, notif_service=notif_service, recordid=recordId, ugc_id=ugc_id,
                                referer_page_900 = referer_page_900) )

                else:
                    # No "-notifications" list: a single notification is
                    # described by individual vars.
                    notif_type, notif_service = "unknown", "unknown"
                    recordid = None
                    ugc_id = None

                    # click
                    if "-service" in vrsr and "-type" in vrsr:
                        notif_service = urllib.unquote(vrsr["-service"]).decode('utf8')
                        notif_type = urllib.unquote(vrsr["-type"]).decode('utf8')
                        if "-record_id" in vrsr:
                            recordid = urllib.unquote(vrsr["-record_id"]).decode('utf8')
                        if "-ugc_id" in vrsr:
                            ugc_id = urllib.unquote(vrsr["-ugc_id"]).decode('utf8')

                    # open
                    elif "-settingId" in vrsr and "-recordId" in vrsr:
                        # NOTE(review): "-recordId" is stored as notif_service
                        # and "-settingId" as notif_type here - confirm this
                        # mapping is intended.
                        notif_service = vrsr["-recordId"]
                        notif_type = vrsr["-settingId"]

                    notif_type = notif_type.replace("_male","").replace("_female","")

                    #if notif_service == "tracker":
                    #    continue

                    only900( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                            pid=rec.pid, notif_type=notif_type, notif_service=notif_service, recordid=recordid, ugc_id=ugc_id,
                            referer_page_900 = referer_page_900) )

            else:
                # pid 900 without parsed vars: emit a placeholder event.
                only900( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                            pid=rec.pid, notif_type='unknown', notif_service='unknown', recordid=None, ugc_id=None,
                            referer_page_900 = referer_page_900) )

        # Adjust what goes into the `actions` record: shows carry no
        # notification info, and some paths/pids get a fixed referer.
        if path == "notifier.results.show":
            notif_type, notif_service = None, None

        elif "player-events." in path or "videohub" in path or rec.pid=="197":
            referer = "yandex.ru||/video/search"
        elif rec.pid=="40":
            referer = "yandex.ru||/images/search"
        elif rec.pid=="14":
            referer = "yandex.ru||/news/"

        # For pid 900 records notif_service / notif_type hold the values of
        # the last notification processed above.
        actions( Record(uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                        notif_service=notif_service, notif_type=notif_type) )


def morda_informer(path_raw):
    """Recognise a click on the notification bell of the morda informer.

    `path_raw` is a parsed event dict. Two layouts are recognised:
      * desktop: parent-path "v14" with the ctag chain
        notifications -> social-alert -> bell -> <info>;
      * touch:   parent-path "geotouch" with the ctag chain bell -> <info>.

    <info> must look like "<service>_<type>". Returns the tuple
    (device, notif_service, notif_type), or None when the event is not such
    a click. Missing keys or empty lists raise KeyError / IndexError.
    """
    if path_raw["event"] != "click":
        return None

    parent = path_raw["parent-path"]
    if parent == "v14":
        root = path_raw["blocks"][0]
        if root["ctag"] != "notifications":
            return None
        alert = root["children"][0]
        if alert["ctag"] != "social-alert":
            return None
        bell = alert["children"][0]
        if bell["ctag"] != "bell":
            return None
        info = bell["children"][0]["ctag"]
        device = "desktop"
    elif parent == "geotouch":
        root = path_raw["blocks"][0]
        if root["ctag"] != "bell":
            return None
        info = root["children"][0]["ctag"]
        device = "touch"
    else:
        return None

    if "_" not in info:
        return None

    # Split on the first underscore only: the type may contain underscores.
    service, _, kind = info.partition("_")
    return device, service, kind


@with_hints(
    output_schema=multischema(
        dict(uid=str, timestamp=int, referer=str, ui=str, pid=str, path=str, notif_type=str, notif_service=str, recordId=str, ugc_id=str),
        dict(uid=str, timestamp=int, referer=str, ui=str, path=str)
    )
)
def process_redir_events(recs, only900, actions):
    """Turn records that carry an `events` payload into action records.

    recs    -- input records; yandexuid, timestamp, events, referer_page and
               referer_canonized_vhost are read.
    only900 -- output callable; receives a synthetic pid "900"
               "notifier.results.click" record for morda informer clicks.
    actions -- output callable; receives one record per usable event.

    A record is skipped when its referer cannot be built, when the payload
    cannot be parsed, when the first event has no "event" key or is "tech",
    or when anything else fails while the record is being handled.
    """
    for rec in recs:

        uid = rec.yandexuid
        notif_service = None
        notif_type = None

        try:
            referer = rec.referer_canonized_vhost + "||" + rec.referer_page
        except:
            continue

        ui = url_to_ui(referer)

        try:
            # Only the first event of the url-quoted literal list is used.
            payload = urllib.unquote(rec.events).decode('utf8')
            event = ast.literal_eval(payload)[0]

            if "event" not in event:
                continue
            path = event["event"]
            if path == "tech":
                continue

            informer_click = morda_informer(event)
            if informer_click is not None:
                path = "notifier.results.click"
                referer = "morda_informer"
                ui, notif_service, raw_type = informer_click
                notif_type = raw_type.replace("_male", "").replace("_female", "")

                only900(Record(
                    uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer,
                    path=path, pid="900",
                    notif_type=notif_type, notif_service=notif_service,
                    recordid=None, ugc_id=None
                ))

            actions(Record(
                uid=uid, timestamp=rec.timestamp, ui=ui, referer=referer, path=path,
                notif_service=notif_service, notif_type=notif_type
            ))
        except:
            continue


@with_hints(
    files=[nfl.TableFile('$job_root/services', 'services')],
    output_schema=dict(
        uid=str,
        ui=str,
        path=str,
        notif_type=str,
        notif_service=str,
        service=str,
        timestamp=int,
        splitter=str
    )
)
def service_map(recs, **options):
    """Replicate every record once per known notification service.

    The service names come from the attached 'services' table (field
    `n_service`); names containing any of the excluded markers are ignored.
    Each surviving name is attached to the record as `splitter`. Records
    whose own `service` contains "hamster", "priemka" or "clck" are dropped.
    """
    excluded_names = ("notifications", "clck", "hamster", "priemka", "test", "unknown", "staff")
    excluded_services = ("hamster", "priemka", "clck")

    services_table = options['file_streams']['services']
    known = [row.n_service for row in services_table if row.get('n_service')]
    splitters = [name for name in known
                 if not any(marker in name for marker in excluded_names)]

    for rec in recs:
        for splitter in splitters:
            service = rec.service
            if any(marker in service for marker in excluded_services):
                continue
            yield Record(
                uid=rec.uid,
                notif_service=rec.notif_service,
                notif_type=rec.notif_type,
                path=rec.path,
                service=service,
                timestamp=rec.timestamp,
                ui=rec.ui,
                splitter=splitter
            )


@with_hints(
    output_schema=dict(
        uid=str,
        ui=str,
        notif_type=str,
        page_from=str,
        page_to=str,
        # clong is emitted as 0/1 and is summed / compared with ints
        # downstream, so it is declared as int (it was declared as str).
        clong=int,
        overlong=int,
        last=bool
    )
)
def myreduce(recs):
    """Measure the dwell time after notification clicks, per (uid, splitter).

    Expects groups keyed by `uid` and `splitter`, with records that provide
    service, path, timestamp, notif_service, notif_type and ui (the caller
    sorts them by timestamp). Records whose service overlaps with the
    splitter (substring match either way) and the "mark read" paths are
    ignored.

    For every remaining record that follows a "notifier.results.click" on a
    notification of the splitter service, one Record is emitted with
      clong    -- 1 if the gap to the click exceeds 30 seconds, else 0;
      overlong -- 1 if the gap exceeds 120 seconds, else 0;
      last     -- True if the gap exceeds 30 minutes.
    A click that is the last remaining record of the group is emitted with
    clong=1, overlong=1, last=True.
    """
    ignored_paths = ("notifier.results.mark_read", "notifier.results.kebab.read")

    for key, records in recs:
        uid = key.uid
        j = key.splitter

        # All per-group state is reset here so that nothing leaks from the
        # previous group.
        prev_path, prev_action, prev_ts = None, None, None
        notif_service, notif_type, ui = None, None, None

        for rec in records:
            currect_action = rec.service
            if j in currect_action or currect_action in j:
                continue
            current_path = rec.path
            if current_path in ignored_paths:
                continue
            currect_ts = rec.timestamp

            if prev_path == "notifier.results.click" and notif_service == j and notif_type is not None:
                dw = currect_ts - prev_ts
                yield Record(
                    uid=uid, ui=ui, notif_type=notif_type,
                    page_from=prev_action, page_to=notif_service,
                    clong=1 if dw > 30 else 0,
                    overlong=1 if dw > 120 else 0,
                    last=dw > 30*60
                )

            prev_action = currect_action
            prev_ts = currect_ts
            prev_path = current_path
            notif_service = rec.notif_service
            notif_type = rec.notif_type
            ui = rec.ui

        # A trailing click has no following action in the group.
        if prev_path == "notifier.results.click" and notif_service == j and notif_type is not None:
            yield Record(uid=uid, ui=ui, notif_type=notif_type, page_from=prev_action, page_to=notif_service, clong=1, overlong=1, last=True)


# NOTE(review): this multischema looks copied from the redir mappers. The
# reducer yields a single stream with the fields ui, uid, notif_type,
# page_from, page_to, clong and ts - confirm the declared schema against
# what nile expects here.
@with_hints(
    output_schema=multischema(
        dict(uid=str, timestamp=int, referer=str, ui=str, pid=str, path=str, notif_type=str, notif_service=str),
        dict(uid=str, timestamp=int, referer=str, ui=str, path=str)
    )
)
def myreduce_v5(recs):
    """Second way of counting long clicks after notification clicks, per uid.

    Expects groups keyed by `uid` with records that provide path, service,
    timestamp, notif_service, notif_type and ui (the caller sorts them by
    timestamp). The "mark read" paths are ignored.

    For every record that follows a "notifier.results.click" one Record with
    clong=0 is emitted for the click itself. A Record with clong=1 is
    emitted additionally when
      * the user went somewhere other than the notification's service and
        the gap to the click is over 30 seconds but at most 30 minutes, or
      * the user landed on the notification's service ("exploring") and is
        seen more than 30 seconds after the click, either still on that
        service (within 30 minutes) or on leaving it.
    """
    ignored_paths = ["notifier.results.mark_read", "notifier.results.kebab.read"]

    for key, records in recs:

        uid = key.uid
        # All per-group state is reset here so that nothing leaks from the
        # previous group.
        prev_path, prev_action, prev_ts = None, None, None
        notif_service, notif_type, page_from = None, None, None
        start_exploring, service_exploring, from_notif_type = None, None, None

        for rec in records:

            current_path = rec.path
            if current_path in ignored_paths:
                continue

            currect_action = rec.service
            currect_ts = rec.timestamp

            if prev_path is not None:
                if prev_path in ["notifier.results.click"] and notif_service is not None and notif_type is not None:
                    page_from = prev_action
                    yield Record(ui=rec.ui, uid=uid, notif_type=notif_type, page_from=prev_action, page_to=notif_service, clong=0, ts=prev_ts)
                    if currect_action == notif_service:
                        # The user landed on the notification's service:
                        # start tracking how long they stay there.
                        start_exploring = prev_ts
                        service_exploring = notif_service
                        from_notif_type = notif_type
                    else:
                        dw = currect_ts - prev_ts
                        if dw > 30*60:
                            pass
                        elif dw > 30:
                            yield Record(ui=rec.ui, uid=uid, notif_type=notif_type, page_from=prev_action, page_to=notif_service, clong=1, ts=prev_ts)
                        start_exploring, service_exploring, from_notif_type = None, None, None

            if start_exploring is not None and service_exploring is not None:
                # still on service_exploring
                if currect_action == service_exploring:

                    # very long absence: 30 minutes means a new session
                    if currect_ts - start_exploring > 30*60:
                        start_exploring, service_exploring, from_notif_type = None, None, None

                    # on service_exploring for more than 30 seconds: counted
                    # only once, at the moment the 30-second threshold is
                    # crossed
                    elif currect_ts - start_exploring > 30:
                        yield Record(ui=rec.ui, uid=uid, notif_type=from_notif_type, page_from=page_from, page_to=service_exploring, clong=1, ts=start_exploring)
                        start_exploring, service_exploring, from_notif_type = None, None, None

                else:
                    if currect_ts - start_exploring > 30:
                        # Attribute the long click to the notification that
                        # started the exploration. `notif_type` belongs to
                        # the previous record here and is usually None.
                        yield Record(ui=rec.ui, uid=uid, notif_type=from_notif_type, page_from=page_from, page_to=service_exploring, clong=1, ts=start_exploring)
                    start_exploring, service_exploring, from_notif_type = None, None, None

            prev_action = currect_action
            prev_ts = currect_ts
            prev_path = current_path
            notif_service = rec.notif_service
            notif_type = rec.notif_type


def parse_ticker_events(events):
    """Tell whether a url-quoted `events` payload starts with a ticker event.

    The payload is unquoted and parsed as a Python literal. The result is
    True only when the first element has "type" equal to 'ticker' and its
    "data" contains "notifierTickerValue". Any failure while decoding or
    inspecting the payload gives False.
    """
    try:
        first = ast.literal_eval(urllib.unquote(events))[0]
        return (
            "type" in first
            and first["type"] == 'ticker'
            and "notifierTickerValue" in first["data"]
        )
    except:
        # Best effort by design: an unusable payload is simply not a ticker.
        return False


# https://clubs.at.yandex-team.ru/yt/2642
@cli.statinfra_job
def make_job(job, nirvana, statface_client, options):
    """Build the notifier click-statistics job for every date in options.dates.

    job             -- job object to extend; returned after all operations
                       have been added.
    nirvana         -- supplies directories[0] (used as $job_root) and,
                       optionally, input_tables[0] as the redir-log table.
    statface_client -- client used by all Statface reports.
    options         -- `options.dates` lists the dates (YYYY-MM-DD strings)
                       to process.

    Per date the job reads the redir, collections-redir and answ-redir logs,
    separates notifier (pid 900) events from ordinary user actions, stores
    intermediate tables under $job_root and publishes four Statface reports.
    """

    def _recorded_before(day):
        """Build a predicate: is a 'fielddate' value strictly earlier than `day`?

        `day` is bound when this factory is called. A lambda that reads the
        loop variable directly would see only the last date once the loop
        has finished, because closures look their variables up late.
        """
        def check(fielddate):
            return datetime.datetime.strptime(fielddate, '%Y-%m-%d') < datetime.datetime.strptime(day, '%Y-%m-%d')
        return check

    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        templates=dict(
            job_root=nirvana.directories[0]
        )
    )
    # tmp_root='//home/lego/tmp'

    report2 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV2Conversion') \
        .scale('daily') \
        .client(statface_client)

    report1 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV2Conversion_v2_tmp2') \
        .scale('daily') \
        .client(statface_client)

    report6 = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV2Fixed') \
        .scale('daily') \
        .client(statface_client)

    report6b = ns.StatfaceReport() \
        .path('Notifier/Counters/NotifierClicksStatsV3') \
        .scale('daily') \
        .client(statface_client)

    mydates = options.dates

    for strdate in mydates:

        # Prefer the table handed over by nirvana; fall back to the daily
        # redir-log table when none is available.
        try:
            redir_table = nirvana.input_tables[0]
        except Exception:
            redir_table = 'statbox/redir-log/' + strdate

        log = job.table(redir_table)

        redir_common_data = log.qb2(log = 'redir-log',
                fields=['yandexuid', 'timestamp', 'normal_path', 'pid', 'url', 'parsed_vars', 'referer',
                        'page', 'canonized_vhost', 'referer_page', 'referer_canonized_vhost',
                        se.log_field('events')],
                filters = [sf.default_filtering('redir-log'),
                        sf.defined('yandexuid', 'referer'),
                        sf.or_(sf.defined('normal_path'), sf.defined('events')),
                        sf.not_(sf.contains('normal_path', 'tech.'))],
                mode='yamr_lines') \
            .put('//tmp/rkam/data_qb2_' + strdate)

        coll_data = job.table('//home/logfeller/logs/collections-redir-log/1d/' + strdate) \
            .qb2(log = 'redir-log',
                fields=['yandexuid', 'timestamp', 'normal_path', 'pid', 'url', 'parsed_vars',
                        'page', 'canonized_vhost', 'referer_page', 'referer_canonized_vhost',
                        se.log_field('events')],
                filters = [sf.default_filtering('redir-log'),
                        sf.defined('yandexuid', 'referer'), sf.not_(sf.contains('normal_path', 'tech.'))],
                mode='yamr_lines')

        answ_data = job.table('//home/logfeller/logs/answ-redir-log/1d/' + strdate) \
            .project("yandexuid", "timestamp", normal_path="path", pid=ne.const("znatoki"),
                page="url", canonized_vhost=ne.const(""),
                referer_page="/znatoki", referer_canonized_vhost=ne.const("yandex.ru")
            )

        data_raw_extra = coll_data.concat(answ_data)
        data_raw = redir_common_data.concat(data_raw_extra)

        # Ticker events: records whose `events` payload is a ticker are
        # processed with path "ticker"; only the action stream is stored.
        s1, s2 = data_raw.filter(sf.and_(sf.defined('events'),
                      nf.custom(parse_ticker_events, 'events'))) \
            .project(ne.all(), normal_path = ne.const("ticker")) \
            .map(process_redir_data3)

        s2.project("uid", "referer", "ui").put('$job_root/ticker/' + strdate)

        data_raw_events, data_raw_path = data_raw.split(sf.defined("normal_path"))

        log900_path, data_path = data_raw_path.map(process_redir_data3)
        log900_events, data_events = data_raw_events.map(process_redir_events)

        log900 = job.concat(log900_path, log900_events)
        data = job.concat(data_path, data_events)


        # only pid=900: count and CTR
        not_shows_900, shows_900 = log900.split(
            sf.and_(
                sf.equals('path','notifier.results.show'),
                sf.defined('recordid')
                )
            )
        # Records already seen on earlier dates; the cutoff date is bound per
        # iteration through the factory above.
        old_records = job.table('$job_root/record_id_date') \
            .filter(nf.custom(_recorded_before(strdate), 'fielddate')) \
            .unique('recordid')

        filtered_show_records = shows_900.join(old_records, by='recordid', type='left_only')

        ###
        # (2) collect data for shows-history dataset and (1) save shows data for others
        filtered_show_records \
            .project('uid', 'path', 'notif_service', 'notif_type', 'timestamp', 'recordid') \
            .put('$job_root/new_shows_' + strdate,
                schema=dict(recordid=str, notif_service=str, notif_type=str, timestamp=int, path=str, uid=str)) \
            .project('recordid', 'notif_service', 'notif_type', fielddate=ne.const(strdate)) \
            .unique('recordid') \
            .put('$job_root/new_records', schema=dict(recordid=str, notif_service=str, notif_type=str, fielddate=str))


        log900_mod = job.concat(not_shows_900, filtered_show_records) \
            .project(ne.all(), fielddate=ne.const(strdate), page=ne.custom(lambda x: page_to_service(x), "referer"))\
            .put("$job_root/clicks_" + strdate + "_res")

        log900_mod.map(add_totals) \
            .groupby('fielddate', 'ui', 'path', 'notif_service', 'notif_type') \
            .aggregate(hits = na.count(), uids = na.count_distinct("uid")) \
            .publish(report6, allow_change_job=True)

        log900_mod.filter(sf.defined("page")).map(add_totals1) \
            .groupby('fielddate', 'ui', 'path', 'page', 'notif_service', 'notif_type') \
            .aggregate(hits = na.count(), uids = na.count_distinct("uid")) \
            .publish(report6b, allow_change_job=True)

        # The distinct notification services are stored for service_map,
        # which reads '$job_root/services' as an attached table file.
        log900.filter(sf.defined('notif_service')) \
            .project(n_service = 'notif_service').unique('n_service') \
            .put('$job_root/services')

        log900.filter(nf.equals('path', 'notifier.show')) \
            .project(ne.all(), service=ne.custom(page_to_service, 'referer').add_hints(type=str)) \
            .put('$job_root/opens/' + strdate)

        notif_uids, ticker_data = log900.split(sf.equals("path", "notifier.ticker.show"))

        notif_uids = notif_uids.project('ui', 'uid').unique('ui', 'uid') \
            .put('$job_root/notif_uids_' + strdate)
        ticker_data.project(ne.all(), service=ne.custom(page_to_service, 'referer').add_hints(type=str)) \
            .put('$job_root/ticker/' + strdate + '_v2')

        target_line = job.table("$job_root/target_line")

        # Only actions of the uids in notif_uids are kept; `ui` is taken
        # from notif_uids instead of the action record.
        p1 = data.project(ne.all(exclude='ui')).join(notif_uids, by='uid')
        p2 = p1.project(ne.all(), service=ne.custom(page_to_service, 'referer').add_hints(type=str)) \
            .put("$job_root/user_actions_" + strdate)

        # calc clicks' dwelltimes
        p4 = p2.filter(sf.defined('service')).map(service_map) \
            .groupby('uid', 'splitter').sort('timestamp') \
            .reduce(
                myreduce,
                memory_limit=4000,
                intensity='data'
            ).put("$job_root/user_actions_" + strdate + "_res")

        longclicks = p4.map(add_totals2) \
            .groupby('ui', 'page_from', 'page_to', 'notif_type') \
            .aggregate(
                hits=na.count(),
                hits_long=na.sum('clong'),
                hits_overlong=na.sum('overlong')
            ) \
            .filter(sf.defined('page_from', 'page_to', 'notif_type')) \
            .sort('ui', 'page_from', 'page_to', 'notif_type')

        longclicks.concat(target_line) \
            .project(ne.all(), fielddate=ne.const(strdate)) \
            .publish(report2, allow_change_job=True)

        # second way of long clicks calculation
        p5 = p2.filter(sf.defined('service')) \
            .filter(sf.custom(lambda x: "hamster" not in x and "priemka" not in x and "clck" not in x and "global-notifications" not in x and "test" not in x and "staff" not in x, "service")) \
            .groupby('uid').sort('timestamp') \
            .reduce(
                myreduce_v5,
                memory_limit=4000,
                intensity='data'
            ).put("$job_root/user_actions_v5_" + strdate)

        # Hits per (ui, page_from, page_to, notif_type), split by clong and
        # joined back so that each row has both `hits` and `hits_long`.
        w = p4.groupby('ui', 'page_from', 'page_to', 'notif_type', 'clong').aggregate(hits=na.count())

        w0 = w.filter(sf.equals("clong", 0)) \
            .project(ne.all(exclude="clong"), hits_long = ne.const(0))
        w1 = w.filter(sf.equals("clong", 1)) \
            .project(ne.all(exclude=("clong", "hits")), hits_long = "hits")

        w0.join(w1, by=('ui', 'page_from', 'page_to', 'notif_type'), type='left') \
            .map(add_totals3) \
            .groupby('ui', 'page_from', 'page_to', 'notif_type') \
            .aggregate(hits=na.sum('hits'), hits_long=na.sum('hits_long')) \
            .project(ne.all(), fielddate=ne.const(strdate), mode=ne.const("_total_")) \
            .sort('fielddate', 'ui', 'page_from', 'page_to', 'notif_type') \
            .put("$job_root/v2_rep2") \
            .publish(report1, allow_change_job=True)

    return job


# Script entry point: when run directly, hand control to cli.run() from
# nile.api.v1.
if __name__ == '__main__':
    cli.run()

