#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    extended_schema,
    multischema,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    resources as sr
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import getpass #obligatory for Statface
import datetime
import time
import re

import uatraits


# Event paths accepted by myMap_answ, grouped by the action type they map to.
# Paths reported with action type 1.
q1 = [
    "finish.like",
    "finish.subscribe",
]
# Paths reported with action type 2.
q2 = [
    "finish.question.submit",
    "finish.comment.submit",
    "share",
    "finish.experts.call",
]
# Paths reported with action type 3.
q3 = [
    "finish.answer.submit",
]

@with_hints(
    output_schema=dict(
        actiond=str,
        actiontype=str,
        device=str,
        fielddate=str
    )
)
def myMap_answ(recs):
    """Map answers redir-log records to (action, action type, device, date) rows.

    Only records whose ``path`` is listed in ``q1``, ``q2`` or ``q3`` are kept;
    the list a path belongs to decides ``actiontype`` ("1", "2" or "3").
    Records whose ``HTTP_REFERER`` contains ``tolokaTaskId`` are dropped.

    Args:
        recs: iterable of input records exposing ``path`` and
            ``iso_eventtime``, and optionally ``HTTP_REFERER`` and ``ui``.

    Yields:
        Record with ``actiond`` (the path), ``actiontype``, ``device`` and
        ``fielddate`` (the first 10 characters of ``iso_eventtime``).
    """
    # Built once per call instead of concatenating three lists per record.
    # Filled from q3 to q1 so that q1 wins for a path listed more than once,
    # matching the q1 -> q2 -> q3 precedence of the original if/elif chain.
    action_type_by_path = {}
    for type_code, paths in (("3", q3), ("2", q2), ("1", q1)):
        for known_path in paths:
            action_type_by_path[known_path] = type_code

    # NOTE(review): an earlier version tested rec.subscribed == "true" for
    # "finish.subscribe" inside the q2 branch. That path lives in q1, so the
    # test could never run and was removed. If unsubscribe events must be
    # skipped, add the check explicitly -- confirm the intended behaviour.
    for rec in recs:
        path = rec.path
        action_type = action_type_by_path.get(path)
        if action_type is None:
            continue

        # Best effort: a missing or non-string referer counts as "not Toloka".
        try:
            from_toloka = "tolokaTaskId" in rec.HTTP_REFERER
        except Exception:
            from_toloka = False
        if from_toloka:
            continue

        # Records without a readable ``ui`` field are treated as desktop.
        try:
            ui = rec.ui
        except Exception:
            ui = "desktop"
        else:
            if ui == "mobile":
                ui = "touch"

        date = rec.iso_eventtime
        yield Record(actiond=path, device=ui, actiontype=action_type, fielddate=date[:10])


@with_hints(
    output_schema=dict(
        service=str,
        device=str,
        fielddate=str,
        uid=str
    )
)
def myMap_passport(recs):
    """Attribute authentication records to a service and a device.

    The device is "touch" when the uatraits detector reports ``isTouch`` for
    the record's ``user_agent``, otherwise "desktop".

    The service is derived from ``retpath`` when it is present: the scheme is
    stripped, the first segment after the first "/" is taken (without any
    query string), and the record is attributed to "ugc" if that segment
    contains "ugcpub". Only when ``retpath`` is absent is ``origin``
    inspected instead.
    NOTE(review): a record with a non-matching ``retpath`` is dropped even if
    its ``origin`` would match -- confirm this is intended.

    Yields:
        Record with ``device``, ``service``, ``uid`` and ``fielddate``
        (taken from ``rec.date``) for every record that got a service.
    """
    ua_detector = uatraits.detector('/usr/share/uatraits/browser.xml')
    # Checked in this order; the first marker found in ``origin`` wins.
    origin_services = (
        ("collections", "collections"),
        ("znatoki", "answers"),
        ("ugc", "ugc"),
    )

    for rec in recs:
        agent = rec.user_agent
        touch = agent is not None and ua_detector.detect(agent).get('isTouch')
        device = "touch" if touch else "desktop"

        segment = None
        raw_retpath = rec.retpath
        if raw_retpath is not None:
            segment = raw_retpath.replace('https://', '', 1).replace('http://', '', 1)
            if "/" in segment:
                # First path segment after the host, query string removed.
                segment = segment.split('/')[1].split('?')[0]

        origin = rec.origin
        if segment is not None:
            service = "ugc" if "ugcpub" in segment else None
        elif origin is not None:
            service = next(
                (name for marker, name in origin_services if marker in origin),
                None
            )
        else:
            # Neither retpath nor origin: nothing to attribute.
            continue

        if service is None:
            continue
        yield Record(device=device, service=service, uid=rec.uid, fielddate=rec.date)


@with_hints(
    output_schema=dict(
        actiond=str,
        actiontype=str,
        device=str,
        fielddate=str
    )
)
def myMap_sprav(recs):
    """Turn sprav feedback records into "review" action rows.

    Records with ``fielddate`` earlier than 2018-04-09 are skipped. Every
    remaining record becomes one row with ``actiond="review"`` and
    ``actiontype="3"``; the device is "yandexApp" when ``app_type`` contains
    "MOBILE_" and "desktop" otherwise.

    Args:
        recs: iterable of records exposing ``fielddate`` (``%Y-%m-%d``) and
            ``app_type``.

    Yields:
        Record with ``actiond``, ``actiontype``, ``device`` and ``fielddate``.

    Raises:
        ValueError: if a ``fielddate`` does not match ``%Y-%m-%d``.
    """
    date_format = '%Y-%m-%d'
    # The cutoff is constant, so parse it once rather than for every record.
    cutoff = datetime.datetime.strptime('2018-04-09', date_format).date()

    for rec in recs:
        fielddate = rec.fielddate
        if datetime.datetime.strptime(fielddate, date_format).date() < cutoff:
            continue

        device = "yandexApp" if "MOBILE_" in rec.app_type else "desktop"
        yield Record(actiond="review", actiontype="3", device=device, fielddate=fielddate)


@with_hints(
    output_schema=multischema(
        dict(actiond=str, actiontype=str, device=str, service=str, fielddate=str),
        dict(actiond=str, actiontype=str, device=str, service=str, fielddate=str)
    )
)
def myMap_ugc(recs, ugc, entity_search):
    """Split UGC user actions into the ``ugc`` and ``entity_search`` outputs.

    Records with ``fielddate`` earlier than 2018-04-09 are skipped. The
    record's ``action_type`` field is mapped to a numeric action type:
    2 for rating/answer/sbs-style actions, 3 for review/photo, 1 otherwise.

    Every kept record is written to ``ugc``. It is additionally written to
    ``entity_search`` when it has an ``object_id`` that contains neither
    "sprav/" nor "market/" and the action is not "sbs".

    Args:
        recs: iterable of records exposing ``fielddate`` (``%Y-%m-%d``),
            ``action_type``, ``app_id`` and optionally ``object_id``.
        ugc: output callable for the first schema.
        entity_search: output callable for the second schema.

    Raises:
        ValueError: if a ``fielddate`` does not match ``%Y-%m-%d``.
    """
    date_format = '%Y-%m-%d'
    # The cutoff is constant, so parse it once rather than for every record.
    cutoff = datetime.datetime.strptime('2018-04-09', date_format).date()
    type2_actions = ("rating", "answer", "sbs", "film_sbs", "photo_sbs_answer")
    type3_actions = ("review", "photo")

    for rec in recs:
        fielddate = rec.fielddate
        if datetime.datetime.strptime(fielddate, date_format).date() < cutoff:
            continue

        action = rec.action_type
        app_id = rec.app_id
        if action in type2_actions:
            action_type = 2
        elif action in type3_actions:
            action_type = 3
        else:
            action_type = 1

        # Best effort: records without a readable object_id have no entity.
        try:
            object_id = rec.object_id
        except Exception:
            object_id = None

        fields = dict(
            actiontype=str(action_type),
            actiond=action,
            service=app_id,
            device='desktop',
            fielddate=fielddate
        )

        # The "sbs" exclusion must look at the action name. The previous code
        # compared the numeric action_type with 'sbs', which was always
        # unequal, so "sbs" actions were never excluded.
        if (object_id is not None
                and "sprav/" not in object_id
                and "market/" not in object_id
                and action != 'sbs'):
            entity_search(Record(**fields))

        ugc(Record(**fields))


def card_status(w):
    """Return '2' when ``w`` is exactly ``True``, otherwise '3'.

    The check is an identity test, so truthy values other than the ``True``
    singleton (for example ``1`` or ``"true"``) also produce '3'.
    """
    return '2' if w is True else '3'


@with_hints(output_schema=extended_schema())
def filter_dates(recs):
    """Pass through records whose ``fielddate`` lies in the 30 days ending on ``d``.

    Both ``rec.fielddate`` and ``rec.d`` are parsed as ``%Y-%m-%d`` dates.
    The window starts 29 days before ``d`` and includes ``d`` itself; records
    outside it are dropped, records inside it are yielded unchanged.
    """
    date_format = '%Y-%m-%d'
    parse = datetime.datetime.strptime
    one_day = datetime.timedelta(days=1)
    window = datetime.timedelta(days=30)

    for rec in recs:
        event_day = parse(rec.fielddate, date_format).date()
        anchor_day = parse(rec.d, date_format).date()

        # Half-open interval [window_start, window_end).
        window_end = anchor_day + one_day
        window_start = window_end - window

        if window_start <= event_day < window_end:
            yield rec


@cli.statinfra_job

def make_job(job, nirvana, statface_client, options):
    """Build the job graph producing the answers, passport, UGC and sprav reports.

    Args:
        job: job object to configure; tables and operations are attached to it.
        nirvana: provides ``directories``; the first entry becomes ``$job_root``.
        statface_client: not used in this function.
        options: provides ``dates``, the list of dates to process.

    Returns:
        The configured job.
    """

    # Scheduling defaults plus the path templates used by the put() calls below.
    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root=nirvana.directories[0],
            tmp_root='//home/images/tmp'
        )
    )

    mydates = options.dates
    # The last requested date anchors the 30-day windows applied by filter_dates.
    strdate = mydates[-1]

    # Answers: count events per (action type, action, device, date) and tag
    # every row with service "answers".
    answ_log = job.table('//home/images/tmp/rkam/MMA-1607/data/answ-redir-log/1d/@dates')
    answ_rep = answ_log.map(myMap_answ) \
        .groupby('actiontype', 'actiond', 'device', 'fielddate') \
        .aggregate(count = na.count()) \
        .project(ne.all(), service = ne.const('answers'))

    # Passport: count distinct uids per (device, service, date); rows are
    # labelled as action type "4" / action "login".
    web_auth = job.table('//home/passport/production/successful-web-authentications/@dates')
    web_auth_rep = web_auth.map(myMap_passport) \
        .groupby('device', 'service', 'fielddate') \
        .aggregate(count = na.count_distinct('uid')) \
        .project(ne.all(), actiontype=ne.const("4"), actiond=ne.const("login"))

    # One output table per requested date for both reports above.
    for d in mydates:
        answ_rep.filter(nf.equals('fielddate', d)).put('$job_root/answers/' + d)
        web_auth_rep.filter(nf.equals('fielddate', d)).put('$job_root/passport/' + d)

    # UGC export: add the anchor date ``d`` and derive ``fielddate`` from
    # ``timestamp`` by dropping its last three characters (presumably
    # milliseconds -> seconds; TODO confirm) and formatting the result as a
    # date. fromtimestamp() without a tz argument uses the local timezone.
    # filter_dates then keeps only the 30 days ending on ``d``.
    ugcbd = job.table('//home/robot-ugc/export/global_ugc/user_action') \
        .project(ne.all(), d = ne.const(str(strdate)),
            fielddate = ne.custom(lambda x: datetime.datetime.fromtimestamp(int(str(x)[:-3])).strftime('%Y-%m-%d'), 'timestamp').add_hints(type=str)) \
        .map(filter_dates)

    # myMap_ugc has two outputs, unpacked in its declared order
    # (ugc, entity_search); each is counted and stored separately.
    ugc, entity_search = ugcbd.map(myMap_ugc)
    ugcbd_rep = ugc.groupby('actiontype', 'actiond', 'service', 'device', 'fielddate') \
        .aggregate(count = na.count()) \
        .put('$job_root/ugc/report_other_30d')
    entity_rep = entity_search.groupby('actiontype', 'actiond', 'service', 'device', 'fielddate') \
        .aggregate(count = na.count()) \
        .put('$job_root/ugc/entity_30d')

    # Sprav feedback: ``fielddate`` is taken from ``date_received`` (presumably
    # a plain column copy -- verify against the nile project() semantics), then
    # the same 30-day filter, mapping and counting, tagged with service "sprav".
    sprav_feedback = job.table('//home/images/tmp/rkam/MMA-1597/sprav/parsed')
    sprav_feedback_rep = sprav_feedback.project(ne.all(), d=ne.const(strdate), fielddate='date_received') \
        .map(filter_dates) \
        .map(myMap_sprav) \
        .groupby('actiontype', 'actiond', 'device', 'fielddate') \
        .aggregate(count = na.count()) \
        .project(ne.all(), service = ne.const('sprav')).put('$job_root/sprav/report_30d')

    return job


# Script entry point: hand control to the nile CLI runner.
# NOTE(review): presumably this dispatches to the function registered with
# @cli.statinfra_job (make_job) -- confirm against the nile cli documentation.
if __name__ == '__main__':
    cli.run()

