#!/usr/bin/env python
# coding=utf-8

import datetime
import json
import os

from nile.api.v1 import (
    with_hints,
    multischema,
    extended_schema,
    filters as nf,
    extractors as ne,
    Record,
    cli
)

from qb2.api.v1 import (
    extractors as qe,
    filters as qf,
    typing as qt
)

from pytz import timezone

# Output schema of the pre-filtered redir-log stream (see prep_log_mapper):
# the first three columns are declared as plain strings, the rest as
# optional strings.
REDIR_LOG_SCHEMA = {
    "puId": str,
    "path": str,
    "timestamp": str,
    "userLogin": qt.Optional[qt.String],
    "boardId": qt.Optional[qt.String],
    "cardId": qt.Optional[qt.String],
    "url": qt.Optional[qt.String],
}


def get_ts_from_id(id_str):
    """Decode the leading (up to) eight hex digits of ``id_str`` as an integer.

    Callers in this module treat the result as a Unix timestamp in seconds.
    """
    hex_prefix = id_str[:8]
    return int(hex_prefix, 16)


def date_from_object_id(id_str):
    """Return the creation moment encoded in an object id as a naive datetime.

    The value is produced by ``datetime.fromtimestamp`` without a ``tz``
    argument, i.e. it is expressed in the local timezone of the machine that
    runs the code.
    """
    seconds = get_ts_from_id(id_str)
    return datetime.datetime.fromtimestamp(seconds)


@with_hints(output_schema=REDIR_LOG_SCHEMA)
def prep_log_mapper(recs):
    """Pass through only the log records whose ``puId`` is not an empty string.

    A record that has no ``puId`` field at all is treated as empty and dropped.
    """
    for entry in recs:
        if entry.get("puId", "") != "":
            yield entry


def prep_log_data(job, options_json, calc_date):
    """Build the filtered redir-log stream for a single day.

    Reads the table ``<redir_tables_path>/<calc_date>``, parses it with the
    ``redir-log`` qb2 parser, keeps only rows that have ``path``,
    ``timestamp`` and ``puId`` defined and whose ``path`` is one of the
    tracked events, then drops rows with an empty ``puId``.
    """
    source_path = "{}/{}".format(options_json["redir_tables_path"], calc_date)

    tracked_paths = {
        "click.social.share",
        "finish.board.unlock",
        "click.contest.like",
        "click.contest.dislike"
    }
    wanted_fields = [
        "path",
        qe.log_field("uid").rename('puId'),
        qe.log_field("boardId"),
        qe.log_field("cardId"),
        qe.log_field("timestamp"),
        qe.log_field("url")
    ]
    row_filters = [
        qf.defined("path"),
        qf.defined("timestamp"),
        qf.defined("puId"),
        qf.one_of("path", tracked_paths),
    ]

    parsed_log = job.table(source_path).qb2(
        log="redir-log",
        fields=wanted_fields,
        filters=row_filters,
        mode="yamr_lines"
    )
    return parsed_log.map(prep_log_mapper)


@with_hints(output_schema=multischema(
    dict(
        user_id=str,
        puid=qt.Optional[qt.String],
        user_login=qt.Optional[qt.String],
        is_toloker=bool
    )
))
def prep_user_mapper(recs):
    """Flatten user documents into ``user_id`` / ``puid`` / ``user_login`` / ``is_toloker``.

    A user is flagged as a toloker when the document role is ``"redactor"``
    or when the incoming record already carries a truthy ``is_toloker``
    value.
    """
    for rec in recs:
        document = rec.document
        is_redactor = document.get("role", "") == "redactor"
        yield Record(
            user_id=rec.id,
            puid=document.get("uid", {}).get("_$numberLong"),
            user_login=document.get("login"),
            # The incoming ``is_toloker`` may be missing/None; the schema
            # declares a bool and downstream filters compare the flag with
            # ``False``, so never let None leak through.
            is_toloker=bool(is_redactor or rec.get("is_toloker"))
        )


@with_hints(output_schema=dict(
        user_id=str,
        owner_puid=qt.Optional[qt.String],
        board_id=str,
        created_at_date=str,
        created_at_ts=str,
        is_board_owner_toloker=bool
    )
)
def prep_board_mapper(recs):
    """Turn joined board/user records into board-creation rows.

    Records without ``document.service.created_at._$date`` are skipped.
    ``created_at_ts`` is the Unix timestamp (whole seconds, as a string) and
    ``created_at_date`` is the calendar date in the Europe/Moscow timezone.
    """
    utc = timezone("UTC")
    moscow = timezone("Europe/Moscow")
    epoch = datetime.datetime(1970, 1, 1)

    for rec in recs:
        created_at_raw = rec.document.get("service", {}).get("created_at", {}).get("_$date")
        if created_at_raw is None:
            continue

        # The stored value is a UTC ISO string ("...Z").
        created_at_utc = datetime.datetime.strptime(created_at_raw, "%Y-%m-%dT%H:%M:%S.%fZ")

        # strftime("%s") is a platform extension that ignores tzinfo and
        # interprets the fields in the host's local timezone; derive the
        # epoch from the UTC value so the result is host independent.
        since_epoch = created_at_utc - epoch
        created_at_ts = str(since_epoch.days * 86400 + since_epoch.seconds)

        created_at_date = created_at_utc \
            .replace(tzinfo=utc) \
            .astimezone(moscow) \
            .strftime("%Y-%m-%d")

        yield Record(
            user_id=rec.owner,
            owner_puid=rec.puid,
            board_id=rec.id,
            created_at_date=created_at_date,
            created_at_ts=created_at_ts,
            # schema declares a bool; the joined flag may be None
            is_board_owner_toloker=bool(rec.is_toloker)
        )


@with_hints(output_schema=dict(
    user_puid=str,
    owner_puid=str,
    created_at_date=str,
    created_at_ts=str,
    like_id=str,
    card_id=str,
    action_type=str,
    card_type=qt.Optional[qt.String],
    origin_client_name=qt.Optional[qt.String],
    origin_action=qt.Optional[qt.String],
    like_owner_is_toloker=bool,
    is_card_owner_toloker=bool
))
def prep_likes_mapper(recs):
    """Emit "like card" rows for the liker and, when different, the card owner.

    The creation time is decoded from the like id. ``created_at_ts`` is that
    Unix timestamp as a string; ``created_at_date`` is the calendar date in
    Europe/Moscow, matching the board and card mappers.
    """
    moscow = timezone("Europe/Moscow")

    for rec in recs:
        created_at_unix = get_ts_from_id(rec.id)
        # Use the decoded value directly instead of a host-timezone dependent
        # fromtimestamp() / strftime("%s") round trip.
        created_at_ts = str(created_at_unix)
        created_at_date = datetime.datetime.fromtimestamp(created_at_unix, moscow).strftime("%Y-%m-%d")

        common_fields = dict(
            owner_puid=rec.puid,
            created_at_date=created_at_date,
            created_at_ts=created_at_ts,
            like_id=rec.id,
            card_id=rec.card_id,
            action_type="like card",
            card_type=rec.card_type,
            origin_client_name=rec.origin_client_name,
            origin_action=rec.origin_action,
            # schema declares bools; joined flags may be None
            like_owner_is_toloker=bool(rec.is_toloker),
            is_card_owner_toloker=bool(rec.is_card_owner_toloker)
        )

        yield Record(user_puid=rec.puid, **common_fields)

        if rec.card_owner_puid and rec.card_owner_puid != rec.puid:
            # an action that involves several users produces one row per user
            yield Record(user_puid=rec.card_owner_puid, **common_fields)


@with_hints(output_schema=dict(
    user_puid=str,
    owner_puid=str,
    created_at_ts=str,
    created_at_date=str,
    action_type=str,
    action_target=qt.Yson,
    action_owner_is_toloker=bool
))
def prep_subscription_mapper(recs):
    """Emit "subscribe to <type>" rows for the subscriber and the affected user.

    Records whose document has ``due_to_user == True`` are skipped. One row
    is always produced for the subscriber; for a user subscription a second
    row is produced for the target user, and for a board subscription a
    second row is produced for the board owner, when that user is known and
    differs from the subscriber.

    The creation time is decoded from the subscription id;
    ``created_at_date`` is the calendar date in Europe/Moscow, matching the
    board and card mappers.
    """
    SUBSCRIPTIONS_TYPES = {
        "Subscription.BoardSubscription": "board",
        "Subscription.ChannelSubscription": "channel",
        "Subscription.CompilationSubscription": "channel",
        "Subscription.UserSubscription": "user",
    }
    moscow = timezone("Europe/Moscow")

    for rec in recs:
        if rec.document.get("due_to_user") == True:  # noqa: E712 (keeps the original equality semantics)
            continue

        created_at_unix = get_ts_from_id(rec.id)
        # Use the decoded value directly instead of a host-timezone dependent
        # fromtimestamp() / strftime("%s") round trip.
        created_at_ts = str(created_at_unix)
        created_at_date = datetime.datetime.fromtimestamp(created_at_unix, moscow).strftime("%Y-%m-%d")

        subscription_type = SUBSCRIPTIONS_TYPES.get(rec._cls, "empty")
        action_type = "subscribe to {}".format(subscription_type)
        from_toloka = bool(rec.get("is_user_toloker") or rec.get("is_board_owner_toloker"))
        action_owner_is_toloker = bool(rec.is_toloker)

        # The subscriber always gets a row; the other party is added below.
        recipients = [rec.puid]
        if subscription_type == "user":
            target_puid = rec.get("target_puid")
            if target_puid and rec.puid != target_puid:
                recipients.append(target_puid)
        elif subscription_type == "board":
            board_user_puid = rec.get("board_user_puid")
            if board_user_puid and rec.puid != board_user_puid:
                recipients.append(board_user_puid)

        for recipient_puid in recipients:
            yield Record(
                user_puid=recipient_puid,
                owner_puid=rec.puid,
                created_at_ts=created_at_ts,
                created_at_date=created_at_date,
                action_type=action_type,
                # a fresh dict per row, so rows never share a mutable target
                action_target={
                    "subscription_id": rec.id,
                    "subscription_type": subscription_type,
                    "target": rec.target,
                    "from_toloka": from_toloka
                },
                action_owner_is_toloker=action_owner_is_toloker
            )


@with_hints(output_schema=extended_schema())
def filter_subscriptions_reducer(sessions):
    """Keep each group's records up to and including its first user subscription.

    Within every group the records are emitted in their incoming order; once
    a ``Subscription.UserSubscription`` record has been emitted, the rest of
    the group is discarded.
    """
    for _group_key, group_records in sessions:
        for item in group_records:
            # In the sorted records, take board subscriptions only until the
            # user subscribes to a specific user.
            # TODO: use a 1-hour window instead of a strict ordering
            yield item
            if item._cls == "Subscription.UserSubscription":
                break


@with_hints(output_schema=dict(
    user_id=qt.Optional[qt.String],
    owner_puid=qt.Optional[qt.String],
    created_at_date=str,
    created_at_ts=str,
    card_id=str,
    card_board=qt.Optional[qt.String],
    card_type=qt.Optional[qt.String],
    origin_client_name=qt.Optional[qt.String],
    origin_action=qt.Optional[qt.String],
    is_card_share=bool,
    is_card_owner_toloker=bool
))
def prep_card_mapper(recs):
    """Turn joined card/user records into card-creation rows.

    Records without ``document.service.created_at._$date`` and records whose
    document contains ``browser_bookmark_id`` are skipped.
    ``created_at_ts`` is the Unix timestamp (whole seconds, as a string) and
    ``created_at_date`` is the calendar date in the Europe/Moscow timezone.
    A card counts as a share when its origin client name is ``"collections"``
    and its origin action is ``"share"``.
    """
    utc = timezone("UTC")
    moscow = timezone("Europe/Moscow")
    epoch = datetime.datetime(1970, 1, 1)

    for rec in recs:
        document = rec.document
        created_at_raw = document.get("service", {}).get("created_at", {}).get("_$date")
        if created_at_raw is None:
            continue
        if "browser_bookmark_id" in document:
            continue

        # The stored value is a UTC ISO string ("...Z").
        created_at_utc = datetime.datetime.strptime(created_at_raw, "%Y-%m-%dT%H:%M:%S.%fZ")

        # strftime("%s") is a platform extension that ignores tzinfo and
        # interprets the fields in the host's local timezone; derive the
        # epoch from the UTC value so the result is host independent.
        since_epoch = created_at_utc - epoch
        created_at_ts = str(since_epoch.days * 86400 + since_epoch.seconds)

        created_at_date = created_at_utc \
            .replace(tzinfo=utc) \
            .astimezone(moscow) \
            .strftime("%Y-%m-%d")

        origin = document.get("origin", {})
        origin_client_name = origin.get("client", {}).get("name")
        is_card_share = (
            origin.get("client", {}).get("name", "") == "collections"
            and origin.get("action", "") == "share"
        )

        # ``content`` may be absent or an empty list; both mean "no type".
        content = document.get("content") or [{}]
        card_type = content[0].get("source_type")

        yield Record(
            user_id=rec.owner,
            owner_puid=rec.get("puid"),
            card_id=rec.id,
            card_board=rec.get("board"),
            created_at_date=created_at_date,
            created_at_ts=created_at_ts,
            card_type=card_type,
            origin_client_name=origin_client_name,
            origin_action=origin.get("action", ""),
            is_card_share=is_card_share,
            is_card_owner_toloker=bool(rec.get("is_toloker"))
        )


@with_hints(output_schema={
    "id": str,
    "document": qt.Yson,
    "owner": qt.Optional[qt.String],
})
def format_trashcan_boards(records):
    """Reshape trashcan board entries to ``id`` / ``document`` / ``owner`` rows.

    A truthy ``service.created_at._$date`` is divided by 1000 (i.e. handled
    as epoch milliseconds) and rewritten in place as a UTC
    ``%Y-%m-%dT%H:%M:%S.%fZ`` string. Note that the ``entity`` dict of the
    incoming record is modified.
    """
    for r in records:
        document = r.entity
        ts = document.get("service", {}).get("created_at", {}).get("_$date")
        if ts:
            # The trailing "Z" marks UTC and the value is later parsed as
            # UTC, so the conversion must not use the host's local timezone.
            document["service"]["created_at"]["_$date"] = \
                datetime.datetime.utcfromtimestamp(ts / 1000.).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

        yield Record(
            id=r.id,
            document=document,
            owner=document.get("owner", {}).get("_$oid")
        )


def get_boards_from_trashcan(job, table_path):
    """Return the reshaped ``board`` entries of the trashcan table at ``table_path``."""
    only_boards = nf.equals("entity_type", "board")
    return job.table(table_path).filter(only_boards).map(format_trashcan_boards)


@with_hints(output_schema={
    "id": str,
    "document": qt.Yson,
    "owner": qt.Optional[qt.String],
    "board": qt.Optional[qt.String],
})
def format_trashcan_cards(records):
    """Reshape trashcan card entries to ``id`` / ``document`` / ``owner`` / ``board`` rows.

    A truthy ``service.created_at._$date`` is divided by 1000 (i.e. handled
    as epoch milliseconds) and rewritten in place as a UTC
    ``%Y-%m-%dT%H:%M:%S.%fZ`` string. Note that the ``entity`` dict of the
    incoming record is modified.
    """
    for r in records:
        document = r.entity
        ts = document.get("service", {}).get("created_at", {}).get("_$date")
        if ts:
            # The trailing "Z" marks UTC and the value is later parsed as
            # UTC, so the conversion must not use the host's local timezone.
            document["service"]["created_at"]["_$date"] = \
                datetime.datetime.utcfromtimestamp(ts / 1000.).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

        yield Record(
            id=r.id,
            document=document,
            owner=document.get("owner", {}).get("_$oid"),
            board=document.get("board", {}).get("_$oid")
        )


def get_cards_from_trashcan(job, table_path):
    """Return the reshaped ``card`` entries of the trashcan table at ``table_path``."""
    only_cards = nf.equals("entity_type", "card")
    return job.table(table_path).filter(only_cards).map(format_trashcan_cards)


def prep_db_data(job, options_json):
    """Build the user, board, card, like and subscription streams from the DB tables.

    Reads the ``board``, ``user``, ``card``, ``card_like`` and
    ``subscription`` tables under ``options_json["collections_db_path"]``.
    When ``options_json`` contains ``trashcan_path``, the board and card
    entries of that table are concatenated with the corresponding DB tables.

    Returns the tuple ``(user_table, board_table, card_table, like_table,
    subscriptions_table)``.
    """
    if "trashcan_path" in options_json:
        boards_log = job.concat(
            get_boards_from_trashcan(job, options_json["trashcan_path"]),
            job.table(options_json["collections_db_path"] + "/board")
        )
    else:
        boards_log = job.table(options_json["collections_db_path"] + "/board")

    # Owners of boards whose document has a non-None "moder" value are
    # flagged as tolokers (one row per owner id).
    tolokers = boards_log.filter(nf.custom(lambda x: x.get("moder") is not None, "document")) \
        .project(id="owner", is_toloker=ne.const(True)) \
        .unique("id")

    # Left join: users that own no such board still pass through, without
    # the is_toloker flag from the join.
    user_table = job.table(options_json["collections_db_path"] + "/user") \
        .join(tolokers, by="id", type="left") \
        .map(prep_user_mapper)

    # Inner join: boards whose owner is not in user_table are dropped.
    board_table = boards_log \
        .join(user_table, by_left="owner", by_right="user_id", type="inner") \
        .map(prep_board_mapper)

    if "trashcan_path" in options_json:
        cards_log = job.concat(
            get_cards_from_trashcan(job, options_json["trashcan_path"]),
            job.table(options_json["collections_db_path"] + "/card")
        )
    else:
        cards_log = job.table(options_json["collections_db_path"] + "/card")

    # Left join: cards are kept even when their owner is not in user_table.
    card_table = cards_log \
        .join(user_table, by_left="owner", by_right="user_id", type="left") \
        .map(prep_card_mapper)

    # Likes are joined with the liking user and then with the liked card;
    # both joins are inner, so likes missing either side are dropped.
    like_table = job.table(options_json["collections_db_path"] + "/card_like") \
        .join(user_table, by_left="owner", by_right="user_id", type="inner") \
        .join(
            card_table.project("card_id", "card_type", "origin_client_name", "origin_action", "is_card_owner_toloker", card_owner="user_id", card_owner_puid="owner_puid"),
            by_left="card", by_right="card_id", type="inner"
        ) \
        .map(prep_likes_mapper)

    # The constant _cls column makes the join below match only subscriptions
    # of the board-subscription class.
    board_table_for_subscriptions = board_table \
        .project(
            "board_id",
            "is_board_owner_toloker",
            _cls=ne.const("Subscription.BoardSubscription"),
            board_user_id="user_id",
            board_user_puid="owner_puid"
        )
    # Same trick for subscriptions of the user-subscription class.
    user_table_for_subscriptions = user_table \
        .project(
            is_user_toloker="is_toloker",
            _cls=ne.const("Subscription.UserSubscription"),
            target_user_id="user_id",
            target_puid="puid",
        )
    # Each subscription is joined with its owner (inner) and then with its
    # target board or target user (left). owner_id is whichever of
    # board_user_id / target_user_id is set, and timestamp is decoded from
    # the subscription id. Rows are grouped by (owner, owner_id), ordered by
    # timestamp and trimmed by filter_subscriptions_reducer before mapping.
    subscriptions_table = job.table(options_json["collections_db_path"] + "/subscription") \
        .join(user_table, by_left="owner", by_right="user_id", type="inner") \
        .join(board_table_for_subscriptions, by_left=["_cls", "target"], by_right=["_cls", "board_id"], type="left") \
        .join(user_table_for_subscriptions, by_left=["_cls", "target"], by_right=["_cls", "target_user_id"], type="left") \
        .project(
            ne.all(),
            owner_id=ne.custom(lambda x, y: x or y, "board_user_id", "target_user_id").with_type(str),
            timestamp=ne.custom(lambda x: get_ts_from_id(x), "id").with_type(int),
        ) \
        .groupby("owner", "owner_id") \
        .sort("timestamp") \
        .reduce(filter_subscriptions_reducer) \
        .sort("id") \
        .map(prep_subscription_mapper)

    return user_table, board_table, card_table, like_table, subscriptions_table


@with_hints(output_schema=dict(
    puid=str,
    ts=str,
    action_owner_puid=str,
    action_owner_is_toloker=bool,
    action_type=str,
    service_type=qt.Optional[qt.String],
    action_target=qt.Yson,
))
def log_events_mapper(recs):
    """Convert enriched redir-log records into social-event rows.

    Only records whose ``path`` is one of the tracked events and whose
    ``puId`` is not an empty string produce a row. ``action_target``
    describes the unlocked board, or the shared card / board / url; it stays
    empty for contest votes and for records lacking the needed fields.

    The incoming records keep the log column names (``boardId``,
    ``cardId``); board and card attributes are attached to them by the joins
    performed by the caller.
    """
    ACTIONS_MAP = {
        "click.social.share": "share to social",
        "finish.board.unlock": "unlock board",
        "click.contest.like": "contest vote",
        "click.contest.dislike": "contest vote",
    }

    for rec in recs:
        path = rec.get("path")
        if path not in ACTIONS_MAP:
            continue

        puid = rec.get("puId", "")
        if puid == "":
            continue

        action_target = {}
        if path == "finish.board.unlock" and rec.get("boardId", "") != "":
            action_target = {
                "board_id": rec["boardId"],
                # The board join supplies ``is_board_owner_toloker``; the
                # ``from_toloka`` lookup is kept for backward compatibility.
                "from_toloka": bool(rec.get("from_toloka") or rec.get("is_board_owner_toloker"))
            }
        elif path == "click.social.share" and rec.get("url", "") != "":
            if rec.get("cardId"):
                action_target = {
                    # the id column is named ``cardId`` in these records
                    "card_id": rec.get("cardId"),
                    "card_type": rec.get("card_type"),
                    "origin_client_name": rec.get("origin_client_name"),
                    "origin_action": rec.get("origin_action"),
                    "from_toloka": bool(rec.get("is_card_owner_toloker"))
                }
            elif rec.get("boardId"):
                action_target = {
                    # the id column is named ``boardId`` in these records
                    "board_id": rec.get("boardId"),
                    "from_toloka": bool(rec.get("is_board_owner_toloker"))
                }
            else:
                action_target = {
                    "url": rec.get("url")
                }

        yield Record(
            puid=puid,
            ts=rec.get("timestamp", ""),
            action_owner_puid=puid,
            # schema declares a bool; the left-joined flag may be None
            action_owner_is_toloker=bool(rec.get("is_toloker")),
            action_type=ACTIONS_MAP[path],
            service_type="collections",
            action_target=action_target
        )

def format_board_action_target(board_id, is_board_owner_toloker):
    """Build the ``action_target`` payload describing a board."""
    return dict(board_id=board_id, from_toloka=is_board_owner_toloker)

def format_card_action_target(card_id, card_type=None, origin_client_name=None, origin_action=None, is_card_owner_toloker=None, like_id=None):
    """Build the ``action_target`` payload describing a card.

    ``card_id`` is always present; every other attribute is included only
    when its value is truthy (the toloker flag is stored as ``from_toloka``).
    """
    optional_fields = (
        ("card_type", card_type),
        ("origin_client_name", origin_client_name),
        ("origin_action", origin_action),
        ("from_toloka", is_card_owner_toloker),
        ("like_id", like_id),
    )

    target = {"card_id": card_id}
    for field_name, field_value in optional_fields:
        if field_value:
            target[field_name] = field_value
    return target


def collections_social_data(job, options_json):
    """Assemble the per-day social-event tables and register their outputs.

    For every date in ``options_json["calc_dates"]`` the board-creation,
    card-creation, like, subscription and redir-log events of that date are
    brought to a common set of columns (``puid``, ``ts``,
    ``action_owner_puid``, ``action_owner_is_toloker``, ``service_type``,
    ``action_type``, ``action_target``), concatenated, filtered, sorted by
    ``puid`` and ``ts`` and written to ``<output_dir>/<date>``.
    """
    user_table, board_table, card_table, like_table, subscriptions_table = prep_db_data(job, options_json)

    for calc_date in options_json["calc_dates"]:
        # Boards created on calc_date -> "create board" events.
        board_create_event = board_table \
            .filter(nf.equals("created_at_date", calc_date)) \
            .project(
                puid="owner_puid",
                ts="created_at_ts",
                action_owner_puid="owner_puid",
                action_owner_is_toloker="is_board_owner_toloker",
                service_type=ne.const("collections"),
                action_type=ne.const("create board"),
                action_target=ne.custom(
                    format_board_action_target,
                    "board_id",
                    "is_board_owner_toloker"
                ).with_type(qt.Yson)
            )

        # Cards created on calc_date by an owner with a truthy puid ->
        # "share card" when is_card_share is set, "create card" otherwise.
        card_create_event = card_table \
            .filter(
                nf.and_(
                    nf.equals("created_at_date", calc_date),
                    nf.custom(lambda x: x, "owner_puid")
                )
            ) \
            .project(
                puid="owner_puid",
                ts="created_at_ts",
                action_owner_puid="owner_puid",
                action_owner_is_toloker="is_card_owner_toloker",
                service_type=ne.const("collections"),
                action_type=ne.custom(lambda x: "share card" if x else "create card", "is_card_share").with_type(str),
                action_target=ne.custom(
                    format_card_action_target,
                    "card_id",
                    "card_type",
                    "origin_client_name",
                    "origin_action",
                    "is_card_owner_toloker"
                ).with_type(qt.Yson)
            )

        # Likes created on calc_date; action_type comes from the like rows.
        like_event = like_table \
            .filter(nf.equals("created_at_date", calc_date)) \
            .project(
                puid="user_puid",
                ts="created_at_ts",
                action_owner_puid="owner_puid",
                action_owner_is_toloker="like_owner_is_toloker",
                service_type=ne.const("collections"),
                action_type="action_type",
                action_target=ne.custom(
                    format_card_action_target,
                    "card_id",
                    "card_type",
                    "origin_client_name",
                    "origin_action",
                    "is_card_owner_toloker",
                    "like_id"
                ).with_type(qt.Yson)
            )

        # Subscriptions created on calc_date; action_type and action_target
        # are taken as prepared by the subscription mapper.
        subscription_event = subscriptions_table \
            .filter(nf.equals("created_at_date", calc_date)) \
            .project(
                puid="user_puid",
                ts="created_at_ts",
                action_owner_puid="owner_puid",
                action_owner_is_toloker="action_owner_is_toloker",
                service_type=ne.const("collections"),
                action_type="action_type",
                action_target="action_target"
            )


        # Redir-log events of calc_date: rows matched to a board by boardId,
        # rows matched to a card by cardId, and contest like/dislike rows as
        # is. The result is left-joined with users by puId to attach
        # is_toloker.
        # NOTE(review): a log row that matches both a board and a card, or a
        # contest row that also matches one, seems to end up in several of
        # the concatenated streams - confirm this is intended.
        log_table = prep_log_data(job, options_json, calc_date)
        log_events = job \
            .concat(
                log_table.join(
                    board_table.project("is_board_owner_toloker", puid="owner_puid", boardId="board_id"),
                    by="boardId",
                    type="inner",
                    allow_undefined_keys=False
                ),
                log_table.join(
                    card_table.project("card_type", "origin_client_name", "origin_action", "is_card_owner_toloker", puid="owner_puid", cardId="card_id"),
                    by="cardId",
                    type="inner",
                    allow_undefined_keys=False
                ),
                log_table.filter(
                    nf.or_(
                        nf.equals("path", "click.contest.like"),
                        nf.equals("path", "click.contest.dislike")
                    )
                )
            ) \
            .join(user_table.project("is_toloker", puId="puid"), by="puId", type="left") \
            .map(log_events_mapper)

        # Keep only events whose action_owner_is_toloker equals False and
        # whose action_target does not have from_toloka == True.
        job.concat(board_create_event, card_create_event, like_event, subscription_event, log_events) \
            .filter(
                nf.and_(
                    nf.equals('action_owner_is_toloker', False),
                    nf.custom(lambda x: x.get('from_toloka') != True, 'action_target')
                )
            ) \
            .sort("puid", "ts") \
            .put(
                "{}/{}".format(options_json["output_dir"], calc_date)
            )

@cli.statinfra_job
def make_job(job):
    """Configure ``job`` from the options file found in the working directory.

    The options are read as JSON from the file ``_files_1_`` in the current
    working directory. When ``calc_dates`` is missing or empty, the two days
    preceding today are used.

    Returns the configured ``job``.
    """
    options_path = os.path.join(os.getcwd(), "_files_1_")
    # close the file deterministically instead of leaking the handle
    with open(options_path, "r") as options_file:
        options_json = json.load(options_file)

    if not options_json.get("calc_dates"):
        # Take "today" once so both defaults refer to the same day even when
        # the job starts right at midnight.
        today = datetime.datetime.now().date()
        options_json["calc_dates"] = [
            (today - datetime.timedelta(days=days_back)).strftime("%Y-%m-%d")
            for days_back in (1, 2)
        ]

    collections_social_data(job, options_json)

    return job

@cli.job_constructor(options=["options_json"])
def run_local(cluster, params):
    """Build the job from a JSON string passed as the ``options_json`` option.

    The parsed options are printed before the job is configured.

    Returns the configured job.
    """
    job = cluster.job()
    options_json = json.loads(params.options_json)
    # Parenthesised single-argument form: prints the same text on Python 2
    # and stays valid syntax on Python 3.
    print(options_json)
    collections_social_data(job, options_json)
    return job


# Script entry point: hand control over to the CLI runner imported from nile.
if __name__ == "__main__":
    cli.run()
