# coding: utf-8
'''
Copied from https://a.yandex-team.ru/arc/trunk/arcadia/junk/paletskikh/dash_scripts/sent_messages.py?rev=6355647
'''

from nile.api.v1 import (
    cli,
    Record,
    with_hints,
    multischema,
)

from qb2.api.v1.typing import *


# Event names accepted by event_parser; a 'report' entry is currently disabled.
EVENTS = ['msg_send', 'seen_marker']


def event_parser(event, recs):
    '''
    Run the parser that corresponds to ``event`` over ``recs``.

    :param event: event name; only names present in ``EVENTS`` are handled.
    :param recs: records of one frame, passed through to the parser untouched.
    :return: whatever the selected parser returns, or ``None`` when ``event``
        is not in ``EVENTS`` or has no parser attached.
    '''
    if event in EVENTS:
        handlers = (
            ('msg_send', msg_send_parser),
            ('seen_marker', seen_marker_parser),
        )
        for handled_event, handler in handlers:
            if event == handled_event:
                return handler(recs)
    return None


def seen_marker_parser(recs):
    '''
    Build a seen-marker row from the log records of a single frame.

    Records are told apart by ``rec.event_name``:

    * ``TInMessageInfo`` / ``TPrivilegedInMessageInfo`` supply the request
      attributes (guid, puid, user agent, chat id, event time, origin and
      TVM service id). Such a record must have ``type == 3``; any other
      type makes the parser give up on the frame immediately.
    * ``TResponseInfo`` supplies ``status_code``.

    ``yandexuid``, ``msg_timestamp`` and ``subservice`` are never filled by
    this parser and keep their defaults.

    :param recs: iterable of records belonging to one frame. Every record
        must expose ``event_name`` and ``event_timestamp``.
    :return: dict of output fields with ``event == 'seen_marker'``, or
        ``None`` when the frame is not a seen marker or its status code is
        not one of 1 and 9.
    '''
    fields = {
        'status_code': None,
        'guid': None,
        'puid': 0,
        'yandexuid': 0,
        'user_agent': '',
        'chat_id': None,
        'chat_type': None,
        'namespace': None,
        'event_timestamp': None,
        'msg_timestamp': None,
        'iso_eventtime': None,
        'origin': None,
        'is_valid': True,
        'event': None,
        'tvm_service_id': None,
        'subservice': None,
    }
    msg_type = None
    min_ts = None
    max_ts = None
    in_count = 0
    response_count = 0
    is_privileged = False

    for rec in recs:
        name = rec.event_name
        if name in ('TInMessageInfo', 'TPrivilegedInMessageInfo'):
            msg_type = rec.type
            if msg_type != 3:
                return None
            fields['guid'] = rec.guid
            # Keep the first non-empty puid seen in the frame.
            if rec.puid is not None and fields['puid'] == 0:
                fields['puid'] = rec.puid
            fields['user_agent'] = rec.user_agent
            fields['chat_id'] = rec.chat_id
            fields['event_timestamp'] = rec.event_timestamp
            fields['iso_eventtime'] = rec.iso_eventtime
            fields['origin'] = rec.origin
            fields['tvm_service_id'] = rec.tvm_service_id
            in_count += 1
            if name == 'TPrivilegedInMessageInfo':
                is_privileged = True
        elif name == 'TResponseInfo':
            fields['status_code'] = rec.status_code
            response_count += 1

        # Track the time span covered by the frame across all record kinds.
        ts = rec.event_timestamp
        if ts is not None:
            if min_ts is None or ts < min_ts:
                min_ts = ts
            if max_ts is None or ts > max_ts:
                max_ts = ts

    chat_id = fields['chat_id']
    if chat_id is not None:
        if '/' in chat_id:
            fields['chat_type'] = 'group_chat' if chat_id.startswith('0/') else 'channel'
            # The second '/'-separated component is stored as the namespace.
            fields['namespace'] = int(chat_id.split('/')[1])
        else:
            fields['chat_type'] = '1on1'

    # A frame is valid when it has exactly one request, exactly one response
    # and spans less than 2000000000 timestamp units (NOTE(review): units are
    # not visible here - confirm). A frame with no timestamps at all cannot
    # be checked and is reported as invalid instead of raising TypeError on
    # ``None - None``.
    fields['is_valid'] = (
        in_count == 1
        and response_count == 1
        and min_ts is not None
        and max_ts - min_ts < 2000000000
    )
    fields['is_privileged'] = is_privileged

    if fields['status_code'] in (1, 9) and msg_type == 3:
        fields['event'] = 'seen_marker'
        return fields
    return None


def reaction_parser(recs):
    '''
    Build a reaction row from the log records of a single frame.

    Records are told apart by ``rec.event_name``:

    * ``TInMessageInfo`` / ``TPrivilegedInMessageInfo`` supply the request
      attributes (guid, puid, user agent, chat id, event time, origin and
      TVM service id). Such a record must have ``type == 11``; any other
      type makes the parser give up on the frame immediately.
    * ``TReactionInfo`` supplies the reaction itself: ``action`` is mapped
      to ``event_subtype`` (0 - add, 1 - delete, 2 - replace, anything else
      - unknown), ``type`` becomes ``reaction_type`` and ``timestamp``
      becomes ``msg_timestamp``.
    * ``TResponseInfo`` supplies ``status_code``.

    :param recs: iterable of records belonging to one frame. Every record
        must expose ``event_name`` and ``event_timestamp``.
    :return: dict of output fields with ``event == 'reaction'``, or ``None``
        when the frame is not a reaction or its status code is not one of
        1 and 9.
    '''
    subtype_by_action = {0: 'add', 1: 'delete', 2: 'replace'}

    fields = {
        'status_code': None,
        'guid': None,
        'puid': 0,
        'yandexuid': 0,
        'user_agent': '',
        'chat_id': None,
        'chat_type': None,
        'namespace': None,
        'reaction_type': 0,
        'event_timestamp': None,
        'msg_timestamp': None,
        'iso_eventtime': None,
        'origin': None,
        'is_valid': True,
        'event': None,
        'event_subtype': None,
        'tvm_service_id': None,
        'subservice': None,
    }
    msg_type = None
    min_ts = None
    max_ts = None
    in_count = 0
    response_count = 0

    for rec in recs:
        name = rec.event_name
        if name in ('TInMessageInfo', 'TPrivilegedInMessageInfo'):
            msg_type = rec.type
            if msg_type != 11:
                return None
            fields['guid'] = rec.guid
            # Keep the first non-empty puid seen in the frame.
            if rec.puid is not None and fields['puid'] == 0:
                fields['puid'] = rec.puid
            fields['user_agent'] = rec.user_agent
            fields['chat_id'] = rec.chat_id
            fields['event_timestamp'] = rec.event_timestamp
            fields['iso_eventtime'] = rec.iso_eventtime
            fields['origin'] = rec.origin
            fields['tvm_service_id'] = rec.tvm_service_id
            in_count += 1
        elif name == 'TReactionInfo':
            fields['event_subtype'] = subtype_by_action.get(rec.action, 'unknown')
            fields['msg_timestamp'] = rec.timestamp
            if rec.puid is not None and fields['puid'] == 0:
                fields['puid'] = rec.puid
            fields['subservice'] = rec.subservice
            fields['reaction_type'] = rec.type
            fields['yandexuid'] = rec.yandexuid
        elif name == 'TResponseInfo':
            fields['status_code'] = rec.status_code
            response_count += 1

        # Track the time span covered by the frame across all record kinds.
        ts = rec.event_timestamp
        if ts is not None:
            if min_ts is None or ts < min_ts:
                min_ts = ts
            if max_ts is None or ts > max_ts:
                max_ts = ts

    chat_id = fields['chat_id']
    if chat_id is not None:
        if '/' in chat_id:
            fields['chat_type'] = 'group_chat' if chat_id.startswith('0/') else 'channel'
            # The second '/'-separated component is stored as the namespace.
            fields['namespace'] = int(chat_id.split('/')[1])
        else:
            fields['chat_type'] = '1on1'

    # A frame is valid when it has exactly one request, exactly one response
    # and spans less than 2000000000 timestamp units (NOTE(review): units are
    # not visible here - confirm). A frame with no timestamps at all cannot
    # be checked and is reported as invalid instead of raising TypeError on
    # ``None - None``.
    fields['is_valid'] = (
        in_count == 1
        and response_count == 1
        and min_ts is not None
        and max_ts - min_ts < 2000000000
    )

    if fields['status_code'] in (1, 9) and msg_type == 11:
        fields['event'] = 'reaction'
        return fields
    return None


def msg_send_parser(recs):
    '''
    Build a sent-message row from the log records of a single frame.

    Records are told apart by ``rec.event_name``:

    * ``TInMessageInfo`` / ``TPrivilegedInMessageInfo`` supply the request
      attributes. Such a record must have ``type == 4`` and a
      ``tvm_service_id`` other than 2002026, otherwise the parser gives up
      on the frame immediately.
    * ``TMessageContent`` supplies payload type, message timestamp,
      subservice, hash, yandexuid and the forward count.
    * ``TDepResolutionInfo`` supplies the chat id a forward refers to.
    * ``TResponseInfo`` supplies ``status_code``.

    ``event_subtype`` is ``'sending'`` by default, ``'moderation'`` for TVM
    service id 2001636, and otherwise ``'deletion'`` / ``'edit'`` when the
    request carries a non-zero ``timestamp`` (deletion when
    ``payload_type == 0``).

    :param recs: iterable of records belonging to one frame. Every record
        must expose ``event_name`` and ``event_timestamp``.
    :return: dict of output fields with ``event == 'msg_sent'``, or ``None``
        when the frame is not a message send, its status code is not one of
        1 and 9, or its payload id is missing or starts with ``_mon-`` /
        ``_unimon_``.
    '''
    fields = {
        'status_code': None,
        'guid': None,
        'puid': 0,
        'yandexuid': 0,
        'user_agent': '',
        'chat_id': None,
        'chat_type': None,
        'namespace': None,
        'payload_id': None,
        'payload_type': None,
        'hash': 0,
        'event_timestamp': None,
        'msg_timestamp': None,
        'parent_timestamp': None,
        'iso_eventtime': None,
        'origin': None,
        'is_valid': True,
        'event': None,
        'event_subtype': 'sending',
        'is_reply': None,
        'tvm_service_id': None,
        'subservice': None,
        'is_forward': False,
        'forward_chat_id': None,
    }
    msg_type = None
    forward_chat_id = None
    forward_count = 0
    timestamp = 0
    min_ts = None
    max_ts = None
    in_count = 0
    content_count = 0
    response_count = 0
    derp_count = 0

    for rec in recs:
        name = rec.event_name
        if name in ('TInMessageInfo', 'TPrivilegedInMessageInfo'):
            msg_type = rec.type
            # NOTE(review): what TVM service 2002026 is cannot be told from
            # this file; its frames are skipped entirely.
            if msg_type != 4 or rec.tvm_service_id == 2002026:
                return None
            fields['guid'] = rec.guid
            # Keep the first non-empty puid seen in the frame.
            if rec.puid is not None and fields['puid'] == 0:
                fields['puid'] = rec.puid
            fields['user_agent'] = rec.user_agent
            fields['chat_id'] = rec.chat_id
            fields['payload_id'] = rec.payload_id
            fields['event_timestamp'] = rec.event_timestamp
            fields['iso_eventtime'] = rec.iso_eventtime
            fields['parent_timestamp'] = rec.parent_timestamp
            fields['origin'] = rec.origin
            timestamp = rec.timestamp
            fields['tvm_service_id'] = rec.tvm_service_id
            in_count += 1
        elif name == 'TMessageContent':
            if rec.puid is not None and fields['puid'] == 0:
                fields['puid'] = rec.puid
            fields['payload_type'] = rec.payload_type
            fields['msg_timestamp'] = rec.timestamp
            fields['subservice'] = rec.subservice
            fields['hash'] = rec.hash
            fields['yandexuid'] = rec.yandexuid
            # A missing forward count means "no forwards"; normalising it
            # keeps the comparisons below from raising on Python 3.
            forward_count = rec.forward_count or 0
            content_count += 1
        elif name == 'TDepResolutionInfo':
            forward_chat_id = rec.chat_id
            derp_count += 1
        elif name == 'TResponseInfo':
            fields['status_code'] = rec.status_code
            response_count += 1

        # Track the time span covered by the frame across all record kinds.
        ts = rec.event_timestamp
        if ts is not None:
            if min_ts is None or ts < min_ts:
                min_ts = ts
            if max_ts is None or ts > max_ts:
                max_ts = ts

    chat_id = fields['chat_id']
    if chat_id is not None:
        if '/' in chat_id:
            fields['chat_type'] = 'group_chat' if chat_id.startswith('0/') else 'channel'
            # The second '/'-separated component is stored as the namespace.
            fields['namespace'] = int(chat_id.split('/')[1])
        else:
            fields['chat_type'] = '1on1'

    # A frame is valid when it has exactly one request, exactly one response
    # and spans less than 2000000000 timestamp units (NOTE(review): units are
    # not visible here - confirm). A frame with no timestamps at all cannot
    # be checked and is reported as invalid instead of raising TypeError on
    # ``None - None``.
    fields['is_valid'] = (
        in_count == 1
        and response_count == 1
        and min_ts is not None
        and max_ts - min_ts < 2000000000
    )

    if forward_count > 0:
        fields['is_reply'] = 'forward'
        fields['is_forward'] = True
        fields['forward_chat_id'] = forward_chat_id
        # A forward without a dependency-resolution record is incomplete.
        if derp_count == 0:
            fields['is_valid'] = False
    # A single forward into the same chat with a non-zero payload type is
    # reported as a reply (is_forward stays True).
    if forward_count == 1 and fields['payload_type'] != 0 and forward_chat_id == chat_id:
        fields['is_reply'] = 'reply'

    has_timestamp = timestamp is not None and timestamp != 0
    if fields['tvm_service_id'] == 2001636:
        fields['event_subtype'] = 'moderation'
        if has_timestamp:
            fields['msg_timestamp'] = timestamp
    elif has_timestamp:
        # A request-level timestamp marks a change to an existing message;
        # such frames must carry exactly one content record.
        fields['is_valid'] = fields['is_valid'] and content_count == 1
        fields['event_subtype'] = 'deletion' if fields['payload_type'] == 0 else 'edit'

    payload_id = fields['payload_id']
    if (
        fields['status_code'] in (1, 9)
        and msg_type == 4
        and payload_id is not None
        and not payload_id.startswith(('_mon-', '_unimon_'))
    ):
        fields['event'] = 'msg_sent'
        return fields
    return None


# Column types of the sent-messages output (first stream of event_reducer).
sent_messages_schema = {
    'source_uri': String,
    'frame_id': Integer,
    'status_code': Integer,
    'guid': String,
    'puid': Integer,
    'yandexuid': UInt64,
    'user_agent': Optional[String],
    'chat_id': String,
    'chat_type': String,
    'namespace': Optional[Integer],
    'payload_id': String,
    'payload_type': Optional[Integer],
    'event_timestamp': Integer,
    'msg_timestamp': Optional[Integer],
    'parent_timestamp': Optional[Integer],
    'tvm_service_id': Optional[Integer],
    'origin': Optional[Integer],
    'hash': UInt64,
    'subservice': Optional[Integer],
    'iso_eventtime': String,
    'event': String,
    'event_subtype': String,
    'is_reply': Optional[String],
    'is_valid': Bool,
    'is_forward': Bool,
    'forward_chat_id': Optional[String],
}

# Column types of the reactions output (second stream of event_reducer).
reactions_schema = {
    'source_uri': String,
    'frame_id': Integer,
    'status_code': Integer,
    'guid': String,
    'puid': Integer,
    'yandexuid': UInt64,
    'user_agent': Optional[String],
    'chat_id': String,
    'chat_type': String,
    'namespace': Optional[Integer],
    'reaction_type': Integer,
    'event_timestamp': Integer,
    'msg_timestamp': Optional[Integer],
    'tvm_service_id': Optional[Integer],
    'origin': Optional[Integer],
    'subservice': Optional[Integer],
    'iso_eventtime': String,
    'event': String,
    'event_subtype': String,
    'is_valid': Bool,
}

# Column types of the seen-markers output (third stream of event_reducer).
seen_markers_schema = {
    'source_uri': String,
    'frame_id': Integer,
    'status_code': Integer,
    'guid': String,
    'puid': Integer,
    'yandexuid': UInt64,
    'user_agent': Optional[String],
    'chat_id': String,
    'chat_type': String,
    'namespace': Optional[Integer],
    'event_timestamp': Integer,
    'msg_timestamp': Optional[Integer],
    'tvm_service_id': Optional[Integer],
    'origin': Optional[Integer],
    'subservice': Optional[Integer],
    'iso_eventtime': String,
    'event': String,
    'is_valid': Bool,
    'is_privileged': Bool,
}

@with_hints(
    output_schema=multischema(
        sent_messages_schema,
        reactions_schema,
        seen_markers_schema,
    )
)
def event_reducer(groups, sent_messages_out, reaction_out, seen_marker_out):
    '''
    Classify every group of records and emit one row to the matching output.

    The parsers are tried in a fixed order - message send, reaction, seen
    marker - and the first one that returns a row wins. A group that no
    parser recognises produces no output.

    :param groups: iterable of ``(key, records)`` pairs.
    :param sent_messages_out: callable receiving sent-message records.
    :param reaction_out: callable receiving reaction records.
    :param seen_marker_out: callable receiving seen-marker records.
    '''
    pipeline = (
        (msg_send_parser, sent_messages_out),
        (reaction_parser, reaction_out),
        (seen_marker_parser, seen_marker_out),
    )
    for key, recs in groups:
        # Materialise the group once: each parser walks it from the start.
        frame = list(recs)
        for parse, emit in pipeline:
            row = parse(frame)
            if row is not None:
                emit(Record(key, **row))
                break

@cli.statinfra_job
def make_job(job, options, nirvana):
    '''
    Wire the job: read the first input table, reduce it per frame and write
    the three resulting streams under the first output directory.

    :param job: job object the tables and operations are attached to.
    :param options: not used by this function.
    :param nirvana: provides ``directories`` and ``input_tables``; only the
        first element of each is used.
    :return: the same ``job`` object.
    '''
    target_dir = nirvana.directories[0]
    source = job.table(nirvana.input_tables[0])

    grouped = source.groupby('frame_id', 'source_uri')
    sent_messages, reactions, seen_markers = grouped.reduce(event_reducer)

    destinations = (
        (sent_messages, '/sent_messages'),
        (seen_markers, '/seen_markers'),
        (reactions, '/reactions'),
    )
    for stream, suffix in destinations:
        stream.put(target_dir + suffix)

    return job


# Script entry point: hand control to the nile CLI runner.
if __name__ == '__main__':
    cli.run()
