import sys
import json
import itertools
import re
from urlparse import urlparse, parse_qs

# Matches request URIs of the form
#   [/<prefix>]/v1/data/app/databases/<db>/(deltas|snapshot)<anything>
# Groups: (optional leading path segment, database string, 'deltas' or 'snapshot').
request_uri_regexp = re.compile(r'(/[^\/]*)?/v1/data/app/databases/([^\/]*)/(deltas|snapshot).*')

# Matches a database string in one of two forms:
#   <db_name>                        -> group 'db_name'
#   .ext.<app_name>@<ext_db_name>    -> groups 'app_name' and 'ext_db_name'
# Both alternatives sit inside one anchored group so that the WHOLE string has
# to be valid. Without the outer group the anchors applied only to the second
# alternative, and a string such as 'my db' matched as the truncated name 'my'.
# NOTE(review): ext_db_name allows one character more ({0,128}) than the other
# two names ({0,127}) -- confirm this is intended.
db_regext = re.compile(
    r'^(?:(?P<db_name>[\w-][\.\w-]{0,127})'
    r'|\.ext\.(?P<app_name>[\w-][\.\w-]{0,127})@(?P<ext_db_name>[\w-][\.\w-]{0,128}))$')

# A row that already has all of these keys is re-emitted unchanged by do_map.
MAP_TO_KEYS = frozenset({'appName', 'dbId', 'uid',
                         'oauthId', 'lastDeltaRevisionAccess',
                         'lastDeltaAccess', 'lastSnapshotAccess'})

# Fields that together identify one group of rows in do_reduce.
REDUCE_KEYS = ("appName", "dbId", "oauthId", "uid")
# safe_print drops a record if the text form of any value is longer than this.
MAX_FIELD_LENGTH = 1000


def safe_print(data):
    """Write ``data`` to stdout as a single JSON line unless a value is oversized.

    The record is dropped without any output when the text form of at least
    one of its values is longer than ``MAX_FIELD_LENGTH`` characters.

    :param data: mapping to serialize.
    """
    has_oversized_field = any(
        len(unicode(value)) > MAX_FIELD_LENGTH for value in data.values())
    if not has_oversized_field:
        # Single parenthesized argument: same output as the bare print statement.
        print(json.dumps(data))


def extract_data(request_uri):
    """Parse a database deltas/snapshot request URI into a partial record.

    :param request_uri: request URI string, optionally with a query string.
    :return: ``None`` when the URI does not match ``request_uri_regexp`` or
        when it is a ``deltas`` request without a non-negative integer
        ``base_revision`` query parameter. Otherwise a dict with:

        * ``appName`` -- app name from the ``.ext.<app>@<db>`` form, else ``''``;
        * ``dbId`` -- database name, or ``''`` if the database string is not
          recognized by ``db_regext``;
        * ``lastDeltaRevisionAccess`` -- ``base_revision`` as an int, or ``-1``
          when it is absent or not an integer.
    """
    uri_match = request_uri_regexp.match(request_uri)
    if uri_match is None:
        return None

    # Group 1 (optional leading path segment) is not needed.
    db_string = uri_match.group(2)
    is_delta = uri_match.group(3) == 'deltas'

    query = parse_qs(urlparse(request_uri).query)

    app_name = database_id = None
    db_match = db_regext.match(db_string)
    if db_match is not None:
        app_name = db_match.group('app_name')
        database_id = db_match.group('db_name') or db_match.group('ext_db_name')

    try:
        revision = int(query['base_revision'][0])
    except (KeyError, TypeError, ValueError):
        revision = -1

    # A deltas request is only meaningful together with a valid base revision.
    if is_delta and revision < 0:
        return None

    return {
        'appName': app_name or '',
        'dbId': database_id or '',
        'lastDeltaRevisionAccess': revision,
    }


def do_decode(lines):
    """Lazily decode an iterable of JSON documents, one per item.

    :param lines: iterable of JSON-encoded strings.
    :return: generator yielding the decoded object for each item, in order.
    """
    decode = json.loads
    for raw_line in lines:
        yield decode(raw_line)


def do_map(rows):
    """Map stage: convert decoded rows into per-access records on stdout.

    Rows that already contain every key of ``MAP_TO_KEYS`` are re-emitted
    unchanged. Any other row is emitted only if all of the following hold:

    * ``puid``, ``request_uri``, ``oauth_id`` and ``unixtime`` are present
      and not ``None``;
    * ``tskv_format`` equals ``'ydisk-mpfs-stat-log-api-access'``;
    * ``status_code`` (default ``'200'``) and ``unixtime`` are integers and
      the status code is in the 200..299 range;
    * ``extract_data`` recognizes the request URI and yields a non-empty
      ``dbId``.

    A row with a malformed ``status_code`` or ``unixtime`` is skipped like
    every other unusable row, so that a single bad row does not abort the run.

    :param rows: iterable of dicts.
    """
    required_fields = ('puid', 'request_uri', 'oauth_id', 'unixtime')

    for row in rows:
        if MAP_TO_KEYS.issubset(row.keys()):
            safe_print(row)
            continue

        if any(row.get(field) is None for field in required_fields):
            continue

        if row.get('tskv_format') != 'ydisk-mpfs-stat-log-api-access':
            continue

        # Parse both numeric fields up front; int() raises on garbage or null.
        try:
            status_code = int(row.get('status_code', '200'))
            unixtime = int(row['unixtime'])
        except (TypeError, ValueError, OverflowError):
            continue

        if not 200 <= status_code <= 299:
            continue

        data = extract_data(row['request_uri'])
        if data is None or not data['dbId']:
            continue

        data['uid'] = row['puid']
        data['oauthId'] = row['oauth_id']
        # A negative revision marks a snapshot access, otherwise a delta access;
        # the timestamp goes into the matching field and the other one is 0.
        if data['lastDeltaRevisionAccess'] < 0:
            data['lastSnapshotAccess'] = unixtime
            data['lastDeltaAccess'] = 0
        else:
            data['lastSnapshotAccess'] = 0
            data['lastDeltaAccess'] = unixtime

        safe_print(data)


def do_reduce(rows):
    """Reduce stage: merge consecutive rows that share ``REDUCE_KEYS``.

    Only adjacent rows with equal key values are merged, so the input has to
    arrive grouped by ``REDUCE_KEYS``. One record per group is written through
    ``safe_print`` with:

    * ``lastDeltaRevisionAccess`` -- the highest non-negative revision in the
      group, or ``-1`` if there is none;
    * ``lastDeltaAccess`` -- the latest ``lastDeltaAccess`` among the rows
      carrying that highest revision (``0`` if there is none);
    * ``lastSnapshotAccess`` -- the latest ``lastSnapshotAccess`` over all
      rows of the group (``0`` if there is none).

    :param rows: iterable of dicts.
    """
    def row_key(row):
        return tuple(row.get(key) for key in REDUCE_KEYS)

    for row_id, group in itertools.groupby(rows, key=row_key):
        max_revision = -1
        max_delta_access_time = 0
        max_snapshot_access_time = 0

        for row in group:
            # Previously aggregated rows carry a snapshot time AND a delta
            # revision, so the snapshot time is folded in for every row; rows
            # produced for a plain delta access contribute 0 here.
            max_snapshot_access_time = max(row.get('lastSnapshotAccess', 0),
                                           max_snapshot_access_time)

            row_revision = row.get('lastDeltaRevisionAccess')
            if row_revision is None or row_revision < 0:
                continue

            row_delta_time = row.get('lastDeltaAccess', 0)
            if row_revision > max_revision:
                max_revision = row_revision
                max_delta_access_time = row_delta_time
            elif row_revision == max_revision:
                # Same revision seen again: keep the latest access so that the
                # result does not depend on the order of rows within a group.
                max_delta_access_time = max(row_delta_time,
                                            max_delta_access_time)

        result = dict(zip(REDUCE_KEYS, row_id))
        result['lastDeltaRevisionAccess'] = max_revision
        result['lastDeltaAccess'] = max_delta_access_time
        result['lastSnapshotAccess'] = max_snapshot_access_time
        safe_print(result)


if __name__ == '__main__':
    # Usage: <script> map|reduce  -- JSON rows are read from stdin, one per line.
    actions = {'map': do_map, 'reduce': do_reduce}
    command = sys.argv[1] if len(sys.argv) > 1 else None
    handler = actions.get(command)
    if handler is None:
        sys.stderr.write('Please, specify proper map or reduce command to execute' + '\n')
        sys.exit(1)

    stream = do_decode(sys.stdin)
    handler(stream)
