# -*- coding: utf-8 -*-

import argparse
import yt.wrapper as yt


def mapper(rec):
    """Turn one input row into zero or more records keyed by ``deviceId``.

    Two row shapes are recognised:

    * A row with a non-empty ``deviceId`` field is passed through (modified
      in place): the ``@table_index`` control field is dropped if present and
      the id is normalized by removing dashes and lower-casing.
    * Otherwise, a row with a ``value`` field is parsed as tab-separated
      ``key=value`` pairs.  ``apps`` is split on commas into a list and
      ``mmetric_devids`` is normalized the same way as ``deviceId`` and split
      on commas; one ``{'apps': ..., 'deviceId': ...}`` record is yielded per
      non-empty device id.

    Rows matching neither shape, and ``value`` rows without any device id,
    yield nothing.
    """
    if 'deviceId' in rec and rec['deviceId']:
        # Tolerate rows that do not carry the control field (e.g. when the
        # function is called outside of the YT job) instead of raising.
        rec.pop("@table_index", None)
        rec['deviceId'] = rec['deviceId'].replace('-', '').lower()
        yield rec

    elif 'value' in rec:
        apps = []
        device_ids = ''
        for item in rec['value'].split('\t'):
            # partition() always gives a string key, even without '='.
            k, _, v = item.partition('=')
            if k == 'apps':
                apps = v.split(',')
            elif k == 'mmetric_devids':
                device_ids = v.replace('-', '').lower()

        for device_id in device_ids.split(','):
            # Skip empty ids (missing/empty mmetric_devids, stray commas):
            # the pass-through branch above also rejects empty device ids.
            if device_id:
                # Fresh dict (and list) per record so that consumers which
                # collect the output never see aliased, overwritten rows.
                yield {'apps': list(apps), 'deviceId': device_id}


def reducer(key, rows):
    """Merge all rows sharing one ``deviceId`` and emit app-presence tags.

    Rows are folded into a single dict with ``dict.update``:

    * rows that have a ``timestamp`` field are merged only when their
      timestamp (converted with ``int``) is strictly greater than the largest
      one merged so far; the threshold starts at 0, so rows whose timestamp
      is not greater than 0 are never merged;
    * rows without a ``timestamp`` are always merged.

    If the merged dict contains ``apps``, ``installId`` and ``deviceId``,
    three records are yielded, one per tag (``has_yandex_metro_app``,
    ``has_yandex_weather_app``, ``has_yandex_search_app``), each of the form
    ``{'install_id', 'did', 'tag_name', 'tag_value'}`` where ``tag_value`` is
    the string ``'1'`` or ``'0'``.  Otherwise nothing is yielded.

    ``key`` is not used; the device id is taken from the merged rows.
    """
    metro_prefixes = ('ru.yandex.metro', 'ru.yandex.mobile.metro')
    weather_prefixes = ('ru.yandex.weatherplugin', 'ru.yandex.mobile.weather')
    search_exact = ('ru.yandex.mobile', 'ru.yandex.mobile.inhouse')

    result = {}
    timestamp = 0
    for row in rows:
        if 'timestamp' in row:
            row_timestamp = int(row['timestamp'])
            if row_timestamp > timestamp:
                # merge only newer data
                timestamp = row_timestamp
                result.update(row)
        else:
            # if there is no timestamp, merge it anyway because rows from
            # dev_info have no timestamp
            result.update(row)

    if not ('apps' in result and 'installId' in result and 'deviceId' in result):
        return

    has_metro = False
    has_weather = False
    has_search = False
    # NOTE: a navigator tag (has_yandex_navi_app) was disabled in the original
    # code (flagged as an unused variable:
    # https://sandbox.yandex-team.ru/task/137575330/view) and is not emitted.
    for app in result['apps']:
        if app.startswith(metro_prefixes):
            has_metro = True
        if app.startswith(weather_prefixes):
            has_weather = True
        if app.startswith('ru.yandex.searchplugin') or app in search_exact:
            has_search = True

    tags = (
        ('has_yandex_metro_app', has_metro),
        ('has_yandex_weather_app', has_weather),
        ('has_yandex_search_app', has_search),
    )
    for tag_name, present in tags:
        # Build a fresh dict per record: re-yielding one mutated dict makes
        # every collected record alias the last tag.
        yield {
            'install_id': result['installId'],
            'did': result['deviceId'],
            'tag_name': tag_name,
            'tag_value': '1' if present else '0',
        }


def run(client, input, output):
    """Launch the map-reduce operation on ``client``.

    Uses this module's ``mapper`` and ``reducer``, reads from ``input`` and
    writes to ``output``, reducing by ``deviceId``.  Rows are exchanged in
    YSON format with control attributes delivered as row fields.
    """
    row_format = yt.YsonFormat(control_attributes_mode="row_fields")
    client.run_map_reduce(
        mapper,
        reducer,
        input,
        output,
        reduce_by='deviceId',
        format=row_format,
    )


def main():
    """Parse command-line options, build a YT client and run the job.

    Options:
      --server     YT cluster used as the proxy URL (default ``hahn``).
      --token      YT token, either the literal value or ``@<path>`` to read
                   the token from a file.  May be omitted.
      --sup_users  comma-separated SUP users table path(s).
      --dev_info   device info table path.
      --output     output table path.

    The input tables are passed to ``run`` as ``[dev_info] + sup_users``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--server', help='YT cluster',
                        default='hahn')
    parser.add_argument('--token', help='YT token')
    parser.add_argument('--sup_users', help='SUP users table path',
                        default='//home/search-functionality/news-push/sup/users_for_sup')
    parser.add_argument('--dev_info', help='Device info table path',
                        default='//home/crypta/production/state/graph/dicts/dev_info')
    parser.add_argument('--output', help='Output table path',
                        default='//home/search-functionality/sup/tags/apps')
    args = parser.parse_args()

    token = args.token
    # --token has no default, so it is None when omitted; do not crash on it.
    # NOTE(review): presumably the YT client then falls back to its own token
    # discovery when the configured token is None -- confirm.
    if token is not None and token.startswith('@'):
        # "@path" means: read the token from that file.  Close the file
        # deterministically and drop the trailing newline/whitespace that
        # token files usually end with.
        with open(token[1:], 'r') as token_file:
            token = token_file.read().strip()

    input_tables = [args.dev_info] + args.sup_users.split(',')
    output_tables = [args.output]
    config = {
        'proxy': {
            'url': args.server
        },
        'token': token,
        'pickling': {
            'python_binary': '/skynet/python/bin/python'
        }
    }
    client = yt.YtClient(config=config)
    run(client, input_tables, output_tables)


# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
