#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import json
import os
import re
import subprocess

import yt.wrapper as yt


try:
    _STRING_TYPES = basestring  # Python 2: matches both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3 has no basestring


def grepSearchAppDids(rec):
    """Yield the device id of a row describing a pushable search-app install.

    The row is emitted (as ``{'device_id': <deviceId>}``) only when all of
    the following hold:

    * ``appId`` and ``deviceId`` are non-empty strings;
    * ``appId`` is exactly ``ru.yandex.searchplugin`` (optionally with a
      ``.dev``/``.beta``/``.nightly`` suffix) or ``ru.yandex.mobile``
      (optionally with a ``.dev``/``.inhouse`` suffix);
    * the JSON payload has a non-empty ``registration.pushToken`` and a falsy
      ``registration.notifyDisabled``;
    * ``tags.apps`` contains at least one name starting with
      ``com.yandex.browser`` or ``ru.yandex.mobile.search``.

    Args:
        rec: mapping with a ``json`` key holding a JSON-encoded object and
            optional ``appId`` / ``deviceId`` keys.

    Yields:
        At most one dict ``{'device_id': deviceId}``.

    Raises:
        KeyError: if ``rec`` has no ``json`` key.
        ValueError: if ``rec['json']`` is not valid JSON.
    """
    data = json.loads(rec['json'])
    app_id = rec.get('appId')
    device_id = rec.get('deviceId')

    if not (app_id and device_id):
        return
    # The type check must precede re.match, which rejects non-string input.
    if not (isinstance(app_id, _STRING_TYPES) and isinstance(device_id, _STRING_TYPES)):
        return

    # "or {}" also covers an explicit JSON null, which .get() would return
    # as None instead of the default.
    registration = data.get('registration') or {}
    if not registration.get('pushToken') or registration.get('notifyDisabled', False):
        return

    # Dots are escaped so that only the literal package names match.
    is_search_app = (
        re.match(r'^ru\.yandex\.searchplugin(\.dev|\.beta|\.nightly)?$', app_id)
        or re.match(r'^ru\.yandex\.mobile(\.dev|\.inhouse)?$', app_id)
    )
    if not is_search_app:
        return

    installed_apps = (data.get('tags') or {}).get('apps') or ()
    wanted_prefixes = ('com.yandex.browser', 'ru.yandex.mobile.search')
    has_wanted_app = any(
        isinstance(app, _STRING_TYPES) and app.startswith(wanted_prefixes)
        for app in installed_apps
    )
    if has_wanted_app:
        yield {
            'device_id': device_id
        }


def main():
    """Export device ids selected by ``grepSearchAppDids`` into a local file.

    Picks the lexicographically greatest entry of ``--sup-users-dir``, runs
    ``grepSearchAppDids`` as a YT map operation over its ``sup_users`` table
    and writes ``--output-file``: the first line is the number of unique
    device ids, followed by the ids themselves, one per line, in the order
    produced by ``sort -u``.

    Raises:
        RuntimeError: if ``--sup-users-dir`` has no entries.
        subprocess.CalledProcessError: if the external ``sort`` fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--server', help='YT cluster',
        default='hahn')
    parser.add_argument('--token', help='YT token')
    parser.add_argument('--sup-users-dir', dest='sup_users_dir', help='YT directory with backups of sup users',
        default='//home/search-functionality/sup/backup/sup-prod.n.yandex-team.ru')
    # Not used below; kept so that existing command lines keep parsing.
    parser.add_argument('--dev-info-table', dest='dev_info_table', help='YT table with crypta dev_info',
        default='//home/crypta/production/state/graph/dicts/dev_info')
    parser.add_argument('--output-file', dest='output_file', help='Result file',
        default='search_app_dids.data')
    args = parser.parse_args()

    token = args.token
    if token and token.startswith('@'):
        # '@path' means "read the token from this file"; strip the trailing
        # newline editors usually leave so it does not end up in the token.
        with open(token[1:], 'r') as token_file:
            token = token_file.read().strip()

    config = {
        'proxy': {
            'url': args.server
        },
        'pickling': {
            'python_binary': '/skynet/python/bin/python'
        }
    }
    if token:
        config['token'] = token
    # NOTE(review): without --token no token is put into the config; the YT
    # client is presumably able to pick up its default credentials - confirm.
    client = yt.YtClient(config=config)

    with client.TempTable(prefix='generate_device_ids.map') as dids_table:
        # Find last timestamp
        snapshots = client.list(args.sup_users_dir, absolute=False)
        if not snapshots:
            raise RuntimeError('No entries found in ' + args.sup_users_dir)
        last_sup_ts = max(snapshots)

        # Find users, that have search_app and ya_browser
        client.run_map(
            grepSearchAppDids,
            source_table=args.sup_users_dir + '/' + last_sup_ts + '/sup_users',
            destination_table=dids_table,
            spec={'data_size_per_job': 1 * 1024 * 1024 * 1024}
        )

        # Save device_ids into result file
        tmp_name = args.output_file + '.tmp'
        try:
            with open(tmp_name, 'w') as tmp:
                for rec in client.read_table(dids_table):
                    tmp.write(rec['device_id'] + '\n')

            # Deduplicate with the external sort so the ids never have to fit
            # in memory. Arguments are passed as a list (no shell), so file
            # names with spaces or metacharacters are safe, and a non-zero
            # exit status raises instead of being ignored. Sorting in place
            # relies on "-o" being allowed to name an input file.
            subprocess.check_call(['sort', '-u', '-o', tmp_name, '--', tmp_name])

            # Count after deduplication so the header matches the number of
            # id lines that follow it.
            with open(tmp_name, 'r') as tmp:
                dids_count = sum(1 for _ in tmp)

            with open(args.output_file, 'w') as res:
                res.write(str(dids_count) + '\n')
                with open(tmp_name, 'r') as tmp:
                    for line in tmp:
                        res.write(line)
        finally:
            # Do not leave the intermediate file behind, even on failure.
            if os.path.exists(tmp_name):
                os.remove(tmp_name)


# Script entry point: run main() only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
