#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    Record
)
import getpass
import datetime
import json
import re

# Reference dates for this run: the local "today" at import time and the day
# before it, each also rendered as a YYYY-MM-DD string.
DATE = datetime.date.today()
YESTERDAY = DATE - datetime.timedelta(days=1)
DATE_F, YESTERDAY_F = (
    day.strftime('%Y-%m-%d') for day in (DATE, YESTERDAY)
)

# Cluster handle shared by the whole script. The pool name is suffixed with
# the current OS user, and the `templates` entries are what the `$job_root`,
# `$today` and `$yesterday` placeholders in table paths refer to.
hahn = clusters.Hahn(pool='search-research_{}'.format(getpass.getuser())).env(
    templates={
        'job_root': 'home/atom/2017-04-27_pogoda_safari',
        'today': DATE_F,
        'yesterday': YESTERDAY_F,
    }
)


def tryint(x):
    """Return ``int(x)`` when the conversion succeeds, otherwise ``x`` itself.

    :param x: any value; typically a string holding one version component.
    :return: the integer value of ``x``, or ``x`` unchanged when ``int()``
        rejects it.
    """
    try:
        return int(x)
    except (TypeError, ValueError):
        # ValueError: a string that is not an integer literal ('2b', '').
        # TypeError: a value int() cannot accept at all (None, list, ...).
        return x


def make_key(yandexuid):
    """Build the output key for an identifier.

    Identifiers that are exactly 32 characters long are returned untouched;
    any other identifier is returned with a ``y`` prefix.
    """
    needs_prefix = len(yandexuid) != 32
    return 'y{}'.format(yandexuid) if needs_prefix else yandexuid


try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:  # Python 3 has no basestring
    _STRING_TYPES = str


def _version_part_key(part):
    """Turn one dot-separated version component into a comparable key.

    The key is ``(number, suffix)``: ``number`` is the integer value of the
    leading ASCII digits (``-1`` when there are none) and ``suffix`` is the
    rest of the component. Keys are always (int, string) pairs, so they can
    be ordered against each other without mixing ints and strings.
    """
    part = part.strip()
    digit_count = len(part) - len(part.lstrip('0123456789'))
    number = int(part[:digit_count]) if digit_count else -1
    return number, part[digit_count:]


def version_gt(x, y):
    """Return True when version string ``x`` is strictly greater than ``y``.

    Versions are split on ``.`` and compared component by component, each
    component ordered by its leading number first and its textual suffix
    second (so ``'3.10' > '3.3'`` and ``'3.2b' < '3.3'``). A component with
    no leading digits sorts below every numeric one, which means an empty or
    unparsable version is never reported as greater than a numeric one.

    When one version is a prefix of the other, the longer one is considered
    greater; in particular ``version_gt('3.3.0', '3.3')`` is True.

    :param x: candidate version; anything that is not a string yields False.
    :param y: reference version; anything that is not a string yields False.
    :return: bool
    """
    if not isinstance(x, _STRING_TYPES) or not isinstance(y, _STRING_TYPES):
        return False
    # List comparison stops at the first differing key and, if one list is a
    # prefix of the other, treats the longer list as the greater one.
    return (
        [_version_part_key(part) for part in x.split('.')]
        > [_version_part_key(part) for part in y.split('.')]
    )


def crypta_map(records):
    """Mapper: expand each input record into (yandexuid, DeviceID) pairs.

    For every record, ``rec.value`` is parsed as JSON and the list under its
    ``'d'`` key is read (presumably device ids -- confirm against the table
    schema). One output record is yielded per entry, lower-cased, paired with
    ``rec.key`` stripped of every character outside ``a-z0-9``.

    Records whose value is not valid JSON, or whose JSON payload is not an
    object, are skipped.

    :param records: iterable of records exposing ``key`` and ``value``.
    :return: generator of ``Record(yandexuid=..., DeviceID=...)``.
    """
    for rec in records:
        try:
            obj = json.loads(rec.value)
        except (TypeError, ValueError):
            # ValueError: malformed JSON; TypeError: value is not a string.
            continue
        if not isinstance(obj, dict):
            # Valid JSON but not an object (null, list, number): no 'd' key.
            continue
        yandexuid = re.sub(r'[^a-z0-9]', '', rec.key)
        # `or []` covers both a missing 'd' key and an explicit null.
        for device_id in (obj.get('d') or []):
            yield Record(
                yandexuid=yandexuid,
                DeviceID=device_id.lower()
            )


def main():
    """Assemble and run the job that publishes the ``pogoda_safari`` list.

    Steps, in order:

    1. Read yesterday's daily metrika-mobile-log table plus every 30-minute
       table whose path contains today's date.
    2. Keep rows where Manufacturer is 'Apple', AppID contains 'weather'
       (case-insensitive) and AppVersionName is greater than '3.3'; store
       them with a lower-cased DeviceID under ``$job_root/$today/extracted``.
    3. Map the crypta id table through ``crypta_map`` and store the result
       under ``$job_root/crypta_mapped``.
    4. Inner-join both on DeviceID, de-duplicate on (DeviceID, yandexuid),
       store under ``$job_root/$today/joined``, then write key/subkey/value
       rows to the shared ``promoliba/pogoda_safari`` table.
    """
    job = hahn.job()

    # Source tables: one full day for yesterday, then today's partial tables.
    tables = [job.table('logs/metrika-mobile-log/1d/$yesterday')]
    todays_paths = hahn.driver.client.search(
        root='//logs/metrika-mobile-log/30min',
        path_filter=lambda path: DATE_F in path
    )
    tables += [job.table(path) for path in todays_paths]

    row_filter = nf.and_(
        nf.equals('Manufacturer', 'Apple'),
        nf.custom(lambda app_id: 'weather' in (app_id or '').lower(), 'AppID'),
        nf.custom(lambda version: version_gt(version, '3.3'), 'AppVersionName')
    )

    log_rows = job.concat(*tables).project(
        'DeviceID', 'AppID', 'AppVersionName', 'Manufacturer'
    )
    matching_rows = log_rows.filter(row_filter)
    # DeviceID is lower-cased here; crypta_map lower-cases its side as well.
    extracted = matching_rows.project(
        'AppID', 'AppVersionName', 'Manufacturer',
        DeviceID=ne.custom(lambda device_id: device_id.lower(), 'DeviceID')
    ).put('$job_root/$today/extracted')

    crypta_source = job.table(
        'home/personalization/production/crypta_extract/id2generalized_id'
    )
    crypta = crypta_source.map(crypta_map).put('$job_root/crypta_mapped')

    joined = extracted.join(crypta, by='DeviceID', type='inner')
    unique_pairs = joined.unique('DeviceID', 'yandexuid').put(
        '$job_root/$today/joined'
    )
    unique_pairs.project(
        key=ne.custom(make_key, 'yandexuid'),
        subkey=ne.const('pogoda_safari'),
        value=ne.const('1')
    ).put('home/extdata-shared/release/promoliba/pogoda_safari')

    job.run()


# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
