# coding=utf-8
import logging
import time
from datetime import date, timedelta, datetime
from itertools import groupby
from operator import itemgetter

import yt.wrapper as yt
import yt.yson as yson
from flask import current_app as app
from flask_script import Command, Option
from nile.api.v1 import aggregators as na, files, get_records_from_file
from nile.api.v1 import extractors as ne, Record
from qb2.api.v1 import filters as qf, extractors as qe

from jafar import jafar_mongo, localization_mongo
from jafar.datasets import get_dataset_processor
from jafar.datasets.base import get_table_name
from jafar.utils.dyntables import (mount_dynamic_table, create_dyntable, dynamize_static_and_cleanup)
from jafar.utils.io import get_cluster
from jafar_yt.update_datasets import AdvisorMongoInstallMapper
from jafar_yt.utils.helpers import (parsed_google_play_unpacker, stemmize, parse_workspace_config,
                                    EventValueMapper, force_ascii, html_to_plain_text)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)

# Template formatted with a package name to obtain the app's Google Play URL.
GOOGLE_PLAY_URL_TEMPLATE = "https://play.google.com/store/apps/details?id={}"

# Schema of the apps-info dynamic table; rows are keyed by
# (package_name, language), both sorted ascending.
APPS_INFO_SCHEMA = yson.YsonList([
    {"name": "package_name", "type": "string", "sort_order": "ascending"},
    {"name": "language", "type": "string", "sort_order": "ascending"},
    {"name": "name", "type": "utf8"},
    {"name": "description", "type": "utf8"},
    {"name": "publisher", "type": "utf8"},
    {"name": "rating", "type": "double"},
    {"name": "rating_count", "type": "int64"},
    {"name": "categories", "type": "any"},
    {"name": "genres", "type": "any"},
    {"name": "content_rating", "type": "utf8"},
    {"name": "is_free", "type": "boolean"},
    {"name": "screenshots", "type": "any"},
    {"name": "screenshots_info", "type": "any"},
    {"name": "short_title", "type": "utf8"},
    {"name": "release_date", "type": "int64"},
    {"name": "icon_url", "type": "string"},
    {"name": "icon_colorwiz", "type": "any"},
])
APPS_INFO_SCHEMA.attributes["unique_keys"] = True
# Passed as ``in_memory_mode`` when the dynamic table is created/dynamized.
APPS_INFO_MODE = 'compressed'

# NOTE: despite the name, every pair is (language, region) -- that is the
# order in which prepare_static_table unpacks them.
PARSED_REGION_AND_LANGUAGES = [
    ('ru', 'ru'),
    ('en', 'us'),
    ('es', 'mx'),
    ('pt', 'br'),
    ('be', 'by'),
    ('kk', 'kz'),
    ('tr', 'tr'),
]

# Number of days of logs scanned when collecting short titles.
SHORT_TITLES_COLLECTION_PERIOD = 7

# Per-language category names that take precedence over the translation
# collected from parsed data (see save_category_translations_to_mongo).
# Languages missing from an entry fall back to the collected translation.
CATEGORY_TRANSLATION_OVERRIDES = {
    'GAME_ACTION': {'ru': u'Экшн-игры', 'en': u'Action games'},
    'GAME_BOARD': {'ru': u'Настольные игры', 'en': u'Board games'},
    'GAME_CARD': {'ru': u'Карточные игры', 'en': u'Card games'},
    'GAME_CASINO': {'ru': u'Игры-казино', 'en': u'Casino games'},
    'GAME_CASUAL': {'ru': u'Казуальные игры', 'en': u'Casual games'},
    'GAME_EDUCATIONAL': {'ru': u'Обучающие игры', 'en': u'Educational games'},
    'GAME_MUSIC': {'ru': u'Музыкальные игры', 'en': u'Music games'},
    'GAME_PUZZLE': {'en': u'Puzzle games'},
    'GAME_RACING': {'en': u'Racing games'},
    'GAME_ROLE_PLAYING': {'ru': u'Ролевые игры', 'en': u'Role playing games'},
    'GAME_SIMULATION': {'en': u'Simulator games'},
    'GAME_SPORTS': {'en': u'Sport games'},
    'GAME_TRIVIA': {'en': u'Trivia games'},
    'GAME_WORD': {'en': u'Word games'}
}

# Base name of the Mongo collections and YT tables holding the search index;
# an '_expanded' suffix is appended for the expanded variant.
APPS_SEARCH_INDEX = 'apps_search_index'
YT_PATH_APPS_SEARCH_INDEX = '//home/advisor/' + APPS_SEARCH_INDEX
# YT table the category translations are written to before being read into Mongo.
YT_PATH_CATEGORY_TRANSLATIONS = '//home/advisor/category_translations_tmp'


def table_is_empty(yt_client, table):
    """Tell whether ``table`` holds no data according to its size attribute.

    The node is fetched through ``yt_client.get`` requesting only the
    ``uncompressed_data_size`` attribute; a missing attribute counts as zero,
    i.e. the table is reported as empty.

    :param yt_client: object exposing ``get(path, attributes=[...])``.
    :param table: path of the table to inspect.
    :return: ``True`` when the reported uncompressed size equals zero.
    """
    attribute_name = 'uncompressed_data_size'
    node = yt_client.get(table, attributes=[attribute_name])
    data_size = node.attributes.get(attribute_name, 0)
    return data_size == 0


def calculate_icons(stream, job):
    """Process icons missing from the icons table and join them onto ``stream``.

    Distinct ``icon_url`` values of ``stream`` that are absent from the
    ``YT_PATH_APP_ICONS`` table are passed through ``colorwiz``; the results
    are appended to that table and right-joined back onto ``stream`` by
    ``icon_url``.

    :param stream: nile stream whose records carry an ``icon_url`` field.
    :param job: nile job used to read the existing icons table.
    :return: ``stream`` right-joined with the freshly processed icon records.
    """
    # NOTE(review): only icons processed in this run are joined back; records
    # whose icon already exists in the icons table get no colorwiz value from
    # this join -- confirm that this is intended.
    # NOTE(review): the mapper emits a ``colorwiz`` field while
    # APPS_INFO_SCHEMA declares ``icon_colorwiz`` -- verify the column naming
    # against the icons table.
    icons = app.config['YT_PATH_APP_ICONS']

    def icon_mapper(rows):
        from colorwiz.tool import process_url

        for record in rows:
            try:
                # The stream is projected down to ``icon_url`` before this
                # mapper runs, so that is the only field holding the URL.
                colorwiz = process_url(record['icon_url'])
            except Exception:
                # Best effort: an icon that cannot be processed is skipped.
                # ``Exception`` (not a bare except) lets GeneratorExit and
                # interpreter-exit signals propagate.
                continue
            yield Record(record, colorwiz=colorwiz)

    return stream.project(
        'icon_url'
    ).unique(
        'icon_url'
    ).join(
        job.table(icons),
        by='icon_url', type='left_only'
    ).map(
        icon_mapper,
        intensity='ultra_cpu'
    ).put(
        icons,
        append=True
    ).join(
        stream,
        by='icon_url', type='right', assume_unique_left=True
    )


def get_short_titles_stream(job):
    """Build a stream with the most frequent short title per app and language.

    Reads the daily tables under ``YT_METRIKA_PATH_1_DAY`` for the last
    ``SHORT_TITLES_COLLECTION_PERIOD`` days, keeps launcher
    ``workspace_config`` events, parses them and, for every
    (package_name, language) pair, keeps the ``short_title`` seen most often.

    :param job: nile job the source tables are attached to.
    :return: stream of records without the intermediate ``count`` field.
    """
    # Take a single snapshot of "today" so both ends of the range agree.
    today = date.today()
    date_range = (today - timedelta(days=SHORT_TITLES_COLLECTION_PERIOD), today)

    # PARSED_REGION_AND_LANGUAGES holds (language, region) pairs, so the
    # language is the FIRST element. Filtering by the second element (region
    # codes such as 'us' or 'br') would drop every language whose code differs
    # from its region.
    languages = [language for language, _region in PARSED_REGION_AND_LANGUAGES]

    return job.table(
        yt.ypath_join(app.config['YT_METRIKA_PATH_1_DAY'], '{%s..%s}' % date_range),
        ignore_missing=True
    ).filter(
        qf.equals('APIKey', app.config['YT_LAUNCHER_API_KEY']),
        qf.equals('EventName', 'workspace_config'),
        qf.defined('EventValue'),
        # NOTE(review): this is a lexicographic string comparison, so e.g.
        # '10.0' does not compare greater than '2.0' -- confirm whether
        # two-digit major versions can occur.
        qf.custom(lambda x: x > '2.0', 'AppVersionName')
    ).map(
        EventValueMapper()
    ).filter(
        qf.equals('action', 'config'),
        qf.defined('screens', 'lang'),
    ).map(
        parse_workspace_config
    ).filter(
        qf.one_of('language', languages)
    ).groupby(
        # Count how often each short title was reported
        'package_name', 'language', 'short_title'
    ).aggregate(
        count=na.count()
    ).groupby(
        # Keep the most frequent short title per app and language
        'package_name', 'language'
    ).top(
        1, by='count'
    ).project(
        ne.all('count')
    )


def prepare_static_table():
    """Collect today's parsed Google Play items of all regions into one table.

    For every (language, region) pair in ``PARSED_REGION_AND_LANGUAGES`` the
    region's items table is unpacked, filtered down to records that have a
    rating, screenshots and an ``orig-size`` for every extended screenshot,
    and tagged with ``region``, ``language`` and ``updated_at``. All regions
    are concatenated into a table named after today's date.

    :return: path of the resulting static table.
    """
    result_table = yt.ypath_join(app.config['YT_PATH_PARSED_ITEMS'], date.today().isoformat())
    job = get_cluster().job()

    logger.info('Collecting fresh parsed data into %s', result_table)
    updated_at = int(time.time())

    def every_screen_has_orig_size(screens_ex):
        # True only when each entry is non-empty and carries meta['orig-size'].
        for screen in screens_ex:
            if not (screen and screen.get('meta') and screen['meta'].get('orig-size')):
                return False
        return True

    def region_source(language, region):
        # Unpack one region's items and keep only the complete records.
        unpacked = job.table(
            app.config['YT_PATH_ITEMS'].format(region=region)
        ).map(parsed_google_play_unpacker)
        complete = unpacked.filter(
            qf.defined('rating'),
            qf.nonzero('screens'),
            qf.nonzero('screens_ex'),
            qf.custom(every_screen_has_orig_size, 'screens_ex'),
        )
        return complete.project(
            ne.all(),
            region=ne.const(region),
            language=ne.const(language),
            updated_at=ne.const(updated_at),
        )

    sources = [region_source(language, region) for language, region in PARSED_REGION_AND_LANGUAGES]

    job.concat(*sources).put(result_table)
    job.run()
    return result_table


def update_category_translations_on_yt(parsed_apps_table):
    """Write the most popular translation of every category to YT.

    The result, sorted by ``category``, is stored in
    ``YT_PATH_CATEGORY_TRANSLATIONS``.

    :param parsed_apps_table: path of the static table with parsed apps.
    """
    job = get_cluster().job()

    # Unfold zipped sequences
    # [Record(language='ru', subcategory_eng=['a','b','c'], subcategory=['D','E','F'])]
    # as
    # [Record(category='a', language='ru', translation='D'),
    #  Record(category='b', language='ru', translation='E'),
    #  Record(category='c', language='ru', translation='F')]
    category_pairs = job.table(parsed_apps_table).project(
        'language',
        qe.custom('zipped', zip, 'subcategory_eng', 'subcategory'),
        qe.unfold('unzipped', 'zipped'),
        qe.item('category', 0, 'unzipped'),
        qe.item('translation', 1, 'unzipped'),
    )

    # Count translation popularity
    popularity = category_pairs.groupby(
        'language', 'category', 'translation'
    ).aggregate(count=na.count())

    # Select most popular translation for category in language
    best_translations = popularity.groupby(
        'language', 'category'
    ).top(1, by='count').project(ne.all('count'))

    best_translations.sort('category').put(YT_PATH_CATEGORY_TRANSLATIONS)
    job.run()


def save_category_translations_to_mongo():
    """Upsert category translations read from YT into the translations collection.

    Rows of ``YT_PATH_CATEGORY_TRANSLATIONS`` are grouped by consecutive
    ``category`` values; each group becomes one document with id
    ``backend_categories_<category lowercased>``. A value from
    ``CATEGORY_TRANSLATION_OVERRIDES`` wins over the collected translation
    when one is defined for the row's language.
    """
    logger.info('Updating translations')
    collection_name = app.config['LAUNCHER_TRANSLATIONS_COLLECTION']
    translations = localization_mongo.db[collection_name]
    rows_by_category = groupby(
        get_cluster().read(YT_PATH_CATEGORY_TRANSLATIONS),
        key=itemgetter('category')
    )
    for category, rows in rows_by_category:
        overrides = CATEGORY_TRANSLATION_OVERRIDES.get(category, {})
        values = []
        for row in rows:
            values.append({
                'conditions': {'locale': {'language': row.language}},
                'value': overrides.get(row.language) or row.translation
            })
        document_id = 'backend_categories_%s' % category.lower()
        translations.update_one(
            {'_id': document_id},
            {'$set': {'updated_at': datetime.utcnow(), 'values': values}},
            upsert=True
        )
    logger.info('Translations updated')


def filter_unpopular_items(installs, min_percentile=60):
    """Keep only records whose popularity exceeds the given percentile.

    The percentile of ``popularity`` over ``installs`` is computed into a
    checkpointed stream and handed to the mapper as a file named
    ``threshold``.

    :param installs: nile stream of records with a ``popularity`` field.
    :param min_percentile: percentile of ``popularity`` used as the cut-off.
    :return: stream with the records strictly above the cut-off.
    """
    percentile = installs.aggregate(threshold=na.percentile('popularity', min_percentile)).checkpoint('threshold')
    threshold_file = files.StreamFile(percentile, 'threshold')

    def items_filter(records):
        # The file carries the single aggregated record; read it once.
        # ``next()`` builtin works on both Python 2 and 3 iterators, unlike
        # the Python-2-only ``.next()`` method.
        threshold = next(get_records_from_file('threshold')).threshold
        for record in records:
            if record.popularity > threshold:
                yield record

    return installs.map(
        items_filter,
        files=[threshold_file]
    )


def update_apps_search_index_on_yt(parsed_apps_table, dataset_name, country):
    """Build both variants of the apps search index on YT.

    Stems are computed from the name and author of ``ru``/``en`` parsed apps
    and merged per package. The stems are inner-joined with two popularity
    sources: installs from ``YT_PATH_USERS_FULL`` with unpopular items
    filtered out (written with the ``_expanded`` suffix) and the dataset's
    result table for ``country`` (written without a suffix).

    :param parsed_apps_table: path of the static table with parsed apps.
    :param dataset_name: name passed to ``get_dataset_processor``.
    :param country: country used to resolve the dataset's result table name.
    """
    def make_stems(title, author):
        return stemmize('%s %s' % (title, author['name']))

    def language_reducer(groups):
        # Merge the stems of all language versions of a package, deduplicated.
        for key, records in groups:
            unique_stems = frozenset(stem for record in records for stem in record['stems'])
            yield Record(key, stems=list(unique_stems))

    def calculate_popularity(installs):
        # Popularity of an item is the number of its install records.
        counted = installs.groupby('item').aggregate(popularity=na.count())
        return counted.project('popularity', package_name='item')

    dataset_processor = get_dataset_processor(dataset_name)
    popularity_source = get_table_name(dataset_processor.yt_table_result, country)
    job = get_cluster().job()

    users_installs = job.table(app.config['YT_PATH_USERS_FULL']).map(
        AdvisorMongoInstallMapper(),
        intensity='cpu'
    )

    popularity_expanded = filter_unpopular_items(calculate_popularity(users_installs))
    popularity = calculate_popularity(job.table(popularity_source))

    localized_apps = job.table(parsed_apps_table).filter(
        qf.one_of('language', ['ru', 'en'])
    )
    parsed_apps = localized_apps.project(
        package_name='id',
        stems=ne.custom(make_stems, 'name', 'author'),
    ).groupby('package_name').reduce(language_reducer)

    outputs = (
        (popularity_expanded, YT_PATH_APPS_SEARCH_INDEX + '_expanded'),
        (popularity, YT_PATH_APPS_SEARCH_INDEX + ''),
    )
    for popularity_values, destination in outputs:
        parsed_apps.join(popularity_values, by='package_name', type='inner').put(destination)
    job.run()


def save_apps_search_index_to_mongo():
    """Copy both search index tables from YT into Mongo collections.

    For each variant (``_expanded`` and the plain one) the records are loaded
    into a temporary collection, a ``stem_search`` index on ``stems`` is
    created, and the temporary collection is renamed over the target one.
    """
    logger.info('Downloading apps_search_index info to mongo')
    cluster = get_cluster()
    db = jafar_mongo.db
    staging_name = APPS_SEARCH_INDEX + '_temp'

    for suffix in ('_expanded', ''):
        records = cluster.read(YT_PATH_APPS_SEARCH_INDEX + suffix)

        # Fill a scratch collection first and swap it in only when complete.
        staging = db[staging_name]
        staging.drop()
        staging.insert_many(record.to_dict() for record in records)

        logger.info('Creating indexes ' + suffix)
        staging.create_index([('stems', 1)], name='stem_search')
        staging.rename(APPS_SEARCH_INDEX + suffix, dropTarget=True)


def prepare_dynamic_table():
    """Build today's apps-info table from recently parsed items.

    Returns early when today's table already exists and is not empty.
    Otherwise the table is created via ``create_dyntable`` with
    ``APPS_INFO_SCHEMA`` and filled from the parsed-items tables of the last
    ``APP_INFO_COLLECTION_INTERVAL`` days: the latest record per
    (id, language) is kept, fields are renamed to the schema's names, short
    titles and icon data are joined in, and the result is sorted by
    (package_name, language). The table is finally merged into itself to
    rewrite its chunks.

    :return: path of today's table.
    """
    # Single snapshot of "today": the table name and the end of the input
    # range must refer to the same day even when run around midnight.
    end_date = date.today()
    start_date = end_date - timedelta(days=app.config['APP_INFO_COLLECTION_INTERVAL'])

    def extract_screens_ex(screen_ex):
        # Keep only the original size of every screenshot. A list
        # comprehension always produces a list, regardless of how ``map``
        # behaves on the interpreter running the job.
        return [{u'meta': {u'orig-size': s['meta']['orig-size']}} for s in screen_ex]

    result_table = yt.ypath_join(app.config['YT_PATH_ITEMS_DYNAMIC'], end_date.isoformat())
    logger.info('Collecting data into new dyntable %s', result_table)

    if yt.exists(result_table) and not table_is_empty(yt, result_table):
        return result_table

    create_dyntable(result_table, yt, schema=APPS_INFO_SCHEMA, in_memory_mode=APPS_INFO_MODE)

    job = get_cluster().job()
    job.table(
        yt.ypath_join(app.config['YT_PATH_PARSED_ITEMS'], '{%s..%s}' % (start_date, end_date)),
        # TODO: remove "ignore_missing" when all tables will be collected
        ignore_missing=True
    ).groupby(
        # Keep the most recently updated record per app and language
        'id', 'language'
    ).top(
        1, by='updated_at'
    ).project(
        # rename "rating" field to "rating_" to use "rating" in the next project
        ne.all('rating'),
        rating_='rating'
    ).project(
        'language',
        'name',
        package_name='id',
        categories='subcategory_eng',
        content_rating='adult',
        genres='subcategory',
        icon_url='icon',
        description=ne.custom(html_to_plain_text, 'html_description'),
        # An app without a price record, or with a zero price, is free
        is_free=ne.custom(lambda x: (x is None) or (x['value'] == 0), 'price'),
        publisher=ne.custom(itemgetter('name'), 'author'),
        rating_count=ne.custom(itemgetter('votes'), 'rating_'),
        rating=ne.custom(itemgetter('value'), 'rating_'),
        release_date=ne.custom(int, 'date_release_ts'),
        screenshots='screens',
        screenshots_info=ne.custom(extract_screens_ex, 'screens_ex'),
    ).join(
        get_short_titles_stream(job), by=('package_name', 'language'), type='left'
    ).call(
        calculate_icons, job=job
    ).sort(
        'package_name', 'language',
    ).put(
        result_table,
        compression_level='lightest',
    )
    job.run()
    logger.debug('Reducing chunks size')
    # Merge the table into itself so its chunks are rewritten with the
    # block and chunk sizes given below.
    yt.run_merge(
        source_table=result_table,
        destination_table=result_table,
        spec=dict(
            force_transform=True,
            combine_chunks=True,
            job_io=dict(
                table_writer=dict(
                    block_size=256 * 2 ** 10,
                    desired_chunk_size=100 * 2 ** 20
                ))))
    return result_table


def save_apps_urls():
    """Write the unique Google Play URLs of all installed apps to YT.

    Install records from ``YT_PATH_USERS_FULL`` are mapped with
    ``AdvisorMongoInstallMapper``; every ``item`` is passed through
    ``force_ascii`` and formatted into ``GOOGLE_PLAY_URL_TEMPLATE``. The
    distinct URLs are stored under the ``key`` column of
    ``YT_PATH_APP_URLS``.
    """
    job = get_cluster().job()

    installs = job.table(app.config['YT_PATH_USERS_FULL']).map(AdvisorMongoInstallMapper())
    ascii_items = installs.project(item=ne.custom(force_ascii, 'item'))
    urls = ascii_items.project(key=ne.custom(GOOGLE_PLAY_URL_TEMPLATE.format, 'item'))

    urls.unique('key').put(
        app.config['YT_PATH_APP_URLS'],
        schema={'key': str}
    )
    job.run()


def update_apps_info():
    """Run the whole apps-info refresh on YT.

    Steps, in order: build the static parsed table, build the dynamic table,
    dynamize it and clean up old tables under both base paths, rebuild the
    search index, refresh category translations and save the app URLs.
    """
    parsed_apps = prepare_static_table()
    table = prepare_dynamic_table()

    config = app.config
    # One cleanup description per base path, each with its own TTL.
    parsed_cleanup = {
        'base_path': config['YT_PATH_PARSED_ITEMS'],
        'ttl': config['APP_INFO_PARSED_TTL_DAYS'],
    }
    dynamic_cleanup = {
        'base_path': config['YT_PATH_ITEMS_DYNAMIC'],
        'ttl': config['APP_INFO_DYNAMIC_TTL_DAYS'],
    }
    dynamize_static_and_cleanup(
        table=table,
        schema=APPS_INFO_SCHEMA,
        cleanup_kwargs_list=[parsed_cleanup, dynamic_cleanup],
        in_memory_mode=APPS_INFO_MODE
    )

    update_apps_search_index_on_yt(parsed_apps, 'advisor_mongo', 'ru')
    update_category_translations_on_yt(parsed_apps)
    save_apps_urls()


def mount_apps_info_dynamic_table(date):
    """Mount the apps-info table of the given date on both YT clusters.

    :param date: name of the table under ``YT_PATH_ITEMS_DYNAMIC``.
    :raises Exception: when the table is missing or empty on a cluster.
    """
    proxies = (app.config['YT_PROXY'], app.config['YT_RESERVE_PROXY'])
    for cluster in proxies:
        yt_client = yt.YtClient(cluster, app.config['YT_TOKEN'])
        table = yt.ypath_join(app.config['YT_PATH_ITEMS_DYNAMIC'], date)

        # Refuse to mount a table that is absent or carries no data.
        table_ready = yt_client.exists(table) and not table_is_empty(yt_client, table)
        if not table_ready:
            message = 'Table "{}" does not exist or is empty on cluster "{}"'.format(table, cluster)
            raise Exception(message)

        mount_dynamic_table(table, yt_client, app.config['YT_PATH_DYNAMIC_LINK'])


# Command that applies the app's YT configuration and runs update_apps_info().
class UpdateAppsInfoYT(Command):
    def run(self):
        # The yt client must be configured before any YT call is made.
        yt_config = app.config['YT_CONFIG']
        yt.update_config(yt_config)
        update_apps_info()


# Command that copies the search index and the category translations
# from YT into Mongo.
class UpdateAppsInfoMongo(Command):
    def run(self):
        # The yt client must be configured before the YT tables are read.
        yt_config = app.config['YT_CONFIG']
        yt.update_config(yt_config)
        save_apps_search_index_to_mongo()
        save_category_translations_to_mongo()


# Command that mounts the apps-info dynamic table of a given date.
class MountAppsInfoDynamicTable(Command):
    # The mandatory --date option is handed to run() as ``date``.
    option_list = (
        Option(
            '--date',
            dest='date',
            action='store',
            type=str,
            required=True,
            help='Table date to mount'
        ),
    )

    def run(self, date):
        # The yt client must be configured before any YT call is made.
        yt_config = app.config['YT_CONFIG']
        yt.update_config(yt_config)
        mount_apps_info_dynamic_table(date)
