# -*- coding: utf-8 -*-

import json
import yt.wrapper as yt


class EntityBaseRecord(object):
    """Types, images and titles extracted from one JSON-encoded entity record.

    Only three parts of the record are kept:
      * ``isa/otype``  -> set of ``(value, subvalue)`` pairs,
      * ``Image``      -> image url per locale (only images shown on SERP),
      * ``Title``      -> title per locale.

    Several records can be folded together with :meth:`update`.
    """

    @staticmethod
    def __extract_otypes(record_json_repr):
        """Return the set of ``(value, subvalue)`` pairs listed in ``isa/otype``.

        A missing ``isa`` section, or a missing, null or empty ``otype`` list,
        yields an empty set. A missing ``subvalue`` is stored as ``''``.

        Raises:
            ValueError: an ``otype`` entry has no ``value`` field.
        """
        if 'isa' not in record_json_repr:
            return set()
        isa = record_json_repr['isa']
        if ('otype' not in isa) or (not isa['otype']):
            return set()
        result = set()
        for otype in isa['otype']:
            if 'value' not in otype:
                raise ValueError('Expected to have required isa/otype/value field')
            subvalue = otype['subvalue'] if 'subvalue' in otype else ''
            result.add((otype['value'], subvalue))
        return result

    @staticmethod
    def __extract_image_per_locale(record_json_repr):
        """Return ``{locale: image_url}`` for images that are shown on SERP.

        Images without any ``RelevLocale`` and images whose ``show_on_serp`` is
        falsy are skipped. When several images share a locale, the last one
        in the list wins.

        Raises:
            ValueError: an image with locales lacks ``show_on_serp``, or a
                shown image lacks ``url``/``value`` or has them different.
        """
        if ('Image' not in record_json_repr) or (record_json_repr['Image'] is None):
            return {}
        result = {}
        for image_info in record_json_repr['Image']:
            locales = image_info['RelevLocale'] if 'RelevLocale' in image_info else []
            if not locales:
                continue
            if 'show_on_serp' not in image_info:
                raise ValueError('Expected to have required Image/show_on_serp field')
            if not image_info['show_on_serp']:
                continue
            # 'url' and 'value' are duplicated fields: both must be present and equal.
            if ('url' not in image_info) or ('value' not in image_info):
                raise ValueError('Expected to have required Image/(url or value) fields')
            if image_info['url'] != image_info['value']:
                raise ValueError('Expected to have Image/url == Image/value')
            image_url = image_info['url']
            for locale in locales:
                result[locale] = image_url
        return result

    @staticmethod
    def __extract_title_per_locale(record_json_repr):
        """Return ``{locale: title}`` built from the ``Title`` list.

        A missing or null ``Title`` yields an empty dict (same treatment as
        ``Image``). Titles without any ``RelevLocale`` are skipped. When several
        titles share a locale, the last one in the list wins.

        Raises:
            ValueError: a title with locales has no ``value`` field.
        """
        if ('Title' not in record_json_repr) or (record_json_repr['Title'] is None):
            return {}
        result = {}
        for title_info in record_json_repr['Title']:
            locales = title_info['RelevLocale'] if 'RelevLocale' in title_info else []
            if not locales:
                continue
            if 'value' not in title_info:
                raise ValueError('Expected to have required Title/value field')
            title = title_info['value']
            for locale in locales:
                result[locale] = title
        return result

    def __init__(self, record_str_repr):
        """Parse ``record_str_repr`` (a JSON string) and extract the kept fields.

        Raises:
            ValueError: the string is not valid JSON or a required field is
                missing (see the extractors above).
        """
        record_json_repr = json.loads(record_str_repr)
        self._types = EntityBaseRecord.__extract_otypes(record_json_repr)
        self._image_per_locale = EntityBaseRecord.__extract_image_per_locale(record_json_repr)
        self._title_per_locale = EntityBaseRecord.__extract_title_per_locale(record_json_repr)

    def is_full(self):
        """Return True when the record has at least one type, image and title."""
        return bool(self._types) and bool(self._image_per_locale) \
            and bool(self._title_per_locale)

    def is_film(self):
        """Return True when any type value contains the substring ``'Film'``."""
        return any('Film' in _type for _type, _subtype in self._types)

    def update(self, other_record):
        """Merge ``other_record`` into this one; on locale clashes the other record wins."""
        self._types.update(other_record._types)
        self._image_per_locale.update(other_record._image_per_locale)
        self._title_per_locale.update(other_record._title_per_locale)

    def to_dict(self):
        """Return a plain dict of lists describing the record.

        Types are rendered as ``'value/subvalue'`` strings. ``image_locales`` /
        ``images`` and ``title_locales`` / ``titles`` are parallel lists.
        """
        types_repr = ['{}/{}'.format(value, subvalue) for value, subvalue in self._types]
        # list(...) is required on Python 3, where keys()/values() return views
        # that are not JSON-serializable; on Python 2 it is a harmless copy.
        return {
            'types': types_repr,
            'image_locales': list(self._image_per_locale.keys()),
            'images': list(self._image_per_locale.values()),
            'title_locales': list(self._title_per_locale.keys()),
            'titles': list(self._title_per_locale.values())
        }


def build_delayed_view_entity_base_reducer(key_fields, rows):
    """Fold all rows of one key into a single record and emit it if it is a full film.

    Each row's ``'value'`` is parsed as an ``EntityBaseRecord``; later rows are
    merged into the first one. At most one output row is yielded: the merged
    record's dict plus ``'entity_id'`` (taken from ``key_fields['key']``), and
    only when the merged record is both full and a film.

    Raises:
        ValueError: a row failed to parse; the message is prefixed with the entity id.
    """
    entity_id = key_fields['key']

    accumulated = None
    for input_row in rows:
        try:
            parsed = EntityBaseRecord(input_row['value'])
        except ValueError as parse_error:
            # Re-raise with the entity id so the failing record can be located.
            raise ValueError('Entity id {}: {}'.format(entity_id, str(parse_error)))

        if accumulated is not None:
            accumulated.update(parsed)
        else:
            accumulated = parsed

    # No rows at all: nothing to emit.
    if accumulated is None:
        return
    if not (accumulated.is_full() and accumulated.is_film()):
        return

    output_row = {'entity_id': entity_id}
    output_row.update(accumulated.to_dict())
    yield output_row


def get_yt_client(yt_proxy, yt_token):
    """Create a ``yt.YtClient`` for the given proxy url and token.

    The config also pins the python binary used for pickling to the skynet one.
    """
    proxy_section = {'url': yt_proxy}
    pickling_section = {'python_binary': '/skynet/python/bin/python'}
    client_config = {
        'proxy': proxy_section,
        'token': yt_token,
        'pickling': pickling_section,
    }
    return yt.YtClient(config=client_config)


def build_delayed_view_entity_base(yt_proxy, yt_token, entity_main_base_yt_table_path,
                                   entity_delta_base_yt_table_path, delayed_view_entity_base_yt_table_path):
    """Run the reduce that builds the delayed view entity base table.

    Both input tables (main and delta entity bases) must exist; they are reduced
    by ``'key'`` with ``build_delayed_view_entity_base_reducer`` into the
    destination table, using JSON for input and output.

    Raises:
        RuntimeError: one of the input tables does not exist.
    """
    yt_client = get_yt_client(yt_proxy, yt_token)

    source_tables = [entity_main_base_yt_table_path, entity_delta_base_yt_table_path]
    source_descriptions = ['entity main base', 'entity delta base']

    # Fail early with a readable message rather than inside the YT operation.
    for yt_table_path, yt_table_name in zip(source_tables, source_descriptions):
        if yt_client.exists(yt_table_path):
            continue
        message = 'Input table containing {} does not exist at yt table path {}'.format(yt_table_name, yt_table_path)
        raise RuntimeError(message)

    reducer_spec = {'reducer': {'enable_input_table_index': False}}
    yt_client.run_reduce(
        build_delayed_view_entity_base_reducer,
        source_tables,
        delayed_view_entity_base_yt_table_path,
        input_format='json',
        output_format='json',
        reduce_by=['key'],
        spec=reducer_spec
    )


def __main(args):
    """Script entry point: build the delayed view entity base from parsed CLI ``args``."""
    import os

    # The token is optional: when YT_TOKEN is not set, None is passed on
    # so that another way of auth is used.
    yt_token = os.environ.get('YT_TOKEN')

    build_delayed_view_entity_base(
        args.yt_proxy,
        yt_token,
        args.entity_main_base_yt_table_path,
        args.entity_delta_base_yt_table_path,
        args.delayed_view_entity_base_yt_table_path
    )


def __get_args():
    """Parse the command line; all four options are required strings."""
    import argparse

    required_options = (
        '--yt_proxy',
        '--entity_main_base_yt_table_path',
        '--entity_delta_base_yt_table_path',
        '--delayed_view_entity_base_yt_table_path',
    )
    parser = argparse.ArgumentParser(description='Build delayed view entity base on YT from main and delta entity bases')
    for option_name in required_options:
        parser.add_argument(option_name, type=str, required=True)
    return parser.parse_args()


# Run the job only when executed as a script: parse the CLI options, then build the table.
if __name__ == '__main__':
    __main(__get_args())
