# -*- coding: utf-8 -*-
import yt.wrapper as yt_wrapper
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.yt import yt_utils
from datacloud.dev_utils.yt import yt_ops
from datacloud.dev_utils.id_value import id_value_lib
from datacloud.dev_utils.yt.converter import TableConverter

# Module-level logger created from this module's name.
logger = get_basic_logger(__name__)

# Only the end-to-end converter is exported through ``from ... import *``;
# the intermediate converters and YT callbacks below are not listed.
__all__ = [
    'RequestsToFeaturesConverter'
]


# Root folder onto which the default email/phone ``id_value -> cid`` table
# paths are joined (see the default arguments of HashedIdToCidConverter).
CRYPTA_ROOT_FOLDER = '//home/x-products/production/crypta_v2'
# Tag inserted in square brackets at the start of YT operation titles.
TAG = 'STABILITY-SCORES'


def hash_id_mapper(rec):
    """Swap the raw ``user_id`` of a row for its hashed ``id_value``.

    The row is modified in place: ``user_id`` is removed and ``id_value`` is
    set to ``id_value_lib.count_md5`` of the removed value. Every other field
    is passed through untouched.

    :param rec: row (dict) that contains a ``user_id`` field
    :return: generator yielding the single updated row
    :raises KeyError: if the row has no ``user_id`` field
    """
    raw_user_id = rec.pop('user_id')
    hashed_value = id_value_lib.count_md5(raw_user_id)
    rec['id_value'] = hashed_value
    yield rec


@yt_wrapper.with_context
def match_hashed_ids(key, recs, context):
    """Join request rows with cid rows that share the same reduce key.

    Rows seen while ``context.table_index == 0`` supply ``partner_id``,
    ``score_id`` and ``stability_segments``; every row seen afterwards from
    any other table supplies a ``cid``. One output row is produced per cid
    row. If several table-0 rows arrive for one key, only the most recent one
    is used for the following cid rows.

    NOTE(review): this relies on table-0 rows arriving before rows of the
    other tables within a key -- confirm against the YT reduce ordering.

    :param key: reduce key (unused)
    :param recs: iterable of rows for this key
    :param context: YT context object exposing ``table_index`` of the
        current row
    :return: generator of dicts with ``partner_id``, ``score_id``,
        ``stability_segments`` and ``cid``
    """
    request_fields = None
    for rec in recs:
        if context.table_index == 0:
            request_fields = {
                'partner_id': rec['partner_id'],
                'score_id': rec['score_id'],
                'stability_segments': rec['stability_segments']
            }
        elif request_fields is not None:
            # Yield a fresh dict per match: re-yielding one mutated dict makes
            # all rows alias each other if the consumer buffers them.
            yield dict(request_fields, cid=rec['cid'])
        else:
            # First row of the key is not a request row, so there is nothing
            # to join with; skip the rest of this key.
            break


@yt_wrapper.with_context
def interesting_feature_extractor_reducer(key, recs, context):
    """Join request rows with feature rows that share the same reduce key.

    Rows seen while ``context.table_index == 0`` supply ``partner_id``,
    ``score_id`` and ``stability_segments``; every row seen afterwards from
    any other table supplies ``features``. One output row is produced per
    feature row. If several table-0 rows arrive for one key, only the most
    recent one is used for the following feature rows.

    NOTE(review): this relies on table-0 rows arriving before rows of the
    other tables within a key -- confirm against the YT reduce ordering.

    :param key: reduce key (unused)
    :param recs: iterable of rows for this key
    :param context: YT context object exposing ``table_index`` of the
        current row
    :return: generator of dicts with ``partner_id``, ``score_id``,
        ``stability_segments`` and ``features``
    """
    request_fields = None
    for rec in recs:
        if context.table_index == 0:
            request_fields = {
                'partner_id': rec['partner_id'],
                'score_id': rec['score_id'],
                'stability_segments': rec['stability_segments']
            }
        elif request_fields is not None:
            # Yield a fresh dict per match: re-yielding one mutated dict makes
            # all rows alias each other if the consumer buffers them.
            yield dict(request_fields, features=rec['features'])
        else:
            # First row of the key is not a request row, so there is nothing
            # to join with; skip the rest of this key.
            break


# Fallback value for ``stability_segments`` used by ExtractIdsMapper when a
# request carries no ``stability_segment``. The very same list object is put
# into every such record, so it should not be mutated.
DEFAULT_STABILITY_SEGMENTS = []


class ExtractIdsMapper(object):
    """Mapper that extracts user ids from score API log records.

    For a record of the configured partner whose response contains the
    configured score with a truthy ``has_score``, one row is emitted per
    email / phone found in ``context['query']['user_ids']``.

    Emitted rows have the fields ``partner_id``, ``score_id``, ``user_id``,
    ``id_type`` (``'email'`` or ``'phone'``) and ``stability_segments``.
    """

    def __init__(self, partner_id, score_id):
        """
        :param partner_id: only records with this ``partner_id`` are processed
        :param score_id: only this score name produces output rows
        """
        self._partner_id = partner_id
        self._score_id = score_id

    def __call__(self, rec):
        """Yield id rows for a single log record.

        :param rec: log row with ``context`` and ``partner_id`` fields
        :return: generator of output rows (possibly empty)
        :raises KeyError: if ``context`` or ``partner_id`` is absent
        """
        data = rec['context']
        partner_id = rec['partner_id']
        if partner_id != self._partner_id:
            # A bare ``return`` ends the generator. ``raise StopIteration``
            # inside a generator becomes RuntimeError on Python 3.7+.
            return
        if 'query' not in data or 'user_ids' not in data['query']:
            return

        query = data['query']
        user_ids = query['user_ids']
        # ``or`` fallbacks also cover keys that are present but set to None.
        emails = user_ids.get('emails') or []
        phones = user_ids.get('phones') or []

        response_ids = query.get('response_ids') or {}
        stability_segment = response_ids.get('stability_segment')
        if stability_segment:
            stability_segments = [stability_segment]
        else:
            stability_segments = DEFAULT_STABILITY_SEGMENTS

        # A response without ``scores`` simply yields nothing instead of
        # failing on iteration over None.
        response = data.get('response') or {}
        scores = [
            item['score_name']
            for item in (response.get('scores') or [])
            if item['has_score']
        ]

        id_sources = (('email', emails), ('phone', phones))
        for score_id in scores:
            if score_id != self._score_id:
                continue
            for id_type, id_records in id_sources:
                for id_rec in id_records:
                    # Id records lacking the expected key are skipped.
                    if id_type not in id_rec:
                        continue
                    yield {
                        'partner_id': partner_id,
                        'score_id': score_id,
                        'user_id': id_rec[id_type],
                        'id_type': id_type,
                        'stability_segments': stability_segments
                    }


class LogsToRequestIdConverter(TableConverter):
    """Converter from API request logs to a table of user ids.

    Runs one map-reduce operation: ``ExtractIdsMapper`` emits id rows for the
    configured partner and score, and ``yt_ops.unique_reducer`` is applied per
    ``(partner_id, score_id, user_id)`` key (presumably keeping one row per
    key -- confirm against ``yt_ops``).
    """

    def __init__(self, yt_client, partner_id, score_id):
        """
        :param yt_client: YT client handed to the base converter
        :param partner_id: partner whose requests are extracted
        :param score_id: score whose requests are extracted
        """
        super(LogsToRequestIdConverter, self).__init__(yt_client)
        self._partner_id = partner_id
        self._score_id = score_id
        columns = (
            ('partner_id', 'string'),
            ('score_id', 'string'),
            ('user_id', 'string'),
            ('id_type', 'string'),
            ('stability_segments', 'any'),
        )
        self._output_table_schema = [
            {'name': column_name, 'type': column_type}
            for column_name, column_type in columns
        ]

    def _convert(self, input_table, output_table):
        """Run the extraction map-reduce from ``input_table`` into ``output_table``."""
        destination = yt_wrapper.TablePath(
            output_table,
            schema=self._output_table_schema
        )
        id_extractor = ExtractIdsMapper(self._partner_id, self._score_id)
        operation_title = '[{}] Get unique ids from api requests'.format(TAG)
        self._yt_client.run_map_reduce(
            id_extractor,
            yt_ops.unique_reducer,
            input_table,
            destination,
            reduce_by=('partner_id', 'score_id', 'user_id'),
            spec={'title': operation_title}
        )


class IdToHashedIdConverter(TableConverter):
    """Converter that replaces raw user ids with hashed ``id_value`` values.

    Maps every input row through ``hash_id_mapper`` and then sorts the
    resulting table by ``id_value``.
    """

    def __init__(self, yt_client):
        """
        :param yt_client: YT client handed to the base converter
        """
        super(IdToHashedIdConverter, self).__init__(yt_client)
        columns = (
            ('partner_id', 'string'),
            ('score_id', 'string'),
            ('id_value', 'string'),
            ('id_type', 'string'),
            ('stability_segments', 'any'),
        )
        self._output_table_schema = [
            {'name': column_name, 'type': column_type}
            for column_name, column_type in columns
        ]

    def _convert(self, input_table, output_table):
        """Hash ids of ``input_table`` into ``output_table`` and sort it."""
        destination = yt_wrapper.TablePath(
            output_table,
            schema=self._output_table_schema
        )

        map_spec = {'title': '[{}] Apply hash'.format(TAG)}
        self._yt_client.run_map(
            hash_id_mapper,
            input_table,
            destination,
            spec=map_spec
        )

        sort_spec = {'title': '[{}] Sort id_value table'.format(TAG)}
        self._yt_client.run_sort(
            destination,
            sort_by='id_value',
            spec=sort_spec
        )


class HashedIdToCidConverter(TableConverter):
    """Converter from hashed ids to cids.

    Reduces the input table together with the email and phone
    ``id_value -> cid`` tables by ``id_value`` using ``match_hashed_ids`` and
    sorts the result by ``cid``.
    """

    def __init__(
            self,
            yt_client,
            ext_email_id_to_cid_table=yt_wrapper.ypath_join(CRYPTA_ROOT_FOLDER, 'crypta_db_last/email_id_value_to_cid'),
            ext_phone_id_to_cid_table=yt_wrapper.ypath_join(CRYPTA_ROOT_FOLDER, 'crypta_db_last/phone_id_value_to_cid')):
        """
        :param yt_client: YT client handed to the base converter
        :param ext_email_id_to_cid_table: path of the email
            ``id_value -> cid`` table; defaults to a table under
            ``CRYPTA_ROOT_FOLDER``
        :param ext_phone_id_to_cid_table: path of the phone
            ``id_value -> cid`` table; defaults to a table under
            ``CRYPTA_ROOT_FOLDER``
        """
        super(HashedIdToCidConverter, self).__init__(yt_client)
        self._email_id_to_cid_table = ext_email_id_to_cid_table
        self._phone_id_to_cid_table = ext_phone_id_to_cid_table
        self._output_table_schema = [
            {'name': 'partner_id', 'type': 'string'},
            {'name': 'score_id', 'type': 'string'},
            {'name': 'cid', 'type': 'string'},
            {'name': 'stability_segments', 'type': 'any'}
        ]

    def _convert(self, input_table, output_table):
        """Match hashed ids of ``input_table`` to cids and write ``output_table``.

        The request table must stay first in the input list:
        ``match_hashed_ids`` treats table index 0 as the request side.
        """
        # TODO: Run both operations inside a YT transaction. The body used to
        # be indented under a commented-out ``with`` block; it is now at the
        # normal method level so it no longer looks transactional.
        output_table = yt_wrapper.TablePath(output_table, schema=self._output_table_schema)
        self._yt_client.run_reduce(
            match_hashed_ids,
            [
                input_table,
                self._email_id_to_cid_table,
                self._phone_id_to_cid_table
            ],
            output_table,
            reduce_by='id_value',
            spec={'title': '[{}] Get interesting cids'.format(TAG)}
        )
        self._yt_client.run_sort(
            output_table,
            sort_by='cid',
            # Titled like every other operation of this module so that it can
            # be found by TAG.
            spec={'title': '[{}] Sort cids table'.format(TAG)}
        )


class CidToFeaturesConverter(TableConverter):
    """Converter from cids to feature rows.

    Reduces the input table together with the features table by ``cid`` using
    ``interesting_feature_extractor_reducer``, merges the output table into
    itself with ``combine_chunks`` enabled and finally sorts it by
    ``partner_id`` and ``score_id``.
    """

    def __init__(self, yt_client, features_table):
        """
        :param yt_client: YT client handed to the base converter
        :param features_table: path of the table that provides ``features``
            per ``cid``
        """
        super(CidToFeaturesConverter, self).__init__(yt_client)
        self._features_table = features_table
        columns = (
            ('partner_id', 'string'),
            ('score_id', 'string'),
            ('features', 'string'),
            ('stability_segments', 'any'),
        )
        self._output_table_schema = [
            {'name': column_name, 'type': column_type}
            for column_name, column_type in columns
        ]

    def _convert(self, input_table, output_table):
        """Extract features for the cids of ``input_table`` into ``output_table``."""
        destination = yt_wrapper.TablePath(
            output_table,
            schema=self._output_table_schema
        )
        # TODO: Add transaction

        # The cid table goes first: the reducer treats table index 0 as the
        # request side of the join.
        reduce_inputs = [input_table, self._features_table]
        reduce_spec = {'title': '[{}] Extract interesting features'.format(TAG)}
        self._yt_client.run_reduce(
            interesting_feature_extractor_reducer,
            reduce_inputs,
            destination,
            reduce_by='cid',
            spec=reduce_spec
        )

        merge_spec = {
            'title': '[{}] Merge interesting features'.format(TAG),
            'combine_chunks': True
        }
        self._yt_client.run_merge(destination, destination, spec=merge_spec)

        sort_spec = {'title': '[{}] Sort interesting features'.format(TAG)}
        self._yt_client.run_sort(
            destination,
            sort_by=['partner_id', 'score_id'],
            spec=sort_spec
        )


class RequestsToFeaturesConverter(TableConverter):
    """End-to-end converter from API request logs to a features table.

    Chains four converters with ``|``: logs -> request ids -> hashed ids ->
    cids -> features.
    """

    def __init__(self, yt_client, stability_record):
        """
        :param yt_client: YT client handed to the base converter and to every
            stage of the chain
        :param stability_record: object providing ``partner_id``, ``score_id``
            and ``features_table`` attributes (e.g. a ``StabilityRecord``)
        """
        super(RequestsToFeaturesConverter, self).__init__(yt_client)
        self._stability_record = stability_record
        # TODO: Add schema to output_table
        self._output_table_schema = []

    def _convert(self, input_table, output_table):
        """Build the converter chain and run it on the given tables."""
        # with yt_client.Transaction():  # TODO: Use transaction
        client = self._yt_client
        record = self._stability_record
        stages = [
            LogsToRequestIdConverter(client, record.partner_id, record.score_id),
            IdToHashedIdConverter(client),
            HashedIdToCidConverter(client),
            CidToFeaturesConverter(client, record.features_table),
        ]

        # Left-to-right composition, same as ``a | b | c | d``.
        pipeline = stages[0]
        for stage in stages[1:]:
            pipeline = pipeline | stage
        pipeline(input_table, output_table)


if __name__ == '__main__':
    # Manual run: executes the full requests -> features chain for one
    # hard-coded day of API logs and one hard-coded stability record.
    from datacloud.config.yt import YT_PROXY
    yt_wrapper.config.set_proxy(YT_PROXY)
    # Input log table, output table under //tmp and the model features table.
    requests_table = '//home/x-products/production/services/logs/datacloud_score_api/production/events/history/2019-01-20'
    output_table = '//tmp/r9/result-table'
    features_table = '//home/x-products/production/datacloud/models/tcs/tcs_xprod_868_20180330/current'  # ['//home/x-products/production/datacloud/models/tcs/tcs_xprod_868_20180330/current']
    yt_client = yt_utils.get_yt_client()

    from datacloud.stability.stability_record import StabilityRecord
    # Presumably (partner_id, score_id, features_table) -- confirm against
    # the StabilityRecord constructor.
    stability_rec = StabilityRecord('tcs', 'tcs_xprod_987_20181212', features_table)

    # Converters are callable: (input table, output table).
    requests_to_features = RequestsToFeaturesConverter(yt_client, stability_rec)
    requests_to_features(requests_table, output_table)
