# -*- coding: utf-8 -*-
import os
import sys
from library.python.nyt import client as nyt_client
# The command-line arguments are handed over encoded (``str.encode()``) rather
# than as the raw ``sys.argv`` list; the plain ``initialize(sys.argv)`` form
# was used here previously.
# NOTE(review): this call sits before the ``yt.wrapper`` import -- presumably
# the order matters; confirm before moving it.
nyt_client.initialize(list(map(lambda it: it.encode(), sys.argv)))

import yt.wrapper as yt_wrapper
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.features.dssm.fast_dssm45 import fast_dssm_step5


# Module-level logger used for the progress messages emitted in this file.
logger = get_basic_logger(__name__)


def run_get_max_features(path_config, yt_client):
    """Run fast DSSM step 5 and publish its output as a sorted result table.

    Steps performed:
      1. ``fast_dssm_step5`` is run with ``path_config.id2_dot_tmp`` and a
         temporary table created under ``//tmp``.
      2. The temporary table is sorted into ``path_config.result_dssm_table``
         by ``path_config.ext_id_key``; the ``key`` column is renamed to that
         name as part of the sort input path.
      3. The result table is merged onto itself with ``combine_chunks=True``.
      4. ``path_config.id2_dot_tmp`` is removed.

    Args:
        path_config: object providing ``id2_dot_tmp``, ``ext_id_key``,
            ``result_dssm_table``, ``tag`` and ``cloud_nodes_spec`` (a mapping
            expanded into the sort/merge operation specs).
        yt_client: YT client used for the temp table, sort, merge and remove
            calls; its ``config['proxy']['url']`` is passed to
            ``fast_dssm_step5``.

    Raises:
        AssertionError: if neither ``yt_wrapper.config['token']`` nor the
            ``YT_TOKEN`` environment variable holds a token.
    """
    # Fail fast, before any YT resources are acquired.
    yt_token = yt_wrapper.config['token'] or os.environ.get('YT_TOKEN')
    if not yt_token:
        # Raised explicitly (not via ``assert``) so the check also runs under
        # ``python -O``; the exception type is kept for backward compatibility.
        raise AssertionError('[FAST STEP 5] No YT_TOKEN provided')

    # NOTE(review): the transaction is opened through the module-level
    # ``yt_wrapper`` client, not ``yt_client`` -- confirm whether the
    # ``yt_client`` calls below are meant to be covered by it.
    with yt_wrapper.Transaction():
        with yt_client.TempTable('//tmp', prefix='xprod-dssm-sorted') as tmp_sorted_dssm:
            logger.info('[FAST STEP 5] Start fast max reducer')
            fast_dssm_step5(
                yt_token,
                yt_client.config['proxy']['url'],
                str(path_config.id2_dot_tmp),
                tmp_sorted_dssm)
            logger.info('[FAST STEP 5] Done fast max reducer')

            # Hack to rename column during sort operation: the attribute
            # prefix on the input path renames ``key`` to the ext-id key.
            rename_columns = '<rename_columns={{key={ext_id}}}>'.format(
                ext_id=path_config.ext_id_key)
            yt_client.run_sort(
                rename_columns + tmp_sorted_dssm,
                path_config.result_dssm_table,
                sort_by=path_config.ext_id_key,
                spec=dict(
                    title='[{}] Sort dssm table'.format(path_config.tag),
                    **path_config.cloud_nodes_spec))

            # In-place merge (same source and destination) to combine chunks.
            yt_client.run_merge(
                path_config.result_dssm_table,
                path_config.result_dssm_table,
                spec=dict(
                    title='[{}] Merge chunks for dssm'.format(path_config.tag),
                    combine_chunks=True,
                    **path_config.cloud_nodes_spec))

        # The intermediate input table is dropped once the temp table scope
        # has been left.
        yt_client.remove(path_config.id2_dot_tmp)
