import time
from yt.wrapper import ypath_join, TablePath
from datacloud.config.yt import (
    PRODUCTION_ROOT, DATACLOUD_FOLDER, RELIABLE_TMP_FOLDER, CRYPTA_DB_LAST_FOLDER)
from datacloud.dev_utils.yt import features
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.features.cluster.cat2index import cat2index as default_cat2index

# Module-level logger created with this module's name; PathConfig.collect_garbage
# reports its progress through it.
logger = get_basic_logger(__name__)


class PathConfig(object):
    """YT paths and table descriptors used by the cluster-features computation.

    All attributes are computed once in ``__init__`` from the root folder, the
    date and the retro/production mode. Table attributes are ``TablePath``
    objects that carry a schema plus the shared compression settings.
    """

    # Storage attributes attached to every TablePath built by this config.
    _COMPRESSION_PARAMS = {
        'compression_codec': 'brotli_3',
        'optimize_for': 'scan',
    }

    def __init__(self,
                 root=PRODUCTION_ROOT,
                 date='current',
                 cat2index=None,
                 days_to_take=175,
                 is_retro=False,
                 retro_input_yuid_table=None,
                 retro_tag=None,
                 garbage_collect_on=True,
                 use_cloud_nodes=False,
                 yuid2cid_table=None,
                 res_user2clust_table=None):
        """Build every path and table descriptor of the config.

        Args:
            root: Cypress folder under which ``/datacloud/aggregates/...`` and
                ``/datacloud/grep`` are resolved. In retro mode it is also
                where the default ``input_yuid`` table is looked up.
            date: String used as the name of the per-date tables, as the
                prefix of the tmp folder name and inside ``tag``.
            cat2index: Stored as ``self.cat2index``; the module-level default
                ``cat2index`` is used when this is falsy.
            days_to_take: Stored as-is; also embedded into ``tag`` in
                non-retro mode.
            is_retro: Selects the retro flavour of key names, ``tag`` suffix
                and the default location of ``all_users``.
            retro_input_yuid_table: Explicit path for ``all_users``. It takes
                precedence over the mode-specific default in both modes.
            retro_tag: Suffix appended to ``date`` to form the tmp folder
                name; use it to differ your tmp folder from others. Defaults
                to the current timestamp in retro mode and to ``'-PROD'``
                otherwise.
            garbage_collect_on: When falsy, ``collect_garbage`` removes
                nothing.
            use_cloud_nodes: Forwarded to ``features.cloud_nodes_spec`` by the
                ``cloud_nodes_spec`` property.
            yuid2cid_table: Overrides the default ``yuid_to_cid`` table taken
                from ``CRYPTA_DB_LAST_FOLDER``.
            res_user2clust_table: Overrides the default per-date path of
                ``res_user2clust_features``.
        """
        self.date = date
        self.is_retro = is_retro
        if retro_tag:
            self.retro_tag = retro_tag
        elif is_retro:
            # A timestamp keeps tmp folders of separate retro runs apart.
            self.retro_tag = str(time.time())
        else:
            self.retro_tag = '-PROD'
        self.cat2index = cat2index or default_cat2index
        self.days_to_take = days_to_take

        # Fixed values exposed to other code; presumably retention limits for
        # the per-date tables -- TODO confirm against the consumers.
        self.user2clust_tables_to_keep = 3
        self.url_tables_to_keep = 175

        self.data_dir = ypath_join(root, '/datacloud/aggregates/cluster')
        self.s2v_dir = ypath_join(root, '/datacloud/aggregates/normed_s2v')
        self.tmp_dir = ypath_join(
            RELIABLE_TMP_FOLDER, 'prod-tmp/cluster', date + self.retro_tag)
        self.ready_dir = ypath_join(self.data_dir, 'ready')
        self.ready_table = ypath_join(self.ready_dir, 'features')
        self.normed_s2v_ready_table = ypath_join(self.s2v_dir, 'ready/features')

        self.garbage_collect_on = garbage_collect_on
        self.use_cloud_nodes = use_cloud_nodes

        self.extracted_urls_dir = ypath_join(self.data_dir, 'urls')

        # External tables
        # NOTE(review): the attribute says cid2yuid while the parameter and
        # the default table say yuid-to-cid; the name is kept for callers.
        self.external_cid2yuid_table = (
            yuid2cid_table or ypath_join(CRYPTA_DB_LAST_FOLDER, 'yuid_to_cid'))
        self.external_crypta_host_vectors_table = ypath_join(
            DATACLOUD_FOLDER, 'bins/site2vec_04.04.2017')
        self.external_logs_dir = ypath_join(root, '/datacloud/grep')
        self.yandex_catalog_hosts = ypath_join(
            DATACLOUD_FOLDER, 'bins/yandex_catalog_hosts_v1')
        self.cluster_centers_table = ypath_join(
            DATACLOUD_FOLDER, 'bins/yandex_catalog_cluster_centers_04.04.2018')

        # Mode-dependent key names, tag suffix and default users table
        self.tag = 'CLUSTER-FEATURES-' + self.date
        if self.is_retro:
            self.EXTERNAL_ID_KEY = 'external_id'
            self.EXTRACTED_URLS_KEY = self.EXTERNAL_ID_KEY
            self.tag += '-retro'
            default_all_users = ypath_join(root, 'input_yuid')
        else:
            self.EXTERNAL_ID_KEY = 'cid'
            self.EXTRACTED_URLS_KEY = 'yuid'
            self.tag += '-take-{}-days'.format(self.days_to_take)
            default_all_users = ypath_join(self.tmp_dir, 'input_yuid')
        # The explicit table wins in both modes. It used to be consulted only
        # in non-retro mode, so a retro run silently ignored it.
        self.all_users = retro_input_yuid_table or default_all_users

        self.current_extracted_urls_table = self._make_table(
            ypath_join(self.extracted_urls_dir, self.date),
            [
                (self.EXTRACTED_URLS_KEY, 'string'),
                ('host', 'string'),
                ('counter', 'int64'),
                ('timestamp', 'int64'),
            ]
        )

        # result folders
        self.user2host_dir = ypath_join(self.data_dir, 'user2host')
        self.user2clust_dir = ypath_join(self.data_dir, 'user2clust')
        self.user2normed_s2v_weekly_dir = ypath_join(self.s2v_dir, 'weekly')

        # result tables
        self.res_user2host_features = self._make_table(
            ypath_join(self.user2host_dir, self.date),
            [('key', 'string'), ('features', 'string')]
        )
        self.res_user2clust_features = self._make_table(
            res_user2clust_table or ypath_join(self.user2clust_dir, self.date),
            [(self.EXTERNAL_ID_KEY, 'string'), ('features', 'string')]
        )
        self.res_user2normed_s2v_features = self._make_table(
            ypath_join(self.user2normed_s2v_weekly_dir, self.date),
            [(self.EXTERNAL_ID_KEY, 'string'), ('features', 'string')]
        )

        # tmp tables
        self.cid_bow_table = self._make_table(
            ypath_join(self.tmp_dir, 'cid2bow'),
            [
                ('cid', 'string'),
                ('host', 'string'),
                ('counter', 'int64'),
                ('timestamp', 'int64'),
            ]
        )
        self.external_id2bow = self._make_table(
            ypath_join(self.tmp_dir, 'external_id2bow'),
            [('key', 'string'), ('host', 'string')]
        )

    @classmethod
    def _make_table(cls, path, columns):
        """Return a TablePath for ``path`` with a schema and compression attributes.

        Args:
            path: Table path passed to ``TablePath`` unchanged.
            columns: Ordered iterable of ``(column_name, type_name)`` pairs
                that becomes the ``schema`` attribute.
        """
        schema = [
            {'name': column_name, 'type': type_name}
            for column_name, type_name in columns
        ]
        return TablePath(
            path, attributes=dict(schema=schema, **cls._COMPRESSION_PARAMS))

    def collect_garbage(self, yt_client):
        """Remove the tmp folder and the per-date user2host table if they exist.

        Does nothing except logging when ``garbage_collect_on`` is falsy.

        Args:
            yt_client: Client object providing ``exists(path)`` and
                ``remove(path, recursive=...)``.
        """
        logger.info('Started garbage collect')
        if not self.garbage_collect_on:
            logger.info('Garbage collect turned off!')
            return
        for item in (self.tmp_dir, self.res_user2host_features):
            if yt_client.exists(item):
                # tmp_dir is a folder that holds the tmp tables; YT refuses to
                # remove a non-empty folder unless recursive is requested.
                yt_client.remove(item, recursive=True)
        logger.info('Garbage collected')

    @property
    def cloud_nodes_spec(self):
        """Result of ``features.cloud_nodes_spec`` for the ``use_cloud_nodes`` flag."""
        return features.cloud_nodes_spec(self.use_cloud_nodes)
