# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from yt.wrapper import ypath_join, TablePath
from datacloud.config.yt import DATACLOUD_FOLDER, GREP_ROOT, CRYPTA_SNAPSHOT_FOLDER, AGGREGATES_FOLDER
from datacloud.dev_utils.time.patterns import FMT_DATE
from datacloud.dev_utils.yt import features


def date_before(date, lag):
    """Return the date that lies ``lag`` days before ``date``.

    ``date`` is parsed with ``FMT_DATE`` and the result is formatted back
    with the same pattern, so input and output are both strings.
    """
    parsed = datetime.strptime(date, FMT_DATE)
    shifted = parsed - timedelta(days=lag)
    return shifted.strftime(FMT_DATE)


class TimeHistBuildConfig(object):
    """Paths and parameters used to build time-histogram features.

    The configuration works in one of two modes selected by ``is_retro``:

    * retro (``is_retro=True``): ``data_dir``, ``input_yuid`` and
      ``grep_root`` are derived from ``root``; rows are keyed by
      ``external_id``; ``max_log_date`` is ``max_retro_date`` and
      ``min_log_date`` is ``min_retro_date`` minus ``days_to_take`` days.
    * snapshot (``is_retro=False``): paths are derived from the shared
      ``CRYPTA_SNAPSHOT_FOLDER`` / ``AGGREGATES_FOLDER`` / ``GREP_ROOT``
      constants and ``snapshot_date``; rows are keyed by ``cid``;
      ``max_log_date`` is ``snapshot_date`` and ``min_log_date`` is
      ``snapshot_date`` minus ``days_to_take`` days.

    ``tmp_grep_table`` is only defined in snapshot mode.

    Raises:
        ValueError: if an argument required by the selected mode is ``None``
            (``root`` and ``min_retro_date`` for retro mode,
            ``snapshot_date`` for snapshot mode).
    """

    def __init__(
        self,
        root=None,
        tag='TIMEHIST',
        days_to_take=175,
        log_folders=('watch_log_tskv', 'spy_log'),
        use_cloud_nodes=False,
        is_retro=True,
        snapshot_date=None,
        min_retro_date=None,
        max_retro_date=None
    ):
        self.root = root
        self.tag = tag
        self.days_to_take = days_to_take
        self.use_cloud_nodes = use_cloud_nodes
        self.is_retro = is_retro
        # Stored in both modes so the attribute always exists; previously
        # reading it on a retro config raised AttributeError.
        self.snapshot_date = snapshot_date

        if self.is_retro:
            # Fail early with a clear message: these values are fed straight
            # into ypath_join / strptime, which cannot handle None.
            if root is None:
                raise ValueError('root is required when is_retro=True')
            if min_retro_date is None:
                raise ValueError('min_retro_date is required when is_retro=True')

            self.ext_id_key = 'external_id'
            self.data_dir = ypath_join(root, 'datacloud/aggregates/time_hist')
            self.input_yuid = ypath_join(root, 'input_yuid')
            self.grep_root = ypath_join(root, 'datacloud/grep')
            self.max_log_date = max_retro_date
            # Extend the lower bound by the look-back window.
            self.min_log_date = date_before(min_retro_date, self.days_to_take)
        else:
            if snapshot_date is None:
                raise ValueError('snapshot_date is required when is_retro=False')

            self.ext_id_key = 'cid'
            self.input_yuid = ypath_join(CRYPTA_SNAPSHOT_FOLDER, self.snapshot_date, 'yuid_to_cid')
            self.data_dir = ypath_join(AGGREGATES_FOLDER, 'time_hist', snapshot_date)
            self.grep_root = GREP_ROOT
            self.tmp_grep_table = ypath_join(self.data_dir, 'log')
            self.max_log_date = self.snapshot_date
            self.min_log_date = date_before(self.snapshot_date, self.days_to_take)

        # Storage attributes shared by every output table below.
        compression_params = {
            'compression_codec': 'brotli_3',
            'optimize_for': 'scan',
        }

        self.histogram_table = TablePath(
            ypath_join(self.data_dir, 'time_histograms'),
            attributes=dict(
                schema=[
                    {'name': self.ext_id_key, 'type': 'string'},
                    {'name': 'hist_activity_count', 'type': 'any'},
                    {'name': 'total_activity_days', 'type': 'any'},
                    {'name': 'hist_activity_rate', 'type': 'any'},
                    {'name': 'timezone_name', 'type': 'string'},
                ],
                **compression_params
            )
        )
        self.timezones_table = TablePath(
            ypath_join(self.data_dir, 'timezone'),
            attributes=dict(
                schema=[
                    {'name': self.ext_id_key, 'type': 'string'},
                    {'name': 'user_region', 'type': 'int32'},
                    {'name': 'timezone', 'type': 'string'},
                ],
                **compression_params
            )
        )
        self.features_table = TablePath(
            ypath_join(self.data_dir, 'features'),
            attributes=dict(
                schema=[
                    {'name': self.ext_id_key, 'type': 'string'},
                    {'name': 'features', 'type': 'string'},
                ],
                **compression_params
            )
        )

        # NOTE: region_dir is built from the global GREP_ROOT in both modes,
        # not from self.grep_root.
        self.region_dir = ypath_join(GREP_ROOT, 'region_log')
        self.holidays_table = ypath_join(DATACLOUD_FOLDER, 'holidays')
        self.log_folders = log_folders

    @property
    def cloud_nodes_spec(self):
        """Spec produced by ``features.cloud_nodes_spec`` for ``use_cloud_nodes``."""
        return features.cloud_nodes_spec(self.use_cloud_nodes)

    def get_log_tables(self, yt_client):
        """List grep log tables for a retro config.

        For every folder in ``log_folders`` the nodes under
        ``grep_root/<folder>`` are listed via ``yt_client.list`` and wrapped
        into ``TablePath`` objects restricted to the ``ext_id_key`` and
        ``timestamp`` columns.

        Raises:
            AssertionError: if the config is not in retro mode.
        """
        if not self.is_retro:
            # Explicit raise instead of ``assert`` so the guard is not
            # stripped when Python runs with -O.
            raise AssertionError('get_log_tables is only supported when is_retro=True')
        return [
            TablePath(table, columns=[self.ext_id_key, 'timestamp'])
            for log_folder in self.log_folders
            for table in yt_client.list(ypath_join(self.grep_root, log_folder), absolute=True)
        ]
