# -*- coding: utf-8 -*-
from yt.wrapper import ypath_join, TablePath
from datacloud.config.yt import GREP_ROOT
from datacloud.dev_utils.yt import features
from datacloud.features.geo.constants import MAX_DISTANCES_IN_CATEGORY, DISTANCE_THRESH, ADDRS_TYPES, \
    FEATURES_FILLNA, DEFAULT_FEATURES_SORT_ORDER


class GeoBuildConfig(object):
    """Parameters and YT table locations used by the geo feature build.

    The constructor stores the tuning parameters as attributes and derives
    every working table path from ``root``. Each table is exposed as a
    ``TablePath`` carrying its schema and the shared storage attributes.
    """

    EXTERNAL_LOGS_DIR = '//home/user_identification/usergeo/production/state/regular'
    LOCAL_LOGS_DIR = ypath_join(GREP_ROOT, 'geo_log')

    # Attributes attached to every TablePath created by this config.
    _TABLE_STORAGE_ATTRIBUTES = {
        'compression_codec': 'brotli_3',
        'optimize_for': 'scan',
    }

    def __init__(
            self,
            root,
            addresses_table=None,
            resolved_addrs=None,
            input_yuid_table=None,
            tag='GEO',
            min_date=None,
            max_date=None,
            max_distances_in_category=None,
            distance_thresh=None,
            addrs_types=None,
            features_fillna=None,
            features_sort_order=None,
            use_cloud_nodes=False):
        """Store build parameters and derive all table paths from ``root``.

        Optional arguments are combined with their defaults via ``or``, so
        any falsy value (``None``, ``0``, an empty container, ...) selects
        the default, not only ``None``.

        Args:
            root: Base path; ``logs_dir``, ``data_dir`` and the default
                input table paths are built under it.
            addresses_table: Path of the addresses table; defaults to
                ``<root>/raw_data/glued``.
            resolved_addrs: Path of the resolved addresses table; defaults
                to ``<data_dir>/resolved_addrs``.
            input_yuid_table: Path of the input yuid table; defaults to
                ``<root>/input_yuid``.
            tag: Stored as ``self.tag``.
            min_date: Stored as ``self.min_date``.
            max_date: Stored as ``self.max_date``.
            max_distances_in_category: Defaults to
                ``MAX_DISTANCES_IN_CATEGORY``.
            distance_thresh: Defaults to ``DISTANCE_THRESH``.
            addrs_types: Defaults to ``ADDRS_TYPES``.
            features_fillna: Defaults to ``FEATURES_FILLNA``.
            features_sort_order: ``1`` (forward) or ``-1`` (backward);
                defaults to ``DEFAULT_FEATURES_SORT_ORDER``.
            use_cloud_nodes: Flag later passed to
                ``features.cloud_nodes_spec``.

        Raises:
            ValueError: If the effective sort order is neither 1 nor -1.
        """
        self.tag = tag
        self.min_date = min_date
        self.max_date = max_date
        self.max_distances_in_category = max_distances_in_category or MAX_DISTANCES_IN_CATEGORY
        self.distance_thresh = distance_thresh or DISTANCE_THRESH
        self.addrs_types = addrs_types or ADDRS_TYPES
        self.features_fillna = features_fillna or FEATURES_FILLNA
        self.use_cloud_nodes = use_cloud_nodes

        features_sort_order = features_sort_order or DEFAULT_FEATURES_SORT_ORDER
        # Explicit raise instead of `assert`: asserts are removed under
        # `python -O`, which would let an invalid sort order through.
        if features_sort_order not in (1, -1):
            raise ValueError('Sort order should be 1 or -1 corresponding to forward and backward')
        self.features_sort_order = features_sort_order

        self.root = root
        self.logs_dir = ypath_join(self.root, '/datacloud/grep/geo')
        self.data_dir = ypath_join(self.root, '/datacloud/aggregates/geo')
        self.addresses_table = addresses_table or ypath_join(self.root, 'raw_data', 'glued')
        self.resolved_addrs = resolved_addrs or ypath_join(self.data_dir, 'resolved_addrs')
        self.input_yuid_table = input_yuid_table or ypath_join(self.root, 'input_yuid')

        # Fetched and filtered log tables all share one schema.
        logs_schema = [
            {'name': 'external_id', 'type': 'string'},
            {'name': 'lon', 'type': 'double'},
            {'name': 'lat', 'type': 'double'},
            {'name': 'timestamp_of_log', 'type': 'int64'},
            {'name': 'original_timestamp', 'type': 'int64'},
        ]
        self.fetched_logs = self._table(ypath_join(self.logs_dir, 'geo'), logs_schema)
        self.filtered_logs1 = self._table(ypath_join(self.data_dir, 'filtered_logs1'), logs_schema)
        self.filtered_logs2 = self._table(ypath_join(self.data_dir, 'filtered_logs2'), logs_schema)

        # Same location as `self.resolved_addrs`, with schema attached.
        self.resolved_addrs_table = self._table(
            self.resolved_addrs,
            [
                {'name': 'external_id', 'type': 'string'},
                {'name': 'type', 'type': 'string'},
                {'name': 'lon', 'type': 'double'},
                {'name': 'lat', 'type': 'double'},
            ]
        )

        distances_schema = [
            {'name': 'external_id', 'type': 'string'},
            {'name': 'type', 'type': 'string'},
            {'name': 'distance', 'type': 'double'},
        ]
        self.distances_table = self._table(ypath_join(self.data_dir, 'distances'), distances_schema)
        self.distances_filtered = self._table(
            ypath_join(self.data_dir, 'distances_filtered'), distances_schema)

        self.features_flatten = self._table(
            ypath_join(self.data_dir, 'features_flatten'),
            [
                {'name': 'external_id', 'type': 'string'},
                {'name': 'type', 'type': 'string'},
                {'name': 'feature', 'type': 'double'},
            ]
        )

        features_schema = [
            {'name': 'external_id', 'type': 'string'},
            {'name': 'features', 'type': 'string'},
        ]
        self.features_table = self._table(ypath_join(self.data_dir, 'features'), features_schema)
        # NOTE: the attribute is `features_with_binary`, the table node is 'features_geo'.
        self.features_with_binary = self._table(
            ypath_join(self.data_dir, 'features_geo'), features_schema)

    @classmethod
    def _table(cls, path, schema):
        """Return a TablePath for ``path`` with ``schema`` and the shared storage attributes."""
        return TablePath(
            path,
            attributes=dict(schema=schema, **cls._TABLE_STORAGE_ATTRIBUTES)
        )

    @property
    def cloud_nodes_spec(self):
        """Return ``features.cloud_nodes_spec`` evaluated for ``self.use_cloud_nodes``."""
        return features.cloud_nodes_spec(self.use_cloud_nodes)
