from collections import defaultdict
import datetime
import logging
import os

import numpy as np
import yt.yson as yson

from crypta.audience.proto.tables_pb2 import (
    TGeneralStorageOutput,
    TStorageOutput,
)
from crypta.lib.proto.identifiers import id_type_pb2
from crypta.lib.proto.user_data import (
    user_data_pb2,
    user_data_stats_pb2,
)
from crypta.lib.python import time_utils
import crypta.lib.python.bt.conf.conf as conf
from crypta.lib.python.bt.yt import (
    AbstractDynamicStorage,
)
from crypta.lib.python.native_yt.proto import (
    create_schema,
    get_column_name,
)
from crypta.lib.python.yt import yt_helpers

logger = logging.getLogger(__name__)


class InvalidMatchingParameters(Exception):
    """Raised by ``Matching.get`` when the id type / matching parameters are unusable."""


class Attributes(object):
    """Attribute name constants.

    NOTE(review): these look like YT node attribute names -- confirm against callers.
    """

    # Size and timing information.
    ROW_COUNT = 'row_count'
    CREATION_TIME = 'creation_time'
    MODIFICATION_TIME = 'modification_time'

    # Storage layout.
    SCHEMA = 'schema'
    COMPRESSION_CODEC = 'compression_codec'
    OPTIMIZE_FOR = 'optimize_for'

    # Node state.
    LOCK_COUNT = 'lock_count'
    TYPE = 'type'


class Types(object):
    """Column type names used by the table schemas declared in this module."""

    ANY = 'any'
    DOUBLE = 'double'
    INT64 = 'int64'
    STRING = 'string'
    UINT64 = 'uint64'


class Input(object):
    """Name constants describing an input segment table."""

    class Attributes(object):
        """Names of the attributes attached to an input table."""

        # Processing state and segment identity.
        STATUS = 'crypta_status'
        SEGMENT_ID = 'segment_id'
        SEGMENT_PRIORITY = 'segment_priority'
        SOURCE_ID = 'source_id'

        # Identifier / matching configuration.
        ID_TYPE = 'crypta_id_type'
        MATCHING_TYPE = 'crypta_matching_type'
        DEVICE_MATCHING_TYPE = 'crypta_dev_matching_type'

        # Additional segment metadata.
        CRYPTA_RELATED_GOALS = 'crypta_related_goals'
        CRYPTA_SEGMENT_INFO = 'crypta_segment_info'

    class Fields(object):
        """Names of the columns of an input table."""

        ID_VALUE = 'id_value'


class InputBatch(object):
    """Name constants and path helpers for an input batch directory."""

    class Attributes(object):
        """Names of the attributes attached to a batch."""

        PRIORITY = 'priority'
        IN_PROCESSING = 'in_processing'

    class Meta(object):
        """Key names used in the batch meta information."""

        META = 'crypta_meta'
        AUDIENCE_SEGMENT_ID = 'audience_segment_id'
        SEGMENTS = 'segments'
        RELATED_GOALS = 'related_goals'
        MATCHING_TABLE = 'matching_table'
        SEGMENT_INFO = 'crypta_segment_info'
        ID_TYPE = 'id_type'
        DEVICE_MATCHING_TYPE = 'dev_matching_type'
        TARGET_TYPE = 'target_type'

    class Fields(object):
        """Column names of the batch tables."""

        SEGMENT_ID = 'segment_id'
        EXTERNAL_ID = 'external_id'

    @staticmethod
    def meta(path):
        """Return the path of the ``Meta`` node inside the batch at ``path``."""
        meta_path = os.path.join(path, 'Meta')
        return meta_path

    @staticmethod
    def users(path):
        """Return the path of the ``Users`` node inside the batch at ``path``."""
        users_path = os.path.join(path, 'Users')
        return users_path

    @staticmethod
    def meta_users(path):
        """Return the ``(meta, users)`` pair of paths for the batch at ``path``."""
        meta_path = InputBatch.meta(path)
        users_path = InputBatch.users(path)
        return meta_path, users_path


class GeoBatch(object):
    """Name constants for a geo batch."""

    class Fields(object):
        """Column names of a geo batch table."""

        USER_ID = 'user_id'
        SEGMENT_ID = 'segment_id'


class OutputBatch(object):
    """Name constants, path helper and row builder for an output batch."""

    class Fields(object):
        """Column names of the output batch meta table."""

        SEGMENT_ID = 'segment_id'
        # The stats payload is stored under the 'meta' column.
        STATS = 'meta'

    @staticmethod
    def meta(path):
        """Return the path of the ``Meta`` node inside the batch at ``path``."""
        meta_path = os.path.join(path, 'Meta')
        return meta_path

    @staticmethod
    def record(segment_id, stats):
        """Build a row holding ``stats`` for the segment ``segment_id``."""
        row = {}
        row[OutputBatch.Fields.SEGMENT_ID] = segment_id
        row[OutputBatch.Fields.STATS] = stats
        return row


class Output(object):
    """Name constants, reference segment lists and schema of an output segment table."""

    # Reference segments, listed as (keyword, id) pairs and wrapped into TSegment messages.
    CRYPTA_INTERESTS = [
        user_data_pb2.TSegment(ID=segment_id, Keyword=keyword_id)
        for (keyword_id, segment_id) in (
            (217, 13), (217, 15), (217, 16), (217, 17), (217, 23),
            (217, 44), (217, 56), (217, 71), (217, 141), (217, 405),
        )
    ]
    CRYPTA_SEGMENTS = [
        user_data_pb2.TSegment(ID=segment_id, Keyword=keyword_id)
        for (keyword_id, segment_id) in (
            (216, 36), (216, 37), (217, 90), (217, 88), (217, 421),
            (216, 519), (217, 269), (217, 272), (217, 188), (217, 566),
        )
    ]

    class Attributes(object):
        """Names of the attributes attached to an output table."""

        # Processing outcome.
        STATUS = 'crypta_status'
        ERROR = 'crypta_error'
        SEGMENT_ID = 'segment_id'

        # Computed segment characteristics.
        COMMUNALITY = 'crypta_communality'
        COVARIANCE_LOG_DET = 'crypta_covariance_log_det'
        COVARIANCE_TRACE = 'crypta_covariance_trace'
        INTERESTS_AFFINITY = 'crypta_interests_affinity'
        SEGMENTS_AFFINITY = 'crypta_segments_affinity'
        CRYPTA_RELATED_GOALS_SIMILARITY = 'crypta_related_goals_similarity'
        OVERALL_STATS = 'crypta_stats'

    class Fields(object):
        """Column names of an output table."""

        YUID = 'yuid'
        ID_VALUE = 'id_value'
        SEND = 'send'
        SCORE = 'score'
        # All columns, in schema order.
        ALL = [YUID, ID_VALUE, SEND, SCORE]

    class Statuses(object):
        """Values stored under ``Attributes.STATUS``."""

        NEW = 'new'
        DONE = 'done'
        FAILED = 'failed'

    class Stats(object):
        """Key names of the statistics sections."""

        SEX = 'sex'
        AGE = 'age'
        REGION = 'region'
        DEVICE = 'device'
        UNIQ_ID_VALUE = 'uniq_id_value'
        UNIQ_YUID = 'uniq_yuid'

    # Static table schema: one column description per entry of ``Fields.ALL``.
    SCHEMA = [
        {'name': Fields.YUID, 'type': Types.STRING},
        {'name': Fields.ID_VALUE, 'type': Types.STRING},
        {'name': Fields.SEND, 'type': Types.INT64},
        {'name': Fields.SCORE, 'type': Types.DOUBLE},
    ]


class StorageOutput(object):
    """Schema and reduce key derived from the ``TStorageOutput`` proto message."""

    SCHEMA = create_schema(TStorageOutput)

    # Column names of every proto field except ``Timestamp``.
    REDUCE_BY = [
        get_column_name(descriptor)
        for descriptor in TStorageOutput.DESCRIPTOR.fields
        if descriptor.name != 'Timestamp'
    ]


class GeneralStorageOutput(object):
    """Schema derived from the ``TGeneralStorageOutput`` proto message."""

    SCHEMA = create_schema(TGeneralStorageOutput)


class Matching(object):
    """Identifier type names and the rule choosing a matching table for them."""

    YUID = 'yuid'
    IDFA_GAID = 'idfa_gaid'
    EMAIL = 'email'
    PHONE = 'phone'
    MAC = 'mac'
    # virtual type for emails and phones
    CRM = 'crm'

    CRYPTA_ID = 'crypta_id'
    PUID = 'puid'

    # Maps a target type name to the corresponding EIdType proto enum value.
    TARGET_TYPE_TO_ID_TYPE = {
        CRYPTA_ID: id_type_pb2.EIdType.CRYPTA_ID,
        YUID: id_type_pb2.EIdType.YANDEXUID,
        PUID: id_type_pb2.EIdType.PUID,
    }

    class Fields(object):
        """Column names of a matching table."""

        ID_VALUE = 'id_value'
        ID_TYPE = 'id_type'
        YUID = 'yuid'

    @staticmethod
    def get(matching_type, id_type, device_matching_type):
        """Return the matching table path for ``id_type``.

        Args:
            matching_type: not used by the current implementation.
            id_type: one of the identifier type names declared on this class.
            device_matching_type: must be truthy when ``id_type`` is ``IDFA_GAID``.

        Returns:
            ``None`` for ``YUID``, ``CRYPTA_ID`` and ``PUID``; otherwise
            ``conf.paths.audience.matching.by_id_value``.

        Raises:
            InvalidMatchingParameters: if ``id_type`` is unknown, or if it is
                ``IDFA_GAID`` and ``device_matching_type`` is empty.
        """
        # maybe it's better just to matchinghash.get(id_type, None)
        if id_type in {Matching.YUID, Matching.CRYPTA_ID, Matching.PUID}:
            return None

        if id_type in {Matching.MAC, Matching.EMAIL, Matching.PHONE, Matching.CRM}:
            return conf.paths.audience.matching.by_id_value

        if id_type == Matching.IDFA_GAID:   # should we keep this logic?
            if not device_matching_type:
                # The attribute names live on Input.Attributes; Input.Fields only
                # declares ID_VALUE, so reading them from there raised AttributeError
                # instead of the intended InvalidMatchingParameters.
                raise InvalidMatchingParameters(
                    '%s attr is %s, but no %s attr' % (
                        Input.Attributes.ID_TYPE,
                        id_type,
                        Input.Attributes.DEVICE_MATCHING_TYPE,
                    )
                )
            return conf.paths.audience.matching.by_id_value

        raise InvalidMatchingParameters('Unknown %s: %s' % (Input.Attributes.ID_TYPE, id_type))


class RecordBase(dict):
    """Dict-based record with a helper for reading serialized proto values."""

    def _get_proto(self, proto_cls, key):
        """Return a ``proto_cls`` instance parsed from the value stored under ``key``.

        A missing or empty value yields a freshly constructed ``proto_cls()``.
        """
        message = proto_cls()
        serialized = self.get(key, "")
        if serialized:
            message.ParseFromString(serialized)
        return message


class StatsStorage(AbstractDynamicStorage):
    """Dynamic storage of per-segment statistics at ``conf.paths.audience.dynamic.stats``."""

    class Fields(object):
        """Column names of the stats table."""

        HASH = 'Hash'
        SEGMENT_ID = 'SegmentID'
        SEGMENT_TYPE = 'SegmentType'
        TIMESTAMP = 'Timestamp'
        STATS = 'Stats'

    @property
    def path(self):
        """Path of the stats table taken from the configuration."""
        return conf.paths.audience.dynamic.stats

    def create(self, *args, **kwargs):
        """Create the table via the base class, then set the YQL proto field on ``Stats``."""
        super(StatsStorage, self).create(*args, **kwargs)
        self.set_yql_proto_attributes()

    @property
    def schema(self):
        """Table schema: computed hash key, segment id/type keys, timestamp and stats."""
        fields = StatsStorage.Fields
        hash_expression = 'farm_hash(({}, {}))'.format(fields.SEGMENT_ID, fields.SEGMENT_TYPE)
        columns = [
            {
                'name': fields.HASH,
                'type': Types.UINT64,
                'expression': hash_expression,
                'sort_order': 'ascending',
            },
            {'name': fields.SEGMENT_ID, 'type': Types.INT64, 'sort_order': 'ascending'},
            {'name': fields.SEGMENT_TYPE, 'type': Types.STRING, 'sort_order': 'ascending'},
            {'name': fields.TIMESTAMP, 'type': Types.UINT64},
            {'name': fields.STATS, 'type': Types.STRING},
        ]
        return yson.to_yson_type(columns, attributes={'unique_keys': True})

    @staticmethod
    def record(segment_id, segment_type, segment_stats, timestamp=None):
        """Build a stats row; ``timestamp`` defaults to the current time."""
        fields = StatsStorage.Fields
        row_timestamp = time_utils.get_current_time() if timestamp is None else timestamp
        return {
            fields.SEGMENT_ID: segment_id,
            fields.SEGMENT_TYPE: segment_type,
            fields.STATS: segment_stats,
            fields.TIMESTAMP: row_timestamp,
        }

    def set_yql_proto_attributes(self):
        """Mark the ``Stats`` column as a ``TUserDataStats`` proto field for YQL."""
        stats_column = StatsStorage.Fields.STATS
        yt_helpers.set_yql_proto_field(self.path, stats_column, user_data_stats_pb2.TUserDataStats, self.yt)


def _quantile(numbers, q, _sorted=False):
    if numbers:
        count = len(numbers)
        if count == 1:
            return numbers[0]

        sorted_numbers = numbers if _sorted else sorted(numbers)
        q_index = min(count-1, int(q*count))
        return sorted_numbers[q_index]
    else:
        return 0.0


def get_ts_minus_days(days):
    """Return the current time (per ``time_utils.get_current_time``) minus ``days`` days in seconds."""
    now = time_utils.get_current_time()
    offset_seconds = int(datetime.timedelta(days=days).total_seconds())
    return now - offset_seconds


class SegmentStateStorage(AbstractDynamicStorage):
    """Dynamic storage tracking input/output timestamps and row counts per segment.

    This class does not define ``path``; the subclasses in this module provide it.
    """

    class Fields(object):
        """Column names of the segment state table."""

        HASH = 'Hash'
        SEGMENT_ID = 'SegmentID'
        SEGMENT_TYPE = 'SegmentType'
        INPUT_TIMESTAMP = 'InputTimestamp'
        OUTPUT_TIMESTAMP = 'OutputTimestamp'
        INPUT_ROWS = 'InputRows'
        OUTPUT_ROWS = 'OutputRows'

    def create(self, optimize_for='scan', in_memory_mode='uncompressed', **kwargs):
        """Create the table, defaulting to scan-optimized, uncompressed in-memory mode."""
        super(SegmentStateStorage, self).create(
            optimize_for=optimize_for,
            in_memory_mode=in_memory_mode,
            **kwargs
        )

    def insert_rows(self, rows, update=True, with_preparing=True, **kwargs):
        """Insert ``rows`` via the base class with ``update`` and ``with_preparing`` on by default."""
        super(SegmentStateStorage, self).insert_rows(
            rows,
            update=update,
            with_preparing=with_preparing,
            **kwargs
        )

    @property
    def schema(self):
        """Table schema: computed hash key, segment id/type keys, timestamps and row counts."""
        fields = SegmentStateStorage.Fields
        hash_expression = 'farm_hash(({}, {}))'.format(fields.SEGMENT_ID, fields.SEGMENT_TYPE)
        columns = [
            {
                'name': fields.HASH,
                'type': Types.UINT64,
                'expression': hash_expression,
                'sort_order': 'ascending',
            },
            {'name': fields.SEGMENT_ID, 'type': Types.INT64, 'sort_order': 'ascending'},
            {'name': fields.SEGMENT_TYPE, 'type': Types.STRING, 'sort_order': 'ascending'},
            {'name': fields.INPUT_TIMESTAMP, 'type': Types.UINT64},
            {'name': fields.OUTPUT_TIMESTAMP, 'type': Types.UINT64},
            {'name': fields.INPUT_ROWS, 'type': Types.UINT64},
            {'name': fields.OUTPUT_ROWS, 'type': Types.UINT64},
        ]
        return yson.to_yson_type(columns, attributes={'unique_keys': True})

    @staticmethod
    def record(segment_id, segment_type, is_input, row_count, timestamp=None):
        """Build a state row for an input or an output event.

        For an input event the output timestamp is reset to 0, unless the input
        is empty (``row_count == 0``): then the segment is recorded as already
        finished, with zero output rows and the output timestamp set to
        ``timestamp``. For an output event only the output columns are filled.
        ``timestamp`` defaults to the current time.
        """
        fields = SegmentStateStorage.Fields
        if timestamp is None:
            timestamp = time_utils.get_current_time()

        row = {
            fields.SEGMENT_ID: segment_id,
            fields.SEGMENT_TYPE: segment_type,
        }

        if not is_input:
            row[fields.OUTPUT_TIMESTAMP] = timestamp
            row[fields.OUTPUT_ROWS] = row_count
            return row

        empty_input = row_count == 0
        row[fields.INPUT_TIMESTAMP] = timestamp
        row[fields.INPUT_ROWS] = row_count
        row[fields.OUTPUT_TIMESTAMP] = timestamp if empty_input else 0
        if empty_input:
            row[fields.OUTPUT_ROWS] = 0
        return row

    def _select_time_quantiles(self, query, segment_type, quantiles):
        """Run ``query`` (optionally narrowed to ``segment_type``) and return quantiles of column ``t``."""
        if segment_type:
            query += ' AND {segment_type} = "{segment_type_value}"'.format(
                segment_type=SegmentStateStorage.Fields.SEGMENT_TYPE,
                segment_type_value=segment_type,
            )
        durations = [row['t'] for row in self.yt.select_rows(query)]
        durations.sort()
        return dict((q, _quantile(durations, q=q, _sorted=True)) for q in quantiles)

    def get_processing_time_q(self, days=1, quantiles=(0.95,), segment_type=None):
        """Return ``{q: value}`` quantiles of ``OutputTimestamp - InputTimestamp``.

        Only rows whose input and output timestamps both fall within the last
        ``days`` days are considered; ``segment_type`` optionally filters rows.
        """
        fields = SegmentStateStorage.Fields
        threshold = get_ts_minus_days(days)
        query = "{output_timestamp}-{input_timestamp} as t FROM [{path}] WHERE {input_timestamp} > {ts} and {output_timestamp} > {ts}".format(
            path=self.path,
            input_timestamp=fields.INPUT_TIMESTAMP,
            output_timestamp=fields.OUTPUT_TIMESTAMP,
            ts=threshold
        )
        return self._select_time_quantiles(query, segment_type, quantiles)

    def get_unfinished_processing_time_q(self, quantiles=(0.95,), segment_type=None, days=30):
        """Return ``{q: value}`` quantiles of ``now - InputTimestamp`` for unfinished segments.

        A segment counts as unfinished when its output timestamp is 0; only
        inputs from the last ``days`` days are considered.
        """
        fields = SegmentStateStorage.Fields
        threshold = get_ts_minus_days(days)
        current_time = time_utils.get_current_time()
        query = "{now}-{input_timestamp} as t FROM [{path}] WHERE {input_timestamp} > {ts} and {output_timestamp} = 0".format(
            path=self.path,
            input_timestamp=fields.INPUT_TIMESTAMP,
            output_timestamp=fields.OUTPUT_TIMESTAMP,
            now=current_time,
            ts=threshold,
        )
        return self._select_time_quantiles(query, segment_type, quantiles)


class RegularSegmentStateStorage(SegmentStateStorage):
    """Segment state storage located at ``conf.paths.audience.dynamic.states.regular``."""

    @property
    def path(self):
        """Path of the regular segment state table taken from the configuration."""
        states = conf.paths.audience.dynamic.states
        return states.regular


class LookalikeSegmentStateStorage(SegmentStateStorage):
    """Segment state storage located at ``conf.paths.audience.dynamic.states.lookalike``."""

    @property
    def path(self):
        """Path of the lookalike segment state table taken from the configuration."""
        states = conf.paths.audience.dynamic.states
        return states.lookalike


class LookalikeStatsStorage(AbstractDynamicStorage):
    """Dynamic storage of per-batch lookalike filtering statistics."""

    class Fields(object):
        """Column names of the lookalike stats table."""

        HASH = 'Hash'
        BATCH_ID = 'BatchID'
        TIMESTAMP = 'Timestamp'
        SUM_PERCENTS_BY_INPUT_TIME = 'SumPercentsByInputTime'
        MAX_PERCENTS_BY_INPUT_TIME = 'MaxPercentsByInputTime'
        MIN_PERCENTS_BY_INPUT_TIME = 'MinPercentsByInputTime'
        SUM_PERCENTS_BY_OUTPUT_TIME = 'SumPercentsByOutputTime'
        MAX_PERCENTS_BY_OUTPUT_TIME = 'MaxPercentsByOutputTime'
        MIN_PERCENTS_BY_OUTPUT_TIME = 'MinPercentsByOutputTime'
        COUNT = 'Count'

    @property
    def path(self):
        """Path of the lookalike stats table taken from the configuration."""
        return conf.paths.audience.dynamic.lookalike_stats

    @property
    def schema(self):
        """Table schema: computed hash key, batch id key, timestamp and aggregate columns."""
        fields = LookalikeStatsStorage.Fields
        columns = [
            {
                'name': fields.HASH,
                'type': Types.UINT64,
                'expression': 'farm_hash(({}))'.format(fields.BATCH_ID),
                'sort_order': 'ascending',
            },
            {'name': fields.BATCH_ID, 'type': Types.STRING, 'sort_order': 'ascending'},
            {'name': fields.TIMESTAMP, 'type': Types.UINT64},
            {'name': fields.COUNT, 'type': Types.UINT64},
            {'name': fields.SUM_PERCENTS_BY_INPUT_TIME, 'type': Types.UINT64},
            {'name': fields.MAX_PERCENTS_BY_INPUT_TIME, 'type': Types.UINT64},
            {'name': fields.MIN_PERCENTS_BY_INPUT_TIME, 'type': Types.UINT64},
            {'name': fields.SUM_PERCENTS_BY_OUTPUT_TIME, 'type': Types.UINT64},
            {'name': fields.MAX_PERCENTS_BY_OUTPUT_TIME, 'type': Types.UINT64},
            {'name': fields.MIN_PERCENTS_BY_OUTPUT_TIME, 'type': Types.UINT64},
        ]
        return yson.to_yson_type(columns, attributes={'unique_keys': True})

    def create(self, optimize_for='scan', in_memory_mode='uncompressed', **kwargs):
        """Create the table, defaulting to scan-optimized, uncompressed in-memory mode."""
        super(LookalikeStatsStorage, self).create(
            optimize_for=optimize_for,
            in_memory_mode=in_memory_mode,
            **kwargs
        )

    def record(self, batch_id, stats, timestamp=None):
        """Build a row from the ``FilteredRecords`` metrics of ``stats``.

        Returns ``None`` when ``stats`` is ``None``. ``timestamp`` defaults to
        the current time. The ``Count`` column is taken from the by-input-size
        aggregate.
        """
        if stats is None:
            return None

        fields = LookalikeStatsStorage.Fields
        filtered_records = stats['Metrics']['FilteredRecords']
        input_rate = filtered_records['ByInputSize']['Rate']['$']['completed']['sorted_reduce']
        output_rate = filtered_records['ByOutputSize']['Rate']['$']['completed']['sorted_reduce']

        row_timestamp = time_utils.get_current_time() if timestamp is None else timestamp
        return {
            fields.BATCH_ID: batch_id,
            fields.TIMESTAMP: row_timestamp,
            fields.SUM_PERCENTS_BY_INPUT_TIME: input_rate['sum'],
            fields.MAX_PERCENTS_BY_INPUT_TIME: input_rate['max'],
            fields.MIN_PERCENTS_BY_INPUT_TIME: input_rate['min'],
            fields.SUM_PERCENTS_BY_OUTPUT_TIME: output_rate['sum'],
            fields.MAX_PERCENTS_BY_OUTPUT_TIME: output_rate['max'],
            fields.MIN_PERCENTS_BY_OUTPUT_TIME: output_rate['min'],
            fields.COUNT: input_rate['count'],
        }

    def get_rate(self, record, key, denominator=100.):
        """Return ``record[key]`` converted to float and divided by ``denominator``."""
        raw_value = record.get(key)
        return float(raw_value) / denominator

    def get_stats(self, days=1):
        """Aggregate stored rows into average and maximum rates.

        Returns a tuple ``(avg_by_input, avg_by_output, max_by_input,
        max_by_output, count)``; all zeros when the table does not exist.
        Rows are limited to the last ``days`` days unless ``days`` is ``None``.
        """
        if not self.yt.exists(self.path):
            return 0, 0, 0, 0, 0

        fields = LookalikeStatsStorage.Fields
        query = "* FROM [{path}]".format(path=self.path)
        if days is not None:
            query += " WHERE {timestamp} > {ts}".format(
                timestamp=fields.TIMESTAMP,
                ts=get_ts_minus_days(days),
            )

        input_sum = 0.
        output_sum = 0.
        input_max = 0.
        output_max = 0.
        # NOTE(review): the counter starts at 1, not 0, so the returned count is one
        # above the stored total; presumably this avoids division by zero -- confirm.
        count = 1.
        for row in self.yt.select_rows(query):
            input_sum += self.get_rate(row, fields.SUM_PERCENTS_BY_INPUT_TIME)
            output_sum += self.get_rate(row, fields.SUM_PERCENTS_BY_OUTPUT_TIME)
            count += row[fields.COUNT]
            input_max = max(input_max, self.get_rate(row, fields.MAX_PERCENTS_BY_INPUT_TIME))
            output_max = max(output_max, self.get_rate(row, fields.MAX_PERCENTS_BY_OUTPUT_TIME))

        input_average = float(input_sum) / count
        output_average = float(output_sum) / count
        return input_average, output_average, input_max, output_max, count

    def insert_row(self, *args, **kwargs):
        """Build a row with ``record`` and insert it; do nothing if no row was built."""
        row = self.record(*args, **kwargs)
        if not row:
            return None
        return self.insert_rows([row])


class SegmentsGoalsRelationStorage(AbstractDynamicStorage):
    """Dynamic storage of cosine values for (segment, goal) pairs."""

    class Fields(object):
        """Column names of the goals relation table."""

        HASH = 'Hash'
        SEGMENT_ID = 'SegmentID'
        GOAL_ID = 'GoalID'
        COSINE = 'Cosine'

    def create(self, optimize_for='scan', in_memory_mode='uncompressed', **kwargs):
        """Create the table, defaulting to scan-optimized, uncompressed in-memory mode."""
        super(SegmentsGoalsRelationStorage, self).create(
            optimize_for=optimize_for,
            in_memory_mode=in_memory_mode,
            **kwargs
        )

    @property
    def path(self):
        """Path of the goals relation table taken from the configuration."""
        return conf.paths.audience.dynamic.goals_relation

    def insert_rows(self, rows, update=True, with_preparing=True, **kwargs):
        """Insert ``rows`` via the base class with ``update`` and ``with_preparing`` on by default."""
        super(SegmentsGoalsRelationStorage, self).insert_rows(
            rows,
            update=update,
            with_preparing=with_preparing,
            **kwargs
        )

    @property
    def schema(self):
        """Table schema: computed hash key, segment id and goal id keys, cosine value."""
        fields = self.Fields
        hash_expression = 'farm_hash(({},{}))'.format(fields.SEGMENT_ID, fields.GOAL_ID)
        columns = [
            {
                'name': fields.HASH,
                'type': Types.UINT64,
                'expression': hash_expression,
                'sort_order': 'ascending',
            },
            {'name': fields.SEGMENT_ID, 'type': Types.UINT64, 'sort_order': 'ascending'},
            {'name': fields.GOAL_ID, 'type': Types.UINT64, 'sort_order': 'ascending'},
            {'name': fields.COSINE, 'type': Types.DOUBLE},
        ]
        return yson.to_yson_type(columns, attributes={'unique_keys': True})

    @staticmethod
    def record(segment_id, goal_id, cosine):
        """Build a row; ``segment_id`` and ``goal_id`` are converted with ``int``."""
        fields = SegmentsGoalsRelationStorage.Fields
        row = {}
        row[fields.SEGMENT_ID] = int(segment_id)
        row[fields.GOAL_ID] = int(goal_id)
        row[fields.COSINE] = cosine
        return row


class SegmentPropertiesStorage(AbstractDynamicStorage):
    """Dynamic storage of per-segment communality values and their quantiles."""

    # Names of the table attributes holding the computed quantiles and the date
    # of their last update. NOTE(review): the second value ends in "data", not
    # "date"; it is kept as is because it is the attribute name actually written.
    COMMUNALITY_QUANTILES = "communality_quantiles"
    LAST_COMMUNALITY_QUANTILES_UPDATE_DATE = "last_communality_quantiles_update_data"

    class Fields(object):
        """Column names of the segment properties table."""

        HASH = 'Hash'
        SEGMENT_ID = 'SegmentID'
        SEGMENT_TYPE = 'SegmentType'
        TIMESTAMP = 'Timestamp'
        COMMUNALITY = 'Communality'

    def create(self, optimize_for='scan', in_memory_mode='uncompressed', **kwargs):
        """Create the table, defaulting to scan-optimized, uncompressed in-memory mode."""
        super(SegmentPropertiesStorage, self).create(optimize_for=optimize_for, in_memory_mode=in_memory_mode, **kwargs)

    @property
    def path(self):
        """Path of the segment properties table taken from the configuration."""
        return conf.paths.audience.dynamic.properties

    def insert_rows(self, rows, update=True, with_preparing=True, **kwargs):
        """Insert ``rows`` via the base class with ``update`` and ``with_preparing`` on by default."""
        super(SegmentPropertiesStorage, self).insert_rows(rows, update=update, with_preparing=with_preparing, **kwargs)

    @property
    def schema(self):
        """Table schema; the same object as the class-level ``SCHEMA``."""
        return self.SCHEMA

    # Key columns are Hash, SegmentType, SegmentID (in this order), followed by
    # the Timestamp and Communality value columns.
    SCHEMA = yson.to_yson_type(
        [
            dict(
                name=Fields.HASH,
                type=Types.UINT64,
                expression='farm_hash(({}, {}))'.format(Fields.SEGMENT_ID, Fields.SEGMENT_TYPE),
                sort_order='ascending'
            ),
            dict(name=Fields.SEGMENT_TYPE, type=Types.STRING, sort_order='ascending'),
            dict(name=Fields.SEGMENT_ID, type=Types.INT64, sort_order='ascending'),
            dict(name=Fields.TIMESTAMP, type=Types.UINT64),
            dict(name=Fields.COMMUNALITY, type=Types.DOUBLE),

        ],
        attributes=dict(unique_keys=True),
    )

    @staticmethod
    def record(segment_id, segment_type, communality, timestamp=None):
        """Build a properties row; ``timestamp`` defaults to the current time."""
        if timestamp is None:
            timestamp = time_utils.get_current_time()
        return {
            SegmentPropertiesStorage.Fields.SEGMENT_ID: segment_id,
            SegmentPropertiesStorage.Fields.SEGMENT_TYPE: segment_type,
            SegmentPropertiesStorage.Fields.COMMUNALITY: communality,
            SegmentPropertiesStorage.Fields.TIMESTAMP: timestamp,
        }

    def get_communality_quantiles(self):
        """Return the quantiles stored in the table attribute, or ``{}`` if absent."""
        return self.yt.get_attribute(self.path, self.COMMUNALITY_QUANTILES, {})

    def compute_communality_quantiles(self, days=10., num_quantiles=10):
        """Compute per-segment-type communality quantiles and store them on the table.

        Rows from the last ``days`` days with a non-null segment type and
        communality are grouped by segment type. For each group,
        ``num_quantiles`` evenly spaced percentiles starting at 0 are computed
        (0, 10, ..., 90 for the default of 10). The result and the current
        Moscow date are written to table attributes.

        Returns ``{}`` when the table does not exist, otherwise ``None``.
        """
        if not self.yt.exists(self.path):
            return {}
        ts = get_ts_minus_days(days)

        communalities = defaultdict(list)
        for record in self.yt.read_table(self.path):
            segment_type = record[self.Fields.SEGMENT_TYPE]
            communality = record[self.Fields.COMMUNALITY]
            if record[self.Fields.TIMESTAMP] > ts and segment_type is not None and communality is not None:
                communalities[segment_type].append(communality)

        # Previously ``num_quantiles`` was used as the *step* of range(0, 100, step),
        # which yields the requested number of percentiles only for the default 10.
        # Build exactly ``num_quantiles`` evenly spaced percentiles instead.
        percentiles = [100.0 * index / num_quantiles for index in range(num_quantiles)]

        quantiles = dict()

        for segment_type, values in communalities.items():
            quantiles[segment_type] = list(np.percentile(values, percentiles))
        logger.info("Communality quantiles %s", quantiles)
        self.yt.set_attribute(self.path, self.COMMUNALITY_QUANTILES, quantiles)
        self.yt.set_attribute(self.path, self.LAST_COMMUNALITY_QUANTILES_UPDATE_DATE, str(time_utils.get_current_moscow_datetime().date()))
