from ads.bsyeti.libs.log_protos import crypta_profile_pb2
import yt.wrapper as yt

from crypta.lib.python import time_utils
from crypta.lib.python.yt import (
    schema_utils,
    yt_helpers,
)
from crypta.siberia.bin.custom_audience.to_bigb_collector.proto import collector_pb2


def get_collector_schema():
    """Return the table schema derived from the ``TCollector`` protobuf message."""
    collector_message = collector_pb2.TCollector
    return schema_utils.get_schema_from_proto(collector_message)


class ToBigbCollectorReducer:
    """Reducer that folds the rows of one key into a serialized ``TCryptaLog``.

    For every key it builds a ``TCryptaLog`` carrying the configured timestamp
    and the key's ``id`` (stored in the field named by ``id_type``), attaches a
    single item with the configured ``keyword_id`` and fills that item's
    ``uint_values`` with the ``cluster_id`` of each incoming row.
    """

    def __init__(self, timestamp, id_type, keyword_id):
        """Remember the values that are stamped onto every produced message.

        Args:
            timestamp: value passed as ``timestamp`` to ``TCryptaLog``.
            id_type: name of the ``TCryptaLog`` field that receives ``key['id']``.
            keyword_id: value passed as ``keyword_id`` to the added item.
        """
        self.timestamp, self.id_type, self.keyword_id = timestamp, id_type, keyword_id

    def __call__(self, key, rows):
        """Yield one ``{'value': <serialized TCryptaLog>}`` row for the key.

        Nothing is yielded when the rows contribute no cluster ids.
        """
        # The identifier field is only known at runtime, so it is passed
        # through keyword expansion rather than as a literal argument.
        id_field = {self.id_type: key['id']}
        log_record = crypta_profile_pb2.TCryptaLog(timestamp=self.timestamp, **id_field)

        item = log_record.items.add(keyword_id=self.keyword_id)
        cluster_ids = item.uint_values
        cluster_ids.extend(row['cluster_id'] for row in rows)

        # Skip keys that ended up without any cluster id.
        if not cluster_ids:
            return

        yield {'value': log_record.SerializeToString()}


def run(config, logger):
    """Reduce the newest source table into a freshly created collector table.

    All steps run inside a single YT transaction:

    1. list ``config.SourceDir`` and take the greatest table path
       (presumably the most recent one — verify against the naming scheme);
    2. create a collector table under ``config.CollectorDir`` named after the
       current Moscow datetime plus ``config.TableSuffix``;
    3. run ``ToBigbCollectorReducer`` over the source table, reducing by ``id``;
    4. call ``yt_helpers.set_yql_proto_field`` for the ``value`` column with
       the ``TCryptaLog`` message type.

    Args:
        config: job configuration; reads ``Yt.Proxy``, ``Yt.Pool``,
            ``SourceDir``, ``CollectorDir``, ``TableSuffix``, ``IdType`` and
            ``KeywordId``.
        logger: logger used to report the schema of the created table.

    Raises:
        Exception: if ``config.SourceDir`` contains no tables.
    """
    client = yt_helpers.get_yt_client(config.Yt.Proxy, config.Yt.Pool)

    with client.Transaction():
        source_tables = list(client.list(config.SourceDir, absolute=True))
        if not source_tables:
            raise Exception("Missing tables")

        newest_source = max(source_tables)
        destination = yt.ypath_join(
            config.CollectorDir,
            f"{time_utils.get_current_moscow_datetime().isoformat()}-{config.TableSuffix}",
        )

        # Taken once so that every produced message carries the same timestamp.
        now = time_utils.get_current_time()

        table_attributes = {"schema": get_collector_schema()}
        client.create_table(destination, attributes=table_attributes, recursive=True)

        created_schema = client.get_attribute(destination, "schema")
        logger.info("Schema: %s", created_schema)

        reducer = ToBigbCollectorReducer(now, config.IdType, config.KeywordId)
        client.run_reduce(reducer, newest_source, destination, reduce_by=['id'])

        yt_helpers.set_yql_proto_field(destination, 'value', crypta_profile_pb2.TCryptaLog, client)
