import datetime
import json
import logging
import os
import tempfile
import uuid

from cached_property import cached_property
import concurrent.futures
from library.python.protobuf.json import proto2json
import library.python.resource as rs
import requests
import retry
import tvmauth
from yt.wrapper import ypath_join

from crypta.audience.lib.storage import segment_priorities
from crypta.audience.lib.tasks.base import YtTask, YQLTaskV1
from crypta.audience.lib.tasks.constants import CRYPTAID_SOURCEID
from crypta.audience.lib.tasks.native_operations import (
    TUpdateFullState,
)
from crypta.audience.proto import storage_pb2
from crypta.lib.proto.config import environment_pb2
from crypta.lib.python import (
    argparse_utils,
    getoptpb,
    templater,
    time_utils,
)
import crypta.lib.python.bt.conf.conf as conf
from crypta.lib.python.bt.workflow import IndependentTask, Parameter
from crypta.lib.python.sandbox.client import SandboxClient
from crypta.lib.python.solomon import reporter
import crypta.lib.python.tvm.helpers as tvm_helpers
import crypta.lib.python.yql.client as yql_client
from crypta.lib.python.yt import (
    schema_utils,
    yt_helpers,
)


# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Priority written for adfox/zen retargeting segments; also passed to the
# segments priorities query as ``default_segment_priority``.
DEFAULT_SEGMENT_PRIORITY = 500
# Used as ``DefaultPriority`` of every priorities group in the uploaded proto.
MIN_SEGMENT_PRIORITY = 0
# Priority written for active negative segments; also passed to the segments
# priorities query as ``max_segment_priority``.
MAX_SEGMENT_PRIORITY = 10000
# Maps the configured environment enum value to the value of the "released"
# attribute set on uploaded Sandbox resources.
PROTO_ENV_TO_RELEASE = {
    environment_pb2.EEnvironment.ENV_DEVELOP: "unstable",
    environment_pb2.EEnvironment.ENV_TESTING: "testing",
    environment_pb2.EEnvironment.ENV_PRODUCTION: "stable",
}
# Sandbox resource types used for the JSON and the vinyl priorities uploads.
PRIORITIES_RESOURCE_TYPE = "CRYPTA_AUDIENCE_SEGMENTS_PRIORITIES"
VINYL_PRIORITIES_RESOURCE_TYPE = "CRYPTA_AUDIENCE_SEGMENTS_PRIORITIES_VINYL"
# NOTE(review): the two names below are not referenced in the visible part of
# this module; presumably they are resource type names used elsewhere - confirm.
BIGB_AB_EXPERIMENT_CONFIG_RESOURCE_TYPE = "BIGB_AB_EXPERIMENTS_PRODUCTION_CONFIG"
BS_YETI_LIGHT_CONFIGS = "BS_YETI_LIGHT_CONFIGS"


def _yql_tables_list(tables):
    """Return the given table paths backtick-quoted, one per line, comma-separated."""
    quoted_paths = ("`%s`" % table_path for table_path in tables)
    return ',\n'.join(quoted_paths)


def _yql_set(iterable):
    """Render ``iterable`` as a brace-delimited list of single-quoted values.

    Duplicates are collapsed. The quoted values are emitted in sorted order so
    the generated query text is the same on every run; iterating a plain
    ``set`` of strings yields an order that depends on string hashing.

    Values are inserted verbatim: no escaping of quotes is performed.
    """
    quoted_values = {"'%s'" % each for each in iterable}
    return '{' + ', '.join(sorted(quoted_values)) + '}'


def _latest(yt, path, n=None):
    """Return table nodes found under ``path`` ordered by ``modification_time``.

    The nodes are sorted in ascending order of their ``modification_time``
    attribute and the first ``n`` of them are returned (all of them when ``n``
    is ``None``), i.e. the slice starts from the smallest modification time.
    An empty list is returned when ``path`` does not exist.
    """
    if yt.exists(path):
        found = yt.search(path, node_type='table', attributes=['modification_time'])
        ordered = sorted(found, key=lambda node: node.attributes['modification_time'])
        return ordered[:n]
    return []


def _foreign(table):
    """Prefix ``table`` with the ``<foreign=%true>`` path attribute."""
    foreign_prefix = '<foreign=%true>'
    return foreign_prefix + table


def _get_resp_tables_from_other_directory(new_dir, tables, filter_func=None):
    """Map each table to the path with the same base name inside ``new_dir``.

    When ``filter_func`` is given (and truthy), only the relocated paths for
    which it returns a truthy value are kept. A list is always returned.
    """
    relocated = (os.path.join(new_dir, os.path.basename(original)) for original in tables)
    if not filter_func:
        return list(relocated)
    return [candidate for candidate in relocated if filter_func(candidate)]


def get_or_create_segments_info_table(yt):
    """Return the configured segments info path, creating the node if missing.

    When nothing exists at the path yet, the configured ``segments_simple``
    node is copied there (recursively, with ``force=True``).
    """
    segments_info_path = conf.paths.storage.meta.segments_info
    if yt.exists(segments_info_path):
        return segments_info_path
    yt.copy(conf.paths.audience.segments_simple, segments_info_path, recursive=True, force=True)
    return segments_info_path


def get_ts_path(yt, dirname, timestamp):
    """Return a path inside ``dirname`` named after ``timestamp`` in ISO format.

    ``dirname`` is created as a map node (recursively) when it does not exist.
    The name is ``datetime.fromtimestamp(timestamp).isoformat()``, so it is
    rendered in the local timezone of the process.
    """
    directory_missing = not yt.exists(dirname)
    if directory_missing:
        yt.create('map_node', dirname, recursive=True)
    table_name = datetime.datetime.fromtimestamp(timestamp).isoformat()
    return ypath_join(dirname, table_name)


def get_export_id(id_):
    """Return ``id_`` shifted by 2 * 10**9 when it is below 15e8, else ``id_`` unchanged."""
    if id_ < 15e8:
        return id_ + 2 * 10 ** 9
    return id_


class UpdateLimited(YQLTaskV1, IndependentTask):
    """Process a batch of queue tables with the ``update_limited`` YQL resources.

    The query text is the ``update_segments_info`` resource followed by the
    ``update_limited`` resource. After the parent ``run`` returns, the queue
    tables and segments-info tables that were used are removed.
    """

    @property
    def query_main(self):
        """Resource key of the main query."""
        return '/query/update_limited.yql'

    @property
    def update_segments_info(self):
        """Resource key of the segments info update query."""
        return '/query/update_segments_info.yql'

    @cached_property
    def segments_info(self):
        """Path of the segments info table (created on first access if missing)."""
        return get_or_create_segments_info_table(self.yt)

    @property
    def query(self):
        """Return both query resources formatted and concatenated."""
        # TODO(CRYPTA-15774) maybe remove this?
        segments_info_template = rs.find(self.update_segments_info)
        segments_info_part = segments_info_template.format(
            src_tables_list=_yql_tables_list(self.info_tables + [self.segments_info]),
            segments_simple=conf.paths.audience.segments_simple,
            segments_info=self.segments_info,
        )

        # Both outputs are named after the task timestamp.
        batch_name = str(self.timestamp)
        full_storage_output = os.path.join(conf.paths.storage.for_full, batch_name)
        cryptaid_storage_output = os.path.join(conf.paths.storage.crypta_id_queue, batch_name)

        main_template = rs.find(self.query_main)
        main_part = main_template.format(
            src_tables_list=_yql_tables_list(self.queue_tables),
            segments_simple=self.segments_info,
            cryptaid_sourceid=_yql_set(CRYPTAID_SOURCEID),
            data_for_full_storage=full_storage_output,
            data_for_cryptaid_storage=cryptaid_storage_output,
        )

        return segments_info_part + main_part

    @cached_property
    def timestamp(self):
        """Current time, captured once per task instance."""
        return time_utils.get_current_time()

    @cached_property
    def queue_tables(self):
        """Batch of tables from the storage queue, limited by ``QueueBatchSize``."""
        batch_size = conf.proto.Options.Storage.QueueBatchSize
        return _latest(self.yt, conf.paths.storage.queue, n=batch_size)

    @cached_property
    def info_tables(self):
        """Absolute paths of up to 1000 nodes in the queued segments info directory."""
        return self.yt.list(conf.paths.storage.queue_segments_info, max_size=1000, absolute=True)

    def run(self, **kwargs):
        """Run the parent task and remove the processed tables; no-op on an empty queue."""
        if self.queue_tables:
            super(UpdateLimited, self).run(**kwargs)

            processed_tables = self.queue_tables + self.info_tables
            for processed in processed_tables:
                self.yt.remove(processed)


class UpdateFullBase(YQLTaskV1, YtTask, IndependentTask):
    """Common flow for updating a "full" storage table from queued tables.

    Subclasses provide the YQL ``query`` and the attributes read here:
    ``source_dir`` (or an overridden ``source_tables``), ``to_bigb_dir``,
    ``sample_log_dir``, ``sample_collector_dir``, ``storage_schema``,
    ``segments_meta``, ``ids_meta``, ``private_full``, ``public_full`` and
    ``sample_denominator``.
    """

    # Passed to the reducer state as ``OverwriteAll``.
    overwrite_all = Parameter(parse=argparse_utils.boolean_as_string, default="false")

    @cached_property
    def timestamp(self):
        """Current time, captured once per task instance."""
        return time_utils.get_current_time()

    @cached_property
    def source_tables(self):
        """Batch of tables from ``source_dir``, limited by ``QueueBatchSize``."""
        return _latest(self.yt, self.source_dir, n=conf.proto.Options.Storage.QueueBatchSize)

    @cached_property
    def segments_info(self):
        """Path of the segments info table (created on first access if missing)."""
        return get_or_create_segments_info_table(self.yt)

    @cached_property
    def to_update(self):
        """Unique temporary path; used as a reduce source and removed at the end of ``run``."""
        return '//tmp/crypta-audience/ToUpdate-{}'.format(uuid.uuid1())

    @cached_property
    def to_update_meta(self):
        """Unique temporary path; read into the segment state and removed at the end of ``run``."""
        return '//tmp/crypta-audience/ToUpdateMeta-{}'.format(uuid.uuid1())

    @cached_property
    def to_bigb_table(self):
        """Timestamped output path inside ``to_bigb_dir``."""
        return get_ts_path(self.yt, self.to_bigb_dir, self.timestamp)

    @cached_property
    def sample_log_table(self):
        """Timestamped output path inside ``sample_log_dir``."""
        return get_ts_path(self.yt, self.sample_log_dir, self.timestamp)

    @cached_property
    def sample_collector_table(self):
        """Timestamped output path inside ``sample_collector_dir``."""
        return get_ts_path(self.yt, self.sample_collector_dir, self.timestamp)

    @cached_property
    def resource_dir(self):
        """Unique temporary map node; it is created on first access."""
        path = '//tmp/crypta-audience/Resources-{}'.format(uuid.uuid1())
        self.yt.create("map_node", path)
        return path

    def create_storage_table(self, path):
        """Create a scan-optimized table with ``storage_schema`` at ``path`` if it is missing."""
        if not self.yt.exists(path):
            self.yt.create(
                'table',
                path,
                ignore_existing=True,
                attributes={
                    'schema': self.storage_schema,
                    'optimize_for': 'scan',
                },
            )

    @property
    def sort_by(self):
        """Key columns used for the schemas, the output ordering and the reduce."""
        return 'Id', 'IdType'

    @property
    def ids_meta_schema(self):
        """Schema of the ids meta table, derived from ``TIdMeta`` and ``sort_by``."""
        return schema_utils.get_schema_from_proto(storage_pb2.TIdMeta, self.sort_by)

    def ensure_ids_meta_exists(self):
        """Create the ``ids_meta`` table when it does not exist yet."""
        if not self.yt.exists(self.ids_meta):
            self.yt.create(
                'table',
                self.ids_meta,
                attributes={
                    'schema': self.ids_meta_schema,
                    'optimize_for': 'scan',
                }
            )

    def fill_segment_state(self, state, table_path, updated):
        """Load per-segment timestamps from ``table_path`` into ``state.Segments``.

        A segment is flagged ``RemovedOnTTL`` when its timestamp is older than
        the configured ``FullTTL`` relative to the task timestamp; ``Updated``
        is set to ``updated`` unless the segment is removed on TTL. Rows for a
        segment id that is already present overwrite the earlier values.
        """
        for row in self.yt.read_table(table_path):
            segment = state.Segments[row["SegmentID"]]
            segment.Timestamp = row["Timestamp"]
            segment.RemovedOnTTL = (self.timestamp - segment.Timestamp) > conf.proto.Options.Storage.FullTTL
            segment.Updated = updated and not segment.RemovedOnTTL

    @cached_property
    def segments_meta_schema(self):
        """Schema of the segments meta table."""
        return [
            dict(name='SegmentID', type='int64'),
            dict(name='Timestamp', type='int64'),
        ]

    def run(self, **kwargs):
        """Run the YQL query, then reduce its output into the full storage tables.

        Does nothing when there are no source tables.
        """
        if not self.source_tables:
            return
        self.yt.create(
            'table',
            self.segments_meta,
            ignore_existing=True,
            attributes={
                "schema": self.segments_meta_schema,
                "optimize_for": "scan"
            },
        )
        self.create_storage_table(self.private_full)
        self.create_storage_table(self.sample_log_table)

        # NOTE(review): the parent ``run`` presumably executes ``query``, which is
        # expected to fill ``to_update`` and ``to_update_meta`` - confirm against the YQL.
        super(UpdateFullBase, self).run(**kwargs)

        for each in self.source_tables:
            self.yt.remove(each)

        state = storage_pb2.TUpdateFullStateState(
            HardLimit=conf.proto.Options.Storage.AudienceSegmentsHardLimit,
            OverwriteAll=self.overwrite_all,
            Timestamp=self.timestamp,
            UploadTimestampThreshold=int(self.timestamp - datetime.timedelta(days=conf.proto.Options.Storage.ReuploadIntervalDays).total_seconds()),
            SampleSizeLowerBound=conf.proto.Options.Storage.SampleSizeLowerBound,
        )
        state.Sampler.Denominator = self.sample_denominator
        state.Sampler.Rest = conf.proto.Options.Storage.SampleRest

        # Existing segments first, then the freshly updated ones, so the values
        # from ``to_update_meta`` win for segments present in both tables.
        self.fill_segment_state(state, self.segments_meta, False)
        self.fill_segment_state(state, self.to_update_meta, True)
        # Rewrite the segments meta table without the segments removed on TTL.
        self.yt.write_table(
            self.yt.TablePath(self.segments_meta, schema=self.segments_meta_schema),
            (
                {"SegmentID": segment_id, "Timestamp": segment.Timestamp}
                # ``items()`` rather than ``iteritems()``: the latter exists only on Python 2.
                for segment_id, segment in sorted(state.Segments.items(), key=lambda x: x[0])
                if not segment.RemovedOnTTL
            ),
        )

        self.ensure_ids_meta_exists()

        self.native_reduce(
            reducer_name=TUpdateFullState,
            state=state.SerializeToString(),
            source=[self.private_full, self.to_update, self.ids_meta],
            destination=[
                self.yt.TablePath(self.private_full, sorted_by=self.sort_by),
                self.to_bigb_table,
                self.yt.TablePath(self.ids_meta, sorted_by=self.sort_by),
                self.yt.TablePath(self.sample_log_table, sorted_by=self.sort_by),
                self.sample_collector_table,
            ],
            reduce_by=self.sort_by,
            spec={
                'auto_merge': {'mode': 'relaxed'},
                'reducer': {'cpu_limit': 0.5},
            },
        )
        # In-place merges with ``combine_chunks`` for the two sample outputs.
        self.yt.run_merge(self.sample_log_table, self.sample_log_table, spec={'combine_chunks': True})
        self.yt.run_merge(self.sample_collector_table, self.sample_collector_table, spec={'combine_chunks': True})

        yt_helpers.set_yql_proto_fields(self.to_bigb_table, storage_pb2.TUserSegmentsRow, self.yt)
        yt_helpers.set_yql_proto_fields(self.sample_collector_table, storage_pb2.TUserSegmentsRow, self.yt)

        yt_helpers.set_ttl(self.to_bigb_table, datetime.timedelta(days=conf.proto.Options.Storage.BigbTTLDays), self.yt, remove_if_empty=True)
        yt_helpers.set_ttl(self.sample_log_table, datetime.timedelta(days=conf.proto.Options.Storage.SampleLogTTLDays), self.yt, remove_if_empty=True)
        yt_helpers.set_ttl(self.sample_collector_table, datetime.timedelta(days=conf.proto.Options.Storage.SampleLogTTLDays), self.yt, remove_if_empty=True)

        # Publish the updated private table by merging it into the public path.
        self.create_storage_table(self.public_full)
        self.yt.run_merge(self.private_full, self.public_full)

        self.yt.remove(self.to_update)
        self.yt.remove(self.to_update_meta)
        self.yt.remove(self.resource_dir, recursive=True)


class UpdateFullYandexuid(UpdateFullBase):
    """``UpdateFullBase`` configured with the yandexuid paths and query."""

    @property
    def query(self):
        """Formatted text of the ``update_full_yandexuid`` query resource."""
        template = rs.find('/query/update_full_yandexuid.yql')
        return template.format(
            src_tables_list=_yql_tables_list(self.source_tables),
            to_update=self.to_update,
            to_update_meta=self.to_update_meta,
        )

    @cached_property
    def source_dir(self):
        """Configured ``for_full`` storage path."""
        return conf.paths.storage.for_full

    @cached_property
    def to_bigb_dir(self):
        """Configured ``to_bigb`` path for yandexuid."""
        return conf.paths.storage.to_bigb.yandexuid

    @property
    def storage_schema(self):
        """``TFullBinding`` schema with an extra ``yandexuid`` uint64 column appended."""
        columns = schema_utils.get_schema_from_proto(storage_pb2.TFullBinding, self.sort_by)
        columns.append({'name': 'yandexuid', 'type': 'uint64'})
        return columns

    @property
    def segments_meta(self):
        """Configured segments meta path for yandexuid."""
        return conf.paths.storage.meta.segments.full_yandexuid

    @property
    def ids_meta(self):
        """Configured ids meta path for yandexuid."""
        return conf.paths.storage.meta.ids.full_yandexuid

    @property
    def private_full(self):
        """Configured ``full_yandexuid`` storage path."""
        return conf.paths.storage.full_yandexuid

    @property
    def public_full(self):
        """Configured ``public_full_yandexuid`` storage path."""
        return conf.paths.storage.public_full_yandexuid

    @property
    def sample_denominator(self):
        """Configured ``YuidSampleDenominator`` option."""
        return conf.proto.Options.Storage.YuidSampleDenominator

    @property
    def sample_log_dir(self):
        """Configured sample log path for yandexuid."""
        return conf.paths.storage.sample_log.yandexuid

    @property
    def sample_collector_dir(self):
        """Configured sample collector path for yandexuid."""
        return conf.paths.storage.sample_collector.yandexuid


class UpdateFullCryptaId(UpdateFullBase):
    """``UpdateFullBase`` configured with the crypta id paths and query.

    The source tables come from two queues: the crypta id queue and the
    email/phone queue.
    """

    @property
    def query(self):
        """Formatted text of the ``update_full_crypta_id`` query resource."""
        return rs.find('/query/update_full_crypta_id.yql').format(
            emails_phones_src=",".join('"%s"' % table for table in self.emails_phones_src),
            crypta_ids_src=",".join('"%s"' % table for table in self.crypta_ids_src),
            emails_matching=conf.paths.audience.matching.cryptaid.emails,
            phones_matching=conf.paths.audience.matching.cryptaid.phones,
            to_update=self.to_update,
            to_update_meta=self.to_update_meta,
        )

    def create_empty_table_if_queue_is_empty(self, queue_path, add_hash_column=False):
        """Create an empty placeholder table in ``queue_path`` when it has no children.

        The placeholder has ``CryptaID``, ``SegmentID`` and ``Timestamp``
        columns, plus a ``Hash`` string column when ``add_hash_column`` is set.

        Returns ``True`` when a placeholder was created and ``False`` otherwise.
        """
        if self.yt.list(queue_path):
            return False

        schema = [
            dict(name='CryptaID', type='uint64'),
            dict(name='SegmentID', type='int64'),
            dict(name='Timestamp', type='int64'),
        ]

        if add_hash_column:
            schema.append(dict(name='Hash', type='string'))

        self.yt.create(
            'table',
            os.path.join(queue_path, str(uuid.uuid1())),
            attributes=dict(schema=schema, optimize_for='scan'),
        )
        return True

    def _reset_source_listings(self):
        """Drop the cached queue listings so that the next access lists the queues again."""
        # ``cached_property`` keeps the computed value in the instance ``__dict__``
        # under the property name; removing the entry forces a recomputation.
        for cached_name in ('emails_phones_src', 'crypta_ids_src', 'source_tables'):
            self.__dict__.pop(cached_name, None)

    @cached_property
    def emails_phones_src(self):
        """Batch of tables from the email/phone queue, limited by ``QueueBatchSize``."""
        return _latest(
            self.yt,
            conf.paths.storage.email_phone_queue,
            n=conf.proto.Options.Storage.QueueBatchSize,
        )

    @cached_property
    def crypta_ids_src(self):
        """Batch of tables from the crypta id queue, limited by ``QueueBatchSize``."""
        return _latest(
            self.yt,
            conf.paths.storage.crypta_id_queue,
            n=conf.proto.Options.Storage.QueueBatchSize,
        )

    @cached_property
    def source_tables(self):
        """Tables of both queues: crypta id tables followed by email/phone tables."""
        return self.crypta_ids_src + self.emails_phones_src

    @cached_property
    def to_bigb_dir(self):
        """Configured ``to_bigb`` path for crypta id."""
        return conf.paths.storage.to_bigb.crypta_id

    @property
    def storage_schema(self):
        """``TFullBinding`` schema with an extra ``CryptaID`` uint64 column appended."""
        schema = schema_utils.get_schema_from_proto(storage_pb2.TFullBinding, self.sort_by)
        schema.append(dict(name='CryptaID', type='uint64'))
        return schema

    @property
    def segments_meta(self):
        """Configured segments meta path for crypta id."""
        return conf.paths.storage.meta.segments.full_crypta_id

    @property
    def ids_meta(self):
        """Configured ids meta path for crypta id."""
        return conf.paths.storage.meta.ids.full_crypta_id

    @property
    def private_full(self):
        """Configured ``full_crypta_id`` storage path."""
        return conf.paths.storage.full_crypta_id

    @property
    def public_full(self):
        """Configured ``public_full_crypta_id`` storage path."""
        return conf.paths.storage.public_full_crypta_id

    def run(self, **kwargs):
        """Run the update, giving an empty queue a placeholder table first.

        Does nothing when both queues are empty.
        """
        if not self.source_tables:
            return
        created_emails_phones = self.create_empty_table_if_queue_is_empty(
            queue_path=conf.paths.storage.email_phone_queue,
            add_hash_column=True,
        )
        created_crypta_ids = self.create_empty_table_if_queue_is_empty(
            queue_path=conf.paths.storage.crypta_id_queue,
        )
        # The listings were cached by the emptiness check above, i.e. before the
        # placeholders existed. Without a refresh the query would still be
        # rendered with an empty table list for the empty queue and the
        # placeholder would be left behind in the queue.
        if created_emails_phones or created_crypta_ids:
            self._reset_source_listings()

        super(UpdateFullCryptaId, self).run(**kwargs)

    @property
    def sample_denominator(self):
        """Configured ``CryptaIdSampleDenominator`` option."""
        return conf.proto.Options.Storage.CryptaIdSampleDenominator

    @property
    def sample_log_dir(self):
        """Configured sample log path for crypta id."""
        return conf.paths.storage.sample_log.crypta_id

    @property
    def sample_collector_dir(self):
        """Configured sample collector path for crypta id."""
        return conf.paths.storage.sample_collector.crypta_id


class UpdateFullDevices(UpdateFullBase):
    """``UpdateFullBase`` configured with the devices paths and query."""

    @property
    def query(self):
        """Formatted text of the ``update_full_devices`` query resource."""
        template = rs.find('/query/update_full_devices.yql')
        return template.format(
            src_tables_list=_yql_tables_list(self.source_tables),
            to_update=self.to_update,
            to_update_meta=self.to_update_meta,
            device_hashes=conf.paths.audience.matching.device_hashes,
        )

    @property
    def source_dir(self):
        """Configured device queue path."""
        return conf.paths.storage.device_queue

    @property
    def to_bigb_dir(self):
        """Configured ``to_bigb`` path for devices."""
        return conf.paths.storage.to_bigb.devices

    @property
    def storage_schema(self):
        """Schema derived from ``TFullBinding`` and ``sort_by``."""
        binding_proto = storage_pb2.TFullBinding
        return schema_utils.get_schema_from_proto(binding_proto, self.sort_by)

    @property
    def segments_meta(self):
        """Configured segments meta path for devices."""
        return conf.paths.storage.meta.segments.full_devices

    @property
    def ids_meta(self):
        """Configured ids meta path for devices."""
        return conf.paths.storage.meta.ids.full_devices

    @property
    def private_full(self):
        """Configured ``full_devices`` storage path."""
        return conf.paths.storage.full_devices

    @property
    def public_full(self):
        """Configured ``public_full_devices`` storage path."""
        return conf.paths.storage.public_full_devices

    @property
    def sample_denominator(self):
        """Configured ``DeviceSampleDenominator`` option."""
        return conf.proto.Options.Storage.DeviceSampleDenominator

    @property
    def sample_log_dir(self):
        """Configured sample log path for devices."""
        return conf.paths.storage.sample_log.devices

    @property
    def sample_collector_dir(self):
        """Configured sample collector path for devices."""
        return conf.paths.storage.sample_collector.devices


class UpdateFullPuid(UpdateFullBase):
    """``UpdateFullBase`` configured with the puid paths and query."""

    @property
    def query(self):
        """Text of the ``update_full_puid`` query resource rendered by the templater."""
        template_vars = {
            "source_tables": self.source_tables,
            "to_update": self.to_update,
            "to_update_meta": self.to_update_meta,
        }
        return templater.render_resource('/query/update_full_puid.yql', strict=True, vars=template_vars)

    @cached_property
    def source_dir(self):
        """Configured puid queue path."""
        return conf.paths.storage.puid_queue

    @cached_property
    def to_bigb_dir(self):
        """Configured ``to_bigb`` path for puid."""
        return conf.paths.storage.to_bigb.puid

    @property
    def storage_schema(self):
        """Schema derived from ``TFullBinding`` and ``sort_by``."""
        binding_proto = storage_pb2.TFullBinding
        return schema_utils.get_schema_from_proto(binding_proto, self.sort_by)

    @property
    def segments_meta(self):
        """Configured segments meta path for puid."""
        return conf.paths.storage.meta.segments.full_puid

    @property
    def ids_meta(self):
        """Configured ids meta path for puid."""
        return conf.paths.storage.meta.ids.full_puid

    @property
    def private_full(self):
        """Configured ``full_puid`` storage path."""
        return conf.paths.storage.full_puid

    @property
    def public_full(self):
        """Configured ``public_full_puid`` storage path."""
        return conf.paths.storage.public_full_puid

    @property
    def sample_denominator(self):
        """Configured ``PuidSampleDenominator`` option."""
        return conf.proto.Options.Storage.PuidSampleDenominator

    @property
    def sample_log_dir(self):
        """Configured sample log path for puid."""
        return conf.paths.storage.sample_log.puid

    @property
    def sample_collector_dir(self):
        """Configured sample collector path for puid."""
        return conf.paths.storage.sample_collector.puid


class DistributeCryptaidorSegmentsForFull(YQLTaskV1, IndependentTask):
    """YQL task whose output table is a timestamp-named node under ``for_full``."""

    @property
    def query_file(self):
        """Resource key of the query."""
        return '/query/distribute_cryptaidor_segments_for_full.yql'

    @cached_property
    def timestamp(self):
        """Current time, captured once per task instance."""
        return time_utils.get_current_time()

    @property
    def query(self):
        """Formatted text of the query resource."""
        template = rs.find(self.query_file)
        return template.format(
            timestamp=self.timestamp,
            cryptaid_sourceid=_yql_set(CRYPTAID_SOURCEID),
            full_cryptaid_table=conf.paths.storage.public_full_crypta_id,
            segments_simple_table=conf.paths.audience.segments_simple,
            cryptaid_yandexuid_matching_table=conf.paths.audience.matching.cryptaid.yandexuids,
            output_table=os.path.join(conf.paths.storage.for_full, str(self.timestamp)),
        )


class UploadSegmentPrioritiesToSandbox(YQLTaskV1, YtTask, IndependentTask):
    """Build segment priorities and upload them to Sandbox as JSON and vinyl resources."""

    @cached_property
    def segments_info(self):
        """Path of the segments info table (created on first access if missing)."""
        return get_or_create_segments_info_table(self.yt)

    @property
    def query_priorities(self):
        """Formatted text of the ``segments_priorities`` query resource."""
        template = rs.find('/query/segments_priorities.yql')
        return template.format(
            segments_simple=self.segments_info,
            cryptaid_sourceid=_yql_set(CRYPTAID_SOURCEID),
            default_segment_priority=DEFAULT_SEGMENT_PRIORITY,
            max_segment_priority=MAX_SEGMENT_PRIORITY,
        )

    @property
    def query_direct(self):
        """Formatted text of the ``direct_segments_info`` query resource."""
        template = rs.find('/query/direct_segments_info.yql')
        return template.format(direct_current=conf.paths.storage.direct_current)

    @property
    def yql_libs(self):
        """YQL library texts attached to the query."""
        return [rs.find('/yql_lib/to_retargeting_id.yql')]

    @cached_property
    def direct_priorities(self):
        """Unique temporary path; read by ``run`` and removed afterwards."""
        return '//tmp/crypta-audience/DirectPriorities-{}'.format(uuid.uuid1())

    @cached_property
    def segments_simple(self):
        """Configured ``segments_simple_dyntable`` path."""
        return conf.paths.audience.segments_simple_dyntable

    @property
    def query_dump(self):
        """Formatted text of the ``dump_priorities`` query resource."""
        segments_meta = conf.paths.storage.meta.segments
        return rs.find('/query/dump_priorities.yql').format(
            segments_priorities=self.direct_priorities,
            segments_full=segments_meta.full_yandexuid,
            segments_full_crypta_id=segments_meta.full_crypta_id,
            segments_full_devices=segments_meta.full_devices,
        )

    @property
    def query(self):
        """The three query parts concatenated: priorities, direct, dump."""
        return self.query_priorities + self.query_direct + self.query_dump

    def run(self, **kwargs):
        """Run the query, assemble ``TPrioritiesByType`` and upload both resources."""
        super(UploadSegmentPrioritiesToSandbox, self).run(**kwargs)

        proto = storage_pb2.TPrioritiesByType()
        by_segment_type_and_direct_info = proto.PrioritiesByType["by_segment_type_and_direct_info"]
        adfox = proto.PrioritiesByType["adfox"]
        zen = proto.PrioritiesByType["zen"]

        for group in (by_segment_type_and_direct_info, adfox, zen):
            group.DefaultPriority = MIN_SEGMENT_PRIORITY

        # Only active rows are recorded; negative ones get the maximum priority.
        for row in self.yt.read_table(self.direct_priorities):
            retargeting_id = row["RetargetingID"]
            priority = row["Priority"]

            if not row["Active"]:
                continue
            if row["Negative"]:
                priority = MAX_SEGMENT_PRIORITY
            by_segment_type_and_direct_info.Priorities[retargeting_id] = priority

        self.yt.remove(self.direct_priorities)

        flag_columns = (("adfox_retargeting", adfox), ("zen_retargeting", zen))
        for row in self.yt.read_table(self.segments_simple):
            retargeting_id = get_export_id(row["segment_id"])
            for flag_column, group in flag_columns:
                if row[flag_column] == 1:
                    group.Priorities[retargeting_id] = DEFAULT_SEGMENT_PRIORITY

        client = SandboxClient(conf.proto.Sandbox.Token)

        def upload(path, resource_type, description, alias_path):
            # Shared upload settings: owner, ttl and the "released" attribute
            # derived from the configured environment.
            return client.upload_to_sandbox(
                path=path,
                resource_type=resource_type,
                description=description,
                owner="CRYPTA",
                attributes={"ttl": "30", "released": PROTO_ENV_TO_RELEASE[conf.proto.Environment]},
                alias_path=alias_path,
            )

        with tempfile.NamedTemporaryFile("w") as proto_output, tempfile.NamedTemporaryFile("w") as vinyl_output:
            json_config = proto2json.Proto2JsonConfig(map_as_object=True)
            proto_output.write(proto2json.proto2json(proto, config=json_config))
            # Flush so the uploader sees the complete file on disk.
            proto_output.flush()

            resource_id = upload(
                proto_output.name,
                PRIORITIES_RESOURCE_TYPE,
                "Audience segments priorities",
                storage_pb2.TFilePaths().Priorities,
            )
            logger.info("Uploaded proto priorities, id: %s", resource_id)

            segment_priorities.convert_proto_to_vinyl(proto, vinyl_output.name)

            vinyl_resource_id = upload(
                vinyl_output.name,
                VINYL_PRIORITIES_RESOURCE_TYPE,
                "Audience segments priorities (vinyl)",
                storage_pb2.TFilePaths().PrioritiesVinyl,
            )
            logger.info("Uploaded vinyl priorities, id: %s", vinyl_resource_id)


class DumpBigbSample(YQLTaskV1, YtTask, IndependentTask):
    """Dump what the BigB handle returns for a sample of ids.

    The YQL query writes the sample to a temporary table; every id from it is
    then requested from the configured HTTP handle and the results are written
    to a timestamped table.
    """

    # Upper bound in seconds passed to ``requests.get`` (applies to connecting
    # and to waiting for data). Without it a stalled connection would block a
    # worker thread, and therefore the whole task, indefinitely.
    BIGB_REQUEST_TIMEOUT_SECONDS = 60

    @cached_property
    def timestamp(self):
        """Current time, captured once per task instance."""
        return time_utils.get_current_time()

    @property
    def query(self):
        """Text of the ``get_id_sample`` query resource rendered by the templater."""
        return templater.render_resource('/query/get_id_sample.yql', strict=True, vars={
            "tmp_table": self.tmp_table,
            "input_table": conf.paths.storage.public_full_yandexuid,
            "denominator": conf.proto.Options.Storage.YuidSampleDenominator,
            "rest": conf.proto.Options.Storage.SampleRest,
            "lower_bound": conf.proto.Options.Storage.SampleSizeLowerBound,
        })

    @property
    def udfs(self):
        """UDFs attached to the query."""
        return [yql_client.Udf("libcrypta_sampler_udf.so", conf.paths.udfs.sampler)]

    @cached_property
    def tmp_table(self):
        """Unique temporary path; read by ``run`` and removed afterwards."""
        return '//tmp/crypta-audience/DumpBigbSample-{}'.format(uuid.uuid1())

    @cached_property
    def tvm_headers(self):
        """Request headers with a TVM service ticket for the destination; computed once."""
        dst_tvm_id = conf.proto.Options.Storage.DumpBigbSample.DestinationTvm.DestinationTvmId
        tvm_client = tvmauth.TvmClient(tvmauth.TvmApiClientSettings(
            self_tvm_id=conf.proto.Options.Storage.DumpBigbSample.SourceTvm.SourceTvmId,
            self_secret=conf.proto.Options.Storage.DumpBigbSample.SourceTvm.Secret,
            dsts=[dst_tvm_id],
            localhost_port=tvm_helpers.get_tvm_test_port(),
        ))
        return tvm_helpers.get_tvm_headers(tvm_client.get_service_ticket_for(tvm_id=dst_tvm_id))

    @retry.retry(tries=5, delay=1)
    def get_segments_from_bigb(self, uid):
        """Request ``uid`` from the handle and return ``(uid, segments, update_time)``.

        ``segments`` holds the first ``uint_values`` entry of every returned
        item whose ``keyword_id`` is 557. ``update_time`` is taken from the
        first returned item, or is 0 when the response has no items.

        Any exception, including an HTTP error status or a timeout, triggers a
        retry: up to 5 attempts with a one second delay.
        """
        response = requests.get(
            conf.proto.Options.Storage.DumpBigbSample.Handle,
            params={
                "exp-json": json.dumps({
                    "Crypta": {
                        "Audience": {
                            "MaxPublicRecords": conf.proto.Options.Storage.AudienceSegmentsHardLimit,
                            "RemoveLalFromParent": False
                        }
                    },
                    "EagleSettings": {"GlueByDefault": False},
                }),
                "bigb-uid": uid,
                "keywords": "557",
                "format": "protobuf-json",
                "client": conf.proto.Options.Storage.DumpBigbSample.DestinationTvm.ClientName,
            },
            headers=self.tvm_headers,
            timeout=self.BIGB_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        # Parse the body once; a missing, null or empty "items" all mean "no items".
        items = response.json().get("items") or []
        segments = [item["uint_values"][0] for item in items if item["keyword_id"] == 557]
        update_time = items[0]["update_time"] if items else 0
        return uid, segments, update_time

    def get_bigb_state(self, uids):
        """Fetch all ``uids`` with a thread pool; results keep the order of ``uids``.

        The first failed request (after its retries) re-raises its exception here.
        """
        with concurrent.futures.ThreadPoolExecutor(conf.proto.Options.Storage.DumpBigbSample.Threads) as executor:
            futures = [executor.submit(self.get_segments_from_bigb, uid) for uid in uids]

            concurrent.futures.wait(futures)
            result = [future.result() for future in futures]

        return result

    @cached_property
    def dst_path(self):
        """Timestamped destination table path carrying the ``TBigbDump`` schema."""
        return self.yt.TablePath(
            get_ts_path(self.yt, conf.paths.storage.bigb_sample_dump_log.yandexuid, self.timestamp),
            schema=schema_utils.get_schema_from_proto(storage_pb2.TBigbDump),
        )

    def run(self, **kwargs):
        """Run the sampling query, fetch every sampled id and write the dump table."""
        super(DumpBigbSample, self).run(**kwargs)

        uids = [x["Id"] for x in self.yt.read_table(self.tmp_table)]
        bigb = self.get_bigb_state(uids)
        self.yt.write_table(self.dst_path, (
            {
                "Yandexuid": uid,
                "Segments": segments,
                "UpdateTime": update_time,
            }
            for uid, segments, update_time in bigb
        ))
        self.yt.remove(self.tmp_table)
        yt_helpers.set_ttl(str(self.dst_path), datetime.timedelta(days=conf.proto.Options.Storage.SampleLogTTLDays), self.yt, remove_if_empty=True)

        logger.info("Done")


class CheckDesync(YQLTaskV1, YtTask, IndependentTask):
    """Run the ``check_desync`` query and report its two counters to Solomon."""

    @cached_property
    def timestamp(self):
        """Current time, captured once per task instance."""
        return time_utils.get_current_time()

    @cached_property
    def output_table(self):
        """Timestamped output path inside the configured desync log directory."""
        log_dir = conf.paths.storage.check_desync_log.yandexuid
        return get_ts_path(self.yt, log_dir, self.timestamp)

    @property
    def yql_libs(self):
        """YQL library texts attached to the query."""
        return [rs.find('/yql_lib/to_retargeting_id.yql')]

    @property
    def query(self):
        """Text of the ``check_desync`` query resource rendered by the templater."""
        template_vars = {
            "to_bigb_dir": conf.paths.storage.sample_collector.yandexuid,
            "sample_log_dir": conf.paths.storage.sample_log.yandexuid,
            "bigb_sample_dump_log": conf.paths.storage.bigb_sample_dump_log.yandexuid,
            "output_table": self.output_table,
            "lower_bound": conf.proto.Options.Storage.SampleSizeLowerBound,
        }
        return templater.render_resource('/query/check_desync.yql', strict=True, vars=template_vars)

    def run(self, **kwargs):
        """Execute the query, set a TTL on the output table and send the metrics.

        The first cell of the first result is reported as ``sample_yuids`` and
        the first cell of the second result as ``desynced_yuids``.
        """
        query_results = list(self.run_query())
        for query_result in query_results:
            query_result.fetch_full_data()

        first_result, second_result = query_results[0], query_results[1]
        sample_yuids = first_result.rows[0][0]
        desynced_yuids = second_result.rows[0][0]

        yt_helpers.set_ttl(
            self.output_table,
            datetime.timedelta(days=conf.proto.Options.Storage.DesyncLogTTLDays),
            self.yt,
            remove_if_empty=True,
        )

        solomon_reporter = reporter.create_solomon_reporter(
            oauth_token=conf.proto.Solomon.Token,
            url=conf.proto.Solomon.Url,
            project="crypta_audience",
            cluster=getoptpb.convert_env_enum_to_string(conf.proto.Environment),
            service="metrics_audience",
        )
        metrics = (
            ("sample_yuids", sample_yuids),
            ("desynced_yuids", desynced_yuids),
        )
        for metric_name, metric_value in metrics:
            solomon_reporter.set_value(metric_name, metric_value)

        logger.info("Done")
