from crypta.graph.fuzzy.lib.luiger import BaseTask, DateParameter, TrueTarget
from crypta.lib.python.native_yt import run_native_reduce, run_native_map_reduce
from crypta.graph.fuzzy.lib.common.dates import parse_day
from crypta.graph.fuzzy.lib.common import cached_property
from yt.wrapper import common as yt_common
import crypta.graph.fuzzy.lib.config as conf
import logging
from constants import YANDEXUID_LEFT, YANDEXUID_RIGHT, SQUARE_IDX
import geo_operations  # noqa
from crypta.graph.fuzzy.lib.tasks.sources.proto.geo_pb2 import TRadius

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class CleanupGeo(BaseTask):
    """Delete every table in the geo candidates directory except the last one in sorted order."""

    @cached_property
    def outdated_tables(self):
        """Return full paths of all candidate tables except the lexicographically last.

        The listing is read once and cached on the instance. An empty or
        single-entry directory yields an empty list.
        """
        candidates_dir = conf.Paths.sources.geo.CANDIDATES
        paths = []
        for name in self.yt.list(candidates_dir):
            paths.append("{base_dir}/{table}".format(base_dir=candidates_dir, table=name))
        paths.sort()
        # Everything but the final (greatest) path is scheduled for removal.
        return paths[:-1]

    def output(self):
        """Return one "not exists" target per outdated table, followed by a TrueTarget.

        The trailing TrueTarget keeps the returned list non-empty even when
        there is nothing to remove.
        NOTE(review): TrueTarget is presumably an always-satisfied target - confirm.
        """
        targets = [self.yt.targets.not_exists(path) for path in self.outdated_tables]
        targets.append(TrueTarget())
        return targets

    def _run(self):
        """Remove each outdated table from YT."""
        for path in self.outdated_tables:
            self.yt.remove(path)


class ExtractHomeWorkLog(BaseTask):
    """Build the geo candidates table from the latest home/work snapshot not later than ``date``.

    The task reads ``<HOMEWORK_DIR>/<day>/homework_yuid`` and writes
    ``<CANDIDATES>/<day>``, where ``<day>`` is :attr:`last_actual_source`.
    NOTE(review): judging by the operation titles, the output holds unique
    pairs of neighbouring yandexuids; the native mapper/reducers are not
    visible here - confirm against their implementation.
    """

    date = DateParameter()
    # Radius handed to the native mapper and reducer via a serialized TRadius message.
    # NOTE(review): units are defined by the native geo code - confirm.
    RADIUS = 25.0

    def requires(self):
        """Run CleanupGeo before this task."""
        return [CleanupGeo()]

    @property
    def compression(self):
        """Storage codecs passed when the destination table is created."""
        return {"compression_codec": "brotli_3", "erasure_codec": "lrc_12_2_2"}

    def _run(self):
        """Produce the destination table in three steps.

        1. Map-reduce over the source grouped by ``SQUARE_IDX`` into a temporary table.
        2. Sort the temporary table by the yandexuid pair and reduce it into
           the destination table.
        3. Sort the destination table in place by the same yandexuid pair.
        """
        self.yt.create_table_with_schema(
            self.destination,
            self.destination_schema,
            compression=self.compression,
            strict=True,
            recreate_if_exists=False,
        )
        # The same serialized radius is given to both the mapper and the reducer.
        radius_state = TRadius(radius=self.RADIUS).SerializeToString()
        with self.yt.TempTable() as candidates:
            op = run_native_map_reduce(
                mapper_name="NFuzzyGeo::TSpaceSlicer",
                reducer_name="NFuzzyGeo::TFindNeighbors",
                source=self.source,
                destination=candidates,
                proxy=self.yt.proxy,
                transaction=self.yt.transaction_id,
                pool=conf.Yt.POOL,
                title="Find neighbor yandexuids",
                reduce_by=[SQUARE_IDX],
                sort_by=[SQUARE_IDX],
                mapper_state=radius_state,
                reducer_state=radius_state,
                spec={"data_size_per_map_job": 16 * yt_common.MB, "partition_data_size": 16 * yt_common.MB},
            )
            logger.info("Operation %s is done", op)

            # The following reduce groups rows by the yandexuid pair, so sort by it first.
            self.yt.run_sort(candidates, sort_by=[YANDEXUID_LEFT, YANDEXUID_RIGHT])

            op = run_native_reduce(
                reducer_name="NFuzzyGeo::TUniqueNeighbors",
                source=candidates,
                destination=self.destination,
                proxy=self.yt.proxy,
                transaction=self.yt.transaction_id,
                pool=conf.Yt.POOL,
                title="Filter unique records",
                reduce_by=[YANDEXUID_LEFT, YANDEXUID_RIGHT],
            )

        logger.info("Operation %s is done", op)

        self.yt.run_sort(self.destination, self.destination, sort_by=[YANDEXUID_LEFT, YANDEXUID_RIGHT])

    @cached_property
    def last_actual_source(self):
        """Return the latest snapshot day in HOMEWORK_DIR that is on or before ``self.date``.

        Raises:
            ValueError: if no listed entry parses to a day on or before ``self.date``.
        """
        homework_dir = conf.Paths.sources.geo.HOMEWORK_DIR
        eligible_days = []
        for name in self.yt.list(homework_dir):
            # Parse each directory entry once and reuse the result for both
            # the filter and the maximum.
            day = parse_day(name)
            if day <= self.date:
                eligible_days.append(day)
        if not eligible_days:
            # A bare max() on an empty sequence would raise an uninformative
            # ValueError; keep the exception type but say what is missing.
            raise ValueError(
                "No home/work source found in {directory} on or before {date}".format(
                    directory=homework_dir, date=self.date
                )
            )
        return max(eligible_days)

    @cached_property
    def source_type(self):
        """Source type identifier for this task (``conf.SourceTypes.GEO_HOMEWORK``)."""
        return conf.SourceTypes.GEO_HOMEWORK

    @cached_property
    def source(self):
        """Path of the ``homework_yuid`` table for :attr:`last_actual_source`."""
        return "{base}/{date}/homework_yuid".format(
            base=conf.Paths.sources.geo.HOMEWORK_DIR, date=self.last_actual_source.isoformat()
        )

    @cached_property
    def destination(self):
        """Path of the candidates table, named after :attr:`last_actual_source`."""
        return "{geo_dir}/{date}".format(
            geo_dir=conf.Paths.sources.geo.CANDIDATES, date=self.last_actual_source.isoformat()
        )

    @cached_property
    def destination_schema(self):
        """Schema used when creating the destination table."""
        return conf.Paths.sources.geo.CANDIDATES_SCHEMA

    def output(self):
        """Yield a single target requiring the destination table to be non-empty."""
        yield self.yt.targets.not_empty(self.destination)
