from crypta.graph.fuzzy.lib.luiger import BaseTask, DateParameter, IntParameter, TrueTarget
from crypta.lib.python.native_yt import run_native_reduce, run_native_map_reduce
from crypta.graph.fuzzy.lib.common import cached_property
from crypta.graph.fuzzy.lib.common.dates import days_range_back, parse_day
import crypta.graph.fuzzy.lib.config as conf
import logging
from crypta.graph.fuzzy.lib.tasks.sources.proto.visitlog_logins_messages_pb2 import TFilterRareLoginsOptions, TFilterKeysOptions

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class CleanupLogins(BaseTask):
    """Remove dated entries under ``conf.LoginPaths.ROOT`` that are too old.

    An entry is outdated when its name parses as a day (via ``parse_day``)
    that lies more than ``days`` days before ``date``.
    """

    date = DateParameter()
    days = IntParameter(default=30)

    @cached_property
    def outdated_tables(self):
        """Return the full paths of all outdated entries under the root.

        Entries whose name makes ``parse_day`` raise ``ValueError`` are
        ignored.
        """
        root = conf.LoginPaths.ROOT
        expired = []
        for name in self.yt.list(root):
            try:
                day = parse_day(name.rsplit("/", 1)[-1])
            except ValueError:
                # Name is not a parseable day; leave the entry alone.
                continue
            age = self.date - day
            if age.days <= self.days:
                # Still inside the retention window.
                continue
            expired.append("{base}/{table}".format(base=root, table=name))
        return expired

    def output(self):
        """Return one ``not_exists`` target per outdated path plus a ``TrueTarget``.

        NOTE(review): ``TrueTarget`` is presumably an always-satisfied target
        that keeps the output list non-empty when nothing is outdated -
        confirm against its definition.
        """
        targets = [self.yt.targets.not_exists(path) for path in self.outdated_tables]
        targets.append(TrueTarget())
        return targets

    def _run(self):
        """Remove every outdated path."""
        for path in self.outdated_tables:
            self.yt.remove(path)


class ExtractDayLogins(BaseTask):
    """Build the day logins table for ``date`` from that day's visit log.

    Runs a native map-reduce (mapper ``NCommonLogin::TFilterKeys``, reducer
    ``NCommonLogin::TFilterRareLogins``) and then sorts the result by
    ``host``, ``keyword``, ``login``.
    """

    date = DateParameter()
    threshold = IntParameter(default=10)

    @property
    def source(self):
        """Input path: ``conf.VisitLogPaths.DAY_LOG`` formatted with the ISO date."""
        day = self.date.isoformat()
        return conf.VisitLogPaths.DAY_LOG.format(date=day)

    @property
    def destination(self):
        """Output path: ``conf.LoginPaths.DAY_LOGINS`` formatted with the ISO date."""
        day = self.date.isoformat()
        return conf.LoginPaths.DAY_LOGINS.format(date=day)

    @property
    def destination_schema(self):
        """Schema used when (re)creating the destination table."""
        return conf.LoginPaths.DAY_LOGINS_SCHEMA

    def output(self):
        """Return a single ``not_empty`` target on the destination table."""
        target = self.yt.targets.not_empty(self.destination)
        return [target]

    @property
    def filter_rare_logins_options(self):
        """Serialized ``TFilterRareLoginsOptions`` carrying ``threshold``."""
        options = TFilterRareLoginsOptions(Threshold=self.threshold)
        return options.SerializeToString()

    @property
    def filter_keys_options(self):
        """Serialized ``TFilterKeysOptions`` with the fixed keyword list."""
        keywords = [
            "login",
            "user",
            "userid",
            "clientid",
            "uid",
            "email",
            "emailhash",
            # Kept exactly as written in the original literal.
            "\u043b\u043e\u0433\u0438\u043d",
            "computerid",
            "cid",
            "suserid",
        ]
        options = TFilterKeysOptions(Keywords=keywords)
        return options.SerializeToString()

    def _run(self):
        """Recreate the destination table, fill it via map-reduce and sort it."""
        target = self.destination
        key_columns = ("host", "keyword", "login")

        self.yt.create_table_with_schema(
            target,
            self.destination_schema,
            strict=True,
            recreate_if_exists=True,
        )
        # Each call receives its own fresh list of key columns.
        run_native_map_reduce(
            mapper_name="NCommonLogin::TFilterKeys",
            reducer_name="NCommonLogin::TFilterRareLogins",
            source=self.source,
            destination=target,
            proxy=self.yt.proxy,
            transaction=self.yt.transaction_id,
            pool=conf.Yt.POOL,
            title="Day Logins",
            mapper_state=self.filter_keys_options,
            reducer_state=self.filter_rare_logins_options,
            reduce_by=list(key_columns),
            sort_by=list(key_columns),
        )
        self.yt.run_sort(target, target, sort_by=list(key_columns))


class ExtractIntervalLogins(BaseTask):
    """Build the candidates table from the day logins tables of an interval.

    Requires ``ExtractDayLogins`` for every date produced by
    ``days_range_back(date, INTERVAL)`` plus ``CleanupLogins`` for ``date``.
    The day tables are reduced with ``NCommonLogin::TExpose`` into a temporary
    table, which is sorted by the yandexuid pair and reduced with
    ``NCommonLogin::TUnique`` into the destination.
    """

    date = DateParameter()
    # Reuses the parameter object declared on ExtractDayLogins.
    threshold = ExtractDayLogins.threshold
    INTERVAL = 14

    # NOTE(review): the double-underscore name is mangled per class; if
    # ``cached_property`` stores its value under ``func.__name__`` the cache
    # would never be hit - confirm against its implementation.
    @cached_property
    def __requirements(self):
        """Return one ``ExtractDayLogins`` task per date in the interval."""
        day_tasks = []
        for process_date in days_range_back(self.date, self.INTERVAL):
            day_tasks.append(ExtractDayLogins(date=process_date))
        return day_tasks

    def requires(self):
        """Return the per-day extraction tasks followed by the cleanup task."""
        dependencies = list(self.__requirements)
        dependencies.append(CleanupLogins(date=self.date))
        return dependencies

    def output(self):
        """Return a ``table_is_actual`` target for the destination and ISO date."""
        stamp = self.date.isoformat()
        return [self.yt.targets.table_is_actual(self.destination, stamp)]

    @property
    def filter_rare_logins_options(self):
        """Serialized ``TFilterRareLoginsOptions`` carrying ``threshold``."""
        options = TFilterRareLoginsOptions(Threshold=self.threshold)
        return options.SerializeToString()

    @property
    def destination(self):
        """Output path: ``conf.LoginPaths.CANDIDATES``."""
        return conf.LoginPaths.CANDIDATES

    @property
    def destination_schema(self):
        """Schema used when (re)creating the destination table."""
        return conf.LoginPaths.CANDIDATES_SCHEMA

    @cached_property
    def sources(self):
        """Return the destination paths of all per-day extraction tasks."""
        return [day_task.destination for day_task in self.__requirements]

    def _run(self):
        """Recreate the destination, fill it in two reduce steps and stamp it."""
        result_table = self.destination
        login_key = ("host", "keyword", "login")

        self.yt.create_table_with_schema(
            result_table,
            self.destination_schema,
            strict=True,
            recreate_if_exists=True,
        )
        with self.yt.TempTable() as exposed_pairs:
            # Step 1: reduce the day tables by login key into the temp table.
            run_native_reduce(
                reducer_name="NCommonLogin::TExpose",
                source=self.sources,
                destination=exposed_pairs,
                proxy=self.yt.proxy,
                transaction=self.yt.transaction_id,
                pool=conf.Yt.POOL,
                title="Expose users with the same site login",
                reducer_state=self.filter_rare_logins_options,
                reduce_by=list(login_key),
                sort_by=list(login_key),
            )
            pair_columns = [conf.Constants.YUID_LEFT, conf.Constants.YUID_RIGHT]
            # Step 2: re-sort by the pair columns so the next reduce can group on them.
            self.yt.run_sort(exposed_pairs, exposed_pairs, sort_by=pair_columns)
            run_native_reduce(
                reducer_name="NCommonLogin::TUnique",
                source=exposed_pairs,
                destination=result_table,
                proxy=self.yt.proxy,
                transaction=self.yt.transaction_id,
                pool=conf.Yt.POOL,
                title="Group matched pairs",
                reduce_by=pair_columns,
            )
        self.yt.run_sort(result_table, result_table, sort_by=pair_columns)
        # Record the generation date as a table attribute; output() passes
        # the same ISO date to table_is_actual.
        self.yt.set(result_table + "/@generate_date", self.date.isoformat())
