import time
import contextlib
import datetime
import logging

from cached_property import cached_property
from dateutil.parser import parse as parse_dt

from crypta.graph.data_import.soup.lib import get_daily_edges
from crypta.graph.data_import.stream.lib.tasks.base import ProcessedMixin, SolomonMixin, YQLRunnerTask, IndependentTask
from crypta.lib.python.bt.workflow import Parameter

import crypta.lib.python.bt.conf.conf as conf

logger = logging.getLogger(__name__)


class SoupTask(ProcessedMixin, SolomonMixin, YQLRunnerTask, IndependentTask):

    """Turn not-yet-processed stream tables into soup tables.

    Input tables are looked up under ``conf.paths.stream.storage``; soup
    tables live under ``conf.paths.storage.soup``. The query is rendered from
    the ``soup.sql.j2`` template with the context built by
    ``get_context_data``.
    """

    # Comma-separated log source names. Parsed into a set of lower-cased,
    # stripped, non-empty names; an empty value means "no filtering".
    log_sources = Parameter(
        default="", parse=lambda value: set(filter(None, map(lambda each: each.lower().strip(), value.split(","))))
    )
    # Processing date as "YYYY-MM-DD"; when unset, `date` falls back to today.
    run_date = Parameter(default=None)
    # Explicit "throw before" date passed to the template; when unset it is
    # derived in `get_context_data` as `date - conf.proto.DaysTTL` days.
    throw_before_date = Parameter(default=None)
    # True for "+", "yes", "true", "t", "1" (case-insensitive).
    # NOTE(review): the default is the *string* "False"; this is only falsy if
    # Parameter runs `parse` on defaults as well -- confirm against Parameter.
    commit_full_day = Parameter(parse=lambda value: value.lower() in {"+", "yes", "true", "t", "1"}, default="False")

    # Not read anywhere in this class; presumably consumed by a base class or
    # mixin -- TODO confirm.
    update_all_log_sources = True

    def get_input_tables(self, path):
        """Lookup all observed path and list tables"""
        table_iterator = self.yt.search(
            path,
            node_type=["table"],
            attributes=["row_count", "processed", "creation_time", "soup_timestamp"],
            depth_bound=1,
            follow_links=False,
        )
        for table in table_iterator:
            if table.attributes.get("soup_timestamp", False):
                # already souped
                continue
            elif table.attributes.get("row_count", 0) > 0:
                yield table
            elif table.attributes.get("processed", {}).get("source", []):
                # if table is processed with input sources, but still has no any line take it
                yield table

    def input(self):
        """Return the list of stream tables to feed into the soup query.

        Tables come from ``get_input_tables`` over the stream storage. When
        ``log_sources`` is set, only tables whose ``processed/ls`` attribute
        is one of those log sources are kept.

        When ``commit_full_day`` is set, the tables are returned only if at
        least one of their ``processed/source`` paths is accepted by
        ``_is_ready``; otherwise an empty list is returned.

        The result is always a real list. Previously the filtered value was a
        lazy ``filter`` object, which on Python 3 was partially consumed by
        the readiness scan before being returned, silently dropping tables.
        """
        input_tables = list(self.get_input_tables(conf.paths.stream.storage))
        # filter by log sources
        if self.log_sources:
            input_tables = [
                table
                for table in input_tables
                if table.attributes.get("processed", {}).get("ls") in self.log_sources
            ]

        if self.commit_full_day:
            has_ready_source = any(
                self._is_ready(source)
                for table in input_tables
                for source in table.attributes.get("processed", {}).get("source", [])
            )
            return input_tables if has_ready_source else []

        return input_tables

    @property
    def query_template(self):
        return "soup.sql.j2"

    def output(self):
        """Return the soup tables this run writes to.

        Tables are listed with ``get_input_tables`` over the soup directory.
        When ``log_sources`` is set, only tables whose name suffix (the part
        after the last underscore) is one of those log sources are kept.
        """
        candidates = list(self.get_input_tables(conf.paths.storage.soup))
        if self.log_sources:
            return [node for node in candidates if node.split("_")[-1] in self.log_sources]
        return candidates

    def postprocess_input_table_non_transactional(self, tbl):
        timestamp = int(time.time())
        self.yt.set(
            "{path}/@expiration_time".format(path=tbl),
            "{date:%Y-%m-%d %H:%M:%S.%f%Z}+03:00".format(date=datetime.datetime.now() + datetime.timedelta(hours=12)),
        )
        self.yt.set("{path}/@soup_timestamp".format(path=tbl), timestamp)

    def get_edges(self):
        """Return list of observed edges"""
        if not self.log_sources:
            return get_daily_edges()
        else:
            if ("wl" in self.log_sources and "fp" not in self.log_sources) or (
                "fp" in self.log_sources and "wl" not in self.log_sources
            ):
                raise Exception("Watchlog should be with FP Always")
            return [edge for edge in get_daily_edges() if edge.LogSource.Name in self.log_sources]

    def get_context_data(self, **kwargs):
        """Build the context used to render the soup query template.

        Extends the parent context with the run date, the "throw before"
        date, input and output tables, the soup directory, the edge types and
        the log source name.

        Raises:
            AssertionError: if the configured soup path is not the stream soup
                directory. This is raised explicitly (not via ``assert``) so
                the guard is still enforced when Python runs with ``-O``.
        """
        context = super(SoupTask, self).get_context_data(**kwargs)
        if not conf.paths.storage.soup.endswith("state/graph/stream/soup"):
            raise AssertionError("USE ONLY AT STREAM!")

        # WARNING: this is dangerous.
        # DANGER: do not run this on the full soup, use it only in the stream
        # directory: if run on prod, you may lose soup data.
        throw_before_date = self.throw_before_date or (
            self.date - datetime.timedelta(days=conf.proto.DaysTTL)
        ).strftime("%Y-%m-%d")

        context.update(
            stream=True,
            date=self.date.strftime("%Y-%m-%d"),
            throw_before_date=throw_before_date,
            input_tables=self.input_tables,
            soup_dir=conf.paths.storage.soup,
            soup_tables=self.output_tables,
            edge_types=self.get_edges(),
            log_source_name=self.ls,
            normalize_lazy=True,
        )
        return context

    @cached_property
    def date(self):
        """Processing date: ``run_date`` parsed as YYYY-MM-DD, else today."""
        if not self.run_date:
            return datetime.date.today()
        parsed = datetime.datetime.strptime(self.run_date, "%Y-%m-%d")
        return parsed.date()

    @contextlib.contextmanager
    def run_context(self):
        """Wrap the parent run context with soup-specific setup and teardown.

        Before the run, the ``validation`` directory for the processing day is
        created (if missing). After the wrapped context exits normally, the
        day directory gets its expiration time, run timings are sent via
        ``solomonify`` and obsolete stream tables are cleaned up.
        """
        validation_dir = "{soup_dir}/day/{date:%Y-%m-%d}/validation".format(
            soup_dir=conf.paths.storage.soup, date=self.date
        )
        self.yt.create("map_node", validation_dir, ignore_existing=True, recursive=True)

        start_at = time.time()
        with super(SoupTask, self).run_context() as parent_ctx:
            yield parent_ctx

        # Teardown order is kept as is: the finish time is taken after the
        # expiration has been set.
        self._set_expiration()
        finish_at = time.time()
        self.solomonify(start_at, finish_at)
        self._clean_obsolete()

    def run(self, **kwargs):
        """Run the parent task, unless there are no unprocessed tables."""
        if self.unprocessed_tables:
            return super(SoupTask, self).run(**kwargs)

        # Nothing to do: skip the run entirely.
        logger.info("Soup finish fast, no tables to process!")
        return None

    def _set_expiration(self):
        """Set the expiration time on the processing day's soup directory.

        If the day directory exists, it is set to expire at 12:00 UTC on
        ``date + conf.proto.DaysTTL`` days. A missing directory is left alone.
        """
        day_dir = "{path}/day/{date:%Y-%m-%d}".format(path=conf.paths.storage.soup, date=self.date)
        if not self.yt.exists(day_dir):
            return

        expires_on = self.date + datetime.timedelta(days=conf.proto.DaysTTL)
        self.yt.set(
            "{path}/@expiration_time".format(path=day_dir),
            "{date:%Y-%m-%d} 12:00:00.0+00:00".format(date=expires_on),
        )

    def _is_ready(self, path):
        """Check is table path day ready for commit"""
        last_times = {
            "5min": ("23:55:00", "11:55:00"),
            "30min": ("23:30:00", "11:30:00"),
            "1h": ("23:00:00", "11:00:00"),
        }
        parts = path.split("/")

        if len(parts) >= 3 and parts[-2] in last_times.keys():  # logfeller logs
            dt = parts[-1]
            period = parts[-2]
        else:  # non-logfeller logs
            dt = parts[-1]
            period = "1d"

        return (period == "1d") or (dt.split("T")[1] in last_times[period])

    def _clean_obsolete(self):
        """Report obsolete stream tables and remove the empty ones.

        A stream table is obsolete when its creation date is more than three
        days before the processing date. For each obsolete table its row
        count is sent as the "obsolete" metric; tables with zero rows are
        removed.
        """
        found = self.yt.search(
            conf.paths.stream.storage,
            node_type=["table"],
            attributes=["row_count", "processed", "creation_time"],
            depth_bound=1,
            follow_links=False,
        )
        for node in found:
            attrs = node.attributes
            row_count = attrs.get("row_count", 0)
            created_raw = attrs.get("creation_time")
            # Compare calendar dates only; the time zone info is dropped.
            created_on = parse_dt(created_raw).replace(tzinfo=None).date()
            if not created_on < self.date - datetime.timedelta(days=3):
                continue
            self.solomon.set_value("obsolete", row_count, labels={"task": self.key, "ls": self.ls})
            if row_count == 0:
                self.yt.remove(node)

    def _assert_tables_ready(self, *args, **kwargs):
        """Soup always ready"""
        return True

    @cached_property
    def ls(self):
        """Log sources as one string: sorted names joined by "_", or "all"."""
        joined = "_".join(sorted(self.log_sources))
        return joined if joined else "all"
