from __future__ import unicode_literals

import logging
import re
import luigi
import json

from crypta.graph.v1.python.lib.crypta_api import report_task_status_to_api
from crypta.graph.v1.python.lib.luigi.yt_luigi import BaseYtTask, YtDateAttributeTarget
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import yt_clients
from crypta.graph.v1.python.utils.utils import singleton

import crypta.lib.python.bt.conf.conf as conf
import crypta.lib.python.bt.conf.resource_conf as resource_conf

logger = logging.getLogger(__name__)


@singleton
class StreamImportConfig(object):
    """Accessor for the stream data-import configuration.

    Constructing it points the shared ``conf`` module at the
    ``/crypta/graph/data_import/stream`` resource; attribute reads that are
    not satisfied by the instance itself are forwarded to ``conf``.

    NOTE(review): wrapped in ``singleton``, so presumably a single instance
    (and a single ``conf.use`` call) is shared per process -- confirm against
    the decorator's implementation.
    """

    def __init__(self):
        stream_resource = resource_conf.find("/crypta/graph/data_import/stream")
        conf.use(stream_resource)

    def __getattr__(self, attr):
        # Python only calls __getattr__ when regular attribute lookup fails,
        # so every unknown name is delegated to the loaded ``conf`` module.
        return getattr(conf, attr)


def soup_table(id1, id2, source, log):
    """Return the path of the soup table for one (id1, id2, source, log) edge.

    The path is ``<soup storage dir>/<id1>_<id2>_<source>_<log>``, where each
    component is the ``Name`` attribute of the corresponding argument and the
    storage directory comes from ``StreamImportConfig().paths.storage.soup``.
    """
    soup_root = StreamImportConfig().paths.storage.soup
    name_parts = [item.Name for item in (id1, id2, source, log)]
    return "{}/{}_{}_{}_{}".format(soup_root, *name_parts)


def is_eod(path, date):
    """Tell whether ``path`` names the end-of-day table for ``date``.

    Two layouts are recognised from the trailing path components:

    * logfeller logs, ``.../<period>/<timestamp>`` with ``<period>`` one of
      ``5min``, ``30min`` or ``1h``: the table is end-of-day when its name is
      ``<date>T<start time of the last slot of that period>``;
    * anything else (non-logfeller logs): treated as a daily table, which is
      end-of-day when its name equals ``date``.

    :param path: table path, components separated by ``/``.
    :param date: day as a string, compared verbatim with the table name.
    :return: ``True`` if the table closes the given day, ``False`` otherwise.
    """
    # Start time of the last table of a day, per logfeller period.
    final_slot = {"5min": "23:55:00", "30min": "23:30:00", "1h": "23:00:00"}

    segments = path.split("/")
    table_name = segments[-1]

    # The period directory is only trusted when the path has at least three
    # components; shorter paths are handled as daily tables.
    closing_time = final_slot.get(segments[-2]) if len(segments) >= 3 else None

    if closing_time is None:
        return date == table_name
    return "{}T{}".format(date, closing_time) == table_name


class StreamTarget(luigi.Target, yt_clients.YtClientMixin):
    """Luigi target that exists once a day of stream logs is fully processed.

    The target queries the dynamic table at ``conf.paths.stream.processed``
    for end-of-day tables of ``date`` that carry a process time for every key
    in ``wait_keys``, and compares the log names found with the expected ones.

    :param date: day to check, as a string (matched against table names).
    :param wait_keys: JSON-encoded list of keys that must be present in
        ``process_time`` (each is looked up as ``/<key>``).
    :param observed_logs: JSON-encoded description of the expected logs:
        either a list of log names, or a string path of a directory whose
        children are links to the real log paths. ``null`` means "no
        explicit expectation".
    :param log_source: value matched against the ``log_source`` column.
    """

    # String types on both Python 2 and Python 3. The bare name ``unicode``
    # is undefined on Python 3, so it is spelled ``type(u"")`` here
    # (``unicode`` on Python 2, ``str`` on Python 3).
    _STRING_TYPES = (bytes, str, type(u""))

    def __init__(self, date, wait_keys, observed_logs, log_source):
        self.date = date
        self.wait_keys = json.loads(wait_keys)
        self.observed_logs = json.loads(observed_logs)
        self.log_source = log_source
        self.conf = StreamImportConfig()
        # Raw (still JSON-encoded) constructor arguments.
        # NOTE(review): presumably read by report_task_status_to_api, which
        # receives this target in place of a task -- confirm.
        self.param_kwargs = dict(
            date=date,
            wait_keys=wait_keys,
            observed_logs=observed_logs,
            log_source=log_source,
        )

    def is_day_processed(self):
        """
        Select rows from dyn table and check day-logs finalized. Query be like:

        $ yt select-rows '
            path
            FROM [//home/crypta/production/graph/stream/production/processed]
            WHERE log_source = "mm"
                AND try_get_int64(process_time, "/AppMetrikaTask") != NULL
                AND try_get_int64(process_time, "/SoupTask") != NULL
                AND regex_extract("(\\d{4}-\\d{2}-\\d{2})", path, "\\1") = "2022-05-05"
        ' --format json | grep 'T12:00:00'

        {"path":"//home/logfeller/logs/appmetrica-external-events/stream/5min/2022-05-05T12:00:00"}
        {"path":"//home/logfeller/logs/appmetrica-yandex-events/stream/5min/2022-05-05T12:00:00"}
        {"path":"//home/logfeller/logs/browser-metrika-mobile-log/stream/5min/2022-05-05T12:00:00"}
        {"path":"//home/logfeller/logs/superapp-metrika-mobile-log/stream/5min/2022-05-05T12:00:00"}

        Only the end-of-day tables (see ``is_eod``) of the result are kept and
        handed to ``is_full_day``, whose verdict is returned.
        """

        # One extra condition per awaited key: its process time must be set.
        process_keys = "\n".join(
            'AND try_get_int64(process_time, "{k}") != NULL'.format(k="/" + key) for key in self.wait_keys
        )

        query = """path FROM [{conf.paths.stream.processed}]
            WHERE log_source = "{ls}" {process_keys}
                AND regex_extract("(\\\\d{{4}}-\\\\d{{2}}-\\\\d{{2}})", path, "\\\\1") = "{date}"
        """.format(
            conf=self.conf,
            ls=self.log_source,
            process_keys=process_keys,
            date=self.date,
        )

        tables = [row["path"] for row in self.yt.select_rows(query) if is_eod(row["path"], self.date)]
        return self.is_full_day(tables)

    def exists(self):
        """Luigi hook: mount the processed table if needed, then check the day.

        :return: ``True`` when the day is fully processed, ``False`` otherwise.
        """

        def _is_mounted(client, path):
            return client.get("{0}/@tablets/0/state".format(path)) == "mounted"

        processed_table = self.conf.paths.stream.processed
        if not _is_mounted(self.yt, processed_table):
            # Do not mount if already is, to protect from err:
            # Cannot mount table since node is locked by mount-unmount operation
            self.yt.mount_table(processed_table, sync=True)

        return self.is_day_processed()

    def is_full_day(self, processed_tables):
        """Check that every expected log has an end-of-day table processed.

        The outcome is reported through ``report_task_status_to_api`` as
        ``SUCCESS`` or ``FAILURE``; on failure the missing log names are logged.

        :param processed_tables: paths of processed end-of-day tables.
        :return: ``True`` if at least one table is processed and all expected
            log names are among the processed ones.
        """
        # A string is a directory of links to observe; anything else is
        # treated as an explicit list of log names (or None).
        if isinstance(self.observed_logs, self._STRING_TYPES):
            expected, processed = self._get_observed_links(processed_tables)
        else:
            expected, processed = self._get_observed_list(processed_tables)

        if processed and expected.issubset(processed):
            report_task_status_to_api(self, "SUCCESS")
            return True

        report_task_status_to_api(self, "FAILURE")
        logger.info(
            "These logs %s still aren't fully processed for %s: %s",
            self.log_source,
            self.date,
            expected - processed,
        )
        return False

    def _processed_names(self, processed_tables):
        """Return the set of log names extracted from processed table paths."""
        return {self.get_log_name(path) for path in processed_tables}

    def _get_observed_list(self, processed_tables):
        """Return ``(expected, processed)`` name sets for a list-style config.

        ``observed_logs`` is taken as the expected log names; ``None`` or an
        empty list yields an empty expectation.
        """
        expected = set(self.observed_logs or [])
        return expected, self._processed_names(processed_tables)

    def _get_observed_links(self, processed_tables):
        """Return ``(expected, processed)`` name sets for a link-directory config.

        Lists the children of the ``observed_logs`` directory, reads the
        ``path`` attribute of each and extracts the expected log names from
        those paths.
        """
        symlinks = self.yt.list(self.observed_logs, absolute=True)
        real_logs = [self.yt.get_attribute(link, "path") for link in symlinks]

        expected = {self.get_log_name(path) for path in real_logs}
        return expected, self._processed_names(processed_tables)

    @staticmethod
    def get_log_name(path):
        """Return the path component that follows ``/logs/``, or ``None``.

        :param path: table path such as ``//home/logfeller/logs/<name>/...``.
        :return: ``<name>`` for the first ``/logs/<name>/`` occurrence, or
            ``None`` when the path has no such fragment.
        """
        found = re.search(r"/logs/([^/]+)/", path)
        return found.group(1) if found else None

    def get_task_family(self):
        """Return the family name used for this target: ``StreamTarget_<log_source>``."""
        return "StreamTarget_{}".format(self.log_source)


class StreamImportExternalTask(luigi.ExternalTask):
    """External waiter for stream sources.

    All parameters are passed through unchanged to ``StreamTarget``, which
    JSON-decodes ``wait_keys`` and ``observed_logs`` itself.
    """

    date = luigi.Parameter()
    wait_keys = luigi.Parameter()
    observed_logs = luigi.Parameter()
    log_source = luigi.Parameter()

    def output(self):
        """Return the ``StreamTarget`` built from this task's parameters."""
        return StreamTarget(
            date=self.date,
            wait_keys=self.wait_keys,
            observed_logs=self.observed_logs,
            log_source=self.log_source,
        )


class StreamImportBaseTask(BaseYtTask):
    """Base task for imports that wait for a processed day of stream logs.

    It requires a ``StreamImportExternalTask`` for its date and, once
    ``run_stream`` has finished, stamps a date attribute on the day's output
    folder. Subclasses override ``run_stream`` and may set ``observed_logs``.

    :param log_source: object whose ``Name`` identifies the log source.
    :param wait_keys: iterable of string keys the external task waits for.
    """

    # Expected logs, JSON-encoded as-is for the external task; ``None`` here.
    observed_logs = None
    # Disabled because of the error "Dynamic table commands can not be
    # performed under master transaction".
    # NOTE(review): presumably consumed by BaseYtTask -- confirm.
    with_transaction = False

    def __init__(self, log_source, wait_keys, *args, **kwargs):
        super(StreamImportBaseTask, self).__init__(*args, **kwargs)
        self.log_source = log_source
        self.wait_keys = wait_keys
        self.conf = StreamImportConfig()

        day_folder = config.YT_OUTPUT_FOLDER + self.date
        marker_attribute = "{log}_{keys}_imported".format(
            log=self.log_source.Name,
            keys="_".join(self.wait_keys),
        )
        # Completion marker: a dated attribute on the day's output folder.
        self.stream_target = YtDateAttributeTarget(
            path=day_folder,
            attribute_name=marker_attribute,
            date=self.date,
        )

    def run(self):
        """Run the import, then record completion on the output folder."""
        self.run_stream()
        self.finalize()

    def run_stream(self):
        """Do the actual import work; a no-op in this base class."""

    def finalize(self):
        """Create the target folder if it is missing and set its date attribute."""
        target_path = self.stream_target.path
        if not self.yt.exists(target_path):
            self.yt.mkdir(target_path, recursive=True)
        self.stream_target.set_date()

    def requires(self):
        """Return the external waiter for this task's date and log source."""
        waiter_params = dict(
            date=self.date,
            wait_keys=json.dumps(self.wait_keys),
            observed_logs=json.dumps(self.observed_logs),
            log_source=self.log_source.Name,
        )
        return StreamImportExternalTask(**waiter_params)

    def output(self):
        """Return the completion-marker target as a one-element list."""
        return [self.stream_target]
