import os

from functools import partial

import crypta.graph.v1.python.v2.soup.soup_storage_yql_schema as soup_storage_yql_schema

from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import yt_clients
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.v2.soup import soup_dirs

from crypta.graph.soup.config.python import EDGE_TYPE as edges, LOG_SOURCE as log_sources, SOURCE_TYPE as source_types

# Columns that identify an edge type.
edge_type_key_columns = ["id1Type", "id2Type", "sourceType", "logSource"]
# Full edge key: the edge type columns followed by both identifier values.
# Passed as ``reduce_by`` when a daily log table is reduced into daily soup tables.
edge_key_columns = edge_type_key_columns + ["id1", "id2"]


class Table(yt_clients.YtClientMixin):
    """ Base class for soup tables.

    Concrete subclasses supply ``dir_path``, ``table_path``, ``schema`` and
    ``finalize``. ``as_target`` additionally reads ``self.date``, which subclasses
    are expected to set in their constructors.
    """

    def dir_path(self):
        """ Return the directory that contains the table """
        raise NotImplementedError()

    def table_path(self):
        """ Return the full path of the table.

        Declared on the base class because ``create`` and ``as_target`` call it;
        an incomplete subclass now fails with ``NotImplementedError`` like the
        other abstract members rather than with ``AttributeError``.
        """
        raise NotImplementedError()

    @property
    def schema(self):
        """ Table schema handed to ``mr.create_table_with_schema`` by ``create`` """
        raise NotImplementedError()

    def finalize(self):
        """ Finalization step; the actual behavior is defined by each subclass """
        raise NotImplementedError()

    def create(self, transaction=None, recreate_if_exists=True):
        """ Create the table with a strict schema and return its path.

        :param transaction: forwarded to ``mr.create_table_with_schema``
        :param recreate_if_exists: forwarded to ``mr.create_table_with_schema``
        :return: the value of ``table_path()``
        """
        path = self.table_path()
        mr.create_table_with_schema(path, self.schema, transaction, strict=True, recreate_if_exists=recreate_if_exists)
        return path

    def ensure_dir(self):
        """ Call ``mr.mkdir`` on the directory returned by ``dir_path`` """
        mr.mkdir(self.dir_path())

    def as_target(self):
        """ Return a ``YtDateTarget`` built from the table path and ``self.date`` """
        return yt_luigi.YtDateTarget(self.table_path(), self.date)

    @staticmethod
    def make_rec(id1, id2, edge_type, table_index=0):
        """ Build the common part of an output record for an edge.

        :param id1: value of the first identifier
        :param id2: value of the second identifier
        :param edge_type: object exposing ``Id1Type``, ``Id2Type``, ``SourceType``
            and ``LogSource``, each with a ``Name`` attribute
        :param table_index: value stored under the ``@table_index`` key
        :return: dict with the edge key columns and ``@table_index``
        """
        return {
            "id1": id1,
            "id1Type": edge_type.Id1Type.Name,
            "id2": id2,
            "id2Type": edge_type.Id2Type.Name,
            "sourceType": edge_type.SourceType.Name,
            "logSource": edge_type.LogSource.Name,
            "@table_index": table_index,
        }


class DailyTable(Table):

    """ Base class for daily soup temporary tables, addressed by log source and date """

    def __init__(self, log_source_, date):
        self.date = date
        self.log_source = log_source_

    def table_path(self):
        """ The table sits in dir_path() and is named after the log source """
        directory = self.dir_path()
        return os.path.join(directory, self.log_source.Name)

    def finalize(self):
        """ Remove the table if it exists """
        path = self.table_path()
        if not self.yt.exists(path):
            return
        self.yt.remove(path)


class SoupDailyDumpTable(DailyTable):

    """ Table stores data imported from daily dump in the same format as in storage """

    def dir_path(self):
        """ Per-date subdirectory of the active dumps directory """
        dumps_root = soup_dirs.SOUP_ACTIVE_DUMPS_DIR
        return os.path.join(dumps_root, self.date)

    @property
    def schema(self):
        """ Six string edge key columns plus the ``dates`` column of type ``any`` """
        columns = {
            column: "string" for column in ("id1Type", "id2Type", "sourceType", "logSource", "id1", "id2")
        }
        columns["dates"] = "any"
        return columns

    @staticmethod
    def make_rec(id1, id2, edge_type, dates, table_index=0):
        """ Common edge record (see Table.make_rec) extended with ``dates`` """
        record = Table.make_rec(id1, id2, edge_type, table_index)
        record["dates"] = dates
        return record


class SoupDailyLogTable(DailyTable):

    """ Intermediate tmp table between transactional log parsing and daily storage soup table """

    def __init__(self, log_source_, date, source_types=None):
        """
        :param log_source_: log source the table is built from
        :param date: date of the table
        :param source_types: optional collection of source types; when given, only
            daily tables of these source types are kept and the table name gets a suffix
        """
        super(SoupDailyLogTable, self).__init__(log_source_, date)
        self.source_types = source_types

        # Daily soup tables fed by this log: edge types of this log source whose
        # props mark them as DAILY activity with LOG supply method.
        daily_tables = []
        for edge_type in edges.values():
            if (
                edge_type.LogSource == self.log_source
                and edge_type.Props.ActivityType == edge_type.Props.DAILY
                and edge_type.Props.SupplyMethod == edge_type.Props.LOG
            ):
                daily_tables.append(SoupDailyTable(edge_type, self.date))

        # Optional narrowing to the requested source types.
        if source_types:
            daily_tables = [tbl for tbl in daily_tables if tbl.edge_type.SourceType in source_types]
        self.soup_daily_tables = daily_tables

    def dir_path(self):
        """ Day log directory for the table date """
        day_log_dir = soup_dirs.get_day_log_dir(self.date)
        return day_log_dir

    def table_path(self):
        """ Log source name inside dir_path(), suffixed with source type names when they are set """
        table_name = self.log_source.Name
        if self.source_types:
            # hack when the same log is parsed several times for subsets of sourcePaths
            table_name = table_name + "_" + "_".join(st.Name for st in self.source_types)
        return os.path.join(self.dir_path(), table_name)

    @property
    def schema(self):
        """ Six string edge key columns plus ``hits`` (uint64) and ``ts`` (int64) """
        columns = {
            column: "string" for column in ("id1Type", "id2Type", "sourceType", "logSource", "id1", "id2")
        }
        columns["hits"] = "uint64"
        columns["ts"] = "int64"
        return columns

    @staticmethod
    def make_rec(id1, id2, edge_type, ts=None, hits=None, table_index=0):
        """ Common edge record (see Table.make_rec) extended with ``ts`` and ``hits`` """
        record = Table.make_rec(id1, id2, edge_type, table_index)
        record["ts"] = ts
        record["hits"] = hits
        return record

    def ensure_dir(self):
        """ Make the per-date day directory first, then the table directory itself """
        day_dir = soup_dirs.SOUP_DAY_DIR + self.date
        mr.mkdir(day_dir)
        super(SoupDailyLogTable, self).ensure_dir()

    def prepare_daily_tables_from_log(self):
        """ Reduce the log table into the daily soup tables inside one transaction.

        Creates every daily output table, runs a reduce-only map-reduce keyed by
        ``edge_key_columns`` with one additional errors output, then finalizes this
        log table and all daily tables.
        """
        with self.yt.Transaction() as tr:
            log_table = self.table_path()

            out_table_paths = []
            for daily_table in self.soup_daily_tables:
                out_table_paths.append(daily_table.create(tr))

            out_table_indexes = {}
            for position, daily_table in enumerate(self.soup_daily_tables):
                out_table_indexes[edges.tuplize(daily_table.edge_type)] = position

            # Workaround for regular access-log import and kinopoisk access-log import having the same error table.
            # One of the tasks can fail if they're run in parallel, being unable to acquire exclusive lock for error table.
            # This happens because they both have the same log_source.
            requested_source_types = self.source_types
            have_kp_source_type = bool(requested_source_types) and any(
                st == source_types.KINOPOISK for st in requested_source_types
            )
            log_source_name = self.log_source.Name
            if have_kp_source_type and self.log_source == log_sources.ACCESS_LOG:
                log_source_name += "-kp"

            errors_table = "{prefix_path}{date}/{log_source}_errors".format(
                log_source=log_source_name, date=self.date, prefix_path=soup_dirs.SOUP_DAY_DIR
            )
            # The errors output goes right after all daily table outputs.
            out_table_indexes["errors"] = len(out_table_indexes)

            partition_count = mr.calculate_optimized_mr_partition_count(log_table)

            # import soup utils inside method to prevent import loop error
            from crypta.graph.v1.python.v2.soup import soup_utils

            reducer = partial(
                soup_utils.reduce_day_activity,
                date=self.date,
                out_table_indexes=out_table_indexes,
                key_weight_limit=config.YT_KEY_SIZE_LIMIT,
            )
            destination_tables = out_table_paths + [errors_table]

            self.yt.run_map_reduce(
                None,  # no mapper
                reducer,
                log_table,
                destination_tables,
                reduce_by=edge_key_columns,
                spec={"partition_count": partition_count},
            )
            self.finalize()
            SoupDailyTable.finalize_all(self.soup_daily_tables)

    def daily_tables_targets(self):
        """ Targets of all daily soup tables produced from this log """
        return [daily_table.as_target() for daily_table in self.soup_daily_tables]

    def _get_processing_status_table(self):
        """ Path of the table whose attribute keeps the processing status of this log source """
        status_dir = soup_dirs.SOUP_DAY_LOGS_DIR + "processing_status/"
        return status_dir + self.log_source.Name

    def set_last_processed_log_table(self, t):
        """ Store ``t`` in the ``last_processed_log_table`` attribute, creating the status table if needed """
        path = self._get_processing_status_table()
        self.yt.create_table(path, recursive=True, ignore_existing=True)
        self.yt.set_attribute(path, "last_processed_log_table", t)

    def get_last_processed_log_table(self):
        """ Return the stored ``last_processed_log_table`` attribute, or None when there is no status table """
        path = self._get_processing_status_table()
        if not self.yt.exists(path):
            return None
        return self.yt.get_attribute(path, "last_processed_log_table")


class SoupTable(Table):

    """ Base class for all soup storage tables, addressed by edge type and date """

    def __init__(self, edge_type, date):
        self.date = date
        self.edge_type = edge_type

    def table_path(self):
        """ dir_path() immediately followed by the four edge type names joined with underscores """
        name_parts = dict(
            prefix_path=self.dir_path(),
            id1_type=self.edge_type.Id1Type.Name,
            id2_type=self.edge_type.Id2Type.Name,
            source_type=self.edge_type.SourceType.Name,
            log_source=self.edge_type.LogSource.Name,
        )
        return "{prefix_path}{id1_type}_{id2_type}_{source_type}_{log_source}".format(**name_parts)

    def finalize(self):
        """ Finalize just this table via finalize_all (resolved on the instance, so overrides apply) """
        only_this_table = [self]
        self.finalize_all(only_this_table)

    @staticmethod
    def finalize_all(tables):
        """ Merge chunks of all given tables in one call, then set the generate date on each """
        mr.merge_chunks_all([table.table_path() for table in tables])
        for table in tables:
            mr.set_generate_date(table.table_path(), table.date)


class SoupStorageTable(SoupTable):

    """ Table that stores final soup aggregate """

    @property
    def schema(self):
        """ Six string edge key columns plus the ``dates`` column of type ``any`` """
        columns = {
            column: "string" for column in ("id1Type", "id2Type", "sourceType", "logSource", "id1", "id2")
        }
        columns["dates"] = "any"
        return columns

    def dir_path(self):
        """ Storage tables live directly in the soup directory """
        soup_root = soup_dirs.SOUP_DIR
        return soup_root

    @staticmethod
    def make_rec(id1, id2, edge_type, dates, table_index=0):
        """ Common edge record (see Table.make_rec) extended with ``dates`` """
        record = Table.make_rec(id1, id2, edge_type, table_index)
        record["dates"] = dates
        return record

    @staticmethod
    def finalize_all(tables, yt_client=None):
        """ Run SoupTable.finalize_all, then set ``_yql_row_spec`` on tables that lack it.

        :param tables: tables to finalize
        :param yt_client: client used for attribute access; when None, one is
            obtained from ``yt_clients.get_yt_client()``
        """
        client = yt_clients.get_yt_client() if yt_client is None else yt_client
        SoupTable.finalize_all(tables)
        row_spec_attribute = "_yql_row_spec"
        for table in tables:
            if client.get_attribute(table.table_path(), row_spec_attribute, None) is not None:
                # the attribute is already present, leave it untouched
                continue
            client.set_attribute(table.table_path(), row_spec_attribute, soup_storage_yql_schema.SCHEMA)


class SoupDailyTable(SoupTable):

    """ Table stores transactional data per day """

    @property
    def schema(self):
        """ Six string edge key columns plus ``date``, ``dayActivity`` and ``dayHits`` """
        columns = {
            column: "string" for column in ("id1Type", "id2Type", "sourceType", "logSource", "id1", "id2")
        }
        columns["date"] = "string"
        columns["dayActivity"] = "any"
        columns["dayHits"] = "uint64"
        return columns

    def dir_path(self):
        """ Day directory for the table date """
        day_dir = soup_dirs.get_day_dir(self.date)
        return day_dir

    def corresponding_storage_table(self):
        """ Storage table with the same edge type and date """
        edge_type, date = self.edge_type, self.date
        return SoupStorageTable(edge_type, date)

    @staticmethod
    def make_rec(id1, id2, edge_type, date, day_activity, day_hits, table_index=0):
        """ Common edge record (see Table.make_rec) extended with the per-day columns """
        record = Table.make_rec(id1, id2, edge_type, table_index)
        record["date"] = date
        record["dayActivity"] = day_activity
        record["dayHits"] = day_hits
        return record


class SoupDumpTable(SoupStorageTable):

    """ Table stores intermediate data imported as dump in the same format as in storage """

    def dir_path(self):
        """ Dump tables live in the dumps directory instead of the soup directory """
        dumps_dir = soup_dirs.SOUP_DUMPS_DIR
        return dumps_dir

    def corresponding_storage_table(self):
        """ Storage table with the same edge type and date """
        edge_type, date = self.edge_type, self.date
        return SoupStorageTable(edge_type, date)


class MobmetPreprocessedDumpTable(Table):

    """ Class for table of preprocessing cross metrika-appmetrica edges """

    def __init__(self, name, schema, date):
        """
        :param name: table name inside ``soup_dirs.SOUP_PREPROCESS_DIR``
        :param schema: schema exposed through the ``schema`` property
        :param date: date passed to ``mr.set_generate_date`` by ``finalize``
        """
        self.name = name
        self.schema = schema  # goes through the property setter below
        self.date = date

    def dir_path(self):
        """ Preprocess directory that holds the table """
        return soup_dirs.SOUP_PREPROCESS_DIR

    def table_path(self):
        """ Table name joined to dir_path() """
        return os.path.join(self.dir_path(), self.name)

    # ``schema`` used to be a plain method that the instance attribute assigned in
    # ``__init__`` silently shadowed; the assignment only worked because that method
    # hid the read-only ``Table.schema`` property. An explicit property with a setter
    # keeps both ``obj.schema`` and ``obj.schema = ...`` working without the shadowing.
    @property
    def schema(self):
        """ Schema supplied to the constructor (or assigned later) """
        return self._schema

    @schema.setter
    def schema(self, value):
        self._schema = value

    @staticmethod
    def make_rec(id1, id2, edge_type, table_index=0):
        """ No-op override; always returns None.

        Declared as a static method to match ``Table.make_rec``: without the
        decorator a call through an instance bound the instance to ``id1``.
        """
        return None

    def finalize(self):
        """ Set the generate date of the table to ``self.date`` """
        mr.set_generate_date(self.table_path(), self.date)
