import time

from infi.clickhouse_orm import models
from infi.clickhouse_orm import engines

from sandbox.yasandbox.database.clickhouse import fields
from sandbox.yasandbox.database.clickhouse import exceptions
from sandbox.yasandbox.database.clickhouse import engines as custom_engines


class DistributedModel(models.Model):
    """
    By wrapping replicated and distributed tables creation,
    DistributedModel lets you insert and query without having to care about sharding.
    All you have to do is:

    - subclass from it;

    - define a ``schema_version`` attribute (the metaclass refuses to build a model without it);

    - define a ``meta`` attribute with a tuple of primary key columns and anything else
      you need to pass to engine as keywords, such as below:

    .. code-block:: python

        from infi.clickhouse_orm import fields
        from sandbox.yasandbox.database.clickhouse import models

        class Dummy(models.DistributedModel):
            schema_version = 1  # only its presence is checked by the metaclass

            meta=dict(
                order_by=("date",)
            )

            date = fields.DateField()  # whatever

    - call ``db.create_table(Dummy)``, and you're all set!

    Every subclass gets two tables: the underlying one (named after the lowercased class name)
    and the distributed one (the same name with a ``d`` suffix, see :meth:`table_name`).

    More on sharding: https://clickhouse.yandex/docs/ru/single/#distributed
    """

    # noinspection PyPep8Naming
    class __metaclass__(type(models.Model)):
        """
        A neat trick for saving you from boilerplate code! Well, at least a small portion of it

        For every class except ``DistributedModel`` itself it validates the ``meta`` settings
        and injects ``engine``, ``distributed_engine`` and ``underlying_table_name``
        into the class namespace.
        """

        def __new__(mcs, name, bases, namespace):
            """
            Build the model class.

            :param name: name of the class being created
            :param bases: tuple of its base classes
            :param namespace: class body namespace; ``meta`` is popped from it
            :raises RuntimeError: if ``schema_version`` is missing, ``date_col`` is empty,
                or the date / ``order_by`` columns are not declared in the class body
            """
            # DistributedModel itself (a direct child of models.Model) is created untouched.
            if bases == (models.Model,):
                return type(models.Model).__new__(mcs, name, bases, namespace)

            underlying_table_name = name.lower()  # default table name implementation
            # Base settings for replicated tables.
            # Don't worry about {{shard}} being the same for production and preproduction:
            # create_table_sql() does the thing.
            kws = dict(
                date_col="date",
                order_by=("date", "timestamp"),
                replica_table_path="/clickhouse/tables/{{shard}}/{{database}}.{table}.{time}".format(
                    table=underlying_table_name,
                    time=int(time.time())
                ),
                replica_name="{replica}",
            )
            # User-supplied meta overrides the defaults above.
            kws.update(namespace.pop("meta", {}))

            if "schema_version" not in namespace:
                raise RuntimeError("schema_version for {} model is not specified".format(name))

            date_col = kws.get("date_col")
            if not date_col:
                raise RuntimeError("Missing date_col in meta of {}".format(name))
            if date_col not in namespace:
                raise RuntimeError("{} doesn't have a date column called {}".format(name, date_col))
            missing_columns = [col for col in kws.get("order_by", []) if col not in namespace]
            if missing_columns:
                raise RuntimeError("{} doesn't have columns {} specified in meta".format(name, missing_columns))

            namespace["engine"] = engines.MergeTree(**kws)
            namespace["distributed_engine"] = custom_engines.DistributedEngine(
                underlying_table=underlying_table_name,
                sampling_expr="rand()",  # the best distribution ever \:D/
            )
            # capture table name for later usage
            namespace["underlying_table_name"] = classmethod(lambda _: underlying_table_name)

            return type(models.Model).__new__(mcs, name, bases, namespace)

    @classmethod
    def table_name(cls):
        """
        Return the name of the distributed table:
        the underlying table name with a ``d`` appended.
        """
        return cls.underlying_table_name() + "d"  # set in metaclass

    @classmethod
    def create_table_sql(cls, db, cluster_name):
        """
        Generate SQL statements for creating underlying table and the distributed one itself.
        For use with `sandbox.yasandbox.database.clickhouse.database.DistributedDatabase` only.
        This is mostly copied from `infi.clickhouse_orm.database.Database.create_table_sql` function

        :param db: database object; its ``db_name`` is used to qualify table names
            and it is passed through to the fields and engines
        :param cluster_name: cluster name handed over to the distributed engine
        :return: generator yielding two ``CREATE TABLE IF NOT EXISTS`` statements:
            first for the underlying table, then for the distributed one
        """

        for table_name, engine in (
            (cls.underlying_table_name(), cls.engine),
            (cls.table_name(), cls.distributed_engine),
        ):
            # on the second iteration I could rip the schema off the underlying table
            # using CREATE TABLE ... AS ..., but that would totally ruin the loop, so why bother?
            cols = [
                "    {} {}".format(name, field.get_sql(db=db))
                for name, field in cls._fields.iteritems()
            ]

            # The distributed engine renders the whole clause itself (and needs the cluster name),
            # while a regular engine only provides the part after "ENGINE = ".
            if isinstance(engine, custom_engines.DistributedEngine):
                engine_sql = engine.create_table_sql(db, cluster_name)
            else:
                engine_sql = "ENGINE = {}".format(engine.create_table_sql(db))

            # ClickHouse's replicated tables MUST have different ZooKeeper paths for proper replication,
            # unless you want to run into a hard-to-diagnose trouble.
            # Sandbox is known for slapping 1 on the end of preproduction-related entities
            # (see: Conductor tags), so why not do the same for tables?
            if db.db_name.endswith("1"):
                # str.replace() returns a new string, so the result has to be assigned back;
                # otherwise the suffix is silently lost.
                engine_sql = engine_sql.replace("{shard}", "{shard}_1")

            yield "\n".join([
                "CREATE TABLE IF NOT EXISTS `{}`.`{}` (".format(db.db_name, table_name),
                ",\n".join(cols),
                ")",
                engine_sql,
            ])

    @classmethod
    def drop_table_sql(cls, db_name):
        """
        Generate SQL statements for dropping both tables of the model.

        :param db_name: name of the database the tables live in
        :return: generator yielding ``DROP TABLE IF EXISTS`` statements:
            first for the underlying table, then for the distributed one
        """
        for table_name in (cls.underlying_table_name(), cls.table_name()):
            yield "DROP TABLE IF EXISTS `{}`.`{}`".format(db_name, table_name)

    @classmethod
    def init_auto_enums(cls, db):
        """
        Initialize every ``AutoEnumField`` of the model.

        For each such field the values are first requested via ``field.load_from_mongo()``;
        if that call reports success, nothing else is done for the field.
        Otherwise the current column type is looked up in ``DESCRIBE TABLE`` of the underlying table:

        - a ``String`` column: its distinct values are collected and ``update_enum()``
          is called with ``alter_table=True``;
        - an ``Enum`` column: the names are parsed out of the column type, ordered by enum value;
        - a column missing from the table: ``update_enum()`` is called with no items.

        :param db: database object providing ``db_name``, ``select()`` and ``logger``
        :raises exceptions.DatabaseException: if the existing column is neither String nor Enum
        """
        table_name = cls.underlying_table_name()
        model_descr = None  # table description, fetched lazily and at most once
        for field_name, field in cls._fields.iteritems():
            if not isinstance(field, fields.AutoEnumField):
                continue

            full_enum_name = ".".join((db.db_name, table_name, field_name))
            if field.load_from_mongo(full_enum_name, db.logger):
                continue

            if model_descr is None:
                model_descr = {
                    column.name: column
                    for column in db.select("DESCRIBE TABLE `{}`.`{}`".format(db.db_name, table_name))
                }

            alter_table = False
            items = []

            if field_name in model_descr:
                column = model_descr[field_name]
                if not column.type.startswith(("String", "Enum")):
                    raise exceptions.DatabaseException(
                        "Cannot convert field {} of type `{}` to Enum".format(full_enum_name, column.type)
                    )
                if column.type.startswith("String"):
                    # Existing String column: seed the enum with the values already stored.
                    rows = db.select(
                        "SELECT DISTINCT `{field}` FROM `{db_name}`.`{table}` ORDER BY `{field}`".format(
                            field=field_name, db_name=db.db_name, table=table_name
                        )
                    )
                    items = [getattr(row, field_name) for row in rows]
                    alter_table = True
                    db.logger.debug("Attempting to convert former String values to AutoEnum for %s", full_enum_name)

                else:
                    # Existing Enum column: reuse its names, ordered by their numeric values.
                    parsed = sorted(field.parse_enum(column.type).iteritems(), key=lambda pair: pair[1])
                    items = [enum_name for enum_name, _ in parsed]
                    db.logger.debug("Values of %s are received from AutoEnum field class", full_enum_name)

            result = field.update_enum(db, cls, field_name, items, alter_table=alter_table)
            db.logger.debug(
                "Enum %s is%s updated on initialization (alter_table=%r)",
                full_enum_name, "" if result else " not", alter_table
            )
