# -*- coding: utf-8 -*-
import re
import time
import logging
import datetime
import collections
import sandbox.sdk2.parameters as parameters

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment
from sql_templates import create_table, drop_table, show_tables

# Pattern for time-sliced table names of the form
#   <main_name>_<year>_<month>_<day>_<hour>_<minute>_<period>m
# where main_name consists of ASCII letters and digits only (no underscores)
# and every other component is a run of decimal digits.
# NOTE(review): find_tables_to_remove applies this with .match(), which anchors
# only at the start of the string, so names with extra text after the trailing
# "m" also match -- confirm that this is intended.
TABLE_PTRN = re.compile(
    r'(?P<main_name>[A-Za-z0-9]+)_(?P<year>\d+)_(?P<month>\d+)_(?P<day>\d+)_(?P<hour>\d+)_(?P<minute>\d+)_(?P<period>[0-9]+)m'
)


def retry(func, *args):
    """Call ``func(*args)`` and return its result, retrying on failure.

    The call is attempted up to 11 times with a 5 second pause after every
    failed attempt except the last one.  Each failed attempt is logged; the
    exception raised by the final attempt is propagated to the caller.

    :param func: callable to invoke
    :param args: positional arguments passed to ``func``
    :return: whatever ``func(*args)`` returns
    :raises Exception: the error of the last attempt when all attempts fail
    """
    max_attempts = 11
    delay_seconds = 5

    # ``range`` (not ``xrange``) keeps the helper importable on Python 2 and 3.
    for attempt in range(1, max_attempts + 1):
        try:
            return func(*args)
        except Exception:
            if attempt == max_attempts:
                raise
            logging.warning(
                'Attempt %d/%d of %s failed, retrying in %d seconds',
                attempt, max_attempts, getattr(func, '__name__', repr(func)), delay_seconds,
                exc_info=True
            )
            time.sleep(delay_seconds)


class Replicaset(object):
    """A group of ClickHouse replicas addressed by their ``host:port`` strings."""

    def __init__(self, replicas):
        # One client per replica, keyed by the original ``host:port`` string.
        self.clients = self.get_clients_from_replicas(replicas)

    @staticmethod
    def get_clients_from_replicas(replicas):
        """Build a ``{replica: Client}`` mapping for the given ``host:port`` strings."""
        # Imported here rather than at module level, as in the original code.
        from clickhouse_driver import Client

        def connect(address):
            host, port = address.split(':')
            return Client(host=host, port=int(port), send_receive_timeout=1200)

        return {address: connect(address) for address in replicas}

    def execute(self, query, replicas=None):
        """Run ``query`` on the given replicas (all known ones when omitted or empty).

        Every call goes through ``retry``; the result is a ``{replica: answer}``
        mapping.
        """
        if not replicas:
            replicas = self.clients.keys()

        return {
            address: retry(self.clients[address].execute, query)
            for address in replicas
        }


class GencfgUsageStatisticManager(sdk2.Task):
    """Create upcoming resource usage tables and/or drop outdated ones.

    Tables are named ``<main_name>_<YYYY>_<mm>_<dd>_<HH>_<MM>_<period>m``.
    In generation mode the task creates such tables for the configured number
    of hours ahead of the start time; in cleaning mode it drops tables of the
    configured main names and periods that are older than the configured
    delay.  With ``dry_run`` enabled the queries are only reported through
    ``set_info`` and never executed.
    """
    class Requirements(sdk2.Requirements):
        # Client library that Replicaset imports when it builds its clients.
        environments = (PipEnvironment('clickhouse_driver'),)

    class Context(sdk2.Context):
        # Names of the tables handled by cleaning_mode / generation_mode;
        # they are recorded in dry-run mode as well.
        droped_tables = []
        created_tables = []

    class Parameters(sdk2.Task.Parameters):
        with parameters.Group('Modes') as modes:
            generation = sdk2.parameters.Bool('Generation mode', default=False)
            cleaning = sdk2.parameters.Bool('Cleaning mode', default=False)

        with parameters.Group('Shared settings') as tables:
            main_table_names = sdk2.parameters.List('Main table names', default=[])
            periods = sdk2.parameters.List('Periods', default=[2, 15])
            custom_start_timestamp = sdk2.parameters.String('Custom start timestamp', default=None, description='%Y-%m-%d %H:%M')

        with parameters.Group('Generation settings') as generation_:
            generation_for_hours = sdk2.parameters.Integer('Generation for N hours in front', default=24)

        with parameters.Group('Cleaning settings') as cleaning_:
            remove_table_hours_delay = sdk2.parameters.Integer('Remove table hours delay', default=48)

        with parameters.Group('ClickHouse settings') as clickhouse:
            replicas = sdk2.parameters.List('Replicas', default=[
                'man1-8406.search.yandex.net:17353',
                'sas1-1716.search.yandex.net:17353',
                'vla1-4552.search.yandex.net:17353'
            ])

        with parameters.Group('Execution settings') as execution:
            dry_run = sdk2.parameters.Bool('Dry Run', default=True)
            query_delay = sdk2.parameters.Integer('Query delay', default=3)

    def on_execute(self):
        """Resolve the start time and run every enabled mode against the replicas."""
        timestamp = self.Parameters.custom_start_timestamp or self.current_timestamp()
        start_time = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M')
        # Align the start to an even minute by stepping an odd one back.
        if start_time.minute % 2 != 0:
            start_time -= datetime.timedelta(minutes=1)
        replicaset = Replicaset(self.Parameters.replicas)

        if self.Parameters.generation:
            self.generation_mode(replicaset, start_time)
        if self.Parameters.cleaning:
            self.cleaning_mode(replicaset, start_time)

    def generation_mode(self, replicaset, start_time):
        """Create (or, in dry-run mode, only report) all tables from ``generate_table_names``."""
        generated_table_names = self.generate_table_names(start_time)
        for main_table_name, table_names in generated_table_names.items():
            for table_name in table_names:
                create_table_query = create_table[main_table_name].format(table_name=table_name)
                logging.info('Creating table with query: {} (dry_run: {})'.format(create_table_query, self.Parameters.dry_run))
                if not self.Parameters.dry_run:
                    replicaset.execute(create_table_query)
                    # Pause between consecutive queries.
                    time.sleep(self.Parameters.query_delay)
                else:
                    self.set_info(create_table_query)
                self.Context.created_tables.append(table_name)

    def cleaning_mode(self, replicaset, start_time):
        """Drop (or, in dry-run mode, only report) all tables from ``find_tables_to_remove``."""
        tables_to_remove = self.find_tables_to_remove(replicaset, start_time)
        for table_name in tables_to_remove:
            drop_table_query = drop_table.format(table_name=table_name)
            logging.info('Droping table with query: {} (dry_run: {})'.format(drop_table_query, self.Parameters.dry_run))
            if not self.Parameters.dry_run:
                replicaset.execute(drop_table_query)
                # Pause between consecutive queries.
                time.sleep(self.Parameters.query_delay)
            else:
                self.set_info(drop_table_query)
            self.Context.droped_tables.append(table_name)

    @staticmethod
    def current_timestamp():
        """Return the current local time formatted as ``%Y-%m-%d %H:%M``."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

    def generate_table_names(self, start_time):
        """Build the table names to create for every main table name.

        For each period a name is produced every ``period`` minutes from
        ``start_time`` (inclusive) up to ``generation_for_hours`` hours later
        (exclusive).

        :param start_time: ``datetime`` of the first table
        :return: ``defaultdict(list)`` mapping main table name to table names
        :raises ValueError: if a period is not a positive number of minutes
        """
        periods = [int(period) for period in self.Parameters.periods]
        for period in periods:
            # A non-positive step would never reach ``end`` and loop forever.
            if period <= 0:
                raise ValueError('Period must be a positive number of minutes, got {}'.format(period))

        end = start_time + datetime.timedelta(hours=self.Parameters.generation_for_hours)

        suffixes = []
        for period in periods:
            step = datetime.timedelta(minutes=period)
            moment = start_time
            while moment < end:
                suffixes.append('{}_{}m'.format(moment.strftime('%Y_%m_%d_%H_%M'), period))
                moment += step

        tables = collections.defaultdict(list)
        for table_name in self.Parameters.main_table_names:
            for suffix in suffixes:
                tables[table_name].append('{}_{}'.format(table_name, suffix))
        return tables

    def find_tables_to_remove(self, replicaset, start_time):
        """Select existing tables that are old enough to be dropped.

        A table qualifies when its name matches ``TABLE_PTRN``, its main name
        and period are among the configured ones and the timestamp encoded in
        the name is at least ``remove_table_hours_delay`` hours before
        ``start_time``.

        :param replicaset: replica set queried for the existing tables
        :param start_time: ``datetime`` the table age is measured against
        :return: sorted list of table names to drop
        """
        # Union of the table names reported by all replicas.
        existing_tables = set()
        for output in replicaset.execute(show_tables).values():
            for record in output:
                existing_tables.add(record[0])

        main_table_names = self.Parameters.main_table_names
        # The regex yields the period as a string while the parameter may hold
        # ints (as in its default), so compare both sides as ints.
        periods = set(int(period) for period in self.Parameters.periods)
        min_age = datetime.timedelta(hours=self.Parameters.remove_table_hours_delay)

        tables_to_remove = []
        # Sorted so that the drop order is deterministic between runs.
        for table_name in sorted(existing_tables):
            matched = TABLE_PTRN.match(table_name)
            if not matched:
                continue
            table_name_parts = matched.groupdict()

            if table_name_parts['main_name'] not in main_table_names:
                continue
            if int(table_name_parts['period']) not in periods:
                continue

            try:
                table_create_date = datetime.datetime(
                    year=int(table_name_parts['year']),
                    month=int(table_name_parts['month']),
                    day=int(table_name_parts['day']),
                    hour=int(table_name_parts['hour']),
                    minute=int(table_name_parts['minute']),
                    second=0
                )
            except ValueError:
                # The name matches the pattern but does not encode a real date.
                logging.warning('Skipping table with invalid timestamp in name: %s', table_name)
                continue

            if start_time - table_create_date >= min_age:
                tables_to_remove.append(table_name)

        return tables_to_remove
