# -*- coding: utf-8 -*-
import datetime
import logging
import re
import sys
from argparse import ArgumentParser
from typing import Dict

from library.python import resource
from travel.library.python.tools import replace_args_from_env
from yt.wrapper import YtClient

from travel.hotels.tools.dataset_curator.auto_build_context_creator import create_auto_build_context
from travel.hotels.tools.dataset_curator.config_parsing import ConfigParser
from travel.hotels.tools.dataset_curator.data import Dataset
from travel.hotels.tools.dataset_curator.dataset_building import DatasetBuilder
from travel.hotels.tools.dataset_curator.dataset_cleanup import DatasetCleanupRunner
from travel.hotels.tools.dataset_curator.datasets.registry import get_registry_for_use
from travel.hotels.tools.dataset_curator.schedules import OnChangeValidationSchedule
from travel.hotels.tools.dataset_curator.validation_running import ValidationConfiguration, DatasetValidationRunner
from travel.hotels.tools.dataset_curator.validators import SchemaValidator, get_dataset_custom_validations, DatasetDirectoryStructureValidator
from travel.hotels.tools.dataset_curator.yt_storage import ValidationInfoYtStorage
from travel.hotels.tools.dataset_curator.sb_planner_plan_builder import SbPlannerPlanBuilder

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class Runner:
    """Implementations of the command-line sub-commands.

    Every public method implements one sub-command and reads the options it
    needs from the parsed argument namespace passed to the constructor.
    """

    def __init__(self, args):
        """Remember the parsed arguments and start with an empty YT client cache.

        :param args: parsed command-line namespace; each method reads the
            attributes it needs from it.
        """
        self.yt_clients = {}
        self.args = args

    def _parse_time_interval(self, interval_str) -> datetime.timedelta:
        """Convert a string of the form ``H...H:MM`` into a timedelta.

        :param interval_str: one or more digits of hours, a colon, and exactly
            two digits of minutes.
        :raises ValueError: if the whole string does not match that format.
        """
        match = re.fullmatch(r'(\d+):(\d{2})', interval_str)
        if not match:
            raise ValueError(f'Can\'t parse time interval: {interval_str}')
        hours, minutes = (int(part) for part in match.groups())
        return datetime.timedelta(hours=hours, minutes=minutes)

    @staticmethod
    def _parse_builder_args(raw_args) -> Dict[str, str]:
        """Turn ``key=value`` strings into a dict.

        Only the first ``=`` separates the key from the value, so values may
        themselves contain ``=``. A later duplicate key overrides an earlier one.

        :param raw_args: iterable of ``key=value`` strings.
        :raises ValueError: if an item contains no ``=`` at all.
        """
        parsed = {}
        for arg in raw_args:
            key, separator, value = arg.partition('=')
            if not separator:
                raise ValueError(f'Builder argument must have the form key=value, got: {arg}')
            parsed[key] = value
        return parsed

    def _get_yt_client(self, yt_cluster: str) -> YtClient:
        """Return a YT client for the given cluster, creating and caching it on first use."""
        if yt_cluster not in self.yt_clients:
            self.yt_clients[yt_cluster] = YtClient(proxy=yt_cluster, token=self.args.yt_token)
        return self.yt_clients[yt_cluster]

    def _load_datasets(self, env=None) -> Dict[str, Dataset]:
        """Parse dataset definitions from the bundled resources.

        Types are loaded from the ``types.yaml`` resource; datasets are parsed
        from every resource whose key ends with ``datasets.yaml``.

        :param env: environment passed to the config parser; when ``None``,
            ``self.args.env`` is used.
        """
        parser = ConfigParser(self.args.env if env is None else env)
        parser.load_types(resource.find('types.yaml'))
        dataset_configs = [v for k, v in resource.iteritems() if k.endswith('datasets.yaml')]
        return parser.parse_datasets(dataset_configs)

    def _prepare_aux_data_dir(self):
        """Make sure the auxiliary data directory exists (created recursively, existing node is kept)."""
        client = YtClient(self.args.aux_data_yt_proxy, self.args.yt_token)
        client.create('map_node', self.args.aux_data_yt_dir, recursive=True, ignore_existing=True)

    def run_build(self):
        """Build the dataset named by ``self.args.dataset``.

        The build context comes from the code registry when the dataset is
        registered there, otherwise it is created automatically from the
        dataset config.

        :raises Exception: if a builder is defined both in code and in config.
        :raises ValueError: if a builder argument is not of the form ``key=value``.
        """
        dataset_builders_registry = get_registry_for_use()

        dataset = self._load_datasets()[self.args.dataset]

        has_code_builder = self.args.dataset in dataset_builders_registry.builders
        if has_code_builder and dataset.build is not None and dataset.build.builder is not None:
            raise Exception(f"Builder for {self.args.dataset} defined in code and config simultaneously")

        if has_code_builder:
            inner_build_context = dataset_builders_registry.builders[self.args.dataset]
        else:
            inner_build_context = create_auto_build_context(dataset, 'hahn')  # TODO

        builder_args = self._parse_builder_args(self.args.builder_args)

        self._prepare_aux_data_dir()
        builder = DatasetBuilder(self.args.tmp_dir, self.args.aux_data_yt_dir, self.args.aux_data_yt_proxy, self.args.yt_token, self.args.yql_token, not self.args.no_latest)
        builder.build_dataset(dataset, inner_build_context, builder_args)

    def run_validations(self):
        """Run the common and the dataset-specific validations for every dataset.

        When ``self.args.time_limit`` is set, a deadline is computed from the
        current UTC time; it is passed to the validation runner and also stops
        the iteration over datasets once it has passed. The validation info
        storage is synced at the end even if a validation raises.
        """
        datasets = self._load_datasets()

        deadline = None
        if self.args.time_limit is not None:
            # Naive UTC timestamp; it is compared against utcnow() below.
            deadline = datetime.datetime.utcnow() + self._parse_time_interval(self.args.time_limit)
            LOG.info(f'Deadline is {deadline} (utc)')

        self._prepare_aux_data_dir()
        validation_info_storage = ValidationInfoYtStorage(self.args.aux_data_yt_dir, self.args.aux_data_yt_proxy, self.args.yt_token)
        try:
            for dataset_name, dataset in datasets.items():
                LOG.info(f'Processing dataset: {dataset_name}')

                common_validations = [
                    ValidationConfiguration('schema', SchemaValidator(), OnChangeValidationSchedule(validation_info_storage)),
                    ValidationConfiguration('directory_structure', DatasetDirectoryStructureValidator(), OnChangeValidationSchedule(validation_info_storage)),
                ]
                custom_validations = [
                    ValidationConfiguration(validation_id, validation, OnChangeValidationSchedule(validation_info_storage))
                    for validation_id, validation in get_dataset_custom_validations(dataset).items()
                ]
                validation_runner = DatasetValidationRunner(common_validations + custom_validations, self.args.yt_token, self.args.aux_data_yt_dir, self.args.aux_data_yt_proxy, deadline)
                validation_runner.run_validations(dataset, validation_info_storage.sync)

                if deadline is not None and datetime.datetime.utcnow() >= deadline:
                    LOG.info('Stopping all because of time limit')
                    break
        finally:
            validation_info_storage.sync()

    def run_cleanup(self):
        """Clean up one dataset (``self.args.dataset``) or, when it is not given, all datasets."""
        LOG.info('Running cleanup...')
        runner = DatasetCleanupRunner(self.args.yt_token)

        datasets = self._load_datasets()
        if self.args.dataset is not None:
            targets = [datasets[self.args.dataset]]
        else:
            targets = list(datasets.values())

        for dataset in targets:
            runner.cleanup_dataset(dataset)

        LOG.info('Cleanup done')

    def update_plan(self):
        """Load datasets for the 'testing' environment and hand them to the plan builder.

        The environment is passed explicitly because the ``update-plan``
        sub-command does not define an ``--env`` option.
        """
        datasets = self._load_datasets('testing')
        SbPlannerPlanBuilder().update_plan(datasets)


def main():
    """Command-line entry point.

    Declares the ``validate``, ``build``, ``cleanup`` and ``update-plan``
    sub-commands, parses the arguments returned by ``replace_args_from_env()``,
    configures logging to stdout and runs the handler of the chosen sub-command
    on a fresh ``Runner``.
    """
    root_parser = ArgumentParser()
    root_parser.add_argument('--verbose', action='store_true', default=False)

    modes = root_parser.add_subparsers(dest='mode')
    modes.required = True

    def new_mode(mode_name, handler):
        # Register a sub-command and bind the Runner method that implements it.
        mode_parser = modes.add_parser(mode_name)
        mode_parser.set_defaults(func=handler)
        return mode_parser

    def with_common_options(mode_parser):
        # Options shared by the modes that work with a concrete environment.
        mode_parser.add_argument('--env', choices=['testing', 'prod'], required=True)
        mode_parser.add_argument('--yt-token', required=True)

    def with_aux_data_options(mode_parser):
        # Options locating the auxiliary data directory.
        mode_parser.add_argument('--aux-data-yt-proxy', default='hahn')
        mode_parser.add_argument('--aux-data-yt-dir', required=True)

    validate = new_mode("validate", Runner.run_validations)
    with_common_options(validate)
    with_aux_data_options(validate)
    validate.add_argument('--time-limit', help='format: HH:MM')

    build = new_mode("build", Runner.run_build)
    with_common_options(build)
    with_aux_data_options(build)
    build.add_argument('--yql-token', required=False)
    build.add_argument('--dataset', required=True)
    build.add_argument('--tmp-dir', required=True)
    build.add_argument('--no-latest', action='store_true', default=False)
    build.add_argument('--builder-args', nargs='+', default=[])

    cleanup = new_mode("cleanup", Runner.run_cleanup)
    with_common_options(cleanup)
    cleanup.add_argument('--dataset')

    # update-plan takes no options of its own.
    new_mode("update-plan", Runner.update_plan)

    args = root_parser.parse_args(args=replace_args_from_env())

    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)-15s | %(module)s | %(levelname)s | %(message)s",
        stream=sys.stdout,
    )
    # Keep the connection pool logger at WARNING regardless of --verbose.
    logging.getLogger('yt.packages.urllib3.connectionpool').setLevel(logging.WARNING)

    runner = Runner(args)
    args.func(runner)


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
