from __future__ import unicode_literals

import logging
from datetime import datetime

from six import text_type

from sandbox import sdk2
from sandbox.projects.avia.lib.logs import configure_logging, get_sentry_dsn
from sandbox.projects.avia.lib.yt_helpers import YtClientFactory, tables_for_daterange
from sandbox.sandboxsdk.environments import PipEnvironment

# Expected layout of the date parameters: four digits, two digits, two digits,
# separated by dashes (YYYY-MM-DD). The pattern only checks the digit layout, so
# a value such as '9999-99-99' still matches it; the actual calendar validation
# happens when on_prepare() parses the value with datetime.strptime and '%Y-%m-%d'.
DATE_REGEXP = '[0-9]{4}-[0-9]{2}-[0-9]{2}'


class AviaBookingLogDataCleaner(sdk2.Task):
    """Remove one partner's rows from partner booking log tables in YT.

    For every table that ``tables_for_daterange`` yields for the configured
    root and date range, the task first copies the table into the backup
    directory and then rewrites the original table from that backup, leaving
    out the rows whose ``partner`` column equals the configured partner code.
    """

    class Requirements(sdk2.Task.Requirements):
        # https://wiki.yandex-team.ru/sandbox/clients/#client-tags-multislot
        cores = 1  # exactly 1 core
        # NOTE(review): the original comment here read "8GiB or less", which
        # presumably describes the multislot limit from the link above rather
        # than the requested amount (unit is presumably MiB -- confirm).
        # copy_table_without_partner() buffers a whole table in memory, so this
        # value may be tight for large tables.
        ram = 100

        class Caches(sdk2.Requirements.Caches):
            pass  # means that the task does not use any shared caches

        environments = (
            PipEnvironment('raven'),
            PipEnvironment('yandex-yt', version='0.10.8'),
            PipEnvironment('yandex-yt-yson-bindings-skynet', version='0.3.32-0'),
        )

    class Parameters(sdk2.Task.Parameters):
        with sdk2.parameters.Group('Backup parameters') as backup_block:
            # Both dates must match DATE_REGEXP and are parsed in on_prepare().
            left_date = sdk2.parameters.StrictString('Start date', regexp=DATE_REGEXP)
            right_date = sdk2.parameters.StrictString('Stop date', regexp=DATE_REGEXP)
            # Owner of the vault entry 'YT_TOKEN' that is read in on_prepare().
            vaults_owner = sdk2.parameters.String('Vaults owner', required=True, default_value='AVIA')
            # Rows whose 'partner' column equals this value are dropped.
            partner_code = sdk2.parameters.String('Partner\'s code', required=True)
            yt_proxy = sdk2.parameters.String('Reference YT proxy', required=True, default_value='hahn')
            yt_partner_booking_root = sdk2.parameters.String(
                'YT partner booking root', required=True, default_value='//home/avia/logs/avia-partner-booking-log'
            )
            # Must already exist and must not contain any tables
            # (enforced by validate_yt_paths()).
            yt_partner_booking_root_backup = sdk2.parameters.String(
                'YT partner booking root BACKUP directory', required=True
            )

    def on_prepare(self):
        """Configure logging, parse the date parameters and create the YT client.

        Raises:
            ValueError: if a date parameter cannot be parsed as ``%Y-%m-%d``
                or the start date is later than the stop date.
        """
        configure_logging(
            sentry_dsn=get_sentry_dsn(self)
        )
        self.logger = logging.getLogger(__name__)

        date_format = '%Y-%m-%d'
        self.left_date = datetime.strptime(text_type(self.Parameters.left_date), date_format).date()
        self.right_date = datetime.strptime(text_type(self.Parameters.right_date), date_format).date()
        # Fail loudly on an inverted range instead of passing it on to
        # tables_for_daterange(); this task rewrites data, so a mistyped
        # range should be reported rather than silently accepted.
        if self.left_date > self.right_date:
            raise ValueError(
                'Start date %s is later than stop date %s' % (self.left_date, self.right_date)
            )

        self.yt = YtClientFactory.create(
            self.Parameters.yt_proxy,
            sdk2.Vault.data(self.Parameters.vaults_owner, 'YT_TOKEN')
        )

    def validate_yt_paths(self, yt_partner_booking_root, yt_partner_booking_root_backup):
        # type: (text_type, text_type) -> None
        """Check that the source root and the backup directory are usable.

        Args:
            yt_partner_booking_root: YT path of the directory with the tables to clean.
            yt_partner_booking_root_backup: YT path of the directory that receives the backups.

        Raises:
            ValueError: if either path is empty or both paths are equal.
            IOError: if either path does not exist in YT, or the backup
                directory already contains at least one table.
        """
        if not yt_partner_booking_root:
            raise ValueError('Invalid yt_partner_booking_root source')

        if not yt_partner_booking_root_backup:
            raise ValueError('Invalid yt_partner_booking_root backup')

        if yt_partner_booking_root == yt_partner_booking_root_backup:
            raise ValueError('Source path is also a target: %s' % yt_partner_booking_root)

        yc = self.yt
        if not yc.exists(yt_partner_booking_root):
            raise IOError('yt_partner_booking_root source doesn\'t exist: %s' % yt_partner_booking_root)
        if not yc.exists(yt_partner_booking_root_backup):
            raise IOError(
                'yt_partner_booking_root backup directory doesn\'t exist: %s. Please create it first' %
                yt_partner_booking_root_backup
            )

        # Only the presence of a table matters, so stop at the first hit
        # instead of collecting the whole search result.
        found_tables = iter(yc.search(yt_partner_booking_root_backup, node_type='table'))
        if next(found_tables, None) is not None:
            raise IOError(
                'Backup directory %s is not empty. For safety reasons, please point to an empty directory' %
                yt_partner_booking_root_backup
            )

    def do_backup(self, source_path, backup_dir):
        # type: (text_type, text_type) -> text_type
        """Copy one table into the backup directory under the same name.

        Args:
            source_path: YT path of the table to back up.
            backup_dir: YT path of the directory that receives the copy.

        Returns:
            The YT path of the freshly created backup table.

        Raises:
            IOError: if a node with the backup table's path already exists.
        """
        self.logger.info('Creating backup from %s to %s', source_path, backup_dir)

        # Imported here, not at module level: the yt package is listed in
        # Requirements.environments and is presumably only importable once the
        # task environment has been prepared.
        from yt.wrapper.ypath import ypath_join
        yc = self.yt

        table_name = source_path.rsplit('/', 1)[-1]
        backup_table_path = ypath_join(backup_dir, table_name)
        if yc.exists(backup_table_path):
            raise IOError(
                'Backup table %s already exist. '
                'For safety reasons, please make sure you\'re backing up into a correct spot' %
                backup_table_path
            )
        self.logger.info('Start: Copy %s -> %s', source_path, backup_table_path)
        yc.copy(source_path, backup_table_path)
        self.logger.info('Done: Copy %s -> %s', source_path, backup_table_path)

        return backup_table_path

    def copy_table_without_partner(self, backup_table_path, target_table_path, partner_code):
        # type: (text_type, text_type, text_type) -> None
        """Rewrite the target table from its backup, dropping one partner's rows.

        Args:
            backup_table_path: YT path of the backup table to read from.
            target_table_path: YT path of the table that gets rewritten.
            partner_code: rows whose ``partner`` value equals this are skipped.

        Raises:
            KeyError: if a row of the backup table has no ``partner`` key.
        """
        self.logger.info(
            'Filtering partner %s while copying from backup %s to %s',
            partner_code, backup_table_path, target_table_path
        )

        yc = self.yt
        # The rows are buffered on purpose: every row is checked before the
        # first write, so a malformed backup table raises before
        # write_table() is ever called on the target.
        rows = []
        for row in yc.read_table(backup_table_path):
            if 'partner' not in row:
                raise KeyError('Incorrect table spec. No partner column in %s' % backup_table_path)
            if row['partner'] == partner_code:
                continue
            rows.append(row)
        self.logger.info(
            'Done reading from %s. About to write %d rows of data to %s',
            backup_table_path,
            len(rows),
            target_table_path,
        )
        yc.write_table(target_table_path, rows)

    def on_execute(self):
        """Validate the paths, then back up and clean every table in the date range.

        Any exception raised by validation, backup or rewriting propagates and
        stops the loop, so tables that come later in the range are not touched.
        """
        # Normalise the parameters once so every helper gets the same text values.
        booking_root = text_type(self.Parameters.yt_partner_booking_root)
        backup_root = text_type(self.Parameters.yt_partner_booking_root_backup)
        partner_code = text_type(self.Parameters.partner_code)

        self.logger.info(
            'Cleaning booking log [%s] for partner %s %s - %s',
            booking_root,
            partner_code,
            self.left_date,
            self.right_date,
        )

        self.validate_yt_paths(booking_root, backup_root)

        for table in tables_for_daterange(
            self.yt, booking_root,
            self.left_date, self.right_date,
        ):
            self.logger.info('Table %s', table)
            # do_backup() either raises or returns the path of the new backup
            # table, so the rewrite below only runs once the backup exists.
            backup_table_path = self.do_backup(table, backup_root)
            self.copy_table_without_partner(backup_table_path, table, partner_code)

        self.logger.info('Done cleaning')
