import logging
import re
import time

from os.path import join
from sandbox.projects.common.utils import get_or_default
from sandbox.sandboxsdk import errors, parameters


# Path of the reducer script file. BanDiff.make_diff writes REDUCE_SCRIPT_CONTENT
# to this path and passes it to run_reduce both in the command line and as a
# local file.
REDUCE_SCRIPT_NAME = './antispam_ban_diff_reduce.py'
# Source text of the reducer (Python 2 syntax: it uses the print statement).
# Each stdin line is split on tabs; field 0 is compared with '1' and field 1 is
# used as the key. make_diff runs it with
# SchemafulDsvFormat(columns=['key'], enable_table_index=True) over
# [base_ban_table, new_ban_table], so field 0 is presumably the input table
# index (0 = base, 1 = new) -- TODO confirm against the YT format docs.
# A line with field 0 == '1' is printed (without field 0) unless its key equals
# the key of the most recent line that had another value in field 0.
# NOTE(review): this relies on base-table rows arriving before new-table rows
# with the same key -- confirm that the reduce input ordering guarantees it.
REDUCE_SCRIPT_CONTENT = """
import sys

TEXT_EOL = '\\n'
TEXT_TAB = '\\t'

base_key = ''
for line in sys.stdin:
    parts = line.rstrip(TEXT_EOL).split(TEXT_TAB)
    current_key = parts[1]
    if parts[0] == '1':
        if current_key != base_key:
            print TEXT_TAB.join(parts[1:])
    else:
        base_key = current_key
"""


def get_attr(client, path, attr, default_value=None):
    """Read attribute `attr` of the node at `path`.

    The attribute is addressed as ``path + '/@' + attr``. When that attribute
    path does not exist according to ``client.exists``, `default_value` is
    returned instead of querying the value.
    """
    attr_path = path + '/@' + attr
    if client.exists(attr_path):
        return client.get(attr_path)
    return default_value


def set_attr(client, path, attr, value):
    """Set attribute `attr` of the node at `path` to `value`.

    The attribute is addressed as ``path + '/@' + attr``.

    Raises:
        errors.SandboxTaskFailureError: if `path` does not exist according to
            ``client.exists``.
    """
    if not client.exists(path):
        raise errors.SandboxTaskFailureError('Set attribute on invalid path {}'.format(path))
    # The attribute path is already complete; wrapping it in a single-argument
    # os.path.join() (as before) was a no-op.
    client.set(path + '/@' + attr, value)


class AntispamExportDirParameter(parameters.SandboxStringParameter):
    # Required task input: directory of the antispam export.
    # BanDiffParams stores it as `antispam_dir`; BanDiff.AntispamExport reads the
    # directory's 'current_state' attribute and builds per-state
    # '<dir>/<state>/state_owner_bans' table paths from it.
    name = 'antispam_export_dir'
    description = 'Antispam export dir'
    default_value = '//home/antispam/export/images/features'
    required = True


class IndexDirParameter(parameters.SandboxStringParameter):
    # Required task input: directory of the images index.
    # BanDiffParams stores it as `index_dir`; BanDiff.IndexStates takes the
    # current index state from its 'production' attribute, falling back to the
    # first row of the '<dir>/production' table.
    name = 'index_dir'
    description = 'Index dir'
    default_value = '//home/images/index'
    required = True


class ArchiveDirParameter(parameters.SandboxStringParameter):
    # Task input: directory where BanDiff.Archiver keeps copies of ban tables.
    # BanDiffParams stores it as `archive_dir`; an empty value makes
    # BanDiff.get_archiver return None, i.e. archiving is skipped.
    name = 'archive_dir'
    description = 'Archive dir'
    default_value = '//home/imgdev/images/antispam/archive'


class ArchiveDepthParameter(parameters.SandboxIntegerParameter):
    # Task input: how many older index-bound archived states BanDiff.Archiver.pack
    # keeps before it starts removing (pack treats values below 1 as 1).
    # BanDiffParams stores it as `archive_depth`.
    name = 'archive_depth'
    description = 'Archive depth'
    default_value = 5


class CurrentBanTableParameter(parameters.SandboxStringParameter):
    # Optional task input: explicit path of the current (new) ban table.
    # BanDiffParams stores it as `current_ban_table`; when non-empty, BanDiff uses
    # it as is instead of the table of the antispam export's current state.
    name = 'current_ban_table'
    description = 'Current ban table to override'


class BaseBanTableParameter(parameters.SandboxStringParameter):
    # Optional task input: explicit path of the base ban table.
    # BanDiffParams stores it as `excluded_ban_table`; when non-empty, BanDiff uses
    # it as is instead of deriving the base table from the index state.
    name = 'base_ban_table'
    description = 'Base ban table to override'


class DisableBuildDiff(parameters.SandboxBoolParameter):
    # Task input stored by BanDiffParams as `disable`. When true,
    # BanDiff.make_diff returns without building anything and
    # BanDiff.make_version returns an empty string.
    name = 'disable_build_diff'
    description = 'Disable build of ban diff, use specified ban table (YT table) as is'
    default_value = False


class UseArchiveInsteadOfBaseTable(parameters.SandboxBoolParameter):
    # Task input stored by BanDiffParams as `use_archive`. When true,
    # BanDiff.resolve_base_table may substitute the archived copy for a base ban
    # table that is missing; when false, a missing base table is a task failure.
    name = 'use_archive_instead_of_base_table'
    description = 'Use archive copy if base ban table doesnt exist'
    default_value = True


class BanDiffParams:
    """Settings bundle consumed by BanDiff.

    The class-level values below are the defaults an instance has until
    `get_from_ctx` overwrites them.
    """

    # YT locations
    antispam_dir = ''
    index_dir = ''
    diff_table = ''
    # Optional explicit table overrides (empty means "derive from state")
    current_ban_table = ''
    excluded_ban_table = ''
    # Archive settings
    archive_dir = ''
    archive_depth = 0
    # Behaviour switches
    disable = False
    use_archive = True

    def get_from_ctx(self, ctx, diff_table_name_param):
        """Fill the fields from task context `ctx`.

        `diff_table_name_param` is stored as the destination diff table as is;
        every other field is obtained with `get_or_default` for its parameter
        class.
        """
        self.diff_table = diff_table_name_param
        context_bound = (
            ('antispam_dir', AntispamExportDirParameter),
            ('index_dir', IndexDirParameter),
            ('archive_dir', ArchiveDirParameter),
            ('archive_depth', ArchiveDepthParameter),
            ('current_ban_table', CurrentBanTableParameter),
            ('excluded_ban_table', BaseBanTableParameter),
            ('disable', DisableBuildDiff),
            ('use_archive', UseArchiveInsteadOfBaseTable),
        )
        for field_name, parameter in context_bound:
            setattr(self, field_name, get_or_default(ctx, parameter))

    @staticmethod
    def create_input():
        """Return the tuple of parameter classes that `get_from_ctx` reads."""
        return (
            AntispamExportDirParameter,
            IndexDirParameter,
            ArchiveDirParameter,
            ArchiveDepthParameter,
            CurrentBanTableParameter,
            BaseBanTableParameter,
            DisableBuildDiff,
            UseArchiveInsteadOfBaseTable,
        )


class BanDiff:
    # Builds the diff between the current antispam ban table and the base ban
    # table referenced by the index state (see make_diff), archiving the tables
    # it uses along the way (see archive).

    # Class-level defaults for the lazily created helpers; the instances are
    # built on first use by get_antispam_export / get_index / get_archiver and
    # then stored on the BanDiff instance.
    antispam_export = None
    index = None
    archiver = None

    def __init__(self, client, params):
        """Store the client and the settings object (fields as in BanDiffParams)."""
        self.params = params
        self.client = client

    class StateChecker:
        """Helpers for validating and normalising state names.

        A valid state is a byte string of exactly eight digits, a dash and six
        digits (presumably a date-time stamp such as '20200131-235959').
        """

        @staticmethod
        def check_state(state):
            """Return True only when `state` is a `str` in the expected format.

            The pattern is anchored with ``\\Z`` rather than ``$``: ``$`` also
            matches just before a trailing newline, which would let a value like
            '20200131-235959\\n' through and into the paths built from it.
            """
            return isinstance(state, str) and re.match(r'[0-9]{8}-[0-9]{6}\Z', state) is not None

        @staticmethod
        def adjust(state):
            """Encode a `unicode` value to UTF-8 `str`; return anything else unchanged."""
            return state.encode('utf-8') if isinstance(state, unicode) else state

    class AntispamExport(StateChecker):
        """View of the antispam export directory and its current state."""

        def __init__(self, client, params):
            """Validate `params.antispam_dir` and read its 'current_state' attribute.

            Raises errors.SandboxTaskFailureError when the directory is missing,
            is not a map node, or its 'current_state' attribute is not a valid
            state name.
            """
            export_dir = params.antispam_dir
            self.dir = export_dir
            is_map_node = client.exists(export_dir) and get_attr(client, export_dir, 'type') == 'map_node'
            if not is_map_node:
                raise errors.SandboxTaskFailureError('Invalid antispam export path {}'.format(export_dir))

            state = self.adjust(get_attr(client, export_dir, 'current_state'))
            if not self.check_state(state):
                raise errors.SandboxTaskFailureError('Invalid state data in attributes of {}'.format(export_dir))
            logging.info('Get current state {} from attributes of {}'.format(state, export_dir))
            self.current_state = state

        def get_current_state(self):
            """Return the state read from the export directory at construction."""
            return self.current_state

        def get_ban_table(self, state=None):
            """Return the 'state_owner_bans' table path for `state` (current state if falsy)."""
            return join(self.dir, state or self.current_state, 'state_owner_bans')

    class IndexStates(StateChecker):
        """View of the images index directory and its production state."""

        def __init__(self, client, params):
            """Validate `params.index_dir` and determine the current index state.

            The state is taken from the directory's 'production' attribute; when
            that is not a valid state name, the 'key' of the first row of the
            '<index_dir>/production' table is tried instead.

            Raises errors.SandboxTaskFailureError when the directory is missing
            or not a map node, or when neither source yields a valid state.
            """
            self.dir = params.index_dir
            if not client.exists(self.dir) or get_attr(client, self.dir, 'type') != 'map_node':
                raise errors.SandboxTaskFailureError('Invalid images index path {}'.format(self.dir))
            attribute_state = self.adjust(get_attr(client, self.dir, 'production'))
            if self.check_state(attribute_state):
                logging.info('Get current state {} from attributes of {}'.format(attribute_state, self.dir))
                self.current_state = attribute_state
                return

            logging.error('Invalid state data in attributes of {}; trying read state from table'.format(self.dir))
            import yt.wrapper as yt
            state_table = join(self.dir, 'production')
            read_state = None
            # Only the first row of the table is looked at.
            for row in client.read_table(state_table, format=yt.JsonFormat()):
                read_state = self.adjust(row.get('key'))
                break
            if not self.check_state(read_state):
                raise errors.SandboxTaskFailureError('Invalid state data in table {}'.format(state_table))
            logging.info('Get current state {} from table {}'.format(read_state, state_table))
            self.current_state = read_state

        def get_current_state(self):
            """Return the index state determined at construction."""
            return self.current_state

        def get_antispam_ban_state(self, client, state=None):
            """Return the antispam ban state recorded on an index state directory.

            Reads the '_control_base/ANTISPAM_BAN_STATE' attribute of
            '<index_dir>/<state>' (`state` defaults to the current state).

            Raises errors.SandboxTaskFailureError when the attribute is missing
            or is not a valid state name.
            """
            if not state:
                state = self.current_state
            state_dir = join(self.dir, state)
            # Normalise with adjust() like every other state read in this file;
            # without it a unicode attribute value is rejected by check_state().
            antispam_ban_state = self.adjust(get_attr(client, state_dir, '_control_base/ANTISPAM_BAN_STATE'))
            if self.check_state(antispam_ban_state):
                logging.info('Get antispam ban state {} from attributes of {}'.format(antispam_ban_state, state_dir))
                return antispam_ban_state
            raise errors.SandboxTaskFailureError('Invalid antispam ban state data in attributes of {}'.format(state_dir))

    class Archiver:
        """Keeps copies of ban tables under the archive directory.

        Each archived state is a map node '<archive_dir>/<state>' carrying the
        attributes INDEX_STATE and TIMESTAMP and containing the table
        'state_owner_bans'.
        """

        class StateInfo:
            """What the archive currently holds for one state.

            The class-level values are the defaults for everything the
            constructor could not establish (it stops at the first failed check).
            """

            dir_path_exists = False
            dir_valid = False
            index_state = ''
            # Plain integer default. It used to be written as `0,` which is a
            # one-element tuple and cannot be compared with real timestamps.
            timestamp = 0
            table_path_exists = False
            table_valid = False
            row_count = 0
            full_valid = False

            @staticmethod
            def state_dir(archive_dir, state):
                """Return the archive map node path for `state`."""
                return join(archive_dir, state)

            @staticmethod
            def state_table(archive_dir, state):
                """Return the archived ban table path for `state`."""
                return join(archive_dir, state, 'state_owner_bans')

            def __init__(self, client, archive_dir, state):
                """Inspect the archive entry of `state`, filling fields step by step.

                Checks, in order: directory exists, directory is a map node
                (then INDEX_STATE / TIMESTAMP are read), table exists, table is
                of type 'table' (then row_count is read). `full_valid` is set
                only when all checks pass and the table has at least one row.
                """
                dir_path = self.state_dir(archive_dir, state)
                table_path = self.state_table(archive_dir, state)
                self.state = state
                if not client.exists(dir_path):
                    return
                self.dir_path_exists = True
                if get_attr(client, dir_path, 'type') != 'map_node':
                    return
                self.dir_valid = True
                self.index_state = get_attr(client, dir_path, 'INDEX_STATE', '')
                self.timestamp = get_attr(client, dir_path, 'TIMESTAMP', 0)
                if not client.exists(table_path):
                    return
                self.table_path_exists = True
                if get_attr(client, table_path, 'type') != 'table':
                    return
                self.table_valid = True
                self.row_count = get_attr(client, table_path, 'row_count', 0)
                self.full_valid = self.row_count > 0

            def get_state_dir(self, archive_dir):
                """Return the archive map node path of this state."""
                return self.state_dir(archive_dir, self.state)

            def get_state_table(self, archive_dir):
                """Return the archived ban table path of this state."""
                return self.state_table(archive_dir, self.state)

        def __init__(self, client, params):
            """Validate `params.archive_dir` and list the archived states.

            `state_list` holds the names of the direct children of the archive
            directory that are map nodes, as seen at construction time.

            Raises errors.SandboxTaskFailureError when the archive directory is
            missing or is not a map node.
            """
            self.dir = params.archive_dir
            if not client.exists(self.dir) or get_attr(client, self.dir, 'type') != 'map_node':
                raise errors.SandboxTaskFailureError('Invalid archiver path {}'.format(self.dir))
            self.depth = params.archive_depth
            self.state_list = [x for x in client.list(self.dir) if get_attr(client, join(self.dir, x), 'type') == 'map_node']

        def get_state_info(self, client, state):
            """Return a fresh StateInfo for `state` in this archive."""
            return self.StateInfo(client, self.dir, state)

        def remove(self, client, state_info):
            """Remove the archived table (if present) and then the state directory.

            Returns True, both when there was nothing to remove and after a
            completed removal (the second case used to return None).
            """
            if not state_info.dir_path_exists:
                return True
            if state_info.table_path_exists:
                table_path = state_info.get_state_table(self.dir)
                logging.info('Archiver update: removing table {}'.format(table_path))
                client.remove(table_path)
            client.remove(state_info.get_state_dir(self.dir))
            return True

        def pack(self, client, ts):
            """Drop archived states older than the `depth` newest index-bound ones.

            Only fully valid states that have an INDEX_STATE and a TIMESTAMP
            below `ts` are counted. If there are no more than max(1, depth) of
            them nothing happens; otherwise every fully valid state whose
            TIMESTAMP is below that of the max(1, depth)-th newest counted state
            is removed.
            """
            archive_info = [self.get_state_info(client, x) for x in self.state_list]
            old_index_timestamps = sorted(
                x.timestamp for x in archive_info
                if x.full_valid and x.index_state and x.timestamp < ts
            )
            min_depth = max(1, self.depth)
            if len(old_index_timestamps) <= min_depth:
                logging.info('Archiver update: pack skipped because of small size')
                return
            last_saved_timestamp = old_index_timestamps[-min_depth]
            for removing_state in archive_info:
                # Test validity first: entries that are not fully valid may carry
                # only a placeholder timestamp that should not be compared.
                if removing_state.full_valid and removing_state.timestamp < last_saved_timestamp:
                    self.remove(client, removing_state)

        def archive(self, client, state, ban_table, index_state=''):
            """Store a copy of `ban_table` as the archive entry of `state`.

            If a fully valid copy with the same row count already exists, only
            INDEX_STATE is updated (when `index_state` is given and differs),
            followed by a pack. Otherwise the state directory is created if
            needed, the table is copied over, and a pack is run unless
            `index_state` is empty.

            Raises errors.SandboxTaskFailureError when the state path exists but
            is not a map node.
            """
            source_row_count = get_attr(client, ban_table, 'row_count', 0)
            state_info = self.get_state_info(client, state)
            if state_info.full_valid and state_info.row_count == source_row_count:
                logging.info('Archiver update: state {} already archived'.format(state))
                if index_state and index_state != state_info.index_state:
                    logging.info('Archiver update: replace index state with {} for state {}'.format(index_state, state))
                    set_attr(client, state_info.get_state_dir(self.dir), 'INDEX_STATE', index_state)
                    self.pack(client, state_info.timestamp)
                return
            ts = int(time.time())
            if not state_info.dir_path_exists:
                logging.info('Archiver update: creating map node {}'.format(state_info.get_state_dir(self.dir)))
                client.create('map_node', path=state_info.get_state_dir(self.dir), attributes={'INDEX_STATE': index_state, 'TIMESTAMP': ts})
            elif not state_info.dir_valid:
                raise errors.SandboxTaskFailureError('Invalid type of archiver state dir {}'.format(state_info.get_state_dir(self.dir)))
            # NOTE(review): when the directory already exists, INDEX_STATE and
            # TIMESTAMP are left as they were even though the table is re-copied
            # -- confirm this is intended.
            logging.info('Archiver update: copying {}'.format(ban_table))
            client.copy(ban_table, state_info.get_state_table(self.dir), force=True)
            logging.info('Archiver update: ban archived in {}'.format(state_info.get_state_table(self.dir)))
            if not index_state:
                logging.info('Archiver update: pack skipped for current antispam export state')
                return
            self.pack(client, ts)

    def get_antispam_export(self):
        """Return the AntispamExport helper, creating and caching it on first use."""
        cached = self.antispam_export
        if cached:
            return cached
        self.antispam_export = self.AntispamExport(self.client, self.params)
        return self.antispam_export

    def get_index(self):
        """Return the IndexStates helper, creating and caching it on first use."""
        cached = self.index
        if cached:
            return cached
        self.index = self.IndexStates(self.client, self.params)
        return self.index

    def get_archiver(self):
        """Return the cached Archiver, creating it on first use.

        Returns None (and creates nothing) when no archiver exists yet and
        `params.archive_dir` is empty.
        """
        if self.archiver:
            return self.archiver
        if not self.params.archive_dir:
            return None
        self.archiver = self.Archiver(self.client, self.params)
        return self.archiver

    def archive(self, state, ban_table, index_state=''):
        """Archive `ban_table` for `state` when an archiver is available; otherwise do nothing."""
        archiver = self.get_archiver()
        if archiver:
            archiver.archive(self.client, state, ban_table, index_state)

    def get_base_ban_info(self):
        """Return (current index state, base antispam state, base ban table path).

        The base antispam state is the ban state recorded for the current index
        state; the table path is that state's ban table in the antispam export.
        """
        index = self.get_index()
        index_state = index.get_current_state()
        ban_state = index.get_antispam_ban_state(self.client, index_state)
        return index_state, ban_state, self.get_antispam_export().get_ban_table(ban_state)

    def resolve_base_table(self):
        """Make sure a usable base ban table is available.

        Nothing is done when `params.excluded_ban_table` is already set or the
        base ban table derived from the index state exists and is a table.
        Otherwise, if archive usage is allowed and the archive holds a valid
        table for the base state, `params.excluded_ban_table` is set to that
        archived table.

        Raises errors.SandboxTaskFailureError when no archived copy can be used.
        """
        params = self.params
        if params.excluded_ban_table:
            return

        _index_state, base_state, base_ban_table = self.get_base_ban_info()
        table_present = self.client.exists(base_ban_table) and get_attr(self.client, base_ban_table, 'type') == 'table'
        if table_present:
            return

        logging.error('Current ban table {} not found'.format(base_ban_table))

        if not params.use_archive:
            failure_info = 'usage of archive copy is not allowed'
        else:
            archiver = self.get_archiver()
            if not archiver:
                failure_info = 'archive parameters are not specified'
            else:
                state_info = archiver.get_state_info(self.client, base_state)
                if state_info.table_valid:
                    archived_table = state_info.get_state_table(params.archive_dir)
                    logging.error('Task parameter Base ban table to override was replaced with {}'.format(archived_table))
                    params.excluded_ban_table = archived_table
                    return
                failure_info = 'base ban table is not archived'

        raise errors.SandboxTaskFailureError('Cannot use archive copy because of {}'.format(failure_info))

    def make_version(self):
        """Return a version string identifying the inputs of the diff.

        The result is the tab-joined sequence: current ban table path, base ban
        table path, and the 'modification_time' attributes of both. An empty
        string is returned when the diff build is disabled.

        The tables are chosen as in make_diff: explicit overrides from the
        parameters win, otherwise the antispam export's current state and the
        index's base state are used (resolve_base_table may substitute the
        archived copy of the base table).

        Raises:
            errors.SandboxTaskFailureError: if the base table cannot be
                resolved, or if the modification time of either table cannot
                be read (e.g. the table does not exist). The latter used to
                surface as a TypeError from str.join.
        """
        if self.params.disable:
            return ''
        new_ban_table = self.params.current_ban_table
        if not new_ban_table:
            current_state = self.get_antispam_export().get_current_state()
            new_ban_table = self.get_antispam_export().get_ban_table(current_state)
        new_ban_table_time = get_attr(self.client, new_ban_table, 'modification_time')
        if new_ban_table_time is None:
            raise errors.SandboxTaskFailureError(
                'Cannot get modification time of ban table {}'.format(new_ban_table))

        self.resolve_base_table()
        base_ban_table = self.params.excluded_ban_table
        if not base_ban_table:
            _index_state, _base_state, base_ban_table = self.get_base_ban_info()
        base_ban_table_time = get_attr(self.client, base_ban_table, 'modification_time')
        if base_ban_table_time is None:
            raise errors.SandboxTaskFailureError(
                'Cannot get modification time of ban table {}'.format(base_ban_table))
        return '\t'.join([new_ban_table, base_ban_table, new_ban_table_time, base_ban_table_time])

    def make_diff(self):
        """Build the ban diff table at `params.diff_table`.

        Does nothing when the build is disabled. Otherwise picks the current and
        base ban tables (explicit overrides win; tables derived from states are
        archived first), writes the reducer script to REDUCE_SCRIPT_NAME and
        runs a reduce by 'key' over [base table, current table].

        Raises errors.SandboxTaskFailureError when both tables are the same
        path, or when the base table cannot be resolved.
        """
        params = self.params
        if params.disable:
            logging.info('Skipped ban diff build: disabled')
            return

        new_ban_table = params.current_ban_table
        if not new_ban_table:
            export = self.get_antispam_export()
            export_state = export.get_current_state()
            new_ban_table = export.get_ban_table(export_state)
            self.archive(export_state, new_ban_table)
        else:
            logging.info('Overriding current ban table: {}'.format(new_ban_table))

        self.resolve_base_table()
        base_ban_table = params.excluded_ban_table
        if not base_ban_table:
            index_state, base_state, base_ban_table = self.get_base_ban_info()
            self.archive(base_state, base_ban_table, index_state)
        else:
            logging.info('Overriding base ban table: {}'.format(base_ban_table))

        if new_ban_table == base_ban_table:
            raise errors.SandboxTaskFailureError('Current and base ban tables are identical: {}'.format(base_ban_table))

        logging.info('Build ban diff in {}'.format(params.diff_table))
        with open(REDUCE_SCRIPT_NAME, 'w') as script_file:
            script_file.write(REDUCE_SCRIPT_CONTENT)
        import yt.wrapper as yt
        # Input order matters: the base table comes first, the current one second.
        reduce_options = dict(
            source_table=[base_ban_table, new_ban_table],
            destination_table=params.diff_table,
            input_format=yt.SchemafulDsvFormat(columns=['key'], enable_table_index=True),
            output_format=yt.SchemafulDsvFormat(columns=['key']),
            reduce_by=['key'],
            sort_by=['key'],
            local_files=[REDUCE_SCRIPT_NAME],
        )
        self.client.run_reduce('python {}'.format(REDUCE_SCRIPT_NAME), **reduce_options)
        logging.info('Ban diff built successfully')
