# -*- coding: utf-8 -*-

from __future__ import print_function, absolute_import, division

import re
import pandas
import logging


class DetailedTableCleaner(object):
    """Clean a pandas table column by column, driven by a config mapping.

    The config is read as follows:

    * ``config['strategy']`` -- ``'tidy'`` or ``'erase'``.
    * ``config['columns']`` -- mapping of column name to a per-column
      config with the keys:

      * ``'type'`` -- ``'nop'``, ``'white_list'``, ``'black_list'`` or
        ``'regexp'``;
      * ``'params'`` -- keyword arguments for that filter: ``{}`` for
        ``nop``, ``{'white': [...]}`` for ``white_list``,
        ``{'black': [...]}`` for ``black_list`` and
        ``{'pattern': '...'}`` for ``regexp``;
      * ``'trash_value'`` -- only read by the ``tidy`` strategy: the value
        written into cells rejected by the filter.
    """

    def __init__(self, config):
        super(DetailedTableCleaner, self).__init__()
        self.config = config
        self.logger = logging.getLogger(__name__)

    def clean(self, table):
        """Return a cleaned copy of ``table``; the input is not modified.

        Raises ``KeyError`` if ``config['strategy']`` is not a known
        strategy name.
        """
        method = {
            'tidy': self.__tidy,
            'erase': self.__erase
        }
        # Work on a copy so the strategies can assign in place safely.
        return method[self.config['strategy']](table.copy())

    def __tidy(self, table):
        """Overwrite every rejected cell with its column's ``trash_value``."""
        for column, config in self.config['columns'].items():
            index = self.__filter_index(table, column, config)
            table.loc[~index, column] = config['trash_value']
        return table

    def __erase(self, table):
        """Keep only the rows accepted by the filters of *all* columns."""
        # Start from an all-True mask and AND every column filter into it.
        # This avoids the ``reduce`` builtin (absent on Python 3) and keeps
        # every row when no columns are configured.
        keep = pandas.Series(True, index=table.index, dtype=bool)
        for column, config in self.config['columns'].items():
            keep = keep & self.__filter_index(table, column, config)
        return table[keep]

    def __filter_index(self, table, column, config):
        """Dispatch to the filter named by ``config['type']``.

        Returns a boolean Series aligned with ``table`` where ``True`` marks
        an accepted cell. Raises ``KeyError`` for an unknown filter type.
        """
        method = {
            'nop': self.__nop_filter_index,
            'white_list': self.__white_list_filter_index,
            'black_list': self.__black_list_filter_index,
            'regexp': self.__regexp_filter_index
        }
        return method[config['type']](table, column, **config['params'])

    def __nop_filter_index(self, table, column):
        """Accept every cell of ``column``."""
        # Index through table[column] so a missing column still raises
        # KeyError, and build the mask with an explicit bool dtype so that
        # ``~mask`` is well defined even for an empty table.
        return pandas.Series(True, index=table[column].index, dtype=bool)

    def __white_list_filter_index(self, table, column, white):
        """Accept cells whose value is one of ``white``."""
        return table[column].isin(set(white))

    def __black_list_filter_index(self, table, column, black):
        """Accept cells whose value is not one of ``black``."""
        return ~table[column].isin(set(black))

    def __regexp_filter_index(self, table, column, pattern):
        """Accept cells whose value matches ``pattern`` from its start.

        Uses ``re.match`` semantics (anchored at the beginning only). Cells
        that cannot be matched against the pattern at all (e.g. NaN, None or
        numbers) are rejected instead of raising ``TypeError``.
        """
        re_parser = re.compile(pattern)

        def matches(value):
            try:
                return re_parser.match(value) is not None
            except TypeError:
                # Non-string cell: it cannot satisfy a text pattern.
                return False

        # astype(bool) keeps the mask boolean even when the column is empty.
        return table[column].apply(matches).astype(bool)