# -*- coding: utf-8 -*-

import logging
import time

# Main API
import happybase
from happybase.hbase.ttypes import AlreadyExists

# Happybase does not provide any exception-wrapping,
# so we have to use native Thrift stuff
from thrift.transport import TTransport

log = logging.getLogger(__name__)


class RecordAccumulator:
    """
    In-memory buffer of records, grouped by table name.

    ``storage`` maps a table name to the list of records appended for it,
    in insertion order.
    """
    def __init__(self):
        self.storage = {}

    def append(self, table, data):
        """Add one record to the list kept for ``table``."""
        self.storage.setdefault(table, []).append(data)

    def size(self, table=False):
        """
        Return the number of buffered records.

        With a falsy ``table`` (the default) the total over all tables is
        returned; otherwise only the count for that table, which is 0 for
        a table that has never been appended to.
        """
        if not table:
            return sum(len(rows) for rows in self.storage.values())
        # Default to an empty list: len() of the former default (0) raised
        # TypeError for unknown tables.
        return len(self.storage.get(table, []))

    def get_tables(self):
        """Return the names of all tables that currently have a record list."""
        return list(self.storage)

    def data(self, table):
        """Return the records buffered for ``table`` (empty list if none)."""
        return self.storage.get(table, [])

    def dump_all(self):
        """Return the underlying table -> records mapping (not a copy)."""
        return self.storage

    def flush(self):
        """Drop everything buffered so far."""
        # Rebind instead of clear(): a dict handed out earlier by
        # dump_all() keeps its content.
        self.storage = {}


class StorageBackend(object):
    """
    Common base of every storage backend.

    It carries no behaviour of its own; it only spells out the public
    methods a concrete backend has to provide.
    """
    def accumulate(self, **data):
        """Buffer one record; concrete backends must override this."""
        raise NotImplementedError()

    def commit(self):
        """Write out what was buffered; concrete backends must override this."""
        raise NotImplementedError()


class MockBase(StorageBackend):
    """
    This is a testing backend. It just prints the results.

    Records are kept in a RecordAccumulator. When an output object is
    given, each record is written to it as soon as it is accumulated and
    the whole buffer is written again on commit(). When a dictionary is
    given, commit() merges the buffered records into it.
    """
    def __init__(self, output=None, dictionary=None, *args, **kwargs):
        """
        :param output: optional object with a ``write()`` method.
        :param dictionary: optional dict updated with the buffer on commit().
        Extra positional and keyword arguments are accepted and ignored.
        """
        self.out = output
        self.ac = RecordAccumulator()
        self.d = dictionary

    def _has_output(self):
        """Tell whether ``self.out`` can be written to."""
        # Duck-typed on purpose: checking for the builtin `file` type
        # rejected StringIO objects and codecs-wrapped streams.
        return callable(getattr(self.out, 'write', None))

    def _output_batch(self):
        """Write every buffered record to the output, table by table."""
        for table in self.ac.get_tables():
            for key, quals in self.ac.data(table):
                self.out.write(unicode([table, key, quals]))

    def accumulate(self, table, key, data):
        """Buffer ``[key, data]`` under ``table`` and echo it to the output."""
        self.ac.append(table, [key, data])
        if self._has_output():
            self.out.write(unicode([table, key, data]))

    def commit(self):
        """Merge the buffer into the dictionary and print it, where available."""
        # The dictionary is optional (defaults to None), so guard the update.
        if self.d is not None:
            self.d.update(self.ac.dump_all())
        # Only print if we have a destination.
        if self._has_output():
            self._output_batch()


class Hbase(StorageBackend):
    """
    This class provides transport to hbase using Thrift.

    Records are buffered per table with accumulate() and written out by
    commit(), using one HappyBase batch per table. A table that is not in
    the cached table list is created first, with the options found for it
    in ``schema``.
    """
    def __init__(self, host=None, **kwargs):
        """
        Set up the backend and connect to the Thrift gateway.

        The call does not return until a connection has been established,
        because _connect() retries indefinitely.

        :param host: Thrift gateway host (mandatory).
        :param kwargs: optional overrides for ``port`` (default 9090),
            ``timeout`` in milliseconds (default 500), ``ttl`` in seconds
            (default 1210000) and ``schema``, a dict mapping a table name
            to the keyword arguments passed to create_table() for it.
        :raises TypeError: when ``host`` is not given.
        """
        me = "%s.%s()" % (self.__class__, self.__class__.__name__)
        log.debug("%s invoked.", me)
        # This is the only required arg.
        self.host = host
        if self.host is None:
            raise TypeError("%s: host argument is mandatory" % me)

        # Set defaults for optional args.
        self.port = 9090
        self.timeout = 500  # in Milliseconds
        self.ttl = 1210000  # default table TTL: 2 weeks in seconds.
        self.table_create_opts = {}
        self.schema = {}    # Per-table options handed to create_table()
        for key in ('host', 'port', 'timeout', 'ttl', 'schema'):
            setattr(self, key, kwargs.get(key, getattr(self, key)))
        # accumulator object
        self.ac = RecordAccumulator()
        # Establish connection.
        self.cn = self._connect()
        # Tables currently present on the server.
        self.existing_tables = self.cn.tables()

    @staticmethod
    def _close_quietly(connection):
        """Best-effort close of a connection object; errors are only logged."""
        if not connection:
            return
        try:
            connection.close()
        except Exception as e:
            log.debug("ignoring error while closing connection: %s", e)

    def _set_table(self, table_name):
        """
        Return the HappyBase table object for ``table_name``.

        Any error is logged and ``False`` is returned instead.
        """
        table = False
        try:
            table = self.cn.table(table_name)
        except Exception as e:
            log.error(
                "Error trying to set active DB to {db}: {ex}".format(
                    ex=unicode(e),
                    db=table_name
                )
            )
        return table

    def _connect(self):
        """
        Open a connection to the Thrift gateway, retrying once per second
        until it succeeds.

        The new connection is tested by listing the tables; that list is
        stored in ``self.tables``.

        :return: the opened happybase.Connection.
        """
        while True:
            connection = None
            try:
                log.debug("trying to connect to thrift gateway, at host %s, port %s", self.host, self.port)
                connection = happybase.Connection(
                    host=self.host,
                    port=self.port,
                    timeout=self.timeout,
                    transport='framed',
                    autoconnect=False
                )
                # autoconnect=False requires manual connection establishment.
                connection.open()
                log.debug("will try to list tables to test new connection.")
                # One round trip serves both the log line and the cache.
                tables = connection.tables()
                log.debug("following tables are available: %s", tables)
                self.tables = tables
                return connection

            # Both timeout and closed port trigger this one.
            except TTransport.TTransportException as e:
                log.error("thrift transport connect error: %s", e)
            # Broken Pipe and friends.
            except IOError as e:
                log.error("thrift transport IO error: %s", e)
            # XXX: Do I really need to catch it here?
            except Exception as e:
                log.error("connect exception: %s" % e.__class__.__name__)

            # The attempt failed: release whatever was opened so that
            # retries do not pile up half-open connections.
            self._close_quietly(connection)
            time.sleep(1)

    def _reconnect(self):
        """Drop the current connection and replace it with a fresh one."""
        # attempt closing existing connection
        self._close_quietly(self.cn)
        # open new one
        self.cn = False
        self.cn = self._connect()

    def _commit_batch(self):
        """
        Send all accumulated records to the server.

        IOError and Thrift transport errors trigger a reconnect and a
        retry of the current table after one second, without limit. Any
        other error abandons the commit.

        :return: True when everything was sent (or there was nothing to
            send); False when the commit was abandoned, in which case the
            accumulator is left unflushed.
        """
        # dont bother with an empty batch
        if self.ac.size() == 0:
            return True
        start_commit = time.time()
        # For each table there is a separate batch() object:
        # This is because the batch() method of Happybase is unable
        # to write to multiple tables in one pass.
        for table in self.ac.get_tables():
            # If table is not present on the server, try creating it.
            if table not in self.existing_tables:
                self.create_table(name=table, **self.schema.get(table, {}))
            # Try sending data indefinitely.
            while True:
                try:
                    # create an instance of HappyBase batch object.
                    # The context manager will take care of sending the data,
                    # once the block is exited.
                    with self._set_table(table).batch() as batch:
                        # iterate data for each table.
                        # row is a list(), where [0] = rowkey, and
                        # [1] = column quals (cf-prepended) and their values.
                        for row in self.ac.data(table):
                            batch.put(*row)

                except IOError as e:
                    # Thrift throws back IOError on just about every occasion.
                    # One will have remote server exception description, e.g.:
                    # "Failed 1 action: yrpop_suid_sessions: 1 time"
                    log.error("thrift transport IO error: %s", e)
                    self._reconnect()

                except TTransport.TTransportException as e:
                    # It seems the connection has been severed; retry.
                    log.error("thrift transport error: %s", e)
                    self._reconnect()

                except Exception as e:
                    # Unexpected failure; this also covers _set_table()
                    # having returned False.
                    log.error("error while forming hbase batch, table '%s': %s", table, unicode(e))
                    # abandon this batch.
                    return False
                else:
                    # If we made this far, that means all seems to be in order.
                    break
                # Only reached after a reconnect: pause before retrying.
                time.sleep(1)
        # Report first (the sizes are gone after the flush), then flush
        # the record accumulator.
        log.debug('commit batch: {sz} records, tables {tb}, time: {tt:.2f}s'.format(sz=self.ac.size(), tb=self.ac.get_tables(), tt=(time.time()-start_commit)))
        self.ac.flush()

        return True

    # Public API
    def accumulate(self, table, key, data):
        """Buffer one row (``key`` plus its ``data`` mapping) for ``table``."""
        self.ac.append(table, [key, data])

    def commit(self):
        """
        Send the buffered rows to the server.

        :return: the result of _commit_batch(): True on success, False
            when the commit was abandoned.
        """
        return self._commit_batch()

    def create_table(self, name=None, cf='cf', **kwargs):
        """
        Create table ``name`` with a single column family ``cf``.

        An already existing table is silently accepted; any other failure
        is logged, not raised. The cached table list is refreshed
        afterwards in either case.

        :param name: table name (mandatory).
        :param cf: column family name.
        :param kwargs: overrides for the column family options listed
            below; unknown keys are ignored.
        :raises TypeError: when ``name`` is not given.
        """
        if name is None:
            raise TypeError('name argument is required')
        # default options:
        defaults = {
            'max_versions': 1,
            'compression': 'LZ4',
            'in_memory': False,
            'bloom_filter_type': 'ROW',
            'block_cache_enabled': True,
            # Instance-wide default TTL (2 weeks in seconds unless it was
            # overridden through the constructor).
            'time_to_live': self.ttl,
            }
        options = dict(
            (key, kwargs.get(key, value)) for key, value in defaults.items()
        )
        try:
            log.warning('creating table "{t}", cf "{c}", options {opt}'.format(t=name, c=cf, opt=options))
            self.cn.create_table(name=name, families={cf: options})
            if name not in self.cn.tables():
                raise Exception('table {0} was not created'.format(name))
        except AlreadyExists:
            pass
        except Exception as e:
            log.exception('error creating table {t}: {e}'.format(t=name, e=e))
        self.existing_tables = self.cn.tables()
        log.debug('tables present: {}'.format(self.existing_tables))
