#!/usr/bin/env python
from itertools import chain

import logging
import time
import psycopg2
import psycopg2.extras
import nirvana.job_context as nv
import uuid
from psycopg2.extensions import string_types

# Maps a psycopg2 typecaster name to the YT column type used in the output
# table schema.  Names absent from this table are handled by the caller's
# default.
PG_to_YT_type_mapper = dict(
    # Date and time flavours are stored as strings.
    DATE='string',
    TIME='string',
    DATETIME='string',
    DATETIMETZ='string',
    # Integers.
    LONGINTEGER='int64',
    INTEGER='int32',
    # Text and raw bytes.
    STRING='utf8',
    BINARY='string',
    # Remaining scalar types.
    FLOAT='double',
    BOOLEAN='boolean',
    DECIMAL='double',
)


def retry(n_tries=3, exception_cls=Exception, sleep=0):
    """Build a decorator that calls a function again when it raises.

    The decorated function is called once and, every time it raises
    ``exception_cls``, called again until ``n_tries`` retries have been
    spent; it therefore runs at most ``n_tries + 1`` times.  The exception
    raised by the last attempt propagates unchanged, and so does any
    exception that is not an instance of ``exception_cls``.

    :param n_tries: number of retries allowed after the first failed call.
    :param exception_cls: exception class (or tuple of classes) that
        triggers a retry.
    :param sleep: seconds to wait before each retry.
    :return: a decorator; the function it returns keeps the name and
        docstring of the function it wraps.
    """
    import functools

    def inner(f):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            retries_done = 0
            while True:
                try:
                    return f(*args, **kwargs)
                except exception_cls:
                    if retries_done >= n_tries:
                        raise
                    retries_done += 1
                    # Leave a trace of the swallowed failure; otherwise a
                    # flaky dependency is invisible until the retries run out.
                    logging.warning(
                        '%s failed, retry %s of %s in %s s',
                        getattr(f, '__name__', repr(f)),
                        retries_done, n_tries, sleep,
                        exc_info=True,
                    )
                    time.sleep(sleep)
        return wrapped
    return inner


def datetime_to_iso(dt):
    """Return ``dt.isoformat()``, or ``None`` when ``dt`` is ``None``.

    The check is an explicit ``is None`` rather than a truthiness test so
    that valid but falsy values are still formatted; for example a midnight
    ``datetime.time`` evaluates as false on Python versions before 3.5 and
    would otherwise be turned into ``None``.

    :param dt: object exposing ``isoformat()`` or ``None``.
    :return: the ISO 8601 string, or ``None`` for a ``None`` input.
    """
    if dt is None:
        return None
    return dt.isoformat()


# Value converters keyed by psycopg2 typecaster name.  Every date/time
# flavour is serialised with ``datetime_to_iso``.
converters = dict.fromkeys(
    ('DATE', 'TIME', 'DATETIME', 'DATETIMETZ'),
    datetime_to_iso,
)


def yt_type(column):
    """Return the YT column type for one entry of ``cursor.description``.

    :param column: description entry; only its ``type_code`` is read.
    :return: the ``PG_to_YT_type_mapper`` value for the column's typecaster
        name, or ``'any'`` when that name is not mapped or when no
        typecaster is registered for the type code.
    """
    # ``string_types`` only contains registered typecasters, so a type code
    # without one has to fall through to the default instead of raising
    # KeyError.
    typecaster = string_types.get(column.type_code)
    if typecaster is None:
        return 'any'
    return PG_to_YT_type_mapper.get(typecaster.name, 'any')


def get_schema(cursor):
    """Describe the cursor's result columns as a YT table schema.

    :param cursor: cursor whose ``description`` is populated.
    :return: list of ``{'name': ..., 'type': ...}`` dicts, one per column, in
        the order of ``cursor.description``.
    """
    schema = []
    for column in cursor.description:
        schema.append({'name': column.name, 'type': yt_type(column)})
    return schema


def get_converter(column):
    """Return the value converter for one entry of ``cursor.description``.

    :param column: description entry; only its ``type_code`` is read.
    :return: the ``converters`` entry for the column's typecaster name, or an
        identity function when the name has no converter or when no
        typecaster is registered for the type code.
    """
    # A type code without a registered typecaster is absent from
    # ``string_types``; treat it like any other type that needs no conversion
    # instead of raising KeyError.
    typecaster = string_types.get(column.type_code)
    if typecaster is not None and typecaster.name in converters:
        return converters[typecaster.name]
    return lambda x: x


def get_converters(cursor):
    """Map every result column name to its value converter.

    :param cursor: cursor whose ``description`` is populated.
    :return: dict from column name to the callable returned by
        ``get_converter`` for that column.
    """
    by_column = {}
    for column in cursor.description:
        by_column[column.name] = get_converter(column)
    return by_column


def ResultIter(cursor_buffered, converts):
    """Lazily yield each row as a plain dict with converted values.

    :param cursor_buffered: iterable of rows exposing ``items()``.
    :param converts: dict from column name to a one-argument callable; it
        must have an entry for every key of every row.
    :return: generator of dicts with the same keys as the source rows.
    """
    for row in cursor_buffered:
        converted = {}
        for column_name, raw_value in row.items():
            converted[column_name] = converts[column_name](raw_value)
        yield converted


class Greenplum(object):
    """Thin wrapper around a single psycopg2 connection."""

    def __init__(self, server, port, database, user, token):
        """Open the connection.

        :param server: host part of the connection URI.
        :param port: port part of the connection URI.
        :param database: database name part of the connection URI.
        :param user: user name; percent-encoded before it is put in the URI.
        :param token: password; percent-encoded before it is put in the URI.
        """
        try:
            from urllib.parse import quote
        except ImportError:  # Python 2
            from urllib import quote

        # Credentials are percent-encoded because reserved URI characters in
        # them (``@``, ``:``, ``/``, ``#``, ``%``) would otherwise change how
        # the connection URI is parsed.
        connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(
            quote(str(user), safe=''),
            quote(str(token), safe=''),
            server,
            port,
            database,
        )
        self.connection = psycopg2.connect(dsn=connection_string)

    def run(self, query, chunk_size):
        """Execute ``query`` on a new, uniquely named ``DictCursor``.

        :param query: SQL text to execute.
        :param chunk_size: value assigned to the cursor's ``itersize``.
        :return: the cursor, after ``execute`` has been called on it.
        """
        cursor = self.connection.cursor(cursor_factory=psycopg2.extras.DictCursor, name=uuid.uuid4().hex)
        cursor.itersize = chunk_size
        cursor.execute(query)
        return cursor

    def commit(self):
        """Commit the current transaction on the connection."""
        self.connection.commit()

    def close(self):
        """Close the connection."""
        self.connection.close()


def write_to_yt(parameters, cursor):
    """Stream the rows of an executed cursor into a YT table.

    The table is created inside a YT transaction together with the write.
    Without ``yt-append`` an existing table is replaced; with ``yt-append``
    an existing table is kept and the rows are appended to it, and the table
    is created only when it does not exist yet.

    :param parameters: mapping providing ``yt-append``, ``yt-token``,
        ``yt-proxy`` and ``yt-table``.
    :param cursor: cursor on which the query has already been executed.
    """
    import yt.wrapper as yt

    append = parameters['yt-append']
    yt_client = yt.YtClient(
        token=parameters['yt-token'],
        proxy=parameters['yt-proxy'],
    )
    table = yt.TablePath(parameters['yt-table'], append=append)

    # Move the cursor so that its description can be read.  A default is
    # passed to next() so that an empty result set does not escape as a bare
    # StopIteration.
    # NOTE(review): assumes the description is populated by the fetch even
    # when it returns no rows - confirm against the psycopg2 version in use.
    first_row = next(cursor, None)
    if first_row is None:
        rows = iter(())
    else:
        rows = chain([first_row], cursor)

    result = ResultIter(rows, get_converters(cursor))
    schema = get_schema(cursor)
    attributes = {'schema': schema, 'optimize_for': 'scan'}
    with yt_client.Transaction():
        if append:
            # force=True would replace an existing table and drop the rows
            # the new ones are supposed to be appended to.
            yt_client.create('table', table, attributes=attributes, ignore_existing=True)
        else:
            yt_client.create('table', table, attributes=attributes, force=True)
        yt_client.write_table(table, result)


def main():
    """Run the query from the job's ``query`` input and export the result.

    Parameters and inputs come from the Nirvana job context.  The query is
    executed on Greenplum and, when the ``yt-table`` parameter is set, its
    rows are written to YT.  The whole connect/execute/write sequence is
    wrapped in ``retry`` for ``psycopg2.OperationalError``, configured with
    the ``n-tries`` parameter and a 10 second pause.
    """
    job_context = nv.context()
    parameters = job_context.get_parameters()
    inputs = job_context.get_inputs()

    with open(inputs.get('query')) as f:
        query = f.read()

    @retry(n_tries=parameters['n-tries'], sleep=10, exception_cls=psycopg2.OperationalError)
    def run(query, parameters):
        greenplum = Greenplum(
            server=parameters['server'],
            port=parameters['port'],
            database=parameters['database'],
            user=parameters['user'],
            token=parameters['token']
        )
        # Nested try/finally: the connection must be closed even when
        # closing the cursor itself raises.
        try:
            cursor = greenplum.run(query, chunk_size=parameters['chunk-size'])
            try:
                if parameters.get('yt-table'):
                    write_to_yt(parameters, cursor)
            finally:
                cursor.close()
        finally:
            greenplum.close()

    run(query, parameters)

# Run the export only when this file is executed as a script.
if __name__ == '__main__':
    main()
