import os.path

import json
import click
import time
import uuid
import logging
from business_models.databases import greenplum
from gptransfer_client import GPTransferClient, GPTransferRequestTimeout, GPTransferException
import yt.wrapper as yt
import yt.yson as yson


# Configure logging at import time; every record is rendered as
# "<timestamp> <level> <logger name> <message>" and INFO is the threshold.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(name)s %(message)s',
)

# Module-level logger used by the rest of this script.
logger = logging.getLogger(__name__)


# Translation table from a YT column type name to the Greenplum column type
# used in the generated CREATE TABLE statement. Both string flavours become
# varchar and every listed integer type is stored as int8. A YT type that is
# not a key of this table is rejected by main() with a RuntimeError.
yt_to_gp_type_mapping = {
    **dict.fromkeys(('string', 'utf8'), 'varchar'),
    'boolean': 'bool',
    'double': 'float',
    **dict.fromkeys(
        ('uint8', 'uint16', 'uint32', 'int8', 'int16', 'int32', 'int64'),
        'int8',
    ),
}



def _build_create_query(table, columns_ddl, distributed):
    """Return the SQL that creates *table* if it is missing and grants the robot access to it."""
    # NOTE(review): identifiers are interpolated straight into the SQL text.
    # They come from the command line of whoever runs this script, so only
    # run it with trusted arguments.
    return f'''
    CREATE TABLE IF NOT EXISTS {table} ({columns_ddl})
    DISTRIBUTED {distributed}
    ;

    GRANT SELECT, INSERT, TRUNCATE, UPDATE ON TABLE {table} to "robot-taxi-stat";
    '''


@click.command()
@click.argument('yt_path', type=str)
@click.argument('gp_path', type=str)
@click.argument('action_if_exists', type=str)
@click.option('--grant-operator', type=str)
@click.option('--grant-to', type=str)
@click.option('--column', '-c', multiple=True)
@click.option('--distributed-by', '-d')
def main(yt_path, gp_path, action_if_exists, grant_operator, grant_to, column, distributed_by):
    """Replicate the YT table YT_PATH into the Greenplum table GP_PATH.

    The column list is taken from the YT table schema (or from the repeated
    --column option) and the Greenplum table is created if it does not exist.
    --distributed-by sets the DISTRIBUTED BY expression; without it the table
    is DISTRIBUTED RANDOMLY.

    ACTION_IF_EXISTS must be "truncate" or "append". With "truncate" the data
    is transferred straight into GP_PATH. With "append" it is transferred into
    a uniquely named staging table, which is then inserted into GP_PATH and
    dropped.

    When both --grant-operator and --grant-to are given, the grant is applied
    to GP_PATH after the transfer.

    Fails with ValueError on an unknown ACTION_IF_EXISTS and with RuntimeError
    on an unsupported YT column type, an empty resulting column set, or a
    transfer that finished with an error status.
    """
    if action_if_exists not in ('truncate', 'append'):
        raise ValueError(
            f"action_if_exists must be 'truncate' or 'append', got {action_if_exists!r}"
        )

    schema = yson.yson_to_json(yt.get(yt_path + '/@schema'))['$value']
    schema_names = [row['name'] for row in schema]

    columns = list(column) if column else schema_names
    logger.info('Columns for replication: %s', columns)

    requested = set(columns)

    # Columns that are requested but absent from the schema never make it into
    # the generated DDL, yet they are still passed to the transfer below.
    # Surface that early instead of leaving it to a downstream failure.
    unknown = sorted(requested.difference(schema_names))
    if unknown:
        logger.warning(
            'Requested columns are missing from the YT schema and will not be created in Greenplum: %s',
            unknown,
        )

    # Column name -> Greenplum type, in YT schema order.
    gp_schema = {}
    for row in schema:
        name = row['name']
        if name not in requested:
            continue

        yt_type = row['type']
        if yt_type not in yt_to_gp_type_mapping:
            raise RuntimeError(f"Unsupported YT type {yt_type} of column {name}")

        gp_schema[name] = yt_to_gp_type_mapping[yt_type]

    if not gp_schema:
        raise RuntimeError('Resulting table will have no columns, aborting')

    gp_types_str = ', '.join(f"{name} {gp_type}" for name, gp_type in gp_schema.items())
    distributed = f"BY ({distributed_by})" if distributed_by else "RANDOMLY"

    # The final table is created up front in both modes.
    create_query = _build_create_query(gp_path, gp_types_str, distributed)
    logger.info(create_query)
    greenplum(create_query)

    with open(os.path.expanduser('~/mylib_config.json'), encoding='utf-8') as f:
        mylib_config = json.load(f)

    gptransfer_token = mylib_config['gptransfer_token']
    gp_token = mylib_config['gp_token']

    # NOTE(review): verify_https=False presumably disables TLS certificate
    # verification for the GPTransfer API; confirm this is intentional.
    gptransfer_client = GPTransferClient(
        token=gptransfer_token,
        gp_user=os.environ['USER'],
        gp_password=gp_token,
        host='https://dwh-gptransfer-api.taxi.yandex.net',
        verify_https=False
    )

    target_gp_path = gp_path

    if action_if_exists == 'append':
        # The existing table must not be cleared, so load into a temporary
        # (staging) table first. Timestamp + random hex keep the name unique.
        # NOTE(review): the staging table is left behind if anything below
        # fails before the final DROP.
        target_gp_path = gp_path + '_' + str(int(time.time())) + '_' + uuid.uuid4().hex
        logger.info(target_gp_path)

        create_query = _build_create_query(target_gp_path, gp_types_str, distributed)
        logger.info(create_query)
        greenplum(create_query)  # create the staging table

    logger.info('Start replication')
    try:
        process_uuid = gptransfer_client.yt_to_gp(
            yt_path,
            target_gp_path,
            columns,
            connect_timeout=30,
            read_timeout=15,
        )
    except GPTransferRequestTimeout:
        # Listed before GPTransferException so a timeout is always propagated
        # and never handled by the UUID-recovery branch below.
        raise
    except GPTransferException as exc:
        uuid_marker = 'UUID: '
        message = str(exc)
        if uuid_marker not in message:
            # Without a process UUID there is nothing to wait for; re-raise the
            # real error instead of failing later on an unbound process_uuid.
            raise
        # The error text carries a process UUID (presumably of a transfer that
        # was started anyway), so follow that process instead of failing.
        process_uuid = message.split(uuid_marker)[1]
        gptransfer_client.wait_till_finish(process_uuid, connect_timeout=30, read_timeout=15)

    gptransfer_client.wait_till_finish(process_uuid)

    status = gptransfer_client.check_status(process_uuid)
    logger.info('Finish: %s', status)

    if status.error:
        raise RuntimeError(status)

    if action_if_exists == 'append':
        # The data went into the staging table: move it into the target table
        # and drop the staging table.
        # NOTE(review): SELECT * matches columns by position, so a pre-existing
        # target table with a different column order would be filled wrongly.
        query = f'''
        BEGIN;
        INSERT INTO {gp_path} SELECT * FROM {target_gp_path};
        DROP TABLE {target_gp_path};
        COMMIT;
        '''

        logger.info(query)
        greenplum(query)

    if grant_operator and grant_to:
        logger.info('Grant')
        greenplum.grant(table_name=gp_path,
                        operator=grant_operator,
                        to=grant_to)

    logger.info('Done')


# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
