#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import cStringIO as io
import logging
import os
import sys
import time

from yt import yson
from yt.wrapper import YtClient

'''
This script replaces the YtHash expression of the DirectBannerResources table:
farm_hash(OrderID) becomes uint64(farm_hash(OrderID) % 256).
1) Creates a static unsorted table
2) Runs a map operation that copies every column of the old table into the new one, except computed columns
3) Sorts the new static table by the key columns YtHash, OrderID, BannerID
4) Converts the sorted table into a dynamic table
5) Reshards the table into the configured number of tablets
6) Backs up the old table
7) Renames the new table
The table is not mounted afterwards, because upstream_replica_id is expected to be set on it next
'''
# YT path of the table being migrated. main() derives the temporary tables
# (suffixes 'TmpMapped', 'TmpSorted') and the backup table (suffix 'Old') from it.
TABLE = '//home/adv/DirectBannerResources'
# Number of tablets the rebuilt table is resharded into: yt_reshard_table()
# produces this many pivot keys (one empty key plus TABLETS_COUNT - 1 boundaries).
TABLETS_COUNT = 32

# YSON text describing the target table; main() parses it with yson.load() and
# then adds 'primary_medium' and 'tablet_cell_bundle' copied from the current table.
# Key columns are the ones carrying "sort_order" (YtHash, OrderID, BannerID);
# YtHash is computed from OrderID via its "expression". The modulus 256 used in
# that expression is also hard-coded as the upper bound in yt_reshard_table(),
# so the two must be changed together.
NEW_DEFINITION = """{
    attributes = {
        schema = [
            {"name" = "YtHash"; "sort_order" = "ascending"; "expression" = "uint64(farm_hash(OrderID) % 256)"; "type" = "uint64"};
            {"name" = "OrderID"; "sort_order" = "ascending"; "type" = "int64"; "required" = true};
            {"name" = "BannerID"; "sort_order" = "ascending"; "type" = "int64"; "required" = true};
            {"name" = "ExportID"; "type" = "uint64"};
            {"name" = "AdGroupID"; "type" = "uint64"};
            {"name" = "Logo"; "type" = "any"};
            {"name" = "Button"; "type" = "any"};
            {"name" = "Title"; "type" = "string"};
            {"name" = "TitleExtension"; "type" = "string"};
            {"name" = "Body"; "type" = "string"};
            {"name" = "Href"; "type" = "string"};
            {"name" = "Site"; "type" = "string"};
            {"name" = "DomainFilter"; "type" = "string"};
            {"name" = "SiteFilter"; "type" = "string"};
            {"name" = "DomainFilterID"; "type" = "uint64"};
            {"name" = "SiteFilterID"; "type" = "uint64"};
            {"name" = "TurbolandingID"; "type" = "uint64"};
            {"name" = "TurbolandingHref"; "type" = "string"};
            {"name" = "TurbolandingSite"; "type" = "string"};
            {"name" = "TurbolandingDomainFilter"; "type" = "string"};
            {"name" = "PermalinkID"; "type" = "uint64"};
            {"name" = "PermalinkHref"; "type" = "string"};
            {"name" = "PermalinkSite"; "type" = "string"};
            {"name" = "PermalinkDomainFilter"; "type" = "string"};
            {"name" = "PermalinkAssignType"; "type" = "string"};
            {"name" = "PermalinkChainIDs"; "type" = "any"};
            {"name" = "PermalinkAdPhone"; "type" = "string"};
            {"name" = "VcardDomainFilter"; "type" = "string"};
            {"name" = "MobileContentID"; "type" = "uint64"};
            {"name" = "MobileContentHref"; "type" = "string"};
            {"name" = "MobileContentSite"; "type" = "string"};
            {"name" = "MobileContentDomainFilter"; "type" = "string"};
            {"name" = "MobileContentSiteFilter"; "type" = "string"};
            {"name" = "MobileContentImpressionUrl"; "type" = "string"};
            {"name" = "PlatformName"; "type" = "any"};
            {"name" = "Name"; "type" = "string"};
            {"name" = "IterID"; "type" = "uint64"};
            {"name" = "UpdateTime"; "type" = "uint64"};
            {"name" = "DeletedTime"; "type" = "uint64"};
        ];
    };
}"""



# noinspection PyPep8Naming
def MAPPER(row):
    """Identity mapper: emit the input row unchanged.

    main() passes this function to do_map() as the map operation's mapper.
    Column selection is not done here; do_map() restricts the input columns
    through the table path it builds.

    :param row: a single input row.
    :return: generator yielding exactly that row object.
    """
    yield row


def yt_create_static_sorted_table(ytc, table, table_def):
    """Create ``table`` using the attributes stored in ``table_def``.

    Before creating the table, the schema is flagged with ``unique_keys = True``.
    Note that this mutates the caller's ``table_def`` in place: the flag is set
    on the ``attributes`` mapping of the schema object itself.

    :param ytc: client object exposing ``create(type, path, attributes=...)``.
    :param table: path of the table to create.
    :param table_def: mapping with an ``'attributes'`` entry whose ``'schema'``
        value carries an ``attributes`` mapping (as a parsed YSON list does).
    """
    table_attributes = table_def['attributes']
    schema = table_attributes['schema']
    schema.attributes['unique_keys'] = True
    ytc.create('table', table, attributes=table_attributes)


def yt_reshard_table(ytc, table, table_def):
    """Reshard ``table`` into TABLETS_COUNT tablets with evenly spaced pivot keys.

    The pivots split the range [0, 256) of the first key column into
    TABLETS_COUNT parts; 256 is the modulus used by the YtHash expression in
    NEW_DEFINITION. The first pivot key is the empty key, followed by
    TABLETS_COUNT - 1 single-element keys wrapped as ``yson.YsonUint64``.

    :param ytc: client object exposing ``reshard_table(path, pivot_keys)``.
    :param table: path of the table to reshard.
    :param table_def: unused; kept so the signature matches existing callers.
    """
    lower_bound, upper_bound = 0, 256
    span = upper_bound - lower_bound

    boundaries = [
        yson.YsonUint64(lower_bound + (index * span) // TABLETS_COUNT)
        for index in range(1, TABLETS_COUNT)
    ]
    # The leading empty key is the lower bound of the first tablet.
    pivot_keys = [[]] + [[boundary] for boundary in boundaries]

    ytc.reshard_table(table, pivot_keys)


def yt_wait_state(ytc, table, state):
    """Block until the ``tablet_state`` attribute of ``table`` equals ``state``.

    The attribute is polled once per second; a progress line is logged before
    every sleep. There is no timeout: the call returns only once the state
    matches.

    :param ytc: client object exposing ``get_attribute(path, name)``.
    :param table: path of the table to watch.
    :param state: expected value of the ``tablet_state`` attribute.
    """
    current_state = ytc.get_attribute(table, 'tablet_state')
    while current_state != state:
        logging.info('%s is going to be %s', table, state)
        time.sleep(1)
        current_state = ytc.get_attribute(table, 'tablet_state')


def get_key_columns(table_def):
    """Return the names of the key columns of ``table_def``, in schema order.

    A column counts as a key column when its schema entry contains a
    ``'sort_order'`` key.

    :param table_def: mapping with ``table_def['attributes']['schema']`` being
        an iterable of column mappings.
    :return: list of column names.
    """
    key_names = []
    for column in table_def['attributes']['schema']:
        if 'sort_order' in column:
            key_names.append(column['name'])
    return key_names


def do_map(ytc, cur_table, new_table, cur_schema, new_table_def, mapper):
    """Create ``new_table`` and fill it by running ``mapper`` over ``cur_table``.

    The output table is created with a schema derived from ``new_table_def``
    that keeps only each column's name, type and (when present) expression;
    other column properties such as ``sort_order`` are left out. The map input
    is restricted to the columns of ``cur_schema`` that have no ``expression``.

    :param ytc: client object exposing ``create``, ``run_map`` and ``TablePath``.
    :param cur_table: path of the source table.
    :param new_table: path of the table to create and write to.
    :param cur_schema: iterable of column mappings of the source table.
    :param new_table_def: mapping with ``['attributes']['schema']`` describing
        the target columns.
    :param mapper: mapper callable handed to ``run_map``.
    """
    def _plain_column(column):
        # Keep only name/type/expression of a target column.
        spec = {'name': column['name'], 'type': column['type']}
        if 'expression' in column:
            spec['expression'] = column['expression']
        return spec

    output_schema = [
        _plain_column(column)
        for column in new_table_def['attributes']['schema']
    ]
    stored_columns = [
        column['name'] for column in cur_schema if 'expression' not in column
    ]

    ytc.create('table', new_table, attributes={'schema': output_schema})
    source_path = ytc.TablePath(name=cur_table, columns=stored_columns)
    ytc.run_map(mapper, source_path, new_table, format='yson')


def do_sort(ytc, cur_table, new_table, new_table_def):
    """Create ``new_table`` from ``new_table_def`` and sort ``cur_table`` into it.

    The destination is created first via yt_create_static_sorted_table(); the
    sort then orders rows by the key columns reported by get_key_columns().

    :param ytc: client object exposing ``create`` and ``run_sort``.
    :param cur_table: path of the unsorted source table.
    :param new_table: path of the sorted table to create and fill.
    :param new_table_def: table definition mapping (see get_key_columns()).
    """
    yt_create_static_sorted_table(ytc, new_table, new_table_def)
    key_columns = get_key_columns(new_table_def)
    ytc.run_sort(cur_table, new_table, sort_by=key_columns)


def parse_args(argv=None):
    """Parse the command-line options of the script.

    The previous help description advertised ``--mapper`` and ``--do-not-sort``
    options that this parser never defined; the description now states what
    the script actually does.

    :param argv: optional list of argument strings to parse. When ``None``
        (the default) argparse reads ``sys.argv[1:]``, which is what a call
        without arguments has always done.
    :return: ``argparse.Namespace`` with ``proxy`` and ``token`` attributes.
        Each defaults to the ``YT_PROXY`` / ``YT_TOKEN`` environment variable,
        or ``None`` when that variable is not set.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Rebuild a YT table with a new YtHash key expression. "
            "The table is unmounted, copied through map and sort operations "
            "into a new table, converted to a dynamic table, resharded and "
            "moved in place of the original, which is kept with an 'Old' "
            "suffix. The resulting table is left unmounted."
        ),
    )
    parser.add_argument(
        '--proxy',
        default=os.environ.get('YT_PROXY'),
        help="yt url (default: YT_PROXY environment variable)",
    )
    parser.add_argument(
        '--token',
        default=os.environ.get('YT_TOKEN'),
        help="yt token (default: YT_TOKEN environment variable)",
    )

    return parser.parse_args(argv)


def main(args):
    """Rebuild TABLE according to NEW_DEFINITION and swap it in place.

    Steps, in order:
      1. remove leftover temporary tables and unmount the current table;
      2. map the current table into '<TABLE>TmpMapped' (do_map);
      3. sort it into '<TABLE>TmpSorted' (do_sort) and drop the mapped copy;
      4. alter the sorted table to dynamic and reshard it;
      5. move the current table to '<TABLE>Old' and the new one to TABLE.

    The new table is not mounted here. If any step fails, the exception
    propagates and nothing is rolled back, so the original table may be left
    unmounted.

    :param args: parsed options with ``proxy`` and ``token`` attributes.
    """
    ytc = YtClient(proxy=args.proxy, token=args.token)
    if sys.platform == 'darwin':
        def _module_filter(module):
            # Accept only modules that have a file, are not '.so' binaries
            # and whose name does not mention hashlib.
            if not hasattr(module, '__file__'):
                return False
            if module.__file__.endswith('.so'):
                return False
            return 'hashlib' not in getattr(module, '__name__', '')

        ytc.config['pickling']['module_filter'] = _module_filter

    table = TABLE
    mapped_table = table + 'TmpMapped'
    sorted_table = table + 'TmpSorted'
    backup_table = table + 'Old'

    cur_schema = ytc.get_attribute(table, 'schema')
    new_table_def = yson.load(io.StringIO(NEW_DEFINITION))
    # Carry the storage placement of the current table over to the new one.
    for inherited in ('primary_medium', 'tablet_cell_bundle'):
        new_table_def['attributes'][inherited] = ytc.get_attribute(table, inherited)

    # Clean up temporary tables that an earlier run may have left behind.
    ytc.remove(mapped_table, force=True)
    ytc.remove(sorted_table, force=True)

    ytc.unmount_table(table)
    yt_wait_state(ytc, table, 'unmounted')
    logging.info('Unmounted!')

    do_map(ytc, table, mapped_table, cur_schema, new_table_def, MAPPER)
    logging.info('Mapped!')
    do_sort(ytc, mapped_table, sorted_table, new_table_def)
    logging.info('Sorted!')
    ytc.remove(mapped_table)

    ytc.alter_table(sorted_table, dynamic=True)
    logging.info('Altered!')

    yt_reshard_table(ytc, sorted_table, new_table_def)
    logging.info('Resharded!')

    ytc.move(table, backup_table)
    logging.info('Backup: ' + backup_table)
    ytc.move(sorted_table, table)
    logging.info('Renamed!')


if __name__ == '__main__':
    # Script entry point: configure INFO-level logging, then run the migration
    # with the options taken from the command line.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - ALTER - %(message)s',
    )
    cli_args = parse_args()
    main(cli_args)