#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import with_statement

"""
Экспорт для модерации признака заблокированности пользователей в интерфейсе директа
Кроме модерации таблица также используется в группе генерации контента
https://staff.yandex-team.ru/departments/yandex_search_tech_assesment_toloka_7715/
(ответственные: khalitovn@, adgval@, tsymbalava@)
"""

""" METADATA

<crontab>
    env: YT_DIRECT_CLUSTER=hahn
    time: */28 * * * *
    <switchman>
        group: scripts-other
        <lockname_with_env/>
    </switchman>
    package: scripts-switchman
</crontab>
<crontab>
    env: YT_DIRECT_CLUSTER=arnold
    time: */27 * * * *
    <switchman>
        group: scripts-other
        <lockname_with_env/>
    </switchman>
    package: scripts-switchman
</crontab>
<juggler>
    host:       checks_auto.direct.yandex.ru
    name:       scripts.export_users_statusBlocked.working
    raw_events: scripts.export_users_statusBlocked.working.$cluster
    vars:       cluster=hahn,arnold
    tag:        direct_yt
    tag:        direct_group_internal_systems
    ttl:        1h40m
</juggler>

"""

from datetime import datetime
import logging, os, sys

sys.path[0:0] = [os.path.realpath(os.path.join(os.path.dirname(__file__), ".."),)]

import settings
from yandex.juggler import juggler_queue_event
import direct.tools

from direct.db import db_engine, disconnect_all

from direct.ytutils import (
    YT_PREFIX,
    save_cursor_to_yt,
    check_create_yt_tbl
)

import yt.wrapper as yt
import yt.yson as yson

import MySQLdb
from MySQLdb.cursors import SSCursor

# Disable MySQLdb warnings and the extra server round trips caused by SHOW WARNINGS
import MySQLdb.connections
MySQLdb.connections.Connection.show_warnings = lambda x: ()

# Switch off the YT client's write progress bar.
yt.config["write_progress_bar"]["enable"] = False
# Log level handed to direct.tools.set_logging() in main().
DEFAULT_LOG_LEVEL = logging.INFO
# One database name per shard: 'ppc:1' .. 'ppc:<settings.SHARDS_NUM>'.
DEFAULT_DBNAME = ['ppc:%d' % (i+1) for i in range(settings.SHARDS_NUM)]

# Schema passed to save_cursor_to_yt(); the column names and their order match
# the SELECT issued in upload_data().
SCHEMA = [
    { 'name': 'uid', 'type': 'int64', 'required': True },
    { 'name': 'ClientID', 'type': 'int64', 'required': True },
    { 'name': 'login', 'type': 'string', 'required': True },
    { 'name': 'statusBlocked', 'type': 'boolean', 'required': True },
]

# Parent nodes in YT; main() creates them when they are missing.
EXPORT_ROOT = YT_PREFIX + 'export'
TMP_ROOT = YT_PREFIX + 'tmp'

# Final location of the exported table.
EXPORT_PATH = EXPORT_ROOT + '/usersStatusBlocked'
# Temporary table that is filled and sorted before being moved to EXPORT_PATH.
# The timestamp suffix is computed once, when the module is loaded, from the
# naive local time returned by datetime.now().
TMP_PATH = TMP_ROOT + '/%s-%s' % (
    'usersStatusBlocked',
    datetime.now().strftime('%Y-%m-%d-%H:%M:%S'),
)


class ConvertStatuBlockedToBoolean(object):
    """Row preprocessor: replace the textual ``statusBlocked`` flag with a bool.

    Instances are callable; one is passed to ``save_cursor_to_yt`` as its
    ``preprocess`` argument.
    """

    def __call__(self, row):
        """Overwrite ``row['statusBlocked']`` in place.

        The new value is ``True`` only when the current value equals the
        string ``"Yes"``; any other value becomes ``False``.  The row is
        mutated and nothing is returned.
        """
        is_blocked = (row['statusBlocked'] == "Yes")
        row['statusBlocked'] = is_blocked


def upload_data():
    """Copy the ``users`` rows of every shard into the temporary YT table.

    For each database name in ``DEFAULT_DBNAME`` a raw connection is opened,
    the ``uid``, ``ClientID``, ``login`` and ``statusBlocked`` columns are
    selected and the cursor is handed to ``save_cursor_to_yt`` which writes
    the rows to ``TMP_PATH``.  The connection is closed in all cases; any
    exception from the query or the upload propagates to the caller.
    """
    for dbname in DEFAULT_DBNAME:
        # logging.warning() with lazy arguments replaces the deprecated
        # logging.warn() alias; the emitted message is unchanged.
        logging.warning("Uploading data from %s to %s", dbname, TMP_PATH)
        conn = db_engine(dbname).raw_connection()
        try:
            # SSCursor streams the result set from the server instead of
            # buffering the whole table on the client.
            cursor = conn.cursor(SSCursor)
            cursor.execute("SELECT uid, ClientID, login, statusBlocked FROM users")

            # NOTE(review): cleanup=False presumably keeps rows written for
            # previous shards so that all shards end up in one table --
            # confirm against direct.ytutils.save_cursor_to_yt.
            save_cursor_to_yt(
                TMP_PATH,
                cursor,
                chunk_size=500000,
                fmt=yt.YsonFormat('binary'),
                cleanup=False,
                schema=SCHEMA,
                optimize_for='scan',
                preprocess=ConvertStatuBlockedToBoolean(),
            )

            # Closed only on the success path; on failure the connection
            # close below is what releases the cursor.
            cursor.close()
        finally:
            conn.close()

def sort():
    """Sort the temporary table ``TMP_PATH`` in place by ``uid``.

    Runs a YT sort operation with ``force_transform`` and ``combine_chunks``
    set in the operation spec.
    """
    # logging.warning() with a lazy argument replaces the deprecated
    # logging.warn() alias; the message text is intentionally left as is.
    logging.warning("Sort tempory table %s", TMP_PATH)
    yt.run_sort(
        TMP_PATH,
        sort_by=['uid'],
        spec={"force_transform": "true", "combine_chunks": "true"},
    )

def swap_result_and_cleanup():
    """Replace the exported table with the freshly built temporary table.

    Removes ``EXPORT_PATH`` when it exists and then moves ``TMP_PATH`` to
    ``EXPORT_PATH``.  ``main()`` calls this inside a YT transaction.
    """
    # logging.warning() with lazy arguments replaces the deprecated
    # logging.warn() alias throughout; the emitted messages are unchanged.
    logging.warning("Swap tables")
    if yt.exists(EXPORT_PATH):
        logging.warning("Remove old table %s", EXPORT_PATH)
        yt.remove(EXPORT_PATH)
    logging.warning("Move new table to export path: %s -> %s", TMP_PATH, EXPORT_PATH)
    yt.move(TMP_PATH, EXPORT_PATH)

def _ensure_map_node(path):
    """Create the YT map_node ``path`` (with parents) unless it already exists."""
    if not yt.exists(path):
        # Log the node that is actually being created.
        logging.warning("Create map_node %s", path)
        yt.create('map_node', path, recursive=True)

def main():
    """Run the export: upload, sort and publish the table, then report to juggler.

    Logging is configured with the ``YT_PROXY`` environment value ('-' when
    unset) as extra info.  The export and temporary root nodes are created
    when missing.  Upload, sort and swap run inside one pinged YT
    transaction; any exception is logged and re-raised.  When all steps
    succeed an ``OK`` juggler event is queued for the cluster named by
    ``YT_DIRECT_CLUSTER`` ('prod' when unset).
    """
    yt_proxy = os.environ.get('YT_PROXY', '-')

    direct.tools.set_logging(loglevel=DEFAULT_LOG_LEVEL, add_info=yt_proxy)

    _ensure_map_node(EXPORT_ROOT)
    _ensure_map_node(TMP_ROOT)

    success = False
    with yt.Transaction(ping=True) as transaction:
        logging.warning("transaction: %s", transaction.transaction_id)
        try:
            upload_data()
            sort()
            swap_result_and_cleanup()
            success = True
        except Exception as e:
            logging.exception(e)
            # Bare raise keeps the original traceback (``raise e`` resets it
            # on Python 2).
            raise
    if success:
        cluster = os.environ.get('YT_DIRECT_CLUSTER', 'prod')
        juggler_queue_event('scripts.export_users_statusBlocked.working.%s' % cluster, 'OK', '%s table successfuly uploaded to %s YT' % (EXPORT_PATH, cluster))

# Run the export only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
