# -*- coding: utf-8 -*-

import argparse
import datetime
import logging
import logging.config
import os
import sys

from passport.backend.core.lazy_loader import LazyLoader
from passport.backend.social.common.importer import (
    install_file_system_importer,
    install_python_path2,
)
from passport.backend.social.common.social_config import social_config
from passport.backend.social.common.social_logging import SocialFormatter
from passport.backend.social.common.yt import DEFAULT_YT_CLUSTER
from passport.backend.social.dumpers.reactor import YtPathReactorArtifact
from retry import retry
import yenv
import yt.wrapper as yt


# Module-level logger; handlers are attached by logging_settings_init().
logger = logging.getLogger(__name__)


# Local directory where main() looks for the dump file (named after today's
# date) when --dump_path is not given.
DUMP_DIR = '/opt/profile-dump/crypta/'
# Passed to upload() as expiration_days: the created table gets an
# 'expiration_time' attribute of today + this many days.
EXPIRATION_DAYS = 30
# Schema of the created YT table. The order of the entries also defines how
# the tab-separated fields of every dump line are mapped to column names
# (see produce_prepared_dump).
DATA_SCHEMA = [
    {'name': 'profile_id', 'type': 'string'},
    {'name': 'uid', 'type': 'string'},
    {'name': 'provider_id', 'type': 'string'},
    {'name': 'userid', 'type': 'string'},
    {'name': 'username', 'type': 'string'},
    {'name': 'firstname', 'type': 'string'},
    {'name': 'lastname', 'type': 'string'},
    {'name': 'gender', 'type': 'string'},
    {'name': 'birthday', 'type': 'string'},
    {'name': 'email', 'type': 'string'},
    {'name': 'phone', 'type': 'string'},
    {'name': 'created', 'type': 'string'},
]
# YT directory in which main() places the table (named after today's date)
# when --target_path is not given; depends on the yenv environment type.
TARGET_DIR = '//home/passport/%s/socialism/crypta-dump' % yenv.type
# Artifact path handed to YtPathReactorArtifact.build(); TARGET_DIR[2:] drops
# the leading '//' of the YT path.
REACTOR_ARTIFACT_PATH = '/passport/%s/%s/%s' % (yenv.type, DEFAULT_YT_CLUSTER, TARGET_DIR[2:])
# Initial pause between upload attempts, in seconds (5 minutes).
UPLOAD_RETRIES_DELAY = datetime.timedelta(minutes=5).total_seconds()
# Number of upload attempts passed to the retry decorator as `tries`.
UPLOAD_RETRIES = 5


def produce_prepared_dump(dump_path):
    """
    Lazily read the tab-separated dump file and yield one dict per line.

    Fields are matched positionally with the column names of DATA_SCHEMA.
    Every field is stripped of surrounding whitespace; a field that is empty
    after stripping becomes None. Surplus fields (or surplus column names) are
    dropped, because the pairing is done with zip().
    """
    column_names = [column['name'] for column in DATA_SCHEMA]
    with open(dump_path) as dump_file:
        for raw_line in dump_file:
            values = [field.strip() or None for field in raw_line.split('\t')]
            yield dict(zip(column_names, values))


def parse_args(argv):
    """
    Parse the command line of the script.

    argv is a full argument vector (as in sys.argv): its first element is
    skipped and the rest is parsed. Returns the argparse namespace with
    target_path, dump_path (both None when omitted) and keep_dump (bool).
    """
    option_specs = (
        ('--target_path', {'help': 'YT table path'}),
        ('--dump_path', {'help': 'Dump file path'}),
        ('--keep_dump', {'action': 'store_true', 'help': 'Keep or delete dump file after upload'}),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        arg_parser.add_argument(flag, **options)

    cli_args = argv[1:]
    return arg_parser.parse_args(cli_args)


def logging_settings_init():
    """
    Configure the logging subsystem for the script.

    The 'dumpers' file handler is always attached to the root logger. When the
    SYSTEM_WIDE_SOCIAL_DUMPERS environment variable is set to a non-empty
    value the log file lives in /var/log/yandex/socialism; otherwise it is
    written to the current directory and records are additionally echoed to
    stdout through the 'console' handler. Python warnings are redirected to
    logging as well.
    """
    system_wide = os.environ.get('SYSTEM_WIDE_SOCIAL_DUMPERS')

    root_handlers = {'dumpers'}
    if system_wide:
        log_dir = '/var/log/yandex/socialism'
    else:
        log_dir = '.'
        root_handlers.add('console')

    default_formatter = {
        '()': SocialFormatter,
        'fmt': '[%(asctime)s][%(levelname)s] %(message)s',
        'context': object(),
    }
    console_handler = {
        'class': 'logging.StreamHandler',
        'formatter': 'default',
        'level': 'DEBUG',
        'stream': sys.stdout,
    }
    dumpers_handler = {
        'class': 'passport.backend.social.common.social_logging.OsFileHandler',
        'formatter': 'default',
        'filename': os.path.join(log_dir, 'social-utils-dumpers.log'),
        'level': 'INFO',
    }

    settings = {
        'version': 1,
        'disable_existing_loggers': False,
        'root': {
            'handlers': root_handlers,
            'level': 'INFO',
        },
        'formatters': {
            'default': default_formatter,
        },
        'handlers': {
            'console': console_handler,
            'dumpers': dumpers_handler,
        },
    }

    logging.captureWarnings(True)
    logging.config.dictConfig(settings)


def backoff_multiplier(total_duration, delay, tries):
    """
    Compute the ``backoff`` argument for the ``retry`` decorator so that the
    sum of all pauses between attempts equals ``total_duration``.

    With ``tries`` attempts there are ``tries - 1`` pauses that form the
    geometric progression ``delay, delay * x, ..., delay * x ** (tries - 2)``.
    The function solves

        delay * (1 + x + ... + x ** (tries - 2)) == total_duration

    for ``x`` by bisection.

    Parameters:
        total_duration: number of seconds allotted to all pauses together.
        delay: the first (shortest) pause between attempts, in seconds.
        tries: number of attempts.

    Returns:
        The multiplier ``x`` (a float greater than 1). The value 1 is returned
        when ``tries <= 2`` (the multiplier is never applied), when
        ``total_duration`` equals ``delay * (tries - 1)`` exactly, and, after
        logging an error, when no multiplier greater than 1 fits.

    Raises:
        ValueError: if ``tries > 2`` and ``delay`` or ``total_duration`` is
            not positive.
    """
    if tries <= 2:
        # At most one pause happens, so the multiplier is never applied.
        return 1

    # tries is known to be > 2 here, so only the other two need validation.
    if delay <= 0 or total_duration <= 0:
        raise ValueError('Invalid backoff_multiplier arguments')

    pauses = int(tries) - 1

    def f(x):
        # S(x) - total_duration, where S is the sum of the geometric
        # progression. Horner's scheme is used instead of the closed form
        # (1 - x ** n) / (1 - x): it is well-conditioned near x == 1, never
        # performs integer division and saturates to inf instead of raising
        # OverflowError.
        series = 0.0
        for _ in range(pauses):
            series = series * x + 1.0
        return delay * series - total_duration

    # Early-exit tolerance on the residual, in seconds.
    eps = 1e-6

    # f grows monotonically for x >= 1, so the root (if any) is to the right
    # of 1. A non-negative f(1) means that even constant pauses already fill
    # (or exceed) the allotted time.
    left = 1.0
    f_left = f(left)
    if f_left >= 0:
        if f_left > 0:
            logger.error('Failed to find backoff_multiplier, fallback to 1')
        return 1

    # Look for a right bound with a non-negative residual.
    right = 2.0
    for _ in range(20):
        if f(right) >= 0:
            break
        right *= 10
    else:
        logger.error('Failed to find backoff_multiplier, fallback to 1')
        return 1

    # Bisection with the invariant f(left) < 0 <= f(right).
    x = right
    for _ in range(200):
        x = 0.5 * (left + right)
        if not left < x < right:
            # The bracket has shrunk to adjacent floats: x is as precise as
            # floating point allows, even if the residual is still above eps
            # (this happens for very large total_duration).
            break
        fx = f(x)
        if abs(fx) < eps:
            break
        if fx < 0:
            left = x
        else:
            right = x

    return x


@retry(
    tries=UPLOAD_RETRIES,
    delay=UPLOAD_RETRIES_DELAY,
    backoff=backoff_multiplier(
        total_duration=datetime.timedelta(hours=1).total_seconds(),
        delay=UPLOAD_RETRIES_DELAY,
        tries=UPLOAD_RETRIES,
    ),
    jitter=(0, 10),
)
def upload(dump_path, target_path, expiration_days, keep_dump):
    """
    Upload the local dump file into a new YT table.

    Nothing is done when the dump file does not exist. Otherwise, inside a
    single YT transaction, the parent directory of target_path is created if
    missing, and, unless the table already exists, the table is created with
    DATA_SCHEMA, the configured providers and an expiration date of
    today + expiration_days, filled from the dump, and a Reactor artifact is
    instantiated for it. Afterwards the dump file is removed unless keep_dump
    is true.

    A yt.YtError is logged and re-raised so that the retry decorator can make
    another attempt.
    """
    logger.info('Start upload: %s' % target_path)

    if not os.path.exists(dump_path):
        logger.info('Attempt to send nonexistent crypta log to YT')
        return

    try:
        with yt.Transaction():
            parent_dir = os.path.dirname(target_path)
            if not yt.exists(parent_dir):
                yt.mkdir(parent_dir, recursive=True)

            if yt.exists(target_path):
                logger.info('Target already exists: %s' % target_path)
            else:
                expires_on = datetime.date.today() + datetime.timedelta(days=expiration_days)
                table_attributes = {
                    'schema': DATA_SCHEMA,
                    'providers': social_config.providers,
                    'expiration_time': str(expires_on),
                }
                yt.create(
                    type='table',
                    path=target_path,
                    attributes=table_attributes,
                )

                rows = produce_prepared_dump(dump_path=dump_path)
                yt.write_table(target_path, rows)
                logger.info('Upload completed: %s' % target_path)

                reactor_artifact = YtPathReactorArtifact.build(
                    artifact=REACTOR_ARTIFACT_PATH,
                    yt_path=target_path,
                )
                reactor_artifact.yt_cluster = DEFAULT_YT_CLUSTER
                reactor_artifact.instantiate()
    except yt.YtError as e:
        logger.error('Intercepted error from YT client: %s' % str(e), exc_info=True)

        # Re-raise so that the retry decorator kicks in
        raise

    if keep_dump:
        return

    try:
        os.remove(dump_path)
    except OSError:
        logger.info('Somebody stole your dump!')


def main():
    """
    Script entry point.

    Installs the importers, configures logging, parses the command line,
    initialises the social config and the YT client (token, proxy, progress
    bar switched off), registers the 'chrono' lazy loader and finally uploads
    the dump. When --target_path or --dump_path is omitted, the path defaults
    to an entry named after today's date inside TARGET_DIR or DUMP_DIR
    respectively.
    """
    install_python_path2()
    install_file_system_importer()

    logging_settings_init()

    args = parse_args(sys.argv)

    social_config.init()
    yt_settings = yt.config
    yt_settings['token'] = social_config.yt_token
    yt_settings.set_proxy(DEFAULT_YT_CLUSTER)
    yt_settings['write_progress_bar']['enable'] = False

    LazyLoader.register('chrono', lambda: datetime.datetime)
    LazyLoader.get_instance('chrono')

    date_suffix = str(datetime.date.today())

    target_path = args.target_path
    if target_path is None:
        target_path = os.path.join(TARGET_DIR, date_suffix)

    dump_path = args.dump_path
    if dump_path is None:
        dump_path = os.path.join(DUMP_DIR, date_suffix)

    upload(
        dump_path=dump_path,
        target_path=target_path,
        expiration_days=EXPIRATION_DAYS,
        keep_dump=args.keep_dump,
    )


# Run the upload only when the module is executed as a script.
if __name__ == '__main__':
    main()
