# -*- coding: utf-8 -*-

from datetime import (
    date,
    timedelta,
)
import logging
import os

from passport.backend.profile.utils.helpers import to_date_str
from passport.backend.profile.utils.yt import (
    ExclusiveLock,
    get_yt,
)
import yenv


# Module-level logger; cleanup() uses it for progress messages and also
# passes it to ExclusiveLock.
log = logging.getLogger('passport.profile.scripts.cleanup')


# Retention settings that POLICY assigns to every table of the 'rc'
# environment. Ages are day counts: cleanup() feeds them to timedelta(days=...)
# and subtracts the result from today's date.
RC_POLICY = {
    # Nodes older than max_orig_age days are replaced by links into the
    # symlink_to_env environment instead of being kept as-is.
    'use_symlink': True,
    'symlink_to_env': 'production',
    'max_orig_age': 30,
    # Nodes (links included) older than this many days are removed.
    # cleanup() requires it to be strictly greater than max_orig_age.
    'max_symlink_age': 7 * 30,
}


# Retention settings that POLICY assigns to every table of the 'production'
# environment: no links are used, and nodes older than max_orig_age days
# are removed by cleanup().
PRODUCTION_POLICY = {
    'use_symlink': False,
    'max_orig_age': 7 * 30,
}


# Names of the per-day table directories handled in every environment.
_POLICY_TABLE_NAMES = ('profile', 'auth', 'oauth', 'bb', 'glogouts')

# Per-environment cleanup configuration, processed by cleanup() in list order.
# Every table of an environment shares that environment's policy dict.
POLICY = [
    {
        'env': 'rc',
        'tables': dict.fromkeys(_POLICY_TABLE_NAMES, RC_POLICY),
    },
    {
        'env': 'production',
        'tables': dict.fromkeys(_POLICY_TABLE_NAMES, PRODUCTION_POLICY),
    },
]

# Tables older than this many days may be compressed (cleanup() switches them
# to an erasure codec), since they are not going to be used any more.
# Floor division keeps every value an int on both Python 2 and Python 3;
# plain `/` would yield the float 184.0 on Python 3.
ERASURE_TABLES_POLICY = {
    'profile': 366 // 2 + 1,  # half a year + 1 day (the profile is always built for yesterday)
    'auth': 7 + 1,  # a week + 1 day
    'oauth': 7 + 1,
    'bb': 7 + 1,
    'glogouts': 1 + 1,
}


# Template of an environment's profile home directory in YT; %s is the env name.
_ENV_HOME_TEMPLATE = '//home/passport/%s/profile/'


def _validate_table_policy(policy):
    """Check that a symlink-enabled policy keeps links longer than originals.

    :param policy: a table policy dict (e.g. RC_POLICY / PRODUCTION_POLICY).
    :raises ValueError: if ``use_symlink`` is set and ``max_symlink_age`` is
        missing or not strictly greater than ``max_orig_age``.
    """
    max_orig_age = policy['max_orig_age']
    if not policy['use_symlink']:
        return
    max_symlink_age = policy.get('max_symlink_age')
    # A missing value is rejected explicitly: comparing None with an int only
    # "works" on Python 2 and raises TypeError on Python 3.
    if max_symlink_age is None or max_symlink_age <= max_orig_age:
        raise ValueError('max_symlink_age must be greater than max_orig_age')


def _replace_with_link(yt, table_path, target_path):
    """Make ``table_path`` a link to ``target_path`` unless it already points there."""
    # NOTE(review): presumably reading @path through an existing link yields
    # the link target's path, so an up-to-date link compares equal here --
    # confirm against YT link semantics.
    current_target_path = yt.get(os.path.join(table_path, '@path'))
    if current_target_path == target_path:
        return
    log.info('Replacing table %s with link to %s', table_path, target_path)
    yt.remove(table_path)
    yt.link(target_path, table_path)


def _compress_table(yt, table_path):
    """Switch a table to the ``lrc_12_2_2`` erasure codec if it has none yet."""
    if yt.get_attribute(table_path, 'erasure_codec', 'none') != 'none':
        return
    log.info('Compress table: %s', table_path)
    yt.set_attribute(table_path, 'erasure_codec', 'lrc_12_2_2')
    # The table is merged onto itself with force_transform after the attribute
    # change; presumably this rewrites existing chunks with the new codec
    # (TODO confirm against the YT documentation).
    yt.run_merge(
        source_table=table_path,
        destination_table=table_path,
        spec={
            'force_transform': True,
            'data_size_per_job': 20000000000,
            'job_io': {
                'table_writer': {
                    'desired_chunk_size': 2000000000,
                },
            },
        },
    )
    log.info('Table \'%s\' successfully compressed', table_path)


def _cleanup_table_dir(yt, today, env, table_name, policy):
    """Apply ``policy`` to every dated node under one env/table directory.

    Node names are compared as strings with the cutoffs produced by
    ``to_date_str``. For each node, in sorted name order:

    * older than the maximum age -- removed;
    * otherwise, when links are enabled and the node is older than
      ``max_orig_age`` -- replaced with a link into ``symlink_to_env``;
    * otherwise, when older than the ERASURE_TABLES_POLICY threshold and of
      type ``table`` -- compressed.
    """
    base_path = os.path.join(_ENV_HOME_TEMPLATE % env, table_name)
    max_orig_age = policy['max_orig_age']
    use_symlink = policy['use_symlink']
    max_age = policy['max_symlink_age'] if use_symlink else max_orig_age

    oldest_date = to_date_str(today - timedelta(days=max_age))
    erasure_date = to_date_str(today - timedelta(days=ERASURE_TABLES_POLICY[table_name]))
    oldest_orig_date = to_date_str(today - timedelta(days=max_orig_age))
    if use_symlink:
        link_to_base_path = os.path.join(
            _ENV_HOME_TEMPLATE % policy['symlink_to_env'],
            table_name,
        )
    else:
        link_to_base_path = ''

    for table_date in sorted(yt.list(base_path)):
        table_path = os.path.join(base_path, table_date)
        if table_date < oldest_date:
            # Very old tables and links are removed.
            log.info('Removing %s', table_path)
            yt.remove(table_path)
        elif use_symlink and table_date < oldest_orig_date:
            # Past the retention of originals: only a symlink must be kept.
            _replace_with_link(yt, table_path, os.path.join(link_to_base_path, table_date))
        elif table_date < erasure_date and yt.get_attribute(table_path, 'type') == 'table':
            _compress_table(yt, table_path)


def cleanup(config):
    """Remove, relink and compress dated profile tables according to POLICY.

    The whole job runs under ``ExclusiveLock`` taken at
    ``config['yt']['cleanup_profile_daily_lock']``. It does nothing unless
    ``yenv.type`` is ``'production'``.

    :param config: configuration mapping; ``config['yt']`` is read here and
        the whole object is passed to ``ExclusiveLock`` and ``get_yt``.
    :raises ValueError: if any symlink-enabled policy in POLICY does not have
        ``max_symlink_age`` strictly greater than ``max_orig_age``. All
        policies are checked before any table is modified.
    """
    with ExclusiveLock(config=config, lock_path=config['yt']['cleanup_profile_daily_lock'], log=log):
        if yenv.type != 'production':
            # Rights to delete production tables are required.
            return

        log.info('Starting cleanup job')
        yt = get_yt(config)
        today = date.today()

        # Fail fast: validate every policy before the first destructive call,
        # so a bad entry cannot leave the cleanup half-applied.
        for env_policy in POLICY:
            for policy in env_policy['tables'].values():
                _validate_table_policy(policy)

        for env_policy in POLICY:
            env = env_policy['env']
            # .items() works on both Python 2 and Python 3 (iteritems() is 2-only).
            for table_name, policy in env_policy['tables'].items():
                _cleanup_table_dir(yt, today, env, table_name, policy)
