# -*- coding: utf-8 -*-
import argparse
import getpass

from dateutil import rrule
from passport.backend.profile.jobs.prepare_train_auth_dataset_by_time_batches import prepare_auth_dataset
from passport.backend.profile.scripts.prepare_dataset_for_tensornet import prepare_and_split_dataset_for_tensornet
from passport.backend.profile.utils.helpers import from_str_to_date


def run(
    date_start,
    date_end,
    class_balance,
    passport_dir,
    profile_dir,
    batches_dir,
    tmp_dir,
    output_filename,
    skip_prepare,
    skip_target_count,
):
    """Build daily auth batches, then prepare the dataset for tensornet.

    Unless ``skip_prepare`` is true, ``prepare_auth_dataset`` is invoked once
    per day of a daily ``rrule`` recurrence that starts at ``date_start`` and
    is bounded by ``date_end``. After that,
    ``prepare_and_split_dataset_for_tensornet`` is always invoked.

    :param date_start: first day of the range (``dtstart`` of the recurrence).
    :param date_end: upper bound of the range (``until`` of the recurrence).
    :param class_balance: forwarded to the tensornet preparation step.
    :param passport_dir: ``input_dir`` of each per-day batch job.
    :param profile_dir: ``profile_dir`` of each per-day batch job.
    :param batches_dir: ``output_dir`` of each per-day batch job; also
        forwarded (twice) to the tensornet preparation step.
    :param tmp_dir: temporary directory handed to both steps.
    :param output_filename: forwarded to the tensornet preparation step.
    :param skip_prepare: when true, the per-day batch jobs are not run.
    :param skip_target_count: forwarded to the tensornet preparation step.
    """
    if not skip_prepare:
        # Everything except the date is identical for each per-day job.
        per_day_kwargs = dict(
            input_dir=passport_dir,
            tmp_dir=tmp_dir,
            profile_dir=profile_dir,
            output_dir=batches_dir,
        )
        days = rrule.rrule(rrule.DAILY, dtstart=date_start, until=date_end)
        for day in days:
            prepare_auth_dataset(date=day, **per_day_kwargs)

    # The arguments are positional, so their order is significant.
    # NOTE(review): batches_dir occupies both the 4th and the 7th slot;
    # presumably one is the input and the other the output location --
    # confirm against prepare_and_split_dataset_for_tensornet's signature.
    split_args = (
        date_start,
        date_end,
        class_balance,
        batches_dir,
        output_filename,
        tmp_dir,
        batches_dir,
        skip_target_count,
    )
    prepare_and_split_dataset_for_tensornet(*split_args)


if __name__ == '__main__':
    # Command-line entry point: parse the arguments and hand them to run().
    from passport_profile import initialize_app, settings
    initialize_app()
    # NOTE(review): yenv is imported only after initialize_app(); presumably
    # the order matters -- confirm before moving this import.
    import yenv

    username = getpass.getuser()

    # The configured paths contain the current environment type; substitute
    # 'production' for it so the defaults point at the production directories.
    default_passport_dir = settings.YT_PASSPORT_LOG_DIR.replace(yenv.type, 'production')
    default_profile_dir = settings.YT_PROFILE_DIR.replace(yenv.type, 'production')
    # Batches are written to a per-user directory by default.
    default_batches_dir = '//home/passport/{username}/profile-batches/'.format(username=username)
    default_tmp_dir = settings.YT_TMP_DIR

    # Help texts. argparse %-formats help strings, hence the doubled '%%'.
    profile_help = 'Path to profile dir on YT cluster.'
    batches_help = 'Path where to save dataset on YT cluster.'
    balance_help = 'A proportion between classes 0 and 1: %% of class-0 is R, %% of class-1 is 1-R.'
    skip_prepare_help = 'Skip building dataset and start preparing it for model training.'
    skip_target_count_help = 'Skip counting classes sizes. Should be used with --skip-prepare.'

    parser = argparse.ArgumentParser(
        description='Build training dataset from profile.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('date_start', type=str)
    parser.add_argument('date_end', type=str)
    parser.add_argument('-p', '--profile', type=str, default=default_profile_dir, help=profile_help)
    parser.add_argument('-b', '--batches', type=str, default=default_batches_dir, help=batches_help)
    parser.add_argument('-c', '--classbalance', metavar='R', type=float, default=0.5, help=balance_help)
    parser.add_argument('-o', '--output', metavar='FILENAME', help='Dataset filename on local filesystem.')
    parser.add_argument('--skip-prepare', action='store_true', default=False, help=skip_prepare_help)
    parser.add_argument('--skip-target-count', action='store_true', default=False, help=skip_target_count_help)
    args = parser.parse_args()

    # The passport log dir and the tmp dir are not exposed as options; they
    # always come from settings.
    run(
        from_str_to_date(args.date_start),
        from_str_to_date(args.date_end),
        class_balance=args.classbalance,
        passport_dir=default_passport_dir,
        profile_dir=args.profile,
        batches_dir=args.batches,
        tmp_dir=default_tmp_dir,
        output_filename=args.output,
        skip_prepare=args.skip_prepare,
        skip_target_count=args.skip_target_count,
    )
