import argparse
import os
from textwrap import dedent
import json

from yt.wrapper import ypath_join

from datacloud.dev_utils.yt import yt_utils
from datacloud.dev_utils.yql.yql_helpers import create_yql_client, execute_yql
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.other.console import query_yes_no

from datacloud.ml_utils.dolphin.prepare_cse.pipeline import (
    step_num2method,
    CleanConfig,
    main
)


def parse_args():
    """Parse the command-line arguments of this script.

    Returns:
        argparse.Namespace with two attributes:
        ``experiment_name`` (required positional string) and
        ``force`` (boolean flag, ``False`` unless ``--force`` is given).
    """
    # Each entry is (positional names/flags, keyword options) for add_argument.
    argument_specs = (
        (
            ('experiment_name',),
            dict(help="Experiment name is used for storing local and YT files"),
        ),
        (
            ('--force',),
            dict(
                action='store_true',
                help="Force run with wiping out existing files and tables",
            ),
        ),
    )

    arg_parser = argparse.ArgumentParser()
    for names, options in argument_specs:
        arg_parser.add_argument(*names, **options)

    return arg_parser.parse_args()


# YQL query template selecting the distinct values of the `partner` column.
# The `%(in_path)s` placeholder is a %-style named parameter for the input
# table path; it is filled in by whoever executes the query.
get_partners_q = dedent(
    """
    SELECT DISTINCT `partner`
    FROM `%(in_path)s`
    """
)


if __name__ == '__main__':
    # Script entry point. In order, it:
    #   1. refuses to run if the local or YT experiment folder already exists
    #      (unless --force is given),
    #   2. settles the list of excluded ("no go") partners, asking the user
    #      interactively when the list below is set to None,
    #   3. stores the run configuration as params.json locally and on YT,
    #   4. hands over to the pipeline's `main`.

    # ----- Params you can change -----
    path_to_original_cse = '//home/mlmarines/datacloud/penguin-diver/2_sets_test/credit_scoring_events/credit_scoring_events_05_12_2019'
    root_folder = '//home/mlmarines/datacloud/penguin-diver/2_sets_test/credit_scoring_events'
    crypta_folder = '//home/x-products/production/crypta_v2/crypta_db_last'
    zeros_vs_ones = 1.
    min_retro_date = '2016-06'
    # Set to None to be asked about every partner found in the original table.
    no_go_partners = [
        "tochka_bank",
        "goodstory",
        "greenmoney",
        "ingos",
        "limecapital",
        "migcredit",
        "moneyman",
        "reso",
        "ya-bk-internal",
    ]
    n_folds = 2
    val_size = 10000
    steps = sorted(step_num2method.keys())
    # ----- End of params -----

    args = parse_args()
    force = args.force

    yt_client = yt_utils.get_yt_client()
    yql_client = create_yql_client(yt_client=yt_client)
    logger = get_basic_logger()

    experiment_name = args.experiment_name
    # Explicit raises rather than `assert`: asserts are stripped under
    # `python -O`, which would silently disable these overwrite guards.
    # NOTE(review): --force only skips the existence checks in this block;
    # nothing here deletes existing files or tables.
    if not force and os.path.exists(experiment_name):
        raise RuntimeError('Local folder already exists!')

    aggs_folder = ypath_join(root_folder, experiment_name)
    if not force and yt_client.exists(aggs_folder):
        raise RuntimeError('YT folder already exists!')

    if no_go_partners is None:
        logger.info('Collecting partners list...')

        query_result = execute_yql(
            query=get_partners_q,
            yql_client=yql_client,
            params=dict(in_path=path_to_original_cse),
            set_owners=False,
            syntax_version=1
        )
        # Take the first result table. `next(iter(...))` works on both
        # Python 2 and 3, unlike the Python 2 only `.__iter__().next()`.
        table = next(iter(query_result))

        logger.info('\n---------------------------------')
        logger.info('Type yes if you want to use partner')

        good_partners = []
        no_go_partners = []
        for row in table.rows:
            partner = row[0]
            if query_yes_no(partner):
                good_partners.append(partner)
            else:
                no_go_partners.append(partner)
        if not good_partners:
            raise RuntimeError('Why no good partners?')
    if not no_go_partners:
        # `warning` is the supported spelling; `warn` is a deprecated alias.
        logger.warning('Attention! Empty list of no go partners!')

    if not os.path.exists(experiment_name):
        os.mkdir(experiment_name)
    experiment_params_file = os.path.join(experiment_name, 'params.json')

    yt_utils.create_folders([aggs_folder], yt_client)

    cconfig = CleanConfig(
        experiment_name=experiment_name,
        path_to_original_cse=path_to_original_cse,
        aggs_folder=aggs_folder,
        crypta_folder=crypta_folder,
        zeros_vs_ones=zeros_vs_ones,
        min_retro_date=min_retro_date,
        no_go_partners=no_go_partners,
        n_folds=n_folds,
        val_size=val_size,
        steps=steps
    )

    # Keep a copy of the exact configuration next to the experiment outputs,
    # both on local disk and in the YT experiment folder.
    config_json = json.dumps(cconfig._asdict())
    with open(experiment_params_file, 'w') as fd:
        fd.write(config_json)
    # Re-open in binary mode so the bytes stored on disk are uploaded as-is.
    with open(experiment_params_file, 'rb') as fd:
        yt_client.write_file(
            ypath_join(aggs_folder, os.path.basename(experiment_params_file)),
            fd
        )

    main(yt_client, yql_client, cconfig, logger)
