import os
import argparse
from collections import namedtuple

from yt.wrapper import ypath_join

from datacloud.dev_utils.yt.yt_files import download_file
from datacloud.dev_utils.yt.yt_utils import get_yt_client
from datacloud.input_pipeline import InputPipeLine

# Immutable bundle of run settings handed to main().
# The typename matches the bound name so that repr() output and pickling
# refer to the same identifier the module exposes.
RunConfig = namedtuple('RunConfig', [
    'root_folder',
    'suffix',
    'experiment_name',
    'start_from',
    'use_cloud_nodes'
])


def parse_args():
    """Parse the command line of this script.

    Positional arguments: ``experiment_name`` and ``suffix``.
    Optional arguments: ``--force`` and ``--use_cloud_nodes`` (boolean
    flags, False unless given) and ``-s/--start_from`` (None unless given).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # (flags, keyword options) pairs, registered in this exact order.
    argument_specs = (
        (
            ('experiment_name',),
            {'help': "Experiment name is used for storing local and YT files"},
        ),
        (
            ('suffix',),
            {'help': "Run pipeline suffix (ex. 1, 2, val)"},
        ),
        (
            ('--force',),
            {
                'action': 'store_true',
                'help': "Force run with wiping out existing files and tables",
            },
        ),
        (
            ('-s', '--start_from'),
            {'help': "Name of step to run pipeline from"},
        ),
        (
            ('--use_cloud_nodes',),
            {'action': 'store_true', 'help': "Turn on cloud nodes"},
        ),
    )

    cli = argparse.ArgumentParser()
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    return cli.parse_args()


def main(yt_client, rconfig):
    """Fetch the normalized input file and run the input pipeline.

    The file ``normalized_<suffix>.tsv`` is requested via ``download_file``
    with the YT path ``<root_folder>/<experiment_name>/<file>`` and the
    local path ``<experiment_name>/<file>``. A parameter dict is then built
    and passed to ``InputPipeLine.from_dict``, and the pipeline is run.

    Args:
        yt_client: YT client handed to ``download_file``.
        rconfig: ``RunConfig``-like object; the attributes ``root_folder``,
            ``suffix``, ``experiment_name``, ``start_from`` and
            ``use_cloud_nodes`` are read.

    Raises:
        ValueError: if ``rconfig.start_from`` is not a known step name.
    """
    # Ordered step names; the pipeline gets the tail starting at start_from.
    all_steps = [
        'run_raw_upload',
        'run_append_meta_table',
        'run_merge_audience',
        'run_make_input',
        'run_make_all_yuid',
        'run_metadata_all_yuid',
        'run_grep',

        'run_calc_cluster_features',
        'run_calc_dssm_features',

        'run_combine_features',

        'run_metadata_features_prod',
        'run_compress',

        'run_train_nirvana'
    ]
    # Validate before any I/O so that a mistyped step name fails fast with a
    # readable message instead of list.index()'s "x is not in list" raised
    # only after the download has already happened.
    if rconfig.start_from not in all_steps:
        raise ValueError(
            'Unknown start step {!r}, expected one of: {}'.format(
                rconfig.start_from, ', '.join(all_steps)
            )
        )
    steps_to_run = all_steps[all_steps.index(rconfig.start_from):]

    file_name = 'normalized_{}.tsv'.format(rconfig.suffix)
    download_file(
        yt_client,
        ypath_join(rconfig.root_folder, rconfig.experiment_name, file_name),
        os.path.join(rconfig.experiment_name, file_name),
        mode='w+'
    )

    # The experiment name doubles as partner id, the suffix as ticket name.
    partner_id = rconfig.experiment_name
    ticket_name = rconfig.suffix
    params = {
        "PARTNER_ID": partner_id,
        "TICKET_NAME": ticket_name,

        "PATH_TO_CSV": rconfig.experiment_name,
        # The same downloaded file is used as both input and normalized file.
        "INPUT_FILE": file_name,
        "NORMALIZED_FILE": file_name,

        "RETRO_TAG": "pengd-{partner_id}-{ticket_name}".format(
            partner_id=partner_id,
            ticket_name=ticket_name
        ),

        "AUDIENCE_CUSTOM_BASE_ROOT": ypath_join(rconfig.root_folder, rconfig.experiment_name),
        # '{0}' and '{1}' are left as literal placeholders in the path;
        # presumably filled in by the pipeline -- verify against InputPipeLine.
        "FMT_DATA_DIR": ypath_join(rconfig.root_folder, '{0}', '{1}'),
        "PATH_TO_CUSTOM_CRYPTA": ypath_join(rconfig.root_folder, rconfig.experiment_name),
        "USE_CRYPTA_SNAPSHOT": False,

        "IS_CREDIT_SCORING": True,
        "SHUT_UP_ST_BOT": True,

        "STEPS": steps_to_run,

        "EXCLUDE_STEPS": [
            'run_append_history_table'
        ],

        "USE_CLOUD_NODES": rconfig.use_cloud_nodes,
        "HISTORY_TARGET": "not-really-target",
    }

    pipeline = InputPipeLine.from_dict(params)
    pipeline.run_pipeline()


if __name__ == '__main__':
    # ----- Params you can change -----
    root_folder = '//home/mlmarines/datacloud/penguin-diver/2_sets_test/credit_scoring_events'
    start_from = 'run_raw_upload'
    # ----- End of params -----

    args = parse_args()

    experiment_name = args.experiment_name
    suffix = args.suffix
    # A step given on the command line overrides the default above.
    start_from = args.start_from or start_from

    yt_client = get_yt_client()
    exp_folder = ypath_join(root_folder, experiment_name)
    # Explicit checks instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently skip these preconditions.
    if not yt_client.exists(exp_folder):
        raise SystemExit('Prepare CSE first!')
    # With --force the existence of the pipeline folder is not checked at all.
    if not args.force and yt_client.exists(ypath_join(exp_folder, suffix)):
        raise SystemExit('Pipeline folder already exists!')
    # Local directory named after the experiment; main() downloads into it.
    if not os.path.exists(experiment_name):
        os.mkdir(experiment_name)

    rconfig = RunConfig(
        root_folder=root_folder,
        suffix=suffix,
        experiment_name=experiment_name,
        start_from=start_from,
        use_cloud_nodes=args.use_cloud_nodes
    )
    main(yt_client, rconfig)
