import os
from datetime import datetime, timedelta
import yt.wrapper as yt_wrapper
from datacloud.features.dssm import dssm_main
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.time.utils import assert_date_str
from datacloud.dev_utils.time.patterns import FMT_DATE_HM, FMT_DATE
from datacloud.dev_utils.yt import yt_utils
from datacloud.dev_utils.crypta import crypta_snapshot
from datacloud.config import yt as yt_path


logger = get_basic_logger(__name__)
# Passed to DSSMTables as `days_to_take` by both run_* entry points below.
DAYS_TO_TAKE = 7
# Value stored under the 'mode' key of every DSSM-175 task record.
DSSM_175 = 'dssm_175'

# Root of the per-start-date aggregate directories: `<root>/<start_date>`.
DSSM_175_AGGREGATE_FOLDER = '//projects/scoring/tmp/prod_dssm_175/weekly'
# Folder holding the combined tables, addressed as `<folder>/<date>`.
DSSM_175_WEEKLY_FOLDER = '//home/x-products/production/datacloud/aggregates/dssm/weekly'
# Link path that run_dssm_175_combine re-points at the combined table it built.
DSSM_175_READY_TABLE = '//home/x-products/production/datacloud/aggregates/dssm/ready/features'
# Number of steps (and therefore per-step tables) expected for one start date.
DSSM_175_STEPS = 25
# Distance in days between two consecutive steps.
DAYS_IN_STEP = 7


# TODO: Move logic inside detect_ready_dssm_175
def detect_dssm_ready(date_time, days=None):
    """Find the most recent day whose DSSM inputs are all available.

    Walks back one day at a time from ``date_time`` and stops at the first
    day that has a crypta snapshot and a non-empty table in every log folder
    of the DSSM processor.

    Args:
        date_time: Starting point as a string in ``FMT_DATE_HM`` format.
        days: How many days (the starting one included) to inspect.
            ``None`` or ``0`` falls back to 10.

    Yields:
        At most one ``(date_str, {'date_str': date_str})`` pair, where
        ``date_str`` is the ready day formatted with ``FMT_DATE``.
    """
    yt_wrapper.config.set_proxy(yt_path.YT_PROXY)
    days = days or 10
    date_time = datetime.strptime(date_time, FMT_DATE_HM)
    yt_client = yt_utils.get_yt_client()

    # `range` instead of `xrange`: works on both Python 2 and 3 and matches
    # the other loops in this module.
    for min_diff in range(days):
        time_to_check = date_time - timedelta(days=min_diff)
        logger.info(' Check day: {}'.format(time_to_check))
        date_str = time_to_check.strftime(FMT_DATE)
        dssm_processor = dssm_main.DSSMTables(date_str, yt_client=yt_client)

        # NOTE(review): the snapshot lookup and the table checks below go
        # through the module-level `yt_wrapper`, not `yt_client` - confirm
        # that this is intended.
        snapshot = crypta_snapshot.get_snapshot(yt_wrapper, date_str)
        tables = (
            yt_wrapper.ypath_join(dssm_processor.grep_root, folder, date_str)
            for folder in dssm_processor.log_folders
        )
        # `all` stops at the first missing/empty table, so no further YT
        # requests are issued for a day that is already known to be not ready.
        is_ready = snapshot is not None and all(
            yt_wrapper.exists(table) and yt_wrapper.row_count(table) > 0
            for table in tables
        )
        if is_ready:
            yield date_str, {'date_str': date_str}
            # Only the most recent ready day is reported.
            break


def build_dssm_175_key(start_date_str, date_str):
    """Return the task key ``dssm175#<start_date_str>#<date_str>``."""
    key_parts = (start_date_str, date_str)
    return 'dssm175#{}#{}'.format(*key_parts)


def detect_ready_dssm_175(date_time, days=None):
    """Generate one DSSM-175 task record per weekly step of a ready day.

    For the day reported by ``detect_dssm_ready`` this yields
    ``DSSM_175_STEPS`` records, stepping back ``DAYS_IN_STEP`` days each time.

    Args:
        date_time: Forwarded unchanged to ``detect_dssm_ready``.
        days: Forwarded unchanged to ``detect_dssm_ready``.

    Yields:
        ``(key, record)`` pairs; ``key`` comes from ``build_dssm_175_key`` and
        ``record`` holds ``date_str``, ``start_date_str``, ``mode`` and
        ``weekly_dir``.
    """
    for start_date_str, _ in detect_dssm_ready(date_time, days):
        start_date = datetime.strptime(start_date_str, FMT_DATE)
        # All steps of one start date share the same aggregate directory.
        aggregate_dir = yt_wrapper.ypath_join(DSSM_175_AGGREGATE_FOLDER, start_date_str)
        for step in range(DSSM_175_STEPS):
            step_date = start_date - timedelta(days=DAYS_IN_STEP * step)
            date_str = step_date.strftime(FMT_DATE)
            # TODO: Change weekly_dir to aggregate_dir
            record = {
                'date_str': date_str,
                'start_date_str': start_date_str,
                'mode': DSSM_175,
                'weekly_dir': aggregate_dir
            }
            yield build_dssm_175_key(start_date_str, date_str), record


def run_dssm_175(task):
    """Run the DSSM pipeline (steps 1-5) for one weekly step of a start date.

    Args:
        task: Task object whose ``data`` mapping provides ``date_str``,
            ``start_date_str`` and ``weekly_dir``.

    Returns:
        A one-element list containing the result of ``task.make_done()``.
    """
    date_str = task.data['date_str']
    # Read once; the original looked this key up twice.
    start_date_str = task.data['start_date_str']
    assert_date_str(date_str)

    weekly_dir = task.data['weekly_dir']

    # hack to use correct token in shell script
    os.environ['YT_TOKEN'] = yt_wrapper.config['token']
    yt_client = yt_utils.get_yt_client()

    # The yuid -> cid mapping is taken from the snapshot of the start date,
    # not of the individual step date.
    yuid_to_cid_path = crypta_snapshot.get_snapshot(yt_client, start_date_str).yuid_to_cid
    processor = dssm_main.DSSMTables(
        date_str, yt_client=yt_client, days_to_take=DAYS_TO_TAKE, test=False,
        yuid2cid_path=yuid_to_cid_path,
        retro_tag='-175-{}-{}'.format(start_date_str, date_str),
        weekly_dir=weekly_dir)

    processor.create_folders()
    processor.step_1_run_prepare_title_url()
    processor.step_2_dssm_step()
    processor.step_3_run_dot_product()
    processor.step_4_join_cids()
    processor.step_5_run_get_max_features()

    return [task.make_done()]


def detect_ready_dssm_175_combine(date_time):
    """Find start dates whose DSSM-175 step tables are all present.

    Inspects the last 30 days (starting at ``date_time``). A day is reported
    when its aggregate folder exists and contains a non-empty table for each
    of the ``DSSM_175_STEPS`` weekly steps.

    Args:
        date_time: Starting point as a string in ``FMT_DATE_HM`` format.

    Yields:
        ``(date_str, {'date_str': date_str})`` for every complete day found.
    """
    days = 30
    date_time = datetime.strptime(date_time, FMT_DATE_HM)
    yt_client = yt_utils.get_yt_client()

    # `range` instead of `xrange`: works on both Python 2 and 3 and matches
    # the inner loop below.
    for min_diff in range(days):
        date_to_check = date_time - timedelta(days=min_diff)
        date_str = date_to_check.strftime(FMT_DATE)
        logger.info(' Check day: {}'.format(date_str))
        folder = yt_wrapper.ypath_join(DSSM_175_AGGREGATE_FOLDER, date_str)
        if not yt_client.exists(folder):
            continue

        logger.info('Folder: {}'.format(folder))
        tables_found = 0
        for idx in range(DSSM_175_STEPS):
            # Same step width as the one used when the step tasks are created.
            table_date = (date_to_check - timedelta(days=DAYS_IN_STEP * idx)).strftime(FMT_DATE)
            table = yt_wrapper.ypath_join(folder, table_date)
            if yt_client.exists(table) and yt_client.row_count(table) > 0:
                tables_found += 1
        logger.info('Found {}/{} tables'.format(tables_found, DSSM_175_STEPS))
        if tables_found == DSSM_175_STEPS:
            # NOTE(review): there is no `break` here, so every complete day in
            # the window is yielded, not just the newest - confirm intended.
            yield date_str, {'date_str': date_str}


def run_dssm_175_combine(task):
    """Join the per-step DSSM-175 scores and re-point the ready link.

    Runs pipeline step 6 over the aggregate directory of ``date_str`` and then
    links ``DSSM_175_READY_TABLE`` to the resulting combined table.

    Args:
        task: Task object whose ``data`` mapping provides ``date_str``.

    Returns:
        A one-element list containing the result of ``task.make_done()``.
    """
    date_str = task.data['date_str']
    assert_date_str(date_str)

    # hack to use correct token in shell script
    os.environ['YT_TOKEN'] = yt_wrapper.config['token']
    yt_client = yt_utils.get_yt_client()

    aggregate_dir = yt_wrapper.ypath_join(DSSM_175_AGGREGATE_FOLDER, date_str)
    combined_table = yt_wrapper.ypath_join(DSSM_175_WEEKLY_FOLDER, date_str)

    yuid_to_cid_path = crypta_snapshot.get_snapshot(yt_client, date_str).yuid_to_cid
    processor = dssm_main.DSSMTables(
        date_str, yt_client=yt_client, days_to_take=DAYS_TO_TAKE, test=False,
        yuid2cid_path=yuid_to_cid_path,
        retro_tag='-175-{}-{}'.format(date_str, date_str),
        weekly_dir=aggregate_dir, ready_table_path=combined_table)

    processor.step_6_run_join_scores(DSSM_175_STEPS)

    # Remove and re-create the link inside one transaction.
    # NOTE(review): `exists` is evaluated on the link path itself; confirm how
    # it behaves when the previous link target has already been removed.
    with yt_client.Transaction():
        if yt_client.exists(DSSM_175_READY_TABLE):
            logger.info(' Deleting old DSSM link')
            yt_client.remove(DSSM_175_READY_TABLE)
        logger.info(' Creating new DSSM link')
        yt_client.link(combined_table, DSSM_175_READY_TABLE)
    return [task.make_done()]
