from datetime import datetime, timedelta
import yt.wrapper as yt_wrapper
from datacloud.features.cluster.path_config import PathConfig
from datacloud.features.cluster import clust_features
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.time.utils import assert_date_str, now_str
from datacloud.dev_utils.yt import yt_utils
from datacloud.dev_utils.time.patterns import FMT_DATE_HM, FMT_DATE
from datacloud.dev_utils.status_db.task import Task, Status
from datacloud.dev_utils.crypta import crypta_snapshot


# Module-level logger, named after this module.
logger = get_basic_logger(__name__)
# Number of the final pipeline step: `_build_user_vectors` schedules no
# follow-up task once this step has completed.
LAST_STEP = 5


def detect_ready_build_user_vectors(date_time, days=None):
    """Find the newest day whose inputs for building user vectors are ready.

    Walks backwards day by day from ``date_time`` and stops at the first day
    for which a crypta snapshot is available and the extracted-URLs table
    exists and has at least one row.

    Args:
        date_time: Starting moment as a string in ``FMT_DATE_HM`` format.
        days: How many days (including the starting one) to inspect;
            a falsy value means 5.

    Yields:
        At most one ``(date_str, {'date_str': date_str})`` pair, where
        ``date_str`` is the ready day formatted with ``FMT_DATE``.
    """
    lookback = days or 5
    newest = datetime.strptime(date_time, FMT_DATE_HM)
    for offset in xrange(lookback):
        candidate = newest - timedelta(days=offset)
        logger.info(' Check day: {}'.format(candidate))
        date_str = candidate.strftime(FMT_DATE)
        log_table = yt_wrapper.ypath_join(PathConfig().extracted_urls_dir, date_str)
        snapshot = crypta_snapshot.get_snapshot(yt_wrapper, date_str)
        if snapshot is None:
            continue
        if not yt_wrapper.exists(log_table):
            continue
        if not (yt_wrapper.row_count(log_table) > 0):
            continue
        # Only the most recent ready day is reported.
        yield date_str, {'date_str': date_str}
        return


def _relink_ready_table(yt_client, source_table, ready_table, label):
    """Make ``ready_table`` a link to ``source_table``, replacing any old node.

    Args:
        yt_client: YT client used for the exists/remove/link calls.
        source_table: Path the new link should target.
        ready_table: Path at which the link is (re)created.
        label: Human-readable table kind, used only in log messages.
    """
    if yt_client.exists(ready_table):
        logger.info(' Deleting old {} ready table'.format(label))
        yt_client.remove(ready_table)
    logger.info(' Linking new {} ready table'.format(label))
    yt_client.link(source_table, ready_table)


def _build_user_vectors(task, step):
    """Run one step (2-5) of the cluster-features pipeline for a task.

    Steps 2-4 delegate to the matching ``clust_features.step_*`` function
    (step 4 inside a YT transaction). Step 5 re-points the two "ready"
    links at the freshly built feature tables inside a single transaction.

    Args:
        task: Task whose ``data['date_str']`` holds the date to process.
        step: Pipeline step number; must be 2, 3, 4 or 5.

    Returns:
        A list with the task marked as done and, unless ``step`` equals
        ``LAST_STEP``, a new READY task named ``cluster_features_<step + 1>``
        for the same date.

    Raises:
        Exception: If ``step`` is not a known step, or if no crypta
            snapshot is available for the task's date.
    """
    # Reject unknown steps up front, before any YT client or snapshot
    # lookup is performed.
    if step not in (2, 3, 4, 5):
        raise Exception('Unknown ClusterFeatures task.')

    date_str = task.data['date_str']
    assert_date_str(date_str)

    yt_client = yt_utils.get_yt_client()
    snapshot = crypta_snapshot.get_snapshot(yt_client, date_str)
    if snapshot is None:
        # The detector in this module treats a None snapshot as "not ready";
        # fail with a clear message instead of an AttributeError on None.
        raise Exception(
            'No crypta snapshot available for date {}'.format(date_str))
    yuid_to_cid_table = snapshot.yuid_to_cid
    path_config = PathConfig(date=date_str, days_to_take=175, is_retro=False,
                             yuid2cid_table=yuid_to_cid_table)
    # NOTE(review): the label says yuid2cid but the logged attribute is
    # external_cid2yuid_table -- confirm this is the intended value.
    logger.info('yuid2cid table: {}'.format(path_config.external_cid2yuid_table))

    if step == 2:
        clust_features.step_2_bow_reducer(
            yt_client=yt_client,
            date=date_str,
            path_config=path_config
        )
    elif step == 3:
        clust_features.step_3_build_user2host(
            yt_client=yt_client,
            date=date_str,
            path_config=path_config
        )
    elif step == 4:
        with yt_client.Transaction():
            clust_features.step_4_build_user2clust(
                yt_client=yt_client,
                date=date_str,
                path_config=path_config
            )
    elif step == 5:
        # Both links are swapped in one transaction so they change together.
        with yt_client.Transaction():
            _relink_ready_table(
                yt_client,
                path_config.res_user2clust_features,
                path_config.ready_table,
                'cluster'
            )
            _relink_ready_table(
                yt_client,
                path_config.res_user2normed_s2v_features,
                path_config.normed_s2v_ready_table,
                'normed s2v'
            )

    new_tasks = [
        task.make_done()
    ]
    if step != LAST_STEP:
        # Chain the pipeline: schedule the next step for the same date.
        current_time = now_str()
        new_tasks.append(
            Task('cluster_features_{}'.format(step + 1), date_str,
                 Status.READY, {'date_str': date_str}, current_time, current_time)
        )
    return new_tasks


def build_user_vectors_2(task):
    """Run step 2 of the cluster-features pipeline for ``task``.

    Returns whatever ``_build_user_vectors`` returns for step 2.
    """
    return _build_user_vectors(task=task, step=2)


def build_user_vectors_3(task):
    """Run step 3 of the cluster-features pipeline for ``task``.

    Returns whatever ``_build_user_vectors`` returns for step 3.
    """
    return _build_user_vectors(task=task, step=3)


def build_user_vectors_4(task):
    """Run step 4 of the cluster-features pipeline for ``task``.

    Returns whatever ``_build_user_vectors`` returns for step 4.
    """
    return _build_user_vectors(task=task, step=4)


def build_user_vectors_5(task):
    """Run step 5 of the cluster-features pipeline for ``task``.

    Returns whatever ``_build_user_vectors`` returns for step 5.
    """
    return _build_user_vectors(task=task, step=5)


def detect_ready_hostname_extract(date_time, days=None):
    """Find every recent day whose source logs are ready for extraction.

    Walks backwards day by day from ``date_time``; a day qualifies when, for
    each of the ``watch_log_tskv`` and ``spy_log`` folders under the external
    logs directory, the day's table exists and has at least one row.

    Args:
        date_time: Starting moment as a string in ``FMT_DATE_HM`` format.
        days: How many days (including the starting one) to inspect;
            a falsy value means 5.

    Yields:
        One ``(date_str, {'date_str': date_str})`` pair per qualifying day,
        newest first, with ``date_str`` formatted using ``FMT_DATE``.
    """
    lookback = days or 5
    required_logs = ('watch_log_tskv', 'spy_log')
    newest = datetime.strptime(date_time, FMT_DATE_HM)

    def _log_ready(folder, date_str):
        # A log counts as ready when its daily table exists and is non-empty.
        table = yt_wrapper.ypath_join(PathConfig().external_logs_dir, folder, date_str)
        return yt_wrapper.exists(table) and yt_wrapper.row_count(table) > 0

    for offset in xrange(lookback):
        date_str = (newest - timedelta(days=offset)).strftime(FMT_DATE)
        # all() stops at the first log that is not ready, so later folders
        # are not queried for that day.
        if all(_log_ready(folder, date_str) for folder in required_logs):
            yield date_str, {'date_str': date_str}


def hostname_extract(task):
    """Run step 1 (daily hostname extraction) for the task's date.

    Args:
        task: Task whose ``data['date_str']`` holds the date to process.

    Returns:
        A single-element list containing the task marked as done.
    """
    date_str = task.data['date_str']
    assert_date_str(date_str)
    client, config = clust_features.init_config(date_str, is_retro=False)
    # The extraction step runs inside one YT transaction.
    with client.Transaction():
        clust_features.step_1_daily_hostnames_extract(
            yt_client=client, date=date_str, path_config=config)
    done_task = task.make_done()
    return [done_task]
