import os
import copy
import datetime
import itertools

import yt.wrapper as yt_wrapper
from datacloud.config.yt import PRODUCTION_ROOT
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.time.patterns import FMT_DATE_HM, FMT_DATE
from datacloud.dev_utils.time.utils import assert_date_str
from datacloud.launcher.lib.grep import fast_grep


logger = get_basic_logger(__name__)


def date_to_spy_watch_logs_extended(date_time, days=None):
    """
    Yield grep task descriptions for recent days whose daily logs are all ready.

    Walks backwards day by day starting from ``date_time``. A day is reported
    only when every required ``clean`` table exists and has at least one row.

    :param date_time: str timestamp, parsed with ``FMT_DATE_HM``
    :param days: number of days to look back (the start day included), default None means 3
    :return: generator of ``(date_str, data)`` pairs, where ``data`` holds
        ``date_str``, a ``table_path`` with ``*`` in place of the log type,
        and ``fast`` set to True
    """
    start = datetime.datetime.strptime(date_time, FMT_DATE_HM)
    lookback = 3 if days is None else days
    path_template = "//user_sessions/pub/{}/daily/{}/clean"
    required_logs = ("spy_log", "watch_log_tskv", "search")

    for offset in range(lookback):
        day = (start - datetime.timedelta(days=offset)).strftime(FMT_DATE)
        candidates = [path_template.format(name, day) for name in required_logs]
        # all() stops at the first table that is missing or empty, so no
        # further cluster requests are made for that day.
        all_ready = all(
            yt_wrapper.exists(path) and yt_wrapper.row_count(path) > 0
            for path in candidates
        )
        if not all_ready:
            continue
        wildcard_path = path_template.format("*", day)
        yield day, {'date_str': day, 'table_path': wildcard_path, 'fast': True}


@yt_wrapper.with_context
def grep_call_reduce_extended(_, recs, context):
    """
    Reducer: keep log rows with a url and a title for keys seen in input table 0.

    A row coming from input table 0 only marks the current key as interesting.
    If the first row of the group comes from any other table, the whole group
    is skipped. Every other row has its ``value`` field parsed as tab-separated
    ``name=value`` pairs, and matching rows are written to the output table
    with index ``table_index - 1``.

    :param _: reduce key (unused)
    :param recs: iterator over the rows of one key group
    :param context: YT context, used for the ``table_index`` of the current row
    :return: generator of table switches followed by result rows
    """
    max_len = 2048  # upper bound on the length of stored url / query / title
    last_emitted = None
    key_is_interesting = False

    for row in recs:
        if context.table_index == 0:
            key_is_interesting = True
            continue
        if not key_is_interesting:
            break

        # Parse "name=value" pairs; a repeated name keeps its last value.
        fields = {}
        for item in row['value'].split('\t'):
            name, _sep, payload = item.partition('=')
            fields[name] = payload

        if "title" not in fields and "url" not in fields and "query" not in fields:
            continue

        record = {"yuid": row["key"][1:]}
        if "url" in fields:
            # Drop the query string and the fragment before truncating.
            bare_url = fields["url"].partition("?")[0].partition("#")[0]
            record["url"] = bare_url[:max_len]
        for name in ("query", "title"):
            if name in fields:
                record[name] = fields[name][:max_len]
        if "type" in fields:
            record["type"] = fields["type"]

        if not (fields.get('url') and fields.get('title')):
            continue
        # Skip a row identical (timestamp aside) to the previously emitted one.
        if last_emitted == record:
            continue

        last_emitted = copy.copy(record)
        record["timestamp"] = int(row["subkey"])
        yield yt_wrapper.create_table_switch(context.table_index - 1)
        yield record


def grep_all_spy_watch_logs_internal_extended(
        table_path,
        dates=None,
        result_folder=None,
        log_types=("spy_log", "watch_log_tskv",)
):
    """
    Grep daily logs for interesting yuids and store the sorted results.

    For every (log type, date) pair the log table path is derived from
    ``table_path`` by replacing its 4th-from-last component with the log type
    and its 2nd-from-last component with the date. The log tables are joined
    as foreign tables against ``all_interesting_yuid`` in a single reduce,
    written to temporary tables, then sorted by (yuid, timestamp) into
    ``<result_folder>/<log_type>/<date>``. The temporary tables are removed
    afterwards. Reduce, sorts and cleanup run inside one transaction.

    :param table_path: path of a daily log table, used as a template
    :param dates: - list of str dates to grep, example: ["2017-08-02"], default - None and table name will be used to extract date
    :param result_folder: - default None, meaning PRODUCTION_ROOT + "/datacloud/grep"
    :param log_types: - default: ("spy_log", "watch_log_tskv")
    :return: None
    """
    root_folder = PRODUCTION_ROOT
    if not result_folder:
        result_folder = root_folder + "/datacloud/grep"
    path_parts = table_path.split("/")
    if not dates:
        dates = [path_parts[-2]]

    # The first (primary) input is the yuid list; log tables follow as foreign.
    input_tables = [root_folder + "/crypta_v2/crypta_db_last/all_interesting_yuid"]
    tmp_output_tables = []
    output_tables = []
    for log_type, date in itertools.product(log_types, dates):
        path_parts[-4] = log_type
        path_parts[-2] = date
        log_table = yt_wrapper.TablePath("/".join(path_parts), attributes={'foreign': True})
        input_tables.append(log_table)
        # NOTE(review): the prefix contains a space and no separator precedes
        # log_type / date - looks unintended, confirm before changing.
        tmp_output_tables.append("//tmp/x-products " + log_type + date)
        output_tables.append("/".join((result_folder, log_type, date)))

    yt_wrapper.mkdir(result_folder, recursive=True)
    for log_type in log_types:
        yt_wrapper.mkdir(result_folder + "/" + log_type, recursive=True)

    with yt_wrapper.Transaction(attributes={'title': 'datacloud grep all logs'}):
        yt_wrapper.run_reduce(
            grep_call_reduce_extended,
            input_tables,
            tmp_output_tables,
            input_format=yt_wrapper.YsonFormat(control_attributes_mode="iterator"),
            output_format=yt_wrapper.YsonFormat(control_attributes_mode="iterator"),
            reduce_by="key",
            join_by="key",
            spec={"title": "DataCloud log grep"}
        )

        # Launch all sorts asynchronously and wait for them together.
        tracker = yt_wrapper.OperationsTracker()
        for tmp_table, result_table in zip(tmp_output_tables, output_tables):
            if not yt_wrapper.exists(result_table):
                string_columns_after_timestamp = ("title", "url", "query", "type")
                schema = [
                    {"name": "yuid", "type": "string"},
                    {"name": "timestamp", "type": "int64"},
                ]
                schema.extend(
                    {"name": column, "type": "string"}
                    for column in string_columns_after_timestamp
                )
                table_attributes = {
                    "compression_codec": "brotli_6",
                    "erasure_codec": "lrc_12_2_2",
                    "schema": schema,
                    "optimize_for": "scan",
                }
                yt_wrapper.create_table(result_table, attributes=table_attributes)
            sort_operation = yt_wrapper.run_sort(
                tmp_table,
                result_table,
                sort_by=("yuid", "timestamp"),
                sync=False
            )
            tracker.add(sort_operation)
        tracker.wait_all()

        for tmp_table in tmp_output_tables:
            yt_wrapper.remove(tmp_table)


def grep_all_spy_watch_logs_extended(task):
    """
    Run the log grep described by ``task`` and mark the task as done.

    When ``task.data['fast']`` is truthy, the grep is delegated to
    ``fast_grep.grep`` for the date taken from the 2nd-from-last component of
    ``task.data['table_path']``. Otherwise the Python implementation
    ``grep_all_spy_watch_logs_internal_extended`` is used.

    :param task: task object with a ``data`` dict (``table_path`` is required,
        ``fast`` is optional and defaults to False) and a ``make_done()`` method
    :return: list with the single result of ``task.make_done()``
    """
    table_path = task.data['table_path']
    is_fast = task.data.get('fast', False)
    if is_fast:  # use cpp version
        logger.info('[GREP] Start FAST grep')
        date_str = table_path.split('/')[-2]
        assert_date_str(date_str)
        # Prefer the token from the YT client config, fall back to the environment.
        yt_token = yt_wrapper.config['token'] or os.environ.get('YT_TOKEN')
        if not yt_token:
            # Best effort: the grep is still attempted without a token.
            # `warning` is used because `Logger.warn` is a deprecated alias.
            logger.warning('[FAST GREP] No YT_TOKEN provided')
        fast_grep.grep(yt_token, date_str)
    else:  # use python version
        logger.info('[GREP] Start SLOW grep')
        grep_all_spy_watch_logs_internal_extended(table_path)

    logger.info('[GREP] Done')
    return [task.make_done()]
