import logging
from google.protobuf import text_format
from yweb.querydata.querydata_indexer_saas.ferryman.idl import config_pb2
from saas.protos import shards_pb2
from saas.library.python.ferryman import FerrymanHTTPAPI

logger = logging.getLogger('script')


def _get_service_balancer_name(service_name, service_ctype):
    """Build the ferryman balancer hostname for a service.

    The service name gets a '_p' suffix when 'prestable' occurs in the ctype
    and a '_h' suffix when 'hamster' occurs in it (in that order); afterwards
    every underscore is replaced with a dash and the fixed ferryman domain
    is appended.
    """
    # TODO: reuse get_service_balancer_name from ssm_ferryman.py
    host_prefix = service_name
    for ctype_marker, suffix in (('prestable', '_p'), ('hamster', '_h')):
        if ctype_marker in service_ctype:
            host_prefix += suffix
    dashed_prefix = host_prefix.replace('_', '-')
    return '%s.ferryman.n.yandex-team.ru' % dashed_prefix


def get_ferryman_config(service_name, ctype, fm_host):
    """Fetch the raw ferryman config over HTTP and parse it into a TConfig.

    When fm_host is given it is used as the balancer address ('http://' is
    prepended unless the value already starts with 'http'); otherwise the
    address is derived from service_name and ctype.
    """
    if fm_host is None:
        # We cannot use dm._get_sla_info().json(), because Backup ferrymans are not mentioned in SLA info
        balancer_url = 'http://' + _get_service_balancer_name(service_name, ctype)
    elif fm_host.startswith('http'):
        balancer_url = fm_host
    else:
        balancer_url = 'http://' + fm_host

    raw_config = FerrymanHTTPAPI(balancer_url).get_config_raw()
    return text_format.Parse(raw_config, config_pb2.TConfig())


def get_task_param(ferryman_config, key):
    """Return the Value of the first entry in TaskParams whose Key equals key.

    Returns None when no entry matches.
    """
    matching_values = (
        param.Value for param in ferryman_config.TaskParams if param.Key == key
    )
    return next(matching_values, None)


def validate_ferryman(ferryman_config, service, ctype):
    """Check that a ferryman config is usable for the given service and ctype.

    Raises Exception when:
      * the 'row_processor' task param is missing or is neither 'logbroker'
        nor 'logfeller';
      * the 'service' task param is set and differs from service;
      * the 'ctype' task param is set and differs from ctype.
    Returns None when every check passes.
    """
    row_processor = get_task_param(ferryman_config, 'row_processor')
    # A missing/empty value is not in the allowed set either, so one membership test covers both cases.
    if row_processor not in ('logbroker', 'logfeller'):
        raise Exception('Incorrect ferryman config: row_processor={}'.format(row_processor))

    # 'service' and 'ctype' are only compared when present in the config.
    actual_service = get_task_param(ferryman_config, 'service')
    if actual_service and actual_service != service:
        raise Exception('Incorrect ferryman: service name is {}, expected {}'.format(actual_service, service))

    actual_ctype = get_task_param(ferryman_config, 'ctype')
    if actual_ctype and actual_ctype != ctype:
        raise Exception('Incorrect ferryman: ctype is {}, expected {}'.format(actual_ctype, ctype))


def get_ferryman_yt_cluster(ferryman_config):
    """Return the config's Server field (used as the YT cluster, per this function's name)."""
    yt_cluster = ferryman_config.Server
    return yt_cluster


def get_ferryman_dishes_path(ferryman_config):
    """Return the path of the dishes directory for a ferryman config.

    A non-empty 'dish' task param takes precedence; otherwise the path is
    RootFolder with '/dishes' appended.
    """
    configured_path = get_task_param(ferryman_config, 'dish')
    return configured_path if configured_path else ferryman_config.RootFolder + '/dishes'


def get_last_dish_ts(yt_client, dishes_path):
    """Return the largest all-digit node name under dishes_path as an int.

    Returns None when dishes_path does not exist or contains no node whose
    name consists solely of digits.
    """
    if not yt_client.exists(path=dishes_path):
        return None

    latest_ts = None
    for node_name in yt_client.list(path=dishes_path):
        # Names that are not purely numeric are not dish timestamps; skip them.
        if not node_name.isdigit():
            continue
        node_ts = int(node_name)
        if latest_ts is None or node_ts > latest_ts:
            latest_ts = node_ts
    return latest_ts


def read_path_from_dish(yt_client, dishes_path, dish_ts):
    """Return the YT index table path referenced by a dish, with a fixed range suffix.

    The dish file at '<dishes_path>/<dish_ts>' is read through yt_client and
    parsed as a text-format TShards message. Every shard that has a non-empty
    YTIndexTable must point to the same table (anything from the first '['
    onward is ignored for the comparison).

    Returns None when dish_ts is None; otherwise the common table path with
    "[(0u,65533u)]" appended.

    Raises Exception when shards reference different tables, or when no shard
    has YTIndexTable set.
    """
    if dish_ts is None:
        return None

    last_dish_file = dishes_path + "/" + str(dish_ts)
    last_dish = yt_client.read_file(last_dish_file).read()
    parsed = text_format.Parse(last_dish, shards_pb2.TShards())

    blob_path = None
    for shard in parsed.Shard:
        if not shard.YTIndexTable:
            continue
        # Drop the trailing "[...]" part so only the table path itself is compared.
        yt_index_table = shard.YTIndexTable.split('[')[0]
        if blob_path is None:
            blob_path = yt_index_table
        elif blob_path != yt_index_table:
            raise Exception("Cannot handle {}: different blobs for different shards".format(last_dish_file))

    if blob_path is None:
        # Without this check the concatenation below would fail with an opaque
        # "unsupported operand type(s)" TypeError.
        raise Exception("Cannot handle {}: no shard has YTIndexTable".format(last_dish_file))

    return blob_path + "[(0u,65533u)]"


def compare_dumps(yql_client, backup_path, backup_ts, index_path, result_path, title):
    """Run a YQL query that compares an index dump with a backup dump.

    The query full-outer-joins the two tables on (kps, url), keeps rows whose
    timestamps disagree and are older than the cutoff computed in the query
    ($max_backup_age), takes up to 100 of them and writes them to result_path
    (INSERT ... WITH TRUNCATE).

    Returns a dict with the single key 'web_url', filled in by the callback
    passed to request.run() (it stays None if the callback is never called).

    Raises RuntimeError when the query results report failure.
    """
    stats = {'web_url': None}

    def print_yql_web_url(request):
        # Remember the operation URL for the caller and log it.
        stats['web_url'] = request.web_url
        logger.info("YQL operation started: %s", request.web_url)

    tmpl = """
        PRAGMA yt.InferSchema = '1';

        $backup_ts = {backup_ts};
        $table_dump_index = "{index_path}";
        $table_dump_backup = "{backup_path}";
        $table_result = "{result_path}";
        $max_backup_age = MIN_OF(DateTime::ToSeconds(CurrentUtcDatetime()), $backup_ts) - 2 * 86400;

        INSERT INTO $table_result WITH TRUNCATE
        SELECT backup_ts, index_ts, kps, url FROM (
            SELECT kps, url, index_ts, backup_ts FROM (
                SELECT
                    COALESCE(ind.kps, bak.kps) AS kps,
                    COALESCE(ind.url, bak.url) AS url,
                    COALESCE(ind.`timestamp`, 0) AS index_ts,
                    COALESCE(bak.`timestamp`, 0) AS backup_ts
                FROM $table_dump_index AS ind
                FULL OUTER JOIN $table_dump_backup AS bak
                ON ind.kps == bak.kps AND ind.url == bak.url
            )
            WHERE (backup_ts == 0 AND index_ts != 0 AND index_ts < $max_backup_age)
            OR (backup_ts != 0 AND index_ts != backup_ts AND MIN_OF(index_ts, backup_ts) < $max_backup_age)
            ORDER BY Digest::MurMurHash(url)
            LIMIT 100
        )
        ORDER BY kps, url;
    """
    query_text = tmpl.format(
        backup_ts=backup_ts,
        index_path=index_path,
        backup_path=backup_path,
        result_path=result_path,
    )

    logger.info("Executing YQL query to make {result_path}".format(result_path=result_path))

    # to avoid 'Please mention YQL in the title' exception
    if title and 'YQL' not in title:
        title = title + ' YQL'

    request = yql_client.query(query_text, syntax_version=1, title=title)
    request.run(pre_start_callback=print_yql_web_url)
    results = request.get_results()

    if results.is_success:
        logger.info("YQL query finished without a error")
        return stats

    error_text = '\n'.join(str(err) for err in results.errors)
    logger.error('Error when executing YQL query: %s', error_text)
    raise RuntimeError('YQL query failed')
