"""
    Search database shards operations.

    Please obey PEP8 coding standards and Sandbox StyleGuide here
    https://wiki.yandex-team.ru/sandbox/codestyleguide

    New functions without documentation comments are FORBIDDEN.

    Maintainers: mvel@, realtim@
"""
import datetime
import logging
import random
from collections import defaultdict

from sandbox.sandboxsdk.channel import channel

from sandbox.projects.common import apihelpers
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common.base_search_quality import settings as bss
from sandbox.projects.common.search import settings as media_settings
from sandbox.projects import resource_types


def get_shard_resource_by_name(shard_name, resource_type=resource_types.SEARCH_DATABASE):
    """
        Find an already existing READY resource that holds the given shard.

        Resources are looked up by the shard instance attribute
        (media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME) and requested with
        order_by='+id'; the **first** entry of the returned list is used,
        so that repeated calls keep reusing one and the same resource
        instead of spreading load over several copies of a shard.

        The chosen resource is "touched" to prolong its lifetime.

        :shard_name: Shard unique name, e.g. primus022-050-<timestamp>
        :resource_type: Shard resource type (e.g. IMAGES_SEARCH_DATABASE)
        :return: shard resource object, None if nothing found
    """
    logging.info("Get resource for search database shard '%s'", shard_name)
    found_resources = channel.rest.list_resources(
        resource_type=str(resource_type),
        status='READY',
        attribute_name=media_settings.SHARD_INSTANCE_ATTRIBUTE_NAME,
        attribute_value=shard_name,
        # the exact limit does not matter: typically there are 0, 1 or 2 resources per shard
        limit=100,
        order_by='+id',
    )
    if not found_resources:
        logging.info("Resource for search database shard '%s' was not found", shard_name)
        return None

    first_resource = found_resources[0]
    logging.info("Resource %s containing shard '%s' already exists", first_resource.id, shard_name)
    # an empty update "touches" the resource and thus prolongs its lifetime,
    # see https://ml.yandex-team.ru/thread/2370000002016805943/
    channel.rest.server.resource[first_resource.id] = {}
    return first_resource


class ShardDownloadTask(object):
    """
        Description of a single shard that has to be downloaded.

        Attributes:
            name: full shard name, e.g. primus022-050-<timestamp>
            need_backup: whether the downloaded shard should be backed up
            timestamp: integer taken from the third dash-separated part
                of the name, None when the name has another layout
                or that part is not a number
    """
    def __init__(self, name, need_backup=False):
        """
            :name: Shard unique name
            :need_backup: True if the shard should be backed up (also raises priority)
        """
        self.name = name
        self.need_backup = need_backup
        # automatically parse timestamp from full shard name if possible
        self.timestamp = self._parse_timestamp(name)

    @staticmethod
    def _parse_timestamp(name):
        """
            Extract timestamp from a `<prefix>-<number>-<timestamp>` shard name.

            :name: Shard unique name
            :return: timestamp as int, None if the name does not contain one
        """
        name_parts = name.split('-')
        if len(name_parts) != 3:
            return None
        try:
            return int(name_parts[2])
        except ValueError:
            # three parts, but the last one is not a number: no timestamp available
            return None

    @property
    def priority(self):
        """
            Get task priority.
            Priority value is based on legacy logic - need backup -> need high priority
        """
        return 10 if self.need_backup else 1


def get_all_shards_for_tiers(
    configuration, tiers, primus_groups, primus_groups_count,
    cms_cfg=None,
    prod_cms_cfg=None,
):
    """
        Choose shards to download for the requested tiers using CMS data.

        For every (tier, primus group) pair that has at least one shard,
        up to three shards are selected: the first one in sorted order
        (marked as needing backup) and up to two random ones from the rest.

        @param configuration: 'production' or 'newdb', any other value is reported via eh.check_failed
        @param tiers: iterable of tier tag names to collect shards for
        @param primus_groups: dict {shard name prefix (part before the first '-'): group id},
            prefixes missing from the dict are treated as group 0
        @param primus_groups_count: number of groups, group ids from range(primus_groups_count) are inspected
        @param cms_cfg: CMS configuration name for 'newdb'; when empty,
            the latest 'robot-1*' configuration known to CMS is used
        @param prod_cms_cfg: CMS configuration name for 'production' ('HEAD' when empty)
        :return: defaultdict {tier: [ShardDownloadTask, ...]}
    """
    from api.cms import Registry
    cms = Registry.nativeInterface()

    conf, itag = None, None
    if configuration == 'production':
        conf = prod_cms_cfg or 'HEAD'
        itag = 'production_base'
    elif configuration == 'newdb':
        if cms_cfg:
            conf = cms_cfg
        else:
            robot_confs = set()
            for cms_conf in cms.listConf():
                conf_name = cms_conf['name']
                if conf_name.startswith('robot-1'):
                    robot_confs.add(conf_name)
            if not robot_confs:
                # fail with a clear message instead of an IndexError/ValueError below
                eh.check_failed("No 'robot-1*' configurations found in CMS")

            latest_robot_conf = max(robot_confs)
            latest_timestamp = latest_robot_conf.split('-')[1].split('_')[0]
            # among configurations mentioning the latest timestamp take the greatest name
            # NOTE(review): latest_robot_conf itself always matches this filter,
            # so the result appears to be equal to latest_robot_conf
            conf = max(name for name in robot_confs if latest_timestamp in name)
    else:
        eh.check_failed("Unsupported configuration type: {}".format(configuration))

    # gather all shards, further will refine them
    instances = cms.listSearchInstances({'conf': conf, 'instanceTagName': itag})
    candidate_shards = set()
    for instance in instances:
        instance_shard = instance['shard']
        logging.debug("Encountered shard instance: %s", instance_shard)
        if 'primus' in instance_shard or 'lp' in instance_shard or 'oxy' in instance_shard:
            candidate_shards.add(instance_shard)

    # get shards tiers (use batch request, filter manually)
    all_shards_tags = cms.listAllShardsTags({'conf': conf, 'shards': list(candidate_shards)})
    shards = {}  # {'shard': {'tier': 'RrgTier0', 'group': 0}}
    for tag_record in all_shards_tags:
        shard_name = tag_record['shard']
        if shard_name in shards:  # already seen this shard
            continue

        tag_name = tag_record['name']
        if 'Tier' in tag_name or 'Kz0' in tag_name:  # we got what we were looking for
            group = primus_groups.get(shard_name.split('-')[0], 0)
            shards[shard_name] = {'tier': tag_name, 'group': group}

    result = defaultdict(list)
    for tier in tiers:
        for group_id in range(primus_groups_count):
            # sorted list of shard names for current tier and group
            matching_shards = sorted(
                shard_name
                for shard_name, shard_info in shards.items()
                if shard_info['tier'] == tier and shard_info['group'] == group_id
            )
            if not matching_shards:
                # a tier-group pair without shards is not an error, just skip it
                continue

            # the task takes three:
            # take first one from sorted list
            # according to agri@'s wishes we backup only one from shard group
            result[tier].append(ShardDownloadTask(matching_shards[0], need_backup=True))
            # take two random
            other_shards = matching_shards[1:]
            random.shuffle(other_shards)
            for shard_name in other_shards[:2]:
                result[tier].append(ShardDownloadTask(shard_name))
    return result


def get_database_shard_resource(
    tier,
    shard_download_task,
    kill_timeout=25200,
    db_type='',
    additional_attrs=None,
    check_spam=True,
    cms_configurations=None,
    database_path=None,
    execution_space=None,
):
    """
        Get info about shard resource. If shard doesn't exist - enqueue task to download it.

        @param tier: Tier
        @param shard_download_task: ShardDownloadTask object
        @param kill_timeout: Download job kill timeout
        @param db_type: type of database (ex.: basesearch, oxygen)
        @param additional_attrs: Additional attributes (dict) to be added to created resource, may be None
        @param check_spam: Check spam or not. Legacy. FIXME(mvel,kulikov): to be removed
        @param cms_configurations: CMS configurations. Legacy. FIXME(mvel,kulikov): to be removed
        @param database_path: use direct rsync path to download from
        @param execution_space: execution space requested for the download subtask;
            when None, bss.MAX_TIER_SHARD_SIZES[tier] is used if the tier is listed there
        :return: Resource object (an existing one or the one produced by the created subtask)
    """
    shard_name = shard_download_task.name
    database_resource = get_shard_resource_by_name(shard_name)
    if database_resource:
        return database_resource

    logging.info("Create GET_BASESEARCH_DATABASE task for shard '%s'", shard_name)

    shard_attributes = {'tier_name': tier}
    if additional_attrs:
        shard_attributes.update(additional_attrs)

    if shard_download_task.need_backup:
        # also set bigger TTL (deliberately overrides 'ttl' passed via additional_attrs)
        shard_attributes['ttl'] = 90

    shard_attributes_str = ','.join(['{}={}'.format(k, shard_attributes[k]) for k in shard_attributes])

    get_database_tasks_ctx = {
        'database_shard_name': shard_name,
        'wait_ready': True,
        'notify_via': '',
        'kill_timeout': kill_timeout,
        'db_type': db_type,
        'resource_attrs': shard_attributes_str,
        'check_if_spam_is_ready': check_spam,
        'cms_configurations': cms_configurations,
    }

    if database_path:
        get_database_tasks_ctx['search_database_rsync_path'] = database_path

    if shard_download_task.need_backup:
        get_database_tasks_ctx['backup_database'] = True

    task_description = '{}, shard {}'.format(tier, shard_name)
    if shard_download_task.timestamp is not None:
        # timestamp is None for shard names it could not be parsed from,
        # the shard date is just omitted from the description then
        shard_date = datetime.datetime.utcfromtimestamp(shard_download_task.timestamp)
        task_description += ', {}'.format(shard_date.strftime('%Y-%m-%d'))

    if execution_space is None and tier in bss.MAX_TIER_SHARD_SIZES:
        execution_space = bss.MAX_TIER_SHARD_SIZES[tier]

    get_db_task = channel.task.create_subtask(
        task_type='GET_BASESEARCH_DATABASE',
        description=task_description,
        input_parameters=get_database_tasks_ctx,
        priority=shard_download_task.priority,
        arch='any',
        model='',
        execution_space=execution_space,
    )
    # here we use resource chaining (see also SEARCH-1680)
    return apihelpers.list_task_resources(get_db_task.id, resource_types.SEARCH_DATABASE)[0]
