# -*- coding: utf-8 -*-
import logging
import re
import os
from itertools import groupby

from sandbox import sdk2
from sandbox.projects.advq.common import get_phits_resource_class, get_chrono_resource_class, get_sumhits_resource_class
import sandbox.common.types.resource as ctr

# Limit applied to every Sandbox resource query issued from this module.
SANDBOX_LIST_LIMIT = 1000
# YT root used when the caller passes no prefix.
DEFAULT_ADVQ_PREFIX = '//home/advq'
# Directory templates, relative to the prefix.
PHITS_DIR_TEMPLATE = 'advq/{type}/{db}/phits/{res_type}'
SUMHITS_DIR_TEMPLATE = 'advq/{type}/{db}/sumhits'
SUMHITS_CHUNK_DIR_TEMPLATE = 'advq/{type}/{db}/sumhits-blob'

# All basename patterns below are raw strings so that regex escapes are never
# interpreted as (invalid) Python string escapes.  Patterns without a real
# second component keep an empty second group so that callers can always read
# group(2).

# For phits_type == 'normal'
PHITS_NORMAL_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8})(?:-delta([0-9]+))?$')
# For 'partial' and 'video'
PHITS_DATE_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8})()$')
# For 'spikes': date and hour
PHITS_SPIKES_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8}T[0-9]{4})()$')

# For phits_type == 'normal'
SUMHITS_NORMAL_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8})(?:-([0-9]+))?$')
# For 'partial' and 'video'
SUMHITS_DATE_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8})()$')
# For 'spikes': date and hour
SUMHITS_SPIKES_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8}T[0-9]{4})()$')

# Plain eight-digit date basename, used by list_any_tables.
GENERIC_DATE_BASENAME_TABLE_RE = re.compile(r'^([0-9]{8})$')

# Used with .match(), so it is anchored at the start of the name only.
PKZ_DB_NAME_RE = re.compile(r'pkz-([0-9]{8})\.[0-9]\.db')

# Basename pattern for phits tables, keyed by phits type.
PHITS_TABLE_FORMAT_RE_BY_PHITS_TYPE = {
    'hurmal': PHITS_DATE_BASENAME_TABLE_RE,
    'normal': PHITS_NORMAL_BASENAME_TABLE_RE,
    'video': PHITS_DATE_BASENAME_TABLE_RE,
    'partial': PHITS_DATE_BASENAME_TABLE_RE,
    'spikes': PHITS_SPIKES_BASENAME_TABLE_RE,
}

# Basename pattern for sumhits tables, keyed by phits type.
SUMHITS_TABLE_FORMAT_RE_BY_PHITS_TYPE = {
    'hurmal': SUMHITS_NORMAL_BASENAME_TABLE_RE,
    'normal': SUMHITS_NORMAL_BASENAME_TABLE_RE,
    'mini': SUMHITS_NORMAL_BASENAME_TABLE_RE,
    'video': SUMHITS_DATE_BASENAME_TABLE_RE,
    'spikes': SUMHITS_SPIKES_BASENAME_TABLE_RE,
}


def list_phits_tables(phits_type, db,
                      prefix=None,
                      phits_dir_template=PHITS_DIR_TEMPLATE):
    """
    Yield phits tables found in the YT directory for the given type and database.

    The directory is ``<prefix>/<phits_dir_template>`` with ``type``, ``db`` and
    ``res_type='tables'`` substituted into the template.  Only nodes whose
    ``type`` attribute equals ``'table'`` and whose name matches the pattern
    registered for ``phits_type`` are reported.

    This is a generator: nothing (including the YT request) happens until the
    first item is requested.

    :param phits_type str: key of PHITS_TABLE_FORMAT_RE_BY_PHITS_TYPE
    :param db str: database name substituted into the template
    :param prefix Optional[str]: YT root; DEFAULT_ADVQ_PREFIX is used when empty or None
    :param phits_dir_template Optional[str]: directory template; PHITS_DIR_TEMPLATE is used when None
    :return: generator of tuples (first regex group, second regex group as int or 0, full table path)
    :raises KeyError: during iteration, if phits_type has no registered pattern
    """
    import yt.wrapper as yt
    if not prefix:
        prefix = DEFAULT_ADVQ_PREFIX
    # Accept an explicit None the same way list_sumhits_tables does.
    if phits_dir_template is None:
        phits_dir_template = PHITS_DIR_TEMPLATE
    full_path = prefix + '/' + phits_dir_template.format(type=phits_type, db=db, res_type='tables')
    logging.info("Get phits tables from %r", full_path)
    tables = yt.list(full_path, attributes=['type'])

    basename_re = PHITS_TABLE_FORMAT_RE_BY_PHITS_TYPE[phits_type]
    for tbl in tables:
        # Skip anything in the directory that is not a table node.
        if tbl.attributes['type'] != 'table':
            continue
        m = basename_re.match(tbl)
        if m:
            # The second group is empty or absent for names without a numeric suffix.
            yield m.group(1), int(m.group(2) or 0), full_path + '/' + tbl


def list_any_tables(yt_directory):
    """
    Yield (date, path) for every entry of a YT directory named as an eight-digit date.

    Entry names are matched against GENERIC_DATE_BASENAME_TABLE_RE; entries that
    do not match are skipped.  The node type is not checked here.

    :param yt_directory str: YT directory to list
    :return: generator of tuples (matched date string, joined YT path of the entry)
    """
    import yt.wrapper as yt

    node_names = yt.list(yt_directory)
    date_re = GENERIC_DATE_BASENAME_TABLE_RE

    for node_name in node_names:
        matched = date_re.match(node_name)
        if matched is None:
            continue
        yield matched.group(1), yt.ypath_join(yt_directory, node_name)


def list_sumhits_tables(phits_type, db,
                        prefix=None,
                        sumhits_dir_template=SUMHITS_DIR_TEMPLATE):
    """
    Yield sumhits tables found in the YT directory for the given type and database.

    The directory is ``<prefix>/<sumhits_dir_template>`` with ``type``, ``db`` and
    ``res_type='tables'`` offered to the template.  Only nodes whose ``type``
    attribute equals ``'table'`` and whose name matches the pattern registered
    for ``phits_type`` are reported.

    This is a generator: nothing (including the YT request) happens until the
    first item is requested.

    :param phits_type str: key of SUMHITS_TABLE_FORMAT_RE_BY_PHITS_TYPE
    :param db str: database name substituted into the template
    :param prefix Optional[str]: YT root; DEFAULT_ADVQ_PREFIX is used when empty or None
    :param sumhits_dir_template Optional[str]: directory template; SUMHITS_DIR_TEMPLATE is used when None
    :return: generator of tuples (first regex group, second regex group as int or 0, full table path)
    :raises KeyError: during iteration, if phits_type has no registered pattern
    """
    import yt.wrapper as yt
    if not prefix:
        prefix = DEFAULT_ADVQ_PREFIX
    if sumhits_dir_template is None:
        sumhits_dir_template = SUMHITS_DIR_TEMPLATE
    # res_type is passed for templates that use it; str.format ignores unused keywords.
    full_path = prefix + '/' + sumhits_dir_template.format(type=phits_type, db=db, res_type='tables')
    # The message used to say "phits" (copy-paste from list_phits_tables).
    logging.info("Get sumhits tables from %r", full_path)
    tables = yt.list(full_path, attributes=['type'])

    basename_re = SUMHITS_TABLE_FORMAT_RE_BY_PHITS_TYPE[phits_type]
    for tbl in tables:
        # Skip anything in the directory that is not a table node.
        if tbl.attributes['type'] != 'table':
            continue
        m = basename_re.match(tbl)
        if m:
            # The second group is empty or absent for names without a numeric suffix.
            yield m.group(1), int(m.group(2) or 0), full_path + '/' + tbl


def group_resource_chunks(resource_chunk_list, keys):
    """
    Group resource records by the values of the given keys.

    For each key the value is taken from ``rec['attributes']``; when that value
    is missing or falsy, the top-level ``rec`` value is used instead.  Records
    are sorted by the resulting tuple in descending order and then grouped.

    :param resource_chunk_list: iterable of dict-like records with an 'attributes' mapping
    :param keys: re-iterable collection of key names
    :return: itertools.groupby iterator of (key tuple, records iterator)
    """
    def record_key(rec):
        attributes = rec['attributes']
        return tuple(attributes.get(key) or rec.get(key) for key in keys)

    ordered = list(resource_chunk_list)
    ordered.sort(key=record_key, reverse=True)
    return groupby(ordered, key=record_key)


def chunk_normal_key(res):
    """
    Build the pre-grouping key of a chunk resource.

    The key is (phits type, db, date, epoch, negated task id).

    :type res projects.advq.common.AdvqPhitsDatabaseChunk
    """
    negated_task_id = -res.task_id
    return (
        res.advq_phits_type,
        res.advq_db,
        res.advq_date,
        res.advq_epoch,
        negated_task_id,
    )


def chunk_validation_key(res):
    """
    Return the validation key of a chunk resource: its declared total chunk count.

    :type res projects.advq.common.AdvqPhitsDatabaseChunk
    """
    declared_total = res.advq_total_chunks
    return declared_total


def chunk_sorting_key(res):
    """
    Build the sorting key of a chunk resource.

    The key is (pre-grouping key, validation key, chunk number), so that sorted
    resources are contiguous per pre-grouping key, then per validation key, and
    ordered by chunk number inside each run.

    :type res projects.advq.common.AdvqPhitsDatabaseChunk
    """
    grouping_part = chunk_normal_key(res)
    validation_part = chunk_validation_key(res)
    return grouping_part, validation_part, res.advq_chunk


def sandbox_list_dbs(resource_type, phits_type, dbs, release_type, extra_attrs=None, filter_predicate=None):
    """
    Return a dict keyed by db name whose values are dicts from (date, epoch) to a list of resources.

    READY resources of ``resource_type`` are queried per db (at most
    SANDBOX_LIST_LIMIT per query), grouped and validated (no missing chunks, etc.).
    A group is dropped, with an error logged, when its resources disagree on the
    total chunk count or when its chunk numbers are not exactly 1..total.

    Groups also differ by task id and are visited from the highest task id to
    the lowest; a later valid group with the same (date, epoch) replaces an
    earlier one.  NOTE(review): this means the lowest task id wins - confirm
    that this is intended.

    :param resource_type: resource class passed to sdk2.Resource.find
    :param phits_type: value of the 'advq_phits_type' attribute to query
    :param dbs: iterable of db names (values of the 'advq_db' attribute)
    :param release_type: if not None, value of the 'released' attribute to query
    :param extra_attrs: optional mapping of additional attributes for the query
    :param filter_predicate: optional predicate passed to filter() over found resources
    :rtype: dict(str, dict(tuple, list))
    """
    result = {}

    for advq_db in dbs:
        # Complete (validated) databases found for this db name.
        complete = result[advq_db] = {}

        query = {'advq_db': advq_db, 'advq_phits_type': phits_type}
        if extra_attrs is not None:
            query.update(extra_attrs)
        if release_type is not None:
            query['released'] = release_type

        found = sdk2.Resource.find(resource_type, attrs=query, state=ctr.State.READY).limit(SANDBOX_LIST_LIMIT)
        chunk_resources = list(filter(filter_predicate, found))
        logging.debug("%s: Chunk resources: %r", str(resource_type), chunk_resources)

        # groupby needs its input sorted; the sorting key starts with the grouping key.
        ordered = sorted(chunk_resources, key=chunk_sorting_key)

        for norm_key, items in groupby(ordered, key=chunk_normal_key):
            # Validate the group: all members must declare the same total chunk count.
            by_total = [(declared_total, list(members))
                        for declared_total, members in groupby(items, key=chunk_validation_key)]
            if len(by_total) != 1:
                logging.error("%s: Ambiguous chunks for %r: %r", str(resource_type), norm_key, by_total)
                continue

            total_chunks, resources = by_total[0]
            chunk_numbers = [res.advq_chunk for res in resources]
            # Guaranteed by the sort above: the chunk number is the last sorting component.
            assert chunk_numbers == sorted(chunk_numbers)
            if chunk_numbers != list(range(1, total_chunks + 1)):
                logging.error("%s: Missing or extra chunks for %r: %r",
                              str(resource_type), norm_key, chunk_numbers)
                continue

            advq_date, advq_epoch = norm_key[2], norm_key[3]
            complete[(advq_date, advq_epoch)] = resources

    return result


def sandbox_list_phits_dbs(phits_type, dbs, release_type):
    """
    Find phits database chunks in Sandbox and return only complete databases.

    Delegates to sandbox_list_dbs with the resource class returned by
    get_phits_resource_class(phits_type).  If release_type is not None, only
    released resources are considered.

    :param phits_type str: phits type ('normal', 'video', 'partial', etc)
    :param dbs list: list of databases ('rus', 'tur-robots', etc).
    :param release_type Optional[str]: discover only released resources.
    :return: dictionary from db name to dict from tuple (date, epoch) to list of resources
    """
    resource_class = get_phits_resource_class(phits_type)
    return sandbox_list_dbs(resource_class, phits_type, dbs, release_type)


def sandbox_list_sumhits_dbs(phits_type, dbs, release_type, period_size, extra_attrs=None):
    """
    Find sumhits database chunks in Sandbox and return only complete databases.

    Delegates to sandbox_list_dbs with the resource class returned by
    get_sumhits_resource_class(phits_type).  If release_type is not None, only
    released resources are considered.

    :param phits_type str: phits type ('normal', 'video', 'partial', etc)
    :param dbs list: list of databases ('rus', 'tur-robots', etc).
    :param release_type Optional[str]: discover only released resources.
    :param period_size: value required for the 'period_size' resource attribute
    :param extra_attrs: optional mapping of additional attributes; it is copied, not modified
    :return: dictionary from db name to dict from tuple (date, epoch) to list of resources
    """
    # Work on a copy so the caller's mapping is never mutated.
    query_attrs = {} if extra_attrs is None else dict(extra_attrs)
    query_attrs['period_size'] = period_size

    logging.debug("Looking for sumhits dbs, phits_type %r, dbs %r, release_type: %r, extra_attrs %r",
                  phits_type, dbs, release_type, query_attrs)

    resource_class = get_sumhits_resource_class(phits_type)
    return sandbox_list_dbs(resource_class, phits_type, dbs, release_type,
                            extra_attrs=query_attrs)

def yt_list_sumhits_dbs(phits_type, db, advq_date, prefix=DEFAULT_ADVQ_PREFIX, sumhits_chunk_dir_template=SUMHITS_CHUNK_DIR_TEMPLATE):
    """
    Yield names of entries in the sumhits chunk directory in YT that contain the given date.

    The directory is the prefix joined with ``sumhits_chunk_dir_template``
    formatted with ``type`` and ``db``.  An entry is reported when ``advq_date``
    occurs anywhere in its name; no grouping or validation is performed.

    This is a generator: nothing (including the YT request) happens until the
    first item is requested.

    :param phits_type str: phits type ('normal', 'video', 'partial', etc)
    :param db str: database name ('rus', 'tur-robots', etc).
    :param advq_date str: date ('20220331')
    :param prefix Optional[str]: YT root; DEFAULT_ADVQ_PREFIX is used when empty or None
    :param sumhits_chunk_dir_template Optional[str]: directory template;
        SUMHITS_CHUNK_DIR_TEMPLATE is used when None
    :return: generator of entry names (not full paths) containing the date
    """
    import yt.wrapper as yt
    # Same fallbacks as the other YT listing helpers in this module.
    if not prefix:
        prefix = DEFAULT_ADVQ_PREFIX
    if sumhits_chunk_dir_template is None:
        sumhits_chunk_dir_template = SUMHITS_CHUNK_DIR_TEMPLATE
    logging.debug("Looking for sumhits dbs in YT, phits_type %r, dbs %r, date %r",
                  phits_type, db, advq_date)
    # Cypress paths always use '/', so join them with the YT helper rather than
    # with the OS-dependent os.path.join.
    full_path = yt.ypath_join(prefix, sumhits_chunk_dir_template.format(type=phits_type, db=db))
    logging.info("Get sumhits chunks from %r", full_path)
    tables = yt.list(full_path)

    for tbl in tables:
        if advq_date in tbl:
            yield tbl


def sandbox_list_chrono_dbs(phits_type, dbs, release_type, chrono_type, filter_predicate=None, res_class=None):
    """
    Find chrono database chunks in Sandbox and return only complete databases.

    Delegates to sandbox_list_dbs, additionally requiring the
    'advq_chrono_type' attribute to equal ``chrono_type``.  If release_type is
    not None, only released resources are considered.

    :param phits_type str: phits type ('normal', 'video', 'partial', etc)
    :param dbs list: list of databases ('rus', 'tur-robots', etc).
    :param release_type Optional[str]: discover only released resources.
    :param chrono_type str: 'weekly' or 'monthly'
    :param filter_predicate: function for filtering resources
    :param res_class: resource class for special needs (e.g. legacy); when None,
        get_chrono_resource_class(phits_type) is used
    :return: dictionary from db name to dict from tuple (date, epoch) to list of resources
    """
    resource_class = res_class if res_class is not None else get_chrono_resource_class(phits_type)
    chrono_attrs = dict(advq_chrono_type=chrono_type)
    return sandbox_list_dbs(
        resource_class, phits_type, dbs, release_type,
        extra_attrs=chrono_attrs,
        filter_predicate=filter_predicate,
    )


def pkz_list_dbs(yt_directory):
    """
    Yield the date part of every entry in a YT directory named like a pkz database.

    Entry names are matched from their start against PKZ_DB_NAME_RE; entries
    that do not match are skipped.

    :param yt_directory str: YT directory to list
    :return: generator of eight-digit date strings
    """
    import yt.wrapper as yt

    for entry_name in yt.list(yt_directory):
        matched = PKZ_DB_NAME_RE.match(entry_name)
        if matched is None:
            continue
        yield matched.group(1)
