"""
Module for loading json serialized pages data from specified directory.
Data filename must be equal to mysql page id.
"""

import logging
import threading
import urllib.parse
from collections import deque
from datetime import datetime as dt

from django.conf import settings
from django.db.models import Q

from wiki.files.models import File
from wiki.pages import dao as pages_dao
from wiki.pages.access import get_bulk_raw_access, interpret_raw_access
from wiki.pages.models import Page, Revision
from wiki.utils.backports.mds_compat import APIError
from wiki.utils.base_command import get_storage, print_percents, spawner
from wiki.utils.models import get_chunked
from wiki.utils.supertag import translit
from wiki.utils.timer import track_time

# Re-entrant lock shared by every caller of generator_aware_logic: it is held
# while pulling the next chunk from the shared generator and while extending
# the shared errors collection.
generator_lock = threading.RLock()


class SkipCheckingAccessToTag(object):
    """
    Marks a tag whose access check must be skipped while copying a dump.

    Acts as the interface between what the user types and what the code needs:
    the user enters "homepage/*" or "homepage", and this object exposes the
    translit()-processed tag plus a flag telling whether a trailing "*" was given.
    """

    # Class-level defaults; __init__ overrides them per instance when needed.
    __supertag = None
    __is_wildcard = False

    def __init__(self, tag):
        """
        @type tag: unicode
        @param tag: tag as entered by the user, optionally ending with "*"
        """
        has_wildcard = tag.endswith('*')
        if has_wildcard:
            self.__is_wildcard = True

        # Only the trailing asterisk is stripped; the rest of the tag is kept as is.
        self.__supertag = translit(tag[:-1] if has_wildcard else tag)

    @property
    def supertag(self):
        """Result of translit() applied to the tag without its trailing "*"."""
        return self.__supertag

    @property
    def is_wildcard(self):
        """True when the tag passed to the constructor ended with "*"."""
        return self.__is_wildcard


def filter_chunk_pages(subset, logger):
    """
    Generator yielding the items of ``subset`` whose access is not restricted.

    @type subset: QuerySet
    @param subset: raw pages QuerySet
    @param logger: script logger; every restricted entry is reported at INFO level
    """
    # Raw access for the whole subset, fetched with a single bulk call.
    # NOTE(review): an earlier comment stated the keys are page supertags, yet
    # the final loop tests items of ``subset`` against these keys -- confirm
    # that the keys compare equal to the items yielded by ``subset``.
    access = get_bulk_raw_access(subset)

    # Keys with unrestricted access. A set keeps the membership test in the
    # final loop O(1) instead of scanning a list for every page.
    free_pages = set()

    for page in access:
        # interpreted raw access
        page_access = interpret_raw_access(access[page])

        # we need only pages with unrestricted access
        if page_access['is_restricted']:
            logger.info('Page "%s" is restricted, skipped', page)
        else:
            free_pages.add(page)

    # yield allowed pages from subset, preserving the order of subset
    for page in subset:
        if page in free_pages:
            yield page


def load_object(mds_storage_id, from_storage, to_storage):
    """
    Copy a single object from the source storage to the target storage.

    @type mds_storage_id: str
    @param mds_storage_id: id of the object; used as the name in both storages
    @param from_storage: source storage, must provide open(name)
    @param to_storage: destination storage, must provide save(name, content)
    """

    # get object body from source storage
    source_page = from_storage.open(mds_storage_id)

    try:
        # save object body to target storage
        to_storage.save(mds_storage_id, source_page)
    finally:
        # Release the source handle even when saving fails. Objects that do
        # not expose close() are left untouched.
        close = getattr(source_page, 'close', None)
        if close is not None:
            close()


def safely_load_object(mds_storage_id, from_storage, to_storage, logger):
    """
    Copy one object, logging failures instead of raising them.

    @param mds_storage_id: id of the object to copy
    @param from_storage: source storage
    @param to_storage: target storage
    @param logger: logger receiving the success and failure messages
    @return: True if the object was copied, False otherwise
    """
    try:
        load_object(mds_storage_id, from_storage, to_storage)
    except APIError as exc:
        logger.warning(
            'Cannot get/save data from/to "%s": %s. "%s"', mds_storage_id, repr(exc), from_storage.url(mds_storage_id)
        )
        return False
    except ValueError as exc:
        logger.warning('"%s" wrong data: %s', mds_storage_id, repr(exc))
        return False
    except Exception as exc:
        # Deliberately broad: one broken object must not abort the whole copy.
        logger.warning('"%s" unknown error: %s', mds_storage_id, repr(exc))
        return False

    logger.info('"%s" loaded', mds_storage_id)
    return True


def object_human_id(obj):
    """
    Build a human readable identifier for a Page, Revision or File.

    @param obj: object whose class is exactly Page, Revision or File
    @rtype: unicode
    @raise: RuntimeError if the class of obj is none of the three
    """
    # (class, message template, attribute identifying the object)
    known_kinds = (
        (Page, 'Page "%s" [%s]', 'supertag'),
        (Revision, 'Revision "%s" [%s]', 'id'),
        (File, 'File "%s" [%s]', 'id'),
    )
    for kind, template, id_attr in known_kinds:
        # Exact class comparison: subclasses are intentionally not matched.
        if obj.__class__ == kind:
            return template % (getattr(obj, id_attr), obj.mds_storage_id.name)
    raise RuntimeError('unknown type of object')


def generator_aware_logic(generator, load_from, token_from, load_to, token_to, count, errors, logger_name):
    """
    Worker loop: take chunks from a shared generator and copy them until it is exhausted.

    The generator and the errors collection are shared between callers, so
    both are only touched while holding generator_lock. The copying itself
    runs outside of the lock.

    @param generator: shared iterator yielding chunks of storage ids
    @param load_from: source storage url, passed to logic()
    @param token_from: source storage token, passed to logic()
    @param load_to: target storage url, passed to logic()
    @param token_to: target storage token, passed to logic()
    @param count: progress structure, passed to logic()
    @param errors: shared collection extended with the ids that failed to copy
    @param logger_name: name of script logger
    """
    logger = logging.getLogger(logger_name)
    while True:
        with generator_lock:
            try:
                chunk = next(generator)
            except StopIteration:
                logger.debug('Generator exhausted, exiting')
                return

        failed_ids = logic(chunk, load_from, token_from, load_to, token_to, count, logger_name)

        with generator_lock:
            errors.extend(failed_ids)


def logic(objects_to_copy, load_from, token_from, load_to, token_to, count, logger_name):
    """
    Copy the given objects from the source storage to the target storage.

    @param objects_to_copy: iterable of storage ids to copy
    @param load_from: source storage url
    @param token_from: token passed to get_storage() for the source storage
    @param load_to: target storage url
    @param token_to: token passed to get_storage() for the target storage
    @param count: structure holding the progress state of the whole script;
        handed to print_percents() together with the local counter
    @param logger_name: name of script logger
    @return: list of storage ids that could not be copied
    """
    source_storage = get_storage(load_from, load_from, token_from)
    target_storage = get_storage(load_to, load_to, token_to)

    logger = logging.getLogger(logger_name)

    not_copied = []
    # ``processed`` counts the objects handled by this call only, starting from 1.
    for processed, storage_id in enumerate(objects_to_copy, start=1):
        if not safely_load_object(storage_id, source_storage, target_storage, logger):
            not_copied.append(storage_id)

        print_percents(processed, count, logger=logger)

    return not_copied


def validate_args(el_from, el_to, threads):
    """
    Validate script arguments.

    Unless settings.DEBUG is on, the source and the target must differ both as
    urls and by host name. Note that two urls without a host name both parse
    to a hostname of None and are therefore treated as the same storage.

    @param el_from: source storage url
    @param el_to: target storage url
    @param threads: number of threads to run
    @raise ValueError: storages are the same, or threads is lower than 1
    """
    if not settings.DEBUG:
        same_storage = el_from == el_to
        if not same_storage:
            # Urls are parsed only when they are not literally identical.
            source_host = urllib.parse.urlparse(el_from).hostname
            target_host = urllib.parse.urlparse(el_to).hostname
            same_storage = source_host == target_host

        if same_storage:
            raise ValueError('Source storage and target storage are the same "{0}"'.format(el_from))

    if threads < 1:
        raise ValueError('Number of threads must be greater than 0. Got {0}'.format(threads))


@track_time
def copy_data(
    load_from,
    token_from,
    load_to,
    token_to,
    dt_from=None,
    revisions_number=None,
    backup_restricted=False,
    backup_files=False,
    threads=0,
    logger_name=None,
    skip_access_checks_for=None,
):
    """
    Load pages data from one storage instance to another.

    Objects that fail to copy are retried up to five more times in the calling
    thread once the workers have finished.

    @param load_from: source storage url
    @param token_from: token for the source storage
    @param load_to: target storage url
    @param token_to: token for the target storage
    @param dt_from: optional datetime; when given, only pages modified at or after
        it, or owning an active file created at or after it, are selected
    @param revisions_number: when not None, page revisions are copied as well
        (at most this many per page if the value is truthy)
    @param backup_restricted: when true, all pages are taken and the access
        filtering is skipped; otherwise only active, unrestricted pages
    @param backup_files: when true, files attached to the pages are copied too
    @param threads: number of threads to run; must be at least 1
        (NOTE(review): the default of 0 is rejected by validate_args)
    @param logger_name: script logger name
    @param skip_access_checks_for: optional iterable of objects exposing
        ``supertag`` and ``is_wildcard``; matching pages are copied without
        access checks
    @raise ValueError: if the arguments do not pass validate_args
    """

    logger = logging.getLogger(logger_name)

    validate_args(load_from, load_to, threads)

    if backup_restricted:
        pages = Page.objects.all()
    else:
        pages = Page.active.all()

    # filter by date
    if dt_from:
        assert isinstance(dt_from, dt)
        recent_files = File.active.filter(created_at__gte=dt_from)
        pages_with_recent_files = set(recent_files.values_list('page_id', flat=True))
        changed_since = Q(modified_at__gte=dt_from) | Q(id__in=pages_with_recent_files)
        pages = pages.filter(changed_since).distinct()

    # all script errors will be stored here
    failed_ids = deque()

    # count structure -- needed only for pretty output with percents
    count = {'all': pages.count(), 'current': 0}

    logger.info(
        '{0} pages to load by {1} threads from "{2}" to "{3}"'.format(count['all'], threads, load_from, load_to)
    )

    chunks = extended_generator(
        pages=pages,
        logger=logger,
        dt_from=dt_from,
        revisions_number=revisions_number,
        backup_restricted=backup_restricted,
        backup_files=backup_files,
        skip_access_checks_for_supertags=skip_access_checks_for or [],
    )

    worker_args = (chunks, load_from, token_from, load_to, token_to, count, failed_ids, logger_name)
    spawner(target=generator_aware_logic, args=worker_args, maxthreads=threads)

    # Retry whatever failed, sequentially, until nothing is left or the
    # attempts run out.
    max_retries = 5
    still_failing = failed_ids
    if still_failing:
        for _ in range(max_retries):
            logger.info('Command execution done with %d errors, repeating', len(still_failing))
            still_failing = logic(still_failing, load_from, token_from, load_to, token_to, count, logger_name)
            if not still_failing:
                break

    logger.info('Command execution done with %d errors', len(still_failing))


def get_file_ids(page, dt_from):
    """
    Return the storage ids of the files attached to the page.

    @param page: page whose files are looked up through pages_dao.get_files()
    @type dt_from: datetime
    @param dt_from: when truthy, only files modified at or after it and with
        a size greater than zero are returned
    @return: list of ``mds_storage_id`` values
    """
    attached = pages_dao.get_files(page)
    if dt_from:
        attached = attached.filter(modified_at__gte=dt_from, size__gt=0)
    storage_ids = attached.values_list('mds_storage_id', flat=True)
    return list(storage_ids)


def get_revisions_ids(page, dt_from, revisions_number):
    """
    Return the storage ids of the page revisions.

    @param page: page whose ``revision_set`` is queried
    @type dt_from: datetime
    @param dt_from: when truthy, only revisions created at or after it are taken
    @type revisions_number: int
    @param revisions_number: when truthy, only that many revisions with the
        highest ids are taken; a falsy value applies no limit
    @return: list of ``mds_storage_id`` values
    """
    selected = page.revision_set.all()
    if dt_from:
        selected = selected.filter(created_at__gte=dt_from)
    if revisions_number:
        newest_first = selected.order_by('-id')
        selected = newest_first[:revisions_number]
    storage_ids = selected.values_list('mds_storage_id', flat=True)
    return list(storage_ids)


def extended_generator(
    pages, logger, dt_from, revisions_number, backup_restricted, backup_files, skip_access_checks_for_supertags
):
    """
    Generator yielding chunks (sequences) of storage ids that have to be copied.

    First, one tuple is yielded per chunk of ``pages``; it holds the page
    bodies plus, when requested, their revisions and files. After that, extra
    chunks are yielded for the pages selected by
    ``skip_access_checks_for_supertags``; these bypass the access filtering.

    @param pages: pages QuerySet, split into chunks with get_chunked()
    @param logger: script logger
    @param dt_from: optional datetime lower bound for page bodies, revisions and files
    @param revisions_number: when not None, revisions are included
    @param backup_restricted: when false, chunks go through filter_chunk_pages()
    @param backup_files: when true, attached files are included
    @param skip_access_checks_for_supertags: iterable of objects exposing
        ``supertag`` and ``is_wildcard``
    """
    for chunk in get_chunked(pages):

        if not backup_restricted:
            chunk = filter_chunk_pages(chunk, logger)

        objects_to_copy = []
        for page in chunk:

            if page.mds_storage_id is None:
                # The page has no body to copy: report it and skip the body
                # instead of failing on ``.name``. Revisions and files of the
                # page are still collected below.
                logger.warning('Page id %s is empty', page.id)

            # It is possible that page.modified_at < dt_from
            # if the page contains a file created after dt_from
            elif not dt_from or dt_from <= page.modified_at:
                objects_to_copy.append(page.mds_storage_id.name)

            if revisions_number is not None:
                objects_to_copy.extend(get_revisions_ids(page, dt_from, revisions_number))

            if backup_files:
                objects_to_copy.extend(get_file_ids(page, dt_from))

        yield tuple(objects_to_copy)

    # add the pages that must be copied bypassing the access checks
    for skip_check in skip_access_checks_for_supertags:
        if skip_check.is_wildcard:
            pages_query_set = Page.active.filter(supertag__startswith=skip_check.supertag)
        else:
            pages_query_set = Page.active.filter(supertag=skip_check.supertag)
        for page in pages_query_set:
            logger.debug('Going to copy page "%s", (skip_access_checks_for_supertags=True)', page.supertag)
        yield tuple(page.mds_storage_id.name for page in pages_query_set)
        if backup_files:
            for page in pages_query_set:
                yield get_file_ids(page, dt_from)
        if revisions_number is not None:
            for page in pages_query_set:
                yield get_revisions_ids(page, dt_from, revisions_number)
