import logging
from abc import ABC, abstractmethod
from copy import copy
from typing import TypeVar, Callable, Generic, Collection, Any

import gevent
from django.conf import settings
from sentry_sdk import Hub
from ylog import context

# Module-level logger used by the helpers and classes below.
logger = logging.getLogger(__name__)

# Type of the value handed to the ``data_source_provider`` callable of ``ResultBuilderExtension``.
T = TypeVar('T')


class YlogGreenlet(gevent.Greenlet):
    """
    Greenlet which runs its target with the ylog context and the sentry scope captured at construction time.

    When running in a monkeypatched environment, every gevent greenlet has its own independent ylog and sentry
    scope. When greenlets are chained, the context of the parent greenlet is reused so that qloud logs and sentry
    events stay informative (request_id, path, device identifiers, etc).
    """

    def __init__(self, run, *args, **kwargs):
        """
        :param run: callable to execute inside the greenlet
        :param args: positional arguments forwarded to ``gevent.Greenlet``
        :param kwargs: keyword arguments forwarded to ``gevent.Greenlet``
        """
        # Both snapshots are taken while the greenlet object is being constructed.
        captured_scope = copy(Hub.current.scope)
        captured_log_context = context.get_log_context()

        def _wrapped_run(*run_args, **run_kwargs):
            # Replace the scope of the top hub stack entry with the captured one, keeping the entry's first
            # element untouched (presumably the sentry client - see the sentry_sdk Hub internals).
            hub = Hub.current
            hub._stack[-1] = (hub._stack[-1][0], captured_scope)
            with context.LogContext(**captured_log_context):
                return run(*run_args, **run_kwargs)

        super().__init__(_wrapped_run, *args, **kwargs)


def get_results_from_greenlets(greenlet_list: list,
                               error_message: str,
                               timeout_message: str = None,
                               validator: Callable = None,
                               metadata_handler: Callable[[gevent.Greenlet, dict, dict], None] = None,
                               parent_result: Collection = None,
                               message_prefix: str = None):
    """
    Collects the values of finished greenlets, logs the failed ones and kills the ones which are still running.

    :param greenlet_list: where to get the results
    :param error_message: message will be logged if a result is not available due to an exception
    :param timeout_message: message will be logged if a greenlet hasn't finished in time;
        falls back to ``error_message`` when not provided
    :param validator: checks that result of this greenlet should be put in the result
    :param metadata_handler: retrieves metadata from greenlet object if presented and puts to result;
        called as ``metadata_handler(greenlet, value, parent_result)`` for every accepted value
    :param parent_result: where to put the result (only passed through to ``metadata_handler``)
    :param message_prefix: if present, added as [] prefix to error messages. Supposed to be indicating the source
        and a greenlet
    :return: the list with greenlet's results, in the order of ``greenlet_list``
    :raises: the exception of a failed greenlet whose ``raise_exception`` attribute is truthy; errors raised by
        ``validator`` or ``metadata_handler`` propagate as well. In both cases the greenlets which were not
        processed yet and are still running get killed before the error leaves this function.
    """
    timeout_message = timeout_message or error_message
    if message_prefix:
        error_message = f'[{message_prefix}] {error_message}'
        timeout_message = f'[{message_prefix}] {timeout_message}'

    # Materialize once so the tail can be sliced in the error path, whatever iterable was passed in.
    greenlets = list(greenlet_list)
    results = []
    position = 0
    try:
        for position, greenlet in enumerate(greenlets):
            if not greenlet.ready():
                # Not finished in time: ask it to stop without waiting for it to actually die.
                greenlet.kill(block=False)
                logger.error(timeout_message, exc_info=TimeoutError())
                continue

            if not greenlet.successful():
                logger.error(error_message, exc_info=greenlet.exception)
                if getattr(greenlet, 'raise_exception', False):
                    raise greenlet.exception
                continue

            value = greenlet.value
            if not validator or validator(value):
                if metadata_handler:
                    metadata_handler(greenlet, value, parent_result)
                results.append(value)
    except BaseException:
        # The loop is aborted, so the greenlets after the current one would never be looked at.
        # Kill the ones which are still running, otherwise they keep working in the background for nobody.
        for pending in greenlets[position + 1:]:
            if not pending.ready():
                pending.kill(block=False)
        raise
    return results


class DataSource(ABC):
    """
    Base interface of an object which knows how to obtain some data.

    The data is provided to a result builder through ``get_result``.
    """

    @abstractmethod
    def get_result(self) -> Any:
        """Returns the data of this source; must be implemented by every concrete subclass."""


class ChunkDataSource(DataSource):
    """
    Loads sequential data by splitting it into chunks which are loaded in parallel (one greenlet per chunk).
    """
    GREENLET_ERROR_MESSAGE = 'Error loading chunk'
    GREENLET_TIMEOUT_MESSAGE = 'Timeout while trying to load a chunk'

    @abstractmethod
    def load_chunk(self, chunk_params) -> Collection:
        """
        Loads a single chunk of data
        :param chunk_params: one element of the collection returned by ``get_chunk_params``
        :return: chunk of sequential data
        """

    @abstractmethod
    def get_chunk_params(self) -> Collection:
        """
        Describes which chunks have to be loaded: one element per chunk, each element is passed to ``load_chunk``.
        :return: collection with params for loading every required chunk
        """

    @property
    @abstractmethod
    def root_list_field_name(self):
        """
        Name of the field of a loaded chunk which holds the list to be merged across chunks
        """

    def group_result(self, result_chunks: list) -> dict:
        """
        Merges the loaded chunks into a single object.

        The first chunk is used as the container (and is modified in place): its root list is replaced by the
        concatenation of the root lists of all chunks.
        :param result_chunks: chunks which were loaded successfully
        :return: empty dict for no chunks, the chunk itself for a single chunk, the merged container otherwise
        """
        if not result_chunks:
            return {}

        first_chunk = result_chunks[0]
        if len(result_chunks) == 1:
            return first_chunk

        list_field = self.root_list_field_name
        merged = []
        for chunk in result_chunks:
            merged.extend(chunk.get(list_field, []))

        first_chunk[list_field] = merged
        return first_chunk

    @property
    def wait_timeout(self):
        """
        How long (in seconds) to wait for all chunks to be loaded
        """
        return settings.DEFAULT_NETWORK_GREENLET_TIMEOUT

    def get_result(self) -> dict:
        """
        Spawns a greenlet per chunk, waits for them up to ``wait_timeout`` and groups whatever was loaded.
        """
        loaders = [YlogGreenlet.spawn(self.load_chunk, params) for params in self.get_chunk_params()]

        gevent.wait(loaders, timeout=self.wait_timeout)

        loaded_chunks = get_results_from_greenlets(
            loaders,
            error_message=self.GREENLET_ERROR_MESSAGE,
            timeout_message=self.GREENLET_TIMEOUT_MESSAGE,
            message_prefix=self.__class__.__name__,
        )
        return self.group_result(loaded_chunks)


class RangedChunkDataSource(ChunkDataSource):
    """
    Chunk data source which can automatically split request to data source, based on 'offset' and 'limit'
    """

    def __init__(self, offset=0, limit=5) -> None:
        """
        :param offset: position of the first requested element
        :param limit: how many elements (at most) the grouped result will contain
        """
        super().__init__()
        self._offset = offset
        self._limit = limit

    def _build_params(self, offset, limit, params):
        """
        Puts the paging values of one chunk into ``params`` (the passed dict is modified) and returns it.
        """
        params.update({
            self.offset_key: offset,
            self.limit_key: limit,
        })
        return params

    @property
    def offset(self) -> int:
        return self._offset

    @property
    def offset_key(self) -> str:
        """Name of the chunk param which carries the offset"""
        return 'offset'

    @property
    def limit(self) -> int:
        return self._limit

    @property
    def limit_key(self) -> str:
        """Name of the chunk param which carries the limit"""
        return 'limit'

    @property
    def chunk_size(self):
        """How many elements are requested by a single chunk"""
        return settings.DEFAULT_CHUNK_SIZE

    def get_additional_params(self) -> dict:
        """
        A way to provide params which are the same for all chunks
        :return: dict with general params for all chunks
        """
        return {}

    def group_result(self, result_chunks: list) -> dict:
        """
        Groups the chunks like the parent class does and truncates the root list to ``limit`` elements.

        Every chunk is requested with ``chunk_size`` elements, so the merged list can be longer than ``limit``.
        :param result_chunks: chunks which were loaded successfully
        :return: grouped result; returned untouched when it has no root list (e.g. no chunk was loaded)
        """
        result = super().group_result(result_chunks)
        field_root_list = self.root_list_field_name
        root_list = result.get(field_root_list)
        if root_list is None:
            # Nothing was loaded (parent returned {}) or the root field is absent: there is nothing to
            # truncate, and slicing None would raise TypeError.
            return result
        result[field_root_list] = root_list[:self.limit]
        return result

    def get_chunk_params(self) -> list:
        """
        Builds params for every chunk: offsets go from ``offset`` up to ``offset + limit`` with ``chunk_size`` step,
        every chunk gets ``chunk_size`` as its limit plus a copy of the additional params.
        """
        constant_params = self.get_additional_params()
        offset = self.offset
        limit = self.limit
        chunk_size = self.chunk_size

        return [self._build_params(offset_param, chunk_size, constant_params.copy())
                for offset_param in range(offset, limit + offset, chunk_size)]


class FilterableDataSource(DataSource):
    """
    Paginated data source which filters every loaded page and keeps querying pages until enough items are collected.

    ``get_result`` drives the process; page loading, filters and (un)wrapping of the root list are supplied by
    subclasses through the abstract hooks declared below.
    """
    # Not referenced anywhere in the visible code.
    # NOTE(review): presumably a hook for subclasses - confirm it is still needed.
    nested_filterable_source_implementation = None
    # Messages logged when a nested data source greenlet fails / does not finish in time (see `_filter_nested`).
    GREENLET_ERROR_MESSAGE = 'Error loading nested items'
    GREENLET_TIMEOUT_MESSAGE = 'Timeout while trying to load nested items'

    def __init__(self, offset, limit, requested_amount, auto_load_enabled=True):
        """
        :param offset: how many elements should be skipped
        :param limit: how many elements (at most) will be queried per 1 page
        :param requested_amount: how many elements to put in the result
        :param auto_load_enabled: when False, `get_result` stops after processing a single page instead of
            querying further pages until `requested_amount` is reached
        """
        super().__init__()
        self.offset = offset
        self.limit = limit
        self._requested_amount = requested_amount
        self.auto_load_enabled = auto_load_enabled

    @property
    def wait_timeout(self):
        """
        Timeout passed to `gevent.wait` while waiting for the nested data source greenlets
        """
        return settings.DEFAULT_NETWORK_GREENLET_TIMEOUT

    @abstractmethod
    def get_filters(self) -> list:
        """
        :return: list of filters which can be applied to elements of data source.
            Each filter is called with the root list and must return the filtered list (see `_filter`)
        """
        pass

    @abstractmethod
    def wrap_root_list(self, container, target_list, pagination_state_params):
        """
        Puts target list into container

        Called once at the end of `get_result`; its return value is the result of the data source.
        :param container: the first page loaded by `get_result`
        :param target_list: the collected (filtered, possibly truncated) root list
        :param pagination_state_params: dict built by `_get_state_params`
        """
        pass

    @abstractmethod
    def get_root_list(self, container) -> list:
        """
        Extracts the root list from a loaded page
        """
        pass

    @abstractmethod
    def get_nested_list(self, nested_item):
        """
        Extracts the nested list from an item; used by `_validate_nested_greenlet_result`
        """
        pass

    @abstractmethod
    def get_initial_additional_params(self):
        """
        Provides the additional params passed to `get_next_page` before any page has been loaded
        """
        pass

    @abstractmethod
    def extract_additional_params(self, page):
        """
        Extracts from a loaded page the additional params for the next `get_next_page` call;
        the last extracted value also ends up in the pagination state params
        """
        pass

    def get_initial_page(self):
        """
        Through this method initial data can be provided. By default returns empty dict

        When the returned page is empty or has an empty root list, `get_result` queries `get_next_page` instead.
        """
        return {}

    @abstractmethod
    def get_next_page(self, offset, limit, additional_params=None):
        """
        Returns next page of data
        :param offset: how many elements to skip from the beginning
        :param limit: how many elements (at most) will be in the result
        :param additional_params: any other params required by request
        :return: next page with data. If the amount of elements is less than the requested limit - it means it's
            the last page
        """
        pass

    @property
    def has_nested_filterable_data_source(self):
        """
        Override it to control usage of nested filterable data source
        :return: False by default, which makes `_filter_nested` return the root list untouched
        """
        return False

    @abstractmethod
    def create_nested_filterable_source(self, nested_item) -> DataSource:
        """
        Creates the data source for one item of the root list; its `get_result` is run in a separate greenlet
        by `_filter_nested`
        """
        pass

    @property
    def requested_amount(self) -> int:
        """
        How many elements are required to get from this DataSource
        It's better to use a value different from the 'limit' parameter, because with limit data is queried,
        and with 'requested_amount' data is truncated
        :return: integer value, indicates how many elements this data source will return
        """
        return self._requested_amount

    @property
    def truncating_result_list_enabled(self):
        """
        Should the result list be truncated to `requested_amount` elements or not
        """
        return True

    @property
    def max_empty_objects(self):
        """
        How many empty responses in a row can be received while still continuing to query the source
        If the amount of empty responses is bigger than this value - consider that there is no more data
        in this source

        Example:
            actual_empty_response_amount = 0

            query some url with offset = 5:
            got 5 items

            pass these items through filter
            0 items after filtration
            ---> actual_empty_response_amount += 1
            actual_empty_response_amount is 1

            is actual_empty_response_amount > max_empty_responses ? (1 > 1) - No -> continue

            query some url with offset = 10:
            got 5 items

            pass these items through filter
            0 items after filtration
            ---> actual_empty_response_amount += 1
            actual_empty_response_amount is 2

            is actual_empty_response_amount > max_empty_responses ? (2 > 1) - Yes -> stop
        """
        return settings.ALLOWED_EMPTY_RESPONSES

    @property
    def empty_object_threshold(self):
        """
        Fraction of `limit` below which a filtered page is considered empty (see `is_object_empty`)
        """
        return settings.EMPTY_OBJECT_THRESHOLD

    def is_object_empty(self, obj: Collection):
        """
        Checks if the object can be considered as empty

        The real empty objects, like [] or None - are always considered as empty
        But if N items were queried, and after filtration there are only a few of them (some percentage) - then
        this object can be considered as empty
        :return: True if `obj` is falsy or shorter than `limit * empty_object_threshold`, otherwise False
        """
        if not obj:
            return True
        if len(obj) < self.limit * self.empty_object_threshold:
            return True
        return False

    def _filter(self, root_list):
        """
        At this step, root list filtered based on the top level information
        For example, this is the input object('root_list_container'):
        {
          "some_field_1": "some value 1",
          "some_field_2": "some value 2",
          "field_containing_list": [
            {
              "some_item_field_11": "some_item_field_value_11",
              "some_item_field_12": "some_item_field_value_12",
              "nested_item_list_1": [{...}, {...}, ...]
            },
            {
              "some_item_field_21": "some_item_field_value_21",
              "some_item_field_22": "some_item_field_value_22",
              "nested_item_list_2": [{...}, {...}, ...]
            },
            ...
          ]
        }
        Top level information is 'some_item_field_xx', len("nested_item_list_1")
        The items from field 'nested_item_list_x' is not top level information, we don't look at it at this step
        :return: the root list after all filters from `get_filters` were applied one after another
        """
        filters = self.get_filters()
        if not filters:
            return root_list
        for _filter in filters:
            root_list = _filter(root_list)
        return root_list

    def _validate_nested_greenlet_result(self, greenlet_result: dict) -> bool:
        """
        Validates the result of greenlet obtained from method `_filter_nested`
        :return: True - if greenlet's result is valid, otherwise - False
        """
        return bool(self.get_nested_list(greenlet_result))  # only non empty items

    def _filter_nested(self, root_list):
        """
        Replaces every item of the root list with the result of its nested filterable data source.

        Does nothing when `has_nested_filterable_data_source` is False. Otherwise one greenlet per item is
        spawned; items whose greenlet failed or did not finish within `wait_timeout` are dropped from the result.
        NOTE(review): `_validate_nested_greenlet_result` is not passed as `validator` here, so results with an
        empty nested list are kept - confirm whether that is intended.
        """
        if not self.has_nested_filterable_data_source:
            return root_list

        threads = []
        for nested_item in root_list:
            filterable_data_source = self.create_nested_filterable_source(nested_item)
            nested_greenlet = YlogGreenlet.spawn(filterable_data_source.get_result)
            threads.append(nested_greenlet)

        gevent.wait(threads, timeout=self.wait_timeout)
        return get_results_from_greenlets(
            threads,
            error_message=self.GREENLET_ERROR_MESSAGE,
            timeout_message=self.GREENLET_TIMEOUT_MESSAGE,
            message_prefix=self.__class__.__name__,
        )

    def _get_state_params(self, offset, requested_amount, additional_params) -> dict:
        """
        Provides pagination state params
        :return: the dict with amount of processed items(offset), requested elements amount, and additional params
            (additional params override 'offset' / 'requested_amount' if they contain the same keys)
        """
        params = {
            'offset': offset,
            'requested_amount': requested_amount
        }

        if additional_params:
            params.update(additional_params)

        return params

    def get_result(self) -> dict:
        """
        Loads pages one by one, filters them and accumulates the surviving items until `requested_amount` items
        are collected or the source is considered exhausted.

        :return: whatever `wrap_root_list` builds from the first loaded page, the collected root list and the
            pagination state params
        """
        root_list = []
        container = None
        can_query_next_page = True
        offset = self.offset
        first_request = True
        additional_request_params = self.get_initial_additional_params()
        actual_empty_objects = 0

        while len(root_list) < self.requested_amount and can_query_next_page:
            page = None
            if first_request:
                # Only the very first iteration may use the pre-supplied initial page
                page = self.get_initial_page()
                first_request = False

            # No usable initial page (or not the first iteration) - query the source
            if not page or not self.get_root_list(page):
                page = self.get_next_page(offset, self.limit, additional_request_params)
            logger.debug('%s loaded %s items', self.__class__.__name__, len(self.get_root_list(page)))

            additional_request_params = self.extract_additional_params(page)
            next_page_root_list = self.get_root_list(page)

            if not next_page_root_list:
                # NOTE(review): an empty page resets the offset reported in the state params to 0 -
                # presumably to signal that the source is exhausted; confirm
                offset = 0
                next_page_root_list_size = 0
            else:
                # The offset advances by the unfiltered page size
                next_page_root_list_size = len(next_page_root_list)
                offset += next_page_root_list_size

            # The first loaded page becomes the container passed to `wrap_root_list`
            if not container:
                container = page

            if next_page_root_list_size < self.limit:
                # since we get page with amount of data less than requested limit, or even empty page - don't query
                # for the next page
                can_query_next_page = False

            if next_page_root_list:
                next_page_root_list = self._filter_nested(self._filter(next_page_root_list))
                logger.debug('%s after filtration has %s items', self.__class__.__name__, len(next_page_root_list))
                root_list += next_page_root_list

                if self.is_object_empty(next_page_root_list):
                    # Too many (nearly) empty pages in a row - stop querying
                    actual_empty_objects += 1
                    if actual_empty_objects > self.max_empty_objects:
                        can_query_next_page = False
                else:
                    # forget empty objects amount if after empty object we got non empty
                    actual_empty_objects = 0

            if not self.auto_load_enabled:
                # Auto loading is off: a single page per call
                break

        if self.truncating_result_list_enabled:
            root_list = root_list[:self.requested_amount]
        logger.debug('%s has result list with %s items', self.__class__.__name__, len(root_list))
        state_params = self._get_state_params(offset, self.requested_amount, additional_request_params)
        return self.wrap_root_list(container, root_list, state_params)


class Extendable:
    """
    Mixin which keeps an ordered list of extensions in ``self.dependencies``.
    """

    def __init__(self):
        self.dependencies = list()

    def add_extension(self, extension):
        """Registers a single extension after the already registered ones."""
        self.dependencies += [extension]

    def add_extensions(self, extensions):
        """Registers every extension of the given iterable, keeping their order."""
        self.dependencies += extensions

    def get_extensions(self):
        """Returns a shallow copy of the registered extensions, so callers can't alter the internal list."""
        return list(self.dependencies)


class ResultBuilderExtension(Generic[T], Extendable):
    """
    Describes how the result of a parent has to be extended: which data source provides the extra data,
    under which name it is stored and which extensions are applied to the extension itself.
    """

    def __init__(self,
                 data_source_provider: Callable[[T], DataSource],
                 name: str = None,
                 extensions: list = None,
                 raise_exception=False):
        """
        :param data_source_provider: callable which provides a DataSource instance
        :param name: field name in the parent for result of this extension
        :param extensions: extensions for this extension; nothing is registered when empty or omitted
        :param raise_exception: if True - an error raised while getting the result of this extension is thrown,
            if False - the error is not propagated
        """
        super().__init__()
        self.name = name
        self.raise_exception = raise_exception
        self.data_source_provider_callable = data_source_provider
        if extensions:
            self.add_extensions(extensions)


class ResultBuilder(DataSource, Extendable):
    """
    Builds a chained result: takes the result of its own data source and extends it with the results of the
    registered extensions, every extension being resolved by a nested builder in a separate greenlet.
    """
    GREENLET_ERROR_MESSAGE = 'Error loading chunk'
    GREENLET_TIMEOUT_MESSAGE = 'Timeout while trying to load a chunk'

    class NoResultError(ValueError):
        """Raised when the data source result is neither a dict nor a list and ``raise_exception`` is set."""

    def __init__(self, data_source: DataSource, raise_exception=False, stacksize=0,
                 parent_extension=None):
        """
        :param data_source: provides data for current result builder
        :param raise_exception: if True - errors are raised instead of being replaced by an empty result
        :param stacksize: nesting depth of the parent builder (only used for the debug log indentation)
        :param parent_extension: the extension this builder was created for, kept as a string for ``__str__``
        """
        super().__init__()
        self._data_source = data_source
        self.raise_exception = raise_exception
        # Depth of this builder is one level below its parent
        self.stack = (stacksize or 0) + 1
        self.parent_extension = '' if not parent_extension else str(parent_extension)

    def __str__(self):
        description = f'{self.__class__.__name__} ({self._data_source})'
        if not self.parent_extension:
            return description
        return f'{description} from {self.parent_extension}'

    def extend(self, *extensions):
        """Registers the given extensions and returns the builder itself to allow chaining."""
        self.add_extensions(extensions)
        return self

    @property
    def wait_timeout(self):
        """
        How long (in seconds) to wait for all extension greenlets to finish
        """
        return settings.DEFAULT_NETWORK_GREENLET_TIMEOUT

    def _metadata_handler(self, greenlet, value, parent):
        """
        Stores the value produced by an extension greenlet in the parent result under the extension name.
        """
        if parent:
            parent.update({greenlet.extension_name: value})
            return

        # todo: this is a very strange branch, because at first glance the condition above is about
        #  something completely different - figure it out
        if greenlet.raise_exception:
            raise ValueError(f'Greenlet is {greenlet}, value is {value}, parent is {parent}')

        logger.debug(f'Greenlet is {greenlet}, value is {value}, parent is {parent}')

    def get_result(self) -> dict:
        """
        :return: result object(dict) ready for serialization
        """
        logger.debug('%s Getting result of %s builder...', ' -> ' * self.stack, self)

        # noinspection PyBroadException
        try:
            parent_result = self._data_source.get_result()
            if isinstance(parent_result, dict):
                logger.debug('Got result with keys %s', parent_result.keys())
        except Exception as err:
            logger.warning(f'Error when getting result from data source: {err}')
            if self.raise_exception:
                raise
            return {}

        logger.debug('builder has %d extensions: %s', len(self.dependencies), self.dependencies)
        children = []
        for extension in self.dependencies:
            # The data source of the extension is created from the parent result...
            child_source = extension.data_source_provider_callable(parent_result)

            # ...and resolved by a nested builder which inherits the extensions of the extension
            child_builder = ResultBuilder(child_source, extension.raise_exception, self.stack, extension)
            child_builder.add_extensions(extension.get_extensions())

            child = YlogGreenlet.spawn(child_builder.get_result)
            # These attributes are read back by `get_results_from_greenlets` and `_metadata_handler`
            child.raise_exception = extension.raise_exception
            child.extension_name = extension.name
            children.append(child)

        gevent.wait(children, timeout=self.wait_timeout)

        # The returned list is not needed: the values are put into `parent_result` by the metadata handler
        get_results_from_greenlets(
            children,
            metadata_handler=self._metadata_handler,
            parent_result=parent_result,
            error_message=self.GREENLET_ERROR_MESSAGE,
            timeout_message=self.GREENLET_TIMEOUT_MESSAGE,
            message_prefix=self.__class__.__name__,
        )

        # Falsy values are dropped from the final result
        if isinstance(parent_result, dict):
            return {key: value for key, value in parent_result.items() if value}
        if isinstance(parent_result, list):
            return [item for item in parent_result if item]
        if self.raise_exception:
            raise self.NoResultError("Can't obtain result")
        return parent_result
