from __future__ import absolute_import, unicode_literals, print_function
from hec_sender import SplunkHECSender
from typing import Optional, Union  # noqa
import yt.wrapper as yt


class YtTableAsLogSource(object):
    """
    Abstraction for YT table reading with further data sending to Splunk cluster.

    .. important::
        Please, before usage, contact with SOC team (soc@yandex-team.ru).

    Usage::

        >> # Prepare instance
        >> table_processor = YtTableAsLogSource(cluster="hahn", token="AZAZ", hec_token="ZAZA", table_path="//home/user/table")  # noqa
        >> # Run the magic
        >> table_processor.main()
        >> # Or process manually (for example different table)
        >> data = table_processor.read_table(table="//home/user/table2")
        >> table_processor.send_data_via_hec(data)

    """

    def __init__(
        self,
        cluster,
        token,
        table_path,
        hec_token,
        hec_source=None,
        table_format=None,
        start_row_number=None,
        end_row_number=None,
    ):
        # type: (str, str, str, str, Optional[str], Optional[str], Optional[int], Optional[int]) -> None  # noqa
        """
        Store the settings and build a configured YT client.

        :param cluster: YT cluster name, where table is located. Required for yt.client setup
        :type cluster: str
        :param token: YT token, which have access to provided cluster
        :type token: str
        :param table_path: YT table path, which you want to read and send to Splunk.
        :type table_path:  str
        :param hec_token: Splunk HEC token
        :type hec_token: str
        :param hec_source: Splunk source field value.
            Defaults to *None* (the last component of ``table_path`` is used, see :attr:`hec_source`).
        :type hec_source: str
        :param table_format: Defaults to *None* (yson used). Custom table format for reading.
            Possible values: "yson", "json", "yt.YsonFormat()".
            For more information, read: https://wiki.yandex-team.ru/yt/userdoc/pythonwrapper/#otherformats
        :type table_format: str
        :param start_row_number: Defaults to *None* - no lower bound.
            Row index to start reading from when only a part of the table is needed.
        :type start_row_number: int
        :param end_row_number: Defaults to *None* - no upper bound.
            Row index to stop reading at when only a part of the table is needed.
        :type end_row_number: int
        """
        # Table reading settings (used by main())
        self.table_path = table_path
        self.table_format = table_format
        self.start_row_number = start_row_number
        self.end_row_number = end_row_number

        # Credentials and Splunk settings
        self.cluster = cluster
        self._token = token
        self._hec_token = hec_token
        self._hec_source = hec_source

        # Proxy address is derived from the cluster name
        self.proxy = self.cluster + ".yt.yandex.net"
        # Prepare YT client
        self.yt = yt.client.YtClient(proxy=self.proxy, token=self._token)

        # Filters for compatibility: only modules that have a __file__, are not
        # ".so" files and are not hashlib pass the pickling module filter.
        self.yt.config["pickling"]["module_filter"] = (
            lambda lib: hasattr(lib, "__file__")
            and not lib.__file__.endswith(".so")
            and "hashlib" not in getattr(lib, "__name__", "")
        )
        self.yt.config["pickling"]["force_using_py_instead_of_pyc"] = True
        # Tuning maximum resource usage (not referenced elsewhere in this class)
        self.yt_spec = {
            "job_io": {"table_writer": {"max_row_weight": 128 * 1024 * 1024}}
        }

    @property
    def hec_source(self):
        """
        Splunk source value handed to the HEC sender.

        :return: The explicit ``hec_source`` given to the constructor, or the
            last "/"-separated component of ``table_path`` when none was given.
        :rtype: str
        """
        if self._hec_source is not None:
            return self._hec_source

        return self.table_path.split("/")[-1]

    @property
    def hec_sender(self):
        """
        Build a HEC sender from the current token and source.

        A new ``SplunkHECSender`` is created on every access, so changes to
        ``table_path`` or the source are picked up by the next call.

        :return: Sender constructed with ``token`` and ``source`` keyword arguments.
        :rtype: SplunkHECSender
        """
        return SplunkHECSender(token=self._hec_token, source=self.hec_source)

    @staticmethod
    def _build_row_range(start_row_number, end_row_number):
        # type: (Optional[int], Optional[int]) -> dict
        """
        Build one read range, including only the limits that were provided.

        :param start_row_number: Lower row index or *None* for no lower limit.
        :type start_row_number: int
        :param end_row_number: Upper row index or *None* for no upper limit.
        :type end_row_number: int
        :return: Dict with optional "lower_limit" / "upper_limit" keys.
        :rtype: dict
        """
        row_range = {}
        # Compare against None explicitly: 0 is a valid row index.
        if start_row_number is not None:
            row_range["lower_limit"] = {"row_index": start_row_number}
        if end_row_number is not None:
            row_range["upper_limit"] = {"row_index": end_row_number}
        return row_range

    def read_table(
        self, table, table_format=None, start_row_number=None, end_row_number=None
    ):
        # type: (str, Optional[str], Optional[int], Optional[int]) -> list
        """
        Function for table reading from YT.

        The whole result is materialised into a list before it is returned.

        :param table: Path to table (Example: "//home/infrasec")
        :type table: str
        :param table_format: Defaults to *None* (yson used). Custom table format for reading.
            Possible values: "yson", "json", "yt.YsonFormat()".
            For more information, read: https://wiki.yandex-team.ru/yt/userdoc/pythonwrapper/#otherformats
        :type table_format: str
        :param start_row_number: Defaults to *None* - no lower bound.
            May be given on its own; the range is then open-ended at the top.
        :type start_row_number: int
        :param end_row_number: Defaults to *None* - no upper bound.
            May be given on its own; the range then starts at the beginning.
        :type end_row_number: int
        :return: List of dict, where dicts are rows key-value representation parsed internally by format type.
        :rtype: list
        """
        if start_row_number is None and end_row_number is None:
            # No bounds at all: read the whole table.
            row_iterator = self.yt.read_table(table=table, format=table_format)
        else:
            # NOTE(review): these look like read control attributes; confirm that
            # passing them as path attributes is what the YT client expects.
            control_attributes = {"enable_row_index": True, "enable_range_index": True}

            # TODO: Check TablePath params (contradiction in documentation):
            # 1. https://wiki.yandex-team.ru/yt/userdoc/pythonwrapper/#tablepath
            # 2. https://wiki.yandex-team.ru/yt/userdoc/ypath/#kanonicheskajaformaypath -- current working version
            #
            # A missing bound is omitted from the range instead of being sent
            # as {"row_index": None}.
            ranges = [self._build_row_range(start_row_number, end_row_number)]

            row_iterator = self.yt.read_table(
                self.yt.TablePath(table, ranges=ranges, attributes=control_attributes),
                format=table_format,
            )

        return list(row_iterator)

    def read_table_parallel(self):
        """
        Placeholder for parallel table reading; currently does nothing.

        :return: None
        :rtype: None
        """
        # TODO: https://wiki.yandex-team.ru/yt/userdoc/pythonwrapper/#parallelnoechtenietablic
        pass

    def send_data_via_hec(self, data):
        # type: (Union[dict, list]) -> bool
        """
        Alias for self.hec_sender.send_data function.

        :param data: Dict or list of dicts.
        :type data: dict or list
        :return: Whatever ``send_data`` returns; expected to be True if
            operation succeeded, False otherwise.
        :rtype: bool
        """
        return self.hec_sender.send_data(data)

    def main(self):
        # type: () -> bool
        """
        Main function, which running these steps based on instance attributes:

            1. Read table data
            2. Send table data via hec_sender

        :return: Result of :meth:`send_data_via_hec` for the rows that were read.
        :rtype: bool
        """
        rows = self.read_table(
            table=self.table_path,
            table_format=self.table_format,
            start_row_number=self.start_row_number,
            end_row_number=self.end_row_number,
        )
        return self.send_data_via_hec(rows)
