import logging
import os
import re
import six
import subprocess
import traceback

from sandbox.common import share
import sandbox.common.types.client as ctc

from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import task
from sandbox.sandboxsdk import paths

from sandbox.projects.common.search.eventlog import eventlog
from sandbox.projects.common import utils
from sandbox.sandboxsdk.copy import RemoteCopy

_EVENTLOG_NAME = 'current-eventlog-{meta_type}-{port}'
_EVENTLOG_URL_PREFIX_SKYNET = "{host}:/usr/local/www/logs"


class RemoteCopyHeadSkynet(RemoteCopy):
    """
        Remote copy that fetches files through ``share.skynet_run_and_copy``
        using the ``share.ShareAndCopyHeadFiles`` copy method.
    """

    @classmethod
    def compatible(cls, src, parsed_url):
        """
            Report this copier as compatible with any source: always returns True.
        """
        return True

    def __call__(self, files=None, head=50000000, user=None, **kws):
        """
            Copy ``files`` from the remote source to the destination.

            :param files: file names to copy from the remote directory
            :param head: limit forwarded to ``share.ShareAndCopyHeadFiles``
                (presumably the number of leading bytes to take - TODO confirm)
            :param user: user name forwarded to the share helpers
            :param kws: accepted and ignored
        """
        # The source looks like "<host>:<directory>"; split on the first colon only.
        remote_host, _sep, remote_dir = self._src.partition(":")
        copy_method = share.ShareAndCopyHeadFiles(user, files, remote_dir, head)
        share.skynet_run_and_copy(
            remote_host,
            remote_dir,
            self._dst,
            files=files,
            method=copy_method,
            user=user
        )


class LoadEventlog(task.SandboxTask):
    """
        Base task to load queries from production.

        Subclasses implement ``_filter_eventlog`` and call ``_load_eventlog``
        with the list of meta instances to try.
    """

    client_tags = ctc.Tag.Group.LINUX & ~ctc.Tag.LXC

    def _filter_eventlog(self, eventlog_file, *filter_args):
        """
            Process the dumped eventlog stream; must be overridden.

            :param eventlog_file: stdout pipe of the eventlog dump process
            :param filter_args: extra arguments passed through from ``_load_eventlog``
            :return: a truthy value stops ``_load_eventlog`` from trying further instances
        """
        raise NotImplementedError

    def _load_eventlog(
        self, meta_instances,
        meta_type='int',
        eventlog_name=_EVENTLOG_NAME,
        filter_args=None
    ):
        """
            Try to load logs from production, one instance after another.

            :param meta_instances: iterable of dicts with "host" and "port" keys
            :param meta_type: value substituted into the eventlog file name
            :param eventlog_name: file name template (``meta_type`` and ``port`` placeholders)
            :param filter_args: extra positional arguments for ``_filter_eventlog``
            :return: True as soon as one instance yields a truthy result, False otherwise
        """
        extra_args = [] if filter_args is None else filter_args
        utils.receive_skynet_key(self.owner)
        # An all-lowercase owner is used as the skynet user directly,
        # otherwise fall back to the shared account.
        skynet_user = self.owner if self.owner.islower() else "prod-resource-getter"

        for meta in meta_instances:
            source_url = _EVENTLOG_URL_PREFIX_SKYNET.format(
                meta_type=meta_type, host=meta["host"])
            log_name = eventlog_name.format(meta_type=meta_type, port=meta["port"])
            try:
                loaded = self._read_eventlog(log_name, source_url, extra_args, skynet_user)
                if loaded:
                    return True
            except Exception:
                # Best effort: a failure on one instance must not prevent trying the next.
                logging.info('Ignore exception:\n%s', traceback.format_exc())
                continue

        return False

    def _read_eventlog(self, file_name, log_url, filter_args, skynet_user):
        """
            Download the specified log, dump it and pass the dump to ``_filter_eventlog``.

            The dump process is terminated and the downloaded file is removed
            once filtering finishes or fails.

            :return: whatever ``_filter_eventlog`` returns
        """
        target_path = self.abs_path(file_name)
        fetch_head = RemoteCopyHeadSkynet(log_url, os.path.dirname(target_path))
        fetch_head(files=[file_name], head=50000000, user=skynet_user)

        downloaded_size = os.stat(target_path).st_size
        logging.info('Trying to process eventlog: %s, size = %s bytes', log_url, downloaded_size)

        dumper = None
        try:
            dump_cmd = [eventlog.get_evlogdump(), "-o", "-i", "300", target_path]
            # Started without waiting so the filter can consume stdout as a stream.
            dumper = process.run_process(
                dump_cmd,
                stdout=subprocess.PIPE,
                wait=False,
                outputs_to_one_file=False,
                log_prefix="evlogdump"
            )
            return self._filter_eventlog(dumper.stdout, *filter_args)
        finally:
            utils.terminate_process(dumper)
            paths.remove_path(target_path)


def make_url_filter(base_hosts):
    """
        Returns correct filter (regexp) for queries to basesearch hosts.

        A URL matches when it has the form
        ``http[2]://<host>[.yandex.ru|.search.yandex.net]:<port>/yandsearch<query>``;
        the run of non-whitespace characters after ``/yandsearch`` is captured
        as group 1.

        Host names are matched literally: they are escaped before being put
        into the pattern, so a "." in a host name matches only a dot.

        :param base_hosts: hosts, queries needed for
            (format: [(host, port), ...] without "http://" and ".yandex.ru");
            a falsy port (None, 0, "") means "any port"
        :return: compiled regexp
    """
    def host_template(host, port):
        logging.info("Gather requests to basesearch %s:%s", host, port)
        # Escape the host so regex metacharacters in it are not interpreted.
        # If port is not defined - use any.
        return r'{}(?:\.yandex\.ru|\.search\.yandex\.net)?:{}'.format(
            re.escape(host), port or "\\d+"
        )

    alternatives = "|".join(host_template(host, port) for host, port in base_hosts)
    return re.compile(r'http2?://(?:' + alternatives + r')/yandsearch(\S+)')


def get_requests_to_hosts(queries_source, hosts, filter_monitoring=True):
    """
        Collect requests sent to the given hosts from a dumped eventlog.

        :param queries_source: file name (opened and read line by line),
            or a list/tuple of lines
        :param hosts: list of hosts [(host, port), ...] host without "http://" and ".yandex.ru"
        :param filter_monitoring: if True - skip queries recognised by
            ``utils.is_check_config_request`` (like "info=checkconfig")
        :return: list of queries, each being the part of the URL after "/yandsearch"
        :raises Exception: if queries_source is neither a string nor a list/tuple
    """
    pattern = make_url_filter(hosts)

    opened_here = isinstance(queries_source, six.string_types)
    if opened_here:
        lines = open(queries_source)
    elif isinstance(queries_source, (list, tuple)):
        lines = queries_source
    else:
        raise Exception("queries_source should be string or list")

    collected = []
    try:
        for line in lines:
            found = pattern.search(line)
            if not found:
                continue

            request = found.group(1)
            if filter_monitoring and utils.is_check_config_request(request):
                continue

            collected.append(request)
    finally:
        # Only close what this function opened itself.
        if opened_here:
            lines.close()

    return collected
