import base64
import logging
import re
import subprocess

from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import process

from sandbox.projects import resource_types
from sandbox.projects.common import utils
from sandbox.projects.common.dolbilka import resources as dolbilka_resources
from sandbox.projects.common.search import settings as search_settings
from sandbox.projects.common.search.response import cgi as search_cgi
from sandbox.projects.common.search.eventlog import eventlog as search_eventlog


# Request id shape: <digits>-<digits>-<lowercase letters><digits>-<digits>.
# read_eventlog() searches for it in field 4 of ContextCreated/FastCacheHit events
# when computing the "web" flag of a request.
_WEB_REQID_RE = re.compile(r"^[0-9]+-[0-9]+-[a-z]+[0-9]+-[0-9]+$")
# Finds a "text=" CGI parameter located at the very start of a query or right after "&".
_TEXT_RE = re.compile(r"(&|^)text=")
# Captures (as "query_text") everything up to the next "&" after a "text=" substring.
# The leading ".*" is greedy, so the LAST "text=" substring in the string is the one
# captured, and it is not anchored to a parameter boundary.
_REQUEST_TEXT_RE = re.compile(".*text=(?P<query_text>[^&]*).*")

# Numeric event type ids handed to evlogdump (comma-joined, after the "-i" flag).
_EVENT_TYPES = [0, 284, 286, 288, 287, 298, 300, 302, 303, 488, 505]  # TODO: Generate from used event types?
# Collection value that makes a queries plugin accept requests of every collection.
_COLLECTION_ALL = search_settings.ImagesSettings.COLLECTION_ALL


class QueriesFile:
    """Plugin to store some specific subset of queries from eventlog (traditional variant)

    Accepted requests are appended to ``queries_path``, one per line, in the
    form ``/<collection>?<query>``. A plugin created for ``_COLLECTION_ALL``
    accepts requests of any collection; otherwise only requests whose
    collection equals ``self.collection`` are stored.
    """

    queries_resource = resource_types.IMAGES_MIDDLESEARCH_PLAIN_TEXT_REQUESTS
    plan_resource = resource_types.IMAGES_MIDDLESEARCH_PLAN
    plan_type = 'plain'

    def __init__(self, index_type, collection, variant=""):
        """Derive file names and output keys from the index type, collection and variant suffix."""
        self.index_type = index_type
        self.variant = index_type + variant
        self.description = "collection '{}'".format(collection)
        self.queries_path = 'metasearch-{}-{}.queries.txt'.format(collection, self.variant)
        self.plan_path = 'metasearch-{}-{}.plan.bin'.format(collection, self.variant)
        self.queries_key = 'out_{}_{}_queries_resource_id'.format(collection, self.variant)
        self.plan_key = 'out_{}_{}_plan_resource_id'.format(collection, self.variant)
        self.collection = collection
        # Defined up-front so close()/validate() do not fail with AttributeError
        # when called on a plugin that was never opened.
        self._file = None
        self._counter = 0

    def open(self, task):
        """Create (truncate) the queries file and reset the stored-query counter.

        ``task`` is accepted for interface compatibility and is not used here.
        """
        self._file = open(self.queries_path, "w")
        self._counter = 0

    def close(self):
        """Close the queries file; does nothing if the plugin was never opened."""
        if self._file is not None:
            self._file.close()

    def validate(self, min_counters):
        """Fail the task when fewer queries were stored than required.

        ``min_counters`` maps collection name to the minimal acceptable number
        of stored queries; collections absent from the mapping are not checked.

        Raises:
            errors.SandboxTaskFailureError: the counter is below the minimum.
        """
        if self.collection not in min_counters:
            return

        min_value = min_counters[self.collection]
        if self._counter < min_value:
            raise errors.SandboxTaskFailureError("Too little {} queries ({} < {})".format(
                self.queries_path,
                self._counter,
                min_value
            ))

    def write(self, data):
        """Store the request described by ``data`` if this plugin accepts it.

        Reads ``data["collection"]`` and ``data["url"]``. Requests of a foreign
        collection, with an empty query, or rejected by ``_test_query`` are
        skipped silently.
        """
        collection, query = data["collection"], data["url"]

        if self.collection != _COLLECTION_ALL and self.collection != collection:
            return

        if not query or not self._test_query(data):
            return

        query = search_cgi.remove_db_timestamp(query)
        query = search_cgi.remove_supermind_controls(query)
        # The request's own collection is written (not self.collection), which
        # matters for plugins created with _COLLECTION_ALL.
        self._file.write("/{}?{}\n".format(collection, query))
        self._counter += 1

    def _test_query(self, data):
        """Return True when ``data["url"]`` has a non-empty ``text`` CGI parameter."""
        query = data["url"]
        # Sometimes we have completely broken queries (GATEWAY-4887)
        text_param = _TEXT_RE.search(query)
        if not text_param:
            logging.info("Skipping broken query: {}".format(query))
            return False
        # Take the value of the parameter that was actually matched (from the end
        # of "text=" up to the next "&"). A greedy ".*text=" pattern would instead
        # pick the last "text=" substring, e.g. the tail of "context=", and judge
        # emptiness by an unrelated parameter.
        query_text = query[text_param.end():].partition("&")[0]
        if not query_text:
            logging.info("Skipping query with empty text: {}".format(query))
            return False
        return True


class ApphostQueriesFile(QueriesFile):
    """Plugin to store some specific subset of queries from eventlog (apphost variant)

    Differs from the base plugin in what is stored: the ``apphost`` field of a
    request is converted by ``_make_apphost_request`` and written as is, and
    the amount of stored requests is not validated.
    """

    queries_resource = dolbilka_resources.DOLBILKA_STPD_QUERIES
    plan_resource = resource_types.BASESEARCH_PLAN
    plan_type = 'phantom'

    def __init__(self, index_type, collection):
        """Set up the base plugin with the "-apphost" variant suffix and mark the description."""
        QueriesFile.__init__(self, index_type, collection, "-apphost")
        self.description = "{}, apphost".format(self.description)

    def write(self, data):
        """Store the apphost request from ``data`` if the collection matches and it is non-empty."""
        request_collection = data["collection"]
        apphost_query = data["apphost"]

        accepts_collection = (
            self.collection == _COLLECTION_ALL or self.collection == request_collection
        )
        if not accepts_collection or not apphost_query:
            return

        serialized = _make_apphost_request(apphost_query)
        self._file.write(serialized)
        self._counter += 1

    def validate(self, min_counters):
        """Intentionally accept any number of stored requests."""
        pass  # TODO: validate number of requests on apphost too


class Evlogdump:
    """Simple wrapper over evlogdump process to use context manager for termination

    Entering the context starts evlogdump on the given eventlog without
    waiting for it and returns the process stdout pipe; leaving the context
    terminates the process if it was started.
    """

    def __init__(self, evlog_path):
        """Remember the eventlog path; the process itself is started in ``__enter__``."""
        self.__evlog_path = evlog_path
        self.__proc = None

    def __enter__(self):
        """Start evlogdump and return its stdout."""
        logging.info("Starting evlogdump process")
        binary = search_eventlog.get_evlogdump()
        # Event ids from _EVENT_TYPES go to "-i" as one comma-separated argument
        event_ids = ",".join(map(str, _EVENT_TYPES))
        command = [binary, "-o", "-i", event_ids, self.__evlog_path]
        self.__proc = process.run_process(
            command,
            stdout=subprocess.PIPE,
            wait=False,
            outputs_to_one_file=False,
            log_prefix="evlogdump"
        )
        return self.__proc.stdout

    def __exit__(self, exc_type, exc_value, traceback):
        """Terminate the process (if any); exceptions from the body are not suppressed."""
        logging.info("Terminating evlogdump process")
        if not self.__proc:
            return
        utils.terminate_process(self.__proc)


def read_eventlog(evlogdump):
    """Group evlogdump output lines into one dictionary per request.

    ``evlogdump`` is any iterable of text lines. Every line is stripped and
    split by tabs: field 0 is an integer timestamp, field 2 is the event name;
    lines with fewer than three fields are ignored.

    A request starts at ``CreateYSRequest`` and is yielded at ``EndOfFrame``.
    Events seen outside of a started request are ignored, and a request
    without ``EndOfFrame`` is never yielded.

    Yields:
        dict with keys ``web``, ``url``, ``apphost``, ``collection``,
        ``full_start``, ``full_delta``, ``source_start`` (source number ->
        timestamp) and ``source_delta`` (source name -> list of deltas).
        Deltas are timestamp differences divided by 1000.
    """
    data = None
    # Source number -> "<field 6>-<field 7>" of SubSourceInit; kept across requests
    source_names = {}

    for line in evlogdump:
        tabs = line.strip().split("\t")
        if len(tabs) < 3:
            continue
        timestamp = int(tabs[0])
        event_type = tabs[2]
        if event_type == "CreateYSRequest":
            data = {
                "web": False,
                "url": "",
                "apphost": "",
                "collection": "",
                "full_start": timestamp,
                "full_delta": 0,
                "source_start": {},
                "source_delta": {},
            }
        elif not data:  # all other cases depends on dictionary with data
            continue
        elif event_type == "AppHostRequest" and len(tabs) > 5:
            # The guard must cover index 5: strip() drops trailing empty fields,
            # so a line with only five fields would otherwise raise IndexError.
            data["apphost"] = tabs[5]
        elif event_type == "EnqueueYSRequest":
            data["collection"] = tabs[6].lstrip("/")
        elif event_type in ("ContextCreated", "FastCacheHit"):
            # NOTE: "web" becomes None (not False) when the request id does not match
            is_web = _WEB_REQID_RE.search(tabs[4]) and data["collection"] == "yandsearch"
            data.update({
                "url": tabs[3],
                "web": is_web,
            })
        elif event_type == "SubSourceInit" and data["web"]:
            source_num = tabs[3]
            source_names[source_num] = "{}-{}".format(tabs[6], tabs[7])
        elif event_type == "SubSourceRequest" and data["web"]:
            source_num = tabs[3]
            data["source_start"][source_num] = timestamp
        elif event_type in ("SubSourceOk", "SubSourceError") and data["web"]:
            source_num = tabs[3]
            # Responses without a recorded SubSourceRequest are ignored
            if source_num in data["source_start"]:
                delta = float(timestamp - data["source_start"][source_num]) / 1000.0
                # NOTE(review): raises KeyError if no SubSourceInit was recorded
                # for this source number - confirm this cannot happen in practice
                data["source_delta"].setdefault(source_names[source_num], []).append(delta)
        elif event_type == "ReportPrinted":
            data["full_delta"] = float(timestamp - data["full_start"]) / 1000.0
        elif event_type == "EndOfFrame":
            yield data
            data = None


def _make_apphost_request(query):
    binary_data = base64.b64decode(query)
    payload = "POST / HTTP/1.1\r\nContent-Length: {}\r\n\r\n{}".format(
        len(binary_data),
        binary_data
    )
    return "{}\n{}".format(len(payload), payload)
