#!/usr/bin/env python
# -*- coding: utf-8 -*-

from six.moves.urllib import error as urllib_err
from six.moves import http_client
from six.moves import cPickle
from six.moves import range
import os
import time
import re
import socket
import logging
import struct

from . import threadPool
from . import zerolinger
from .tree.node import WalkPath
from .tree.node import Node
from .tree import htmldiff

from sandbox.common import errors as common_errors
from sandbox.projects.common import string as us
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.common.search import factors_decompressor
from sandbox.projects.common.search.requester import sequence_binary_data
from sandbox.projects.common.search.response import patterns as rp
from sandbox.projects.common.search.response import cgi as response_cgi

# Line that separates consecutive responses in text dumps
_RESPONSE_DELIMITER = "-" * 100
# Matches the "&timeout=<digits>" CGI parameter.
# In a raw string a single backslash is needed for the digit class: r"\\d" would match
# a literal backslash followed by the letter "d".
_TIMEOUT_TEMPLATE = re.compile(r"&timeout=\d*")

# Exact "Key" values of SearcherProp nodes that remove_unstable_properties() drops from a response
non_stable_props = [
    "La",
    "HostName",
    "Ncpu",
    "Nctx",
    "scheme.json.nodump",
    "Acc",
    "BinaryBuildTag",  # unstable because of frequent revision changes, SEARCH-2056
    "BsTouched",
    "CalcMetaRelevanceMcs",  # SEARCH-9579
    "Al.Prior",
    "WaitInfo.debug",
    "WaitInfo2.debug",
    "ReqError.debug",
    # unstable because it contains a host name:
    "CurHttpUrl.debug",  # basically it's not dumped at all, but in case of SEARCH-1912 it's dumped
    "Cached",  # SEARCH-4748
    "DocumentsStats",  # SEARCH-11681
]


# Patterns for SearcherProp "Key" values that remove_unstable_properties() drops from a response.
# Patterns are applied with regex.match(), i.e. they are anchored at the beginning of the key.
non_stable_props_regex = [
    # Script execution time
    # TODO: Exception messages are also removed, but they should not be. See search/web/rearrange/lua/lua.cpp
    re.compile(r'^Lua.*'),

    # REVIEW:36892
    re.compile(r'.*_TimeDistr'),

    # Source configuration data (https://wiki.yandex-team.ru/users/and42/configinfo)
    re.compile(r'^ConfigInfo_'),
    re.compile(r'^DebugConfigInfo_'),

    # Debug info about cache keys (SEARCH-4139)
    re.compile(r'^ClientGroup_'),

    # Unstable Cached_ props
    re.compile(r'^Cached_'),

    # This should be stable, but FastCache architecture is ill-formed and should be fixed (SEARCH-4317)
    # Disabled via SEARCH-4340
    # re.compile(r'^StabilityCacheKey'),
]


def _parse_single_response(response_str):
    """
        Build a Node tree from the text dump of a single response.

        Blank lines are skipped. A line ending with " {" opens a nested node, a "}" line closes
        the current one, a line containing ":" adds a property value to the current node, and
        "EventLog: ..." lines are collected into the "EventLog" property of the root node.
    """
    tree_root = Node()
    open_nodes = [tree_root]
    all_lines = response_str.split("\n")

    for line_number, raw_line in enumerate(all_lines, 1):
        text = raw_line.strip()
        if not text:
            continue

        if text.startswith('EventLog: "'):
            # encoded eventlog
            # for SEARCH-575 problem
            # example: EventLog: "1411824203698577\t0\tEnqueueYSRequest\t75\tSEARCH\t0\t/yandsearch"
            escaped_backslash = '\\\\'
            placeholder = '__@@SLASH@@__'
            # cut timestamp and frame number (that is always zero in our case),
            # then perform simplistic quoted C-string decoding
            decoded = text[32:-1].replace(escaped_backslash, placeholder)
            decoded = decoded.replace('\\t', '\t').replace(placeholder, '\\')
            tree_root._props.setdefault("EventLog", []).append(decoded)
            continue

        if text.startswith('EventLog: '):
            # already decoded eventlog (some tasks emit such dumps. maybe some protobuf changes)
            # for SEARCH-575 problem
            # example: EventLog: FrameName<tab>SEARCH<tab>0<tab>yandsearch
            tree_root._props.setdefault("EventLog", []).append(text[10:])
            continue

        if text.endswith(" {"):
            node_name = text.rstrip("{").strip()
            child = Node()
            open_nodes[-1]._nodes.setdefault(node_name, []).append(child)
            open_nodes.append(child)
            continue

        if ":" in text:
            prop_name, _, prop_value = text.partition(":")
            open_nodes[-1]._props.setdefault(prop_name.strip(), []).append(prop_value.strip())
            continue

        # Anything else must be the end of the current node
        if text != "}":
            eh.check_failed(
                "This response line should be a closing bracket.\n"
                "Line: <BEGIN-OF-LINE>{}<END-OF-LINE>\n"
                "Line Number: {}\n"
                "Response: <BEGIN-OF-RESPONSE>{}<END-OF-RESPONSE>\n".format(
                    text,
                    line_number,
                    response_str,
                )
            )

        try:
            open_nodes.pop()
        except IndexError:
            eh.fail(
                "Bad response (pop from empty list): <BEGIN-OF-RESPONSE>{}<END-OF-RESPONSE>".format(
                    all_lines
                )
            )

    return tree_root


def _patch_single_response(response, remove_non_stable_props=False, response_patchers=None, piped_decompressor=None):
    """
        Modify a parsed response in place: optionally strip unstable properties first,
        then call every patcher from `response_patchers` (if any) with the response.
    """
    if remove_non_stable_props:
        remove_unstable_properties(response, piped_decompressor)

    for patcher in response_patchers or ():
        patcher(response)


def load_responses(file_name, ui32separator=False):
    """
        Load text responses from specified file

        Deprecated. Use binary responses and load_binary_responses() in new tasks

        :param file_name: path to the file with responses
        :param ui32separator: when true, records are read with sequence_binary_data();
            otherwise the file is read as text where every response is terminated
            by a line starting with the response delimiter
        :return: list of responses. In text mode anything after the last delimiter line is discarded.
    """

    responses = []

    logging.info("loading {} file {}".format(('---separated', 'ui32separated')[ui32separator], file_name))
    with utils.TimeCounter("loading responses"):
        if ui32separator:
            responses.extend(sequence_binary_data(file_name))
        else:
            # Collect lines and join once per response instead of growing a string with "+="
            current_lines = []
            # The context manager guarantees the file is closed even if reading fails
            with open(file_name) as responses_file:
                for line in responses_file:
                    if line.startswith(_RESPONSE_DELIMITER):
                        responses.append("".join(current_lines))
                        current_lines = []
                    else:
                        current_lines.append(line)

    logging.info("load {} responses".format(len(responses)))
    return responses


def load_binary_responses(file_name):
    """
        Load binary responses from specified file

        Every record is a 4-byte size header (struct format "i") followed by a body of that size.

        :param file_name: path to the file with responses
        :return: generator yielding record bodies
        :raises Exception: when a header or a body is shorter than expected
    """

    # Binary mode is required: in text mode Python 3 returns str (which struct.unpack rejects)
    # and may translate or fail to decode the payload bytes.
    with open(file_name, "rb") as f:
        while True:
            head = f.read(4)
            if not head:
                break
            if len(head) != 4:
                raise Exception("Broken file content")
            size = struct.unpack("i", head)[0]
            body = f.read(size)
            if len(body) != size:
                raise Exception("Broken file content")
            yield body


def write_responses(file_name, responses):
    """
        Save responses to a file, unpickling each of them first.

        Every response is written as its ToStr() text followed by a delimiter line
        that carries the 1-based number of the response.
    """
    with open(file_name, "w") as out:
        for number, pickled in enumerate(responses, 1):
            text = cPickle.loads(pickled).ToStr()
            out.write(text)
            out.write("\n" + _RESPONSE_DELIMITER + str(number) + "\n")


class Params(object):
    """
        Settings passed to _thread_parse_func together with each batch of responses.
    """

    def __init__(self, remove_unstable_props, response_patchers, c_pickle_dump):
        # pickle every parsed response before returning it
        self.CPickleDump = c_pickle_dump
        # callables applied to every parsed response
        self.ResponsePatchers = response_patchers
        # strip unstable properties from every parsed response
        self.RemoveNonStableProps = remove_unstable_props


def _thread_parse_func(responses_list, params):
    """
        Parse and patch a batch of text responses.

        Returns a list with one item per input response: the parsed tree, or its pickled
        form when params.CPickleDump is set. Any exception is logged and re-raised.
    """
    try:
        parsed = []

        for raw_response in responses_list:
            tree = _parse_single_response(raw_response)

            _patch_single_response(
                tree,
                remove_non_stable_props=params.RemoveNonStableProps,
                response_patchers=params.ResponsePatchers,
            )

            parsed.append(
                cPickle.dumps(tree, cPickle.HIGHEST_PROTOCOL) if params.CPickleDump else tree
            )

        return parsed
    except Exception as e:
        eh.log_exception("Response parsing error in _thread_parse_func", e)
        raise


def parse_responses(
    file_name,
    remove_unstable_props=False,
    response_patchers=None,
    use_processes=False,
    process_count=None,
    c_pickle_dump=True,
    min_response_size=0,
    search_hr_responses=False,
    ctx=None,
):
    """
        Load responses from `file_name`, parse and patch them via threadPool.process_data.

        :param remove_unstable_props: strip unstable properties from every response
        :param response_patchers: callables applied to every parsed response
        :param use_processes, process_count, ctx: passed through to threadPool.process_data
        :param c_pickle_dump: return cPickled representations instead of parsed objects
        :param min_response_size: when positive, every raw response must be at least this long
        :param search_hr_responses: passed to load_responses() as `ui32separator`
        :return: list of parsed responses (cPickled when `c_pickle_dump` is set)
    """
    list_str_responses = load_responses(file_name, search_hr_responses)

    logging.debug("Minimum response size: %d", min_response_size)
    if min_response_size > 0:
        # check response sizes
        for response_number, response in enumerate(list_str_responses, 1):
            # The message embeds the whole response, so build it only when the check fails
            if len(response) < min_response_size:
                eh.ensure(
                    False,
                    "Response size is too small: {} bytes while min is {} at response #{},\n"
                    "response is <BEGIN>\n{}\n<END>".format(
                        len(response), min_response_size, response_number, response
                    )
                )

    # Initialize decompressor once (not to deal with multithreaded resource downloading, it is a mess)
    factors_decompressor.FactorsDecompressor.init()

    responses = threadPool.process_data(
        _thread_parse_func,
        list_str_responses,
        Params(remove_unstable_props, response_patchers, c_pickle_dump),
        use_processes=use_processes,
        process_count=process_count,
        ctx=ctx,
    )

    eh.verify(
        len(responses) == len(list_str_responses),
        "Responses count {} does not match responses count {} in source list".format(
            len(responses), len(list_str_responses)
        )
    )

    # Report the real number of parsed responses, after parsing has actually happened
    # (the counter used to be logged before parsing and stayed 0 when the size check was skipped)
    logging.info("Parsed %d responses", len(responses))

    return responses


def patch_responses(responses, response_patchers=None, use_multiprocessing=True):
    """
        Re-patch already pickled responses via threadPool.process_data.

        Every response is unpickled, stripped of unstable properties, passed through
        `response_patchers` and pickled again. Returns the list of re-pickled responses.
    """
    class PatchThreadParams:
        def __init__(self, **kwargs):
            self.response_patchers = kwargs.get('response_patchers')

    def _thread_patch_func(responses_list, params):
        def _repatch(pickled):
            tree = cPickle.loads(pickled)
            _patch_single_response(
                tree,
                remove_non_stable_props=True,
                response_patchers=params.response_patchers
            )
            return cPickle.dumps(tree, cPickle.HIGHEST_PROTOCOL)

        return [_repatch(item) for item in responses_list]

    thread_params = PatchThreadParams(response_patchers=response_patchers)
    result = threadPool.process_data(
        _thread_patch_func,
        responses,
        thread_params,
        use_processes=use_multiprocessing,
    )

    initial_count = len(responses)
    patched_count = len(result)
    eh.verify(
        initial_count == patched_count,
        "Different length of patched ({}) and initial ({}) responses".format(
            patched_count, initial_count
        )
    )

    return result


def compare_responses(
    responses1, responses2,
    diff_indexes=None,
    custom_comparer=None,
    custom_comparer_with_info=None,
):
    """
        Compare two lists of responses pairwise.

        :@param responses1: cPickled representations of parsed responses
        :@param responses2: cPickled representations of parsed responses
        :@param[out] diff_indexes: list that receives indexes of pairs where a diff occurred
        :@param custom_comparer: callback (r1, r2); a true result means the pair has no diff
        :@param custom_comparer_with_info: dict with keys [comparer, info];
            comparer is called as (r1, r2, info), a true result means the pair has no diff
        :@return: True - no diff
    """
    logging.info("Comparing responses")
    eh.verify(
        len(responses1) == len(responses2),
        "Responses1 and responses2 have different length\n"
        "(len(responses1) = {}, len(responses2) = {})".format(len(responses1), len(responses2))
    )

    all_equal = True

    for index, (left, right) in enumerate(zip(responses1, responses2)):
        # Identical pickles need no unpickling at all
        if left == right:
            continue

        left = cPickle.loads(left)
        right = cPickle.loads(right)

        if custom_comparer and custom_comparer(left, right):
            continue

        if custom_comparer_with_info is not None:
            comparer = custom_comparer_with_info.get("comparer")
            info = custom_comparer_with_info.get("info")
            if comparer is not None and comparer(left, right, info):
                continue

        if left.Compare(right):
            continue

        if diff_indexes is not None:
            diff_indexes.append(index)
        all_equal = False

    return all_equal


def compare_response_files(file_name1, file_name2):
    """
        Parse both files with default settings and compare the resulting responses.
        Returns the result of compare_responses().
    """
    return compare_responses(
        parse_responses(file_name1),
        parse_responses(file_name2),
    )


class ExpiredTimeoutException(common_errors.TaskError):
    """
        Raised when the base search fails to respond because its timeout has expired.
    """


def get_error_info(response):
    """
        Extract error details from the "ErrorInfo" node of a parsed response.

        :return: tuple (GotError value, integer code, text).
            Without an "ErrorInfo" node the result is (None, 0, "<no error>");
            when GotError is "NO" the result is ("NO", 0, "<no error>").
    """
    no_error_tail = (0, "<no error>")

    if "ErrorInfo" in response.nodes():
        info_node = response.get_node("ErrorInfo")
        flag = info_node.GetPropValue("GotError")
        if flag != "NO":
            return flag, int(info_node.GetPropValue("Code")), info_node.GetPropValue("Text")
        return (flag,) + no_error_tail

    return (None,) + no_error_tail


def check_response(response, text=None, url=None, ignore_got_error=False):
    """
        Validate a parsed response.

        An empty response (no nodes and no props) is reported via eh.check_failed.
        With `ignore_got_error` the error info is not inspected and True is returned.
        Otherwise the error code decides: 0 -> True, 15 (no documents found) -> False,
        20 -> ExpiredTimeoutException is raised, anything else is reported via eh.check_failed.

        :param text: raw response; when given, it is added (with `url`) to failure messages
    """
    def get_exception_msg(msg):
        if text is None:
            return msg
        return "{},\r\nURL: {}\r\nRESPONSE: {}".format(msg, url, text)

    is_empty = len(response.nodes()) == 0 and len(response.props()) == 0
    if is_empty:
        eh.check_failed(get_exception_msg("response is empty"))

    if ignore_got_error:
        return True  # success

    got_error, code, _ = get_error_info(response)

    if code == 20:
        raise ExpiredTimeoutException(get_exception_msg("timed out"))

    if code in (0, 15):
        # 0 is success, 15 means no documents found
        return code == 0

    # generic failure
    eh.check_failed(
        get_exception_msg("ErrorInfo::GotError = {}, Code = {}".format(got_error, code))
    )


def _get_parsing_err(response):
    """
        Compose a parse-failure message from eh.shifted_traceback()
        and the response text passed through us.shift_right().
    """
    traceback_text = eh.shifted_traceback()
    shifted_response = us.shift_right(response)
    return "Exception during parsing response:\n{}\nResponse:\n{}".format(traceback_text, shifted_response)


# Maximum number of attempts made to fetch a single response
MAX_TIMEOUT_ATTEMPTS = 10


def fetch_data(url, timeout, attempt, max_attempts=MAX_TIMEOUT_ATTEMPTS, ignore_empty_response=False):
    """
        Make one attempt to download `url`.

        :param url: url to fetch
        :param timeout: timeout passed to zerolinger.urlopen
        :param attempt: zero-based number of the current attempt
        :param max_attempts: total number of attempts the caller is going to make
        :param ignore_empty_response: return an empty body as is instead of reporting a failure
        :return: response body, or None when a retriable error happened on a non-final attempt
            (socket timeout, HTTP 503, URLError caused by a socket timeout);
            the function sleeps for 5 seconds before returning None
        :raises: the original exception on the last attempt or for non-retriable errors;
            empty body and bad status line are reported via eh.check_failed
    """
    is_last_attempt = attempt + 1 >= max_attempts

    try:
        data = zerolinger.urlopen(url, timeout=timeout).read()
        # `not data` also covers an empty bytes body, which never compares equal to "" on Python 3
        if not data and not ignore_empty_response:
            eh.check_failed("response is empty")
        return data

    except socket.timeout:
        if is_last_attempt:
            logging.error("Socket timeout [%s sec] for url\n    %s", timeout, url)
            raise
        time.sleep(5)
        return None
    except urllib_err.HTTPError as e:
        # HTTPError must be handled before URLError, which is its base class
        if getattr(e, 'code', None) == 503 and not is_last_attempt:
            time.sleep(5)
            return None
        raise
    except urllib_err.URLError as e:
        if isinstance(getattr(e, 'reason', None), socket.timeout) and not is_last_attempt:
            time.sleep(5)
            return None
        raise

    except http_client.BadStatusLine as e:
        eh.check_failed("Bad status line for url {0}\nStatus line: '{1}'".format(url, e.line))


def _get_parsed_response(url, response_as_str, transform_if_response_is_xml):
    """
        Turn a raw response into a tree.

        When `transform_if_response_is_xml` is set, the request type of `url` is 'xml' and the
        stripped response starts with "<?xml", the response is converted with
        htmldiff.convert_xml_to_tree. In every other case the response is parsed as a text dump
        (a response to a checkconfig request is replaced with an empty string first).

        Formerly an xml-type request whose body did not start with "<?xml" made this
        function return None; now such a body falls back to the text parser.
    """
    # First, try to interpret response as xml
    if transform_if_response_is_xml and response_cgi.get_request_type(url) == 'xml':
        # Remove newlines and spaces
        response_as_stripped_str = response_as_str.strip()
        if response_as_stripped_str.startswith("<?xml"):
            return htmldiff.convert_xml_to_tree(response_as_stripped_str)

    # Requests for configuration checking depend on concrete resources used, no point in comparing them
    if utils.is_check_config_request(url):
        logging.info("Erasing response for checkconfig request %s", url)
        response_as_str = ""
    return _parse_single_response(response_as_str)


def _get_checking_result(url, response_as_str, parsed_response, ignore_got_error):
    """
        Return True for a checkconfig request without inspecting the response;
        otherwise return the result of check_response() for the parsed response.
    """
    if not utils.is_check_config_request(url):
        return check_response(
            parsed_response,
            response_as_str,
            url,
            ignore_got_error=ignore_got_error,
        )
    return True


def fetch_response(
    url,
    parse_and_check_response=True,
    remove_non_stable_props=False,
    response_patchers=None,
    pickle=False,
    save_response_file=None,
    ignore_empty_response=False,
    ignore_got_error=False,
    raise_http_errors=False,
    transform_if_response_is_xml=False,
    callback=None,
    get_responses_from_binary=False,
    piped_decompressor=None,
):
    """
        Fetch `url` (making up to MAX_TIMEOUT_ATTEMPTS attempts) and optionally parse, check and patch it.

        :param parse_and_check_response: parse the response, check it and apply patchers/callback
        :param remove_non_stable_props: strip unstable properties from the parsed response
        :param response_patchers: callables applied to the parsed response
        :param pickle: return the parsed response in cPickled form
        :param save_response_file: %-template of a file name (formatted with the process id)
            to which url, raw response and a delimiter line are appended
        :param ignore_empty_response: do not treat an empty body as a failure
        :param ignore_got_error: passed to the response check
        :param raise_http_errors: re-raise HTTPError as is instead of reporting a check failure
        :param transform_if_response_is_xml: passed to _get_parsed_response
        :param callback: called with the parsed (and patched) response
        :param get_responses_from_binary: return the raw response right after fetching,
            without saving or parsing it
        :param piped_decompressor: passed to _patch_single_response
        :return: tuple (raw response, parsed response or None, checking result or None).
            When every attempt produced no data: ('', empty Node (cPickled if `pickle`), None)
            if `ignore_empty_response` is set, otherwise the function falls off the end
            and implicitly returns None.
    """
    # Timeout for the first attempt; lowered after an attempt that produced no data
    timeout = 300

    for attempt in range(MAX_TIMEOUT_ATTEMPTS):
        start_time = time.time()
        try:
            response_as_str = fetch_data(url, timeout, attempt, ignore_empty_response=ignore_empty_response)
            if not response_as_str:
                # No data (None after a retriable error, or an empty body): retry with a shorter timeout
                timeout = 100
                continue
            if get_responses_from_binary:
                return response_as_str, None, None
            if save_response_file:
                fu.append_file(
                    save_response_file % os.getpid(),
                    "{}\n{}\n{}\n".format(url, response_as_str, _RESPONSE_DELIMITER)
                )

        except Exception as error:
            logging.info('\n---\nException in fetch_response at url "%s"\n---', url)
            if raise_http_errors and isinstance(error, urllib_err.HTTPError):
                raise

            # Any other fetch error is reported as a check failure with the traceback attached
            eh.check_failed(
                "Cannot fetch response:\n"
                "Url: {}\n"
                "Attempt: {}\n"
                "Processing time (seconds): {}\n"
                "Error: {}".format(url, attempt, time.time() - start_time, eh.shifted_traceback())
            )

        parsed_response = None
        checking_result = None
        if parse_and_check_response:
            try:
                parsed_response = _get_parsed_response(url, response_as_str, transform_if_response_is_xml)
                # The check runs before patching, i.e. on the response as it was parsed
                checking_result = _get_checking_result(url, response_as_str, parsed_response, ignore_got_error)

                _patch_single_response(
                    parsed_response,
                    remove_non_stable_props=remove_non_stable_props,
                    response_patchers=response_patchers,
                    piped_decompressor=piped_decompressor
                )

                if callback:
                    callback(parsed_response)

                if pickle:
                    parsed_response = cPickle.dumps(parsed_response, cPickle.HIGHEST_PROTOCOL)

            except ExpiredTimeoutException:
                # The response itself reported a timeout: fetch again unless this was the last attempt
                if attempt + 1 == MAX_TIMEOUT_ATTEMPTS:
                    raise
                else:
                    time.sleep(1)
                    continue

            except common_errors.TaskFailure:
                eh.check_failed(
                    'Parse error with request: {}. Got error: {}'.format(url, _get_parsing_err(response_as_str))
                )

            except Exception:
                eh.fail(
                    'Parse error with request: {}. Got error: {}'.format(url, _get_parsing_err(response_as_str))
                )

        return response_as_str, parsed_response, checking_result

    # Reached only when no attempt produced any data
    if ignore_empty_response:
        parsed_response = Node()
        if pickle:
            parsed_response = cPickle.dumps(parsed_response, cPickle.HIGHEST_PROTOCOL)
        return '', parsed_response, None


def has_removed_docs(response):
    """
        Returns whether search answer is incomplete, i.e. has removed documents (SEARCH-2136)
        @param response: response tree
        @return: True iff some "SearcherProp" node has a key containing 'RemovedDoc_';
            False when there are no such nodes (or no "SearcherProp" nodes at all)
    """
    # A response without "SearcherProp" nodes yields None here, which must not be iterated
    searcher_props = response.nodes().get("SearcherProp") or []
    for searcher_prop in searcher_props:
        key = searcher_prop.GetPropValue("Key")
        if 'RemovedDoc_' in key:
            logging.debug("RemovedDoc found: %s", searcher_prop.GetPropValue("Value"))
            return True
    return False


def decompress_all_factors(value, piped_decompressor):
    """
        Extract factors from `value` with `piped_decompressor.extract` when a decompressor
        is given, and with factors_decompressor.extract_compressed_factors otherwise.
    """
    if not piped_decompressor:
        return factors_decompressor.extract_compressed_factors(value)
    return piped_decompressor.extract(value)


def remove_unstable_properties(response, piped_decompressor=None):
    """
        Strip (in place) the parts of a parsed response that are not expected to be stable.

        - drops "BalancingInfo" and "EventLog" child nodes of the root;
        - drops SearcherProp nodes whose key is listed in `non_stable_props` or matches
          one of `non_stable_props_regex`, and sorts the remaining ones by key;
        - cleans "DebugInfo" and "Head" nodes;
        - for every document under Grouping/Group/Document: drops host name and
          "_CompressedFactors" attributes, replaces the value of "_CompressedAllFactors"
          with its decompressed form, drops "DocRankingFactors" and "SearchInfo" props.

        :param response: parsed response tree
        :param piped_decompressor: optional decompressor passed to decompress_all_factors()
    """
    def drop_keys(mapping, keys):
        for key in keys:
            if key in mapping:
                del mapping[key]

    # "EventLog": see SEARCH-4561, this field cannot be stable by definition.
    # It contains lots of random things: ruid-s, host names, etc
    drop_keys(response._nodes, ["BalancingInfo", "EventLog"])

    def get_key_from_prop(sp):
        return sp.GetPropValue("Key")

    def is_stable_prop(sp):
        key = get_key_from_prop(sp)
        if key in non_stable_props:
            return False
        return all(regex.match(key) is None for regex in non_stable_props_regex)

    searcher_props = response._nodes.get("SearcherProp")
    if searcher_props:
        response._nodes["SearcherProp"] = sorted(
            (sp for sp in searcher_props if is_stable_prop(sp)),
            key=get_key_from_prop,
        )

    def process_debug_info(debug_info):
        drop_keys(debug_info._props, ["HostChain", "CPUUsage", "NotRespondSourceName"])
        drop_keys(debug_info._nodes, ["ConfigInfo", "BinaryBuildTag"])

    WalkPath(response, ["DebugInfo"], process_debug_info)

    def process_head(head):
        drop_keys(head._props, ["SegmentId"])

    WalkPath(response, ["Head"], process_head)

    def process_document(document):
        def process_archive_info(archive_info):
            gta_related_attrs = archive_info._nodes.get("GtaRelatedAttribute")
            if not gta_related_attrs:
                return

            nodes = []
            for attr in gta_related_attrs:
                key = attr.GetPropValue("Key")
                if key in ["_SearcherHostname", "_MetaSearcherHostname", "_CompressedFactors"]:
                    continue

                if key == "_CompressedAllFactors":
                    # Store decompressed factors. The value used to be written back unchanged,
                    # which left `piped_decompressor` unused.
                    value = attr.GetPropValue("Value")
                    attr.SetPropValue("Value", decompress_all_factors(value, piped_decompressor))
                nodes.append(attr)

            archive_info._nodes["GtaRelatedAttribute"] = nodes

        WalkPath(document, ["ArchiveInfo"], process_archive_info)

        drop_keys(document._props, ["DocRankingFactors", "SearchInfo"])

        first_stage_attr = document._nodes.get("FirstStageAttribute")
        if first_stage_attr:
            document._nodes["FirstStageAttribute"] = [
                attr for attr in first_stage_attr if attr.GetPropValue("Key") != "_SearcherHostname"
            ]

    WalkPath(response, ["Grouping", "Group", "Document"], process_document)


class DocData:
    """
        Per-document data collected by get_docs_from_response().
    """

    def __init__(self, doc_id, relevance, factors, group_mode, group_attr):
        # grouping the document belongs to
        self.GroupMode = group_mode
        self.GroupAttr = group_attr
        # the document itself
        self.DocId = doc_id
        self.Relevance = relevance
        self.Factors = factors


def get_docs_from_response(response, piped_decompressor=None):
    """
        Collect a DocData item for every document found under Grouping/Group/Document.

        Document id is "DocHash" (prefixed with "Route/" when the document has a "Route" prop);
        if that fails, "DocId" is used. Factors are taken from the "_RelevFactors" or
        "_CompressedAllFactors" attribute of ArchiveInfo/GtaRelatedAttribute when present.

        :param piped_decompressor: optional decompressor passed to decompress_all_factors()
        :return: list of DocData
    """
    collected = []

    def process_grouping(grouping):
        mode = grouping.GetPropValue("Mode", required=False)
        attr = grouping.GetPropValue("Attr", required=False)

        def process_document(document):
            relevance = document.GetPropValue("Relevance", required=False)
            if not relevance:
                relevance = document.GetPropValue("SRelevance", required=False)

            try:
                doc_id = str(document.GetPropValue("DocHash"))  # SEARCH-2286
                if "Route" in document.props():
                    doc_id = str(document.GetPropValue("Route")) + "/" + doc_id
            except Exception:
                doc_id = document.GetPropValue("DocId")

            doc_data = DocData(
                doc_id=doc_id,
                relevance=relevance,
                factors=None,
                group_mode=mode,
                group_attr=attr,
            )

            def process_gta_related_attr(node):
                key = node.GetPropValue("Key")
                if key == "_RelevFactors":
                    doc_data.Factors = rp.compress_factors(node.GetPropValue("Value"))
                elif key == "_CompressedAllFactors":
                    doc_data.Factors = decompress_all_factors(node.GetPropValue("Value"), piped_decompressor)

            WalkPath(document, ["ArchiveInfo", "GtaRelatedAttribute"], process_gta_related_attr)

            collected.append(doc_data)

        WalkPath(grouping, ["Group", "Document"], process_document)

    WalkPath(response, ["Grouping"], process_grouping)

    return collected
