# -*- coding: utf-8 -*-

import six
import copy
import json
import logging
import urllib
import urllib2
import time

from sandbox.projects import resource_types
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk import errors as se
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.sandboxapi import ARCH_ANY

from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils
from sandbox.projects.common import dolbilka
from sandbox.projects.common.search import queries as sq
from sandbox.projects.common.search.response import cgi as response_cgi

from . import basesearch_response_parser
from . import threadPool
from .qtree_sample import get_qtreedbg_sample_value


class QueriesParameter(sp.ResourceSelector):
    """Required selector of the resource holding the source queries (read line by line)."""
    name = 'queries_resource_id'
    description = 'Queries'
    resource_type = sq.QUERIES_RESOURCE_TYPES
    group = "CGI params"
    required = True


class ParseAndCheckResponseParameter(sp.SandboxBoolParameter):
    """Flag: parse and check every response; must be off when dolbilka mode is used."""
    name = 'parse_and_check_response'
    description = 'Parse and check responses'
    group = "CGI params"
    default_value = True


class MaxQueriesParameter(sp.SandboxIntegerParameter):
    """Limit on the number of source search queries taken from the queries resource."""
    name = 'max_queries'
    description = 'Max source search queries to get'
    group = "CGI params"
    default_value = 100


class MaxFetchQueriesParameter(sp.SandboxIntegerParameter):
    """Limit on the number of source snippet (fetch) queries, i.e. queries containing '&dh='."""
    name = 'max_fetch_queries'
    description = 'Max source snippet queries to get'
    group = "CGI params"
    default_value = 100


class UseQueriesWithFoundDocsParameter(sp.SandboxBoolParameter):
    """Flag: keep only source queries for which the component returns documents."""
    name = 'use_queries_with_found_docs'
    description = 'Use only queries with found docs'
    group = "CGI params"
    default_value = False


class ResponseTimeoutParameter(sp.SandboxIntegerParameter):
    """Timeout forced onto request URLs; it is applied only when the value is greater than zero."""
    name = 'response_timeout'
    description = 'Response timeout'
    group = "CGI params"
    default_value = 0


class SaveCgiInfoParameter(sp.SandboxBoolParameter):
    """Flag described as 'save CGI params info'."""
    # NOTE(review): this parameter is not read anywhere in this file; the metadata
    # getters are driven by SaveTestingQueriesParameter instead - confirm intent.
    name = 'save_cgi_info'
    description = 'Save CGI params info'
    group = "CGI params"
    default_value = False


class SaveTestingQueriesParameter(sp.SandboxBoolParameter):
    """Flag: save the generated queries (annotated and plain) as task resources."""
    name = 'save_testing_queries'
    description = 'Save testing queries info'
    group = "CGI params"
    default_value = False


class SavePlanParameter(sp.SandboxBoolParameter):
    """Flag: convert the generated queries to a dolbilka plan and save it as a resource."""
    name = 'save_plan'
    description = 'Save dolbilka plan'
    group = "CGI params"
    default_value = False


class UseOnlyTheseParams(sp.SandboxStringParameter):
    """
    Optional whitelist of parameters to test.

    The value is a '&'-separated list; a complex parameter is written as
    'name->sub1#sub2' (an empty part after '->' selects all its sub-parameters).
    An empty string means that all reported parameters are tested.
    """
    name = 'cgi_params_to_use'
    description = 'Use only these cgi parameters:'
    group = "CGI params"
    default_value = ''


class UseDolbilka(sp.SandboxBoolParameter):
    """Flag: shoot the generated queries with dolbilka instead of the thread pool."""
    name = 'use_dolbilka'
    description = "Use dolbilka instead of threading"
    group = "CGI params"
    default_value = False


class CgiDiagonalMode(sp.SandboxBoolParameter):
    """Flag described as enabling the fast 'diagonal' CGI test method."""
    # NOTE(review): not read in this file; presumably the calling task maps it
    # to the `optimized` argument of test_params - verify against the caller.
    name = 'cgi_diagonal_mode'
    description = "Use fast diagonal CGI test method"
    group = "CGI params"
    default_value = False


# All task parameters declared in this module, followed by the thread pool parameters.
PARAMS = (
    QueriesParameter,
    UseOnlyTheseParams,
    ParseAndCheckResponseParameter,
    MaxQueriesParameter,
    MaxFetchQueriesParameter,
    UseQueriesWithFoundDocsParameter,
    ResponseTimeoutParameter,
    SaveCgiInfoParameter,
    SaveTestingQueriesParameter,
    SavePlanParameter,
    UseDolbilka,
    CgiDiagonalMode,
) + threadPool.PARAMS


class _Params(object):
    """Settings bundle handed to the query worker function."""

    def __init__(self, component, parse_and_check_response, response_timeout):
        # Plain attribute storage, no validation is performed.
        self.response_timeout = response_timeout
        self.parse_and_check_response = parse_and_check_response
        self.component = component


def _get_queries_with_found_docs(component, queries, max_queries=0):
    """
    Select the queries for which the component finds at least one document.

    Every query is sent to the component; the scan stops as soon as
    ``max_queries`` suitable queries are collected.

    :param component: component the queries are sent to
    :param queries: list of query strings
    :param max_queries: number of queries to collect, 0 means "no limit"
    :return: list of queries with found documents, in the original order
    """
    logging.info("use only queries with found docs")

    if max_queries == 0:
        max_queries = len(queries)
    result = []

    for query in queries:
        response = _fetch_response(component, query, parse_and_check_response=True)

        if response is None:
            # _fetch_response returns None when the request was answered with HTTP 400;
            # such a query has no documents to offer, so it is simply skipped
            docs = []
        else:
            _, parsed_response, _ = response
            docs = basesearch_response_parser.get_docs_from_response(parsed_response)
            logging.info("Found %s docs", len(docs))

        if docs:
            result.append(query)
        else:
            logging.info("ignore query")

        if len(result) == max_queries:
            break

    logging.info("result: %s queries", len(result))
    return result


def _get_queries(ctx, component, count=None):
    """
    Load the source queries and select the search and fetch queries to work with.

    :param ctx: task context with parameter values
    :param component: component used to probe queries when only queries
        with found documents are requested
    :param count: explicit limit applied to both query kinds; when None the
        limits come from MaxQueriesParameter (search queries) and
        MaxFetchQueriesParameter (fetch queries)
    :return: tuple (search_queries, fetch_queries)
    """
    queries_file = channel.task.sync_resource(ctx[QueriesParameter.name])
    queries = fu.read_lines(queries_file)

    search_queries, fetch_queries = _divide_queries(queries)

    # Each query kind has its own limit. Reusing a single `count` variable would
    # make the fetch limit silently equal to the search limit.
    if count is None:
        max_search_queries = utils.get_or_default(ctx, MaxQueriesParameter)
        max_fetch_queries = utils.get_or_default(ctx, MaxFetchQueriesParameter)
    else:
        max_search_queries = max_fetch_queries = count

    def do(queries_list, max_queries):
        if ctx[UseQueriesWithFoundDocsParameter.name]:
            return _get_queries_with_found_docs(component, queries_list, max_queries)
        if max_queries > 0:
            return queries_list[:max_queries]
        # non-positive limit means "take everything"
        return queries_list

    logging.info("Filter search queries")
    search_queries = do(search_queries, max_search_queries)

    logging.info("Filter fetch queries")
    fetch_queries = do(fetch_queries, max_fetch_queries)

    return search_queries, fetch_queries


def _divide_queries(queries):
    """
    Split queries into a pair (search queries, fetch queries).

    A query containing '&dh=' is treated as a fetch query, any other query
    as a search query. The relative order inside each list is preserved.
    """
    search_queries = []
    fetch_queries = []
    for query in queries:
        if '&dh=' in query:
            fetch_queries.append(query)
        else:
            search_queries.append(query)
    return search_queries, fetch_queries


def _join_query(parsed_query):
    """Serialize parsed CGI parameters back into a query string that starts with '?'."""
    encoded_params = urllib.urlencode(parsed_query, doseq=True)
    return '?' + encoded_params


# How a sub-parameter name is matched against the ';'-separated pieces of a complex value.
_SUBPARAM_TYPE_EXACT = 0
_SUBPARAM_TYPE_PREFIX = 1
_SUBPARAM_TYPE_SUFFIX = 2


class _SubParamValue(object):
    """Value of one sub-parameter of a complex CGI parameter."""

    def __init__(self, name, value, type_):
        # name: sub-parameter name; value: piece to mix in (None means "remove");
        # type_: one of the _SUBPARAM_TYPE_* constants
        self.type_ = type_
        self.value = value
        self.name = name

def _mixin_cgi_param_values(parsed_query, param_name, param_values):
    """
    Build variants of a query where one CGI parameter is removed or overridden.

    The first variant always has ``param_name`` removed. Then one variant is
    produced per entry of ``param_values``:

    * a ``_SubParamValue`` entry edits the original value(s): they are split
      on ';', the pieces matching the sub-parameter are dropped and, unless
      the entry value is None, the entry value is appended;
    * any other entry replaces the whole value of the parameter.

    :param parsed_query: mapping of parameter name to list of values (not modified)
    :param param_name: name of the parameter to vary
    :param param_values: values to mix in
    :return: list of (query string, description) pairs
    """
    mutated_query = copy.deepcopy(parsed_query)
    if param_name in mutated_query:
        mutated_query.pop(param_name)

    variants = [
        (_join_query(mutated_query), 'removed param "{}"'.format(param_name))
    ]

    original_pieces = None  # ';'-separated pieces of the original value, computed lazily
    kept_pieces = None  # original pieces that do not belong to the current sub-parameter
    kept_for_name = None  # sub-parameter name kept_pieces was computed for

    for value in param_values:
        if not isinstance(value, _SubParamValue):
            mutated_query[param_name] = value
            query = _join_query(mutated_query)
            variants.append((query, u'set {}="{}"'.format(unicode(param_name), unicode(value))))
            continue

        sub_name = value.name
        sub_value = value.value

        if original_pieces is None:
            original_pieces = []
            for original in parsed_query.get(param_name, ''):
                original_pieces.extend(original.split(';'))

        if kept_for_name != sub_name:
            kept_for_name = sub_name
            if value.type_ == _SUBPARAM_TYPE_PREFIX:
                kept_pieces = [piece for piece in original_pieces if not piece.startswith(sub_name)]
            elif value.type_ == _SUBPARAM_TYPE_SUFFIX:
                kept_pieces = [piece for piece in original_pieces if not piece.endswith(sub_name)]
            else:
                kept_pieces = [
                    piece for piece in original_pieces
                    if not (piece == sub_name or piece.startswith(sub_name + '='))
                ]

        if sub_value is None:
            mutated_query[param_name] = [';'.join(kept_pieces)]
            query = _join_query(mutated_query)
            variants.append((query, 'removed param "{}.{}"'.format(param_name, sub_name)))
        else:
            mutated_query[param_name] = [';'.join(kept_pieces + [sub_value])]
            query = _join_query(mutated_query)
            variants.append((query, u'set {}.{}'.format(unicode(param_name), unicode(sub_value))))

    return variants


def _mixin_cgi_params(queries_with_type, generator):
    """
    Apply ``generator`` to every source query and collect everything it produces.

    A search query is processed twice: first with 'haha=da' set, then with
    the 'haha' parameter removed. Any other query is processed once as is.

    :param queries_with_type: iterable of (query string, is_search_query) pairs
    :param generator: callable taking a parsed query and returning a list
    :return: concatenation of all generator results
    """
    generated = []

    for raw_query, is_search_query in queries_with_type:
        parsed = sq.parse_cgi_params(raw_query)

        if is_search_query:
            parsed['haha'] = ['da']
            generated.extend(generator(parsed))
            del parsed['haha']

        generated.extend(generator(parsed))

    return generated


def _mixin_cgi_params_optimized(queries_with_type, generator_optimized):
    """
    Parse all source queries and pass the whole list to ``generator_optimized``.

    A search query contributes two entries: one with 'haha=da' set and one
    with the 'haha' parameter removed. Any other query contributes one entry.

    :param queries_with_type: iterable of (query string, is_search_query) pairs
    :param generator_optimized: callable taking the list of parsed queries
    :return: whatever ``generator_optimized`` returns
    """
    parsed_queries = []

    # build parsed queries list
    for query, is_search_query in queries_with_type:
        parsed_query = sq.parse_cgi_params(query)

        if is_search_query:
            # The two variants must be distinct objects: appending the same dict
            # twice and deleting 'haha' in between would drop 'haha' from both entries.
            with_haha = copy.copy(parsed_query)
            with_haha['haha'] = ['da']
            parsed_queries.append(with_haha)

            if 'haha' in parsed_query:
                del parsed_query['haha']
            parsed_queries.append(parsed_query)
        else:
            parsed_queries.append(parsed_query)

    # generate
    return generator_optimized(parsed_queries)


def _save_testing_queries(name, queries_with_info):
    """
    Save generated queries as two task resources.

    The first resource holds every query preceded by a '# <info>' comment line,
    the second one holds the queries only, one per line.

    :param name: test name used in resource descriptions and file names
    :param queries_with_info: list of (query, info) pairs
    """
    annotated_resource = channel.task.create_resource(
        'Annotated {} param requests'.format(name),
        '{}_annotated_queries.txt'.format(name),
        resource_types.OTHER_RESOURCE,
        arch=ARCH_ANY,
        attributes={"ttl": 5},
    )

    plain_resource = channel.task.create_resource(
        '{} param requests plan'.format(name),
        '{}_plain_text_queries.txt'.format(name),
        resource_types.PLAIN_TEXT_QUERIES,
        arch=ARCH_ANY,
        attributes={"ttl": 5},
    )

    with open(annotated_resource.path, 'w') as annotated_file:
        for query, info in queries_with_info:
            annotated_file.write('# ' + info + '\n')
            annotated_file.write(query + '\n')
    channel.task.mark_resource_ready(annotated_resource)

    with open(plain_resource.path, 'w') as plain_file:
        for query, _ in queries_with_info:
            plain_file.write(query + '\n')
    channel.task.mark_resource_ready(plain_resource)


def _save_plan(name, queries):
    """
    Convert queries to a dolbilka plan and publish it as a task resource.

    The queries are first written to 'generated_cgi_queries.txt' and then
    converted into the file of the newly created plan resource.
    """
    plain_queries_path = 'generated_cgi_queries.txt'
    fu.write_lines(plain_queries_path, queries)

    plan = channel.task.create_resource(
        'Dolbilka plan {}'.format(name),
        '{}_generated_plan.txt'.format(name),
        resource_types.BASESEARCH_PLAN,
        arch=ARCH_ANY,
        attributes={"ttl": 5},
    )

    dolbilka.convert_queries_to_plan(plain_queries_path, plan.path)
    channel.task.mark_resource_ready(plan)


def _thread_query_func(queries, params):
    """Worker function: fetch a response for every query; the responses are discarded."""
    for query in queries:
        _fetch_response(
            params.component,
            query,
            response_timeout=params.response_timeout,
            parse_and_check_response=params.parse_and_check_response,
        )


def _fetch_response(component, query, response_timeout=None, parse_and_check_response=True):
    """
    Send a query to the component on localhost and return the fetched response.

    :param component: component to query, its ``port`` attribute is used
    :param query: query string appended to the '/yandsearch' path
    :param response_timeout: timeout forced onto the URL when greater than zero
    :param parse_and_check_response: ask for a parseable response ('&hr=da')
        and let the parser parse and check it
    :return: result of ``basesearch_response_parser.fetch_response`` (callers in
        this module unpack it as a 3-tuple), or None if the component answered
        with HTTP 400
    """
    # FIXME(mvel): Use component.fetch() instead
    url = 'http://localhost:{0}/yandsearch'.format(component.port) + query

    timeout_requested = response_timeout is not None and response_timeout > 0
    if timeout_requested:
        url = response_cgi.force_timeout(url, response_timeout)
    if parse_and_check_response:
        url += '&hr=da'

    try:
        return basesearch_response_parser.fetch_response(
            url,
            parse_and_check_response=parse_and_check_response,
            ignore_empty_response=True,
            # skip all queries that give us errors, see https://st.yandex-team.ru/SEARCH-955#1507655504000
            ignore_got_error=parse_and_check_response,
            raise_http_errors=True,
        )
    except Exception as e:
        # only HTTP errors and task failures are handled here
        if not isinstance(e, (urllib2.HTTPError, se.SandboxTaskFailureError)):
            raise

        # HTTP 400 is tolerated: the query is silently skipped
        if isinstance(e, urllib2.HTTPError) and e.getcode() == 400:
            return None

        eh.check_failed(
            "Cannot fetch response for url\n"
            "{0}\n"
            "Exception:\n{1}".format(url, eh.shifted_traceback())
        )


def _test_params(ctx, component, name, generator, optimized=False, chunked_generator=None):
    """
    Generate queries with the given generator and test the component with them.

    :param ctx: task context with parameter values
    :param component: component under test
    :param name: test name used in logs and in names of saved resources
    :param generator: in regular mode a callable applied to every parsed query,
        in optimized mode a callable applied to the whole list of parsed queries;
        not used in regular mode when ``chunked_generator`` is given
    :param optimized: use the "diagonal" mode
    :param chunked_generator: optional list of per-query generators that are
        applied one after another (regular mode only)
    :return: number of generated queries divided by the time spent sending them
    """

    # 3k queries is enough for mixing with all possible cgi params
    # (necessary count for diagonal mode is approximately 1100)
    count = 3000 if optimized else None
    search_queries, fetch_queries = _get_queries(ctx, component, count=count)

    with utils.TimeCounter("mixing params"):
        typed_queries = [(x, True) for x in search_queries] + [(x, False) for x in fetch_queries]
        if optimized:
            # diagonal mode
            queries_with_info = _mixin_cgi_params_optimized(typed_queries, generator)
        else:
            # old algo
            # Python multiprocessing fails ('bad message length')
            # if very large data needs to be transferred;
            # test_cgi_params has a very large multiplier for queries count,
            # allow the caller to split multiprocessing calls in chunks
            if chunked_generator:
                queries_with_info = []
                for g in chunked_generator:
                    # the lambda reads `g` lazily; its result is consumed within this iteration
                    queries_with_info += threadPool.process_data(
                        func=lambda queries_with_type, _: _mixin_cgi_params(queries_with_type, g),
                        list_to_process=typed_queries,
                        params=None,
                        ctx=ctx
                    )
            else:
                def _thread_mixin_func(queries_with_type, _):
                    # _ is for params (not used)
                    return _mixin_cgi_params(queries_with_type, generator)

                queries_with_info = threadPool.process_data(
                    func=_thread_mixin_func,
                    list_to_process=typed_queries,
                    params=None,
                    ctx=ctx,
                )

    logging.info("Generated %s queries of type %s", len(queries_with_info), name)

    if utils.get_or_default(ctx, SaveTestingQueriesParameter):
        _save_testing_queries(name, queries_with_info)

    queries = [q for q, info in queries_with_info]
    parse_responses = utils.get_or_default(ctx, ParseAndCheckResponseParameter)

    if utils.get_or_default(ctx, SavePlanParameter):
        _save_plan(name, queries)

    start = time.time()
    num_queries = len(queries)
    if utils.get_or_default(ctx, UseDolbilka):
        logging.info("Using dolbilka mode, response timeout will be ignored")
        queries = [response_cgi.remove_timeout(q) for q in queries]
        eh.ensure(not parse_responses, "Cannot parse responses in dolbilka mode. ")
        queries_file_name = 'generated_cgi_queries.txt'
        plan_file_name = 'generated_cgi_plan.dat'
        fu.write_lines(queries_file_name, queries)
        del queries  # let's hope this will free some memory for middlesearch
        dolbilka.convert_queries_to_plan(queries_file_name, plan_file_name)
        d_executor = dolbilka.DolbilkaExecutor()
        d_executor.requests = num_queries
        d_executor.run_session_and_dumper(
            plan_file_name,
            component,
            session_name='cgi',
            run_once=True,
        )
    else:
        # read the settings the same way as every other parameter of this function,
        # so that a value missing from ctx falls back to the parameter default
        response_timeout = utils.get_or_default(ctx, ResponseTimeoutParameter)
        threadPool.process_data(
            func=_thread_query_func,
            list_to_process=queries,
            params=_Params(component, parse_responses, response_timeout),
            ctx=ctx
        )

    return num_queries / (time.time() - start)


def _get_cgi_info(component, save_info=False):
    """
    Return the metadata on CGI parameters reported by the component.

    :param component: component to ask ('?info=cgimetadata' request)
    :param save_info: also save the raw metadata as the 'cgi_info.txt' resource
    :return: metadata decoded from JSON
    """
    raw_meta = None
    meta = None
    try:
        raw_meta, _parsed, _error = _fetch_response(
            component, '?info=cgimetadata', parse_and_check_response=False,
        )
        meta = json.loads(raw_meta)
    except Exception as err:
        eh.log_exception("Cannot fetch CGI metadata:\n{}".format(raw_meta), err)
        eh.check_failed("Cannot fetch CGI metadata: {}, see logs for details".format(err))

    if not save_info:
        return meta

    info_resource = channel.task.create_resource(
        'cgi metadata',
        'cgi_info.txt',
        resource_types.OTHER_RESOURCE,
        ARCH_ANY,
        attributes={"ttl": 5},
    )
    fu.write_file(info_resource.path, raw_meta)
    channel.task.mark_resource_ready(info_resource)

    return meta


def _get_complex_param_values(param):
    """
    Build the list of test values for a complex CGI parameter.

    For a "single cgi" parameter the result is a list of strings, each of the
    form 'sub1=v1;sub2=v2;...', where the i-th string takes the i-th value of
    every exact sub-parameter (cycling through shorter value lists).

    Otherwise the result is a list of ``_SubParamValue`` objects: for every
    exact, prefix and suffix sub-parameter there is one "remove" entry
    (value None) followed by one entry per test value.

    :param param: metadata of the complex parameter
    :return: list of values suitable for ``_mixin_cgi_param_values``
    """
    if param['is_single_cgi']:
        # sub-parameters without any values cannot contribute to a combination
        exact_subparams = [p for p in param['exact_subparams'] if p.get('values')]
        combinations_count = max([0] + [len(p['values']) for p in exact_subparams])

        values = []
        for i in range(combinations_count):
            sub_param_with_values = []

            for sub_param in exact_subparams:
                vv = sub_param['values']
                sub_param_with_values.append(u'{0}={1}'.format(unicode(sub_param['name']), unicode(vv[i % len(vv)])))

            values.append(';'.join(sub_param_with_values))
    else:
        values = []
        for sub_param in param['exact_subparams']:
            values.append(_SubParamValue(sub_param['name'], None, _SUBPARAM_TYPE_EXACT))
            for value in ['', 'foobar'] + sub_param.get('values', []):
                values.append(_SubParamValue(sub_param['name'], sub_param['name'] + '=' + value, _SUBPARAM_TYPE_EXACT))

        for prefix_sub in param['prefix_subparams']:
            values.append(_SubParamValue(prefix_sub['name'], None, _SUBPARAM_TYPE_PREFIX))
            for value in ['', 'foobar'] + prefix_sub.get('values', []):
                values.append(_SubParamValue(prefix_sub['name'], prefix_sub['name'] + value, _SUBPARAM_TYPE_PREFIX))

        for suffix_sub in param['suffix_subparams']:
            # must be _SubParamValue objects like the other kinds: plain tuples would be
            # treated by _mixin_cgi_param_values as whole-parameter values
            values.append(_SubParamValue(suffix_sub['name'], None, _SUBPARAM_TYPE_SUFFIX))
            for value in ['', 'foobar'] + suffix_sub.get('values', []):
                values.append(_SubParamValue(suffix_sub['name'], value + suffix_sub['name'], _SUBPARAM_TYPE_SUFFIX))

    return values


def _update_cgi_meta(cgi_params, cgi_meta):
    """
    Restrict CGI metadata to the parameters selected by the user.

    ``cgi_params`` is a '&'-separated list. A plain name selects a simple
    parameter; 'name->sub1#sub2' selects a complex parameter and keeps only the
    listed sub-parameters, while 'name->' keeps all of its sub-parameters.
    If nothing is selected in one of the two groups, that group is taken from
    ``cgi_meta`` unchanged.

    :param cgi_params: selection string
    :param cgi_meta: dict with 'cgi_params' and 'complex_params' lists (not modified)
    :return: new dict with the same two keys
    """
    list_of_params = cgi_params.split("&")
    complex_params = [param.split("->") for param in list_of_params if "->" in param]
    simple_params = [param for param in list_of_params if "->" not in param]

    new_cgi_meta = {
        'cgi_params': [p for p in cgi_meta['cgi_params'] if p["name"] in simple_params],
        'complex_params': [],
    }

    types_subparam = ("prefix_subparams", "suffix_subparams", "exact_subparams")
    for head_param in cgi_meta['complex_params']:
        for name in complex_params:
            if len(name) <= 2 and head_param["name"] == name[0]:
                # work on a copy so that the caller's metadata is not filtered in place
                selected_param = dict(head_param)
                if name[1] != "":
                    subnames = name[1].split("#")
                    for type_subparam in types_subparam:
                        selected_param[type_subparam] = [
                            k for k in head_param[type_subparam] if k["name"] in subnames
                        ]
                new_cgi_meta['complex_params'].append(selected_param)

    if not new_cgi_meta['cgi_params']:
        new_cgi_meta['cgi_params'] = cgi_meta['cgi_params']

    if not new_cgi_meta['complex_params']:
        new_cgi_meta['complex_params'] = cgi_meta['complex_params']

    return new_cgi_meta


def test_cgi_params(ctx, component):
    """
    Test the search component (basesearch/middlesearch) with various values of CGI parameters.

    :param ctx: task context with parameter values
    :param component: component under test
    :return: value returned by ``_test_params`` (queries per second)
    """
    # NOTE(review): the metadata dump is driven by SaveTestingQueriesParameter while
    # SaveCgiInfoParameter stays unused in this file - confirm that this is intended.
    # The value is read via get_or_default, same as in test_cgi_params_optimized.
    cgi_meta = _get_cgi_info(component, utils.get_or_default(ctx, SaveTestingQueriesParameter))
    use_these_params = utils.get_or_default(ctx, UseOnlyTheseParams)
    if use_these_params:
        cgi_meta = _update_cgi_meta(use_these_params, cgi_meta)

    # August 2021: number of parameters and query length have finally grown too large
    # for Python multiprocessing, split generator in chunks
    # (chunks are not exactly equivalent since every param has its own cost, but whatever)
    num_chunks = (len(cgi_meta['cgi_params']) + len(cgi_meta['complex_params'])) // 20 + 1
    assert num_chunks > 0
    logging.info('test_cgi_params: {} simple params, {} complex params, using {} chunks'.format(
        len(cgi_meta['cgi_params']), len(cgi_meta['complex_params']), num_chunks))

    def _generate_cgi(parsed_query, chunk):
        logging.info('generating chunk {}'.format(chunk))
        result_queries = []

        # every chunk handles its own slice of simple and of complex parameters
        pcount = len(cgi_meta['cgi_params'])
        for param in cgi_meta['cgi_params'][pcount*chunk//num_chunks : pcount*(chunk+1)//num_chunks]:
            logging.info("Testing param '%s'...", param['name'])
            eh.ensure('values' in param, "No 'values' in param {}, CGI metadata is corrupt".format(param['name']))
            result_queries += _mixin_cgi_param_values(parsed_query, param['name'], ['', 'foobar'] + param['values'])

        pcount = len(cgi_meta['complex_params'])
        for param in cgi_meta['complex_params'][pcount*chunk//num_chunks : pcount*(chunk+1)//num_chunks]:
            values = _get_complex_param_values(param)
            result_queries += _mixin_cgi_param_values(parsed_query, param['name'], ['', 'foobar'] + values)

        # qtreedbg is mixed in only once, by the first chunk
        if chunk == 0:
            result_queries += _mixin_cgi_param_values(parsed_query, 'qtreedbg', ['', 'foobar', get_qtreedbg_sample_value()])

        return result_queries

    # `chunk=i` binds the chunk number at lambda creation time
    chunked_generator = [lambda parsed_query, chunk=i: _generate_cgi(parsed_query, chunk) for i in range(num_chunks)]
    return _test_params(ctx, component, 'cgi', None, chunked_generator=chunked_generator)


def test_cgi_params_optimized(ctx, component):
    """
    Test the search component (basesearch/middlesearch) with various values of CGI parameters.

    Optimized version of the test, see SEARCH-3253: every parameter is mixed
    into its own single source query instead of into every source query.
    """
    save_info = utils.get_or_default(ctx, SaveTestingQueriesParameter)
    cgi_meta = _get_cgi_info(component, save_info)
    use_these_params = utils.get_or_default(ctx, UseOnlyTheseParams)
    if use_these_params:
        cgi_meta = _update_cgi_meta(use_these_params, cgi_meta)

    def _generate_cgi_optimized(parsed_queries):
        simple_params = cgi_meta['cgi_params']
        complex_params = cgi_meta['complex_params']
        params_total = len(simple_params) + len(complex_params)

        # one source query per parameter plus one more for qtreedbg
        eh.ensure(
            len(parsed_queries) > params_total,
            "Not enough queries to cast cgi spell. Please give me more source queries to mix in. "
        )

        mixed_queries = []

        for query_index, param in enumerate(simple_params):
            source_query = parsed_queries[query_index]
            logging.info(
                "Mixing regular param '%s'... with source query index %s, queries count %s",
                param['name'], query_index, len(mixed_queries),
            )
            eh.ensure('values' in param, "No 'values' in param {}, CGI metadata is corrupt".format(param['name']))
            mixed_queries += _mixin_cgi_param_values(source_query, param['name'], ['', 'foobar'] + param['values'])

        for query_index, param in enumerate(complex_params, len(simple_params)):
            source_query = parsed_queries[query_index]
            complex_values = _get_complex_param_values(param)
            logging.info(
                "Mixing complex param '%s'... with source query index %s, queries count %s",
                param['name'], query_index, len(mixed_queries),
            )
            mixed_queries += _mixin_cgi_param_values(source_query, param['name'], ['', 'foobar'] + complex_values)

        logging.info("Mixed %s queries before qtreedbg", len(mixed_queries))

        qtree_source_query = parsed_queries[params_total]
        mixed_queries += _mixin_cgi_param_values(
            qtree_source_query, 'qtreedbg', ['', 'foobar', get_qtreedbg_sample_value()]
        )

        return mixed_queries

    return _test_params(ctx, component, 'cgi', _generate_cgi_optimized, optimized=True)


def _get_gta_info(component, save_info=False):
    """
    Return the metadata on GTA parameters reported by the component.

    The JSON info request is tried first; if its output cannot be handled
    (ValueError), the old-style plain text request is used instead.

    :param component: component to ask
    :param save_info: also save the raw metadata as the 'gta_info.txt' resource
    :return: list of GTA names
    """
    try:
        gta_meta_raw, _parsed, _error = _fetch_response(
            component, '?info=gtametadata:json', parse_and_check_response=False
        )
        decoded = json.loads(gta_meta_raw)

        has_rows = 'GtaValues' in decoded and 'rows' in decoded['GtaValues']
        if not has_rows:
            raise Exception('Invalid response from server')
        # first cell of every row, duplicates removed
        gta_meta = list({row["cells"][0] for row in decoded['GtaValues']['rows']})

    except ValueError:  # fallback to old-style info-request
        gta_meta_raw = _fetch_response(component, '?info=gtavalues', parse_and_check_response=False)[0]
        gta_meta = gta_meta_raw.splitlines()

    if not save_info:
        return gta_meta

    info_resource = channel.task.create_resource(
        'gta metadata',
        'gta_info.txt',
        'OTHER_RESOURCE',
        ARCH_ANY,
        attributes={"ttl": 5},
    )
    fu.write_file(info_resource.path, gta_meta_raw)
    channel.task.mark_resource_ready(info_resource)

    return gta_meta


def test_gta_params(ctx, component):
    """
    Test the search component (e.g. basesearch) with various values of GTA parameters.

    :param ctx: task context with parameter values
    :param component: component under test
    :return: value returned by ``_test_params`` (queries per second)
    """

    # read the flag via get_or_default, same as in test_cgi_params_optimized,
    # so that a value missing from ctx falls back to the parameter default
    gta_meta = _get_gta_info(component, utils.get_or_default(ctx, SaveTestingQueriesParameter))

    # growing prefixes of the GTA list: [], [g0], [g0, g1], ... (the full list is not included)
    gta_param_values = [gta_meta[:i] for i in six.moves.xrange(len(gta_meta))]

    def _generate_gta(parsed_query):
        return (_mixin_cgi_param_values(parsed_query, 'gta', gta_param_values) +
                _mixin_cgi_param_values(parsed_query, 'fsgta', gta_param_values))

    return _test_params(ctx, component, 'gta', _generate_gta)


def test_params(ctx, component, optimized=False):
    """
    Test the search component (e.g. basesearch) with various values of CGI and GTA parameters.

    The mean of the two measured rates is stored into ctx['rps'].
    """

    run_cgi_test = test_cgi_params_optimized if optimized else test_cgi_params
    cgi_rps = run_cgi_test(ctx, component)

    gta_rps = test_gta_params(ctx, component)
    ctx['rps'] = (cgi_rps + gta_rps) / 2
