# -*- coding: utf-8 -*-

import base64
import logging
import os
import struct
import socket
import time
import urlparse
import urllib
import requests
import copy

from sandbox.common.types.client import Tag

from sandbox import sdk2
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.parameters import LastReleasedResource
from sandbox.sandboxsdk.parameters import SandboxStringParameter

import sandbox.projects.common.app_host.converter as apphost_converter
from sandbox.projects import resource_types
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common.apihelpers import get_last_released_resource
from sandbox.projects.common.noapacheupper import request as nr
from sandbox.projects.common.noapacheupper import search_component as nsc
from sandbox.projects.common.search import requester
from sandbox.projects.common.search.config import NoapacheupperConfig
from sandbox.projects.websearch.upper import resources as upper_resources
from sandbox.common.fs import get_dir_size


class Params:
    """Namespace for the task-specific input parameters of BUILD_NOAPACHEUPPER_REQUESTS2."""

    # Base URL of the upper/report instance that receives the user queries.
    class ReportUrl(SandboxStringParameter):
        name = "report_url"
        description = "Use this upper/report for 'wizardings' requests"
        default_value = "http://hamster.yandex.ru/search?"

    # Resource with the source queries; either of the two listed resource types is accepted.
    class TextQueries(LastReleasedResource):
        name = "misc_text_queries_resource_id"
        description = "Misc cgi requests (start from / <path>)"
        resource_type = [
            resource_types.MISC_TEXT_QUERIES,
            resource_types.USERS_QUERIES,
        ]
        required = True

    # Binary used to dump the noapacheupper event log in text form.
    class EvlogdumpExecutable(LastReleasedResource):
        name = 'evlogdump_executable_resource_id'
        description = 'Evlogdump executable'
        resource_type = resource_types.EVLOGDUMP_EXECUTABLE
        required = False

    # Apphost converter tool; optional parameter.
    class ConverterExecutable(LastReleasedResource):
        name = 'converter_executable_resource_id'
        description = 'Converter executable (apphost tool)'
        resource_type = resource_types.APP_HOST_TOOL_CONVERTER_EXECUTABLE
        required = False

    # Raw cgi string; empty by default.
    class AdditionalCgiParams(SandboxStringParameter):
        name = "additional_cgi_params"
        description = "Additional cgi params for report (string for trivial append to query for report)"
        default_value = ""

    # Tuple of all parameter classes declared above, in declaration order.
    params = (
        ReportUrl,
        TextQueries,
        EvlogdumpExecutable,
        ConverterExecutable,
        AdditionalCgiParams,
    )


class BuildNoapacheupperRequests2(SandboxTask, object):
    """
        Builds requests for shooting at noapacheupper: user queries are sent to report
        with redirection to a locally started noapache
        (cgi parameters search=remote&srcrwr=UPPER...&pron=req_marker_... redirect the requests
        to our noapacheupper),
        and the 're-wizarded' requests are then picked out of the noapacheupper log
        (ContextCreated/PostBody records).

        Output: resources with the user queries that were actually used
        (not every query can be re-wizarded, report does not forward some of them to noapache)
        and with the cgi parameters of the requests to noapacheupper (one request per line).
    """

    type = "BUILD_NOAPACHEUPPER_REQUESTS2"

    input_parameters = nsc.create_noapacheupper_params().params + Params.params
    required_ram = 80 * 1024  # 80 GB

    # Context keys (``*_res_id``) and resource paths of the four output resources.
    upper_int_requests_res_id = "upper_int_requests_res_id"
    blender_requests_res_id = "blender_requests_res_id"
    upper_int_requests = "upper_int_requests"
    blender_requests = "blender_requests"
    blender_used_user_queries_res_id = "blender_used_users_queries_res_id"
    blender_used_user_queries_file = "blender_used_users_queries.txt"
    upper_int_used_user_queries_res_id = "upper_int_used_users_queries_res_id"
    upper_int_used_user_queries_file = "upper_int_used_users_queries.txt"
    # Prefix of the ``pron`` cgi value that carries the sequence number of a sent query.
    pron_req_marker = 'req_marker_'

    this_hostname = None
    # Set in on_execute(); the default keeps send_queries() callable on its own.
    has_misc_requests = False

    def __init__(self, task_id=0):
        super(BuildNoapacheupperRequests2, self).__init__(task_id)

    def on_enqueue(self):
        """Set the kill timeout and pre-create the four output resources, storing their ids in ctx."""
        super(BuildNoapacheupperRequests2, self).on_enqueue()
        channel.task = self
        self.ctx['kill_timeout'] = 6 * 60 * 60
        output_resources = (
            (self.upper_int_requests_res_id, self.upper_int_requests,
             resource_types.BINARY_REQUESTS_FOR_NOAPACHEUPPER),
            (self.blender_requests_res_id, self.blender_requests,
             upper_resources.BinaryRequestsForBlender),
            (self.upper_int_used_user_queries_res_id, self.upper_int_used_user_queries_file,
             resource_types.USERS_QUERIES),
            (self.blender_used_user_queries_res_id, self.blender_used_user_queries_file,
             resource_types.USERS_QUERIES),
        )
        for ctx_key, res_path, res_type in output_resources:
            self.ctx[ctx_key] = self.create_resource(
                self.descr,
                res_path,
                res_type,
                attributes={
                    "ttl": 90,
                },
                arch="any",
            ).id

    def on_execute(self):
        """
            Start noapacheupper, send the queries through report, then extract the requests that
            reached noapacheupper from its event log and save them to the output resources.

            Fails the task (via eh.check_failed / eh.ensure) when nothing was sent, when the apphost
            converter is not found, when more than 1% of the sent queries were not found in the
            event log, or when a resulting requests resource is suspiciously small.
        """
        # Load config from ctx and save a copy with async search enabled.
        # NOTE: replacing the real SearchSource sections with a fake one
        # (patch_meta_config_to_fake) is currently not applied.
        self.this_hostname = socket.gethostname()
        orig_cfg = self.sync_resource(self.ctx[nsc.Params.Config.name])
        cfg = NoapacheupperConfig.get_config_from_file(orig_cfg)
        cfg.enable_async_search()
        config_fixed = self.abs_path('config.fixed')
        cfg.save_to_file(config_fixed)

        search_component = nsc.get_noapacheupper(config_file=config_fixed)
        self.current_action("start noapacheupper ...")
        search_component.start()
        search_component.wait()
        logging.info("... noapacheupper started")
        text_queries = channel.sandbox.get_resource(self.ctx[Params.TextQueries.name])
        self.has_misc_requests = text_queries.type == resource_types.MISC_TEXT_QUERIES
        try:
            sent_queries = self.send_queries(
                self.ctx[Params.ReportUrl.name],
                search_component,
                self.sync_resource(self.ctx[Params.TextQueries.name]),
                text_queries.type == resource_types.USERS_QUERIES,
            )
        finally:
            search_component.stop()

        len_sended_queries = len(sent_queries)
        if not len_sended_queries:
            eh.check_failed('empty sended requests pool')

        evlogdump_binary = self.sync_resource(self.ctx[Params.EvlogdumpExecutable.name])
        converter = self._sync_converter()

        self.current_action("parse binary eventlogs to text format")
        with open(search_component.get_event_log_path(), 'rb') as inp:
            proc = process.run_process(
                [evlogdump_binary, '-i', 'Base64PostBody,AppHostRequest,ErrorMessage'],
                stdin=inp,
                log_prefix="parse_evlog",
                wait=True,
                check=True,
                outputs_to_one_file=False,
            )

        self.current_action("parsed eventlog text and save detected requests to file")
        # Matched markers are removed from sent_queries, so only unmatched queries remain in it.
        reqs_and_queries = self._collect_requests(proc.stdout_path, converter, sent_queries)
        self._save_requests(reqs_and_queries)
        self._save_used_queries(reqs_and_queries)

        # not all requests can be transitted to tdi, so enable lost <1%
        left_unprocessed_cnt_queries = len(sent_queries)

        if left_unprocessed_cnt_queries:
            for k, v in sent_queries.items():
                logging.debug('left [{}]: {}'.format(k, v))
        if (left_unprocessed_cnt_queries / float(len_sended_queries)) > 0.01:
            eh.check_failed('has to many unprocessed {} reqs (>1%)'.format(left_unprocessed_cnt_queries))

        # Store on each resource with re-wizarded requests an attribute holding the id of the
        # resource with the matching (reordered) user queries
        # (they are more human-friendly to show in diff results files).
        for res, queries_res in [
            (self.upper_int_requests_res_id, self.upper_int_used_user_queries_res_id),
            (self.blender_requests_res_id, self.blender_used_user_queries_res_id),
        ]:
            channel.sandbox.set_resource_attribute(
                self.ctx[res],
                Params.TextQueries.name,
                self.ctx[queries_res]
            )
            # SEARCH-6786
            reqs_folder = channel.sandbox.get_resource(self.ctx[res]).path
            eh.ensure(
                get_dir_size(str(reqs_folder)) > 100,
                "Requests size is too small",
            )

    def _sync_converter(self):
        """Sync the apphost converter: the one given in ctx, otherwise the last released one."""
        converter_res_id = self.ctx[Params.ConverterExecutable.name]
        if not converter_res_id:
            released = get_last_released_resource(resource_types.APP_HOST_TOOL_CONVERTER_EXECUTABLE)
            if not released:
                eh.check_failed('not found released (apphost) converter tools')
            converter_res_id = released.id

        converter = self.sync_resource(converter_res_id)
        if not converter:
            eh.check_failed('not found (apphost) converter tools')
        return converter

    def _req_marker_from_pron(self, pron_values):
        """Return the number encoded in the first ``pron`` value carrying the request marker, or None."""
        for value in pron_values:
            if value.startswith(self.pron_req_marker):
                return int(value[len(self.pron_req_marker):])
        return None

    def _collect_requests(self, evlog_text_path, converter, sent_queries):
        """
            Scan the text event log and group the detected requests by target component.

            :param evlog_text_path: path to the tab-separated evlogdump output
            :param converter: path to the apphost converter tool
            :param sent_queries: {marker number: query line}; every matched marker is removed
                from this dict in place
            :return: {'upper_int': {'reqs': [...], 'queries': [...]}, 'blender': {...}}
        """
        # One query may be referenced by several log records (and sent_queries shrinks as we go),
        # so lookups use an untouched copy. Values are strings, a shallow copy is enough.
        all_sent_queries = copy.copy(sent_queries)
        reqs_and_queries = {'upper_int': {'reqs': [], 'queries': []}, 'blender': {'reqs': [], 'queries': []}}
        bad_frame = ''

        with open(evlog_text_path) as fev:
            for line in fev:
                t = line.rstrip('\n').split('\t')
                if len(t) < 3:
                    # blank or truncated record: there is no event name to dispatch on
                    continue
                event = t[2]
                query, req, req_marker = None, None, None
                component_apphost = None

                if event == 'ErrorMessage':
                    if len(t) >= 4 and t[3].startswith('Caught exception in MakePage'):
                        # ignore bad requests (as sample: "' --")
                        bad_frame = t[1]

                elif event == 'Base64PostBody' and len(t) >= 4:
                    # parse request, extract pron=req_marker_ from request (t[3])
                    # NOTE(review): here the parsed nr.Request object (not the raw bytes) is stored
                    # and later passed to len()/write_binary_data - confirm this is intended.
                    req = nr.Request(base64.decodestring(t[3]))
                    marker = self._req_marker_from_pron(req.global_ctx.get('pron', ['']))
                    if marker is not None:
                        req_marker = marker
                        query = all_sent_queries.get(marker)
                        component_apphost = reqs_and_queries['upper_int']
                        logging.debug('try_find sended requests (Base64PostBody): {} {}'.format(str(query), str(req_marker)))

                elif event == 'AppHostRequest' and len(t) >= 6:
                    req = base64.decodestring(t[5])

                    # extract pron params (find request marker)
                    json_apphost = apphost_converter.convert_input_request(converter, t[5])
                    for src in json_apphost["answers"]:
                        if query and component_apphost is not None:
                            break

                        src_name = src.get("name")
                        if src_name == "UPPER_INT_PARAMS":
                            component_apphost = reqs_and_queries['upper_int']
                            continue
                        if src_name == "BLENDER_PARAMS":
                            component_apphost = reqs_and_queries['blender']
                            continue

                        if query:
                            # query already identified, only the target component is still unknown
                            continue
                        results = src["results"]
                        for res in (results if isinstance(results, list) else [results]):
                            ctx = res.get("global_ctx", res.get("params", {}))
                            marker = self._req_marker_from_pron(ctx.get("pron", []))
                            if marker is None:
                                continue
                            req_marker = marker
                            query = all_sent_queries.get(marker)
                            logging.debug('try_find sended requests (AppHostRequest): {} {}'.format(str(query), str(req_marker)))
                            if query:
                                break

                if not query:
                    continue
                if component_apphost is None:
                    # The marker was found but the request names neither UPPER_INT_PARAMS nor
                    # BLENDER_PARAMS: there is nowhere to store it. Leave the query counted as
                    # unprocessed instead of crashing on None.
                    logging.warning('request with marker %s has no known target component, skipped', req_marker)
                    continue
                if t[1] != bad_frame:
                    component_apphost['reqs'].append(req)
                    component_apphost['queries'].append(query)
                sent_queries.pop(req_marker, None)

        return reqs_and_queries

    def _save_requests(self, reqs_and_queries):
        """Write the 'data' and 'index' files of every requests resource."""
        for name, component_apphost in reqs_and_queries.items():
            reqs_folder = channel.sandbox.get_resource(self.ctx["{}_requests_res_id".format(name)]).path
            make_folder(reqs_folder)
            reqs_file = os.path.join(reqs_folder, 'data')
            reqs_index_file = os.path.join(reqs_folder, 'index')
            # both files hold binary data, so open them in binary mode
            with open(reqs_index_file, 'wb') as fidx, open(reqs_file, 'wb') as f:
                next_result_pos = 0
                for apphost_req in component_apphost['reqs']:
                    # index entry: offset of the record inside 'data' as a native unsigned 64-bit int
                    fidx.write(struct.pack('Q', next_result_pos))
                    # presumably write_binary_data prepends a 4-byte length - TODO confirm
                    next_result_pos += 4 + len(apphost_req)
                    requester.write_binary_data(f, apphost_req)

    def _save_used_queries(self, reqs_and_queries):
        """Write the user queries matched to saved requests, one per line, into the queries resources."""
        for res_ctx_key, component_apphost in [
            (self.blender_used_user_queries_res_id, reqs_and_queries['blender']),
            (self.upper_int_used_user_queries_res_id, reqs_and_queries['upper_int']),
        ]:
            used_user_queries_filename = channel.sandbox.get_resource(self.ctx[res_ctx_key]).path
            with open(used_user_queries_filename, 'w') as uuq:
                for used_user_query in component_apphost['queries']:
                    uuq.write('{}\n'.format(used_user_query))

    def send_queries(self, url, search_component, fname, users_queries=False):
        """
            Send every query from the file to report, tagging each one with pron=req_marker_<N>.

            :param url: report url
            :param search_component: started noapacheupper (its port is put into srcrwr=UPPER)
            :param fname: path to the file with queries, one per line, tab-separated fields
            :param users_queries: True when lines are '<text>[\\t<lr>]' user queries rather than
                ready-made cgi requests
            :return: {N: original line} for all sent queries
            :raises: the last request error when a query could not be sent after 8 attempts
        """
        # dirty hack for support not misc type (not startswith('/') apphost requests
        if self.has_misc_requests:
            # remove path from url
            uuu = urlparse.urlparse(url)
            url = urlparse.urlunparse((uuu[0], uuu[1], '', '', '', ''))
        else:
            if '?' not in url:
                url += '?'

        additional_cgi_params = "search=remote&rearr=dump_blender_request&srcrwr=UPPER:{}:{}&{}&{}&{}&{}".format(
            self.this_hostname,
            search_component.get_port(),
            "srcrwr=METASEARCH:inproc://localhost/_subhost/sync_metasearch",
            "rearr=dump_hr_props",
            "srcrwr=APP_HOST_WEB:::1500",
            "srcrwr=SRC_SETUP:::200",
        )
        input_acp = self.ctx[Params.AdditionalCgiParams.name]
        if input_acp:
            additional_cgi_params += '&'
            additional_cgi_params += input_acp

        sent_queries = {}
        with open(fname) as f:
            for cnt, line in enumerate(f):
                if cnt % 100 == 0:
                    self.current_action("ask queries via report (processed {} queries)".format(cnt))
                line = line.rstrip('\n')
                tk = line.split('\t')
                req = tk[0]
                if users_queries:
                    text = urllib.quote(tk[0], '')
                    lr = tk[1] if len(tk) >= 2 else "213"
                    qurl = "{}&pron={}{}&search=remote&text={}&lr={}&{}".format(url, self.pron_req_marker, cnt, text, lr, additional_cgi_params)
                else:
                    qurl = "{}{}&pron={}{}&{}".format(url, req, self.pron_req_marker, cnt, additional_cgi_params)
                try_count = 0
                while True:  # 'zelo.yandex.ru' often reject(or return empty empty response) requests, so make few try
                    logging.info("send[{}/{}]: {}".format(cnt, try_count, qurl))
                    try_count += 1
                    try:
                        requests.get(qurl, timeout=30)
                        break
                    except Exception:
                        # not a bare except: KeyboardInterrupt/SystemExit must not be retried
                        if try_count > 7:
                            raise
                        time.sleep(1)

                if cnt % 100 == 0:
                    self.current_action("send to report {} requests".format(cnt))
                sent_queries[cnt] = line
                logging.debug('add sended request: {} {}'.format(cnt, line))
        return sent_queries


def patch_meta_config_to_fake(text):
    """
        Return the config text with every <SearchSource> section removed and a single fake
        section emitted in place of the first one; ReArrangeOptions lines are dropped as well.
        Every kept line is terminated with a newline.
    """
    fake_source = (
        "\n"
        "    #fake source for possibility start search\n"
        "    <SearchSource>\n"
        "        ServerDescr FAKE\n"
        "        CgiSearchPrefix http://localhost:4/yandsearch?@500\n"
        "        Options MaxAttempts=1,ProtocolType=proto,TimeOut=50000\n"
        "    </SearchSource>\n"
    )
    chunks = []
    fake_emitted = False
    in_source = False
    for raw in text.split('\n'):
        stripped = raw.rstrip('\n').lstrip(' ')
        if stripped.startswith("ReArrangeOptions"):
            # disable all rearranges (prevent segfault in fresh rearr, etc)
            continue
        if stripped.startswith("</SearchSource>"):
            in_source = False
            if not fake_emitted:
                # the first closed real section is where the fake one goes
                fake_emitted = True
                chunks.append(fake_source)
        elif stripped.startswith("<SearchSource>"):
            in_source = True
        elif not in_source:
            chunks.append(raw + '\n')
    return "".join(chunks)


# Module-level alias for the task class (presumably the name the Sandbox task loader looks up - TODO confirm).
__Task__ = BuildNoapacheupperRequests2


def create_build_noapacheupper_requests2(
    report_url,
    text_queries_resource_id,
    resources,
    descr,
    additional_cgi_params=None,
    noapacheupper_apphost_mode=False,
    verify_stderr=True,
    current_sdk2_task=None,
    space=30*1024,
):
    """
        Create a BUILD_NOAPACHEUPPER_REQUESTS2 child task.

        :param report_url: value for the report url parameter
        :param text_queries_resource_id: id of the resource with source queries
        :param resources: mapping that must contain the keys EVLOGDUMP_EXECUTABLE, NOAPACHE_UPPER,
            NOAPACHEUPPER_CONFIG, REARRANGE_DATA, REARRANGE_DYNAMIC_DATA and REARRANGE_DATA_FAST
        :param descr: description of the created task
        :param additional_cgi_params: optional extra cgi string, passed only when non-empty
        :param noapacheupper_apphost_mode: value for the noapacheupper apphost-mode parameter
        :param verify_stderr: stored in the child context under 'verify_stderr'
        :param current_sdk2_task: parent sdk2 task; when given, the child is created through sdk2
            and enqueued, otherwise it is created as a subtask of channel.task
        :param space: disk space requested for the child task
        :return: result of enqueue() for the sdk2 path, otherwise the result of create_subtask()
    """
    task_type = BuildNoapacheupperRequests2.type

    child_ctx = {
        Params.EvlogdumpExecutable.name:  resources['EVLOGDUMP_EXECUTABLE'],
        Params.ReportUrl.name: report_url,
        Params.TextQueries.name: text_queries_resource_id,
        nsc.Params.Binary.name: resources['NOAPACHE_UPPER'],
        nsc.Params.Config.name: resources['NOAPACHEUPPER_CONFIG'],
        nsc.Params.RearrangeData.name: resources['REARRANGE_DATA'],
        nsc.Params.RearrangeDynamicData.name: resources['REARRANGE_DYNAMIC_DATA'],
        nsc.Params.RearrangeDataFast.name: resources['REARRANGE_DATA_FAST'],
        nsc.Params.AppHostMode.name: noapacheupper_apphost_mode,
        'verify_stderr': verify_stderr,
    }
    if additional_cgi_params:
        child_ctx[Params.AdditionalCgiParams.name] = additional_cgi_params

    logging.info("create {}({})".format(task_type, str(child_ctx)))

    if not current_sdk2_task:
        # sdk1 path: plain subtask of the currently running task
        return channel.task.create_subtask(
            task_type=task_type,
            description=descr,
            input_parameters=child_ctx,
            execution_space=space,
            arch="linux",
        )

    # sdk2 path: the child inherits the priority of the parent task
    child = sdk2.Task[task_type](
        current_sdk2_task,
        description=descr,
        arch="linux",
        priority=current_sdk2_task.Parameters.priority,
        **child_ctx
    )
    child.Requirements.disk_space = space
    return child.save().enqueue()
