# -*- coding: utf-8 -*-

import urllib
import socket
import cPickle
import os.path
import logging
import json
import re

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.channel import channel

from sandbox.sandboxsdk import parameters
from sandbox.projects import resource_types
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import utils
from sandbox.projects.common.base_search_quality import threadPool as TP
from sandbox.projects.common.base_search_quality import basesearch_response_parser as BRP
from sandbox.projects.common.base_search_quality import response_diff_task as RDT
from sandbox.projects.common.base_search_quality import response_diff as RDIFF
from sandbox.projects.common.base_search_quality.tree import htmldiff
from sandbox.projects.common.base_search_quality.tree.node import Node

import sandbox.projects.common.proxy_wizard as PW


def snippet_parser(data, types):
    """Collect the snippets whose normalized type is listed in ``types``.

    :param data: parsed json_dump response; ``data['searchdata.docs.*.snippets']``
        is iterated as a sequence of per-document mappings
        ``{snippet group: snippet or list of snippets}``.
    :param types: container of snippet type names to keep; a snippet's
        ``'type'`` is compared after stripping whitespace and slashes.
    :return: dict keyed by a human-readable description of where the snippet
        was found; every value holds the pretty-printed snippet JSON together
        with its group name, document index and index inside the group.
    """
    key_template = 'Doc index: {}, Snippet Group Type: {}, Snippet type: {}, Snippet type index: {}'
    found = {}

    for doc_idx, doc in enumerate(data['searchdata.docs.*.snippets']):
        for group, entries in doc.iteritems():
            # A group may hold a single snippet instead of a list of them.
            if not isinstance(entries, list):
                entries = [entries]

            for snip_idx, snippet in enumerate(entries):
                snippet_type = snippet['type'].strip().strip('/')
                if snippet_type not in types:
                    continue

                key = key_template.format(doc_idx, group, snippet_type, snip_idx)
                # sort_keys keeps the dump stable for textual comparison.
                dumped = json.dumps(snippet, sort_keys=True, indent=2, ensure_ascii=False, separators=(',', ': '))
                found[key] = {
                    'snippet': dumped,
                    'group': group,
                    'doc_idx': doc_idx,
                    'snip_idx': snip_idx
                }

    return found


def wizard_parser(data, types):
    """Collect the wizards whose normalized type is listed in ``types``.

    :param data: parsed json_dump response; ``data['wizplaces']`` is iterated
        as a mapping ``{place: sequence of wizards}``.
    :param types: container of wizard type names to keep; a wizard's
        ``'type'`` is compared after stripping whitespace and slashes.
    :return: dict keyed by a human-readable description of the wizard
        position; every value holds the pretty-printed wizard JSON together
        with its place, index inside the place and normalized type.
    """
    key_template = 'Wizard place: {}, type: {}, num in list: {}'
    matched = {}

    for place, wizards in data['wizplaces'].iteritems():
        for position, wizard in enumerate(wizards):
            wizard_type = wizard['type'].strip().strip('/')
            if wizard_type not in types:
                continue

            key = key_template.format(place, wizard_type, position)
            # sort_keys keeps the dump stable for textual comparison.
            dumped = json.dumps(wizard, sort_keys=True, indent=2, ensure_ascii=False, separators=(',', ': '))
            matched[key] = {
                'snippet': dumped,
                'place': place,
                'wiz_idx': position,
                'wiz_type': wizard_type
            }

    return matched


# Supported dump types. For each type:
#   'param' - value sent as the ``json_dump`` CGI parameter; it is also the
#             top-level key the matching parser reads from the response;
#   'cb'    - parser called as ``cb(data, types)`` on the decoded response.
# The keys of this table are offered as the choices of DumpTypeParameter.
DUMP_TYPE = {
    'snippet': {
        'param': 'searchdata.docs.*.snippets',
        'cb': snippet_parser
    },
    'wizard': {
        'param': 'wizplaces',
        'cb': wizard_parser,
    }
}


class DumpTypeParameter(parameters.SandboxSelectParameter):
    """Select parameter naming the DUMP_TYPE entry to request and compare."""
    name = 'proxy_wizard_dump_type_value'
    description = 'json_dump value param'
    default = 'snippet'
    # Built from a generator expression on purpose: in Python 2 a list
    # comprehension inside a class body leaks its loop variable into the
    # class namespace as a stray attribute.
    choices = list((dump_type, dump_type) for dump_type in DUMP_TYPE)
    group = PW.GROUP_NAME
    required = True


def _convert_to_node(dictObj):
    """Build a ``Node`` tree mirroring a (possibly nested) dict.

    Nested dicts become single-element child lists in ``node._nodes``; every
    other value becomes a single-element list of its byte-string form in
    ``node._props``. ``None`` is stored as an empty property list.

    :param dictObj: dict to convert (iterated with ``iteritems``).
    :return: the populated ``Node``.
    """
    node = Node()

    for key, value in dictObj.iteritems():
        if isinstance(value, dict):
            node._nodes[key] = [_convert_to_node(value)]
        elif value is None:
            node._props[key] = []
        elif isinstance(value, unicode):
            # str() on unicode uses the interpreter's default codec and raises
            # UnicodeEncodeError on non-ASCII text when that codec is ASCII.
            # The parsers dump JSON with ensure_ascii=False, so such text can
            # reach this point; encode explicitly instead.
            node._props[key] = [value.encode('utf-8')]
        else:
            node._props[key] = [str(value)]

    return node


class Params(object):
    """Request settings used to fetch the two response sets being compared.

    The constructor builds two URL prefixes from the task context:
    ``url_orig`` (host + collection + CGI parameters + ``json_dump``) and
    ``url_test`` (the same plus a ``metahost`` parameter built from ``mss``,
    this machine's IP address and ``port``).
    """
    SaveErrorsInDir = False

    def __init__(self, ctx, mss, port, query):
        """
        :param ctx: task context; read for the host name, collection and
            dump type parameters.
        :param mss: first component of the ``metahost`` value.
        :param port: last component of the ``metahost`` value.
        :param query: extra CGI parameters, either a string holding one
            ``key=value`` pair per line or an iterable of pairs.
        """
        self.ctx = ctx
        myquery = []
        if isinstance(query, (str, unicode)):
            # Split on CR as well as LF so that both '\r\n'- and '\n'-separated
            # input yields one pair per line.
            for line in re.split(r'[\r\n]+', query):
                kw = line.strip().split('=', 1)
                # Keep only well-formed "key=value" entries with a non-empty key.
                if len(kw) == 2 and kw[0]:
                    myquery.append(tuple(kw))
        else:
            myquery.extend(tuple(pair) for pair in query)

        self.host = ctx[PW.HostnameParameter.name].strip()
        self.collection = ctx.get(PW.CollectionParameter.name, 'yandsearch').strip()

        # str(port): join() only accepts strings.
        # NOTE(review): the type of the incoming port is not visible here - confirm.
        myhostport = ':'.join([mss, socket.gethostbyname(socket.gethostname()), str(port)])

        url = 'http://' + self.host + '/' + self.collection + '?'

        myquery.append(('json_dump', DUMP_TYPE[ctx.get(DumpTypeParameter.name, 'snippet')]['param']))
        logging.info("for url_orig: %s", myquery)
        self.url_orig = url + urllib.urlencode(myquery) + '&'

        # The test URL differs from the original one only by the metahost parameter.
        myquery.append(('metahost', myhostport))
        logging.info("for url_test: %s", myquery)
        self.url_test = url + urllib.urlencode(myquery) + '&'

        logging.info(self.url_test)
        logging.info(self.url_orig)

    @staticmethod
    def ProcessQueries(queries, params):
        """Fetch every query through both URL prefixes.

        :param queries: sequence of dicts with a ``'params'`` string and an
            optional ``'tld'`` (``'ru'`` is used when it is absent).
        :param params: ``Params`` instance supplying ``ctx``, ``url_test``
            and ``url_orig``.
        :return: ``(responses1, responses2)`` - pickled ``Node`` trees for the
            test and the original URL respectively, in query order.
        """
        raw_types = params.ctx[PW.CheckSnippetTypesParameter.name]
        # Empty tokens (leading/trailing whitespace, empty parameter) are
        # dropped so that '' never ends up among the accepted types.
        types = set(token.strip('/') for token in re.split(r'\s+', raw_types) if token)

        cb = DUMP_TYPE[params.ctx.get(DumpTypeParameter.name, 'snippet')]['cb']

        def fetch(url_prefix, query):
            # The prefix may contain a "{tld}" placeholder filled per query.
            url = url_prefix.format(tld=query.get('tld', 'ru')) + query['params']
            logging.debug(url)
            data = json.loads(BRP.fetch_data(url, 1000000, 0), encoding='utf-8')
            return cPickle.dumps(_convert_to_node(cb(data, types)), cPickle.HIGHEST_PROTOCOL)

        responses1 = []
        responses2 = []
        for query in queries:
            responses1.append(fetch(params.url_test, query))
            responses2.append(fetch(params.url_orig, query))

        return responses1, responses2


class QueriesBasedTask(SandboxTask):
    """Base task that loads a queries resource and can write a response diff.

    ``write_diff_result`` calls ``self.get_compare_result_path()``, which is
    not defined in this class; subclasses such as ``ProxyWizardResponses``
    provide it.
    """
    input_parameters = [PW.QueriesResourceParameter]

    def load_queries(self, queries_resource_name=PW.QueriesResourceParameter.name):
        """Read the queries resource and return ``(queries, queries_data)``.

        Every line holds the query parameters, optionally followed by a TAB
        and a tld. Blank lines are skipped. When the tld column is missing
        the ``'tld'`` key is left out, so consumers that read it with
        ``query.get('tld', 'ru')`` fall back to their default.

        :param queries_resource_name: context key holding the resource id.
        :return: tuple of the raw first columns and of the per-query dicts
            ``{'params': ..., 'tld': ...}``.
        """
        queries = []
        queries_data = []

        queries_resource_path = self.sync_resource(self.ctx[queries_resource_name])
        with open(queries_resource_path) as queries_file:
            for line in queries_file:
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) == 1 and not fields[0].strip():
                    # Blank line (e.g. a trailing newline at the end of the file).
                    continue

                query_data = {'params': fields[0].strip()}
                if len(fields) > 1:
                    query_data['tld'] = fields[1].strip()

                queries_data.append(query_data)
                queries.append(fields[0])

        return queries, queries_data

    def write_diff_result(self, queries, responses1, responses2):
        """Compare two response sets and fill the compare-result resource.

        Stores the comparison outcome in ``ctx['compare_result']`` and the
        number of collected diff indexes in ``ctx['num_of_diffs']``. When the
        outcome is falsy an HTML diff plus ``changed_props.html`` are written
        into the compare-result folder; otherwise a "no diff" marker file is
        written there. Finally the compare resource is marked ready.

        :param queries: queries handed to the HTML diff writer.
        :param responses1: first response set.
        :param responses2: second response set.
        """
        # Passed to compare_responses and the diff writer.
        # NOTE(review): presumably filled in by compare_responses - confirm.
        diff_indexes = []

        with utils.TimeCounter('getting compare_result'):
            self.ctx['compare_result'] = BRP.compare_responses(
                responses1, responses2,
                diff_indexes=diff_indexes,
            )

        self.ctx['num_of_diffs'] = len(diff_indexes)

        diff_path = self.get_compare_result_path()
        make_folder(diff_path)
        if not self.ctx['compare_result']:
            changed_props = htmldiff.ChangedProps()

            with utils.TimeCounter('generating full html diff'):
                RDIFF.write_html_diff(
                    queries,
                    responses1,
                    responses2,
                    changed_props,
                    diff_indexes,
                    diff_path=diff_path,
                    queries_per_file=self.ctx[RDT.QueriesPerFileParameter.name],
                    max_diff_size=self.ctx[RDT.MaxDiffSizeParameter.name],
                    max_diff_files=self.ctx[RDT.MaxDiffFilesParameter.name],
                    ctx=self.ctx,
                )

            with utils.TimeCounter('generating changed props'):
                with open(os.path.join(diff_path, 'changed_props.html'), "w") as out_file:
                    changed_props.write(out_file)
        else:
            no_diff_name = os.path.join(diff_path, utils.NO_DIFF_NAME)
            fu.write_file(no_diff_name, utils.no_diff_detected())

        self.mark_resource_ready(self.ctx['compare_resource_id'])


class ProxyWizardResponses(QueriesBasedTask):
    """Task that runs a proxy wizard binary and stores two response sets.

    ``on_enqueue`` registers the output resources; ``on_execute`` starts the
    binary, processes the loaded queries through it, writes both response
    sets and then calls the ``on_after_execute`` hook.
    """
    input_parameters = QueriesBasedTask.input_parameters + [
        PW.BinaryProxyWizardParameter,
        PW.GeoDBDataParameter,
        PW.ConfigResourceParameter,
        PW.HostnameParameter,
        PW.CollectionParameter,
        PW.MssResourceParameter,
        PW.CheckSnippetTypesParameter,
        DumpTypeParameter,
        PW.CgiParameter,
        TP.UseMultiprocessing,
        PW.ProcessCountParameter,
        RDT.QueriesPerFileParameter,
        RDT.MaxDiffSizeParameter,
        RDT.MaxDiffFilesParameter,
    ]

    def get_params(self, port):
        """Return the ``Params`` for this task's context and the given port."""
        return Params(self.ctx, self.ctx[PW.MssResourceParameter.name], port, self.ctx.get(PW.CgiParameter.name, ''))

    def get_compare_result_path(self):
        """Return the path of the compare-result resource created on enqueue."""
        return channel.sandbox.get_resource(self.ctx['compare_resource_id']).path

    def on_enqueue(self):
        """Create the three output resources and remember their ids in ctx."""
        channel.task = self
        resp1_res = self.create_resource(
            self.descr, 'responses1',
            resource_types.PROXY_WIZARD_RESPONSES_RESULT
        )
        self.ctx['responses1_resource_id'] = resp1_res.id

        resp2_res = self.create_resource(
            self.descr, 'responses2',
            resource_types.PROXY_WIZARD_RESPONSES_RESULT
        )
        self.ctx['responses2_resource_id'] = resp2_res.id

        compare_result = self.create_resource(
            self.descr, 'compare_result',
            resource_types.PROXY_WIZARD_RESPONSES_COMPARE_RESULT
        )
        self.ctx['compare_resource_id'] = compare_result.id

    def on_after_execute(self, queries, r1, r2):
        """Hook called at the end of ``on_execute``; does nothing here."""
        pass

    def on_execute(self):
        """Run the proxy wizard, collect both response sets and store them."""
        resp1_res = channel.sandbox.get_resource(self.ctx['responses1_resource_id'])
        resp2_res = channel.sandbox.get_resource(self.ctx['responses2_resource_id'])

        proxy_bin_file = self.sync_resource(self.ctx['resource_proxy_wizard_bin_id'])
        geodb_file = self.sync_resource(self.ctx[PW.GeoDBDataParameter.name])
        config_file = self.sync_resource(self.ctx['proxy_wizard_config_resource_id'])

        softname = os.path.basename(proxy_bin_file)

        proxy_bin = PW.SearchExecutable(
            softname,
            channel.task.abs_path(),
            proxy_bin_file,
            config_file,
            ctx=self.ctx,
            config_kws={
                'MYDIR': channel.task.abs_path(),
                'GEODB_PATH': geodb_file,
            }
        )
        proxy_bin.start()
        proxy_bin.wait()

        # Stop the started binary even when loading or processing the queries
        # fails, so that it is not left running.
        try:
            (queries, queries_data) = self.load_queries()
            (responses1, responses2) = proxy_bin.process_queries(queries_data, self.get_params(proxy_bin.port))
        finally:
            proxy_bin.stop()

        BRP.write_responses(resp1_res.path, responses1)
        BRP.write_responses(resp2_res.path, responses2)

        self.on_after_execute(queries, responses1, responses2)
