import contextlib
import itertools
import json
import logging
import multiprocessing
import urllib
import urllib2
import urlparse
import xml.etree.ElementTree as xmlTree

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk import task

from sandbox.projects import resource_types
from sandbox.projects.common import decorators
from sandbox.projects.common import dolbilka
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.app_host import resources as app_host_resources
from sandbox.projects.common.dolbilka import resources as dolbilka_resources


_DEFAULT_CGI_PARAMS = "&".join((
    "no-tests=da",
    "nocache=da",
))


class ReportUrl(parameters.SandboxStringParameter):
    # Base url of the report; on_execute reads it from the task context and
    # _process_query_line appends the cgi parameters of each query to it.
    default_value = "https://hamster.yandex.ru/images/search/?"
    description = "Use this upper/report to get middle search requests"
    name = "report_url"


class XmlReport(parameters.SandboxBoolParameter):
    # Switches the task between the XML eventlog dump ("&xml_dump=eventlog")
    # and the JSON one ("&json_dump=eventlog"); see on_execute.
    description = "Using XML report"
    default_value = False
    name = 'xml_type'


class TextQueries(parameters.ResourceSelector):
    # Input resource with one user query per line; on_execute syncs it and
    # feeds every line to _process_query_line.
    required = True
    resource_type = resource_types.USERS_QUERIES
    description = "Users text queries (optionaly can have region-id after <TAB>)"
    name = "text_queries_resource_id"


class AdditionalCgiParams(parameters.SandboxStringParameter):
    # Raw cgi string appended to every report url; when non-empty, on_execute
    # requires it to start with '&'.
    description = "Additional cgi params for report (string for trivial append to query for report)"
    name = "additional_cgi_params"


class AdditionalAttributes(parameters.SandboxStringParameter):
    # Attribute string that on_enqueue parses with string.parse_attrs and
    # attaches to both output resources.
    do_not_copy = True
    description = 'Set additional attrs to resources (ex.: attr1=v1, attr2=v2)'
    name = 'additional_attributes'


class SourceName(parameters.SandboxStringParameter):
    # Comma separated source names; on_execute splits the value on "," and
    # adds one "&dump_source_request=<name>" cgi parameter per name.
    required = True
    default_value = "IMAGES"
    description = "Source name (comma separated)"
    name = "sub_source_name"


class PoolSizeParameter(parameters.SandboxIntegerParameter):
    # Size passed to multiprocessing.Pool in on_execute.
    default_value = 5
    description = 'Working pool size'
    name = 'pool_size'


class MinimalResultRequestsParameter(parameters.SandboxIntegerParameter):
    # Lower bound on the number of written requests; on_execute fails the
    # verification when fewer were produced.
    default_value = 1
    description = 'Minimum requests count to allow'
    name = 'minimal_result_requests'


class ImagesGenerateMiddlesearchRequests(task.SandboxTask):
    """
        Generate middlesearch requests from user queries

        Send requests to the specified server, dump the eventlog and extract middlesearch queries from it
    """

    type = "IMAGES_GENERATE_MIDDLESEARCH_REQUESTS"

    input_parameters = (
        ReportUrl,
        XmlReport,
        TextQueries,
        SourceName,
        AdditionalCgiParams,
        AdditionalAttributes,
        PoolSizeParameter,
        MinimalResultRequestsParameter,
    )

    def on_enqueue(self):
        """
            Register both output resources (text requests and binary plan) before execution
        """
        task.SandboxTask.on_enqueue(self)
        attributes = string.parse_attrs(utils.get_or_default(self.ctx, AdditionalAttributes))

        # Same resource settings for the text output (binary=False) and the plan (binary=True).
        for binary in (False, True):
            self.create_resource(
                self.descr,
                self.__get_output_path(binary=binary),
                self.__get_output_resource(binary=binary),
                attributes=attributes,
                arch=sandboxapi.ARCH_ANY
            )

    def on_execute(self):
        """
            Query the report for every input line, store extracted requests and build the plan

            Steps:
              * build the common cgi suffix (eventlog dump flag + dump_source_request per source);
              * process all query lines in a worker pool, writing every non-empty result;
              * verify that enough requests were produced;
              * in non-XML mode convert the intermediate dump with the app_host ammo tool;
              * convert the text requests to a dolbilka plan.
        """
        report_url = self.ctx[ReportUrl.name]
        users_queries_filename = self.sync_resource(self.ctx[TextQueries.name])
        # Guard against a None value in the context: the suffixes below are appended with "+=".
        additional_cgi_params = self.ctx[AdditionalCgiParams.name] or ""
        source_name = self.ctx[SourceName.name].split(",")
        xml_mode = self.ctx[XmlReport.name]

        if additional_cgi_params:
            eh.verify(additional_cgi_params.startswith('&'), "cgi params must start with '&'")
        if xml_mode:
            additional_cgi_params += "&xml_dump=eventlog"
        else:
            additional_cgi_params += "&json_dump=eventlog"
        #  https://st.yandex-team.ru/IMAGESUI-8188
        additional_cgi_params += ''.join(['&dump_source_request={}'.format(src) for src in source_name])

        # The pool is created only after the parameters are validated and is always
        # torn down, so worker processes do not outlive a failed run.
        pool = multiprocessing.Pool(utils.get_or_default(self.ctx, PoolSizeParameter))
        try:
            with open(users_queries_filename) as queries_file:
                with open(self.__get_output_path(intermediate=not xml_mode), 'w') as request_file:
                    # Every worker call receives the shared settings plus one query line.
                    queries_iterator = itertools.izip(
                        itertools.repeat(report_url),
                        itertools.repeat(source_name),
                        itertools.repeat(additional_cgi_params),
                        itertools.repeat(xml_mode),
                        queries_file
                    )
                    requests_count = 0
                    for output in pool.imap_unordered(_process_query_line, queries_iterator):
                        if output is not None:
                            request_file.write(output + "\n")
                            requests_count += 1
                    eh.verify(
                        requests_count >= utils.get_or_default(self.ctx, MinimalResultRequestsParameter),
                        'Too few result requests ({})'.format(requests_count)
                    )
        finally:
            # On success all results are already consumed, so terminating is safe;
            # on failure it stops the outstanding work.
            pool.terminate()
            pool.join()

        if not xml_mode:
            # JSON mode wrote an intermediate dump; convert it to the final text requests.
            app_host_plan_builder = self.sync_resource(utils.get_and_check_last_released_resource_id(
                app_host_resources.APP_HOST_TOOL_MAKE_TANK_AMMO_EXECUTABLE,
                arch=sandboxapi.ARCH_LINUX
            ))
            process.run_process(
                [
                    app_host_plan_builder,
                    "-i", self.__get_output_path(intermediate=True),
                    "-o", self.__get_output_path(),
                ],
                outputs_to_one_file=False,
                log_prefix="make_tank_ammo"
            )

        dolbilka.convert_queries_to_plan(
            self.__get_output_path(),
            self.__get_output_path(binary=True),
            loader_type="plain" if xml_mode else "phantom"
        )

    def __get_output_path(self, binary=False, intermediate=False):
        """
            Return the output file name: "requests.plan" (binary), "requests.dump" (intermediate)
            or "requests.txt"; ``binary`` takes precedence over ``intermediate``
        """
        if binary:
            suffix = "plan"
        elif intermediate:
            suffix = "dump"
        else:
            suffix = "txt"

        return "requests.{}".format(suffix)

    def __get_output_resource(self, binary=False):
        """
            Return the resource type of the binary plan or, otherwise, of the text requests
            (which depends on the XML mode flag stored in the context)
        """
        if binary:
            return resource_types.IMAGES_MIDDLESEARCH_PLAN
        elif self.ctx[XmlReport.name]:
            return resource_types.IMAGES_MIDDLESEARCH_PLAIN_TEXT_REQUESTS
        else:
            return dolbilka_resources.DOLBILKA_STPD_QUERIES


def _process_query_line(args):
    """
        Build the report url for one query line and fetch the middlesearch request for it

        ``args`` is a 5-tuple (report_url, source_name, additional_cgi_params, xml_mode, line),
        packed into one argument because the function is mapped over a process pool.
        The line is TAB separated: column 0 is the query text, column 2 (optional) holds extra
        cgi parameters and column 3 (optional, "ru" when absent) is substituted into the report url.

        Returns the extracted request, or None when nothing was found or the lookup failed.
    """
    report_url, source_name, additional_cgi_params, xml_mode, line = args

    columns = line.rstrip('\n').split('\t')
    column_count = len(columns)
    quoted_text = urllib.quote(columns[0], safe='')
    per_query_cgi = "" if column_count < 3 else columns[2]
    region = "ru" if column_count < 4 else columns[3]
    base_url = report_url.format(region)

    cgi_tail = per_query_cgi.replace('&type=', '&kind=') + additional_cgi_params
    qurl = "{}{}&text={}{}".format(base_url, _DEFAULT_CGI_PARAMS, quoted_text, cgi_tail)

    try:
        request = _get_middlesearch_request(qurl, source_name, xml_mode)
        if not request:
            logging.info("Skipping [{}]: request has no result from '{}'".format(qurl, source_name))
            return None
        logging.info("Adding [{}] to plan".format(qurl))
        return request
    except Exception as error:
        # Best effort: a failing query is logged and skipped, it must not abort the whole run.
        logging.info("Problem during processing [{}]: {}".format(qurl, str(error)))
        return None


class RequestBuilder(object):
    """
        Collect the request(s) sent to the wanted sources out of eventlog lines

        Feed TAB separated eventlog lines one by one with feed_eventlog_line() and
        read whatever was collected with get_result().
    """

    def __init__(self, qurl, sources):
        """
            :param qurl: report url the eventlog belongs to (used in log messages only)
            :param sources: wanted source names, either a sequence of names
                            or a single comma separated string
        """
        self.__query_url = qurl
        self.__sources = sources
        self.__source_id = None
        self.__request = None
        self.__requestCount = 0
        # One request is expected per wanted source. For a sequence the number of
        # items is used: counting "," elements in a list always gives zero, which
        # stopped the collection after the very first request.
        if isinstance(sources, (str, type(u""))):
            self.__maxRequestsCount = 1 + sources.count(",")
        else:
            self.__maxRequestsCount = len(sources)

    def get_result(self):
        """
            Return the collected request text or None when nothing matched
        """
        return self.__request

    def feed_eventlog_line(self, logline):
        """
            Process one eventlog line

            :return: True when enough requests are collected and feeding may stop, False otherwise
        """
        fields = logline.rstrip('\n').split('\t')
        return self.__feed_fields(fields)

    def __feed_fields(self, fields):
        # The event type is stored at index 2, so at least three fields are required.
        if len(fields) < 3:
            return False
        event_type = fields[2]
        if event_type == "TSourceRequest":
            return self.__on_source_request(fields)
        elif event_type == "TSourceLogRecord":
            return self.__on_source_log_record(fields)
        elif event_type == "SubSourceInit":
            return self.__on_subsource_init(fields)
        elif event_type == "SubSourceRequest":
            return self.__on_subsource_request(fields)
        else:
            return False

    def __on_source_request(self, fields):
        # fields[3] is the source name, fields[4] the payload; truncated lines are skipped.
        if len(fields) < 5 or not self.__is_right_source(fields[3]):
            return False

        payload = fields[4]
        if not RequestBuilder.__is_json(payload):
            logging.info(
                "Invalid TSourceRequest payload: '{}' for query url '{}'".format(
                    payload, self.__query_url
                )
            )
            return False

        # Payloads of several sources are accumulated one per line.
        if self.__request is not None:
            self.__request += "\n"
            self.__request += payload
        else:
            self.__request = payload

        self.__requestCount += 1
        return self.__requestCount >= self.__maxRequestsCount

    def __on_source_log_record(self, fields):
        #  subgraph: the nested event starts at index 4 and is parsed like a top-level one
        return self.__feed_fields(fields[4:])

    def __on_subsource_init(self, fields):
        # fields[6] is matched against the wanted sources, fields[3] is remembered as the source id.
        if len(fields) >= 7 and self.__is_right_source(fields[6]):
            self.__source_id = fields[3]
        return False

    def __on_subsource_request(self, fields):
        # NOTE(review): fields[3] is matched against the source names here, while
        # SubSourceInit stores fields[3] as the source id - confirm which one is intended.
        if len(fields) >= 8 and self.__source_id and self.__is_right_source(fields[3]):
            url_parts = urlparse.urlparse(fields[7])
            # Only path and query are kept; scheme and host are dropped.
            self.__request = "".join([url_parts.path, "?", url_parts.query])
        # Never signals completion; returned explicitly for consistency with the other handlers.
        return False

    def __is_right_source(self, src):
        # Membership test for a sequence of names; substring test when sources is a string.
        return src in self.__sources

    @staticmethod
    def __is_json(value):
        try:
            json.loads(value)
        except ValueError:
            return False
        return True


@decorators.retries(max_tries=3, delay=1)
def _get_middlesearch_request(qurl, source_name, xml_mode):
    """
        Download the report answer for ``qurl`` and extract the request(s) sent to ``source_name``

        In XML mode the eventlog is the text of the ``eventlog`` element (the root itself or
        its child); otherwise it is the ``eventlog`` key of the JSON answer. The eventlog lines
        are fed to a RequestBuilder until it reports completion.

        Wrapped with decorators.retries(max_tries=3, delay=1); presumably the call is repeated
        on failure - see the decorator for the exact policy.

        :return: collected request text or None when no matching event was found
    """
    with contextlib.closing(urllib2.urlopen(qurl)) as response:
        if not xml_mode:
            eventlog = json.load(response)['eventlog']
        else:
            document_root = xmlTree.parse(response).getroot()
            if document_root.tag == 'eventlog':
                eventlog_node = document_root
            else:
                eventlog_node = document_root.find('eventlog')
            eventlog = eventlog_node.text

        request_builder = RequestBuilder(qurl, source_name)
        loglines = eventlog.split('\n')
        eh.verify(len(loglines) >= 2, "Too few lines in eventlog")
        # any() stops at the first line for which the builder reports completion.
        any(request_builder.feed_eventlog_line(entry) for entry in loglines)
        return request_builder.get_result()


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = ImagesGenerateMiddlesearchRequests
