import itertools
import logging
import multiprocessing
import os.path
import urllib

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk import task

from sandbox.projects import resource_types
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.common.search import settings as media_settings
from sandbox.projects.common.mediasearch import user_requests
from sandbox.projects.common.mediasearch import yasm_task
from sandbox.projects.images.pumpkin import resources as images_pumpkin_resources
from sandbox.projects.images.daemons import components as daemons_components
from sandbox.projects.images.resources import task as resources_task


# Parameter set of the cbirdaemon component, created once at import time.
# The names of its Binary/Config/Settings parameters are used as task-context
# keys when the daemon is configured in this module.
CBIRDAEMON_PARAMS = daemons_components.create_cbirdaemon_params()


class ReportUrl(parameters.SandboxStringParameter):
    """
        String parameter: URL used to load the Images SERP

        The value is passed through ``str.format`` with the query TLD as the
        only positional argument before it is requested, so a ``{}``/``{0}``
        placeholder in it is replaced by the TLD.
    """
    name = "report_url"
    description = "Url to load SERP of Images"
    default_value = "https://hamster.yandex.ru/images/search?"


# Note: thumbdaemon fails to find the thumb unless 'id' is the first argument
class ThumbdaemonUrl(parameters.SandboxStringParameter):
    """
        String parameter: URL template of a thumbnail in thumbdaemon

        The template is formatted with a ``thumb_id`` keyword argument, so it
        has to contain a ``{thumb_id}`` placeholder.
    """
    name = "thumbdaemon_url"
    description = "Thumbdaemon url"
    default_value = "http://im0-tub-ru.yandex.net/i?id={thumb_id}&n=13"


class QueriesNumberParameter(parameters.SandboxIntegerParameter):
    """
        Integer parameter: target number of queries to generate

        Read in ``on_execute`` and used as the limit at which generation stops.
    """
    name = 'queries_num'
    description = 'Number of queries to generate'
    default_value = 10000


class AddAttrs(parameters.SandboxStringParameter):
    """
        String parameter: extra attributes for the produced resource

        Written as ``attr1=v1, attr2=v2``; the value is parsed with
        ``string.parse_attrs`` when the output resource is created.
        No default value is declared here.
    """
    name = 'resource_attrs'
    description = 'Set additional attrs to resources (ex.: attr1=v1, attr2=v2)'


class PoolSizeParameter(parameters.SandboxIntegerParameter):
    """
        Integer parameter: number of worker processes

        Passed to ``multiprocessing.Pool`` as the pool size in ``on_execute``.
    """
    name = 'pool_size'
    description = 'Working pool size'
    default_value = 10


class ImagesGenerateCbirSignUserQueries(yasm_task.YasmTask,
                                        resources_task.ImagesProductionResourcesTask,
                                        task.SandboxTask):
    """
        Generate user queries with signatures for CBIR

        Extract thumbs from requests, send them to cbirdaemon to retrieve signatures

        Source queries are taken from ``topqueries.txt`` of the last pumpkin index
        resource. Each query is processed in a worker pool; when a signature was
        obtained for one of its thumbs, the query is written to the output
        resource with a ``like_nn_signature`` CGI parameter appended.
    """

    type = "IMAGES_GENERATE_CBIRSIGN_USER_QUERIES"
    push_signal_name = "generate_cbirsign_user_queries"

    input_parameters = (
        ReportUrl,
        ThumbdaemonUrl,
        QueriesNumberParameter,
        AddAttrs,
        PoolSizeParameter,
    )

    def on_enqueue(self):
        """
            Register the output USERS_QUERIES resource before the task starts

            Resource attributes come from the AddAttrs parameter.
        """
        task.SandboxTask.on_enqueue(self)
        self.create_resource(
            self.descr,
            self.__get_cbir_queries(),
            resource_types.USERS_QUERIES,
            attributes=string.parse_attrs(utils.get_or_default(self.ctx, AddAttrs)),
            arch=sandboxapi.ARCH_ANY,
        )

    def on_execute(self):
        """
            Start cbirdaemon, generate signed queries and send the yasm notification

            The worker pool is created before the daemon is started (same order as
            before) and is always terminated and joined on the way out.
        """
        pool = multiprocessing.Pool(utils.get_or_default(self.ctx, PoolSizeParameter))
        try:
            max_count = utils.get_or_default(self.ctx, QueriesNumberParameter)

            with self.__get_cbir_daemon() as cbir_daemon:
                with open(self.__get_cbir_queries(), 'w') as cbir_queries_file:
                    with open(self.__get_top_queries()) as top_queries_file:
                        self.__generate_queries(
                            pool,
                            cbir_daemon.port,
                            cbir_queries_file,
                            # The query line is the second tab-separated column
                            (line.split("\t")[1] for line in top_queries_file),
                            max_count=max_count
                        )
        finally:
            # imap_unordered keeps feeding workers with the remaining input even
            # after the consumer stops early (or fails), so without an explicit
            # shutdown the workers would go on sending requests in background.
            pool.terminate()
            pool.join()

        self._yasm_notify()

    def __generate_queries(self, pool, cbir_daemon_port, cbir_queries_file, queries_iterator, max_count=None):
        """
            Process queries in the pool and write the signed ones to the output file

            :param pool: multiprocessing pool used to run _process_query_line
            :param cbir_daemon_port: port of the local cbirdaemon instance
            :param cbir_queries_file: writable file object, one query per line
            :param queries_iterator: iterable of source query lines
            :param max_count: maximum number of queries to write, None means no limit
            :return: number of queries written
        """
        if max_count is not None and max_count <= 0:
            # Nothing was requested: do not even start feeding the pool
            return 0

        report_url = self.ctx[ReportUrl.name]
        thumbdaemon_url = self.ctx[ThumbdaemonUrl.name]
        cbirdaemon_url = "http://localhost:{}".format(cbir_daemon_port)
        # The worker takes a single argument, so pack the constant urls
        # together with every query line into one tuple.
        worker_args = itertools.izip(
            itertools.repeat(report_url),
            itertools.repeat(thumbdaemon_url),
            itertools.repeat(cbirdaemon_url),
            queries_iterator
        )
        count = 0
        for output in pool.imap_unordered(_process_query_line, worker_args):
            if output is None:
                # No signature could be obtained for this query
                continue
            cbir_queries_file.write(output + "\n")
            count += 1
            # Stop as soon as exactly max_count queries were written
            if max_count is not None and count >= max_count:
                break
        return count

    def __get_cbir_daemon(self):
        """
            Put last released cbirdaemon binary, config and database into the
            task context and return the cbirdaemon component

            The returned object is used as a context manager exposing ``port``.
        """
        index_type = media_settings.INDEX_CBIR_DAEMON
        self.ctx.update({
            CBIRDAEMON_PARAMS.Binary.name: self._get_basesearch_executable_last_released(index_type),
            CBIRDAEMON_PARAMS.Config.name: self._get_basesearch_config_last_released(index_type),
            CBIRDAEMON_PARAMS.Settings.name: self._get_basesearch_database(index_type),
        })
        return daemons_components.get_cbirdaemon()

    def __get_cbir_queries(self):
        """
            Return absolute path of the output file with generated queries
        """
        return self.abs_path("cbir-queries.txt")

    def __get_top_queries(self):
        """
            Sync the last pumpkin index resource for the default TLD and
            return the path to its topqueries.txt
        """
        pumpkin_attributes = media_settings.ImagesSettings.pumpkin_resource_attributes(user_requests.DEFAULT_TLD)
        pumpkin_resource_id = utils.get_and_check_last_resource_with_attribute(
            images_pumpkin_resources.IMAGES_PUMPKIN_INDEX,
            pumpkin_attributes[0],
            pumpkin_attributes[1]
        ).id
        return os.path.join(self.sync_resource(pumpkin_resource_id), "topqueries.txt")


def _process_query_line(args):
    """
        Pool worker: build a signed query line for a single source query

        :param args: tuple (report_url, thumbdaemon_url, cbirdaemon_url, line);
                     packed into one value because the pool passes a single argument
        :return: query line with ``like_nn_signature`` appended to its CGI
                 parameters, or None when no thumb of the query produced a signature
    """
    report_url, thumbdaemon_url, cbirdaemon_url, line = args
    text, region, cgi_params, tld = user_requests.split_query_line(line)
    serp_url = report_url.format(tld)

    for thumb_id in user_requests.get_image_thumbs(serp_url, text, cgi_params):
        logging.info("Processing thumb_id={}...".format(thumb_id))
        try:
            thumb_url = thumbdaemon_url.format(thumb_id=thumb_id)
            signature = user_requests.get_image_signature(cbirdaemon_url, thumb_url)
            if not signature:
                # Empty signature: try the next thumb of the same query
                continue
            logging.info("Adding [{}] to plan".format(line))
            signed_cgi_params = "{}&like_nn_signature={}".format(cgi_params, urllib.quote(signature))
            # The first thumb with a signature wins
            return user_requests.join_query_line(text, region, signed_cgi_params, tld)
        except Exception as error:
            # Best effort: a failure on one thumb must not drop the whole query
            logging.info("Failed to process thumb={}: {}".format(thumb_id, error))

    # Reached only when none of the thumbs produced a signature
    logging.info("Nothing found for [{}]".format(line))
    return None


# Module-level alias of the task class; presumably the name the Sandbox task
# loader looks up to discover the task in this module - TODO confirm.
__Task__ = ImagesGenerateCbirSignUserQueries
