import itertools
import logging
import multiprocessing
import os.path

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import sandboxapi
from sandbox.sandboxsdk import task

from sandbox.projects import resource_types
from sandbox.projects.common import string
from sandbox.projects.common import utils
from sandbox.projects.common.search import settings as media_settings
from sandbox.projects.common.mediasearch import user_requests
from sandbox.projects.common.mediasearch import request_templates
from sandbox.projects.common.mediasearch import yasm_task
from sandbox.projects.images.pumpkin import resources as images_pumpkin_resources


# Upper bound on how many image URLs are taken for a single query
# (passed to itertools.islice in _process_query_line).
_MAX_URLS = 2
# Task context key that is set once the query generation step has completed,
# so that a repeated on_execute call does not run it again.
_EXECUTED_KEY = "_task_executed"


class ReportUrl(parameters.SandboxStringParameter):
    # String task parameter holding the Images SERP url template.
    # The "{}" placeholder of the value is filled with the query TLD through
    # str.format in _process_query_line.
    default_value = "https://hamster.yandex.{}/images/search?"
    description = "Url to load SERP of Images"
    name = "report_url"


class QueriesAttributesParameter(parameters.SandboxStringParameter):
    # String task parameter with the attributes of the output resource.
    # The task passes its value through string.parse_attrs when the
    # USERS_QUERIES resource is created in on_enqueue.
    description = "Queries attributes"
    name = "queries_attributes"


class MaxQueriesParameter(parameters.SandboxIntegerParameter):
    # Integer task parameter: limit on the number of generated query lines
    # written to the output file.
    default_value = 10000
    description = "Max number of queries"
    name = "max_queries"


class PoolSizeParameter(parameters.SandboxIntegerParameter):
    # Integer task parameter: number of worker processes given to
    # multiprocessing.Pool when the queries are generated.
    default_value = 30
    description = "Working pool size"
    name = "pool_size"


class ImagesGenerateBaseUserQueries(yasm_task.YasmTask, task.SandboxTask):
    """
        Generate user queries for Images search according to templates
    """

    type = "IMAGES_GENERATE_BASE_USER_QUERIES"
    push_signal_name = "generate_base_user_queries"

    input_parameters = (
        ReportUrl,
        MaxQueriesParameter,
        QueriesAttributesParameter,
        PoolSizeParameter,
    )

    def on_enqueue(self):
        """
        Run the base on_enqueue hook and create the output resource.

        The USERS_QUERIES resource points at the output queries file and gets
        the attributes parsed from the 'queries_attributes' parameter.
        """
        task.SandboxTask.on_enqueue(self)
        self.create_resource(
            self.descr,
            self.__get_out_queries(),
            resource_types.USERS_QUERIES,
            attributes=string.parse_attrs(self.ctx[QueriesAttributesParameter.name]),
            arch=sandboxapi.ARCH_ANY,
        )

    def on_execute(self):
        """
        Generate the queries once, then send the yasm notification.

        The completion flag stored in the task context lets a repeated call
        skip the generation and only retry the notification.
        """
        # yasm_notify can raise TemporaryError, so we need to avoid a double work
        if _EXECUTED_KEY not in self.ctx:
            self.__on_execute()
            self.ctx[_EXECUTED_KEY] = 1

        self._yasm_notify()

    def __on_execute(self):
        """
        Build the output queries file from the top queries file.

        For every line of the top queries file the second whitespace-separated
        field is paired with the report url and handed to _process_query_line
        in a pool of worker processes.  The lines returned by the workers are
        written one per line, in completion order, until 'max_queries' lines
        have been written.  The pool is always terminated and joined before
        the method returns or raises.
        """
        max_count = self.ctx[MaxQueriesParameter.name]
        report_url = self.ctx[ReportUrl.name]

        pool = multiprocessing.Pool(self.ctx[PoolSizeParameter.name])
        try:
            with open(self.__get_out_queries(), 'w') as out_queries_file:
                with open(self.__get_top_queries()) as top_queries_file:
                    queries_iterator = itertools.izip(
                        itertools.repeat(report_url),
                        (line.split()[1] for line in top_queries_file)
                    )

                    count = 0
                    for output in pool.imap_unordered(_process_query_line, queries_iterator):
                        for out_line in output:
                            # Enforce the limit per line, so that a large batch
                            # cannot push the output past 'max_queries'.
                            if count >= max_count:
                                break
                            out_queries_file.write(out_line + "\n")
                            count += 1
                        if count >= max_count:
                            break

                    # Stop the pool while the input file is still open: the
                    # pool consumes queries_iterator lazily and would otherwise
                    # keep reading from a file that is about to be closed.
                    pool.terminate()
        finally:
            # Covers the error paths as well; a repeated terminate() is a no-op.
            pool.terminate()
            pool.join()

    def __get_out_queries(self):
        """Return the absolute path of the output queries file."""
        return self.abs_path("out-queries.txt")

    def __get_top_queries(self):
        """
        Sync the latest pumpkin index resource and return its top queries path.

        The resource is looked up by the attribute name/value pair that the
        Images settings give for the default TLD.
        """
        pumpkin_attributes = media_settings.ImagesSettings.pumpkin_resource_attributes(user_requests.DEFAULT_TLD)
        pumpkin_resource_id = utils.get_and_check_last_resource_with_attribute(
            images_pumpkin_resources.IMAGES_PUMPKIN_INDEX,
            pumpkin_attributes[0],
            pumpkin_attributes[1]
        ).id
        return os.path.join(self.sync_resource(pumpkin_resource_id), "topqueries.txt")


def _process_query_line(args):
    """
    Expand one query line into templated query lines (pool worker function).

    args is a (report_url, line) pair.  The line is split into text, region,
    cgi parameters and TLD; the TLD is substituted into report_url and at most
    _MAX_URLS image urls are taken for the query.  With at least two image
    urls, one output line is produced per request generated by
    request_templates.generate_requests; otherwise an empty list is returned.
    """
    report_url, line = args
    text, region, cgi_params, tld = user_requests.split_query_line(line)
    serp_url = report_url.format(tld)

    found_urls = user_requests.get_image_urls(serp_url, text, cgi_params)
    image_urls = list(itertools.islice(found_urls, _MAX_URLS))

    # Guard clause: not enough images to build templated requests from.
    if len(image_urls) < 2:
        logging.info("Nothing found for [{}]".format(line))
        return []

    logging.info("Adding [{}]".format(line))
    generated_lines = []
    for _, formatted_text in request_templates.generate_requests(text, image_urls):
        generated_lines.append(
            user_requests.join_query_line(formatted_text, region, cgi_params, tld)
        )
    return generated_lines


# NOTE(review): presumably the module-level name through which the Sandbox
# loader finds the task class of this module - confirm against the framework.
__Task__ = ImagesGenerateBaseUserQueries
