# coding: utf-8
from sandbox.projects.common.mediasearch.online import control, command
from sandbox.projects.common.mediasearch import request_templates
import itertools as itt
import tarfile
import time
import sys

# Retry policy used by ImagesExamineSpecialRequests.persistent_fetch_json.
# Total number of fetch attempts per request (the first attempt included).
_NUMBER_OF_RETRIES = 3
# Pause, in seconds, before every attempt except the first one.
_RETRY_AFTER_SECONDS = 2
# Timeout, in seconds, of the first attempt.
_TIMEOUT_BASE_SECONDS = 2
# Added to the timeout for each further attempt: attempt k (counted from 0)
# uses _TIMEOUT_BASE_SECONDS + k * _TIMEOUT_EXTRA_SECONDS.
_TIMEOUT_EXTRA_SECONDS = 10


class ImagesExamineSpecialRequests(control.BaseControl):
    """Check that synthetic requests built from a live SERP return documents.

    For every query the first image of the SERP is taken, its URLs are handed
    to ``request_templates.generate_requests`` and every generated request is
    fetched in turn; per-format counters record completed, empty and failed
    requests, and ``template_data`` turns those counters into report rows.
    """

    # NOTE(review): not referenced in this class; presumably consumed by
    # control.BaseControl when the report is mailed -- confirm.
    email_subject_template = "[url:-integrity] {descr} results"
    # NOTE(review): not referenced in this class; presumably the template
    # behind self.create_query_url, with the query substituted for %s -- confirm.
    default_query_url_template = \
        "https://priemka-img.hamster.yandex.ru/images/search?text=%s&exp_flags=images_numdoc=40&srcask=IMAGES&" \
        "pron=ipornmax255&source=wiz&json_dump=searchdata.images"

    @staticmethod
    def iterate_queries(pumpkin_index, queries_count):
        with tarfile.open(pumpkin_index) as tar:
            topq = tar.extractfile("./topqueries.txt")
            for line in itt.islice(topq, queries_count):
                yield line.split("\t")[1]

    def persistent_fetch_json(self, request_url, error_counter):
        first_time = True
        for retry in xrange(_NUMBER_OF_RETRIES):
            if first_time:
                first_time = False
            else:
                time.sleep(_RETRY_AFTER_SECONDS)

            timeout = _TIMEOUT_BASE_SECONDS + retry * _TIMEOUT_EXTRA_SECONDS
            serp_data = self.fetch_json(request_url, error_counter, error_counter, timeout)
            if len(serp_data["searchdata.images"]) > 0:
                return serp_data

            self.logger.info("Empty serp")

    def process_query(self, query):
        query_url = self.create_query_url(query)
        serp_data = self.fetch_json(query_url, "serp_error", "serp_error")

        self.counter["serp_docs"] = len(serp_data["searchdata.images"])

        if len(serp_data["searchdata.images"]) == 0:
            self.counter.inc("serp_empty")
            return

        # Exception may arise here
        image = serp_data["searchdata.images"][0]
        image_url = image["url"]
        image_page_url = image["html_href"]
        image_basehost = image["basehost"]
        image_shard = image["shard"]
        image_docid = image["docid"]

        # Some queries are discarded here
        if "%" in image_url or "xn--" in image_url or "%" in image_page_url or "xn--" in image_page_url:
            self.logger.info("Suspecting problems with urls, skipping. %s %s", image_url, image_page_url)
            return

        self.counter.inc("serp_ok")

        for fmt, request in request_templates.generate_requests(query, [image_url, image_page_url]):
            try:
                request_url = self.create_query_url(request)
                serp_data = self.persistent_fetch_json(request_url, fmt + "_error")

                if serp_data is None:
                    self.counter.inc(fmt + "_empty")

                    self.logger.info("<<<")
                    self.logger.info("Request: %s", query)
                    self.logger.info("Request URL: %s", query_url)
                    self.logger.info("Synthetic: %s", request)
                    self.logger.info("Synthetic URL: %s",
                        request_url.decode("utf-8").replace('&json_dump=da', '').replace('priemka-img.hamster.', ''))
                    self.logger.info("Format: %s", fmt)
                    self.logger.info("Basehost: %s", image_basehost)
                    self.logger.info("Shard: %s", image_shard)
                    self.logger.info("Docid: %s", image_docid)
                    self.logger.info(">>>")
                else:
                    self.counter[fmt + "_docs"] = len(serp_data["searchdata.images"])
                self.counter.inc(fmt + "_ok")
            except:
                if self.counter[fmt + "_error"] == 0:
                    self.logger.info("EXCEPTION IN LOOP %s", str(sys.exc_info()[1]))

    @staticmethod
    def template_data(res):
        data = []
        total_queries = res.get("total_queries", 0)
        data.append({"name": "Total queries", "value": str(total_queries), "fraction": None})

        if total_queries > 0:
            good_serps = res.get("serp_ok", 0)
            data.append({"name": "Good serps",
                         "value": str(good_serps),
                         "fraction": float(good_serps) / total_queries})

            serp_errors = res.get("serp_error", 0)
            if serp_errors > 0:
                data.append({"name": "Serp errors",
                             "value": str(serp_errors),
                             "fraction": float(serp_errors) / total_queries})

            empty_serps = res.get("serp_empty", 0)
            if empty_serps > 0:
                data.append({"name": "Empty serps",
                             "value": str(empty_serps),
                             "fraction": float(empty_serps) / total_queries})

            if good_serps > 0:
#                serp_docs = res.get("serp_docs", 0)
#                data.append({"name": "Docs in average",
#                             "value": float(serp_docs) / good_serps,
#                             "fraction": None})

                for fmt in request_templates.REQUEST_TEMPLATES:
                    fmt_error = res.get(fmt + "_error", 0)
                    if fmt_error > 0:
                        data.append({"name": fmt + " serp errors",
                                     "value": str(fmt_error),
                                     "fraction": float(fmt_error) / good_serps})

                    fmt_ok = res.get(fmt + "_ok", 0)
                    fmt_internal_error = good_serps - fmt_error - fmt_ok
                    if fmt_internal_error != 0:
                        data.append({"name": fmt + " internal errors",
                                     "value": str(fmt_internal_error),
                                     "fraction": float(fmt_internal_error) / good_serps})

                    if fmt_ok > 0:
#                        fmt_docs = res.get(fmt + "_docs", 0)
#                        data.append({"name": fmt + " docs avg",
#                                     "value": float(fmt_docs) / fmt_ok,
#                                     "fraction": None})

                        fmt_empty = res.get(fmt + "_empty", 0)
                        if fmt_empty > 0:
                            data.append({"name": fmt + " empty",
                                         "value": str(fmt_empty),
                                         "fraction": float(fmt_empty) / fmt_ok})

        return data


# Script entry point: hand the control class to command.command.
# NOTE(review): presumably that helper parses the command line and runs the
# control -- its behaviour is not visible in this file, confirm.
if __name__ == '__main__':
    command.command(ImagesExamineSpecialRequests)
