# coding: utf-8
from sandbox.projects.common.mediasearch.online import control, command
import itertools as itt
import tarfile


class ImagesCreateThumbsPlan(control.BaseControl):
    """Build a plan of thumbnail URLs out of image-search JSON SERPs.

    NOTE(review): create_query_url, fetch_json, counter, logger and output are
    not defined in this class; presumably they come from control.BaseControl.
    """

    email_subject_template = "[create-thumbs-plan] {descr} results"
    default_query_url_template = (
        "https://priemka-img.hamster.yandex.ru/images/search?"
        "text=%s&json_dump=searchdata.images&json_dump=reqdata.reqid&exp_flags=images_numdoc=40&"
        "srcask=IMAGES&thumb-host=imtub-test.search.yandex.net"
    )

    @staticmethod
    def iterate_queries(pumpkin_index, queries_count):
        """Yield queries taken from "./topqueries.txt" inside a tar archive.

        pumpkin_index is passed to tarfile.open; at most queries_count lines of
        the member are read.  Each yielded value is the second tab-separated
        field of a line, returned as is (nothing is stripped from it).
        """
        with tarfile.open(pumpkin_index) as archive:
            top_queries = archive.extractfile("./topqueries.txt")
            for row in itt.islice(top_queries, queries_count):
                fields = row.split("\t")
                yield fields[1]

    @classmethod
    def prepare(cls, extra_resource, context):
        """Remember the "index_state" context (task or CL) property on the class.

        Nothing is stored when context is empty, lacks the key, or holds a
        falsy value for it.  extra_resource is not used here.
        """
        if not context or "index_state" not in context:
            return
        if context["index_state"]:
            cls.index_state = context["index_state"]

    def iterate_thumb_data(self, serp_json, query, query_url):
        """Yield (thumb url, debug info) pairs for the documents of a JSON SERP.

        Two SERP layouts are understood: data nested under "tmpl_data", or the
        flat "reqdata.reqid" / "searchdata.images" keys.  When an index_state
        attribute is set, documents whose shard does not contain it are not
        yielded; they are counted as "invalid_state_docs" and reported in a
        single log message once all documents have been visited.  The query
        argument is not used here.
        """
        if "tmpl_data" in serp_json:
            tmpl_data = serp_json["tmpl_data"]
            reqid = tmpl_data["reqdata"]["reqid"]
            images = tmpl_data["searchdata"]["images"]
        else:
            reqid = serp_json["reqdata.reqid"]
            images = serp_json["searchdata.images"]

        debug_info = query_url + " " + reqid
        bad_basesearches = []

        for doc in images:
            if hasattr(self, "index_state") and self.index_state not in doc["shard"]:
                bad_basesearches.append("{} {}".format(doc["basehost"], doc["shard"]))
                self.counter.inc("invalid_state_docs")
                continue

            thumb_url = "http:" + doc["thmb_href"]
            doc_debug_info = debug_info + " " + doc["basehost"] + " " + doc["shard"]
            yield (thumb_url, doc_debug_info)

            # Fast and dirty way to test big thumbs as well
            if "big_thmb_href" in doc:
                yield ("http:" + doc["big_thmb_href"], doc_debug_info + " BIG")

        if bad_basesearches:
            self.logger.info("Bad basesearches (index_state doesn't contain %s):\n%s\nreqid: %s",
                             self.index_state, "\n".join(bad_basesearches), reqid)

    def process_query(self, query):
        """Fetch the SERP for query and append one output line per thumb.

        Every line has the form "<thumb url>\\t<debug info>\\n".  The "serp_ok"
        counter is incremented once after the fetch and "thumbs" once per line.
        """
        url = self.create_query_url(query)
        serp_json = self.fetch_json(url, on_404="serp_error", on_error="serp_error")
        self.counter.inc("serp_ok")

        for thumb_url, thumb_debug in self.iterate_thumb_data(serp_json, query, url):
            plan_line = thumb_url + "\t" + thumb_debug + "\n"
            self.output += plan_line
            self.counter.inc("thumbs")

    @staticmethod
    def template_data(res):
        """Convert the counters mapping res into a list of report rows.

        Each row is a dict with "name", "value" and "fraction" keys.  Only the
        "Total queries" row is produced unless total_queries is positive; the
        "Thumbs per serp" row is added only when there is at least one good
        serp, and its value is a float rather than a string.
        """
        def make_row(name, value, fraction=None):
            return {"name": name, "value": value, "fraction": fraction}

        total_queries = res.get("total_queries", 0)
        rows = [make_row("Total queries", str(total_queries))]
        if not (total_queries > 0):
            return rows

        good_serps = res.get("serp_ok", 0)
        rows.append(make_row("Good serps", str(good_serps), float(good_serps) / total_queries))

        thumbs = res.get("thumbs", 0)
        rows.append(make_row("Thumbs", str(thumbs)))

        invalid_state_docs = res.get("invalid_state_docs", 0)
        rows.append(make_row("Invalid state docs", str(invalid_state_docs)))

        if good_serps > 0:
            rows.append(make_row("Thumbs per serp", float(thumbs) / good_serps))

        return rows


if __name__ == "__main__":
    # Executed as a script: pass the control class to command.command.
    command.command(ImagesCreateThumbsPlan)
