# coding: utf-8
from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.projects.common.mediasearch import scraper_task as sct
from sandbox.projects.common.mediasearch import yql as yql
from sandbox.projects.common import link_builder as lb
from sandbox.projects.images.pumpkin import resources as images_pumpkin_resources
from sandbox.projects import resource_types
import itertools as itt
import sandbox.common.errors as ce
import sandbox.common.types.misc as ctm
import logging
import tarfile
import textwrap
import time

# NOTE: IMAGES_THUMBS_PLAN could be renamed to ImagesThumbsPlan to match sdk2 style and reduce entropy

# Default values for the task's ``yql_token`` / ``yt_token`` parameters.
# Each is a Vault reference in "owner:name" form: the task splits it on ":"
# and passes the two parts to ``sdk2.Vault.data`` to obtain the actual token.
_DEFAULT_YQL_VAULT_OWNER_NAME = "robot-images-st:yql_token"
_DEFAULT_YT_VAULT_OWNER_NAME = "robot-images-st:yt_token"


class ImagesMineThumbsPlan(sct.ScraperTask):
    """
    Create plan of thumbs in live serps for later checks

    What this class does, as far as this file shows:

    * ``queries_iterator`` feeds the first ``plan_limit`` entries of the
      "top queries" archive into a Scraper batch;
    * ``batch_properties`` describes that batch;
    * ``results_processor`` runs a YQL query over the Scraper results table,
      polls it until it leaves the RUNNING/PENDING states and passes the rows
      to ``store_results``;
    * ``store_results`` writes one thumb record per line into an
      ``IMAGES_THUMBS_PLAN`` resource.

    NOTE(review): the hooks above are presumably invoked by
    ``sct.ScraperTask``; confirm the exact call order against the base class.
    """

    class Parameters(sct.ScraperTask.Parameters):
        # Archive that contains ./topqueries.txt (see queries_iterator).
        input_plan = sct.SimplyLastResource(
            "Top queries",
            resource_type=images_pumpkin_resources.IMAGES_PUMPKIN_INDEX_ARCHIVE,
            attrs={"images_pumpkin_ru": "yes"},
            required=True
        )
        # Number of leading lines of topqueries.txt to turn into queries.
        plan_limit = sdk2.parameters.Integer("Number of top queries", required=True)
        # Passed to Scraper as the batch "host"; must start with http(s)://.
        target_host = sdk2.parameters.String("Target host URL", required=True, default="https://hamster.yandex.ru")
        # Passed to Scraper as the batch "profile".
        scraper_profile = sdk2.parameters.String("Scraper profile", required=True, default="weak_consistency/image/desktop/hamster")
        # Optional filter: when set, only images whose "shard" contains this
        # value are kept, and the value is copied onto the output resource.
        index_state = sdk2.parameters.String("Index state", required=False)
        yql_poll_interval = sdk2.parameters.Integer("Delay between yql polls (seconds)", default=30)
        # Both tokens are Vault references in "owner:name" form.
        yql_token = sdk2.parameters.String("YQL token Vault owner:name", default=_DEFAULT_YQL_VAULT_OWNER_NAME)
        yt_token = sdk2.parameters.String("YT token Vault owner:name", default=_DEFAULT_YT_VAULT_OWNER_NAME)

    class Requirements(sdk2.Task.Requirements):
        environments = (environments.PipEnvironment('yandex-yt'),)

    # YQL query template. Two placeholders are substituted with plain
    # str.replace in results_processor:
    #   {{index_state}}           - inserted inside a Python string literal of
    #                               the embedded script
    #   {{scraper_results_table}} - inserted as the FROM clause target
    # NOTE(review): the index_state value is inserted without any escaping, so
    # a value containing a double quote or backslash would break the embedded
    # script. Confirm that the parameter is only ever set by trusted users.
    YQL_EXTRACT_THUMBS_FROM_SERPS = textwrap.dedent("""
        $script = @@
        import json

        def find_thumbs(content):
            data = json.loads(content)
            try:
                query_url = data["serp-resources"]["main-page-url"]
                raw = json.loads(data["serp-resources"]["resources"][0]["content"])
            except KeyError:
                return
            reqid = raw["reqdata.reqid"]
            index_state = "{{index_state}}"
            for image in raw["searchdata.images"]:
                if not index_state or index_state in image["shard"]:
                    yield "https:{}\t{} {} {} {}".format(image["thmb_href"], query_url, reqid, image["basehost"], image["shard"])
                    if "big_thmb_href" in image:
                        yield "https:{}\t{} {} {} {} BIG".format(image["big_thmb_href"], query_url, reqid, image["basehost"], image["shard"])
        @@;

        $extract_thumbs = Python::find_thumbs(Callable<(String?)->List<String>>, $script);

        SELECT * FROM (
            SELECT
                $extract_thumbs(
                    ToBytes(Yson::SerializeJsonEncodeUtf8(Yson::YPath(Yson::Parse(_other["full-serp"]), "/serp-page")))
                ) AS thumb
            FROM {{scraper_results_table}}
        ) FLATTEN BY thumb;
    """)
    # URL template used for both the private operation id and the share id.
    YQL_TASK_URL = "https://yql.yandex-team.ru/Operations/{task_id}"

    def on_enqueue(self):
        """
        Validate token parameters before the task is queued

        :raises ce.TaskFailure: if a token reference does not consist of
            exactly two ":"-separated parts ("owner:name")
        """
        super(ImagesMineThumbsPlan, self).on_enqueue()
        vault_references = (
            ("YQL", self.Parameters.yql_token),
            ("YT", self.Parameters.yt_token),
        )
        # Same check for both tokens; the YQL token is validated first.
        for kind, reference in vault_references:
            if len(reference.split(":")) != 2:
                raise ce.TaskFailure(
                    "Invalid {} token {} (\"owner:name\" format expected)".format(kind, reference)
                )

    def queries_iterator(self, feedback):
        """
        Iterator of queries for Scraper batch

        (Scraper task preparation)

        Reads ``./topqueries.txt`` from the ``input_plan`` tar archive and
        yields the second tab-separated column of each of the first
        ``plan_limit`` lines.

        :param feedback: Handle to send progress notifications
        :return: Iterator over the second column of the top queries file
            (presumably the query text; verify against the archive format)
        """
        queries_count = self.Parameters.plan_limit
        feedback.set_total(queries_count)
        plan_data = sdk2.ResourceData(self.Parameters.input_plan)

        with tarfile.open(str(plan_data.path.resolve())) as tar:
            topq = tar.extractfile("./topqueries.txt")
            for line in itt.islice(topq, queries_count):
                # NOTE(review): the line is not stripped, so if the second
                # column is the last one, the yielded value keeps its trailing
                # newline; confirm the file has further columns.
                yield line.split("\t")[1]
                # Progress is reported once the consumer asks for the next item.
                feedback.progress()
                self.counter_inc("queries")

    @property
    def batch_properties(self):
        """
        Scraper batch properties

        (Scraper task preparation)

        :return: dict describing the Scraper batch
        :raises ce.TaskError: if ``target_host`` does not start with an
            http:// or https:// scheme
        """
        # Check the scheme prefix rather than a substring, so that a value
        # that merely contains "http" somewhere is rejected.
        if not self.Parameters.target_host.startswith(("http://", "https://")):
            raise ce.TaskError("Target host URL should be URL like https://hamster.yandex.ru")

        return {
            "host": self.Parameters.target_host,
            "description": {
                "name": "Thumbs plan generation",
                "comments": "https://sandbox.yandex-team.ru/task/{}/view".format(self.id),
                "creator": "Sandbox task",
                "quota-project": "imgbase-acceptance"
            },
            "per-set-parameters": {
                "additional-cgi": {
                    "json_dump": ["reqdata.reqid"],
                    "srcask": [],
                    "srcskip": ["IMAGESQUICK", "IMAGESULTRA"],
                    "exp_flags": ["images_numdoc=40"],
                    "thumb-host": ["imtub-test.search.yandex.net"]
                }
            },
            # Presumably milliseconds (would equal 30 days); TODO confirm units.
            "store-results-period": 2592000000,
            "profile": self.Parameters.scraper_profile,
            "search-engine": "yandex-images-json",
            "parse-serps": True
        }

    def store_results(self, results_iterator):
        """
        Process scraper batch results

        Creates an ``IMAGES_THUMBS_PLAN`` resource at path ``thumbs_plan``,
        remembers its id in ``Context.output_resource_id`` and writes the
        ``"thumb"`` field of every row on its own line.

        :param results_iterator: iterable of mappings with a "thumb" key
        """
        # NOTE Previous incarnation of similar tasks sent emails upon completion
        # email_subject_template = "[create-thumbs-plan] {descr} results"

        output_resource = resource_types.IMAGES_THUMBS_PLAN(
            self,
            "Thumbs for {} from Scraper with love".format(self.Parameters.description),
            "thumbs_plan",
            ttl=30
        )

        self.Context.output_resource_id = output_resource.id

        # Tag the resource with the index state only when a filter was given.
        if self.Parameters.index_state:
            output_resource.index_state = self.Parameters.index_state

        with output_resource.path.open("wb") as output:
            for item in results_iterator:
                print >> output, item["thumb"]
                self.counter_inc("thumbs")

    def results_processor(self, scraper, scraper_batch_id, feedback):
        """
        Process scraper results in YT using YQL

        * Run YQL query
        * Wait for completion
        * Fetch and process results

        The YQL operation id, its status and its share id are kept in
        ``self.Context`` (``yql_task``, ``yql_task_status``, ``yql_share_id``)
        so the footer can link to the operation.

        :param scraper: object providing ``find_results_table``
        :param scraper_batch_id: id of the finished Scraper batch
        :param feedback: Handle to send progress notifications
        """

        feedback.action("Running YQL query")
        scraper_results_table = scraper.find_results_table(scraper_batch_id, yql_version=1)
        # Fill in the template placeholders; a missing index state becomes
        # an empty string, which the embedded script treats as "no filter".
        query = (
            self.YQL_EXTRACT_THUMBS_FROM_SERPS
            .replace("{{index_state}}", self.Parameters.index_state or "")
            .replace("{{scraper_results_table}}", scraper_results_table)
        )

        # Only the token lengths are logged, never the tokens themselves.
        yql_token = sdk2.Vault.data(*self.Parameters.yql_token.split(":"))
        logging.debug("YQL token length is %d", len(yql_token))
        yt_token = sdk2.Vault.data(*self.Parameters.yt_token.split(":"))
        logging.debug("YT token length is %d", len(yt_token))

        y = yql.YQL(yql_token, yt_token)
        self.Context.yql_task = y.run(query)
        self.Context.yql_task_status = y.get_status(self.Context.yql_task)["status"]
        self.Context.yql_share_id = y.get_share_id(self.Context.yql_task)
        self.Context.save()
        while self.Context.yql_task_status in ("RUNNING", "PENDING"):
            # NOTE WaitTime can be utilized here as well
            time.sleep(self.Parameters.yql_poll_interval)
            self.Context.yql_task_status = y.get_status(self.Context.yql_task)["status"]
            self.Context.save()

        # NOTE(review): any status other than RUNNING/PENDING ends the loop,
        # and results are fetched without checking that the operation
        # actually succeeded. Confirm how yql.YQL.results_iterator behaves
        # for a failed operation.
        feedback.action("Saving results")
        self.store_results(y.results_iterator(self.Context.yql_task))

    @property
    def _yql_link(self):
        """
        HTML link to the YQL operation followed by its last known status
        """
        return lb.HREF_TO_ITEM.format(
            link=self.YQL_TASK_URL.format(task_id=self.Context.yql_task),
            name="YQL task"
        ) + " (" + self.Context.yql_task_status + ")"

    @property
    def _yql_public_link(self):
        """
        HTML link built from the YQL share id followed by the last known status

        Returns a placeholder text when no share id is stored in the context.
        """
        if self.Context.yql_share_id == ctm.NotExists:
            return 'Unknown public url'
        return lb.HREF_TO_ITEM.format(
            link=self.YQL_TASK_URL.format(task_id=self.Context.yql_share_id),
            name="YQL public url"
        ) + " (" + self.Context.yql_task_status + ")"

    @property
    def footer(self):
        """
        Common footer + YQL link

        The two YQL entries are appended only when the parent footer is a
        list and a YQL operation id is already stored in the context.
        """
        result = super(ImagesMineThumbsPlan, self).footer
        if isinstance(result, list) and self.Context.yql_task != ctm.NotExists:
            result.extend([
                {
                    "content": self._yql_link,
                    "helperName": ""
                },
                {
                    "content": self._yql_public_link,
                    "helperName": ""
                }
            ])
        return result

    @property
    def stats(self):
        """
        Table data for footer

        The method is called on server so use self.counters_ro to access counters data

        :return: list of (title, value) pairs; the "Queries" row is present
            only when that counter is non-zero, the "Thumbs" row is always
            present and shows a dash while the counter is missing
        """
        results = []
        queries = self.counters_ro.get("queries")
        if queries:
            results.append(("Queries", queries))
        results.append(("Thumbs", self.counters_ro.get("thumbs", "&mdash;")))
        return results
