# -*- coding: utf-8 -*-

import os
import re
import time
import jinja2
import tarfile
import itertools

from sandbox.sdk2.helpers import misc

from sandbox.projects.common import apihelpers
from sandbox.projects.images.pumpkin import resources as images_pumpkin_resources
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.parameters import ResourceSelector
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.parameters import SandboxBoolParameter

from sandbox.sandboxsdk.channel import channel

import checker


# Safety margin in seconds: on_execute leaves its main loop as soon as the
# time remaining before ctx["kill_timeout"] drops below this value.
_GAP_TO_ESCAPE_BEFORE_TIMEOUT = 60
"Finish task when it's less than specified seconds till kill_timeout"


# Task-context (self.ctx) keys. on_execute stores the running counters under
# these keys on every iteration and get_results reads them back.
_TOTAL_QUERIES = "total_queries"
"Processed queries counter"

_BAD_SERPS = "bad_serps"
"Failed query-requests counter"

_TOTAL_THUMBS = "total_thumbs"
"Fetched thumbs counter"

_THUMBS_404 = "thumbs_404"
"Number of thumbs with 404 responses"

_BAD_THUMBS = "bad_thumbs"
"Number of thumbs with other errors during fetch"


class PumpkinIndexResourceParameter(ResourceSelector):
    """
    Selector for the pumpkin index archive resource to take queries from.

    When no value is given, ImagesPriemkaThumbs.ensure_pumpkin_index_parameter
    looks up the last resource of ``resource_type`` carrying the filter
    attribute declared here and stores its id in the task context.
    """
    name = 'pumpkin_index_resource_id'
    description = 'Pumpkin index. Leave empty for latest.'
    resource_type = images_pumpkin_resources.IMAGES_PUMPKIN_INDEX_ARCHIVE
    filter_attribute_name = "images_pumpkin_ru"
    filter_attribute_value = "yes"
    default_value = None


class TopQueriesCountParameter(SandboxIntegerParameter):
    """
    Upper limit on the number of queries read from the pumpkin index.

    The value is passed to ImagesPriemkaThumbs._iterate_queries and is also
    used as the maximum of the progress meter in on_execute.
    """
    name = 'top_queries_count'
    description = 'Number of top queries'
    default_value = 10000
    required = True


class RPSLimit(SandboxIntegerParameter):
    """
    Maximum total requests per second.

    Defaults to ``checker.RPS``; on_execute writes the chosen value back into
    ``checker.RPS`` before the run starts.
    """
    name = 'rps_limit'
    description = 'Maximum total RPS'
    default_value = checker.RPS
    required = True


class JSONForQueryURL(SandboxStringParameter):
    """
    Search URL pattern used to request JSON serps.

    Defaults to ``checker.QUERY_URL_PATTERN``; on_execute writes the chosen
    value back into ``checker.QUERY_URL_PATTERN``. A valid pattern contains
    the substring ``json_dump`` and exactly one ``%s`` placeholder (per the
    description, the placeholder stands for the query).
    """
    name = 'json_for_query_url'
    description = 'Search URL producing JSON (use %s for query)'
    default_value = checker.QUERY_URL_PATTERN
    required = True

    @classmethod
    def cast(cls, value):
        """
        Validate the URL pattern and delegate the conversion to the base class.

        :param value: URL pattern entered by the user
        :return: whatever the parent ``cast`` returns for ``value``
        :raises ValueError: if ``json_dump`` is missing from the pattern or
            the number of ``%s`` placeholders is not exactly one
        """
        # Report each violated rule separately so the user can see what to fix.
        if "json_dump" not in value:
            raise ValueError("Search URL must contain 'json_dump': %r" % (value,))
        if value.count("%s") != 1:
            raise ValueError("Search URL must contain exactly one '%%s' placeholder: %r" % (value,))
        return super(JSONForQueryURL, cls).cast(value)


class FixThumbURLFlag(SandboxBoolParameter):
    """
    Flag forwarded to ``checker.FIX_THUMB_URL`` by on_execute.

    Defaults to the current ``checker.FIX_THUMB_URL``; what the flag does to
    thumb URLs is implemented in ``checker`` (see the description below).
    """
    name = 'fix_thumb_url_flag'
    description = 'Crossfire -- exchange production and priemka servers in Thumb URLs'
    default_value = checker.FIX_THUMB_URL


class NotificationList(SandboxStringParameter):
    """
    Recipients of the results e-mail.

    ImagesPriemkaThumbs._send_mail splits the value on runs of commas and/or
    spaces and passes the resulting list to ``channel.sandbox.send_email``.
    """
    name = 'notification_list'
    description = 'Notification list (comma-separated)'
    default_value = "images-index"


class ImagesPriemkaThumbs(SandboxTask):
    """
    Run top pumpkin queries through ``checker`` and report fetch statistics.

    The counters yielded by ``checker.multiprocess`` are stored in the task
    context while the task runs, exposed through :meth:`get_results`,
    rendered by :attr:`footer` and mailed to the notification list once the
    main loop is over.
    """
    type = "IMAGES_PRIEMKA_THUMBS"

    input_parameters = (
        PumpkinIndexResourceParameter,
        TopQueriesCountParameter,
        JSONForQueryURL,
        RPSLimit,
        FixThumbURLFlag,
        NotificationList
    )

    def on_execute(self):
        """
        Task entry point: fetch the index, process the queries, send the mail.

        The loop stops early when less than ``_GAP_TO_ESCAPE_BEFORE_TIMEOUT``
        seconds are left before ``kill_timeout``; the notification e-mail is
        sent in either case.
        """

        # Prepare
        self.ensure_pumpkin_index_parameter()

        # Fetch pumpkin index
        pumpkin_index = self.sync_resource(self.ctx[PumpkinIndexResourceParameter.name])

        # The checker module is configured through its module-level settings.
        top_queries_count = self.ctx[TopQueriesCountParameter.name]
        checker.FIX_THUMB_URL = self.ctx[FixThumbURLFlag.name]
        checker.QUERY_URL_PATTERN = self.ctx[JSONForQueryURL.name]
        checker.RPS = self.ctx[RPSLimit.name]

        # Iterate queries from pumpkin's topqueries.txt
        #   - Fetch JSON serp
        #   - Fetch thumbs from serp
        #   - Interactively update task's status
        with open(self.log_path("checker.log"), "w") as log:
            with misc.ProgressMeter("Charge...") as status:
                start = time.time()
                for (
                    total_queries, bad_serps, total_thumbs, thumbs_404, bad_thumbs
                ) in checker.multiprocess(self._iterate_queries(pumpkin_index, top_queries_count), log):
                    # Submit results to ctx
                    self.ctx[_TOTAL_QUERIES] = total_queries
                    self.ctx[_BAD_SERPS] = bad_serps
                    self.ctx[_TOTAL_THUMBS] = total_thumbs
                    self.ctx[_THUMBS_404] = thumbs_404
                    self.ctx[_BAD_THUMBS] = bad_thumbs

                    # Leave while there is still time to send the e-mail.
                    # NOTE(review): self.updated is presumably the moment the
                    # task (re)started executing -- confirm against the SDK.
                    if self.ctx["kill_timeout"] - (time.time() - self.updated) < _GAP_TO_ESCAPE_BEFORE_TIMEOUT:
                        break

                    delta = time.time() - start
                    # Both values are used as divisors below.
                    if delta > 0 and total_thumbs > 0:
                        rps = (total_thumbs + total_queries) / delta
                        thumb_errors = 100.0 * (thumbs_404 + bad_thumbs) / total_thumbs

                        status.maxval = top_queries_count
                        status.value = total_queries
                        status.message = "Fire! RPS %.2f Errors %.2f%%" % (rps, thumb_errors)

        self._send_mail()

    def ensure_pumpkin_index_parameter(self):
        """
        Make sure the task context holds a pumpkin index resource id.

        When the parameter is empty, the id of the last resource matching the
        type and filter attribute of ``PumpkinIndexResourceParameter`` is
        stored in the context.

        :raises SandboxTaskFailureError: if no such resource is found
        """
        if self.ctx.get(PumpkinIndexResourceParameter.name):
            return

        resource = apihelpers.get_last_resource_with_attribute(
            PumpkinIndexResourceParameter.resource_type,
            PumpkinIndexResourceParameter.filter_attribute_name,
            PumpkinIndexResourceParameter.filter_attribute_value
        )

        if not resource:
            raise SandboxTaskFailureError('Unable to find pumpkin index resource')

        self.ctx[PumpkinIndexResourceParameter.name] = resource.id

    def _iterate_queries(self, pumpkin_index, top_queries_count):
        """
        Iterate queries from pumpkin's topqueries.txt

        Yields the second tab-separated field of each of the first
        ``top_queries_count`` lines of ``./topqueries.txt``.

        :param pumpkin_index: pumpkin index tar file
        :param top_queries_count: upper limit for number of queries iterated
        """

        with tarfile.open(pumpkin_index) as tar:
            topq = tar.extractfile("./topqueries.txt")
            for line in itertools.islice(topq, top_queries_count):
                yield line.split("\t")[1]

    def _send_mail(self):
        """
        Results notification via email

        Nothing is sent when the notification list contains no recipients.
        Counters that were never collected are reported as zero.
        """

        raw_recipients = self.ctx.get(NotificationList.name) or ""
        # re.split() returns empty strings for an empty value and for
        # leading/trailing separators; drop them so that the guard below
        # really detects "nobody to notify".
        emails = [email for email in re.split(r'[, ]+', raw_recipients) if email]
        if not emails:
            return

        # get_results() returns {} when the main loop produced no counters,
        # hence the defaults for the totals.
        results = self.get_results()
        total_queries = results.get("total_queries", 0)
        total_thumbs = results.get("total_thumbs", 0)

        message = "Total queries:  %d\n" % total_queries
        if total_queries > 0:
            message += "Failed queries: %d (%f%%)\n" % (results["bad_serps"], results["bad_serps_percent"])
        message += "Total thumbs: %d\n" % total_thumbs
        if total_thumbs > 0:
            message += "404 thumbs:     %d (%f%%)\n" % (results["thumbs_404"], results["thumbs_404_percent"])
            message += "Failed thumbs:  %d (%f%%)\n" % (results["bad_thumbs"], results["bad_thumbs_percent"])
        message += "\n%s" % self.http_url()

        channel.sandbox.send_email(emails, [], "[thumbs-priemka] %s results" % self.descr.strip(), message)

    # Form data
    def get_results(self):
        """
        Collect the counters stored in the task context.

        :return: dict with ``total_queries``, ``bad_serps``, ``total_thumbs``,
            ``thumbs_404`` and ``bad_thumbs``; ``bad_serps_percent`` is added
            when ``total_queries`` is positive, ``thumbs_404_percent`` and
            ``bad_thumbs_percent`` when ``total_thumbs`` is positive.
            An empty dict is returned when no counters were stored yet.
        """
        if _TOTAL_QUERIES in self.ctx:
            results = {
                "total_queries": self.ctx[_TOTAL_QUERIES],
                "bad_serps": self.ctx[_BAD_SERPS],
                "total_thumbs": self.ctx[_TOTAL_THUMBS],
                "thumbs_404": self.ctx[_THUMBS_404],
                "bad_thumbs": self.ctx[_BAD_THUMBS],
            }
            # Percentages are only defined for non-zero denominators.
            if results["total_queries"] > 0:
                results.update({"bad_serps_percent": 100.0 * results["bad_serps"] / results["total_queries"]})
            if results["total_thumbs"] > 0:
                results.update({"thumbs_404_percent": 100.0 * results["thumbs_404"] / results["total_thumbs"]})
                results.update({"bad_thumbs_percent": 100.0 * results["bad_thumbs"] / results["total_thumbs"]})

            return results
        else:
            return {}

    @property
    def footer(self):
        """
        Render ``footer.html`` (located next to this module) with the
        values from :meth:`get_results` as template variables.
        """
        template_path = os.path.dirname(os.path.abspath(__file__))
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path))
        return env.get_template("footer.html").render(self.get_results())


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up -- confirm.
__Task__ = ImagesPriemkaThumbs
