# coding: utf-8

from sandbox import sdk2
from sandbox.common.types import task as ctt
from sandbox.projects.common import decorators
from sandbox.projects import resource_types
from sandbox.projects.images import resource_types as images_resource_types
from sandbox.projects.images.ImagesExamineThumbsIntegrity import ImagesExamineThumbsIntegrity
from sandbox.sandboxsdk.channel import channel

import contextlib
import functools
import itertools as itt
import jinja2
import json
import logging
import multiprocessing
import os
import requests

from urllib import urlencode
try:
    from urllib.parse import parse_qs, urlsplit, urlunsplit
except ImportError:
    from urlparse import parse_qs, urlsplit, urlunsplit


class ResourceInfo(object):
    """Bundle of a plan resource, its target host and its on-disk data.

    ``output`` is the only mutable part: it holds whatever file object is
    currently attached to the resource, or ``None`` when nothing is attached.
    """

    def __init__(self, resource, host):
        self._host = host
        self._resource = resource
        # ResourceData gives access to the local path of the resource.
        self._data = sdk2.ResourceData(resource)
        self._output = None

    @property
    def resource(self):
        """The resource object passed to the constructor."""
        return self._resource

    @property
    def host(self):
        """The target host passed to the constructor."""
        return self._host

    @property
    def data(self):
        """The ``sdk2.ResourceData`` wrapper created for the resource."""
        return self._data

    @property
    def path(self):
        """Path of the resource data, converted to ``str``."""
        return str(self._data.path)

    @property
    def output(self):
        """File object currently attached to the resource, or ``None``."""
        return self._output

    @output.setter
    def output(self, value):
        self._output = value


@contextlib.contextmanager
def _resources_files(resources):
    try:
        for resource in resources:
            resource.output = open(resource.path, "a+")

        yield resources
    finally:
        for resource in resources:
            resource.output.close()
            resource.output = None


class ThumbInfo(object):
    """Read-only record describing one thumb: id, checksum and validity.

    A record built with ``id == 0`` (the default) is flagged as ``empty``.
    """

    def __init__(self, id=0, checksum=0, is_valid=True):
        self._id = id
        self._checksum = checksum
        self._is_valid = is_valid
        # Computed once at construction time from the id that was passed in.
        self._empty = bool(id == 0)

    @property
    def id(self):
        """Thumb identifier as passed to the constructor."""
        return self._id

    @property
    def checksum(self):
        """Thumb checksum as passed to the constructor."""
        return self._checksum

    @property
    def valid(self):
        """The ``is_valid`` value passed to the constructor."""
        return self._is_valid

    @property
    def empty(self):
        """``True`` when the record was created with ``id == 0``."""
        return self._empty


def _iterate_thumbs(file_path, thumbs_count):
    """Yield a ThumbInfo for each of the first ``thumbs_count`` lines of a file.

    Every line is stripped and split on tabs into at most three fields:
    id, checksum and an optional validity marker. When the third field is
    absent the thumb is treated as valid; otherwise it is valid only if the
    field equals ``"valid"``.

    :param file_path: path of the tab-separated thumbs file
    :param thumbs_count: maximum number of lines to read
    """
    with open(file_path, "r") as thumbs_file:
        for raw_line in itt.islice(thumbs_file, thumbs_count):
            fields = raw_line.strip().split("\t", 2)
            validity = fields[2] if len(fields) >= 3 else "valid"
            yield ThumbInfo(fields[0], fields[1], validity == "valid")


def _generate_query(target_hosts, thumb_info):
    query_item = {}
    for target_host in target_hosts:
        scheme, netloc, path, query_string, fragment = urlsplit(target_host)
        query_params = parse_qs(query_string)
        query_params.update({
            "id": thumb_info.id
        })
        new_query_string = urlencode(query_params, doseq=True)

        query_item[target_host] = urlunsplit((scheme, netloc, path, new_query_string, fragment))

    return query_item


class ImagesCheckThumbsChecksum(sdk2.Task):
    """Thumbs integrity checker.

    For every target host the task writes a plan of per-thumb URLs, runs an
    ImagesExamineThumbsIntegrity subtask on that plan, then compares the
    IMAGES_THUMBS_CHECKSUMS resource of each subtask with the source thumbs
    list and pushes the resulting counters to Solomon.
    """

    OUTPUT_PATH_TEMPLATE = "output_{}"
    TARGET_HOST = "host"
    POOLS_COUNT = 100
    # Seconds to wait for Solomon before giving up on a single push attempt.
    SOLOMON_TIMEOUT = 60

    class Parameters(sdk2.Task.Parameters):

        thumbs_resource_id = sdk2.parameters.Resource(
            "Thumbs ids with checksums",
            resource_type=images_resource_types.IMAGES_THUMBS_CHECKSUMS,
            required=True
        )

        queries_limit = sdk2.parameters.Integer(
            "Number of top queries",
            default=200000,
            required=True)

        target_hosts_urls = sdk2.parameters.List(
            "Target hosts urls",
            sdk2.parameters.String,
            required=True)

        tokens = sdk2.parameters.YavSecret(
            "Yav secret with robot tokens (with solomon_token inside)",
            default="sec-01ehs4x7aqqbd15w2keczwknnq"
        )

    class Context(sdk2.Task.Context):
        # Per-host statistics dicts, rendered by the footer.
        statistics = []
        # Per-host samples (at most 10 ids) of thumbs with a wrong checksum.
        top_10_broken_thumbs = []
        # Per-host samples (at most 10 ids) of thumbs reported as not valid.
        top_10_invalid_thumbs = []
        # Ids of the enqueued ImagesExamineThumbsIntegrity subtasks.
        subtasks = []

    def run_child_tasks(self):
        """Write one thumbs plan per target host and enqueue a subtask for each.

        Always finishes by raising ``sdk2.WaitTask`` for the enqueued subtasks.
        """
        thumbs_ids_data_path = str(sdk2.ResourceData(self.Parameters.thumbs_resource_id).path)
        queries_limit = int(self.Parameters.queries_limit)
        target_hosts = list(self.Parameters.target_hosts_urls)

        resources = []
        for i, target_host in enumerate(target_hosts):
            output_path = self.OUTPUT_PATH_TEMPLATE.format(i)
            resource = resource_types.IMAGES_THUMBS_PLAN(
                self, "Thumbs plan for {}".format(target_host),
                output_path,
                ttl=30
            )
            resources.append(ResourceInfo(resource, target_host))

        thumb_iterator = _iterate_thumbs(thumbs_ids_data_path, queries_limit)
        bound_generate_query = functools.partial(_generate_query, target_hosts)
        pool = multiprocessing.Pool(self.POOLS_COUNT)

        try:
            with _resources_files(resources):
                # imap keeps the input order, so every plan file lists thumbs
                # in the same order as the source file.
                for query_item in pool.imap(bound_generate_query, thumb_iterator):
                    for resource_info in resources:
                        resource_info.output.write(query_item[resource_info.host] + os.linesep)
        finally:
            # The workers are not needed past this point; without an explicit
            # shutdown the worker processes would be left running.
            pool.terminate()
            pool.join()

        for resource_info in resources:
            resource_info.data.ready()

            subtask = sdk2.Task[ImagesExamineThumbsIntegrity.type](
                self,
                description="Target host: {}".format(resource_info.host),
                input_resource_id=resource_info.resource.id,
                queries_limit=queries_limit,
                rps_limit=1000,
                build_output=True,
                ordered_output=True,
                attributes='{{"{}": "{}"}}'.format(self.TARGET_HOST, resource_info.host)
            ).enqueue()
            self.Context.subtasks.append(subtask.id)

        raise sdk2.WaitTask(self.Context.subtasks, ctt.Status.Group.FINISH | ctt.Status.Group.BREAK, wait_all=True)

    @decorators.retries(3, delay=5)
    def post_stats_to_solomon(self, url, data_for_send):
        """POST an already serialized JSON payload to Solomon.

        :param url: Solomon push URL
        :param data_for_send: JSON string used as the request body
        :return: the ``requests`` response object
        :raises requests.HTTPError: when Solomon answers with an error status
        """
        token = self.Parameters.tokens.data()

        response = requests.post(
            url,
            headers={
                "Content-Type": "application/json",
                "Authorization": "OAuth %s" % (token["solomon_token"]),
            },
            data=data_for_send,
            # Without a timeout a stalled connection would block the task forever.
            timeout=self.SOLOMON_TIMEOUT
        )
        response.raise_for_status()
        return response

    def send_stats_to_solomon(self, data, labels):
        """Push every ``name -> value`` pair of ``data`` to Solomon as a sensor.

        :param data: dict of sensor names to values
        :param labels: extra labels added to every sensor
        """
        url = "https://solomon.yandex.net/api/v2/push?project=images&cluster=thumbs&service=thumbs_cache_integrity"
        data_json = {}
        data_json["commonLabels"] = {
            "project": "images",
            "cluster": "thumbs",
            "service": "thumbs_cache_integrity"
        }
        data_json["sensors"] = []

        for name, value in data.items():
            # The caller's labels are applied last, so they win over "sensor".
            labels_for_sensor = {"sensor": name}
            labels_for_sensor.update(labels)

            data_json["sensors"].append({
                "labels": labels_for_sensor,
                "value": value
            })

        data_for_send = json.dumps(data_json)
        logging.debug("JSON for solomon %s", data_for_send)

        solomon_response = self.post_stats_to_solomon(url, data_for_send)
        logging.debug("Solomon response %s", solomon_response)

    def on_execute(self):
        """Start the subtasks, then compare their results with the source list.

        For every subtask the thumbs from its IMAGES_THUMBS_CHECKSUMS resource
        are matched by id against the source thumbs; checksum mismatches and
        thumbs not marked valid are counted, sent to Solomon and stored in the
        task context.

        :raises Exception: when a thumb of a subtask result cannot be found in
            the remaining part of the source list
        """
        # run_child_tasks ends by raising sdk2.WaitTask, so the code below only
        # runs on a later execution (presumably with this stage skipped).
        with self.memoize_stage.create_children:
            self.run_child_tasks()

        thumbs_ids_data_path = str(sdk2.ResourceData(self.Parameters.thumbs_resource_id).path)
        queries_limit = int(self.Parameters.queries_limit)

        for subtask_id in self.Context.subtasks:
            resource = channel.sandbox.list_resources(task_id=subtask_id, resource_type=images_resource_types.IMAGES_THUMBS_CHECKSUMS)[0]
            resource_data = sdk2.ResourceData(sdk2.Resource[resource.id])
            target_host = channel.sandbox.get_resource(resource.id).attributes[self.TARGET_HOST]
            # First label of the host name is used as the Solomon "location".
            location = urlsplit(target_host).netloc.split(".")[0]

            source_thumb_iterator = _iterate_thumbs(thumbs_ids_data_path, queries_limit)
            target_thumb_iterator = _iterate_thumbs(str(resource_data.path), queries_limit)
            wrong_checksums_count = 0
            wrong_checksums = []
            invalid_thumbs_count = 0
            invalid_thumbs = []

            while True:
                # An exhausted iterator yields an "empty" ThumbInfo sentinel.
                source = next(source_thumb_iterator, ThumbInfo())
                target = next(target_thumb_iterator, ThumbInfo())

                if target.empty:
                    break

                # Skip source thumbs that are missing from the target until the
                # ids line up again.
                while source.id != target.id:
                    if source.empty:
                        raise Exception("Target thumbs are not found in source.")
                    # The default keeps an exhausted source from leaking a bare
                    # StopIteration; the check above reports it instead.
                    source = next(source_thumb_iterator, ThumbInfo())

                if source.checksum != target.checksum:
                    wrong_checksums_count += 1
                    wrong_checksums.append(source.id)

                if not target.valid:
                    invalid_thumbs_count += 1
                    invalid_thumbs.append(source.id)

            statistics = channel.sandbox.get_task(subtask_id).ctx["results"]
            statistics["wrong_checksums"] = wrong_checksums_count
            statistics["invalid_thumbs"] = invalid_thumbs_count

            self.send_stats_to_solomon(statistics, {"location": location})

            # "host" is added only after the push, so it is not sent as a sensor.
            statistics["host"] = target_host
            self.Context.statistics.append(statistics)

            self.Context.top_10_broken_thumbs.append({
                "host": target_host,
                "ids": wrong_checksums[:10]
            })

            self.Context.top_10_invalid_thumbs.append({
                "host": target_host,
                "ids": invalid_thumbs[:10]
            })

        self.Context.save()

    @sdk2.footer()
    def footer(self):
        """Render ``footer.html`` (located next to this module) with the stored statistics."""
        template_path = os.path.dirname(os.path.abspath(__file__))
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_path), extensions=['jinja2.ext.do'])
        return env.get_template("footer.html").render({"result": self.Context.statistics})
