# -*- coding: utf-8 -*-

import os
import shutil
import requests
import logging
import collections

from sandbox import sdk2

from sandbox.sandboxsdk.errors import SandboxTaskFailureError

from sandbox.common.types.task import Status
from sandbox.common.types.client import Tag

from sandbox.projects.images import resource_types as images_resource_type

from sandbox.projects import resource_types
from sandbox.projects.common.decorators import retries
from sandbox.projects.common.search.components import create_improxy_params
from sandbox.projects.common.search.components import get_improxy
from sandbox.projects.common.search.settings import ImagesSettings
from sandbox.projects.common.search.settings import VideoSettings

from sandbox.projects.common.base_search_quality import threadPool

from sandbox.projects.common.http_responses.request import host_and_path, filename_for_request
from sandbox.projects.common.http_responses.BaseLocalResponsesTask import BaseLocalResponsesTask, create_parameters
from sandbox.projects.common.http_responses.BaseLocalResponsesTask import REQUESTS_RESOURCE_KEY, REQUESTS_LIMIT_KEY, \
    REQUIRE_ALL_SUCCESS_KEY, RESPONSES_RESOURCE_KEY

from kernel.util.functional import memoized

from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.paths import make_folder


class GetProdResponseException(Exception):
    """Raised by ``_get_request`` when an HTTP request fails with a ``requests`` error."""


@retries(max_tries=5, delay=1, exceptions=(GetProdResponseException, ))
def _get_request(url, headers):
    """
        Perform a streaming GET request and return the open response.

        Any status other than 404 is passed through ``raise_for_status()``;
        404 is treated as an acceptable answer and returned as is.

        The function is wrapped with ``retries`` configured for
        ``GetProdResponseException`` (max_tries=5, delay=1) -- presumably this
        re-invokes the call on that exception; confirm against the decorator.

        :param url: URL to fetch
        :param headers: dict of HTTP headers to send with the request
        :return: ``requests.Response`` opened with ``stream=True``; the caller
                 is responsible for consuming and closing it
        :raises GetProdResponseException: on any ``requests.RequestException``
    """
    response = None
    try:
        response = requests.get(url, headers=headers, timeout=10, stream=True)
        # 404 is ok
        if response.status_code != 404:
            response.raise_for_status()
        return response
    except requests.RequestException as e:
        # A streamed response keeps its connection open until it is read or
        # closed; release it before reporting the failure so that repeated
        # attempts do not pile up open connections.
        if response is not None:
            response.close()
        err = "Failed to retrieve '{}' error {}".format(url, e)
        logging.info(err)
        raise GetProdResponseException(err)


class ImproxyGetResponses(BaseLocalResponsesTask):
    """
        Collects responses from a locally started thumbnail proxy (improxy).

        For every URL of the requests resource the response body is stored in
        a separate file of the data directory, a meta file with the status
        code and headers of each response is written, and the data directory
        is packed into a tar.gz archive.
    """
    type = 'IMPROXY_GET_RESPONSES'
    client_tags = Tag.LINUX_PRECISE & ~Tag.LXC

    input_parameters = create_improxy_params(ImagesSettings.IMPROXY_CONFIGS + VideoSettings.IMPROXY_CONFIGS) +\
        create_parameters(images_resource_type.IMAGES_THUMBS_REQUESTS, 10000)

    def get_responses_resource_type(self):
        """Return the resource type used for the collected responses."""
        return resource_types.THUMB_DAEMON_RESPONSES_ARCHIVE

    @memoized
    def get_component(self):
        """Return the improxy component (memoized)."""
        return get_improxy()

    @staticmethod
    def _format_meta_entry(url, status_code, header_dump):
        """
            Render one record of the meta file.

            :param url: requested URL
            :param status_code: HTTP status code, or '000' when no response was received
            :param header_dump: list of "Name: value" header lines (may be empty)
            :return: "-- <url> <status>" line followed by the header lines;
                     always ends with exactly one newline
        """
        return "-- {} {}\n{}{}".format(url, str(status_code), "\n".join(header_dump),
                                       '\n' if header_dump else '')

    def save_responses(self, component, result_dir, on_ready=None):
        """
            Fetch all requests and store bodies, meta information and the archive.

            :param component: started component; not referenced by this method
            :param result_dir: directory for the meta file and the archive
            :param on_ready: optional callable invoked after the archive is built
            :raises SandboxTaskFailureError: when the requests file is empty, or,
                if the REQUIRE_ALL_SUCCESS_KEY context value is set, when a request
                fails, a response body is empty, or any status differs from 200
        """
        requests_resource_path = self.sync_resource(self.ctx[REQUESTS_RESOURCE_KEY])

        # One URL per line; blank lines carry no request and are skipped.
        with open(requests_resource_path, 'r') as requests_file:
            stripped_lines = (line.strip() for line in requests_file)
            urls_to_fetch = [line for line in stripped_lines if line]
        if not urls_to_fetch:
            raise SandboxTaskFailureError("Requests file is empty")
        urls_to_fetch = urls_to_fetch[:self.ctx[REQUESTS_LIMIT_KEY]]

        data_dir = self.get_data_dir()
        make_folder(data_dir, True)
        make_folder(result_dir, True)

        require_all_success = self.ctx[REQUIRE_ALL_SUCCESS_KEY]
        format_meta_entry = self._format_meta_entry

        def response_getter(urls, *args, **kwargs):
            """Fetch a chunk of URLs; return a list of (url, status_code, meta_text) tuples."""
            entries = []

            for url in urls:
                request_host, request_path = host_and_path(url)

                accept_header = '*/*'
                if request_host == ImagesSettings.IMPROXY_WEBP_HOST:
                    accept_header = 'image/webp,image/*,*/*;q=0.8'

                headers = {
                    'Host': request_host,
                    'Accept': accept_header
                }

                response = None
                try:
                    response = _get_request(url, headers)
                except GetProdResponseException as e:
                    if require_all_success:
                        raise SandboxTaskFailureError("Bad response: {} error: {}".format(url, e))

                data_file_path = os.path.join(data_dir, filename_for_request(request_path))
                header_dump = []
                # '000' marks a request for which no response was received.
                status_code = '000'
                try:
                    # Bodies are binary data, so the file is opened in binary mode.
                    with open(data_file_path, 'wb') as data_file:
                        logging.info("Save response from {} to {} file".format(url, data_file_path))
                        if response is not None and response.raw is not None:
                            # Let urllib3 undo the transfer encoding (gzip/deflate) while copying.
                            response.raw.decode_content = True
                            shutil.copyfileobj(response.raw, data_file)

                    if response is not None:
                        header_dump = ["{}: {}".format(key, value) for key, value in response.headers.items()]
                        if response.status_code is not None:
                            status_code = response.status_code
                finally:
                    # The response was opened with stream=True and must be closed explicitly.
                    if response is not None:
                        response.close()

                if os.stat(data_file_path).st_size == 0 and require_all_success:
                    raise SandboxTaskFailureError('Got empty response with success status: {}'.format(url))

                # Keep the status code next to the rendered text so that the final
                # check below can inspect it without parsing the text back.
                entries.append((url, status_code, format_meta_entry(url, status_code, header_dump)))
            return entries

        # At least one worker is required even on a single-CPU client.
        worker_count = max(1, int(self.client_info['ncpu'] // 2))
        # NOTE(review): process_data is expected to return the concatenated
        # per-chunk results of response_getter -- confirm against threadPool.
        result_list = threadPool.process_data(response_getter, urls_to_fetch, params=None,
                                              process_count=worker_count,
                                              use_processes=False)
        meta_path = os.path.join(result_dir, self.get_responses_resource_type().meta_file)
        with open(meta_path, 'w') as stats_file:
            for _, _, meta_text in result_list:
                stats_file.write(meta_text)

        archive_path = os.path.join(result_dir, self.get_responses_resource_type().archive_file)
        process.run_process(['tar', '-czf', archive_path, self.get_responses_resource_type().data_dir],
                            wait=True, check=True)

        if on_ready is not None:
            on_ready()

        # The status check intentionally runs after on_ready().
        if require_all_success and any(status != 200 for _, status, _ in result_list):
            raise SandboxTaskFailureError('Some responses have non-200 status')

    def on_execute(self):
        """
            Task entry point.

            If the task that produced the requests resource is not yet in a
            FINISH/BREAK status, ``wait_tasks`` is called for it. Then the
            component is started, responses are saved, and the component is
            stopped and its logs are saved regardless of the outcome.
        """
        requests_resource = sdk2.Resource.find(id=self.ctx[REQUESTS_RESOURCE_KEY]).first()
        if not requests_resource:
            # Fall back to the channel API when the sdk2 lookup finds nothing.
            requests_resource = channel.sandbox.get_resource(self.ctx[REQUESTS_RESOURCE_KEY])
            requests_task_status = channel.sandbox.get_task(requests_resource.task_id).new_status
        else:
            requests_task_status = requests_resource.task.status

        terminal_statuses = tuple(Status.Group.FINISH + Status.Group.BREAK)
        if requests_task_status not in terminal_statuses:
            self.wait_tasks(
                tasks=[requests_resource.task_id, ],
                statuses=terminal_statuses,
                wait_all=True
            )

        component = self.get_component()
        component.start()
        component.wait()
        try:
            self.save_responses(
                component,
                self.get_result_dir(),
                on_ready=lambda: self.mark_resource_ready(self.ctx[RESPONSES_RESOURCE_KEY])
            )
        finally:
            component.stop()
            component.save_logs_resource()


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up -- confirm.
__Task__ = ImproxyGetResponses
