# coding: utf8
import contextlib
import logging
import sys
import time
from datetime import timedelta

import sandbox.common.types.misc as ctm
from sandbox.sandboxsdk import svn

from collections import defaultdict
from sandbox import sdk2

MEMCACHE_SERVER = ('front-gb-cache.vs.market.yandex.net', 11241)
MEMCACHE_MAX_KEY = 250
KEY_TOO_LONG = 'key too long'
HTTPCLIENT_CONNECT_TIMEOUT = 60    # seconds
HTTPCLIENT_REQUEST_TIMEOUT = 60    # seconds
# Default cache item TTL in seconds.
# total_seconds() is used instead of .seconds because .seconds only holds the
# sub-day remainder and would silently drop whole days if the TTL grows to >= 24h.
DEFAULT_TTL = int(timedelta(hours=5).total_seconds())
# Fail the task if the fraction of 2xx responses is below this value.
# That can happen in two cases, both of which are treated as errors:
#   - a large number of 4xx/5xx errors
#   - a large number of 3xx redirects
MIN_SUCCESS_RATE = 0.8


log = logging.getLogger(__name__)


class GooglebotCacheWarmerBlueWorker(sdk2.Task):
    """
        Googlebot cache warmer task.

        Reads a list of URLs from a resource, requests every URL in batches of
        ``parallel_requests`` concurrent fetches with a bot User-Agent, logs a
        per-status-code summary and fails if the share of 2xx responses is below
        ``MIN_SUCCESS_RATE``.

        NOTE(review): the code in this class only issues the HTTP requests; it does
        not write to memcache itself and never reads the ``ttl`` parameter.
        Presumably the cache is filled on the server side as a result of the
        request -- confirm.
    """
    class Requirements(sdk2.Requirements):
        dns = ctm.DnsType.DNS64

    class Parameters(sdk2.Parameters):
        urls = sdk2.parameters.Resource('Url list to warm up. Defaults to top 10k market urls.')
        parallel_requests = sdk2.parameters.Integer('Number of parallel requests', required=True, default=50)
        # NOTE(review): not read anywhere in this class -- confirm whether it is still needed.
        ttl = sdk2.parameters.Integer('Cache item ttl', required=True, default=DEFAULT_TTL)

    @contextlib.contextmanager
    def log_time(self, message):
        """
            Context manager reporting the wall-clock duration of its body.

            The duration is published through ``self.set_info`` as
            ``[TIME] <message>: <seconds>``. It is reported in a ``finally`` clause,
            so a body that raises still gets its timing recorded.

            :param message: label placed in front of the measured duration.
        """
        start = time.time()
        try:
            yield
        finally:
            self.set_info("[TIME] {}: {:.2f}".format(message, time.time() - start))

    def on_execute(self):
        """
            Fetch every URL from the URL list and validate the success rate.

            :raises ValueError: if ``parallel_requests`` is not a positive number.
            :raises RuntimeError: if no URL list resource is available, the list
                contains no URLs, or the share of 2xx responses is lower than
                ``MIN_SUCCESS_RATE``.
        """
        # tornado/toro are not importable by default: fetch them from Arcadia and
        # extend sys.path before importing.
        sys.path.append(svn.Arcadia.get_arcadia_src_dir("arcadia:/arc/trunk/arcadia/contrib/python/tornado"))
        sys.path.append(svn.Arcadia.get_arcadia_src_dir("arcadia:/arc/trunk/arcadia/contrib/python/toro"))

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.httpclient import AsyncHTTPClient

        client = AsyncHTTPClient()
        loop = IOLoop.instance()
        codes = defaultdict(int)  # HTTP status code -> number of responses

        @gen.coroutine
        def _on_execute():
            batch_size = self.Parameters.parallel_requests
            # A zero step makes range() raise an obscure error and a negative one
            # silently skips all URLs, so reject both up front.
            if batch_size < 1:
                raise ValueError('parallel_requests must be positive, got {}'.format(batch_size))

            urls_resource = self.Parameters.urls
            # If not set - fall back to a MARKET_BLUE_TOP_URL_LIST_RESOURCE resource.
            # NOTE(review): no state/attribute filter or ordering is passed to find(),
            # so "latest released to stable" is not enforced by this query -- confirm.
            if not urls_resource:
                urls_resource = sdk2.Resource.find(sdk2.Resource["MARKET_BLUE_TOP_URL_LIST_RESOURCE"]).first()
            if not urls_resource:
                raise RuntimeError(
                    'No url list resource given and no MARKET_BLUE_TOP_URL_LIST_RESOURCE found'
                )

            with sdk2.ResourceData(urls_resource).path.open() as fd:
                stripped_lines = (line.strip() for line in fd)
                # Skip blank lines: an empty string is not a URL and would only
                # distort the success rate.
                all_urls = [url for url in stripped_lines if url]

            # Without this guard the success rate computation divides by zero.
            if not all_urls:
                raise RuntimeError('Url list is empty, nothing to warm up')

            successes = 0
            for offset in range(0, len(all_urls), batch_size):
                batch = all_urls[offset: offset + batch_size]
                with self.log_time('Fetch urls'):
                    # Yielding a list of futures waits for the whole batch at once.
                    responses = yield [
                        client.fetch(
                            url,
                            headers={'User-Agent': 'YandexBot Yandex-Market-Cache-Warmer'},
                            connect_timeout=HTTPCLIENT_CONNECT_TIMEOUT,
                            request_timeout=HTTPCLIENT_REQUEST_TIMEOUT,
                            # Return error responses instead of raising, so one bad
                            # URL does not abort the batch.
                            raise_error=False,
                        )
                        for url in batch
                    ]

                for response in responses:
                    codes[response.code] += 1
                    if 200 <= response.code < 300:
                        successes += 1

            log.info("HTTP fetch response summary")
            for code in sorted(codes):
                log.info("%s: %s", code, codes[code])

            total = len(all_urls)
            success_rate = float(successes) / float(total)
            if success_rate < MIN_SUCCESS_RATE:
                raise RuntimeError('Success rate {} ({}/{}) is lower than minimum {}'.format(
                    success_rate, successes, total, MIN_SUCCESS_RATE
                ))

        loop.run_sync(_on_execute)