# coding: utf8
import contextlib
import logging
import sys
import time
from collections import defaultdict
from datetime import timedelta
from urlparse import urlparse
from urlparse import urlsplit

import sandbox.common.types.misc as ctm
from sandbox import sdk2
from sandbox.sandboxsdk import svn

# (host, port) of the memcache endpoint that receives the warmed-up pages.
MEMCACHE_SERVER = (
    'front-gb-cache.vs.market.yandex.net',
    11241,
)
# Keys longer than this are not sent to memcache; they are only counted
# under the KEY_TOO_LONG label in the summary.
MEMCACHE_MAX_KEY = 250
KEY_TOO_LONG = 'key too long'
# HTTP client timeouts, in seconds.
HTTPCLIENT_CONNECT_TIMEOUT = 60
HTTPCLIENT_REQUEST_TIMEOUT = 60
# Default lifetime of a cached item, in seconds (5 hours).
DEFAULT_TTL = int(timedelta(hours=5).total_seconds())


log = logging.getLogger(__name__)


def get_cache_key(url):
    """
    Build the memcache key for ``url``.

    Matches the nginx setting: set $memcached_key "$uri?$args"
    - The question mark is always present in the key, even when the query
      string is empty.
    - A missing path is converted to "/".
    - The key gets the prefix "mc". Reason: keys that start with a slash and
      contain more than two slashes cause a "local error" in mcrouter.

    The URL is split with ``urlsplit`` rather than ``urlparse``: ``urlparse``
    moves a ";params" suffix of the last path segment into a separate field,
    which would silently drop it from the key.

    :param url: absolute or relative URL; scheme, host and fragment are ignored.
    :return: key string of the form "mc<path>?<query>".
    """
    parts = urlsplit(url)
    return 'mc{}?{}'.format(
        parts.path or "/",
        parts.query,
    )


class GooglebotCacheWarmerWorker(sdk2.Task):
    """
    Task that warms up the googlebot cache.

    Reads a list of URLs from a resource (the ``urls`` parameter or, when it
    is not set, a MARKET_TOP_URL_LIST_RESOURCE resource), requests every URL
    and stores the response body in memcache under ``get_cache_key(url)``.
    """
    class Requirements(sdk2.Requirements):
        dns = ctm.DnsType.DNS64

    class Parameters(sdk2.Parameters):
        urls = sdk2.parameters.Resource('Url list to warm up. Defaults to top 10k market urls.')
        parallel_requests = sdk2.parameters.Integer('Number of parallel requests', required=True, default=50)
        ttl = sdk2.parameters.Integer('Cache item ttl', required=True, default=DEFAULT_TTL)

    @contextlib.contextmanager
    def log_time(self, message):
        """
        Context manager that reports how long the wrapped block took.

        The duration is published with ``self.set_info`` once the block
        finishes normally; nothing is reported if the block raises.

        :param message: label put in front of the measured time.
        """
        start = time.time()
        yield
        end = time.time()
        self.set_info("[TIME] {}: {:.2f}".format(message, end - start))

    def on_execute(self):
        """
        Fetch all URLs in batches and put successful responses into memcache.

        URLs are requested ``parallel_requests`` at a time. Responses with a
        status code below 400 are written to memcache with the ``ttl``
        parameter; per-status-code and per-memcache-answer counters are
        logged at the end.
        """
        # Make tornado (and toro) importable from the Arcadia checkout.
        sys.path.append(svn.Arcadia.get_arcadia_src_dir("arcadia:/arc/trunk/arcadia/contrib/python/tornado"))
        sys.path.append(svn.Arcadia.get_arcadia_src_dir("arcadia:/arc/trunk/arcadia/contrib/python/toro"))

        import socket
        from tornado.iostream import IOStream
        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.httpclient import AsyncHTTPClient

        class Memcache(object):
            """
            Minimal asynchronous memcache client (text protocol, ``set`` only).

            https://github.com/memcached/memcached/blob/master/doc/protocol.txt
            """
            def __init__(self, server):
                # server is the (host, port) pair passed to IOStream.connect.
                self.server = server
                self.stream = None

            @gen.coroutine
            def connect(self):
                """Open the connection; does nothing if a stream already exists."""
                if self.stream:
                    return

                # TODO: TCP_NODELAY
                # The socket is created as IPv6 explicitly.
                s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM, 0)
                self.stream = IOStream(s)
                yield self.stream.connect(self.server)

            def close(self):
                """Close the connection if it was opened; safe to call repeatedly."""
                if self.stream is not None:
                    self.stream.close()
                    self.stream = None

            @gen.coroutine
            def set(self, key, value, ttl, flags=0):
                """
                Send one ``set`` command and return the server's status line.

                :param key: memcache key.
                :param value: payload; its ``len`` is sent as the data length.
                :param ttl: expiration time sent as the command's exptime field.
                :param flags: value sent as the command's flags field.
                :return: first reply line with surrounding whitespace stripped.
                """
                yield self.stream.write("set {key} {flags} {ttl} {length}\r\n{value}\r\n".format(
                    key=key,
                    flags=flags,
                    value=value,
                    ttl=ttl,
                    length=len(value),
                ))
                status = yield self.stream.read_until("\r\n")
                status = status.strip()

                raise gen.Return(status)

        client = AsyncHTTPClient()
        loop = IOLoop.instance()
        cache = Memcache(server=MEMCACHE_SERVER)
        # HTTP status code -> number of responses.
        codes = defaultdict(int)
        # memcache status line (or KEY_TOO_LONG) -> number of occurrences.
        cache_answers = defaultdict(int)

        @gen.coroutine
        def _on_execute():
            # Wait for the connection inside the running loop so that a
            # connection failure fails the task instead of being lost in an
            # un-awaited future.
            yield cache.connect()

            urls_resource = self.Parameters.urls
            # If not set, fall back to a MARKET_TOP_URL_LIST_RESOURCE resource.
            # NOTE(review): the query has no release/ordering filter, so which
            # resource `.first()` returns depends on sdk2 defaults - confirm.
            if not urls_resource:
                urls_resource = sdk2.Resource.find(sdk2.Resource["MARKET_TOP_URL_LIST_RESOURCE"]).first()

            with sdk2.ResourceData(urls_resource).path.open() as fd:
                # Drop blank lines: an empty URL can only produce a failed fetch.
                all_urls = [url for url in (line.strip() for line in fd) if url]

            # A zero step would make range() raise and a negative one would
            # silently skip every URL, so use at least one request per batch.
            batch_size = max(1, self.Parameters.parallel_requests)
            for i in range(0, len(all_urls), batch_size):
                urls = all_urls[i: i + batch_size]
                with self.log_time('Fetch urls'):
                    # Yielding a list of futures waits for the whole batch.
                    responses = yield [
                        client.fetch(
                            url,
                            headers={'User-Agent': 'YandexBot Yandex-Market-Cache-Warmer'},
                            connect_timeout=HTTPCLIENT_CONNECT_TIMEOUT,
                            request_timeout=HTTPCLIENT_REQUEST_TIMEOUT,
                            raise_error=False,
                        )
                        for url in urls
                    ]

                with self.log_time('Write cache'):
                    for response in responses:
                        codes[response.code] += 1
                        if response.code >= 400:
                            continue
                        key = get_cache_key(response.request.url)
                        if len(key) > MEMCACHE_MAX_KEY:
                            cache_answers[KEY_TOO_LONG] += 1
                            continue
                        status = yield cache.set(key, response.body, ttl=self.Parameters.ttl)
                        cache_answers[status] += 1

        try:
            loop.run_sync(_on_execute)
        finally:
            # Release the memcache socket whether or not the run succeeded.
            cache.close()

        log.info("HTTP fetch response summary")
        for code in sorted(codes.keys()):
            log.info("%s: %s", code, codes[code])

        log.info("Memcache response summary")
        for answer in sorted(cache_answers.keys()):
            log.info("%s: %s", answer, cache_answers[answer])
