# coding: utf8
import contextlib
import logging
import time
from datetime import timedelta, date

import sandbox.common.types.misc as ctm
import sandbox.common.types.task as ctt

from sandbox import common
from sandbox import sdk2
from sandbox.projects.market.sre.googlebot.BaseGooglebotTask import BaseGooglebotTask
from sandbox.projects.market.sre.googlebot.GooglebotCacheWarmerBlueWorker import GooglebotCacheWarmerBlueWorker

# Default lifetime of a warmed cache item, in seconds (5 hours).
DEFAULT_TTL = int(timedelta(hours=5).total_seconds())
# URL that is repeated to build the url list when the `test_urls` parameter is set.
VHOST_URL = 'https://m.beru.ru'
# LIKE pattern matched against the `vhost` column when selecting urls.
VHOST_TLD = 'm.beru.%'
# Module-level logger.
log = logging.getLogger(__name__)


class BlueUrlListResource(sdk2.Resource):
    """ List of URLs to warm up, obtained from health-house """


class GooglebotCacheWarmerBlueMaster(BaseGooglebotTask):
    """
        Task that warms up the googlebot cache. Picks the last N URLs from the
        access log, requests them and stores the response body in memcache.

        This master task only fetches the URL list, splits it into chunks and
        enqueues one GooglebotCacheWarmerBlueWorker subtask per chunk.
    """
    class Requirements(sdk2.Requirements):
        dns = ctm.DnsType.DNS64

    class Parameters(sdk2.Parameters):
        url_count = sdk2.parameters.Integer('Number of urls to warm up', required=True, default=1000)
        workers = sdk2.parameters.Integer('Number of workers', required=True, default=8)
        parallel_requests = sdk2.parameters.Integer('Number of parallel requests', required=True, default=50)
        ttl = sdk2.parameters.Integer('Cache item ttl', required=True, default=DEFAULT_TTL)
        test_urls = sdk2.parameters.Bool('Test urls')

    @contextlib.contextmanager
    def log_time(self, message):
        """
            Context manager that reports the wall-clock duration of its body
            via ``self.set_info``.

            :param message: label put in front of the measured duration
        """
        start = time.time()
        try:
            yield
        finally:
            # Report the duration even when the body raises, so that slow
            # failures are visible as well.
            self.set_info("[TIME] {}: {:.2f}".format(message, time.time() - start))

    def on_execute(self):
        """
            Fetch the URL list, write each chunk into a BlueUrlListResource and
            enqueue one worker subtask per chunk, then wait for all of them.

            :raises sdk2.WaitTask: after the workers are enqueued, to wait until
                every worker reaches a FINISH or BREAK status
        """
        with self.memoize_stage.run_task:
            all_urls = self.fetch_urls()
            url_count = len(all_urls)
            if not url_count:
                # Nothing to split: a zero chunk size would make range() raise.
                log.warning('No urls fetched, nothing to warm up')
                return

            wait_statuses = set(common.utils.chain(ctt.Status.Group.FINISH, ctt.Status.Group.BREAK))
            # Guard against a zero/negative parameter value.
            worker_count = max(1, self.Parameters.workers)
            # Ceiling division: the chunk size is always a positive int and the
            # number of chunks never exceeds the requested number of workers.
            urls_per_worker = (url_count + worker_count - 1) // worker_count

            workers = []
            for i in range(0, url_count, urls_per_worker):
                urls = all_urls[i:i + urls_per_worker]

                resource = BlueUrlListResource(self, "Список URL для прогрева", "urls{}.txt".format(i))
                resource_data = sdk2.ResourceData(resource)
                # One url per line, with a trailing newline.
                text = "\n".join(urls) + "\n"
                text = text.encode('utf8')
                resource_data.path.write_bytes(text)
                resource_data.ready()

                worker = GooglebotCacheWarmerBlueWorker(
                    self,
                    urls=resource.id,
                    parallel_requests=self.Parameters.parallel_requests,
                    ttl=self.Parameters.ttl,
                ).enqueue()
                workers.append(worker)
            raise sdk2.WaitTask(workers, wait_statuses)

    def fetch_urls(self):
        """
            Build the list of urls to warm up.

            With the ``test_urls`` parameter set, returns VHOST_URL repeated
            ``url_count`` times. Otherwise queries the ``nginx2`` table through
            ``self.clickhouse_execute`` (not defined in this class; presumably
            inherited from BaseGooglebotTask), starting from today and going
            back one day at a time until ``url_count`` urls are collected or
            the lookback limit is reached.

            :return: list of urls; may be shorter than ``url_count`` when the
                log does not contain enough matching rows
        """
        if self.Parameters.test_urls:
            return [VHOST_URL] * self.Parameters.url_count

        # Upper bound on how far back to look. Without it the loop would never
        # terminate when the table holds fewer matching rows than requested.
        # NOTE(review): 30 days is an arbitrary safety limit - confirm it
        # matches the log retention.
        max_lookback_days = 30

        query = '''
                        select concat('https://', vhost, url) as url
                        from nginx2
                        where date = toDate(%(date)s)
                            and http_code = 200
                            and vhost like %(vhost)s
                            and user_agent like '%%Googlebot%%'
                            and timestamp > toUnixTimestamp(toDate(%(date)s))
                        order by timestamp desc
                        limit %(limit)d
                    '''

        all_urls = []
        remaining_urls = self.Parameters.url_count
        today = date.today()
        for days_back in range(max_lookback_days):
            if remaining_urls <= 0:
                break

            with self.log_time('Fetch url list from clickhouse'):
                urls = self.clickhouse_execute(
                    query,
                    {
                        'limit': remaining_urls,
                        'date': today - timedelta(days=days_back),
                        'vhost': VHOST_TLD,
                    },
                )

            log.debug('Fetched %s urls: %s', len(urls), urls)
            # Each row is a 1-tuple holding the url.
            all_urls.extend(url for (url,) in urls)
            remaining_urls -= len(urls)

        if remaining_urls > 0:
            log.warning(
                'Only %s urls found within the last %s days, %s requested',
                len(all_urls), max_lookback_days, self.Parameters.url_count,
            )

        return all_urls
