# coding: utf8
import contextlib
import logging
import time
from datetime import timedelta

import sandbox.common.types.misc as ctm

from sandbox import sdk2
from sandbox.projects.market.sre.googlebot.BaseGooglebotTask import BaseGooglebotTask

# Five hours expressed as a whole number of seconds (18000).
DEFAULT_TTL = int(timedelta(hours=5).total_seconds())
# Module-level logger named after this module.
log = logging.getLogger(__name__)


class MarketTopUrlListResource(sdk2.Resource):
    """ Top 10k URLs by number of requests over a month, obtained from health-house """
    # NOTE(review): presumably the resource time-to-live in days -- confirm
    # against the Sandbox resource TTL semantics.
    ttl = 60


class GooglebotCacheWarmerTop(BaseGooglebotTask):
    """Task that builds the list of the top 10k URLs by visits over a month.

    The URLs are fetched from ClickHouse and published as a
    ``MarketTopUrlListResource`` text file with one URL per line.
    """

    class Requirements(sdk2.Requirements):
        dns = ctm.DnsType.DNS64

    @contextlib.contextmanager
    def log_time(self, message):
        """Report the wall-clock duration of the wrapped ``with`` body.

        :param message: label placed before the elapsed time in the task info.

        The duration is reported through ``self.set_info`` only when the body
        finishes without raising.
        """
        start = time.time()
        yield
        end = time.time()
        self.set_info("[TIME] {}: {:.2f}".format(message, end - start))

    def on_execute(self):
        """Fetch the most requested URLs and store them as a resource.

        Queries ``market.nginx2`` for requests made during the last 30 days
        to ``m.market.yandex.*`` hosts with a Googlebot user agent, keeps the
        10000 most frequent URLs, and writes them UTF-8 encoded to
        ``top_urls.txt``, one URL per line.
        """
        with self.log_time('Fetch url list from clickhouse'):
            # BETWEEN needs the lower bound first: ``today() and today() - 30``
            # can never match, which would leave the result empty.
            rows = self.clickhouse_execute('''
                select concat('https://', vhost, url) as url, count(url) as frequency
                from market.nginx2
                where date between today() - 30 and today()
                    and vhost like 'm.market.yandex.%'
                    and user_agent like '%Googlebot%'
                group by url
                order by frequency desc
                limit 10000;
            ''')

        # The query selects two columns (url, frequency), so take the first
        # column by index; unpacking each row as a 1-tuple fails on such rows.
        # Assumes clickhouse_execute returns indexable rows -- TODO confirm.
        all_urls = [row[0] for row in rows]

        resource = MarketTopUrlListResource(self, "Top 10k url маркета по заходам", "top_urls.txt")
        resource_data = sdk2.ResourceData(resource)
        text = "\n".join(all_urls) + "\n"
        text = text.encode('utf8')
        resource_data.path.write_bytes(text)
        resource_data.ready()
