# coding: utf8
import contextlib
import logging
import time
from datetime import timedelta

import sandbox.common.types.misc as ctm

from sandbox import sdk2
from sandbox.projects.market.sre.googlebot.BaseGooglebotTask import BaseGooglebotTask

# Default time-to-live in seconds (5 hours). ``total_seconds()`` is used
# instead of ``.seconds`` because ``.seconds`` ignores the ``days`` component
# and would silently wrap around for durations of 24 hours or more.
DEFAULT_TTL = int(timedelta(hours=5).total_seconds())
# SQL ``LIKE`` pattern used to select the virtual hosts whose URLs are counted.
VHOST_TLD = 'm.beru.%'
log = logging.getLogger(__name__)


class MarketBlueTopUrlListResource(sdk2.Resource):
    """ Top 10k URLs by number of hits over a month, obtained from health-house. """
    # Resource time-to-live. NOTE(review): presumably expressed in days, as is
    # usual for Sandbox resources -- confirm against the sdk2 documentation.
    ttl = 60


class GooglebotCacheWarmerBlueTop(BaseGooglebotTask):
    """ Task that builds the list of the top 10k URLs by visits over the last month.

    The URLs are fetched from ClickHouse and published as a
    ``MarketBlueTopUrlListResource`` text file, one URL per line.
    """
    class Requirements(sdk2.Requirements):
        # Request the DNS64 DNS type for the task's execution environment.
        dns = ctm.DnsType.DNS64

    @contextlib.contextmanager
    def log_time(self, message):
        """ Context manager that reports the wall-clock duration of its body.

        The elapsed time is reported through ``self.set_info`` in the form
        ``[TIME] <message>: <seconds>``. The report is emitted even when the
        body raises, so the duration of a failed step is still visible.

        :param message: label identifying the measured step.
        """
        start = time.time()
        try:
            yield
        finally:
            # Runs on both success and failure; an exception raised by the
            # body keeps propagating after the timing has been reported.
            self.set_info("[TIME] {}: {:.2f}".format(message, time.time() - start))

    def on_execute(self):
        """ Fetch the most requested URLs from ClickHouse and store them as a resource. """
        # Count HTTP 200 requests made with one of the listed Googlebot user
        # agents during the last 30 days on vhosts matching VHOST_TLD, and keep
        # the 10000 most frequently requested URLs.
        with self.log_time('Fetch url list from clickhouse'):
            rows = self.clickhouse_execute('''
                    select concat('https://', vhost, url) as url, count(url) as frequency
                    from market.nginx2
                    where date between today() - 30 and today()
                        and vhost like %(vhost)s
                        and user_agent in (
                            'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
                            'Googlebot/2.1 (+http://www.google.com/bot.html)',
                            'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
                            'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36'
                        )
                        and http_code = 200
                    group by url    
                    order by frequency desc
                    limit 10000;                                     
                ''',
                {
                    'vhost': VHOST_TLD,
                },
            )

        # Each row is unpacked as a (url, frequency) pair; only the URL is kept.
        urls = [url for (url, _) in rows]

        resource = MarketBlueTopUrlListResource(self, "Top 10k url маркета по заходам", "top_urls.txt")
        resource_data = sdk2.ResourceData(resource)
        # One URL per line, newline-terminated, written as UTF-8 bytes.
        # NOTE(review): an empty query result still produces a file that holds
        # a single newline -- confirm that consumers tolerate a blank line.
        payload = ("\n".join(urls) + "\n").encode('utf8')
        resource_data.path.write_bytes(payload)
        resource_data.ready()
