# -*- coding: utf-8 -*-

import json
import logging
import random
import subprocess

from datetime import timedelta
from datetime import datetime
from sandbox import sdk2


class PumpkinPlan(sdk2.Resource):
    """
    Shuffled shooting plan file produced by the CollectPumpkinPlan task.
    """


class PumpkinPlanCollectorExecutable(sdk2.Resource):
    """
    Executable of the pumpkin plan collector.

    CollectPumpkinPlan launches it with a ``--config <path>`` argument.
    See search/pumpkin/plan_collector
    """


class CollectPumpkinPlan(sdk2.Task):
    """
    **Description**
    Builds a plan for shooting at Pumpkin. Queries are taken from balancer logs for 4 verticals:
    web, images, video, touch, namely from the hourly tables at
    https://yt.yandex-team.ru/hahn/navigation?path=//home/logfeller/logs/l7-knoss-search-access-log/1h&
    and from the daily tables at
    https://yt.yandex-team.ru/hahn/navigation?path=//home/logfeller/logs/l7-balancer-access-log/1d&.
    Queries for each vertical are selected as follows: there is a predefined set of expressions, and
    the log records whose query field contains at least one of these expressions are picked.
    The selected queries are then shuffled across all verticals, which gives the plan.
    """

    # YT table prefixes; a date (daily logs) or a date-time (hourly logs) is appended to them.
    ACCESS_LOGS_PREFIX = "//logs/l7-balancer-access-log/1d/"
    KNOSS_LOGS_PREFIX = "//logs/l7-knoss-search-access-log/1h/"

    # Keys of a single source entry in the collector config.
    INPUT_TABLES_FIELD = "input_tables"
    MAX_QUERIES_COUNT_FIELD = "max_queries_count"
    PATTERNS_FIELD = "patterns"
    PLAN_FILE_PATH_FIELD = "plan_file_path"
    TAG_FIELD = "tag"

    # Expressions used to select queries of each kind.
    IMAGES_PATTERN = "GET /images/search?text"
    SEARCH_PATTERN = "GET /search"
    TOUCH_PATTERN = "GET /search/touch?text"
    VIDEO_PATTERN = "GET /video/search"
    YANDSEARCH_PATTERN = "GET /yandsearch"

    # Per-source plan files; they are passed to the collector in the config and read back afterwards.
    IMAGES_PLAN_PATH = "images_plan.txt"
    SEARCH_PLAN_PATH = "search_plan.txt"
    TOUCH_PLAN_PATH = "touch_plan.txt"
    VIDEO_PLAN_PATH = "video_plan.txt"
    YANDSEARCH_PLAN_PATH = "yandsearch_plan.txt"

    # Tags appended to the header line of every query in the final plan.
    # Both search and yandsearch queries are tagged as "web".
    IMAGES_TAG = "images"
    SEARCH_TAG = "web"
    TOUCH_TAG = "touch"
    VIDEO_TAG = "video"
    YANDSEARCH_TAG = "web"

    CONFIG_FILE_PATH = "collector_config.json"
    LOGGER_NAME = "collect_pumpkin_plan"
    PUMPKIN_PLAN_PATH = "pumpkin_plan.txt"
    YT_PROXY = "hahn"

    class Parameters(sdk2.Parameters):
        collector_binary = sdk2.parameters.Resource(
            "Pumpkin plan collector executable",
            resource_type=PumpkinPlanCollectorExecutable,
            required=True
        )
        hour = sdk2.parameters.String("Hour to collect queries from, required format: HH:00:00", required=True)

        images_queries_count = sdk2.parameters.Integer("Max images queries count", default=25000, required=True)
        touch_queries_count = sdk2.parameters.Integer("Max touch queries count", default=25000, required=True)
        video_queries_count = sdk2.parameters.Integer("Max video queries count", default=25000, required=True)
        web_queries_count = sdk2.parameters.Integer("Max web queries count", default=25000, required=True)

        yt_token_owner = sdk2.parameters.String("YT OAuth token owner", required=True)
        yt_token_vault = sdk2.parameters.String("YT OAuth token vault name", required=True)

    def _make_source(self, table, max_queries, pattern, plan_path, tag):
        """Build one source entry of the collector config."""
        return {
            self.INPUT_TABLES_FIELD: [table],
            self.MAX_QUERIES_COUNT_FIELD: max_queries,
            self.PATTERNS_FIELD: [pattern],
            self.PLAN_FILE_PATH_FIELD: plan_path,
            self.TAG_FIELD: tag,
        }

    def _get_sources_info(self):
        """
        Describe every query source for the collector.

        Images and video queries come from yesterday's daily balancer table;
        touch, search and yandsearch queries come from today's hourly knoss
        table for the hour given in the task parameters.

        :return: list of source dicts in the order
                 images, touch, video, search, yandsearch
        """
        # Sample the clock once so that "today" and "yesterday" stay consistent
        # even if the task happens to run across midnight.
        now = datetime.today()
        today_date = now.strftime("%Y-%m-%d")
        yesterday_date = (now - timedelta(days=1)).strftime("%Y-%m-%d")

        hourly_table = self.KNOSS_LOGS_PREFIX + 'T'.join([today_date, self.Parameters.hour])
        daily_table = self.ACCESS_LOGS_PREFIX + yesterday_date

        # web queries are divided into 2 parts: yandsearch and search
        web_part_count = self.Parameters.web_queries_count // 2

        images_params = self._make_source(
            daily_table, self.Parameters.images_queries_count,
            self.IMAGES_PATTERN, self.IMAGES_PLAN_PATH, self.IMAGES_TAG
        )
        touch_params = self._make_source(
            hourly_table, self.Parameters.touch_queries_count,
            self.TOUCH_PATTERN, self.TOUCH_PLAN_PATH, self.TOUCH_TAG
        )
        video_params = self._make_source(
            daily_table, self.Parameters.video_queries_count,
            self.VIDEO_PATTERN, self.VIDEO_PLAN_PATH, self.VIDEO_TAG
        )
        search_params = self._make_source(
            hourly_table, web_part_count,
            self.SEARCH_PATTERN, self.SEARCH_PLAN_PATH, self.SEARCH_TAG
        )
        yandsearch_params = self._make_source(
            hourly_table, web_part_count,
            self.YANDSEARCH_PATTERN, self.YANDSEARCH_PLAN_PATH, self.YANDSEARCH_TAG
        )

        return [images_params, touch_params, video_params, search_params, yandsearch_params]

    def _get_queries_from_plan(self, source_params):
        """
        Read the queries from a source's plan file and append the source tag to each.

        The plan file is read as a sequence of records: a line holding the
        query size, followed by exactly that many characters of query text.

        :param source_params: source dict holding the plan file path and the tag
        :return: list of strings of the form "<size> <tag>\\n<query>"
        :raises ValueError: if a size line is not an integer
        """
        plan_file_path = source_params[self.PLAN_FILE_PATH_FIELD]
        tag = source_params[self.TAG_FIELD]

        queries = []
        with open(plan_file_path, "r") as plan:
            while True:
                size_line = plan.readline()
                if not size_line:
                    # readline() returns an empty string only at end of file
                    break
                query_size = int(size_line)
                query = plan.read(query_size)

                head = ' '.join([str(query_size), tag])
                queries.append('\n'.join([head, query]))
        return queries

    def on_execute(self):
        """
        Run the collector and assemble the final plan resource.

        Steps: write the collector config, run the collector binary with the
        YT token in its environment, read the per-source plans it produced,
        shuffle all queries together and publish them as a PumpkinPlan resource.

        :raises subprocess.CalledProcessError: if the collector exits with a non-zero code
        """
        sources = self._get_sources_info()
        config = {"sources": sources, "yt_proxy": self.YT_PROXY}
        with open(self.CONFIG_FILE_PATH, "w") as config_file:
            json.dump(config, config_file)

        collector_binary = sdk2.ResourceData(self.Parameters.collector_binary)
        call = [
            str(collector_binary.path),
            "--config", self.CONFIG_FILE_PATH
        ]
        yt_token = sdk2.Vault.data(
            self.Parameters.yt_token_owner,
            self.Parameters.yt_token_vault
        )
        # The collector is started with an environment that contains only the YT token.
        environment = {"YT_TOKEN": yt_token}
        with sdk2.helpers.ProcessLog(self, logger=logging.getLogger(self.LOGGER_NAME)) as pl:
            pl.logger.propagate = 1
            # check_call raises on a non-zero exit code, so a failed collector stops
            # the task here instead of letting it read missing or stale plan files.
            subprocess.check_call(
                call,
                env=environment,
                stdout=pl.stdout,
                stderr=subprocess.STDOUT
            )

        queries = []
        for source in sources:
            queries.extend(self._get_queries_from_plan(source))
        # Mix the queries of all verticals together.
        random.shuffle(queries)

        with open(self.PUMPKIN_PLAN_PATH, "w") as plan:
            plan.write(''.join(queries))
        pumpkin_plan = sdk2.ResourceData(PumpkinPlan(self, "Pumpkin plan", self.PUMPKIN_PLAN_PATH))
        pumpkin_plan.ready()
