# -*- coding: utf-8 -*-

import json
import os
import shlex
import logging
import requests

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from sandbox.projects import resource_types
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.paths import copy_path
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.parameters import SandboxBoolParameter
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox import sdk2

from sandbox.projects.common.pumpkin import index_task

# Endpoint that receives the generated query list via an HTTP PUT request
# (see BuildPumpkinIndex._send_basket_queries). The basket id (370514) is
# part of the URL itself.
PUMPKIN_BASKET_URL = "https://metrics-qgaas.metrics.yandex-team.ru/api/basket/370514/query-generator"

class BuildForProductionParameter(SandboxBoolParameter):
    # Boolean task parameter. When it is set, BuildPumpkinIndex.on_enqueue
    # builds a {production_tag: 1} attribute dict ('production_tag_tr' when
    # ui_language is 'tr') and passes it to _make_resources and to the
    # PUMPKIN_QUERIES resource; otherwise an empty attribute dict is used.
    name = 'queue_for_production'
    description = 'Use this index in next Production release. Do NOT set this parameter.'
    default_value = False
    # NOTE(review): presumably keeps the value from being carried over when a
    # task is cloned - confirm against the Sandbox SDK.
    do_not_copy = True


class CheckIndexParameter(SandboxBoolParameter):
    # Boolean task parameter. When it is set, BuildPumpkinIndex._check_index
    # additionally validates the line count of topqueries.txt and the size of
    # the 'indexpumpkin' file; the presence checks for the required files run
    # regardless of this flag.
    name = 'check_for_production'
    description = 'check production pumpkin index invariants (size, queries, etc)'
    default_value = False
    # NOTE(review): presumably keeps the value from being carried over when a
    # task is cloned - confirm against the Sandbox SDK.
    do_not_copy = True


class AdditionalOptionsParameter(SandboxStringParameter):
    # String task parameter holding extra command-line options. The value is
    # tokenised with shlex.split in BuildPumpkinIndex.on_execute and the
    # resulting list is handed to _make_collector_cmd.
    name = 'cmd_options'
    description = 'additional mrcollector options:'
    # No extra options unless the user supplies some.
    default_value = None


class BuildPumpkinIndex(index_task.BaseBuildPumpkinIndexTask):
    """
    **Description**
    Task that builds the search index for `Yandex.Lite search "Pumpkin" <https://wiki.yandex-team.ru/SergejjSavostjanov/Pumpkin>`_
    The data is collected from the specified MapReduce cluster.
    The cluster must contain the user_sessions/yyyymmdd tables.
    Before the task finishes, several invariants of the produced resources are checked.

    **Produced resources**

        * PUMPKIN_QUERIES - queries for the SERP_COLLECTOR task
        * PUMPKIN_INDEX - the pumpkin index
    """
    type = 'BUILD_PUMPKIN_INDEX'

    input_parameters = \
        index_task.BaseBuildPumpkinIndexTask.input_parameters + \
        (BuildForProductionParameter, CheckIndexParameter, AdditionalOptionsParameter)

    # Context key under which on_enqueue stores the id of the PUMPKIN_QUERIES resource.
    QUERIES_RESOURCE_KEY = "queries_resource_id"

    def on_enqueue(self):
        """Create the output resources when the task is enqueued.

        Runs the base-class hook first, then creates the resources with a
        production attribute when ``queue_for_production`` is set, and stores
        the id of the PUMPKIN_QUERIES resource in the task context.
        """
        index_task.BaseBuildPumpkinIndexTask.on_enqueue(self)
        channel.task = self

        # The Turkish UI language uses its own production tag.
        production_tag = 'production_tag'
        if self.ctx['ui_language'] == 'tr':
            production_tag = 'production_tag_tr'

        attrs = {production_tag: 1} if self.ctx[BuildForProductionParameter.name] else {}
        # _make_resources is not defined in this class (presumably inherited
        # from the base task).
        self._make_resources(attrs)

        queries_resource = self.create_resource(
            self.descr, 'topqueries.txt', resource_types.PUMPKIN_QUERIES, attributes=attrs)
        # Use the shared key constant so on_execute reads exactly what is written here.
        self.ctx[self.QUERIES_RESOURCE_KEY] = queries_resource.id

    def on_execute(self):
        """Build the index, validate it, publish the queries and update the basket.

        Raises:
            SandboxTaskFailureError: if one of the index invariants checked by
                ``_check_index`` does not hold.
        """
        index_path = channel.sandbox.get_resource(self.ctx[self.INDEX_RESOURCE_KEY]).path
        queries_path = channel.sandbox.get_resource(self.ctx[self.QUERIES_RESOURCE_KEY]).path

        # The parameter defaults to None. shlex.split(None) reads from stdin on
        # Python 2 (and is rejected by recent Python 3), so fall back to an
        # empty option string.
        extra_options = self.ctx[AdditionalOptionsParameter.name] or ''
        collector_args = shlex.split(extra_options)
        collector_cmd = self._make_collector_cmd(collector_args)
        self._build_index(collector_cmd)
        self._check_index(index_path)
        self._make_queries(index_path, queries_path)
        self._update_basket(queries_path)

    def _check_index(self, index_path):
        """Verify that the index directory satisfies the expected invariants.

        The four required files are always checked for presence. When
        ``check_for_production`` is set, the number of lines in topqueries.txt
        and the size of the 'indexpumpkin' file are validated as well.

        Args:
            index_path: directory that contains the built index.

        Raises:
            SandboxTaskFailureError: on the first violated invariant.
        """
        required_files = ('queryrec.weights', 'queryrec.dict', 'topqueries.txt', 'metadata')
        for file_name in required_files:
            ensure_condition(
                os.path.isfile(os.path.join(index_path, file_name)),
                'required file is missing')

        if self.ctx[CheckIndexParameter.name]:
            top_queries_size = count_lines(os.path.join(index_path, 'topqueries.txt'))
            ensure_condition(
                top_queries_size == self.ctx[index_task.TopQueriesParameter.name],
                'not enough queries found for topqueries.txt')

            index_size = os.path.getsize(os.path.join(index_path, 'indexpumpkin'))
            ensure_condition(index_size % 12 == 0, 'broken index: each entry must take exactly 12 bytes')
            ensure_condition(1000000000 < index_size < 14000000000, 'index is either too big or too small')

    def _make_queries(self, index_path, queries_path):
        """Copy topqueries.txt from the index directory to the queries resource path."""
        copy_path(os.path.join(index_path, 'topqueries.txt'), queries_path)

    def _update_basket(self, queries_path):
        """Best-effort upload of the leading queries to the basket.

        Any ``Exception`` raised while reading the queries or sending them is
        logged (with its traceback) and swallowed, so a basket failure does
        not fail the task.

        Args:
            queries_path: path to the tab-separated queries file.
        """
        try:
            queries = self._extract_queries_for_basket(queries_path)
            self._send_basket_queries(queries)
        except Exception as e:
            # logging.exception keeps the traceback that a plain error message loses.
            logging.exception("Failed to update basket\nError: %s", e)

    def _extract_queries_for_basket(self, queries_path, limit=50):
        """Build the basket payload from the first lines of the queries file.

        The query text is taken from the second tab-separated column of each
        line. Every accepted query is emitted twice: once for the "DESKTOP"
        device and once for "ANDROID", both with regionId 213. The query
        'mebbis' is never added to the payload.

        Args:
            queries_path: path to the tab-separated queries file.
            limit: maximum number of lines to read from the top of the file.

        Returns:
            dict of the form ``{"type": "RAW", "queries": [...]}`` where each
            entry has the keys "text", "regionId" and "device".
        """
        queries = {
            "type": "RAW",
            "queries": []
        }
        with open(queries_path, "r") as f:
            for line_no in range(1, limit + 1):
                line = f.readline()
                if not line:
                    # The file has fewer than `limit` lines; send what we have.
                    break

                # Drop the line terminator so that it cannot end up inside the
                # query text when the query is the last column.
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 2:
                    logging.warning(
                        "Skipping malformed line %d in %s: %r", line_no, queries_path, line)
                    continue

                query = fields[1]
                if query == 'mebbis':
                    continue

                for device in ("DESKTOP", "ANDROID"):
                    queries["queries"].append({
                        "text": query,
                        "regionId": 213,
                        "device": device
                    })
        return queries

    def _send_basket_queries(self, queries, timeout=120):
        """PUT the basket payload to PUMPKIN_BASKET_URL.

        The OAuth token is read from the Sandbox vault and the payload is sent
        as JSON through a session that retries failed PUT requests.

        Args:
            queries: JSON-serialisable payload, as produced by
                ``_extract_queries_for_basket``.
            timeout: per-request timeout in seconds passed to ``requests``;
                without one the call could block indefinitely.

        Raises:
            requests.exceptions.RequestException: if the request keeps failing
                after the retries or the final response has an error status.
        """
        # Retry PUT up to 3 times; every status code known to requests other
        # than 200 is treated as retryable.
        # NOTE(review): `method_whitelist` was renamed to `allowed_methods` in
        # urllib3 1.26 and removed in 2.0 - revisit when the bundled urllib3
        # is upgraded.
        retry_strategy = Retry(
            total=3,
            connect=3,
            status_forcelist=tuple(x for x in requests.status_codes._codes if x != 200),
            method_whitelist=["PUT"]
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session = requests.Session()
        session.mount("https://", adapter)
        session.mount("http://", adapter)

        token = sdk2.Vault.data('YALITE', 'robot-sepe-pumpkin-oauth-token')
        headers = {
            "Authorization": "OAuth " + token,
            "Content-Type": "application/json;charset=UTF-8"
        }
        response = session.put(
            PUMPKIN_BASKET_URL, headers=headers, data=json.dumps(queries), timeout=timeout)
        # Surface error statuses that are not covered by the retry list.
        response.raise_for_status()


def count_lines(fname):
    """Return the number of lines in the file *fname*.

    An empty file yields 0. A final line without a trailing newline is
    counted as a line.
    """
    with open(fname) as f:
        # Counting via a generator avoids the unbound loop variable that an
        # enumerate-based loop leaves behind for an empty file.
        return sum(1 for _ in f)


def ensure_condition(condition, error_msg):
    """Fail the task with *error_msg* unless *condition* is truthy.

    Raises:
        SandboxTaskFailureError: when *condition* evaluates to false.
    """
    if condition:
        return
    raise SandboxTaskFailureError(error_msg)


# Module-level alias for the task class defined above.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = BuildPumpkinIndex
