# -*- coding: utf-8 -*-

import logging
import os
import shutil
import requests
import math
from distutils.dir_util import copy_tree

from sandbox import sdk2
from sandbox.common.types import task as ctt
from sandbox.common.types import misc as ctm
from sandbox.projects.market.front.helpers.node import create_node_selector
from sandbox.projects.market.front.helpers.ubuntu import create_ubuntu_selector, setup_container
from sandbox.projects.market.front.helpers.sandbox_helpers \
    import rich_check_call, report_data, format_header, get_resource_http_proxy_link
from sandbox.projects.market.front.helpers.MetatronEnv import MetatronEnv
from sandbox.projects.market.resources import \
    MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_INPUT, MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_RESULT, \
    MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_OUTPUT
from sandbox.projects.market.front.helpers.github import clone_repo
from sandbox.projects.market.front.MarketDataGrabberRunner import MarketDataGrabberRunner
from sandbox.projects.sandbox_ci.utils import env

# --- Defaults for the GitHub repository that contains the tested project ---
DEFAULT_GITHUB_OWNER = 'market'
DEFAULT_GITHUB_REPO = 'marketfront'
DEFAULT_GITHUB_BRANCH = 'master'

# --- Location of the data-grabber package, relative to the repository root ---
DEFAULT_GRABBER_PATH = os.path.join('node_modules', '@yandex-market', 'data-grabber')
DEFAULT_GRABBER_BIN_PATH = os.path.join(DEFAULT_GRABBER_PATH, 'bin')

# File name used for a downloaded input (relative to the grabber working dir)
DEFAULT_INPUT_FILE_PATH = 'input.txt'
# File name the input gets inside the input resource directory
INPUT_FILENAME = "input.dsv"

DISK_SPACE = 3 * 1024  # 3 Gb
SUBTASK_TIMEOUT = 3 * 60 * 60  # 3 h

# Visitor names selectable for the desktop platform
KNOWN_VISITORS_LIST_DESKTOP = [
    'km-do',
    'km-do-and-top6',
    'km-top6-list',
    'km-top6-snippets',
    'km-top6',
    'km-vizitka',
    'km-offers-main',
    'km-offers-full-page',
    'catalog-grid',
    'catalog-list',
    'catalog-grid-snippet',
    'catalog-list-snippet',
    'catalog-grid-offer-snippet',
    'catalog-list-offer-snippet',
    'catalog-breadcrumbs',
    'catalog-controls',
    'catalog-filters',
    'catalog-intents',
    'catalog-legal-info',
    'catalog-questions-entrypoint',
    'catalog-recommend-carousel',
    'catalog-any-serp',
    'catalog-title',
    'catalog-seo',
]
# Visitor names selectable for the touch platform
KNOWN_VISITORS_LIST_TOUCH = [
    'catalog-snippet',
]

# (value, label) pairs for the `visitor` parameter: desktop visitors first,
# then touch visitors, each name used both as value and as label.
VISTOR_CHOICES = [
    (x, x) for x in KNOWN_VISITORS_LIST_DESKTOP + KNOWN_VISITORS_LIST_TOUCH
]


class MarketDataGrabber(sdk2.Task):
    """
    Сравнение веб-элементов на различных стендах на большом количестве урлов
    """

    # Per-execution working state. The class-level values below are only
    # placeholders: the path attributes are assigned in on_execute(), while
    # input_filepath and input_num_lines are assigned in _download_input().
    root_dir = ''  # task working directory (str(self.path()))
    app_src_dir = None  # checkout directory of the cloned repository
    grabber_work_dir = None  # directory inside the checkout the grabber is run from
    grabber_getUrls_path = None  # path to data-grabber's bin/getUrls.js
    input_filepath = None  # path to the input file (downloaded or produced by getUrls.js)
    result_dir = None  # directory the sub-task outputs are merged into
    full_config_path = None  # path to the data-grabber config file
    input_num_lines = 0  # number of input records (header line excluded)

    class Context(sdk2.Context):
        # Ids of the input resources; appended to in _prepare_input_resources().
        # NOTE(review): list defaults are mutable class attributes — confirm that
        # sdk2.Context gives every task its own copy.
        input_resource_ids = []
        # Ids of the spawned runner sub-tasks; appended to in _run_subtasks().
        child_tasks_ids = []
        # Id of the merged result resource; set in _prepare_result_resource()
        # and read by header().
        result_resource_id = None

    class Parameters(sdk2.Task.Parameters):
        # Container / runtime selection (helpers come from the market front package).
        ubuntu_version = create_ubuntu_selector()
        node_version = create_node_selector()

        # Which repository and branch to clone; used by _clone_repo() and
        # forwarded to every runner sub-task.
        with sdk2.parameters.Group("GitHub репозиторий проекта") as github_repo_block:
            app_owner = sdk2.parameters.String(
                "Github owner",
                default_value=DEFAULT_GITHUB_OWNER,
            )

            app_repo = sdk2.parameters.String(
                "Github repo",
                required=True,
                default_value=DEFAULT_GITHUB_REPO
            )

            app_branch = sdk2.parameters.String(
                "Тестируемая ветка",
                required=True,
                default_value=DEFAULT_GITHUB_BRANCH
            )

        # data-grabber settings.
        with sdk2.parameters.Group("Параметры data_grabber"):
            # Host under test; forwarded to the runner sub-tasks.
            test_domain = sdk2.parameters.String(
                "Хост для тестирования",
                required=True,
            )

            # Reference (control) host; forwarded to the runner sub-tasks.
            control_domain = sdk2.parameters.String(
                "Хост для контроля",
                required=True,
            )

            # Number of runner sub-tasks the input is split between
            # (see _run_subtasks()).
            worker_count = sdk2.parameters.Integer(
                "На сколько тасков разбить input",
                required=True,
                default_value=4
            )

            # Directory inside the checkout from which the grabber is run.
            work_dir = sdk2.parameters.String(
                "Из какой директории проекта запускать тесты",
                required=True,
                default_value='market/platform.desktop'
            )

            # Config file path, relative to work_dir.
            config_path = sdk2.parameters.String(
                "Путь к конфигам из рабочей директории",
                required=True,
                default_value='configs/data-grabber.js'
            )

            # Which visitor to run; on_prepare() rejects an empty value.
            visitor = sdk2.parameters.String(
                "Какие данные собирать",
                choices=VISTOR_CHOICES,
            )

        # Optional URL of a ready-made input file. When it is empty,
        # _download_input() runs getUrls.js to produce the input instead.
        input_url = sdk2.parameters.String(
            "Input data url",
            description="Ссылка на файл с входными данными. При отсутствии будет пытаться вполнить YQL запрос.",
            required=False,
            default_value=None
        )

        # Optional upper bound on the number of input records that are
        # distributed between the sub-tasks (applied in _download_input()).
        input_limit = sdk2.parameters.Integer(
            "Сколько записей input-а использовать",
            description="Позволяет ограничить ",
            required=False,
            default_value=None,
        )

        # Extra environment variables: exported in on_prepare() and forwarded
        # to the runner sub-tasks.
        with sdk2.parameters.Group('Environment') as environ_block:
            environ = sdk2.parameters.Dict('Environment variables')

    class Requirements(sdk2.Task.Requirements):
        # Resolve host names through DNS64.
        dns = ctm.DnsType.DNS64
        # Disk quota for the task; DISK_SPACE is 3 * 1024
        # (presumably MiB, i.e. 3 GiB — confirm against the sdk2 docs).
        disk_space = DISK_SPACE

    @sdk2.header()
    def header(self):
        """Build the task header that describes the merged result resource.

        :return: a dict with a single "Result resource" section, or None
                 while no result resource id has been stored in the context.
        """
        result_id = self.Context.result_resource_id
        if not result_id:
            return None

        resource_info = self.server.resource[result_id].read()
        header_block = format_header(**report_data(resource_info))

        return {'<h3 id="checks-reports">Result resource</h3>': [header_block]}

    def on_enqueue(self):
        """Run the base on_enqueue hook, then apply setup_container() to the task."""
        super(MarketDataGrabber, self).on_enqueue()
        setup_container(self)

    def on_prepare(self):
        """Export the user-supplied environment and validate the parameters.

        :raises AssertionError: if the ``visitor`` parameter is empty or unset.
        """
        env.export(self.Parameters.environ)

        # An explicit raise instead of an `assert` statement: asserts are
        # stripped when Python runs with -O, which would silently disable this
        # check. The exception type is kept as it was.
        if not self.Parameters.visitor:
            raise AssertionError('visitor пустой или не указан')

    def on_execute(self):
        """Compute all working paths and run the pipeline stages in order.

        Everything happens inside a MetatronEnv configured with the selected
        node version. The last stage, _run_subtasks(), raises sdk2.WaitTask.
        """
        params = self.Parameters

        with MetatronEnv(self, nodejs_version=params.node_version):
            task_root = str(self.path())                           # ~/
            repo_dir = os.path.join(task_root, params.app_repo)    # ~/marketfront
            work_dir = os.path.join(repo_dir, params.work_dir)     # ~/marketfront/market/platform.desktop

            self.root_dir = task_root
            self.app_src_dir = repo_dir
            # ~/marketfront/node_modules/@yandex-market/data-grabber/bin/getUrls.js
            self.grabber_getUrls_path = os.path.join(repo_dir, DEFAULT_GRABBER_BIN_PATH, 'getUrls.js')
            self.grabber_work_dir = work_dir
            # ~/marketfront/market/platform.desktop/configs/data-grabber.js
            self.full_config_path = os.path.join(work_dir, params.config_path)
            # ~/result
            self.result_dir = os.path.join(task_root, "result")

            stages = (
                self._prepare,
                self._clone_repo,
                self._install_node_modules,
                self._download_input,
                self._prepare_input_resources,
                self._run_subtasks,
            )
            for run_stage in stages:
                run_stage()

    def on_finish(self, prev_status, status):
        """Assemble the result resource when the task finishes.

        Both status arguments are ignored: _prepare_result_resource() is called
        regardless of the status values passed in.
        """
        self._prepare_result_resource()

    def _prepare(self):
        """Create the result directory if needed and default NODE_PATH to '.'."""
        target_dir = self.result_dir
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)

        # An already defined NODE_PATH is left untouched
        os.environ.setdefault('NODE_PATH', '.')

    def _clone_repo(self):
        """Clone the configured owner/repo/branch into app_src_dir (memoized stage, max_runs=1)."""
        with self.memoize_stage.clone_repo(max_runs=1):
            params = self.Parameters
            clone_repo(params.app_owner, params.app_repo, params.app_branch, self.app_src_dir)

    def _install_node_modules(self):
        """Bootstrap the project, then install data-grabber's own dependencies.

        Memoized stage (max_runs=1), executed inside a MetatronEnv for the
        selected node version.
        """
        with self.memoize_stage.npm_install(max_runs=1), MetatronEnv(self, nodejs_version=self.Parameters.node_version):
            grabber_package_dir = os.path.join(self.app_src_dir, DEFAULT_GRABBER_PATH)

            # (command, log alias, working directory), executed in this order
            install_steps = (
                (["npm", "run", "bootstrap"], "bootstrap", self.grabber_work_dir),
                # need to install peer dependencies for data-grabber
                (["npm", "install"], "npm i grabber", grabber_package_dir),
            )

            for command, alias, workdir in install_steps:
                rich_check_call(command, task=self, alias=alias, cwd=workdir)

    def _download_input(self):
        """Obtain the input file and count the records it contains.

        Memoized stage (max_runs=1). Two sources are supported:

        * ``input_url`` is set: the file is downloaded into the grabber
          working directory as DEFAULT_INPUT_FILE_PATH;
        * otherwise data-grabber's ``getUrls.js`` is run. It is given an
          ``--outPath`` file, and the first line of that file is taken as the
          path of the produced input, relative to the grabber working directory.

        Side effects: sets ``self.input_filepath`` and ``self.input_num_lines``
        (number of lines minus the header line, never negative, optionally
        capped by the ``input_limit`` parameter).

        :raises requests.HTTPError: if the download answers with a 4xx/5xx status.
        :raises ValueError: if getUrls.js reported an empty input path.
        """
        with self.memoize_stage.download_input(max_runs=1):
            input_url = self.Parameters.input_url

            if input_url is not None and str(input_url).strip():
                logging.info("download input")

                self.input_filepath = os.path.join(self.grabber_work_dir, DEFAULT_INPUT_FILE_PATH)

                with requests.get(input_url, stream=True) as r:
                    # Without this check an HTTP error page would be silently
                    # stored and later processed as if it were the input data.
                    r.raise_for_status()
                    # The raw stream is not decoded by default; ask urllib3 to
                    # undo gzip/deflate transfer encoding while copying.
                    r.raw.decode_content = True

                    with open(self.input_filepath, 'wb') as f:
                        shutil.copyfileobj(r.raw, f)

                rich_check_call(
                    ["cat", self.input_filepath],
                    task=self, alias="cat_input", cwd=self.grabber_work_dir
                )

                logging.info("download input complete")
            else:
                logging.info("No input url specified. Will try to make an YQL query.")

                out_file_path = os.path.join(self.grabber_work_dir, "pathToInput.txt")

                rich_check_call(
                    [
                        "node",
                        self.grabber_getUrls_path,
                        "--config={}".format(self.full_config_path),
                        "--outPath={}".format(out_file_path),
                        "--visitor={}".format(self.Parameters.visitor)
                    ],
                    task=self, alias="get_urls", cwd=self.grabber_work_dir
                )

                with open(out_file_path) as f:
                    # readline() keeps the trailing line break; strip it so it
                    # does not become part of the file name.
                    relative_input_path = f.readline().strip()

                if not relative_input_path:
                    raise ValueError(
                        "getUrls.js did not report an input file path in {}".format(out_file_path)
                    )

                self.input_filepath = os.path.join(self.grabber_work_dir, relative_input_path)

                rich_check_call(
                    ["cat", self.input_filepath],
                    task=self, alias="cat_input", cwd=self.grabber_work_dir
                )

                logging.info("YQL query finished, result saved in {}".format(self.input_filepath))

            # save number of lines in input for later
            with open(self.input_filepath) as f:
                num_lines = sum(1 for _ in f)

            # Exclude header line from input size; an empty file has no records
            # (and must not yield -1).
            self.input_num_lines = max(num_lines - 1, 0)

            if self.Parameters.input_limit is not None:
                try:
                    self.input_num_lines = min(self.input_num_lines, int(self.Parameters.input_limit))
                except ValueError:
                    # An empty string was passed: keep self.input_num_lines as is
                    pass

    def _prepare_input_resources(self):
        """Publish the input file as a MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_INPUT resource.

        Memoized stage (max_runs=1). The input file is copied into a fresh
        ``input_0`` directory inside the grabber working directory under the
        name INPUT_FILENAME; that directory becomes the resource. The resource
        id is appended to ``Context.input_resource_ids``.

        :raises OSError: if the ``input_0`` directory already exists
                         (raised by os.mkdir).
        """
        with self.memoize_stage.prepare_input(max_runs=1):
            logging.info("creating MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_INPUT resource")

            input_name = "input_0"
            # ~/marketfront/market/platform.desktop/input_0
            input_resource_dir = os.path.join(self.grabber_work_dir, input_name)
            # ~/marketfront/market/platform.desktop/input_0/input.dsv
            input_resource_filepath = os.path.join(input_resource_dir, INPUT_FILENAME)

            # create resource directory
            os.mkdir(input_resource_dir)

            # copy input file with name changed
            shutil.copy(self.input_filepath, input_resource_filepath)

            # create resource
            input_resource = MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_INPUT(
                self,
                "{} resource".format(input_name),
                input_resource_dir,
                type='market-analyzer-data-grabber-input',
                project=self.Parameters.app_repo,
                report_description=self.Parameters.app_repo,
                status=ctt.Status.SUCCESS,
                root_path=INPUT_FILENAME
            )

            # mark the resource as ready
            sdk2.ResourceData(input_resource).ready()

            self.Context.input_resource_ids.append(input_resource.id)

    def _run_subtasks(self):
        """Split the input between runner sub-tasks and wait for them.

        Memoized stage (max_runs=1). ``worker_count`` MarketDataGrabberRunner
        tasks are created and enqueued; sub-task number ``index`` (1-based)
        receives ``skip``/``limit`` describing its slice of the input, where
        the slice size is ceil(input_num_lines / worker_count). The ids of the
        sub-tasks are stored in ``Context.child_tasks_ids``.

        NOTE(review): because the slice size is rounded up, the slices may
        together cover slightly more than ``input_num_lines`` records; how the
        runner treats ``limit``/``skip`` beyond the data is not visible here.

        :raises ValueError: if ``worker_count`` is less than 1.
        :raises sdk2.WaitTask: always, after the sub-tasks are enqueued, to wait
            until all of them reach a FINISH or BREAK status (or
            SUBTASK_TIMEOUT expires).
        """
        with self.memoize_stage.run_subtasks(max_runs=1):
            worker_count = int(self.Parameters.worker_count)

            # 0 would end in a ZeroDivisionError below and a negative value
            # would silently spawn no sub-tasks at all; fail with a clear message.
            if worker_count < 1:
                raise ValueError(
                    "worker_count must be a positive integer, got {}".format(worker_count)
                )

            # float() keeps true division under Python 2 as well
            batch_size = int(math.ceil(self.input_num_lines / float(worker_count)))
            batch_start = 0

            for index in range(1, worker_count + 1):
                data_grabber_task = MarketDataGrabberRunner(
                    self,
                    ubuntu_version=self.Parameters.ubuntu_version,
                    node_version=self.Parameters.node_version,
                    app_owner=self.Parameters.app_owner,
                    app_repo=self.Parameters.app_repo,
                    app_branch=self.Parameters.app_branch,
                    input_resource_id=self.Context.input_resource_ids[0],
                    input_part_number=index,
                    test_domain=self.Parameters.test_domain,
                    control_domain=self.Parameters.control_domain,
                    visitor=self.Parameters.visitor,
                    work_dir=self.Parameters.work_dir,
                    config_path=self.Parameters.config_path,
                    skip=batch_start,
                    limit=batch_size,
                    environ=self.Parameters.environ,
                    priority=self.Parameters.priority,
                )
                batch_start += batch_size

                data_grabber_task.enqueue()

                self.Context.child_tasks_ids.append(data_grabber_task.id)

            raise sdk2.WaitTask(
                self.Context.child_tasks_ids,
                ctt.Status.Group.FINISH | ctt.Status.Group.BREAK,
                wait_all=True,
                timeout=SUBTASK_TIMEOUT
            )

    def _prepare_result_resource(self):
        """Merge the sub-task outputs into a single result resource.

        Every MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_OUTPUT resource of the
        spawned sub-tasks is copied into ``self.result_dir``, which is then
        published as a MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_RESULT resource:

        * nothing is created when no sub-tasks were spawned or found, or when
          the result directory does not exist;
        * the resource is marked broken when the merged directory is empty;
        * otherwise it is marked ready, its id is stored in
          ``Context.result_resource_id`` and, if the directory contains
          ``index.html``, a link to the report is added to the task info.
        """
        if not ('child_tasks_ids' in self.Context):
            return

        child_ids = self.Context.child_tasks_ids

        # Nothing to merge: the task never got as far as spawning sub-tasks,
        # or the result directory was never created.
        if not child_ids or not self.result_dir or not os.path.isdir(self.result_dir):
            return

        subtasks = sdk2.Task.find(
            parent=self,
            children=True,
            id=child_ids
        ).limit(self.Parameters.worker_count)

        if subtasks.count == 0:
            return

        # Create the result resource only after the early exits above, so that
        # no resource is left behind without being marked ready or broken.
        result_resource = MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_RESULT(
            self,
            "market experiment analyzer data grabber result resource",
            self.result_dir,
            type='market-analyzer-data-grabber-result',
            project=self.Parameters.app_repo,
            report_description=self.Parameters.app_repo,
            status=ctt.Status.SUCCESS,
        )

        resources = list(
            sdk2.Resource.find(
                resource_type=MARKET_EXPERIMENT_ANALYZER_DATA_GRABBER_OUTPUT,
                task=subtasks
            ).limit(self.Parameters.worker_count)
        )

        # copy output resource to result resource
        for output_resource in resources:
            logging.info("copy output resource '{}' to result resource.".format(output_resource.root_path))

            resource_data = sdk2.ResourceData(output_resource)

            output_filepath = str(resource_data.path)

            # Can be replaced with shutil.copytree(src, dst, dirs_exist_ok=True) in python 3.8
            copy_tree(output_filepath, self.result_dir)

        result_resource_data = sdk2.ResourceData(result_resource)

        # Don't mark resource as ready if result dir is empty
        if not os.listdir(self.result_dir):
            result_resource_data.broken()
            return

        result_resource_data.ready()

        # Look for the report inside the result directory itself rather than
        # in a path relative to the current working directory.
        if os.path.exists(os.path.join(self.result_dir, 'index.html')):
            http_report_url = '{}/{}'.format(get_resource_http_proxy_link(result_resource), 'index.html')
            self.set_info(
                "Отчёт: <a href=\"{url}\">{url}</a>".format(url=http_report_url),
                do_escape=False
            )

        self.Context.result_resource_id = result_resource.id
