# -*- coding: utf-8 -*-

import datetime
import os
import logging
import shutil
import stat
import hashlib
import json
from dateutil import parser
import calendar
import time

from sandbox.common.errors import TaskFailure
from sandbox.common.types import resource as ctr
from sandbox.sdk2 import Attributes
from sandbox.sdk2 import Resource
from sandbox.sdk2 import ResourceData
from sandbox.sdk2 import Task
from sandbox.sdk2 import path
from sandbox.sdk2 import parameters
from sandbox.sdk2.helpers import ProcessLog
from sandbox.sdk2.helpers import subprocess as sp
from sandbox.projects.common import task_env
from sandbox.projects.common.nanny import nanny
from sandbox.projects.common.ya_deploy import release_integration


from sandbox.projects.market.sre.CreateResourceWithDebPackage import MarketSreDebPackage


class MarketDataGetterSourceData(Resource):
    # Resource type used by RunMarketDataGetter to store its raw result
    # directory ("Source data") so that a later run can copy it back as a cache.
    # Name of the bundle resource type the stored data was produced for.
    target_bundle = Attributes.String("Bundle resource type", required=True)
    # Environment type the stored data was fetched for.
    environment_type = Attributes.String("Environment type", required=True)
    # NOTE(review): presumably the Sandbox flag enabling automatic backup of the
    # resource — confirm against the SDK documentation.
    auto_backup = True


class RunMarketDataGetter(nanny.ReleaseToNannyTask2, release_integration.ReleaseToYaDeployTask2, Task):
    """Запуск market-data-getter и подготовка результирующих ресурсов

    Эта задача запускает market-data-getter для загрузки только указанных ресурсов и
    сохраняет полученные данные как ресурс указанного типа.
    В случае успеха будет автоматически запущен релиз в Nanny
    """
    class Requirements(task_env.TinyRequirements):
        # Execution requirements of the task: everything is inherited from
        # TinyRequirements, only the disk space is overridden below.
        # see RMDEV-2285
        disk_space = 20 * 1024   # 20 Gb (15Gb detected in some runs)

    class Parameters(Task.Parameters):
        # Input parameters of the task. The attribute names are read in
        # on_execute/on_success via ``self.Parameters.<name>``.

        # Used as the bundle sub-directory name, in the resource description and
        # in the archive file name.
        target_service = parameters.String("Target service name", required=True)
        # (name, name) choice pairs built by iterating over ``Resource``.
        all_resources = [(r.name, r.name) for r in Resource]
        # Resource type name used to create the resulting bundle resource.
        resource_type_name = parameters.String("Type of generated resource", choices=all_resources,
                                               required=True)
        # Written verbatim to data-getter.conf; the part of every non-empty line
        # before the first ":" is treated as a service name.
        resources = parameters.String("List of data getter resources separated by new line",
                                      required=True, multiline=True)
        # The chosen value is appended to the data-getter command line as
        # ``--<value>`` and is also used as the release status.
        with parameters.String("Environment type", required=True) as environment_type:
            environment_type.values["stable"] = environment_type.Value("STABLE")
            environment_type.values["prestable"] = environment_type.Value("PRESTABLE")
            environment_type.values["testing"] = environment_type.Value("TESTING")

        with parameters.Group("Required resources") as resource_block:
            # When True the package resource is looked up by its package_name
            # attribute; the explicit resource below is declared for the False case.
            use_getter_latest_version = parameters.Bool("Use latest version of resourcce with data-getter package",
                                                        default=True)
            with use_getter_latest_version.value[False]:
                data_getter_resource = parameters.Resource(
                    "Resource with data-getter package",
                    resource_type=MarketSreDebPackage,
                    state=ctr.State.READY,
                    attrs=dict(package_name="yandex-market-data-getter"),
                    required=False
                )
            # Same scheme for the libxml2-utils package, whose usr/bin directory is
            # prepended to PATH for the data-getter process.
            use_xmllint_latest_version = parameters.Bool("Use latest version of resource with libxml2-utils package",
                                                         default=True)
            with use_xmllint_latest_version.value[False]:
                xmllint_resource = parameters.Resource(
                    "Resource with libxml2-utils package",
                    resource_type=MarketSreDebPackage,
                    state=ctr.State.READY,
                    attrs=dict(package_name="libxml2-utils"),
                    required=False
                )

        with parameters.Group("Additional parameters") as additional_block:
            # Release switches are checked in on_success.
            create_nanny_release = parameters.Bool("Create Nanny release on success", default=True)
            create_deploy_release = parameters.Bool("Create Y.Deploy release on success", default=False)
            # Controls whether the bundle resource is created at all.
            create_resource_bundle = parameters.Bool("Create resource bundle on success", default=True)
            # When set, the bundle is packed into "<target_service>-data-getter-bundle.tar.gz".
            archive_bundle = parameters.Bool("Create archive instead of raw files", default=False)
            # stat.json / meta.json are written into the bundle directory when enabled.
            create_stat_file = parameters.Bool("Add to bundle stat file", default=True)
            create_meta_file = parameters.Bool("Add to bundle meta file", default=False)
            # When set, the result directory is stored as a MarketDataGetterSourceData
            # resource with ``cache_ttl`` passed as its ttl.
            save_cache = parameters.Bool("Save cache", default=False)
            with save_cache.value[True]:
                cache_ttl = parameters.Integer("Cache TTL in days", default=1)
            # When set, the result directory is pre-filled from a previously stored cache.
            use_resource_cache = parameters.Bool("Use cache from last run to speed up", default=True)
            # Optional secrets exported to the data-getter process as YT_TOKEN / ARC_TOKEN.
            yt_token_vault = parameters.Vault("Vault secret contains YT token", required=False)
            arc_token_vault = parameters.Vault("Vault secret contains ARC_TOKEN", required=False)

    def on_execute(self):
        """Run market-data-getter and publish its output as resources.

        Steps:
          1. validate the requested resource type name;
          2. unpack the data-getter and libxml2-utils packages;
          3. optionally pre-fill the result directory from a stored cache;
          4. fetch data for every target service and drop the generation
             that was superseded by the new one;
          5. build the bundle and, depending on the parameters, store the
             cache resource and the bundle resource (raw or tar.gz).

        Raises:
            TaskFailure: unknown resource type, missing package resource,
                failed archiving, or a failure reported by the runner.
        """
        if self.Parameters.resource_type_name not in Resource:
            raise TaskFailure('Invalid value for \"{}\" input parameter. Unknown resource type: {}'.format(
                RunMarketDataGetter.Parameters.resource_type_name.description,
                self.Parameters.resource_type_name))

        result_dir = os.path.join(os.getcwd(), "result-dir")
        bundle_dir = os.path.join(os.getcwd(), "bundle-dir", self.Parameters.target_service)

        start_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if self.Parameters.use_getter_latest_version:
            data_getter_resource = MarketSreDebPackage.find(
                state=ctr.State.READY,
                attrs=dict(package_name="yandex-market-data-getter"),
            ).first()
        else:
            data_getter_resource = self.Parameters.data_getter_resource
        # The lookup may find nothing and the explicit parameter is optional:
        # fail with a clear message instead of an obscure error further down.
        if not data_getter_resource:
            raise TaskFailure("Resource with yandex-market-data-getter package is not found or not specified")
        data_getter_data_path = self._extract_package_data_from_resource('data-getter', data_getter_resource)

        if self.Parameters.use_xmllint_latest_version:
            xmllint_resource = MarketSreDebPackage.find(
                state=ctr.State.READY,
                attrs=dict(package_name="libxml2-utils"),
            ).first()
        else:
            xmllint_resource = self.Parameters.xmllint_resource
        if not xmllint_resource:
            raise TaskFailure("Resource with libxml2-utils package is not found or not specified")
        xmllint_data_path = self._extract_package_data_from_resource("xmllint", xmllint_resource)

        config_file = self._generate_config_file()
        target_service_list = self._get_target_service_list()

        data_getter_runner = self._DataGetterRunner(config_file, result_dir, data_getter_data_path, xmllint_data_path,
                                                    self.Parameters.environment_type, self.Parameters.yt_token_vault, self.Parameters.arc_token_vault)

        if self.Parameters.use_resource_cache:
            self._sync_result_dir(result_dir, self.Parameters.resource_type_name, target_service_list,
                                  self.Parameters.environment_type)

        for service_name in target_service_list:
            # Remember the generation present before the run (e.g. restored from
            # the cache) so it can be removed once a newer one has been fetched.
            recent_generation = data_getter_runner.get_recent_generation(service_name)

            data_getter_runner.get_data(service_name)

            if recent_generation and data_getter_runner.get_recent_generation(service_name) != recent_generation:
                data_getter_runner.remove_generation(service_name, recent_generation)

        data_getter_runner.create_bundle(bundle_dir, self.Parameters.create_stat_file, self.Parameters.create_meta_file)

        if self.Parameters.save_cache:
            ResourceData(MarketDataGetterSourceData(self, "Source data", result_dir,
                                                    target_bundle=self.Parameters.resource_type_name,
                                                    environment_type=self.Parameters.environment_type,
                                                    ttl=self.Parameters.cache_ttl)).ready()

        if self.Parameters.create_resource_bundle:
            resource_description = "data bundle for {}, generated {} in {} env".format(
                self.Parameters.target_service,
                start_time,
                self.Parameters.environment_type
            )

            # Move resource data out from "bundle-dir" subdir to the working directory
            src = path.Path(bundle_dir)
            dst = self.path(src.name)
            logging.info("Moving '%s'->'%s'", src, dst)
            src.rename(dst)
            if self.Parameters.archive_bundle:
                archive_name = "{}-data-getter-bundle.tar.gz".format(self.Parameters.target_service)
                with ProcessLog(self, logger=logging.getLogger("tar")) as pl:
                    cmd = ["tar", "-czvf", archive_name, "-C", os.getcwd(), self.Parameters.target_service]
                    p = sp.Popen(cmd, stdout=pl.stdout, stderr=pl.stderr)
                    p.wait()
                    if p.returncode != 0:
                        # This step creates the archive, so report it as an
                        # archiving error and at error level.
                        logging.error("Error while archiving. returncode: %s", p.returncode)
                        raise TaskFailure('Error archiving bundle.')
                dst = os.path.join(os.getcwd(), archive_name)

            ResourceData(Resource[self.Parameters.resource_type_name](self, resource_description, str(dst),
                                                                      env=self.Parameters.environment_type,
                                                                      date=start_time)).ready()

    def on_success(self, prev_status):
        """Run the base success hook and trigger the configured releases.

        Releases are created only for tasks owned by 'MARKET'; for any other
        owner a warning is logged and nothing else happens.
        """
        Task.on_success(self, prev_status)

        # https://st.yandex-team.ru/CSADMIN-24017
        if self.owner != 'MARKET':
            logging.warning("Owner is %s. Skip any futher action", self.owner)
            return

        release_parameters = {
            "release_comments": None,
            "release_status": self.Parameters.environment_type,
            "release_subject": "Auto release from sandbox data-getter",
            "releaser": self.author,
        }
        if self.Parameters.create_nanny_release:
            nanny.ReleaseToNannyTask2.on_release(self, release_parameters)
        if self.Parameters.create_deploy_release:
            release_integration.ReleaseToYaDeployTask2.on_release(self, release_parameters)

    def mark_released_resources(self, status, ttl=7):
        """Delegate to ``Task.mark_released_resources`` with a default ``ttl`` of 7.

        The base implementation is called explicitly on ``Task`` and its
        result is returned unchanged.
        NOTE(review): the unit of ``ttl`` is presumably days — confirm against
        the Sandbox SDK.
        """
        return Task.mark_released_resources(self, status, ttl)

    def _sync_result_dir(self, result_dir, target_bundle, target_service_list, environment_type):
        """Pre-fill ``result_dir`` with service data from a stored cache resource.

        Looks up a READY ``MarketDataGetterSourceData`` resource with matching
        ``target_bundle`` / ``environment_type`` attributes and copies the
        directories of the requested services into ``result_dir``. Does nothing
        when no such resource exists.

        Args:
            result_dir: destination directory (created by the copy if needed).
            target_bundle: bundle resource type name to match.
            target_service_list: collection of service names to copy.
            environment_type: environment type to match.
        """
        resource = MarketDataGetterSourceData.find(
            state=ctr.State.READY,
            attrs=dict(target_bundle=target_bundle, environment_type=environment_type),
        ).first()
        if not resource:
            return

        source_dir = str(ResourceData(resource).path)

        for service_name in os.listdir(source_dir):
            if service_name not in target_service_list:
                continue
            source_service_path = os.path.join(source_dir, service_name)
            # copytree only accepts directories; skip stray files that happen
            # to share a name with a service.
            if not os.path.isdir(source_service_path):
                continue
            target_service_path = os.path.join(result_dir, service_name)
            shutil.copytree(source_service_path, target_service_path, symlinks=True)

        # When the cache holds none of the requested services nothing has been
        # copied and result_dir may not exist; chmod on it would raise OSError.
        if not os.path.isdir(result_dir):
            return

        # Make the copied data writable for the owner and group again.
        self._change_permission(result_dir,
                                dir_mode=stat.S_IRWXU | stat.S_IRWXG | stat.S_IROTH | stat.S_IXOTH,
                                file_mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)

    @staticmethod
    def _change_permission(directory, dir_mode, file_mode):
        """Apply ``dir_mode`` to ``directory`` and every sub-directory, ``file_mode`` to every file.

        Args:
            directory: root of the tree to process.
            dir_mode: permission bits for directories.
            file_mode: permission bits for files.
        """
        os.chmod(directory, dir_mode)
        for current_root, dir_names, file_names in os.walk(directory):
            # Directories first, then files, one level at a time.
            for names, mode in ((dir_names, dir_mode), (file_names, file_mode)):
                for entry in names:
                    os.chmod(os.path.join(current_root, entry), mode)

    def _extract_package_data_from_resource(self, name, resource):
        """Unpack the deb package stored in ``resource`` with ``dpkg -x``.

        Args:
            name: directory name (relative to the current directory) to
                extract into; also used in the process logger name.
            resource: resource whose data is the deb package file.

        Returns:
            Path of the directory with the extracted package content.

        Raises:
            TaskFailure: if dpkg exits with a non-zero code.
        """
        package_path = ResourceData(resource).path
        data_path = str(path.Path() / name)

        with ProcessLog(self, logger=logging.getLogger("dpkg_extract_{}".format(name))) as pl:
            # An argument list (no shell) passes the paths verbatim, so spaces
            # or shell metacharacters in them cannot break the command.
            return_code = sp.Popen(["dpkg", "-x", str(package_path), data_path],
                                   stdout=pl.stdout, stderr=sp.STDOUT).wait()

        # A failed extraction used to go unnoticed and surfaced later as a
        # missing binary; report it where it happens.
        if return_code != 0:
            raise TaskFailure("Failed to extract package for '{}': dpkg exit code {}".format(name, return_code))

        return data_path

    def _generate_config_file(self):
        """Write the ``resources`` parameter to ``data-getter.conf`` and return the file path."""
        config_file = str(path.Path() / "data-getter.conf")
        config_text = self.Parameters.resources
        with open(config_file, "w") as config_fp:
            config_fp.write(config_text)
        return config_file

    def _get_target_service_list(self):
        result = set()
        for line in self.Parameters.resources.split("\n"):
            if line.strip():
                result.add(line.split(":")[0])
        return result

    class _DataGetterRunner(object):

        def __init__(
            self,
            config_file,
            output_dir,
            data_getter_data_path,
            xmllint_data_path,
            environment_type,
            yt_token_vault,
            arc_token_vault,
        ):
            self.data_getter_data_path = data_getter_data_path
            self.xmllint_data_path = xmllint_data_path
            self.config_file = config_file
            self.environment_type = environment_type
            self.output_dir = output_dir

            self.base_command = "{0}/usr/bin/market_data_getter " \
                                "--dir={1} " \
                                "--validators-dir={0}/usr/lib/yandex/getter/validators/ " \
                                "--confd={2} " \
                                "--{3}".format(data_getter_data_path, output_dir, config_file, environment_type)

            self.my_env = os.environ.copy()
            self.my_env["PATH"] = "{}/usr/bin:{}".format(xmllint_data_path, self.my_env["PATH"])
            if yt_token_vault:
                self.my_env["YT_TOKEN"] = yt_token_vault.data()
            if arc_token_vault:
                self.my_env["ARC_TOKEN"] = arc_token_vault.data()

        def get_data(self, service_name):
            if self._run_command("get {}".format(service_name)) != 0:
                raise TaskFailure('Get command of market-data-getter return non zero exit code')

        def create_bundle(self, bundle_dir, is_stat_file_required, is_meta_file_required):
            if self._run_command("copy {} {}".format(self.config_file, bundle_dir)) != 0:
                raise TaskFailure('Copy command of market-data-getter return not zero exit code')

            if is_stat_file_required:
                self._create_state(bundle_dir)
            if is_meta_file_required:
                self._create_meta_file(bundle_dir)

        def _run_command(self, command_line):
            """Run ``<base_command> <command_line>`` through the shell and return its exit code.

            The process output is sent to the "market_data_getter" process log
            and the process runs with the environment prepared in ``__init__``.
            """
            # NOTE(review): ``self`` here is the runner, not the task object —
            # confirm that ProcessLog accepts it as its first argument.
            with ProcessLog(self, logger=logging.getLogger("market_data_getter")) as pl:
                full_command = "{} {}".format(self.base_command, command_line)
                process = sp.Popen(full_command, shell=True, stdout=pl.stdout, stderr=sp.STDOUT, env=self.my_env)
                return process.wait()

        def get_recent_generation(self, service):
            recent_generation_file_path = os.path.join(self.output_dir, service, "recent-generation")
            if not os.path.exists(recent_generation_file_path):
                return None
            with open(recent_generation_file_path, "r") as fp:
                return fp.readline()

        def remove_generation(self, service, generation):
            generation_path = os.path.join(self.output_dir, service, generation)
            if not os.path.exists(generation_path):
                return

            shutil.rmtree(generation_path)

        def _create_state(self, bundle_dir):
            logging.info("Create state file for bundle")
            path_f = {}
            state_info = self._get_all_state_files()
            for service in os.listdir(bundle_dir):
                service_dir = os.path.join(bundle_dir, service)
                service_state = state_info.get(service)
                if not service_state:
                    service_state = {}
                for root, dirs, files in os.walk(service_dir):
                    for f in files:
                        full_path = os.path.join(root, f)
                        related_path = os.path.relpath(full_path, service_dir)

                        precalc = service_state.get(related_path)
                        if precalc:
                            logging.debug('md5 for {} founded in MD5SUMS'.format(full_path))
                            state = self._extract_precalc_state(precalc)
                        else:
                            logging.debug('Calculate md5 for {}'.format(full_path))
                            state = {'modification_time': 0, 'md5': self._md5(full_path)}
                        path_f[os.path.join(service, related_path)] = state

            with open(os.path.join(bundle_dir, "stat.json"), 'w') as outfile:
                logging.info("Write state to {}".format(outfile.name))
                json.dump(path_f, outfile, sort_keys=True, indent=4)

        def _get_all_state_files(self):
            state_info = {}
            for service in os.listdir(self.output_dir):
                data = self._get_state_file_for_service(service)
                if data:
                    state_info[service] = data

            return state_info

        def _get_state_file_for_service(self, service):
            state_file = os.path.join(self.output_dir, service, "recent/state.json")
            if os.path.exists(state_file):
                with open(state_file, "r") as state_file_p:
                    return json.load(state_file_p)

            return {}

        @staticmethod
        def _md5(fname):
            hash_md5 = hashlib.md5()
            with open(fname, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    hash_md5.update(chunk)
            return hash_md5.hexdigest()

        @staticmethod
        def _extract_precalc_state(precalc):
            state = {"md5": precalc["md5"]}
            if "Last-Modified" in precalc:
                date_obj = parser.parse(precalc["Last-Modified"])
                state["modification_time"] = int(calendar.timegm(date_obj.timetuple()))
            else:
                state["modification_time"] = 0
            if "yt_gen" in precalc:
                state["yt_gen"] = precalc["yt_gen"]
            return state

        @staticmethod
        def _create_meta_file(bundle_dir):
            content = {
                'creation_time': int(time.time())
            }
            with open(os.path.join(bundle_dir, 'meta.json'), 'w') as fd:
                json.dump(content, fd, indent=4)
