import os
import io
import shutil
import logging
import tarfile
from distutils import version

from concurrent import futures

from sandbox import common
import sandbox.common.types.misc as ctm
import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
import sandbox.common.types.resource as ctr

from sandbox import sdk2
from sandbox.sdk2.helpers import misc as helpers
from sandbox.sdk2.helpers import subprocess as sp

import sandbox.projects.sandbox.resources as sb_resources

from sandbox.projects.common.arcadia import sdk as arcadiasdk
from sandbox.projects.common.constants import constants as arcadiasdk_constants


# Directory of the virtualenv-builder tool; used both as an Arcadia-relative path and as a build target
VIRTUALENV_BUILDER_DIR = "sandbox/scripts/tools/virtualenv-builder"
# Path of the tool's binary relative to the build results directory
VIRTUALENV_BUILDER_BINARY = os.path.join(VIRTUALENV_BUILDER_DIR, "virtualenv-builder")

# File with names of the packages installed via apt-get before building on Linux
DEBPACKAGES_LIST = os.path.join(VIRTUALENV_BUILDER_DIR, "packages", "linux.txt")
# Requirements file handed to virtualenv-builder via `--requirements`
REQUIREMENTS_LIST = "sandbox/requirements.txt"
# Arcadia copy of mongoengine handed to virtualenv-builder via `--path`
MONGOENGINE_SOURCES = "contrib/python/mongoengine/py2"

# Name of the resulting archive; formatted with the platform name and the Arcadia revision
ARCHIVE_FILENAME_TEMPLATE = "sandbox-venv-{platform}.{revision}.tar.gz"

# Keys of a per-platform requirements difference dictionary
ADDED = "added"
CHANGED = "changed"
REMOVED = "removed"


def parse_requirements(text):
    """
    Parse pinned requirements (``package==version`` lines) into a dictionary.

    Comments (everything after ``#``), environment markers (everything after ``;``) and whitespace
    around the package name and the version are dropped. A line is skipped unless it consists of
    exactly one non-empty name and one non-empty version separated by ``==``.

    :param text: contents of a requirements file or output of ``pip freeze``
    :return: dictionary mapping a package name to its pinned version
    """
    requirements = {}
    for raw_line in text.splitlines():
        line = raw_line.split("#")[0].split(";")[0].strip()
        parts = line.split("==")
        if len(parts) != 2:
            # Not a pin (blank line, option, URL, range specifier, ...)
            continue
        # "six == 1.0" must produce the same key and value as "six==1.0"
        package, pkg_version = (part.strip() for part in parts)
        if package and pkg_version:
            requirements[package] = pkg_version
    return requirements


def make_arcadia_url(path=""):
    """
    Build an URL of an object inside Arcadia trunk.

    :param path: path relative to the trunk root
    :return: `sdk2.svn.Arcadia.ARCADIA_TRUNK_URL` joined with `path` by means of `os.path.join`
    """
    trunk_url = sdk2.svn.Arcadia.ARCADIA_TRUNK_URL
    return os.path.join(trunk_url, path)


class BuildSandboxDeps2(sdk2.Task):
    class Parameters(sdk2.Parameters):
        # Input and output parameters of the task
        kill_timeout = 3600
        # Recipient passed to sdk2.ReleaseTemplate in release_template
        release_to = "sandbox-releases@yandex-team.ru"
        # When empty, on_execute takes the last commit revision of trunk instead
        revision = sdk2.parameters.Integer(
            "Desired revision of requirements.txt / debpackages list",
            description="Leave empty to build from trunk", default=None
        )
        # More than one entry makes the task spawn a child task per platform (see is_multiplatform)
        platforms = sdk2.parameters.List(
            "Target platforms",
            default=[
                "linux_ubuntu_10.04_lucid",
                "linux_ubuntu_12.04_precise",
                "linux_ubuntu_14.04_trusty",
                "linux_ubuntu_16.04_xenial",
                "linux_ubuntu_18.04_bionic",
                "linux_ubuntu_20.04_focal",
                # "osx_10.12_sierra",  # FIXME: SANDBOX-6976
                "osx_10.13_high_sierra",
                "osx_10.14_mojave",
            ]
        )
        # Checked by build_virtualenv_builder before looking for a pre-built virtualenv-builder resource
        reuse_script = sdk2.parameters.Bool(
            "Use previously built script for every platform if its revision hasn't changed",
            description="Uncheck if it's broken and needs rebuild", default=True
        )

        with sdk2.parameters.Output():
            # Set once at the start of on_execute; every later step uses this pinned value
            arcadia_revision = sdk2.parameters.Integer("Revision of Arcadia the environment was built on")
            # Filled by build_virtualenv from the requirements list left by virtualenv-builder
            installed_requirements = sdk2.parameters.JSON(
                "Packages installed for this platform",
                description="Only filled for child tasks (or a single task itself)",
                default={}
            )
            # Filled by calculate_difference; maps a platform name to its difference (or None on failure)
            requirements_difference = sdk2.parameters.JSON(
                "Per-platform difference between current and previous builds",
                description="For master task, there's summary for all platforms of children tasks",
                default={}
            )

    class Requirements(sdk2.Requirements):
        # Any Linux or OS X host by default; on_enqueue narrows the tags down for single-platform builds
        client_tags = ctc.Tag.Group.LINUX | ctc.Tag.Group.OSX
        # presumably expressed in MiB (i.e. 3 GiB) -- TODO confirm the unit
        disk_space = 3 * 1024

    class Context(sdk2.Context):
        # Identifiers of the per-platform child tasks launched by work_with_subtasks
        tasks = []
        # NOTE(review): not referenced anywhere in the visible part of this file -- confirm it is still needed
        revision = None

    @property
    def is_multiplatform(self):
        """Tell whether more than one target platform is requested."""
        platforms_count = len(self.Parameters.platforms)
        return platforms_count > 1

    @property
    def single_platform(self):
        """Return the first entry of the `platforms` parameter."""
        requested_platforms = self.Parameters.platforms
        return requested_platforms[0]

    @property
    def building_on_linux(self):
        """Tell whether this task builds for exactly one platform whose name contains "linux"."""
        if self.is_multiplatform:
            return False
        return "linux" in self.single_platform

    def on_save(self):
        """Request privileged mode, DNS64 and an 8 * 1024 tmpfs RAM drive for single-platform Linux builds."""
        if not self.building_on_linux:
            return

        self.Requirements.privileged = True
        self.Requirements.dns = ctm.DnsType.DNS64
        self.Requirements.ramdrive = ctm.RamDrive(ctm.RamDriveType.TMPFS, 8 * 1024, None)

    def on_enqueue(self):
        """
        Refuse to run a multi-platform task as a child and pin a single-platform task to a matching host.

        :raises common.errors.TaskFailure: if a task with several platforms has a parent task
        """
        if self.is_multiplatform:
            # Only the top-level task is allowed to fan out into subtasks
            if self.parent is not None:
                raise common.errors.TaskFailure("Fork bomb detected")
            return

        # Platforms missing from the mapping fall back to the VOID tag
        self.Requirements.client_tags &= common.platform.PLATFORM_TO_TAG.get(self.single_platform, ctc.Tag.VOID)

    @common.utils.singleton_property
    def build_results_dir(self):
        """Directory ("build-artifacts" inside the task's directory) receiving the results of the builder's build."""
        results_dir = self.path("build-artifacts")
        return results_dir

    @common.utils.singleton_property
    def builder_path(self):
        """Location of the virtualenv-builder binary inside `build_results_dir`."""
        results_dir = self.build_results_dir
        return results_dir / VIRTUALENV_BUILDER_BINARY

    @common.utils.singleton_property
    def temporary_storage(self):
        """Root for intermediate files: the RAM drive on single-platform Linux builds, the task's directory otherwise."""
        return self.ramdrive.path if self.building_on_linux else self.path()

    @common.utils.singleton_property
    def arcadia(self):
        """Location of the Arcadia working copy inside `temporary_storage`."""
        storage = self.temporary_storage
        return storage / "arcadia"

    @common.utils.singleton_property
    def build_cache(self):
        """Build directory passed to `arcadiasdk.do_build`, located inside `temporary_storage`."""
        storage = self.temporary_storage
        return storage / "build_cache"

    @common.utils.singleton_property
    def venv_workdir(self):
        """Working directory handed to virtualenv-builder, located inside `temporary_storage`."""
        storage = self.temporary_storage
        return storage / "venv-workdir"

    def install_debpackages(self):
        """
        Install the Debian packages listed in the checked out `DEBPACKAGES_LIST` file.

        Blank lines and `#` comments of the list are ignored, so that they never reach apt-get as
        empty package names. Output of apt-get is written to the "apt-get" process log.
        """
        debpackages_list = str(self.arcadia / DEBPACKAGES_LIST)
        with open(debpackages_list, "r") as fd:
            names = (line.split("#")[0].strip() for line in fd)
            packages = sorted(name for name in names if name)

        logging.debug("Installing packages: %s", " ".join(packages))
        # Keep package configuration scripts from waiting for an answer nobody is going to give
        os.environ["DEBIAN_FRONTEND"] = "noninteractive"
        with sdk2.helpers.ProcessLog(self, logger="apt-get") as pl:
            sp.check_call(["apt-get", "update"], stdout=pl.stdout, stderr=sp.STDOUT)
            sp.check_call(
                ["apt-get", "install", "--yes", "--force-yes"] + packages,
                stdout=pl.stdout, stderr=sp.STDOUT
            )

    def on_prepare(self):
        """Do nothing: this task performs no preparation step."""

    def on_execute(self):
        """
        Pin the Arcadia revision, then either fan out into per-platform subtasks or build the
        environment for the only platform; in both cases finish by calculating the difference
        against the previous release.
        """
        # Freeze Arcadia revision: the explicitly requested one, or the last commit revision of trunk
        if not self.Parameters.arcadia_revision:
            self.Parameters.arcadia_revision = (
                self.Parameters.revision or
                sdk2.svn.Arcadia.info(make_arcadia_url())["commit_revision"]
            )

        if self.is_multiplatform:
            with helpers.ProgressMeter("Launching a subtask for every platform specified"):
                self.work_with_subtasks()  # raises WaitTask -- be careful here
        else:
            pinned_revision = self.Parameters.arcadia_revision
            required_paths = (DEBPACKAGES_LIST, REQUIREMENTS_LIST, MONGOENGINE_SOURCES)

            with helpers.ProgressMeter("Updating pieces of Arcadia"):
                # Shallow checkout of the root first, then only the pieces the build needs
                sdk2.svn.Arcadia.checkout(
                    make_arcadia_url(), str(self.arcadia), sdk2.svn.Arcadia.Depth.IMMEDIATES,
                    revision=pinned_revision
                )
                for relative_path in required_paths:
                    sdk2.svn.Arcadia.update(
                        str(self.arcadia / relative_path), revision=pinned_revision, parents=True
                    )

            with helpers.ProgressMeter("Building/downloading virtualenv-builder"):
                self.build_virtualenv_builder()

            if self.building_on_linux:
                with helpers.ProgressMeter("Installing Debian packages"):
                    self.install_debpackages()

            self.build_virtualenv()  # ProgressMeters are inside

        with helpers.ProgressMeter("Calculating difference between this and previous release"):
            self.calculate_difference()

    def build_virtualenv_builder(self):
        """
        Make the virtualenv-builder binary available at `builder_path`.

        When `reuse_script` is set and a READY `SandboxEnvironmentBuilder` resource with the same
        revision and platform exists, `builder_path` becomes a symlink to it. Otherwise the tool is
        built by `arcadiasdk.do_build` and registered as a new resource.
        """
        # Don't rebuild the script if it hasn't changed -- save some time
        # NOTE(review): the info is requested for the trunk URL, not for the pinned revision -- confirm it's intended
        svn_info = sdk2.svn.Arcadia.info(make_arcadia_url(VIRTUALENV_BUILDER_DIR))
        builder_revision = svn_info.get("commit_revision")

        prebuilt = None
        if builder_revision and self.Parameters.reuse_script:
            candidates = sb_resources.SandboxEnvironmentBuilder.find(
                owner=self.owner,
                attrs={
                    "revision": builder_revision,
                    "platform": self.single_platform,
                },
                state=ctr.State.READY,
            )
            # The most recently registered resource wins
            prebuilt = candidates.order(-sdk2.Resource.id).first()

        if prebuilt:
            logging.debug("Using pre-built virtualenv builder from resource #%d", prebuilt.id)
            self.builder_path.parent.mkdir(exist_ok=True, parents=True)
            self.builder_path.symlink_to(str(sdk2.ResourceData(prebuilt).path))
            return

        # Build and link against musl (https://www.musl-libc.org/faq.html) instead of libc.
        # Among other features, this means that we can build and run the executable on Ubuntu 10.04 Lucid. Ha
        musl = set(common.config.Registry().client.tags).intersection(ctc.Tag.Group.LINUX)
        arcadiasdk.do_build(
            build_system=arcadiasdk_constants.YMAKE_BUILD_SYSTEM,
            source_root=str(self.arcadia),
            targets=[VIRTUALENV_BUILDER_DIR],
            results_dir=str(self.build_results_dir),
            clear_build=False,
            checkout=True,
            revision=self.Parameters.arcadia_revision,
            build_dir=str(self.build_cache),
            musl=musl,
        )

        # Publish the fresh binary, so that later builds of the same revision and platform can reuse it
        builder_resource = sb_resources.SandboxEnvironmentBuilder(
            self,
            path=str(self.builder_path),
            description="Sandbox virtualenv builder r{} for {}".format(builder_revision, self.single_platform),
            revision=builder_revision,
            platform=self.single_platform,
        )
        sdk2.ResourceData(builder_resource).ready()

    def build_virtualenv(self):
        """
        Run virtualenv-builder and register its archive as `SandboxDeps` resources.

        The archive is registered twice (the second copy has the "prestable-" file name prefix): one
        resource is marked to be released as PRESTABLE, another one as STABLE. Both are created on
        behalf of the parent task, if there is one. The list of packages left by the tool in its
        working directory, when present, is saved to the `installed_requirements` output parameter.

        :raises common.errors.TaskFailure: if the tool has not produced the archive at the expected path
        """
        revision = self.Parameters.arcadia_revision
        platform = self.single_platform

        # The mongoengine package in Arcadia is patched for full queryset commenting support,
        # and the patches are missing in upstream; see: SANDBOX-5317,
        # https://a.yandex-team.ru/arc/history/trunk/arcadia/contrib/python/mongoengine/mongoengine/queryset/base.py?rev=3615133
        # To prevent removal of this patch in the future, a test has been added in r5323250
        mongoengine_path = str(self.arcadia / MONGOENGINE_SOURCES)
        requirements = str(self.arcadia / REQUIREMENTS_LIST)

        # virtualenv-builder saves list of requirements into its working directory
        installed_packages = self.venv_workdir / "venv" / "requirements.txt"

        archive_name = ARCHIVE_FILENAME_TEMPLATE.format(platform=platform, revision=revision)
        virtualenv = self.path(archive_name)

        with helpers.ProgressMeter("Building virtual environment"):
            with sdk2.helpers.ProcessLog(self, logger="virtualenv-builder") as log:
                command = [
                    str(self.builder_path),
                    "--workdir", str(self.venv_workdir),
                    "--requirements", requirements,
                    "--revision", str(revision),
                    "--log-directory", str(self.log_path("venv-logs")),
                    "--path", mongoengine_path,
                    "--output", str(virtualenv),
                ]
                sp.check_call(command, stdout=log.stdout, stderr=sp.STDOUT)

        if not virtualenv.exists():
            message = "Failed to discover virtualenv archive built by the tool (should be at {}, but there's none!)"
            raise common.errors.TaskFailure(message.format(str(virtualenv)))

        if installed_packages.exists():
            self.Parameters.installed_requirements = parse_requirements(installed_packages.read_text())

        with helpers.ProgressMeter("Cooking virtualenv as Sandbox resources"):
            # Every resource needs a file of its own, hence the copy
            duplicate_virtualenv = virtualenv.parent / "prestable-{}".format(virtualenv.name)
            shutil.copy(str(virtualenv), str(duplicate_virtualenv))

            resource_owner = self.parent or self
            release_targets = (
                (ctt.ReleaseStatus.PRESTABLE, duplicate_virtualenv),
                (ctt.ReleaseStatus.STABLE, virtualenv),
            )
            for release_tag, archive_path in release_targets:
                description = "Sandbox {} virtualenv r{} for {}".format(release_tag.upper(), revision, platform)
                resource = sb_resources.SandboxDeps(
                    resource_owner,
                    description,
                    path=archive_path,
                    platform=platform,
                    version=revision,
                    to_be_released=release_tag,
                )
                sdk2.ResourceData(resource).ready()

    def work_with_subtasks(self):
        """
        Launch a child task for every requested platform, wait for them and check that all have succeeded.

        The launching part is wrapped into the `run_tasks` memoize stage and ends with `sdk2.WaitTask`;
        the check below the stage runs on the children whose identifiers are stored in `Context.tasks`.

        :raises sdk2.WaitTask: after the subtasks have been launched
        :raises common.errors.TaskFailure: if no subtask could be launched, or a child is not in a SUCCEED status
        """
        def run_builder(platform_name):
            """Create and enqueue a child task of the same type for one platform; return its identifier."""
            task_class = type(self)
            task = task_class(
                self,
                description="Build virtualenv for {}".format(platform_name),
                revision=self.Parameters.arcadia_revision,
                reuse_script=self.Parameters.reuse_script,
                # `platforms` is a list parameter: pass a one-element list rather than a bare string
                platforms=[platform_name],
            )
            task.Requirements.tasks_resource = self.Requirements.tasks_resource
            return task.save().enqueue().id

        with self.memoize_stage.run_tasks:
            platforms = list(self.Parameters.platforms)
            launched = []
            with futures.ThreadPoolExecutor(max_workers=max(1, len(platforms))) as pool:
                pending_results = [pool.submit(run_builder, p) for p in platforms]
                for f in futures.as_completed(pending_results):
                    exc = f.exception()
                    if exc:
                        # NOTE(review): a platform whose subtask failed to launch is only logged here and is not
                        # part of the success check below -- confirm this tolerance is intended
                        logging.error("Failed to launch a subtask: %s", exc)
                    else:
                        launched.append(f.result())

            # Store a list of our own instead of appending to whatever object the context default is
            self.Context.tasks = launched
            if not launched:
                # There is nothing to wait for
                raise common.errors.TaskFailure("Failed to launch a subtask for any of the requested platforms")

            raise sdk2.WaitTask(self.Context.tasks, ctt.Status.Group.FINISH | ctt.Status.Group.BREAK, wait_all=True)

        children = list(self.find(id=self.Context.tasks))
        if not all(task.status in ctt.Status.Group.SUCCEED for task in children):
            raise common.errors.TaskFailure("Failed to build virtualenv for certain platforms -- see \"Children\" tab ")

    def __calculate_difference_for_archive(self, archive):
        """
        Compare packages of a freshly built archive with the previous archive released as STABLE for the same platform.

        Packages of the previous release are looked up in three steps: the `installed_requirements`
        output of the task which built it (only for tasks of this very type), then `requirements.txt`
        inside the unpacked archive, then output of `pip freeze` run from the unpacked archive.
        When there is no previous archive, every package is reported as added.

        :param archive: `SandboxDeps` resource to calculate the difference for
        :return: tuple `(platform, difference)`, where `difference` is a dictionary with the `ADDED`,
            `CHANGED` and `REMOVED` keys, or `None` if the calculation has failed
        """
        try:
            logging.debug("Building difference for resource #%d", archive.id)
            previous_archives = sb_resources.SandboxDeps.find(
                state=ctr.State.READY,
                attrs=dict(
                    platform=archive.platform,
                    released=ctt.ReleaseStatus.STABLE,
                )
            ).order(-sdk2.Resource.id).limit(2)
            # Two candidates are requested, since the newest one may belong to the task being processed
            previous_archives = [r for r in previous_archives if r.task_id != archive.task_id]

            # We're always interested in the original task that built the current package for us
            builder_tid = archive.from_task or archive.task_id
            previous_packages = {}
            if previous_archives:
                previous_archive = previous_archives[0]
                prev_tid = previous_archive.from_task or previous_archive.task_id
                logging.debug(
                    "Platform %s; previous task #%d / resource #%d VS current task #%d / resource #%d",
                    archive.platform, prev_tid, previous_archive.id, builder_tid, archive.id
                )

                try:
                    # For further builds, we still want to retrieve original builder tasks
                    task = sdk2.Task[prev_tid]
                    # Easy way: check if it's been cooked by BUILD_SANDBOX_DEPS_2 task itself
                    if task.type == self.type:
                        previous_packages = task.Parameters.installed_requirements
                except common.errors.UnknownTaskType:
                    logging.warning("The code for task #%d has been removed", prev_tid)
                except common.errors.TaskNotFound:
                    logging.warning("Task #%s is not found", prev_tid)

                # If it's an old task or the archive has been uploaded manually by someone, download the resource and
                # see what's inside -- the builder leaves a dump of `pip freeze` inside the environment's directory
                if not previous_packages:
                    logging.info("Reading packages info from the resource")
                    venv_archive_path = sdk2.ResourceData(previous_archive).path
                    base_path = self.path("extracted-{}".format(venv_archive_path.name))
                    # NOTE(review): extractall() trusts member paths of the archive -- acceptable only as long as
                    # released SandboxDeps resources come from a trusted source
                    with tarfile.open(str(venv_archive_path)) as tar:
                        tar.extractall(str(base_path))

                    requirements = base_path / "requirements.txt"
                    if requirements.exists():
                        previous_packages = parse_requirements(requirements.read_text())
                    else:
                        # Luckily, pip is always a script, so we can download it from an OS X host and run right away
                        cmd = [str(base_path / "bin" / "pip"), "freeze"]
                        out, err = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE).communicate()
                        logging.debug("Ran \"pip freeze\" on previous archive (%s); it said: %r", archive.platform, err)
                        previous_packages = parse_requirements(out)

            local_difference = {
                ADDED: {},
                CHANGED: {},
                REMOVED: {},
            }

            builder = sdk2.Task[builder_tid]
            current_packages = builder.Parameters.installed_requirements
            # Union of names: every package is visited once, and no list concatenation of dictionary keys is needed
            for package in set(current_packages) | set(previous_packages):
                previous_version = previous_packages.get(package)
                current_version = current_packages.get(package)
                if previous_version is None and current_version is not None:
                    local_difference[ADDED][package] = current_version
                elif previous_version is not None and current_version is None:
                    local_difference[REMOVED][package] = previous_version
                elif version.LooseVersion(previous_version) != version.LooseVersion(current_version):
                    local_difference[CHANGED][package] = "{} -> {}".format(previous_version, current_version)

            return archive.platform, local_difference

        except Exception:
            # Best effort: a broken changelog must not fail the build, but interpreter-level
            # exceptions (KeyboardInterrupt, SystemExit) are not swallowed anymore
            logging.error("Failed to build difference for platform %s -- be careful!", archive.platform, exc_info=True)
            return archive.platform, None

    def calculate_difference(self):
        """
        For every package, find the environment built for the same platform and figure what has changed,
        going from the easiest way up to more elaborate ones.

        Archives are looked up among the READY resources of the parent task (or of this task, when there
        is no parent) which are marked to be released as STABLE; a single-platform task only considers
        its own platform. The result is also saved to the `requirements_difference` output parameter.

        :return: difference between two releases of a virtual environment, keyed by platform name;
            the value is `None` for a platform whose difference could not be calculated. Example:

            {
                "linux_ubuntu_10.04_lucid": {
                    "added": {
                        "six": "1.0.3",
                    },
                    "changed": {
                        "py": "1.2.5 -> 1.2.6",
                    },
                    "removed": {
                        "pytest": "4.0.1",
                    },
                },
            }
        """
        difference = {}

        attrs = dict(
            to_be_released=ctt.ReleaseStatus.STABLE
        )
        if not self.is_multiplatform:
            attrs.update(platform=self.single_platform)

        archives = list(sb_resources.SandboxDeps.find(
            state=ctr.State.READY,
            # This is because virtual environment archives are registered for a parent task
            task_id=self.parent.id if self.parent is not None else self.id,
            attrs=attrs,
        ).limit(1000))

        if archives:
            with futures.ThreadPoolExecutor(max_workers=len(archives)) as pool:
                pending_results = [
                    pool.submit(self.__calculate_difference_for_archive, archive)
                    for archive in archives
                ]
                for f in futures.as_completed(pending_results):
                    platform, local_difference = f.result()
                    difference[platform] = local_difference
        else:
            # A thread pool cannot be created with zero workers, and there is nothing to compare anyway
            logging.warning("No virtualenv archives found -- the difference is left empty")

        self.Parameters.requirements_difference = difference
        return difference

    @property
    def release_template(self):
        """
        Build the release template: recipient, subject with the Arcadia revision and a per-platform changelog.

        The changelog is rendered from the `requirements_difference` output parameter. A platform
        without a difference gets a warning line instead of the list of packages.

        :return: `sdk2.ReleaseTemplate` offering the STABLE, PRESTABLE and CANCELLED release statuses
        """
        revision = "r{}".format(self.Parameters.arcadia_revision)
        sections = (
            (ADDED, "(+)"),
            (CHANGED, "(*)"),
            (REMOVED, "(-) REMOVED"),  # attract extra attention
        )

        changelog = io.StringIO()
        for platform in self.Parameters.platforms:
            # io.StringIO accepts text only: go through a unicode template, so that a platform name
            # stored as a byte string does not break the changelog under Python 2
            changelog.write(u"{}:\n".format(platform))

            changes = self.Parameters.requirements_difference.get(platform)
            if not changes:
                changelog.write(u"  (!) failed to build changelog -- be careful\n\n")
                continue

            for key, list_marker in sections:
                packages = changes.get(key)
                if not packages:
                    continue

                for package, pkg_version in sorted(packages.items()):
                    # The leading " " item gives the line an extra indent
                    changelog.write(u" ".join((" ", list_marker, package, pkg_version)) + u"\n")
            changelog.write(u"\n")

        return sdk2.ReleaseTemplate(
            [self.Parameters.release_to],
            "Virtual environment {}".format(revision),
            changelog.getvalue(),
            [
                ctt.ReleaseStatus.STABLE,
                ctt.ReleaseStatus.PRESTABLE,
                ctt.ReleaseStatus.CANCELLED,
            ]
        )

    def on_release(self, parameters_):
        """
        Release every package to its respective release tag, or reset its release status on CANCELLED

        :param parameters_: release parameters; only the "release_status" key is read here
        """
        super(BuildSandboxDeps2, self).on_release(parameters_)
        destination = parameters_["release_status"]
        cancelling = destination == ctt.ReleaseStatus.CANCELLED
        releasing_stable = destination == ctt.ReleaseStatus.STABLE

        # NOTE(review): limit(0) presumably stands for "no limit" here -- confirm against the sdk2 query API
        for resource in sb_resources.SandboxDeps.find(task=self, state=ctr.State.READY).limit(0):
            if cancelling:
                del resource.released
            elif releasing_stable or resource.to_be_released == destination:
                # A stable release publishes both versions of packages into respective release tags;
                # any other destination only publishes the packages built for that very destination
                resource.released = resource.to_be_released
