"""
Hermetic Python Environment (HPE) is a bundle containing Python and some installed packages.
It is similar to virtualenv, but virtualenv is not fully hermetic.

HPE has the following properties:
1. Hermeticity: does not depend on any other Python on machine.
2. Near-portability: although a raw copy of HPE moved to another place may be broken,
there is a simple method to avoid it (use `pack` and `unpack` methods of `HPEBundle`).
3. Determinism: HPE prepared on a host depends only on its parameters (Python version, list of packages)
but not on the environment of the host (for example, system Python), except, of course, OS and architecture.

The basis of HPE is a "base Python" - an archive with a Python distribution without any additional packages installed,
except pip, setuptools and wheel. Base Pythons for different platforms are pre-uploaded to S3.
To prepare HPE, one should download base Python and install specified requirements to it.
Then it is possible to pack HPE to archive (with trick from item 2) and distribute it between various machines.
"""

import abc
import io
import hashlib
import json
import logging
import os
import platform
import re
import sys
import tarfile
import tempfile

import requests
import six

from sandbox.common import errors
from sandbox.common import patterns
import sandbox.common.types.resource as ctr

from sandbox.projects.browser import common
from sandbox.projects.browser.common import contextmanagers
from sandbox.projects.browser.common import timeout
from sandbox.projects.common import decorators

from sandbox import sdk2
from sandbox.sdk2.helpers import subprocess

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Limit handed to `timeout.timeout` around `install_packages`
# (presumably seconds, i.e. 20 minutes - TODO confirm against `timeout.timeout`).
PIP_EXECUTION_LIMIT = 20 * 60
# Package index passed to pip through `--index-url`.
PYPI_URL = 'https://pypi.yandex-team.ru/simple/'


@six.add_metaclass(abc.ABCMeta)
class _BaseHPEBundle(object):
    """
    An unpacked HPE living in a directory on disk.

    Subclasses supply the platform specifics: `_EXE_EXTENSION` and `scripts_dir`.
    """
    _EXE_EXTENSION = abc.abstractproperty()

    # Installed packages may generate their own executables ("entry points") which embed
    # the path to the Python executable (as utf8-encoded bytes).
    # For portability `pack` swaps that path for the placeholder and `unpack` swaps it back.
    _PYTHON_PATH_PLACEHOLDER = b'%HPE_PYTHON_PATH%'
    _PYTHON_PATH_ENCODING = 'utf8'
    # Name of the archive member listing the files in which the placeholder was substituted.
    _PATCHED_FILES_LIST_NAME = 'patched_files.json'

    @classmethod
    def _find_executable(cls, root_dir):
        """
        Locate the Python executable inside `<root_dir>/bin`.

        :type root_dir: sdk2.Path
        :return: path to the first of `python` / `python3` (with platform extension) that is a file
        :raises RuntimeError: if neither candidate exists
        """
        candidates = (
            root_dir.joinpath('bin', stem + cls._EXE_EXTENSION)
            for stem in ('python', 'python3')
        )
        found = next((candidate for candidate in candidates if candidate.is_file()), None)
        if found is None:
            raise RuntimeError('No python executable found')
        return found

    def __init__(self, root_dir):
        """
        :type root_dir: sdk2.Path
        """
        self.root_dir = root_dir
        self.executable = self._find_executable(root_dir)
        self.bin_dir = root_dir.joinpath('bin')

    @property
    @abc.abstractmethod
    def scripts_dir(self):
        """
        Directory where entry points of installed packages are located.
        """
        pass

    @timeout.timeout(PIP_EXECUTION_LIMIT, errors.TemporaryError)
    @decorators.retries(5, delay=1, exceptions=subprocess.CalledProcessError)
    def install_packages(self, requirements_files=(), packages=(), stdout=None):
        """
        Install packages into HPE with its own pip, using `PYPI_URL` as the index.
        Does nothing when neither requirements files nor packages are given.

        :param requirements_files: paths passed to pip via `--requirement`
        :param packages: requirement strings passed to pip as is
        :param stdout: where to send pip output (stderr is merged into it)
        """
        if not (requirements_files or packages):
            return
        pip_cmd = [str(self.executable), '-m', 'pip', 'install', '--index-url', PYPI_URL]
        pip_cmd.extend(packages)
        for requirements_path in requirements_files:
            pip_cmd.extend(('--requirement', str(requirements_path)))
        subprocess.check_call(pip_cmd, stdout=stdout, stderr=subprocess.STDOUT)

    def path_with_bins(self, initial_path_var_value=None):
        """
        Build PATH-like string that starts with HPE binaries dirs.

        :param initial_path_var_value: PATH-like string to extend
                                       (if None, PATH of the current environment is taken)
        :return: extended PATH-like string
        """
        if initial_path_var_value is None:
            tail = os.environ.get('PATH', '')
        else:
            tail = initial_path_var_value

        entries = [str(self.bin_dir)]
        # On some platforms scripts live right in the bin dir - do not list it twice.
        if self.scripts_dir != self.bin_dir:
            entries.append(str(self.scripts_dir))
        if tail:
            entries.append(tail)
        return os.pathsep.join(entries)

    def _fs_is_case_sensitive(self):
        """
        Tell whether the file system holding HPE distinguishes letter case:
        probe the executable under lowercased and uppercased names.
        """
        exe = self.executable
        exe_dir = exe.parent
        lowercase_exe = exe_dir.joinpath(exe.name.lower())
        uppercase_exe = exe_dir.joinpath(exe.name.upper())
        assert exe.is_file()
        # Both spellings resolve to a file only if the file system ignores case.
        return not lowercase_exe.is_file() or not uppercase_exe.is_file()

    def _encoded_python_path(self):
        """
        Path to the Python executable as bytes, the way it is searched for in entry points.
        """
        text_path = six.ensure_text(str(self.executable), sys.getfilesystemencoding())
        return text_path.encode(self._PYTHON_PATH_ENCODING)

    @staticmethod
    def _write_bytes_to_tar(tar, tar_info, content):
        """
        Add in-memory `content` to `tar` as the member described by `tar_info`.
        """
        tar_info.size = len(content)
        tar.addfile(tar_info, io.BytesIO(content))

    def pack(self, archive_path):
        """
        Pack HPE to a gzipped tar archive.
        Files from the scripts dir that contain hardcoded path to Python executable are stored
        with this path replaced by a placeholder; the list of such files is stored in the archive too.

        :type archive_path: sdk2.Path
        :raises RuntimeError: if a scanned file already contains the placeholder
        """
        encoded_python_path = self._encoded_python_path()
        re_flags = 0 if self._fs_is_case_sensitive() else re.IGNORECASE
        python_path_re = re.compile(re.escape(encoded_python_path), flags=re_flags)
        placeholder = self._PYTHON_PATH_PLACEHOLDER

        # Only regular (non-symlink) files located directly in the scripts dir are scanned.
        files_to_check = set()
        if self.scripts_dir.is_dir():
            for entry in self.scripts_dir.iterdir():
                if entry.is_file() and not entry.is_symlink():
                    files_to_check.add(entry)

        patched_arc_names = set()

        with tarfile.open(str(archive_path), 'w:gz') as tar:
            for path in self.root_dir.rglob('*'):
                if not path.is_file():
                    continue

                arc_name = str(path.relative_to(self.root_dir))

                if path in files_to_check:
                    content = path.read_bytes()

                    if placeholder in content:
                        raise RuntimeError('{} is unexpectedly found in {}'.format(placeholder, path))

                    if python_path_re.search(content):
                        # Callable replacement keeps the placeholder free from regex escape processing.
                        patched_content = python_path_re.sub(lambda _: placeholder, content)
                        self._write_bytes_to_tar(tar, tar.gettarinfo(str(path), arc_name), patched_content)
                        patched_arc_names.add(arc_name)
                        continue

                tar.add(str(path), arc_name)

            if patched_arc_names:
                patched_list = sorted(patched_arc_names)
                logger.debug('Patched Python executable path ("%s" -> "%s") in the following files:\n%s',
                             encoded_python_path, placeholder, '\n'.join(patched_list))
                list_content = json.dumps(patched_list, indent=4).encode()
                self._write_bytes_to_tar(tar, tarfile.TarInfo(self._PATCHED_FILES_LIST_NAME), list_content)

    @classmethod
    def unpack(cls, archive, target_dir):
        """
        Unpack archived HPE to directory.
        If the archive carries a list of patched files, replace the placeholder in each of them
        with the path to the unpacked Python executable and remove the list.

        :type archive: sdk2.Path or typing.BinaryIO
        :type target_dir: sdk2.Path
        :rtype: _BaseHPEBundle
        """
        if isinstance(archive, sdk2.Path):
            open_kwargs = dict(name=str(archive), fileobj=None)
        else:
            open_kwargs = dict(name=None, fileobj=archive)
        with tarfile.open(mode='r|*', **open_kwargs) as tar:
            tar.extractall(str(target_dir))

        hpe = cls(target_dir)
        patched_list_path = hpe.root_dir.joinpath(cls._PATCHED_FILES_LIST_NAME)
        if not patched_list_path.is_file():
            return hpe

        patched_list = json.loads(patched_list_path.read_text())
        patched_list_path.unlink()

        encoded_python_path = hpe._encoded_python_path()
        for rel_path in patched_list:
            patched_file = hpe.root_dir.joinpath(rel_path)
            restored = patched_file.read_bytes().replace(cls._PYTHON_PATH_PLACEHOLDER, encoded_python_path)
            patched_file.write_bytes(restored)

        logger.debug('Patched Python executable path ("%s" -> "%s") in the following files:\n%s',
                     cls._PYTHON_PATH_PLACEHOLDER, encoded_python_path, '\n'.join(patched_list))
        return hpe


class WinHPEBundle(_BaseHPEBundle):
    """
    HPE bundle for Windows: executables carry the `.exe` extension and
    entry points are looked up in the `Scripts` subdirectory of `bin_dir`.
    """
    _EXE_EXTENSION = '.exe'

    @property
    def scripts_dir(self):
        """
        :return: `<bin_dir>/Scripts`
        """
        return self.bin_dir.joinpath('Scripts')


class _UnixHPEBundle(_BaseHPEBundle):
    """
    Common HPE bundle for Unix-like systems: executables have no extension and
    entry points are looked up in `bin_dir` itself.
    """
    _EXE_EXTENSION = ''

    @property
    def scripts_dir(self):
        """
        :return: the same directory as `bin_dir`
        """
        return self.bin_dir


class LinuxHPEBundle(_UnixHPEBundle):
    """
    HPE bundle for Linux; adds nothing to `_UnixHPEBundle`.
    """
    pass


class MacHPEBundle(_UnixHPEBundle):
    """
    HPE bundle for macOS; adds nothing to `_UnixHPEBundle`.
    """
    pass


# Bundle classes keyed by the value of `platform.system()`.
_HPE_BUNDLE_BY_SYSTEM = {
    'Darwin': MacHPEBundle,
    'Linux': LinuxHPEBundle,
    'Windows': WinHPEBundle,
}
# Bundle class for the OS this module is running on (None if the OS is not listed above).
HPEBundle = _HPE_BUNDLE_BY_SYSTEM.get(platform.system())


class BrowserHermeticPythonEnvironment(sdk2.Resource):
    """
    Sandbox resource with a packed HPE archive.
    `HermeticPythonEnvironment` publishes such resources and looks them up by the attributes below
    to reuse already prepared environments.
    """
    auto_backup = True
    restart_policy = ctr.RestartPolicy.IGNORE
    # NOTE(review): presumably a time-to-live in days - confirm against Sandbox resource docs.
    ttl = 7

    # Matched against `HermeticPythonEnvironment._CACHE_VERSION` on lookup.
    cache_version = sdk2.Attributes.Integer('Cache version')
    # URL the base Python of this HPE was downloaded from.
    base_python_url = sdk2.Attributes.String('URL to base python', required=True)
    # Hash identifying the set of requirements installed into this HPE.
    packages_hash = sdk2.Attributes.String('SHA1 hash of all packages', required=True)


class PipRequirementSpec(object):
    """
    One pip requirement (a line like ``numpy==1.19.5``) with helpers telling
    whether installing it is reproducible.

    `req_line` keeps the requirement as given (environment markers included).
    """
    # pip starts a comment at `#` only at the very beginning of a line or after whitespace,
    # so `#` inside a URL fragment (`...#egg=name`, `...#sha256=...`) is part of the requirement.
    _COMMENT_RE = re.compile(r'(^|\s+)#.*$')
    # `==` followed by a version ending with a wildcard (`==1.*`) is a prefix match, not an exact pin.
    _WILDCARD_PIN_RE = re.compile(r'==[^,]*\*')

    def __init__(self, req_line):
        """
        :param req_line: requirement specifier, optionally followed by `; <environment marker>`
        :type req_line: str
        """
        self.req_line = req_line
        self._req = req_line.split(';')[0]  # Strip environment markers.

    @classmethod
    def from_requirements_file(cls, path):
        """
        Parse requirements file to the list of specs.
        Comments, empty lines and lines with pip options are skipped.

        :param path: path to requirements file (any object with pathlib-like `open()`)
        :rtype: list[PipRequirementSpec]
        """
        specs = []
        with path.open() as requirements:
            for line in requirements:
                req = cls._COMMENT_RE.sub('', line).strip()
                if not req:  # Empty line or comment.
                    continue
                if req.startswith('-'):  # Pip option (i.e. --index-url).
                    continue
                specs.append(cls(req))
        return specs

    def is_link(self):
        """
        :return: True if the requirement refers to a URL (content behind it may change)
        """
        return '://' in self._req

    def is_pinned(self):
        """
        :return: True if the requirement names an exact version (`==` / `===` without wildcard)
        """
        return '==' in self._req and self._WILDCARD_PIN_RE.search(self._req) is None


# Known base Pythons as (version, os, arch) triples; os and arch use the names
# returned by `_current_os_name()` and `_current_arch_name()`.
BASE_PYTHON_AVAILABLE_VERSIONS = frozenset((
    ('2.7.17', 'linux', 'x64'),
    ('2.7.17', 'mac', 'x64'),
    ('2.7.17', 'win', 'x64'),
    ('3.8.10', 'linux', 'x64'),
    ('3.8.10', 'mac', 'x64'),
    ('3.8.10', 'win', 'x64'),
    ('3.9.7', 'linux', 'x64'),
    ('3.9.7', 'mac', 'x64'),
    ('3.9.7', 'win', 'x64'),
))
# Location of a base Python archive; formatted with `os`, `arch` and `version`.
BASE_PYTHON_URL_TEMPLATE = 'https://s3.mds.yandex.net/broinfra-tools/python/{os}/{arch}/python-{version}.tar.gz'


def _current_os_name():
    """
    Short name of the current OS as used in base Python URLs.

    :return: 'linux', 'mac' or 'win'
    :raises EnvironmentError: if `platform.system()` reports anything else
    """
    system = platform.system()
    if system == 'Linux':
        return 'linux'
    if system == 'Darwin':
        return 'mac'
    if system == 'Windows':
        return 'win'
    raise EnvironmentError('Unknown OS: {}'.format(system))


def _current_arch_name():
    """
    Short name of the current CPU architecture as used in base Python URLs.

    :return: 'x64' or 'arm64'
    :raises EnvironmentError: if `platform.machine()` reports an unlisted value
    """
    machine = platform.machine()
    if machine in ('AMD64', 'x86_64'):
        return 'x64'
    if machine == 'arm64':
        return 'arm64'
    raise EnvironmentError('Unknown architecture: {}'.format(machine))


def _available_python_versions(os_name=None, arch_name=None):
    """
    Collect versions of base Pythons available for the given platform.

    :param os_name: OS short name (current OS if not given)
    :param arch_name: architecture short name (current architecture if not given)
    :rtype: set[str]
    """
    wanted_os = os_name or _current_os_name()
    wanted_arch = arch_name or _current_arch_name()
    versions = set()
    for version, known_os, known_arch in BASE_PYTHON_AVAILABLE_VERSIONS:
        if wanted_os == known_os and wanted_arch == known_arch:
            versions.add(version)
    return versions


class HermeticPythonEnvironment(sdk2.environments.SandboxEnvironment):
    """
    Sandbox environment providing HPE with specified requirements to the current task.
    Prepared HPE are being cached as Sandbox resources, if possible.

    Usage example:
    >>> class MyTask(sdk2.Task):
    >>>     class Requirements(sdk2.Task.Requirements):
    >>>         environments = (
    >>>             HermeticPythonEnvironment(python_version='3.9.7', packages=['numpy']),
    >>>         )
    or
    >>> with HermeticPythonEnvironment(
    >>>     python_version='3.9.7',
    >>>     requirements_files=[self.path('my_repo', 'requirements.txt')],
    >>>     packages=['numpy'],
    >>> ) as my_hpe:
    >>>     subprocess.call([my_hpe.python_executable, 'script.py'])
    """
    _CACHE_VERSION = 1

    def __init__(self, python_version, pip_version=None, requirements_files=(), packages=()):
        """
        :param python_version: version of Python (must be in `_available_python_versions()`)
        :param pip_version: if specified, pip will be upgraded (downgraded) to specified version before
                            installing requirements
        :raises EnvironmentError: if there is no base Python of this version for the current platform

        :type python_version: str
        :type pip_version: str or NoneType
        :type requirements_files: collections.Sequence[sdk2.Path]
        :type packages: collections.Sequence[str]
        """
        if python_version not in _available_python_versions():
            raise EnvironmentError('No base Python {} found for current platform'.format(python_version))

        super(HermeticPythonEnvironment, self).__init__()

        self._python_version = python_version
        self._pip_requirement = 'pip=={}'.format(pip_version) if pip_version else None
        self._requirements_files = requirements_files
        self._packages = packages

        # Set by `prepare()`; None until then.
        self.hpe = None
        self._temp_env = contextmanagers.TempEnvironment()

    def _base_python_url(self):
        """
        :return: URL of the base Python archive for requested version and current platform
        """
        return BASE_PYTHON_URL_TEMPLATE.format(
            version=self._python_version,
            os=_current_os_name(),
            arch=_current_arch_name(),
        )

    @patterns.singleton_property
    def _requirement_specs(self):
        """
        All requirements of this environment: pip itself (if its version is requested),
        explicitly listed packages and contents of requirements files.

        :rtype: list[PipRequirementSpec]
        """
        specs = []
        if self._pip_requirement:
            specs.append(PipRequirementSpec(self._pip_requirement))
        specs.extend(PipRequirementSpec(package) for package in self._packages)
        for path in self._requirements_files:
            specs.extend(PipRequirementSpec.from_requirements_file(path))
        return specs

    # NOTE(review): deliberately not memoized with a function-level cache. The check is trivial
    # once `_requirement_specs` is cached, and the answer must never be shared between
    # environments with different requirements.
    def _may_cache(self):
        """
        :return: True if every requirement is pinned and none is a link,
                 i.e. the resulting HPE is determined by the requirement lines alone
        """
        return all(not spec.is_link() and spec.is_pinned() for spec in self._requirement_specs)

    def _packages_hash(self):
        """
        :return: hex SHA1 of the sorted, de-duplicated requirement lines joined with spaces
        """
        hash_parts = sorted({spec.req_line for spec in self._requirement_specs})
        key = ' '.join(hash_parts).encode('utf-8')
        return hashlib.sha1(key).hexdigest()

    def _prepare_hpe(self, target_dir):
        """
        Build HPE from scratch: download base Python, unpack it to `target_dir`
        and install pip (if its version is requested) and all requirements.

        :type target_dir: sdk2.Path
        :rtype: _BaseHPEBundle
        """
        url = self._base_python_url()

        # The session is a context manager: its connection pool is released
        # as soon as the archive has been streamed and unpacked.
        with requests.Session() as session:
            session.mount(url, requests.adapters.HTTPAdapter(max_retries=requests.packages.urllib3.Retry(
                total=3,
                backoff_factor=1,
                status_forcelist=[500, 502, 503, 504],
            )))

            logger.debug('Downloading base Python from %s', url)
            with session.get(url, stream=True) as response:
                response.raise_for_status()
                # Unpack directly from the response stream, without saving the archive to disk.
                hpe = HPEBundle.unpack(response.raw, target_dir)

        with sdk2.helpers.ProcessLog(logger='prepare_hpe') as log:
            # pip is upgraded (downgraded) first so that requirements are installed by the requested pip.
            if self._pip_requirement:
                hpe.install_packages(packages=[self._pip_requirement], stdout=log.stdout)
            hpe.install_packages(self._requirements_files, self._packages, stdout=log.stdout)

        return hpe

    def _download_hpe(self):
        """
        Look for a ready cached HPE resource matching cache version, base Python and requirements.

        :return: path to the resource data, or None if there is no suitable resource
        """
        resource = BrowserHermeticPythonEnvironment.find(
            attrs=dict(
                cache_version=self._CACHE_VERSION,
                base_python_url=self._base_python_url(),
                packages_hash=self._packages_hash(),
            ),
            state=ctr.State.READY,
        ).first()
        if resource:
            logger.debug('Will use HPE from resource %d.', resource.id)
            return sdk2.ResourceData(resource).path
        else:
            logger.debug('No suitable HPE resources found.')
            return None

    def _publish_hpe(self):
        """
        Pack prepared HPE and publish it as a resource of the current task,
        so that `_download_hpe` can find it later.
        """
        hpe_archive = sdk2.Task.current.path('{}.tar.gz'.format(self.hpe.root_dir.name))
        self.hpe.pack(hpe_archive)

        resource = BrowserHermeticPythonEnvironment(
            sdk2.Task.current, 'Browser Hermetic Python Environment', hpe_archive,
            cache_version=self._CACHE_VERSION,
            base_python_url=self._base_python_url(),
            packages_hash=self._packages_hash(),
        )
        sdk2.ResourceData(resource).ready()
        logger.debug('Published HPE as resource %d.', resource.id)

    def prepare(self):
        """
        Make HPE available: take it from the cache or build it (publishing to the cache, if allowed),
        then prepend its binaries dirs to PATH of the current process.
        """
        may_cache = self._may_cache()
        if not may_cache:
            logger.debug('HPE caching is not allowed.')

        target_dir = sdk2.Path(tempfile.mkdtemp(dir=str(sdk2.Task.current.path()), prefix='hpe-'))
        logger.debug('HPE target dir: %s', target_dir)

        if may_cache:
            hpe_archive = self._download_hpe()
            if hpe_archive:
                logger.debug('HPE archive path: %s', hpe_archive)
                self.hpe = HPEBundle.unpack(hpe_archive, target_dir)
                logger.debug('Unpacked HPE archive to %s', target_dir)

        if not self.hpe:
            self.hpe = self._prepare_hpe(target_dir)
            if may_cache:
                self._publish_hpe()

        self._temp_env.set_var('PATH', self.hpe.path_with_bins())

    @property
    def python_executable(self):
        """
        :rtype: sdk2.Path
        """
        return self.hpe.executable

    def __enter__(self):
        self.prepare()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Remove HPE directory and restore the environment changed by `prepare()`.
        """
        try:
            # HPE may be missing if `prepare()` has not completed.
            if self.hpe is not None:
                common.rmtree_forcedly(str(self.hpe.root_dir))
        finally:
            # PATH must be restored even if the directory could not be removed.
            self._temp_env.restore()
