#!/usr/bin/env python
# coding=utf-8
"""
patched version for https://st.yandex-team.ru/ISS-2929

Goals:
- make mono script with skynet python which already has built-in pycrypto
- avoid packaging pycrypto compiled C extensions into python script


Generates a standalone Python script, including its dependencies, from setup.py.
Aimed at packaging scripts which use the SkyNet API.

- install virtualenv into temporary directory
- make local temporary virtualenv
- install target module into it
- install py.test
- collect dependencies
- make script for assembling
- create script
- append extra information as python comments

TODO:
- check for "def main" in main script
"""
import datetime
import getpass
import logging
import optparse
import os
import re
import shutil
import socket
import stat
import subprocess
import sys
import tempfile
import types
import urllib2
import urlparse


# Author handle and tool version; VERSION is what optparse reports for --version.
__author__ = "abcdenis"
VERSION = "0.4-skynet"

# DEBUG is overwritten in main() from the -v/--verbose option.
DEBUG = False
# When true, out() emits its additional "out: ..." debug records.
DEBUG_OUT = True

# Bare message only: no timestamps or level names in the console output.
LOGGING_FORMAT = "%(message)s"

# Default for the --pypi-url option; read_config() insists that the URL ends with "/".
DEFAULT_YANDEX_PYPI_URL = "https://pypi.yandex-team.ru/simple/"
PYTHON_BIN = "python2.7"

# Interpreter path written into the shebang line of the generated script.
SKYNET_PYTHON = "/skynet/python/bin/python2.7"
# SKYNET_VIRTUALENV = "/skynet/python/bin/virtualenv"  # virtualenv==1.10.1

# Dependencies removed from the bundle list by PythonScriptAssembler.filter_skynet_modules().
MODULES_ALREADY_AVAILABLE_IN_SKYNET = ("pycrypto", )

FORWARDER_PY = "forwarder.py"

# virtualenv release fetched from the package index, and the directory its source archive is expected to unpack into.
VIRTUALENV_VERSION_TO_USE = "15.0.1"
VIRTUALENV_DIRNAME = "virtualenv-" + VIRTUALENV_VERSION_TO_USE

# Package installed into the temporary virtualenv before the script is assembled.
PYTEST = "pytest"

SETUP_PY_FILE = "setup.py"
# Requirement names ignored while walking "pip show" output.
DEPENDENCIES_TO_OMIT = ("distribute", )

# Captures the dotted numeric version from "<name>-<version>.tar.gz".
TAR_GZ_VERSION_MASK = re.compile(r"^\S+?-([\d\.]+)\.tar\.gz$")

# Separator lines used by out() around the logged command and its output.
DELIMITER = "=" * 65
DELIMITER_LITE = "-" * 65

# Root logger; handlers are attached by init_console_logging().
log = logging.getLogger()


class AssembleError(StandardError):
    """
    expected failure of the assembling process; main() catches it, prints the message and exits
    """
    pass


def error(msg, must_exit=False):
    """
    print msg (followed by a newline) to stderr and flush it;
    when must_exit is true, terminate the process with exit status 1
    """
    stream = sys.stderr
    stream.write("%s\n" % (msg, ))
    stream.flush()
    if not must_exit:
        return
    sys.exit(1)


def die(msg):
    """
    report msg on stderr and terminate the process (see error())
    """
    error(msg, True)


def read_file(fn, **kw):
    """
    read a file either as raw data or as a list of lines

    :param fn: path to an existing regular file
    :param kw: binmode -- when true the file is opened as "rb" and its whole content is returned;
               otherwise the file is opened as "r" and a list of lines without line endings is returned
    :return: file content (binmode) or list of lines (text mode)
    :raises AssertionError: file does not exist, or (binmode) the number of bytes read differs from the size on disk
    """
    assert os.path.isfile(fn), "file not found: '%s'" % fn

    binmode = kw.get("binmode", 0)

    # "with" closes the handle on every path, including a failing read
    with open(fn, "rb" if binmode else "r") as f:
        if not binmode:
            # build a real list while the file is still open: a lazy map object would be unusable after close
            return [line.splitlines()[0] for line in f]
        rc = f.read()

    file_size = os.path.getsize(fn)
    assert len(rc) == file_size, "read_file error: read %d bytes, file on disk: %d bytes -- %s" % \
                                 (len(rc), file_size, fn)
    return rc


def write_file(fn, data, **kw):
    """
    write data to a file, replacing its previous content

    :param fn: path of the file to create or overwrite
    :param data: binmode -- the content itself; text mode -- a sequence of lines, joined with "\\n"
                 (no trailing newline is added)
    :param kw: binmode -- when true the file is opened as "wb", otherwise as "w"
    :return: True
    """
    binmode = kw.get("binmode", 0)

    # "with" closes the handle even when write() raises (e.g. disk full or wrong data type)
    with open(fn, "wb" if binmode else "w") as f:
        f.write(data if binmode else "\n".join(data))

    return True


def out(cmds, fail=True, env=None, shell=False):
    """
    run a command and return its combined stdout/stderr output

    :param cmds: command as a string or as a list of arguments (passed to subprocess.Popen as is)
    :param fail: raise Exception when the command exits with a non-zero code
    :param env: environment for the child; None means a copy of os.environ. LANG is always forced to en_US.UTF-8
    :param shell: passed to subprocess.Popen
    :return: everything the command wrote to stdout and stderr
    """
    def out_debug(msg):
        if not DEBUG_OUT:
            return
        logging.debug("out: " + msg)

    cmd_str = cmds if isinstance(cmds, types.StringTypes) else " ".join(cmds)

    log.info(DELIMITER)
    log.info("RUN: %s" % (cmd_str, ))

    if env:
        out_debug("env: %r" % (env, ))

    # identity check, not truthiness: an explicitly empty env={} must not fall back to os.environ
    local_env = dict(os.environ if env is None else env)
    local_env["LANG"] = "en_US.UTF-8"

    process = subprocess.Popen(cmds, close_fds=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                               env=local_env, shell=shell)
    # stderr is merged into stdout, so draining this single pipe before wait() is enough
    output = process.stdout.read()
    exit_code = process.wait()

    if fail and exit_code != 0:
        raise Exception("Process ret=%d: %s" % (exit_code, output))

    log.info("%s\n%s\n%s\n" % (DELIMITER_LITE, output, DELIMITER_LITE))
    return output


def get_tar_gz_version(filename):
    """
    return the version captured by TAR_GZ_VERSION_MASK from an archive file name,
    or None when the name does not match the mask
    """
    found = TAR_GZ_VERSION_MASK.match(filename)
    return found.group(1) if found else None


def load_url(url):
    """
    download a URL and return the response body

    :param url: address to fetch with urllib2.urlopen
    :return: whole response body
    """
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # release the connection explicitly instead of leaving it to garbage collection
        response.close()
    log.debug("loaded: %s (%d bytes)" % (url, len(data)))
    return data


def load_file(url, path):
    """
    download url and store the response body in the file at path (written in binary mode)
    """
    payload = load_url(url)
    write_file(path, payload, binmode=True)
    log.debug("%s saved to %s (%d bytes)" % (url, path, len(payload)))


def extract_links(html):
    """
    dirty html parsing hack: collect (href, link text) pairs of anchors written as <a href=...>text</a>

    only anchors whose first attribute is href are recognised; the result is a set, so duplicates collapse
    """
    anchor_pattern = re.escape("<a href=") + r"([^\s>]+)[^>]*>([^<]*)" + re.escape("</a>")
    anchor_re = re.compile(anchor_pattern, re.M | re.S | re.I)

    def unquote(value):
        # a value that opens with a quote is taken to be closed by one: both end characters are dropped
        if value.startswith(('"', "'")):
            return value[1:-1]
        return value

    return set((unquote(href), text) for href, text in anchor_re.findall(html))


def fix_pip_module_name(module_name):
    """
    return module_name with every "_" replaced by "-"; a warning is logged when the name actually changes
    """
    pip_name = "-".join(module_name.split("_"))

    if pip_name == module_name:
        return module_name

    log.warning("pip will change module name upon install from %r to %r" % (module_name, pip_name))
    return pip_name


def fix_python_module_name(module_name):
    """
    return module_name lower-cased, with every "-" replaced by "_"
    """
    underscored = "_".join(module_name.split("-"))
    return underscored.lower()


def replace_python_path(script, python_path):
    """
    replace the interpreter in the shebang line of a script (assert script is Unicode)

    :param script: script source; its first line must start with "#!" and mention "python"
    :param python_path: interpreter path for the new shebang line
    :return: script with the first line replaced by "#!<python_path>" plus a "\\n" line ending
    :raises RuntimeError: first line is not a python shebang, or the script is empty
    """
    lines = script.splitlines(True)

    # an empty script has no first line at all: report it like any other unparsable shebang
    # instead of failing with IndexError
    first_line = lines[0] if lines else u""

    if not (first_line.startswith("#!") and "python" in first_line):
        raise RuntimeError("cant parse shebang line: %r" % first_line)

    lines[0] = u"#!%s\n" % python_path
    return u"".join(lines)


class PythonScriptAssembler(object):
    """
    do the job :)

    builds a temporary virtualenv, installs the main module into it, collects the module's
    dependencies and assembles everything into one standalone script (see run() for the order of steps)
    """
    # accepted fake module specification: "<name>=<version>", where name consists of lowercase letters,
    # "-" and "_", and version starts with a digit followed by digits, dots or dashes
    FAKE_MODULE_NAME_RE = re.compile(r'^([a-z\-_]+)=(\d[\.\-\d]*)$')
    # setup.py body of an empty stub distribution; %(module)s and %(version)s are substituted
    # by install_one_fake_module()
    FAKE_MODULE_SETUP_PY_BODY_MASK = """#!/usr/bin/env python
from setuptools import setup, find_packages

__version__ = "%(version)s"

if __name__ == "__main__":
    setup(
        name="%(module)s",
        version=__version__,
        install_requires=[],
        description="fake %(module)s=%(version)s module to fool virtualenv/pip",
        packages=[],
        package_data={},
        package_dir={},
        zip_safe=False,
    )
"""

    def __init__(self, pypi_url, python_path, main_module, script_name, do_not_cleanup=False, skynet=False,
                 info_file=None, fool_cc=False, custom_dirs=None, fake_modules=None):
        self.pypi_url = pypi_url
        self.python_path = python_path
        self.do_not_cleanup = do_not_cleanup
        self.main_module = main_module
        self.script_name = script_name
        self.skynet = skynet
        self.info_file = info_file
        self.fool_cc = fool_cc
        self.custom_dirs = custom_dirs
        self.fake_modules = fake_modules

        # calculate other fields
        self.main_module_path = os.path.abspath(self.main_module)

        # placeholders
        self.work_dir = None
        self.ve_room_dir = None
        self.ve_bin_dir = None
        self.ve_pip_binary = None
        self.ve_python_binary = None
        self.setup_py_file = None
        self.main_module_genuine = None
        self.main_module_pip_format = None
        self.main_module_deps = None
        self.main_module_dep_versions = None
        self.custom_deps = None
        self.virtualenv_script = None

    def setup(self):
        """
        create the temporary working directory and remember its path in self.work_dir
        """
        work_dir = tempfile.mkdtemp(prefix="MacLeod-work-dir-")
        self.work_dir = work_dir
        log.debug("work_dir = %s" % work_dir)

    def teardown(self):
        if self.do_not_cleanup:
            log.warning("skip cleaning up temporary dir -- %s" % self.work_dir)
            return

        if os.path.isdir(self.work_dir):
            log.debug("remove work_dir -- %s" % self.work_dir)
            shutil.rmtree(self.work_dir)

    def prepare_virtualenv_room(self):
        """
        create the temporary virtualenv in <work_dir>/venv and remember the paths of its
        bin directory, pip and python; each path is asserted to exist
        """
        log.info('prepare virtualenv directory')
        room = os.path.join(self.work_dir, 'venv')
        self.ve_room_dir = room
        out([self.python_path, self.virtualenv_script, room])

        bin_dir = os.path.join(room, 'bin')
        self.ve_bin_dir = bin_dir
        assert os.path.isdir(bin_dir)

        pip_binary = os.path.join(bin_dir, 'pip')
        self.ve_pip_binary = pip_binary
        assert os.path.isfile(pip_binary)

        python_binary = os.path.join(bin_dir, 'python')
        self.ve_python_binary = python_binary
        assert os.path.isfile(python_binary)

    def install_virtualenv(self):
        """
        download the virtualenv source archive of version VIRTUALENV_VERSION_TO_USE from the package index,
        unpack it into the working directory and remember the path of virtualenv.py

        :raises AssembleError: the index page does not list exactly one matching .tar.gz archive
        """
        log.info('install virtualenv')
        index_url = self.pypi_url + 'virtualenv'
        log.debug('virtualenv index: %s' % index_url)

        # fetch the package index page and keep only .tar.gz links of the wanted version
        index_page = load_url(index_url)
        candidates = []
        for url, filename in extract_links(index_page):
            if not filename.endswith('.tar.gz'):
                continue
            if get_tar_gz_version(filename) == VIRTUALENV_VERSION_TO_USE:
                candidates.append((url, filename))

        if len(candidates) != 1:
            raise AssembleError("unable to find virtualenv version=%s on %s" % (VIRTUALENV_VERSION_TO_USE, index_url))

        tar_gz_url, filename = candidates[0]

        # download the source archive next to the future virtualenv
        archive_file = os.path.join(self.work_dir, filename)
        load_file(urlparse.urljoin(self.pypi_url, tar_gz_url), archive_file)

        # unpack it inside the working directory
        out(['tar', 'xvf', archive_file, '-C', self.work_dir])

        script_path = os.path.join(self.work_dir, VIRTUALENV_DIRNAME, 'virtualenv.py')
        self.virtualenv_script = script_path
        assert os.path.isfile(script_path)

    def _get_os_environment(self):
        env = os.environ
        if self.fool_cc:
            env["CC"] = ""
        return env

    def install_one_fake_module(self, module_string):
        """
        create an empty stub distribution described by module_string ("name=version")
        in a fresh subdirectory of the working directory and pip-install it into the virtualenv;
        the process is terminated via die() when module_string has a wrong format
        """
        log.info("=> install_one_fake_module(%r)" % (module_string, ))
        parsed = self.FAKE_MODULE_NAME_RE.match(module_string)
        if parsed is None:
            die("wrong fake module format: %r, it should be %r" % (module_string, self.FAKE_MODULE_NAME_RE.pattern))

        module, version = parsed.group(1), parsed.group(2)

        stub_dir = tempfile.mkdtemp(prefix=module_string, dir=self.work_dir)

        write_file(os.path.join(stub_dir, "__init__.py"), "# autogenerated stub file\n", binmode=True)

        substitutions = {"module": module, "version": version}
        write_file(os.path.join(stub_dir, "setup.py"), self.FAKE_MODULE_SETUP_PY_BODY_MASK % substitutions,
                   binmode=True)

        out([self.ve_pip_binary, "install", stub_dir])

    def install_fake_modules(self):
        """
        install every requested fake module, one by one (see install_one_fake_module())
        """
        log.info("=> install_fake_modules")

        requested = self.fake_modules
        if requested:
            for module_string in requested:
                self.install_one_fake_module(module_string)
        else:
            log.info("No fake modules to install.")

    def install_main_module(self):
        """
        find out the distribution name of the main module ("setup.py --name") and install the module
        into the virtualenv in editable mode from its source directory

        :raises AssembleError: no setup.py next to the main module
        """
        log.info("install module %r into virtualenv" % self.main_module)
        source_dir = os.path.dirname(self.main_module_path)
        self.setup_py_file = os.path.join(source_dir, SETUP_PY_FILE)

        if not os.path.isfile(self.setup_py_file):
            raise AssembleError("setup.py file not found: %s" % self.setup_py_file)

        # the first line printed by "setup.py --name" is the package name
        name_output = out([self.python_path, self.setup_py_file, "--name"], env=self._get_os_environment())
        self.main_module_genuine = name_output.splitlines()[0]
        self.main_module_pip_format = fix_pip_module_name(self.main_module_genuine)
        log.info("main module name: %s" % self.main_module_pip_format)

        # let pip of the virtualenv install the module straight from its directory
        out([self.ve_pip_binary, "install", "--index-url", self.pypi_url, "-e", source_dir])

    def get_module_deps(self, module_name):
        """
        return the set of direct requirements of module_name as reported by "pip show",
        without the names listed in DEPENDENCIES_TO_OMIT
        """
        pip_raw = out([self.ve_pip_binary, "show", module_name])

        deps = set()
        for line in pip_raw.splitlines():
            if not line.startswith("Requires:"):
                continue

            names = set()
            for part in line.split(":", 1)[1].split(","):
                name = part.strip()
                if name:
                    names.add(name)

            for name in names:
                if name in DEPENDENCIES_TO_OMIT:
                    log.debug("skip: %r depends on %r" % (module_name, name))
                else:
                    deps.add(name)
            # no early exit: further "Requires:" lines are processed as well
        return deps

    def get_dep_versions(self):
        """
        store in self.main_module_dep_versions the "name (version)" lines of "pip list" that belong
        to the main module or to one of its collected dependencies

        expected "pip list" output:

        $ bin/pip list
        distribute (0.7.3)
        dummy-main (1.0.1)
        kafka (0.8.1-3)
        pip (1.5)
        poolbase (0.1.3)
        setuptools (2.0.2)
        wsgiref (0.1.2)
        """
        pip_listing = out([self.ve_pip_binary, "list"])
        record_re = re.compile(r"^((\S+)\s+\(\S+\))$", re.M | re.S)

        # the main module itself is reported together with its dependencies
        wanted = self.main_module_deps + [self.main_module_pip_format, ]
        log.info("local_deps: %s" % (wanted, ))

        versions = []
        for line, module_name in record_re.findall(pip_listing):
            if module_name in wanted:
                versions.append(line)

        self.main_module_dep_versions = versions
        log.debug("deps versions obtained: %s" % (versions, ))

    @staticmethod
    def filter_skynet_modules(dependencies):
        """
        return a new set with the members of dependencies (a set) that are not listed in
        MODULES_ALREADY_AVAILABLE_IN_SKYNET; removed names are logged
        """
        kept = set()
        for name in dependencies:
            if name not in MODULES_ALREADY_AVAILABLE_IN_SKYNET:
                kept.add(name)

        if kept != dependencies:
            log.info("stripped out modules already available in skynet: %r" % (dependencies - kept, ))
        return kept

    def collect_main_module_dependencies(self, options):
        """
        walk the requirement graph starting at the main module, store the sorted list of all
        (transitive) dependencies in self.main_module_deps and then collect their versions

        :param options: not used by this method
        """
        found = set()
        visited = set()

        # the main module seeds the walk but is not a dependency of itself
        pending = [self.main_module_pip_format, ]

        while pending:
            module = pending.pop()
            if module not in visited:
                direct = self.get_module_deps(module)
                found.update(direct)
                pending.extend(direct)
                visited.add(module)

        # if options.skynet:
        found = PythonScriptAssembler.filter_skynet_modules(found)

        self.main_module_deps = sorted(found)
        log.info("%r depends on %s" % (self.main_module_pip_format, self.main_module_deps))

        self.get_dep_versions()

    def collect_custom_dependencies(self):
        self.custom_deps = []
        if not self.custom_dirs:
            return

        for dir_path in self.custom_dirs:
            basename = os.path.basename(dir_path)
            self.custom_deps.append(basename)
            os.symlink(os.path.realpath(dir_path), os.path.join(self.work_dir, basename))

    def _install_pytest_module(self):
        """
        install pytest into the virtualenv with the virtualenv's own pip

        the version is restricted to pytest 2.x: the assembler script imports _pytest.genscript,
        and that module was removed in pytest 3.0
        """
        args = [
            self.ve_pip_binary,
            "install",
            "--index-url",
            self.pypi_url,
            PYTEST + "<3.0",
        ]
        out(args)

    def write_and_run_assembler_script(self):
        log.info("assemble script using pytest")
        self._install_pytest_module()

        assembler_script_template = '''
import py
from _pytest.genscript import generate_script

def generate_standalone_python_script(main_module, script_name, req_modules):
    """
    create standalone doit script
    """
    script = generate_script("import %s; %s.main()" % (main_module, main_module), req_modules)
    genscript = py.path.local(script_name)
    genscript.write(script)
    return 0

main_module = <MAIN_MODULE>
script_name = <SCRIPT_FILE>
deps = <DEPS>

generate_standalone_python_script(main_module, script_name, deps)
'''

        assembler_script_file = os.path.join(self.work_dir, "_assembler.py")
        temp_script_file = os.path.join(self.work_dir, "temp-MacLeod.py")

        script = assembler_script_template
        script = script.replace("<MAIN_MODULE>", repr(self.main_module_genuine))
        script = script.replace("<SCRIPT_FILE>", repr(temp_script_file))
        dep_modules = [fix_python_module_name(m) for m in self.main_module_deps + [self.main_module_genuine, ] +
                       self.custom_deps]
        script = script.replace("<DEPS>", repr(dep_modules))

        write_file(assembler_script_file, script, binmode=True)

        # copy original file to assembler directory
        shutil.copyfile(self.main_module_path + ".py", os.path.join(self.work_dir, self.main_module_genuine + ".py"))

        args = [
            self.ve_python_binary,
            assembler_script_file,
        ]
        out(args)

        assert os.path.isfile(temp_script_file)

        return temp_script_file

    @staticmethod
    def patch_shebang_string(script):
        """
        return script with its shebang line pointing to SKYNET_PYTHON
        """
        log.info("patch shebang line")
        log.info("shebang python path: %s" % SKYNET_PYTHON)
        return replace_python_path(script, SKYNET_PYTHON)

    @staticmethod
    def append_script_comments(script, extra_info, label):
        if isinstance(extra_info, types.StringTypes):
            lines = extra_info.splitlines()
        elif isinstance(extra_info, (types.ListType, types.TupleType)):
            lines = list(extra_info)
        else:
            raise TypeError("unsupported extra_info type: %r" % (extra_info, ))

        lines.insert(0, "=== %s ===" % (label, ))
        lines = ["# " + i for i in lines]
        lines = [i.rstrip() for i in lines]
        footer = "\n".join(lines)
        script += "\n" + footer + "\n"
        return script

    @staticmethod
    def _get_environment_info():
        lines = [
            "date: %s" % (datetime.datetime.now(), ),
            "user: %s" % (getpass.getuser(), ),
            "host: %s" % (socket.gethostname(), ),
            "platform: %s" % (sys.platform, ),
            "args: %s" % (" ".join([str(x) for x in sys.argv[1:]])),
            "workdir: %s" % (os.getcwd(), ),
        ]
        return lines

    def add_bundled_dependencies_list(self, script):
        """
        return script with the collected "name (version)" lines appended as a "bundled modules" comment block
        """
        log.info("add bundled modules info")
        return self.append_script_comments(script, self.main_module_dep_versions, "bundled modules")

    def add_environment_info(self, script):
        """
        return script with the build environment description appended as an "Environment info" comment block
        """
        log.info("add environment info")
        return self.append_script_comments(script, self._get_environment_info(), "Environment info")

    def add_info_file(self, script):
        if self.info_file:
            log.info("add text from file %s" % self.info_file)
            info = read_file(self.info_file, binmode=True)
            script = self.append_script_comments(script, info, "file %s:" % (self.info_file, ))
        return script

    def assemble_script(self):
        """
        generate the standalone script, patch its shebang line, append the informational comment
        blocks, save the result as self.script_name and make it executable for the owner
        """
        log.info("assemble script")
        script = read_file(self.write_and_run_assembler_script(), binmode=True)

        # post-processing steps, applied in this order
        decorators = (
            PythonScriptAssembler.patch_shebang_string,
            self.add_environment_info,
            self.add_bundled_dependencies_list,
            self.add_info_file,
        )
        for decorate in decorators:
            script = decorate(script)

        write_file(self.script_name, script, binmode=True)

        # add permission owner execute, keep the other mode bits
        current_mode = os.stat(self.script_name).st_mode
        os.chmod(self.script_name, current_mode | stat.S_IXUSR)

        log.info("saved %s" % self.script_name)

    def install_forwarder(self):
        """
        install the forwarder module into the virtualenv in editable mode

        the forwarder sources are looked up in "forwarder-src", a sibling of the main module's directory
        """
        forwarder_module_path = os.path.join(os.path.dirname(os.path.dirname(self.main_module_path)), "forwarder-src")
        # reported through the logger, like every other progress message of this class
        log.info("forwarder_module_path: %s" % (forwarder_module_path, ))

        # call virtualenv python to install module from directory
        # NOTE(review): pip receives the parent directory of forwarder-src, not forwarder-src itself --
        # confirm that this is intended
        args = [
            self.ve_pip_binary,
            "install",
            "--index-url",
            self.pypi_url,

            "-e",
            os.path.dirname(forwarder_module_path),
        ]
        out(args)

        log.info("forwarder.py installed")

    def run(self, options):
        self.install_virtualenv()
        self.prepare_virtualenv_room()
        self.install_fake_modules()
        self.install_main_module()
        self.collect_main_module_dependencies(options)
        self.collect_custom_dependencies()
        self.assemble_script()


def read_config(test_args=None):
    """
    parse and validate command line options

    every validation failure prints a message to stderr and terminates the process with exit status 1

    :param test_args: list of arguments to parse instead of sys.argv[1:]; None means "use sys.argv"
    :return: optparse options; main_module_name has its ".py" suffix stripped and
             main_module_path (absolute path of main_module_name) is added
    """
    parser = optparse.OptionParser(usage="usage: %prog [options]", version=VERSION)
    parser.add_option("-m", "--module", dest="main_module_name", default=None,
                      help="main module name from current directory (should contain function main())")
    parser.add_option("-o", "--output", dest="script_name", default=None,
                      help="output single script name")
    parser.add_option("-f", "--force", action="store_true", dest="force", default=False,
                      help="overwrite existing scripts")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                      help="print extra details")
    parser.add_option("--pypi-url", dest="pypi_url", default=DEFAULT_YANDEX_PYPI_URL,
                      help="URL of PyPi repository, default to %s" % (DEFAULT_YANDEX_PYPI_URL, ))
    parser.add_option("--info-file", dest="info_file", default=None,
                      help="description file name to append to resulting script as Python comment")
    parser.add_option("--do-not-cleanup", action="store_true", dest="do_not_cleanup", default=False,
                      help="do not delete temporary directory")
    parser.add_option("--no-cc", action="store_true", dest="fool_cc", default=False,
                      help="set [CC=] before installing extensions, useful for Apache Thrift")
    parser.add_option("--install-from-dir", action="append", dest="custom_dirs",
                      help="add module from local directory to result (useful for incorporating results of build)")
    parser.add_option("--fake-module", action="append", dest="fake_modules",
                      help="create and install into virtualenv fake empty module with specified name and version. " +
                      "Format: name=version (pycrypto=2.5.0)")

    # "is None", not truthiness: an explicitly empty argument list must be parsed as given
    # instead of being silently replaced by the real command line
    args = sys.argv[1:] if test_args is None else test_args

    options, _ = parser.parse_args(args=args)

    main_module_name = options.main_module_name
    if not main_module_name:
        die("please specify module name (--module)")

    # A little gift for user :) -- "module.py" is accepted as well as "module"
    if main_module_name.endswith(".py"):
        options.main_module_name = main_module_name[:-3]

    python_file = options.main_module_name + ".py"
    if not os.path.isfile(python_file):
        die("file %s not found" % python_file)

    options.main_module_path = os.path.abspath(options.main_module_name)

    script = options.script_name
    if not script:
        die("please specify script path (-o/--output)")

    # an existing result is removed up front, but only on explicit request
    if os.path.exists(script):
        if options.force:
            os.unlink(script)
        else:
            die("script already exists: %s (use -f/--force to overwrite)" % script)

    if not options.pypi_url.endswith("/"):
        die("pypi URL should ends with '/': %s" % (options.pypi_url, ))

    if options.info_file and not os.path.isfile(options.info_file):
        die("info_file not found: %s" % (options.info_file, ))

    return options


def init_console_logging(debug):
    """
    configure logging to stdout with LOGGING_FORMAT: level DEBUG when debug is true, INFO otherwise
    """
    level = logging.DEBUG if debug else logging.INFO
    logging.basicConfig(stream=sys.stdout, level=level, format=LOGGING_FORMAT)


def main():
    """
    command line entry point: parse options, build the standalone script and clean up

    :return: 0 on success; an AssembleError terminates the process with exit status 1 via die()
    """
    global DEBUG

    options = read_config()
    DEBUG = options.verbose

    init_console_logging(DEBUG)

    logging.debug("parsed options: %s" % (options, ))

    assembler = PythonScriptAssembler(
        pypi_url=options.pypi_url,
        # the module-level constant instead of a second copy of the same literal
        python_path=PYTHON_BIN,
        main_module=options.main_module_name,
        script_name=options.script_name,
        skynet=True,
        info_file=options.info_file,
        do_not_cleanup=options.do_not_cleanup,
        fool_cc=options.fool_cc,
        custom_dirs=options.custom_dirs,
        fake_modules=options.fake_modules,
    )
    try:
        assembler.setup()
        assembler.run(options)
    except AssembleError as exc:
        die("ERROR: %s" % str(exc))
    finally:
        # runs on success, on failure and on the SystemExit raised by die()
        assembler.teardown()

    return 0


# run only when executed as a script; the return value of main() becomes the process exit status
if __name__ == "__main__":
    sys.exit(main())
