import json
import logging
import re
from multiprocessing.dummy import Pool

import yaml
from semantic_version import Spec, Version

from sandbox.projects.oko.crawl_repositories.clickhouse_upload import ClickhouseUploader
from sandbox.projects.oko.crawl_repositories.github import GithubCrawler

logger = logging.getLogger(__name__)


class PackageExtractor(object):
    def __init__(self, task):
        """Remember the owning task and build the ClickHouse uploader.

        ``task`` must expose ``Parameters.ch_token`` (whose ``data()`` result
        contains a ``"token"`` entry) and ``Parameters.clickhouse_hosts``.
        """
        self.task = task

        parameters = self.task.Parameters
        token = parameters.ch_token.data()["token"]
        hosts = parameters.clickhouse_hosts
        self.uploader = ClickhouseUploader(token=token, hosts=hosts)

    def extract_versions(self, files):
        """Run the whole pipeline for ``files`` and upload the outcome.

        The crawled records are parsed, expanded into one line per declared
        dependency and handed to the uploader together with the registry
        version cache.
        """
        records, cache = PackageExtractor._extract_versions(files)
        lines = self._transform_version_records(records, cache)
        self.uploader.write_packages(lines, cache)

    @staticmethod
    def _extract_versions(files):
        """Parse crawled files and collect registry data for their packages.

        Returns a ``(version_records, package_cache)`` tuple: the parsed
        records with the deprecation flag applied, and the mapping produced
        by ``_get_all_versions`` for every package named in those records.
        """
        parsed = PackageExtractor._extract_package_versions(files)
        version_records = PackageExtractor._set_deprecated(parsed)

        package_names = PackageExtractor._get_packages(version_records)
        package_cache = PackageExtractor._get_all_versions(package_names)

        return version_records, package_cache

    @staticmethod
    def _extract_package_versions(files):
        """Turn every crawled file record into a normalized version record."""
        logger.debug("Extracting package versions")
        return list(map(PackageExtractor._transform_package_record, files))

    @staticmethod
    def _transform_package_record(record):
        try:
            data = json.loads(record["file"])
        except ValueError:
            data = {}

        deps = data.get("dependencies") if isinstance(data.get("dependencies"), dict) else None
        dev_deps = data.get("devDependencies") if isinstance(data.get("devDependencies"), dict) else None

        package_lock = PackageExtractor._parse_package_lock(record)

        return {
            "url": record["url"],
            "project": record["project"],
            "last_commit_time": record["last_commit_time"],
            "vcs_type": record["vcs_type"],
            "path": record["path"],
            "file_version": data.get("version"),
            "is_deprecated": True if data.get("deprecated") == "yes" else False,
            "dev_deps": dev_deps,
            "deps": deps,
            "package_lock": package_lock,
            "lock_variant": record.get('lock_variant', 'package-lock.json'),
        }

    @staticmethod
    def _validate_semver(version):
        """Report whether ``version`` is accepted as a version specification.

        The value is converted with ``str`` and passed to ``Spec.parse``; a
        ``ValueError`` from either step means the value is rejected.
        """
        try:
            Spec.parse(str(version))
            return True
        except ValueError:
            return False

    @staticmethod
    def _is_unstable_version(version):
        for c in ['alpha', 'beta', 'pre', 'rc']:
            if c in version:
                return True
        return False

    @staticmethod
    def _parse_package_lock_json(record):
        lock_file = record['lock_file']

        try:
            lock = json.loads(lock_file)
            deps = lock.get("dependencies", None)
            if not deps:
                raise TypeError('Unsupported NPM version')
        except ValueError:
            deps = None
        except TypeError:
            deps = None
            logger.debug("Project {} ({}) has no dependencies field in package-lock.json".format(record['project'], record['url']))

        if deps is None:
            return {}

        result = {}

        for key in deps:
            try:
                version = deps[key]["version"]
                if PackageExtractor._validate_semver(version):
                    if key not in result or deps[key].get("dev") != "true":
                        result[key] = version
                else:
                    logger.debug("Project {} ({}) has invalid package {} semver {} in package-lock.json".format(record['project'], record['url'], key, version))
            except Exception as e:
                logging.debug("Something is wrong!\n{}".format(e))
                continue

        return result

    @staticmethod
    def _parse_yarn_lock(record):
        """Extract locked package versions from a yarn.lock body.

        The text from ``record['lock_file']`` is rewritten into YAML with
        string substitutions, loaded with ``yaml.safe_load`` and reduced to a
        ``{package name: version}`` dict. When several entries share a package
        name, the lowest version is kept. Entries whose version fails
        ``_validate_semver``, or that cannot be processed, are skipped.

        An empty dict is returned when the rewritten text is not valid YAML or
        does not load as a mapping.
        """
        lock_file = record['lock_file']

        # Entry fields are written as `  key value`; YAML needs `  key: value`.
        for key in ['version', 'resolved', 'integrity', 'uid']:
            lock_file = lock_file.replace('\n  {} '.format(key), '\n  {}: '.format(key))
            lock_file = lock_file.replace('\n  "{}" '.format(key), '\n  "{}": '.format(key))

        transformed = []
        lines = lock_file.split('\n')
        for line in lines:
            # Only unindented lines ending with ':' are entry headers; all
            # other lines are passed through untouched.
            if not line or line.startswith(' ') or not line.endswith(':'):
                transformed.append(line)
                continue
            # From
            # schema-utils@^3.1.0, schema-utils@^3.1.1:
            # "@types/json-schema@*", "@types/json-schema@^7.0.8":
            # "errno@>=0.1.1 <0.2.0-0", errno@^0.1.3:
            # To
            # "schema-utils@^3.1.0":
            # "@types/json-schema@*":
            # "errno@>=0.1.1":
            package = line[:-1].split(',')[0]
            package = package.replace('"', '')
            parts = package.split('@')
            # Scoped names start with '@', so only the last '@' separates the
            # name from the requested range.
            name = '@'.join(parts[:-1])
            version = parts[-1]
            version = version.split(' ')[0]
            package = '"{}@{}":'.format(name, version)
            transformed.append(package)
        lock_file = '\n'.join(transformed)

        # Dependencies
        lock_file = re.sub(r'\n {4}([^: ]+) (.*)\n', r'\n    \1: \2\n', lock_file)
        lock_file = re.sub(r'\n {4}([^: ]+) (.*)\n', r'\n    \1: \2\n', lock_file)  # Twice because even/odd LF captures

        try:
            deps = yaml.safe_load(lock_file)
        except yaml.YAMLError as e:
            deps = None
            logger.debug("Project {} ({}) has invalid yaml in their yarn.lock file\n{}".format(record['project'], record['url'], e))

        if not isinstance(deps, dict):
            # Either nothing was loaded (None) or the document is not a
            # mapping of entries, so there is nothing to extract.
            return {}

        result = {}

        for key in deps:
            try:
                # schema-utils@^3.1.0
                # @types/estree@*
                # errno@>=0.1.1
                parts = key.split('@')
                # schema-utils
                # @types/estree
                # errno
                name = '@'.join(parts[:-1])

                version = deps[key]["version"]

                if PackageExtractor._validate_semver(version):
                    # Keep the lowest resolved version per package name.
                    if name not in result or Version(version) < Version(result[name]):
                        result[name] = version
                else:
                    logger.debug("Project {} ({}) has invalid entry {} semver {} in yarn.lock".format(record['project'], record['url'], key, version))
            except Exception as e:
                # Best effort: one malformed entry must not drop the others.
                logger.debug("Something is wrong!\n{}".format(e))
                continue

        return result

    @staticmethod
    def _parse_unknown(record):
        """Fallback parser for lock file variants that are not recognized.

        Writes a debug message naming the project, its URL and the reported
        ``lock_variant``, and yields no locked versions.
        """
        message = "Project {} ({}) has very strange lockfile – {}".format(
            record['project'],
            record['url'],
            record['lock_variant']
        )
        logger.debug(message)
        return {}

    @staticmethod
    def _parse_package_lock(record):
        lock_file = record['lock_file']

        if lock_file is None:
            logger.debug("Project {} ({}) has None instead of lock_file".format(record['project'], record['url']))
            return {}

        return {
            'package-lock.json': PackageExtractor._parse_package_lock_json,
            'yarn.lock': PackageExtractor._parse_yarn_lock
        }.get(record.get('lock_variant', 'package-lock.json'), PackageExtractor._parse_unknown)(record)

    @staticmethod
    def _set_deprecated(records):
        """Spread the deprecated flag over all records of deprecated projects.

        A GitHub project counts as deprecated when one of its records is
        already flagged and ``GithubCrawler.check_root_package`` accepts that
        record's URL (presumably: the record is the repository's root
        package - confirm against GithubCrawler). Every GitHub record of
        such a project is then flagged in place.

        Returns the same ``records`` list.
        """
        logger.debug("Setting deprecated status on %s records", len(records))

        deprecated_projects = set()
        for entry in records:
            if entry["vcs_type"] != "github":
                continue
            if not entry["is_deprecated"]:
                continue
            # Checked last so the crawler is only asked about flagged records.
            if GithubCrawler.check_root_package(entry["url"]):
                deprecated_projects.add(entry["project"])

        flagged_records = 0
        for entry in records:
            if entry["vcs_type"] != "github":
                continue
            if entry["project"] in deprecated_projects:
                entry["is_deprecated"] = True
                flagged_records += 1

        logger.info("%s projects are deprecated", len(deprecated_projects))
        logger.info("%s packages are deprecated", flagged_records)
        return records

    @staticmethod
    def _get_packages(versions):
        """Collect the names of all packages declared in ``versions``.

        Looks at the ``deps`` and ``dev_deps`` dicts of every record and
        returns the distinct names as a sorted list.
        """
        logger.debug("Getting packages")

        names = set()
        for record in versions:
            for field in ("deps", "dev_deps"):
                declared = record[field]
                if isinstance(declared, dict):
                    names.update(declared)

        logger.info("Got %s unique packages", len(names))
        return sorted(names)

    @staticmethod
    def _get_all_versions(packages):
        """Fetch registry data for ``packages`` using 8 worker threads.

        Each package is looked up with ``_get_versions``. The per-package
        results are merged into one dict keyed by package name, which is
        returned.
        """
        logger.debug("Getting all versions")

        pool = Pool(8)
        try:
            versions_list = pool.map(PackageExtractor._get_versions, packages)
        finally:
            # Worker threads stay alive until the pool is shut down, so
            # release them whether or not the lookups succeeded.
            pool.terminate()
            pool.join()

        package_cache = {}
        for package in versions_list:
            package_cache.update(package)

        # values() instead of itervalues(): the latter exists on Python 2 only.
        logger.info(
            "Got %s unique versions of packages",
            sum(len(pkg) for pkg in package_cache.values()),
        )
        return package_cache

    @staticmethod
    def _get_versions(package):
        """Download the registry document of ``package``.

        Returns ``{package: {version: time}}`` holding every key of the
        document's ``versions`` object that also appears in its ``time``
        object (presumably publication timestamps - confirm against the
        registry). A document that lacks either field, or a response body
        that cannot be decoded as JSON, gives ``{package: {}}``.
        """
        import requests

        try:
            document = requests.get("https://npm.yandex-team.ru/" + package).json()
            known_versions = sorted(str(item) for item in document["versions"].keys())
            timestamps = document["time"]

            dated = {
                item: timestamps[item]
                for item in known_versions
                if item in timestamps
            }
            return {package: dated}
        except (KeyError, ValueError):
            return {package: {}}

    def _transform_version_records(self, version_records, package_cache):
        """Expand parsed records into one output line per dependency.

        Every record is expanded with ``_transform_one_version_record`` and
        the lines are concatenated in record order. The total number of lines
        and the numbers of lines with a ``lock_version`` / ``repo_version``
        are logged.

        Returns the list of output lines.
        """
        logger.debug("Transforming version records into final lines")
        result = []

        for record in version_records:
            result.extend(self._transform_one_version_record(record, package_cache))

        # Counted with sum() because len(filter(...)) fails where filter()
        # returns an iterator (Python 3).
        with_lock_version = sum(1 for line in result if line["lock_version"] is not None)
        with_repo_version = sum(1 for line in result if line["repo_version"] is not None)

        logger.info("Got %s result lines", len(result))
        logger.info("%s lines have non-empty lock_version", with_lock_version)
        logger.info("%s lines have non-empty repo_version", with_repo_version)
        return result

    @staticmethod
    def _get_repo_version(package, version, package_cache):
        if version is None:
            return None

        try:
            versions_list = package_cache[package].keys()

            if not PackageExtractor._is_unstable_version(version):
                versions_list = filter(lambda x: not PackageExtractor._is_unstable_version(x), versions_list)

            versions = map(Version.coerce, versions_list)
            result = Spec(version).select(versions)
            return str(result)
        except (
            ValueError,
            TypeError,
            AttributeError,
        ) as e:  # Bad user versions give these exceptions
            logger.error("Bad version %s for %s: %s", version, package, e)
            return None

    def _transform_one_version_record(self, record, package_cache):
        """Expand one parsed record into a line per declared dependency.

        Each entry of the record's ``deps`` and ``dev_deps`` dicts becomes a
        dict holding the record's remaining columns plus ``name``, ``version``
        (``None`` when it fails ``_validate_semver``), ``repo_version``,
        ``is_devdependency``, ``lock_version`` (looked up in the record's
        ``package_lock``) and ``fielddate`` (``self.task.Context.date``).

        Returns the list of lines, empty when the record declares nothing.
        """
        locked_versions = record["package_lock"]

        # Columns shared by every line produced from this record.
        shared = record.copy()
        for dropped in ("deps", "dev_deps", "package_lock"):
            shared.pop(dropped, None)

        lines = []
        for field in ("deps", "dev_deps"):
            declared = record.get(field)
            if not isinstance(declared, dict):
                continue

            for key in declared:
                version = declared[key]
                if not PackageExtractor._validate_semver(version):
                    logger.debug("Project {} ({}) has invalid package {} semver {} in package.json".format(record['project'], record['url'], key, version))
                    version = None

                line = dict(
                    name=key,
                    version=version,
                    repo_version=PackageExtractor._get_repo_version(key, version, package_cache),
                    is_devdependency=(field == "dev_deps"),
                    lock_version=locked_versions.get(key),
                    fielddate=self.task.Context.date,
                    **shared
                )
                lines.append(line)
        return lines
