from os.path import join as pj
import os
import logging
import subprocess
import re

from sandbox.common.types.client import Tag
from sandbox.common.errors import SandboxException

from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.parameters import (
    LastReleasedResource,
    ResourceSelector,
    SandboxArcadiaUrlParameter,
    SandboxStringParameter,
)
from sandbox.sandboxsdk.paths import get_unique_file_name, make_folder

from sandbox.projects import resource_types
from sandbox.projects.common.mapreduce_stored_tables import SortMode
from sandbox.projects.common.userdata import mr_base_task, util
from sandbox.projects.common.userdata import resource_types as userdata_resource_types
from sandbox.projects.common.userdata.packages_installer import PackagesInstaller
from sandbox.projects.geobase.Geodata5BinStable import resource as gbr

# Values of the `group` attribute used by the parameter classes in this module.
# Task.init_files() selects input parameters by comparing their `group` with these names.
PACKAGES_GROUP_NAME = "Packages"
DATA_FILES_GROUP_NAME = "Data files"


class ScriptsArcadiaUrlParameter(SandboxArcadiaUrlParameter):
    """
        Optional Arcadia URL of the branch to take non-packaged scripts from (defaults to trunk)
    """
    name = "scripts_arcadia_url"
    group = mr_base_task.MISC_PARAMS_GROUP_NAME
    description = "Use scripts (for those not in packages) from this branch:"
    default_value = Arcadia.trunk_url()
    required = False


class MrAppsPackage(ResourceSelector):
    """
        Optional selector of a MRAPPS_DEV_PACKAGE resource (listed in the packages group)
    """
    name = "mr_apps_resid"
    group = PACKAGES_GROUP_NAME
    resource_type = resource_types.MRAPPS_DEV_PACKAGE
    description = "Resource with MR_APPS_DEV package"
    required = False


class UserdataCommonDataUpdaterPackage(ResourceSelector):
    """
        Optional selector of a USERFEAT_DATA_UPDATER_PACKAGE resource (listed in the packages group)
    """
    name = "data_updater_package_resid"
    group = PACKAGES_GROUP_NAME
    resource_type = resource_types.USERFEAT_DATA_UPDATER_PACKAGE
    description = "Resource with yandex-search-common-data-updater package"
    required = False


class Geobase5Parameter(LastReleasedResource):
    """
        Optional GEODATA5BIN_STABLE resource (listed in the data files group), no default value
    """
    name = "geodata5_resource_id"
    group = DATA_FILES_GROUP_NAME
    resource_type = gbr.GEODATA5BIN_STABLE
    description = "Resource with geodata5.bin"
    default_value = None
    required = False  # don't require it yet


# not added to Task.input_parameters, use in derived classes
class RflForCanonizerParameter(LastReleasedResource):
    """
        Optional RFL_FOR_CANONIZER resource (listed in the data files group), no default value
    """
    name = "rfl_for_canonizer_resource_id"
    group = DATA_FILES_GROUP_NAME
    resource_type = userdata_resource_types.RFL_FOR_CANONIZER
    description = "Resource with rfl for canonizer files (filter.rfl, filter.robots.rfl)"
    default_value = None
    required = False


class StatePrefixParameter(SandboxStringParameter):
    """
        Prefix of the source state table names; Task.get_tables_prefix() returns it
        when the MR server is not a local one
    """
    name = "state_prefix"
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    description = 'Table name prefix of source state (with timestamp and trailing slash):'
    default_value = "userdata/"
    required = False


class StateResourceParameter(ResourceSelector):
    """
        USERDATA_TABLES_ARCHIVE resource holding stored MR tables to take the state from
    """
    name = "state_resource_id"
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    resource_type = resource_types.USERDATA_TABLES_ARCHIVE
    description = 'Resource with stored tables:'
    required = False


class StateObtainSelector(SandboxStringParameter):
    """
        Chooses where state tables come from / go to: 'MR' or 'resource' (the default)
    """
    name = "state_source_selector"
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    description = "Source/destination of state tables:"
    default_value = "resource"
    choices = [
        ("MR", "MR"),
        ("resource", "resource"),
    ]
    # Each choice is tied to the parameter that carries its value.
    sub_fields = {
        "MR": [StatePrefixParameter.name],
        "resource": [StateResourceParameter.name],
    }


class Task(mr_base_task.Task):
    """
        Base class with common userfeat-processing functionality
    """

    # Intersect the base task's client tags with (GENERIC | CUSTOM_USER_DATA).
    # noinspection PyUnresolvedReferences
    client_tags = (Tag.GENERIC | Tag.CUSTOM_USER_DATA) & mr_base_task.Task.client_tags
    cores = 17

    # When true, prepare_mr() sets the input-table sort mode before uploading tables.
    sort_source_tables = True

    input_parameters = util.smart_join_params(
        mr_base_task.Task.input_parameters,
        StateObtainSelector,
        StatePrefixParameter,
        StateResourceParameter,
        MrAppsPackage,
        UserdataCommonDataUpdaterPackage,
        Geobase5Parameter,
        ScriptsArcadiaUrlParameter
    )

    def check_params(self):
        """
            Check that a local MR server is given its state as a resource.

            Raises SandboxTaskFailureError when ctx['mr_server'] ends with "local" and
            either the state selector is 'MR' or no 'state_resource_id' is set.
        """
        if not self.ctx['mr_server'].endswith("local"):
            return
        if self.ctx['state_source_selector'] == 'MR' or self.ctx.get('state_resource_id') is None:
            raise SandboxTaskFailureError("Need source state to run locally")

    def get_tables_prefix(self):
        """
            Return the tables prefix: the base class value for a local MR server,
            otherwise ctx['state_prefix'].
        """
        if self.ctx['mr_server'].endswith('local'):
            return mr_base_task.Task.get_tables_prefix(self)
        return self.ctx["state_prefix"]

    def get_state_attrs(self):
        """
            Return a copy of the attributes of the state resource stored by prepare_mr().

            Raises SandboxTaskFailureError when no state resource has been stored.
        """
        res = getattr(self, 'state_resource', None)
        if res is None:
            raise SandboxTaskFailureError("No resource to get date attrs from")
        return res.attributes.copy()

    def prepare_mr(self):
        """
            Upload state tables (for the 'yt_local' server only) and add MR/YT settings
            to the config patcher created by init_files().
        """
        if self.ctx['mr_server'] == 'yt_local':
            if self.sort_source_tables:
                self.mr_tables_io.set_sort_input_tables_mode(SortMode.get_testable(self.yt_testable))
            res = self.mr_tables_io.upload_tables(
                self,
                self.ctx['state_resource_id'],
                self.get_tables_prefix()
            )
            # Remembered so that get_state_attrs() can read the resource attributes later.
            self.state_resource = res

        self.config_patcher.add(**{
            "ytTokenPath": "",
            "ytMountSandbox": "0",
            "ytPrefix": "//",
            "ytSpec": "{}",
            "ytSpecLong": "{}",
            "ytSpecSlowLong": "{}",
            "mrServer": self.ctx['real_mr_server'],
            "mrCluster": self.ctx['mr_cluster'],
            "fileSystemRoot": self.ctx["root"],
            "sandboxTestRun": True,
            "stderrlevel": 5 if self.ctx.get(mr_base_task.YtEnableDebugLogs.name) else 4  # TODO: make selector
        })
        if self.need_rem:
            self.config_patcher.add(**{
                "remServer": self.rem_runner.rem_url,
                "remChecksumsCacheDB": pj(self.rem_runner.scripts_dir, "var/checksum_cache.db"),
            })

    def get_project_bin_dir(self):
        """
            Return the project's bin directory; must be implemented by derived classes.
        """
        raise NotImplementedError()

    def init_files(self):
        """
            Install packages, sync data resources and prepare the task root directory.

            Stores the root in ctx['root'] and the Berkanavt directory in ctx['berkanavt'],
            symlinks the YT binaries into <root>/usr/bin, runs the postinst_mr_symlinks.py
            and update_common_data.py scripts, creates the task-specific logs directory
            and initializes self.config_patcher.

            Returns False if package installation fails, True otherwise.
        """
        # Real lists are required here: data_parameters is iterated twice below, and a
        # lazy filter() object (Python 3) would already be exhausted on the second pass.
        data_parameters = [
            param for param in self.input_parameters if param.group == DATA_FILES_GROUP_NAME
        ]
        package_parameters = [
            param for param in self.input_parameters if param.group == PACKAGES_GROUP_NAME
        ]
        for dclass in data_parameters:
            if not self.ctx.get(dclass.name):
                logging.warning("Data for {} not supplied, will skip it".format(dclass.name))

        installer = PackagesInstaller()
        root = self.ctx["root"] = get_unique_file_name(self.abs_path(""), "ROOT")
        if not installer.install(root, self, package_parameters):
            return False

        berkanavt = pj(root, 'Berkanavt')
        self.ctx['berkanavt'] = berkanavt

        # Resource type name -> local path of the synced resource, for supplied data only.
        testdata_items = dict()
        for dclass in data_parameters:
            resource_id = self.ctx.get(dclass.name)
            if resource_id:
                testdata_items[dclass.resource_type.__name__] = self.sync_resource(resource_id)

        # YT package binary -> names it should be reachable under in <root>/usr/bin.
        links = {
            "yt2": ["yt", "yt2"],
            "mapreduce-yt": ["mapreduce-yt"]
        }
        usr_bin = os.path.join(self.ctx["root"], "usr/bin")
        make_folder(usr_bin)
        for target, dests in links.items():
            target_full = self.yt_package.path_to_bin(target)

            for dest in dests:
                full_dest = os.path.join(usr_bin, dest)
                if not os.path.exists(full_dest):
                    logging.info("linking {} to {}".format(target_full, full_dest))
                    os.symlink(target_full, full_dest)

        run_process(
            (
                "{berkanavt}/userdata_common/scripts/postinst_mr_symlinks.py "
                "--project_dir {project_dir} "
                "--root {root} "
            ).format(
                berkanavt=berkanavt,
                project_dir=os.path.join(self.get_project_bin_dir(), ".."),
                root=root
            ),
            shell=True,
            check=True,
            log_prefix='postinst_mr_symlinks',
        )

        # The --test-data arguments are appended after formatting the template, so that
        # curly braces inside a resource path cannot be mistaken for format fields.
        test_data_args = " ".join(
            "--test-data {c}:{p}".format(c=res_name, p=res_path)
            for res_name, res_path in testdata_items.items()
        )
        update_cmd = (
            "{berkanavt}/userdata_common/scripts/update_common_data.py "
            " --conf-dir {berkanavt}/userdata_common/config "
            " --dst-prefix {root} "
            " --verbose "
        ).format(berkanavt=berkanavt, root=root) + test_data_args
        run_process(
            update_cmd,
            shell=True,
            check=True,
            log_prefix='update_common_data',
        )

        self._task_specific_logs_dir = get_unique_file_name(self.log_path(), 'task_spec_logs')
        make_folder(self._task_specific_logs_dir)

        self.config_patcher = util.ConfigPatcher(self.log_path())
        self.config_patcher.add(**{
            "robotRoot": self.ctx["berkanavt"],
            "systemUser": self.get_system_user(),
            "logPath": self._task_specific_logs_dir
        })

        return True

    def _is_yt_failed_because_the_lack_of_ports(self):
        """
            Scan the task-specific logs for the connection-failure signature.

            Only the last 20000 bytes of each file are inspected. Returns the path of
            the first file whose tail contains both "can not connect to" and
            "(attempt 9)", or None when no file matches.
        """
        tail_size = 20000
        for dirpath, _, filenames in os.walk(self._task_specific_logs_dir):
            for f in filenames:
                name = os.path.join(dirpath, f)
                if not os.path.isfile(name):
                    continue
                # Binary mode allows seeking relative to the end, so a large log is not
                # read into memory as a whole just to look at its tail.
                with open(name, 'rb') as inp:
                    inp.seek(0, os.SEEK_END)
                    inp.seek(max(inp.tell() - tail_size, 0))
                    tail = inp.read()

                # The "(Cannot assign requested address)" variant needs no separate
                # check: it requires these same two markers as well.
                if b"can not connect to" in tail and b"(attempt 9)" in tail:
                    return name
        return None

    def check_for_yt_failure_heuristic(self):
        """
            Raise SandboxException if the logs match the connection-failure signature.
        """
        reason = self._is_yt_failed_because_the_lack_of_ports()
        if reason:
            raise SandboxException("Looks like it is YT-5427, see {}. Will restart task".format(reason))

    def specific_logs_prefix(self, rel_path):
        """
            Return rel_path joined to the base name of the task-specific logs directory.
        """
        return os.path.join(os.path.basename(self._task_specific_logs_dir), rel_path)

    def package_version(self, pkg):
        """
            Return the integer stored in ctx under '_<pkg>_VER', or -1 if it is absent.
        """
        return int(self.ctx.get('_' + pkg + '_VER', -1))

    def package_svn_path(self, pkg):
        """
            Return the value stored in ctx under '_<pkg>_SVN_PATH', or None if it is absent.
        """
        return self.ctx.get('_' + pkg + '_SVN_PATH')

    def test_requirements(self, pkg, trunk_revision=None, branch_num=0, branch_revision=None):
        """
            Check whether package pkg satisfies the given revision requirements.

            For a path containing "/trunk/arcadia" the package version must be at least
            trunk_revision (if given). For a path containing "stable-<N>", N must be at
            least branch_num and the version at least branch_revision (if given).
            Any other path, including a missing one, is treated as satisfying.
        """
        rev = self.package_version(pkg)
        # A package without a recorded SVN path is handled like an unrecognized path
        # instead of failing with TypeError on the substring test.
        svn_path = self.package_svn_path(pkg) or ""
        if "/trunk/arcadia" in svn_path:
            return trunk_revision is None or rev >= trunk_revision

        num_match = re.search(r"stable-(\d+)", svn_path)
        if not num_match:
            return True
        if int(num_match.group(1)) < branch_num:
            return False
        return branch_revision is None or rev >= branch_revision

    def get_system_user(self):
        """
            Return the output of `whoami` with trailing whitespace stripped.
        """
        p = run_process('whoami', shell=True, check=True, wait=True, stdout=subprocess.PIPE)
        (uname, _) = p.communicate()
        return uname.rstrip()
