# coding: utf-8

import json
import logging
import os
import re
import signal
import socket
import threading
import time
import urllib2

import psutil
import requests

from sandbox.projects import resource_types
from sandbox.projects.EntitySearch import resource_types as es_resource_types

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.paths import make_folder

from sandbox.projects.common import sanitizer
from sandbox.projects.common.environments import ValgrindEnvironment
from sandbox.projects.common.profiling import gperftools as profiler
from sandbox.projects.common.wizard.current_production import get_current_production_resource_id
from sandbox.projects.common.wizard.exception_manager import ExceptionManager
from sandbox.projects.common.wizard.wizard_builder import WizardBuilder

from sandbox.projects.common.utils import wait_searcher_start


# Nanny service name used as the default value of the ParentNannyService parameter.
DEFAULT_NANNY_ENTITYSEARCH_SERVICE = 'sas-production-entitysearch-yp'


# Titles assigned to the `group` attribute of the parameter classes in this module.
BACKEND_RUN_GROUP = 'EntitySearch backend run options'
BACKEND_RESOURCES_GROUP = 'EntitySearch backend resources'

ADVANCED_GROUP = 'EntitySearch Provider Advanced Settings'


class EntitySearchBinary(parameters.ResourceSelector):
    """Required resource selector for the ENTITY_SEARCH_EXECUTABLE resource."""
    name = "entitysearch_binary"
    description = "EntitySearch binary"
    group = BACKEND_RESOURCES_GROUP
    resource_type = es_resource_types.ENTITY_SEARCH_EXECUTABLE
    required = True


class EntitySearchConfig(parameters.ResourceSelector):
    """Required resource selector for the ENTITY_SEARCH_CONFIG resource."""
    name = "entitysearch_config"
    description = "EntitySearch config"
    group = BACKEND_RESOURCES_GROUP
    resource_type = es_resource_types.ENTITY_SEARCH_CONFIG
    required = True


class EntitySearchData(parameters.ResourceSelector):
    """Required resource selector for the ENTITY_SEARCH_DATA resource."""
    name = "entitysearch_data"
    description = "EntitySearch data"
    group = BACKEND_RESOURCES_GROUP
    resource_type = es_resource_types.ENTITY_SEARCH_DATA
    required = True


class Fresh(parameters.ResourceSelector):
    """Required resource selector for the ENTITY_SEARCH_FRESH resource."""
    name = "fresh"
    description = "EntitySearch fresh"
    group = BACKEND_RESOURCES_GROUP
    resource_type = es_resource_types.ENTITY_SEARCH_FRESH
    required = True


class NerData(parameters.ResourceSelector):
    """Required resource selector for the ENTITY_SEARCH_NER_DATA resource."""
    name = "ner"
    description = "EntitySearch ner"
    group = BACKEND_RESOURCES_GROUP
    resource_type = es_resource_types.ENTITY_SEARCH_NER_DATA
    required = True


class MainDbData(parameters.ResourceSelector):
    """Optional resource selector for the ENTITY_SEARCH_CONVERT_DB_ARCHIVE resource.

    When no value is given, EntitySearchProvider.find_main_db_resources() looks
    the resource up via get_current_production_resource_id().
    """
    name = "main_db"
    description = "EntitySearch main db archive (default is from prod)"
    group = BACKEND_RESOURCES_GROUP
    resource_type = es_resource_types.ENTITY_SEARCH_CONVERT_DB_ARCHIVE
    required = False


class StartTimeoutParameter(parameters.SandboxIntegerParameter):
    """Integer parameter: backend start timeout in seconds (default 1500)."""
    name = 'entitysearch_start_timeout'
    description = 'Start timeout (sec)'
    group = BACKEND_RUN_GROUP
    default_value = 1500


class UseMemoryMapping(parameters.SandboxBoolParameter):
    """Boolean parameter; when set, EntitySearchProvider appends ``--mmap`` to the launch command."""
    name = 'use_mmap'
    description = 'Use memory mapping to load data'
    group = BACKEND_RUN_GROUP
    default_value = False


class TurnOffFetcher(parameters.SandboxBoolParameter):
    """Boolean parameter; when set, EntitySearchProvider.get_config() rewrites fetcher flags to false."""
    name = 'turn_off_fetcher'
    description = 'Turn off fetcher'
    group = BACKEND_RUN_GROUP
    default_value = False


class DisableKvsaasMainDb(parameters.SandboxBoolParameter):
    """Boolean parameter (default True); when set, ``--disable-kvsaas-main-db`` is appended to the launch command."""
    name = 'disable_kvsaas_main_db'
    description = 'Disable KVSaaS main database'
    group = BACKEND_RUN_GROUP
    default_value = True

class SanitizerType(sanitizer.SanitizerType):
    """The common ``sanitizer.SanitizerType`` parameter, placed into the backend run options group."""
    group = BACKEND_RUN_GROUP


class UseGPerfToolsParameter(parameters.SandboxBoolParameter):
    """Boolean parameter passed to the profiling helpers when the debug tool is the profiler."""
    name = 'use_gperftools'
    description = 'Use GPerfTools'
    group = BACKEND_RUN_GROUP
    default_value = False


class DebugToolParameter(parameters.SandboxStringParameter):
    """String parameter choosing the debug tool the backend is run under (default: none)."""
    # Values stored in the task context for each choice.
    NONE = 'none'
    VALGRIND = 'valgrind'
    PROFILER = 'profiler'
    SANITIZER = 'sanitizer'

    name = 'debug_tool'
    description = 'Run under specified debug tool'
    # (label, value) pairs.
    choices = [('None', NONE), ('Valgrind', VALGRIND), ('Profiler', PROFILER), ('Sanitizer', SANITIZER)]
    group = BACKEND_RUN_GROUP
    default_value = NONE
    # Maps a choice value to the names of dependent parameters
    # (presumably shown only when that choice is selected -- confirm against the SDK).
    sub_fields = {
        PROFILER: [UseGPerfToolsParameter.name],
        SANITIZER: [SanitizerType.name]
    }


class ParentNannyService(parameters.SandboxStringParameter):
    """String parameter naming the Nanny service whose current production resources are looked up."""
    name = 'parent_nanny_service'
    description = 'Nanny service to get latest artifacts from'
    group = ADVANCED_GROUP
    default_value = DEFAULT_NANNY_ENTITYSEARCH_SERVICE


class EntitySearchProvider(object):
    """Syncs EntitySearch resources, builds the launch command and manages the backend process.

    The constructor already syncs all resources and builds the command line;
    ``start()`` / ``stop()`` launch and shut down the process. Instances can be
    used as context managers: ``__enter__`` calls ``start()`` and ``__exit__``
    calls ``stop()``.
    """
    # Requirements
    MIN_RAM = 170 * 1024  # 170 GB
    DISK_SPACE_FOR_DATA = 100 * 1024  # 100 GB

    # Parameter classes a task should expose to configure this provider;
    # from_task_context() reads their values back from the task context.
    input_parameters = [
        EntitySearchBinary,
        EntitySearchConfig,
        EntitySearchData,
        Fresh,
        NerData,
        MainDbData,
        StartTimeoutParameter,
        UseMemoryMapping,
        TurnOffFetcher,
        DisableKvsaasMainDb,
        DebugToolParameter,
        UseGPerfToolsParameter,
        SanitizerType,
        ParentNannyService,
    ]

    # Launch command template, formatted in get_run_cmd(); can be overridden
    # per instance through the custom_cmd_tpl constructor argument.
    cmd_tpl = '{binary_path} --data {data_path} --fresh {fresh_path} --ner {ner_path} --cfg {cfg_path}' \
              ' -p {port} --apphost-port {apphost_port} --apphost-threads {apphost_threads}'

    # part of prepare_script from nanny service
    prepare_tpl = '{hook_notifier_path} prepare_entitysearch_data_dir --shard-dest-dir {data_path} '\
                  '--shard-data-resource-dir {data_resource_path} --main-db-resource-dir {main_db_resource_path}'

    def __init__(
        self,
        build_entitysearch_id=None,
        fresh_id=None,
        ner_id=None,
        data_id=None,
        binary_id=None,
        config_id=None,
        main_db_id=None,
        custom_cmd_tpl=None,
        debug_tool=DebugToolParameter.NONE,
        use_gperftools=False,
        sanitizer_type=SanitizerType.NONE,
        start_timeout=10 * 60,
        use_mmap=False,
        turn_off_fetcher=False,
        disable_kvsaas_main_db=DisableKvsaasMainDb.default_value,
        parent_nanny_service=None,
        nanny_token=None,
    ):
        """Resolve resource ids, sync the resources and build the launch command.

        Each ``*_id`` argument, when truthy, is used as is; otherwise the
        resource is looked up in the ``build_entitysearch_id`` task via
        ``WizardBuilder.resource_from_task``. The main db and hook notifier
        are resolved by ``find_main_db_resources``.

        :param custom_cmd_tpl: replacement for the class-level ``cmd_tpl``
        :param debug_tool: one of the ``DebugToolParameter`` values
        :param start_timeout: timeout in seconds passed to each startup wait
        :raises Exception: when fresh, ner, binary, data or config is not resolved
        """
        # Network endpoints and timing.
        self.host = 'localhost'
        self.port = 8895
        self.apphost_port = 8896
        self.apphost_threads = 4
        self.start_timeout = start_timeout
        self.load_time = None

        # Explicit ids win; otherwise fall back to the resources of the build task.
        self.build = build_entitysearch_id
        from_build = WizardBuilder.resource_from_task
        self.fresh = fresh_id or from_build(self.build, es_resource_types.ENTITY_SEARCH_FRESH, None)
        self.ner = ner_id or from_build(self.build, es_resource_types.ENTITY_SEARCH_NER_DATA, None)
        # The binary lookup deliberately keeps the two-argument call form.
        self.binary = binary_id or from_build(self.build, es_resource_types.ENTITY_SEARCH_EXECUTABLE)
        self.data = data_id or from_build(self.build, es_resource_types.ENTITY_SEARCH_DATA, None)
        self.config = config_id or from_build(self.build, es_resource_types.ENTITY_SEARCH_CONFIG, None)

        self.find_main_db_resources(main_db_id, nanny_token, parent_nanny_service)

        # Checked in this order so the first missing resource is the one reported.
        mandatory = (
            (self.fresh, 'No fresh specified'),
            (self.ner, 'No ner specified'),
            (self.binary, 'No binary specified'),
            (self.data, 'No data specified'),
            (self.config, 'No config specified'),
        )
        for resource, complaint in mandatory:
            if not resource:
                raise Exception(complaint)

        self.resources_path = make_folder('resources')

        # Local paths are filled in by the get_*() sync methods.
        self.binary_path = self.fresh_path = self.ner_path = None
        self.data_path = self.config_path = None

        self.process = None

        if custom_cmd_tpl:
            self.cmd_tpl = custom_cmd_tpl

        self.use_mmap = use_mmap
        self.turn_off_fetcher = turn_off_fetcher
        self.disable_kvsaas_main_db = disable_kvsaas_main_db

        self.debug_tool = debug_tool
        self.use_gperftools = use_gperftools
        self.sanitizer_type = sanitizer_type

        if self.debug_tool == DebugToolParameter.VALGRIND:
            ValgrindEnvironment().prepare()

        # Syncs every resource and prepares the data directory as a side effect.
        self.cmd = self.get_run_cmd()

    def find_main_db_resources(self, main_db_id, nanny_token, parent_nanny_service):
        """Resolve the main db and hook notifier resource ids.

        ``main_db_id`` is used when truthy; otherwise the id comes from
        ``get_current_production_resource_id`` for ``parent_nanny_service``.
        The hook notifier id is always looked up that way. Both local paths
        are reset to ``None``.
        """
        self.main_db = None
        self.hook_notifier = None

        # Paths stay empty until get_main_db() / get_hook_notifier() sync the resources.
        self.main_db_path = None
        self.hook_notifier_path = None

        self.main_db = main_db_id or get_current_production_resource_id(
            parent_nanny_service,
            es_resource_types.ENTITY_SEARCH_CONVERT_DB_ARCHIVE,
            nanny_token
        )

        # es hook notifier and fs setup is required
        notifier_id = get_current_production_resource_id(
            parent_nanny_service,
            es_resource_types.ENTITY_SEARCH_HOOK_NOTIFIER,
            nanny_token
        )
        self.hook_notifier = notifier_id

    @staticmethod
    def from_task_context(ctx, nanny_token=None):
        """Build a provider from a task context dictionary.

        Resource ids and debug options are read with ``ctx.get``; the start
        timeout and the three boolean run options are read with ``ctx[...]``
        and therefore must be present in ``ctx``.
        """
        lookup = ctx.get
        settings = dict(
            build_entitysearch_id=None,
            fresh_id=lookup(Fresh.name),
            ner_id=lookup(NerData.name),
            data_id=lookup(EntitySearchData.name),
            binary_id=lookup(EntitySearchBinary.name),
            config_id=lookup(EntitySearchConfig.name),
            main_db_id=lookup(MainDbData.name),
            debug_tool=lookup(DebugToolParameter.name, DebugToolParameter.NONE),
            use_gperftools=lookup(UseGPerfToolsParameter.name, False),
            start_timeout=ctx[StartTimeoutParameter.name],
            use_mmap=ctx[UseMemoryMapping.name],
            turn_off_fetcher=ctx[TurnOffFetcher.name],
            disable_kvsaas_main_db=ctx[DisableKvsaasMainDb.name],
            sanitizer_type=lookup(SanitizerType.name, SanitizerType.NONE),
            parent_nanny_service=lookup(ParentNannyService.name),
            nanny_token=nanny_token,
            custom_cmd_tpl=lookup('custom_cmd'),
        )
        return EntitySearchProvider(**settings)

    def get_memory(self):
        """Return the resident set size (RSS) of the backend process as reported by psutil.

        Requires a started process (``self.process`` must not be ``None``).
        """
        proc = psutil.Process(self.process.pid)
        # psutil 2.0 renamed get_memory_info() to memory_info() and 3.0 removed
        # the old name; prefer the current API and fall back for legacy psutil.
        read_memory_info = getattr(proc, 'memory_info', None) or proc.get_memory_info
        return read_memory_info().rss

    def get_binary(self):
        """Sync the binary resource and remember its local path in ``self.binary_path``."""
        resource_id = self.binary
        logging.info('syncing entitysearch binary resource #{}'.format(resource_id))
        local_path = channel.task.sync_resource(resource_id)
        self.binary_path = local_path
        logging.info('synced entitysearch binary {}'.format(local_path))

    def get_config(self):
        """Sync the config resource and store its path in ``self.config_path``.

        When ``self.turn_off_fetcher`` is set, a copy named ``config.cfg`` is
        written to the current directory with every
        ``UseFetcher|FetchSaas|FetcherIsEnabled = true`` switched to ``false``,
        and ``self.config_path`` points at that copy instead.
        """
        logging.info('syncing entitysearch config resource #{}'.format(self.config))
        self.config_path = channel.task.sync_resource(self.config)
        logging.info('synced entitysearch config {}'.format(self.config_path))

        if not self.turn_off_fetcher:
            return

        synced_path = self.config_path
        self.config_path = 'config.cfg'
        with open(synced_path) as source, open(self.config_path, 'w') as patched:
            text = source.read()
            patched.write(re.sub(
                '(UseFetcher|FetchSaas|FetcherIsEnabled)\\s*=\\s*true', '\\1 = false',
                text
            ))

    def get_fresh(self):
        """Sync the fresh resource and remember its local path in ``self.fresh_path``."""
        logging.info('syncing entitysearch fresh resource #{}'.format(self.fresh))
        self.fresh_path = channel.task.sync_resource(self.fresh)
        logging.info('synced entitysearch fresh {}'.format(self.fresh_path))

    def get_ner(self):
        """Sync the ner resource and remember its local path in ``self.ner_path``."""
        logging.info('syncing entitysearch ner resource #{}'.format(self.ner))
        self.ner_path = channel.task.sync_resource(self.ner)
        logging.info('synced entitysearch ner {}'.format(self.ner_path))

    def get_data(self):
        """Sync the data resource and remember its local path in ``self.data_path``."""
        resource_id = self.data
        logging.info('syncing entitysearch data resource #{}'.format(resource_id))
        local_path = channel.task.sync_resource(resource_id)
        self.data_path = local_path
        logging.info('synced entitysearch data {}'.format(local_path))

    def get_main_db(self):
        """Sync the main db resource into ``self.main_db_path``; no-op when no main db id is set."""
        if self.main_db is None:
            return

        logging.info('syncing entitysearch main_db resource #{}'.format(self.main_db))
        self.main_db_path = channel.task.sync_resource(self.main_db)
        logging.info('synced entitysearch main_db {}'.format(self.main_db_path))

    def get_hook_notifier(self):
        """Sync the hook notifier resource into ``self.hook_notifier_path``; no-op when its id is unset."""
        if self.hook_notifier is None:
            return

        logging.info('syncing entitysearch hook_notifier resource #{}'.format(self.hook_notifier))
        self.hook_notifier_path = channel.task.sync_resource(self.hook_notifier)
        logging.info('synced entitysearch hook_notifier {}'.format(self.hook_notifier_path))

    def get_resources(self, use_threads=True):
        """Run every ``get_*`` sync step, concurrently by default.

        :param use_threads: when true, each step runs in its own thread through
            ``ExceptionManager.run_target`` and ``check_exceptions()`` is called
            after all threads have been joined; otherwise the steps run one
            after another in the calling thread.
        """
        sync_steps = (
            self.get_binary,
            self.get_config,
            self.get_fresh,
            self.get_ner,
            self.get_data,
            self.get_main_db,
            self.get_hook_notifier,
        )

        if not use_threads:
            for step in sync_steps:
                step()
            return

        failures = ExceptionManager()
        workers = []
        for step in sync_steps:
            workers.append(threading.Thread(target=ExceptionManager.run_target, args=(failures, step)))
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        failures.check_exceptions()

    def get_run_cmd(self):
        """Sync all resources, prepare the data directory and return the launch command string.

        When a main db was synced, the hook notifier prepare command is run
        first; ``make_prepare_cmd()`` repoints ``self.data_path`` to the
        linked directory, so the launch command uses that directory.
        """
        logging.info('syncing resources ...')
        self.get_resources()
        logging.info('done syncing resources ...')

        if self.main_db_path is not None:
            logging.info('preparing resources on fs ...')
            prepare_argv = self.make_prepare_cmd().split()
            run_process(prepare_argv, outputs_to_one_file=False, wait=True, log_prefix='hook_notifier_prepare_data')
            logging.info('done preparing resources on fs ...')

        template_values = {
            'binary_path': self.binary_path,
            'data_path': self.data_path,
            'fresh_path': self.fresh_path,
            'ner_path': self.ner_path,
            'cfg_path': self.config_path,
            'port': self.port,
            'apphost_port': self.apphost_port,
            'apphost_threads': self.apphost_threads,
        }
        pieces = [self.cmd_tpl.format(**template_values)]

        # Optional flags go after the templated part.
        if self.use_mmap:
            pieces.append(' --mmap')
        if self.disable_kvsaas_main_db:
            pieces.append(' --disable-kvsaas-main-db')
        cmd = ''.join(pieces)

        if self.debug_tool == DebugToolParameter.VALGRIND:
            cmd = 'valgrind --leak-check=full --undef-value-errors=yes --num-callers=100 --error-exitcode=1 ' + cmd

        return cmd

    def make_prepare_cmd(self):
        """Return the hook notifier prepare command and repoint ``self.data_path``.

        Side effects: the synced data path is kept in ``self.data_resource_path``
        and ``self.data_path`` becomes the absolute path of ``./linked_data_path``,
        which the command receives as ``--shard-dest-dir``.
        """
        synced_data_path = self.data_path
        destination = os.path.abspath('./linked_data_path')

        prepare_cmd = self.prepare_tpl.format(
            hook_notifier_path=self.hook_notifier_path,
            data_path=destination,
            data_resource_path=synced_data_path,
            main_db_resource_path=self.main_db_path
        )

        # From here on the backend reads data from the prepared directory.
        self.data_resource_path = synced_data_path
        self.data_path = destination

        return prepare_cmd

    def get_load_time(self):
        """Return the startup duration in seconds measured by ``start()``.

        ``None`` until ``start()`` has launched the process and both startup
        waits have completed.
        """
        return self.load_time

    def start(self):
        """Launch the backend (unless it is already running) and log its version.

        The environment is extended with profiler or sanitizer settings
        according to ``self.debug_tool``. After launching, the method waits for
        both the HTTP port and the apphost port, each with
        ``self.start_timeout``, and records the elapsed time in
        ``self.load_time``. Errors from the startup waits or from the version
        request propagate to the caller.
        """
        logging.info('starting entitysearch')
        environment = dict()
        if self.debug_tool == DebugToolParameter.PROFILER:
            if self.use_gperftools:
                env_upd = profiler.get_profiler_environment(
                    use_gperftools=self.use_gperftools,
                    executable_path=self.binary_path,
                    session_name='entitysearch'
                )
            else:
                env_upd = profiler.get_profiler_environment()

            environment.update(env_upd)

        if self.debug_tool == DebugToolParameter.SANITIZER:
            environment.update(sanitizer.get_sanitizer_environment())

        if not self.alive():
            start_time = time.time()
            self.process = run_process(
                self.cmd.split(),
                outputs_to_one_file=False,
                wait=False,
                log_prefix='entitysearch',
                environment=environment
            )
            logging.info('===waiting for entitysearch to start %s:%s' % (socket.gethostname(), self.port))
            # Both listeners must be up before the backend counts as started.
            wait_searcher_start('localhost', self.port, subproc_list=[self.process], timeout=self.start_timeout)
            wait_searcher_start('localhost', self.apphost_port, subproc_list=[self.process], timeout=self.start_timeout)
            self.load_time = time.time() - start_time
        else:
            # Fixed typo in the log message (was 'steel alive').
            logging.info('still alive')

        version_to_log = json.dumps(self.get_version(), indent=4, ensure_ascii=False)
        logging.info('===backend version:\n' + version_to_log)

    def get_version(self, timeout=30):
        """Fetch and return the decoded JSON body of the backend's ``/v`` handler.

        :param timeout: seconds to wait for the HTTP request; without a timeout
            ``requests`` may block indefinitely if the backend stops responding
        :raises requests.exceptions.HTTPError: on a non-success HTTP status
        :raises requests.exceptions.Timeout: when the backend does not answer in time
        """
        resp = requests.get('http://localhost:{port}/v'.format(port=self.port), timeout=timeout)
        resp.raise_for_status()

        return resp.json()

    def wait_process(self, timeout=180):
        """Wait for the backend process to exit, aborting it if it does not.

        Polls once per second for up to ``timeout`` seconds. If the process is
        still running afterwards it is sent SIGABRT and given up to 10 more
        seconds to terminate.

        :param timeout: number of one-second polls before the process is aborted
        :raises OSError: when the signal cannot be delivered to a live process
        """
        for _ in range(timeout):
            if self.process.poll() is not None:
                return
            time.sleep(1)

        if self.process.poll() is not None:
            return

        logging.info('killing process %s', self.process.pid)
        try:
            os.kill(self.process.pid, signal.SIGABRT)
        except OSError:
            # The process may have exited between the poll() above and the
            # kill; only a failure to signal a live process is a real error.
            if self.process.poll() is None:
                raise
            return

        # Grace period after the abort; return as soon as the process is gone.
        for _ in range(10):
            if self.process.poll() is not None:
                return
            time.sleep(1)

    def stop(self):
        """Shut the backend down through its admin handler and collect artifacts.

        Requests ``/admin?action=shutdown``, waits for the process via
        ``wait_process()`` and clears ``self.process``. Profile data is read
        when the debug tool is the profiler, and ``default.profraw`` is
        published as a PGO_PROFILE_RESOURCE if the file exists.

        :raises SandboxTaskFailureError: when the backend is not running,
            including when it was never started
        """
        logging.info('stopping entitysearch')
        # Check liveness before touching self.process: it is None when the
        # backend was never started or has already been stopped.
        if not self.alive():
            raise SandboxTaskFailureError('entitysearch is dead')

        pid = self.process.pid
        response = urllib2.urlopen('http://localhost:%s/admin?action=shutdown' % self.port, timeout=5)
        try:
            response.read()
        finally:
            response.close()
        self.wait_process()
        logging.info('Process return code: {}'.format(self.process.poll()))
        self.process = None

        if self.debug_tool == DebugToolParameter.PROFILER:
            profiler.read_profile_data(
                executable_path=self.binary_path,
                pid=pid,
                session_name='entitysearch',
                use_gperftools=self.use_gperftools
            )

        # PGO
        if os.path.exists('default.profraw'):
            channel.task.create_resource('default.profraw', 'default.profraw', resource_types.PGO_PROFILE_RESOURCE)

    def alive(self):
        """Return ``True`` when a backend process exists and has not exited, else ``False``."""
        # Always return a real bool; the bare `and` expression yielded None
        # when no process had been started.
        return self.process is not None and self.process.poll() is None

    def __enter__(self):
        """Start the backend and return the provider itself for use in a ``with`` block."""
        self.start()
        return self

    def __exit__(self, type, value, traceback):
        """Stop the backend when the ``with`` block ends.

        The exception arguments are ignored; returns ``None``, so an exception
        raised inside the block is not suppressed. Any error raised by
        ``stop()`` propagates.
        """
        self.stop()
