# -*- coding: utf-8 -*-

import json
import logging
import random
import urllib2
import os
import subprocess

from sandbox.sandboxsdk.ssh import Key
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk import process
from sandbox.sandboxsdk.parameters import SandboxStringParameter, SandboxIntegerParameter, ResourceSelector

from sandbox.projects import resource_types

from sandbox.projects.common.wizard.log_splitter import CgiParamLogSplitter
from sandbox.projects.common.apihelpers import get_last_released_resource

# Disk space requested for the task; assigned to EntitySearchLogs.execution_space.
# NOTE(review): the value is presumably expressed in megabytes (100 * 1024 MB) — confirm.
MIN_DISK_SPACE_FOR_LOGS = 100 * 1024  # 100 GB


class Host(SandboxStringParameter):
    """
        Task input: host the eventlogs are downloaded from.

        When this value (or the port) is empty, EntitySearchLogs.on_execute
        replaces both with the result of EntitySearchLogs.get_host_and_port().
    """
    name = 'hostname'
    description = 'hostname'


class Port(SandboxIntegerParameter):
    """
        Task input: instance port.

        EntitySearchLogs.extract_logs substitutes it into the remote eventlog file
        names ('eventlog-wizard-<port>.PREV' and 'current-eventlog-wizard-<port>').
    """
    name = 'port'
    description = 'port'


class RequestsLimit(SandboxIntegerParameter):
    """
        Task input: maximum number of requests to store; 0 disables the limit.

        EntitySearchLogs.extract_logs passes it to CgiParamLogSplitter (0 is turned
        into None) and stops reading further logs once is_splitter_full() reports
        that every output file has reached this value.
    """
    name = 'req_limit'
    description = 'max requests to store (0 = unlimit)'
    default_value = 300000


class EvlogDumpExecutable(ResourceSelector):
    """
        Task input: EVLOGDUMP_EXECUTABLE resource used to convert eventlogs to text.

        When left empty, EntitySearchLogs.get_evlogdump falls back to the last
        released resource of that type.
    """
    resource_type = resource_types.EVLOGDUMP_EXECUTABLE
    name = 'evlogdump_executable'
    description = 'evlogdump executable. Current production if not specified'


class EntitySearchLogs(SandboxTask):
    """
        Вытаскивает логи в текстовом виде из продакшен-хоста
    """
    type = 'ENTITYSEARCH_LOGS'

    input_parameters = (Host, Port, RequestsLimit, EvlogDumpExecutable)

    execution_space = MIN_DISK_SPACE_FOR_LOGS

    @staticmethod
    def get_host_and_port():
        """
            Pick a random instance of the sas-production-entitysearch-yp Nanny service.

            Returns:
                A (host, port) tuple built from the 'container_hostname' and 'port'
                fields of a randomly chosen entry of the response's 'result' list,
                or (None, None) when the response body is empty or lists no instances.
        """
        url = 'https://nanny.yandex-team.ru/v2/services/sas-production-entitysearch-yp/current_state/instances/'
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            # urllib2 responses cannot be used in a `with` statement; close explicitly
            # so the connection is released even when read() fails.
            response.close()

        if not data:
            return None, None

        payload = json.loads(data)
        if 'result' not in payload or not payload['result']:
            return None, None

        instance = random.choice(payload['result'])
        return instance['container_hostname'], instance['port']

    @staticmethod
    def process_requests_from_dumped_logs(dumped_log, splitter):
        """
            Feed the requests found in a text eventlog dump into the splitter.

            Only lines mentioning 'ContextCreated' or 'ReqWizardRequestReceived' are
            considered. Each such line is split on tabs: the second field is treated
            as the frame id and the fourth one as the request.

            Args:
                dumped_log: path to the text dump of an eventlog.
                splitter: object whose process_request(request) receives every
                    accepted request after lib.filter.remove_experiment_flags().

            Skipped lines:
                * 'ReqWizardRequestReceived' lines whose last field mentions 'apphost';
                * lines with fewer than four tab-separated fields (logged as warnings);
                * lines whose frame id equals the frame id of the previously accepted line;
                * requests starting with '/admin' or '/shutdown'.
        """
        import lib.filter

        last_frame = None
        # `with` closes the dump even if the splitter raises halfway through.
        with open(dumped_log) as dump:
            for line in dump:
                is_apphost_request = 'ContextCreated' in line
                is_wizard_request = 'ReqWizardRequestReceived' in line
                if not (is_apphost_request or is_wizard_request):
                    continue

                parts = line.split('\t')
                if is_wizard_request and 'apphost' in parts[-1]:
                    continue  # cancel apphost-context

                if len(parts) < 4:
                    # A malformed line (presumably a truncated record) must not abort
                    # the whole task with an IndexError.
                    logging.warning('Skipping malformed line in %s: %r', dumped_log, line)
                    continue

                frame = parts[1]
                if last_frame is not None and frame == last_frame:
                    continue
                last_frame = frame

                request = parts[3].replace('\n', '')
                if request.startswith(('/admin', '/shutdown')):
                    continue

                # ContextCreated lines may carry the request without the '/search?' prefix.
                if is_apphost_request and not request.startswith('/search?'):
                    request = '/search?' + request

                # str.replace is a no-op when ' POST ' is absent.
                request = request.replace(' POST ', '&text=')

                splitter.process_request(lib.filter.remove_experiment_flags(request))

    def get_evlogdump(self):
        """
            Sync the evlogdump resource and return the result of self.sync_resource().

            The resource id comes from the EvlogDumpExecutable task parameter; when the
            parameter is empty the last released EVLOGDUMP_EXECUTABLE is used instead.
            The id that was actually used is stored in self.ctx['use_evlogdump'].
        """
        logging.info('Getting evlogdump....')
        resource_id = self.ctx[EvlogDumpExecutable.name] or get_last_released_resource(
            resource_type=resource_types.EVLOGDUMP_EXECUTABLE
        ).id
        self.ctx['use_evlogdump'] = resource_id
        return self.sync_resource(resource_id)

    def get_eventlog(self, remote_name, local_name):
        """
            Copy an eventlog from the host in self.ctx[Host.name] with rsync over ssh.

            Args:
                remote_name: file name inside /logs on the remote host.
                local_name: destination path for the downloaded copy.

            Returns:
                local_name on success, None when the download failed. Failures are
                logged instead of raised so that the caller can go on with other logs.
        """
        host = self.ctx[Host.name]
        # logging interpolates its arguments with %-style placeholders, not str.format ones.
        logging.info('rsync %s logs from %s', remote_name, host)

        # An argument list (no shell) hands the host and file names to rsync verbatim.
        # rsync itself splits the -e value and honours the single quotes inside it.
        cmd = [
            'rsync',
            '-avz',
            '-e', "ssh -l  '//user:robot-ontodb'",
            '{host}:/logs/{remote_name}'.format(host=host, remote_name=remote_name),
            local_name,
        ]
        try:
            with Key(self, 'robot-ontodb', 'robot-ontodb-ssh-key'):
                output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            logging.info('Rsync output: {}'.format(output))
            return local_name
        except Exception as expt:
            logging.error('Failed to download eventlog {}: {}'.format(remote_name, str(expt)))
            # CalledProcessError carries the combined stdout/stderr of rsync, which is
            # the only place where the actual reason of the failure is visible.
            failed_output = getattr(expt, 'output', None)
            if failed_output:
                logging.error('Rsync output: {}'.format(failed_output))
            return None

    @staticmethod
    def is_splitter_full(splitter, limit):
        if not limit:
            return False
        for _, f in splitter.splitted_files.items():
            if f.requests_written < limit:
                return False
        return True

    def patch_attributes(self, attributes):
        # Write additional attributes for easy matching logs in test environment
        has_client = 'client' in attributes
        client = attributes.get('client')
        lines_str = attributes.get('lines')
        if not lines_str:
            return

        lines = int(lines_str)
        if lines >= 10000:  # don't export small logs to testenv tests
            if has_client:
                attributes['entitysearch_test_requests_for_client'] = client
            else:
                attributes['entitysearch_test_requests_all_clients'] = '1'

    def export_resources(self, splitter):
        """
            Close the splitter and publish each of its files as a PLAIN_TEXT_QUERIES resource.

            Every resource gets the attributes 'entitysearch_requests' = '1' and
            'lines' = number of written requests, plus the (param, value) pair the
            file was split by (when the key in splitted_files is not empty) and
            whatever patch_attributes() adds on top.
        """
        splitter.close()
        for split_key, out_file in splitter.splitted_files.items():
            path = out_file.name
            written = out_file.requests_written

            attributes = {'entitysearch_requests': '1', 'lines': str(written)}
            if split_key:
                key, value = split_key
                attributes[key] = value
            self.patch_attributes(attributes)

            description = 'entitysearch requests ({}) from {} ({} lines)'.format(
                os.path.basename(path), self.ctx[Host.name], written
            )
            self.create_resource(description, path, 'PLAIN_TEXT_QUERIES', attributes=attributes)

    def extract_logs(self, evlogdump):
        """
            Download the eventlogs, dump them to text and export the requests.

            The previous eventlog is handled first, then the current one. Each log is
            fetched with get_eventlog(), converted with the evlogdump executable and
            fed to a CgiParamLogSplitter; logs that fail to download are skipped, and
            reading stops early once the splitter is full.

            Args:
                evlogdump: evlogdump executable used as the command in the dump step.

            Raises:
                Exception: when none of the logs could be processed.
        """
        requests_dir = self.abs_path('requests')
        # An empty / zero limit is handed to the splitter as None.
        limit = self.ctx[RequestsLimit.name] or None
        splitter = CgiParamLogSplitter(requests_dir, [None, 'client'], limit)

        logging.info('Dumping logs')
        port = self.ctx[Port.name]
        logs = (
            ('eventlog-wizard-{}.PREV'.format(port), 'prev-eventlog'),
            ('current-eventlog-wizard-{}'.format(port), 'current-eventlog'),
        )

        any_processed = False
        for remote_name, local_name in logs:
            logging.info('Processing log {}'.format(local_name))
            log = self.get_eventlog(remote_name, local_name)
            if log:
                dumped = self.abs_path('{}.txt'.format(log))
                process.run_process(
                    "{} {} > {}".format(evlogdump, log, dumped),
                    shell=True,
                    log_prefix='log-dump.{}'.format(log),
                )

                EntitySearchLogs.process_requests_from_dumped_logs(dumped, splitter)

                any_processed = True
                if EntitySearchLogs.is_splitter_full(splitter, limit):
                    break

        if not any_processed:
            raise Exception('Failed to process any logs')

        self.export_resources(splitter)

    def on_execute(self):
        """
            Run the task: resolve the target instance, fetch evlogdump, export requests.

            When the host or the port parameter is empty, both are replaced with an
            instance picked by get_host_and_port().

            Raises:
                Exception: when no instance was given and none could be picked
                    automatically, or when extract_logs() fails.
        """
        if not self.ctx[Host.name] or not self.ctx[Port.name]:
            self.ctx[Host.name], self.ctx[Port.name] = self.get_host_and_port()
            # get_host_and_port() returns (None, None) when no instance is listed.
            # Fail right away instead of rsync-ing from host "None" and reporting a
            # misleading "Failed to process any logs" later.
            if not self.ctx[Host.name] or not self.ctx[Port.name]:
                raise Exception('Failed to pick a production instance to take logs from')
            logging.info('Picked instance %s:%s', self.ctx[Host.name], self.ctx[Port.name])

        evlogdump = self.get_evlogdump()
        self.extract_logs(evlogdump)


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up — confirm.
__Task__ = EntitySearchLogs
