# -*- coding: utf-8 -*-

import json
import logging
import os
import psutil
import re
import subprocess
import time

from sandbox import sdk2
from sandbox.projects.common import link_builder as lb
from sandbox.sandboxsdk import environments as env
from sandbox.sandboxsdk import errors
from sandbox.sandboxsdk import process
from sandbox.sdk2.vcs.svn import Arcadia

from sandbox.projects.websearch.CheckPrechargeAfterMemoryMap.components.noapache import Noapache


# Registry of supported component classes, keyed by the name each class reports
# via its own ``name()``; the task's "Component" parameter selects an entry.
COMPONENTS = {
    component_cls.name(): component_cls
    for component_cls in (Noapache,)
}

# Directory of this module; the helper scripts below are located next to it.
_SCRIPTS_DIR = os.path.dirname(__file__)

# GDB command-file template: it is read and ``str.format``-ed with the
# component arguments and output file names before being handed to gdb.
FIND_MMAP_NOPRECHARGE_SCRIPT = os.path.join(_SCRIPTS_DIR, 'find_mmap_noprecharge.gdb')
# Helper script executed in a separate virtualenv to print process I/O counters as JSON.
GET_IO_COUNTERS_SCRIPT = os.path.join(_SCRIPTS_DIR, 'io_counters.py')
# Name (relative to the working directory) of the formatted copy of the GDB script.
FIND_MMAP_NOPRECHARGE_SCRIPT_PATCHED = 'find_mmap_noprecharge.gdb.patched'


class CheckPrechargeAfterMemoryMap(sdk2.Task):
    """
        1. Checks whether files are loaded into memory
        right after they have been mmapped at the start of search component.
        2. Checks whether the component reads from disk at first request.
        Binary must be built with --build=debug.

        (NOAPACHE-50, SPI-1240, SPI-4253)
    """

    class Parameters(sdk2.Task.Parameters):
        # Threshold compared against the growth of the `read_chars` I/O counter
        # between the moments right before and right after the first request.
        read_at_first_request_limit = sdk2.parameters.Integer(
            "Maximum acceptable number of bytes that component can read from disk at first request"
        )

        # Selector listing every component registered in COMPONENTS.
        with sdk2.parameters.String("Component") as component:
            for component_name in COMPONENTS.keys():
                component.values[component_name] = component.Value(component_name)

        # Component-specific parameters, attached to the matching selector value.
        for name, desc in COMPONENTS.items():
            with component.value[name]:
                component_params = desc.Parameters()

        with sdk2.parameters.Output:
            # Resource holding everything gdb printed (stdout and stderr combined).
            gdb_output = sdk2.parameters.Resource('GDB output', resource_type=sdk2.service_resources.TaskCustomLogs)

    class Context(sdk2.Task.Context):
        # Set to True by on_execute as soon as any of the checks fails.
        has_errors = False

    def _get_precharged_addresses(self, gdb_output):
        """
            Extract addresses reported by the gdb script from its output.

            :param gdb_output: resource whose `path` points to the gdb output file
            :return: list with the word following 'Data: ' for every line that
                contains such a marker (first occurrence per line)
        """
        # Raw string: '\w' in a plain literal is an invalid escape sequence.
        address_re = re.compile(r'Data: (\w+)')
        with gdb_output.path.open('r') as gdb_output_file:
            gdb_output_lines = gdb_output_file.read().split('\n')

        addresses = []
        for line in gdb_output_lines:
            match = address_re.search(line)
            if match is not None:
                addresses.append(match.group(1))
        return addresses

    def _get_mmapped_notprecharged_files(self, smaps, precharged_addresses, data_paths):
        """
            Select memory maps of data files that are neither fully resident
            nor reported as precharged.

            :param smaps: memory map entries exposing `path`, `size`, `rss` and `addr`
            :param precharged_addresses: start addresses reported by the gdb script
            :param data_paths: path prefixes under which the component's data lives
            :return: list of dicts with keys 'path', 'size' and 'rss'; 'path' is
                shortened so that it starts with the last directory of the matched prefix
        """
        # Built once so that the membership test inside the loop is O(1).
        precharged = set(precharged_addresses)
        result = []
        for smap in smaps:
            for data_path in data_paths:
                if not smap.path.startswith(data_path):
                    continue
                if smap.size != smap.rss and smap.addr.split('-')[0] not in precharged:
                    # Measure the offset on the normalized prefix: with the raw
                    # prefix a trailing separator shifted the offset by one and
                    # cut the first character of the directory name.
                    normalized = os.path.normpath(data_path)
                    start_index = len(normalized) - len(os.path.basename(normalized))
                    result.append({
                        'path': smap.path[start_index:],
                        'size': smap.size,
                        'rss': smap.rss,
                    })
                # Only the first matching prefix is considered for each map.
                break
        return result

    def _get_io_counters(self, proc):
        """
            Obtain I/O counters of a process via the helper script.

            The script is run inside a fresh virtual environment with
            psutil 5.4.8 installed; its stdout is parsed as JSON.

            :param proc: process object exposing `pid`
            :return: decoded JSON printed by the script
        """
        with env.VirtualEnvironment() as venv:
            env.PipEnvironment('psutil', version="5.4.8", venv=venv, use_wheel=True).prepare()
            p = process.run_process(
                [venv.executable, GET_IO_COUNTERS_SCRIPT, '-p', str(proc.pid)],
                outs_to_pipe=True
            )
            io_counters = json.loads(p.communicate()[0])
            return io_counters

    def _write_gdb_script(self, component):
        """Render the gdb script template for `component` into the patched script file."""
        with open(FIND_MMAP_NOPRECHARGE_SCRIPT, 'r') as template_file:
            template = template_file.read()
        gdb_script = template.format(
            args=' '.join(component.execution_arguments()),
            stdout='{}.out'.format(component.name()), stderr='{}.err'.format(component.name()),
        )
        # Closed explicitly (via `with`) so the content is on disk before gdb reads it.
        with open(FIND_MMAP_NOPRECHARGE_SCRIPT_PATCHED, 'w') as patched_file:
            patched_file.write(gdb_script)

    def _find_component_process(self, gdb_process, component):
        """
            Return the child of gdb whose name equals the component binary's file name.

            :raises errors.SandboxTaskFailureError: if no such child exists
        """
        binary_name = os.path.basename(component.binary_path())
        # NOTE(review): `get_children()` and `name` as an attribute belong to an
        # old psutil API; kept as in the original code - confirm the installed version.
        for child in psutil.Process(gdb_process.pid).get_children():
            if child.name == binary_name:
                return child
        raise errors.SandboxTaskFailureError(
            'Failed to find {} process among gdb children'.format(binary_name)
        )

    def on_execute(self):
        """
            Run the component under gdb and perform both checks.

            Steps: export `ya`, render the gdb script, start gdb with the
            component binary, wait until the component is ready, snapshot its
            memory maps and I/O counters, send the first request, snapshot the
            counters again, stop the component and analyse the results.

            :raises errors.SandboxTaskFailureError: if the component fails to
                start, its process cannot be found, or any check fails
        """
        port = 1337
        component = COMPONENTS[self.Parameters.component](self.Parameters, port)

        Arcadia.export(url='arcadia:/arc/trunk/arcadia/ya', path='ya')
        self._write_gdb_script(component)

        cmd = [
            './ya', 'tool', 'gdb',
            '-x', FIND_MMAP_NOPRECHARGE_SCRIPT_PATCHED,
            '--silent', component.binary_path(),
        ]

        self.Parameters.gdb_output = sdk2.service_resources.TaskCustomLogs(
            self, self.Parameters.description, 'gdb_output', ttl=3
        )
        with self.Parameters.gdb_output.path.open('w') as out:
            gdb_process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=out, stderr=subprocess.STDOUT)
            # Close the pipe right away so that gdb sees end-of-file on its stdin.
            gdb_process.stdin.close()

            while not component.ready_for_first_request():
                # gdb exiting before the component is ready means the start failed.
                if gdb_process.poll() is not None:
                    raise errors.SandboxTaskFailureError('Failed to start {}'.format(component.name()))
                time.sleep(5)

            component_process = self._find_component_process(gdb_process, component)
            with open('/proc/{pid}/smaps'.format(pid=component_process.pid), 'r') as smaps_file:
                logging.info(smaps_file.read())
            # Memory maps are captured before the first request is sent.
            smaps = component_process.get_memory_maps(grouped=False)
            self.Context.io_counters_before_request = self._get_io_counters(component_process)

            component.send_request()
            self.Context.io_counters_after_request = self._get_io_counters(component_process)

            component.stop()
            gdb_process.wait()

        # Check 2: amount read while serving the first request.
        self.Context.read_chars = (
            self.Context.io_counters_after_request['read_chars'] -
            self.Context.io_counters_before_request['read_chars']
        )
        if self.Context.read_chars > self.Parameters.read_at_first_request_limit:
            self.set_info(
                "Disk read limit exceeded on first request:\n"
                "I/O counters before request: {}\n"
                "I/O counters after request: {}\n"
                "Read bytes: {}\n".format(
                    self.Context.io_counters_before_request, self.Context.io_counters_after_request, self.Context.read_chars
                )
            )
            self.Context.has_errors = True

        # Check 1: data files that were mmapped but not precharged.
        precharged_addresses = self._get_precharged_addresses(self.Parameters.gdb_output)
        files_info = self._get_mmapped_notprecharged_files(smaps, precharged_addresses, component.data_paths())
        if files_info:
            file_list = '\n'.join(
                '{}: Size {:.1f}Kb Rss {:.1f}Kb'.format(
                    file_info['path'], file_info['size'] / 1024., file_info['rss'] / 1024.
                )
                for file_info in files_info
            )
            self.set_info(
                "Found mmapped files without further precharging:\n"
                "{}\n"
                "See {} for more information".format(file_list, lb.resource_link(self.Parameters.gdb_output.id, 'backtraces')),
                do_escape=False
            )
            self.Context.has_errors = True

        if self.Context.has_errors:
            raise errors.SandboxTaskFailureError('Precharge checks failed, see task info for details')
