# -*- coding: utf-8 -*-

import os
import logging
import datetime
import json
import time
import urlparse
import hashlib

from sandbox import sdk2

from sandbox.common.fs import read_file
from sandbox.common.types import task as ctt, resource as ctr, misc as ctm

from sandbox.projects.common.yasm import push_api

from sandbox.projects.sandbox_ci import parameters
from sandbox.projects.sandbox_ci.task import ManagersTaskMixin
from sandbox.projects.sandbox_ci.utils import flow
from sandbox.projects.sandbox_ci.utils.context import Node
from sandbox.projects.sandbox_ci.utils.process import run_process
from sandbox.projects.sandbox_ci.utils import env
from sandbox.projects.sandbox_ci.utils import request


# Resource types that are searched for reports.
REPORT_RESOURCE_TYPES = ('SANDBOX_CI_ARTIFACT',)

# Concurrency limit passed to `flow.parallel` when uploading reports.
MAX_PARALLEL = 20
# Upper bound applied to the reported resource count when paging through found resources.
MAX_REPORTS_AT_ONCE = 500

# ClickHouse connection settings, exported to the process environment by the task.
CLICKHOUSE_HOSTS = ','.join([
    'vla-4729cn2jora9xcyd.db.yandex.net',
    'vla-oipzzetwt0vdc25w.db.yandex.net',
    'sas-o9rluglvdi19ohfm.db.yandex.net',
])
CLICKHOUSE_USER = 'testcopadmin'
CLICKHOUSE_DATABASE = 'testcop'


class UploadResult(object):
    """Base class for the outcome of processing a single report resource."""


class UploadSuccessResult(UploadResult):
    """Outcome returned when the report has been uploaded."""


class UploadSkippedResult(UploadResult):
    """Outcome returned when the report has been intentionally left out of the upload."""


class UploadErrorResult(UploadResult):
    """Outcome returned when processing of the report has raised an exception."""

    def __init__(self, error):
        """
        :param error: exception caught while processing the report
        :type error: Exception
        """
        self.error = error


class ReportFetcher(object):
    def __init__(self, report, base_path):
        """
        Remembers the report resource and the directory used for downloaded databases.

        :param report: report resource
        :type report: sdk2.Resource
        :param base_path: base path for downloaded resources
        :type base_path: str
        """
        self.report, self.base_path = report, base_path

    def fetch_database_urls(self):
        """
        Returns paths/urls of the report's databases.

        A non-empty `report_data_urls` attribute of the resource is preferred (one URI per line);
        otherwise the `dbUrls` list is read from `databaseUrls.json` in the resource root.
        An empty list is returned when that file does not exist.

        :rtype: list of str
        """
        # The attribute is consulted first: when it is set, the resource does not have to be synced.
        inline_urls = getattr(self.report, 'report_data_urls', None)
        if inline_urls:
            return inline_urls.split('\n')

        resource_root = sdk2.ResourceData(self.report).path
        manifest_path = str(resource_root / 'databaseUrls.json')

        if os.path.isfile(manifest_path):
            manifest = json.loads(read_file(manifest_path))
            return manifest['dbUrls']

        return []

    def fetch_databases(self):
        """
        Returns paths of synced/downloaded databases.

        Every URI returned by `fetch_database_urls` is resolved to a local path, in the same order.

        :rtype: list of str
        """
        return [self._fetch_database(uri) for uri in self.fetch_database_urls()]

    def _fetch_database(self, uri):
        """
        Resolves a database URI to a local path.

        URIs with the `http`/`https` scheme are downloaded, anything else is treated as a path
        inside the report resource.

        :param uri: database URI (URL or path, relative to the report resource's root)
        :type uri: str
        :rtype: str
        """
        scheme = urlparse.urlparse(uri).scheme
        is_remote = scheme == 'http' or scheme == 'https'
        resolve = self._download_database if is_remote else self._get_database_from_resource

        return resolve(uri)

    def _download_database(self, url):
        """
        Downloads the database into `base_path` and returns the path of the written file.

        The file name is built from the report id and the MD5 hex digest of the URL.

        :param url: database URL
        :type url: str
        :rtype: str
        """
        # md5 operates on bytes: text URLs are encoded explicitly, so non-ASCII characters
        # do not break hashing (for ASCII-only URLs the digest stays the same as before).
        url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
        db_filename = '{}_{}'.format(self.report.id, hashlib.md5(url_bytes).hexdigest())
        db_path = os.path.join(self.base_path, db_filename)
        db_content = request.send_request('get', url).content

        # The context manager guarantees that the file is flushed and closed
        # before its path is handed over to the caller.
        with open(db_path, 'wb') as db_file:
            db_file.write(db_content)

        return db_path

    def _get_database_from_resource(self, path):
        """
        Returns the location of the database inside the report resource data.

        :param path: database path
        :type path: str
        :rtype: str
        """
        resource_root = sdk2.ResourceData(self.report).path
        return str(resource_root / path)


def load_all_in_chunks(request, chunk_size=250):
    """
    Loads the results of a query page by page and returns them as a single list.

    The first page is always loaded; further pages are requested while the offset is below
    the smaller of `MAX_REPORTS_AT_ONCE` and the count reported by the first page.

    :param request: query object; `limit()`, `offset()` and `count` are the members used here
    :param chunk_size: page size
    :type chunk_size: int
    :rtype: list
    """
    paged = request.limit(chunk_size)
    head = paged.offset(0)
    loaded = list(head)
    total = min(MAX_REPORTS_AT_ONCE, head.count)

    for start in range(chunk_size, total, chunk_size):
        loaded.extend(paged.offset(start))

    return loaded


class SandboxCiHermioneReportCollector(ManagersTaskMixin, sdk2.Task):
    class Requirements(sdk2.Requirements):
        # Resource requirements declared for the task.
        dns = ctm.DnsType.DNS64
        disk_space = 10 * 1024  # presumably MiB (~10 GiB) — TODO confirm units against sdk2 docs
        cores = 8

        # Empty `Caches` subclass: no cache-related settings are declared here.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        # Input parameters of the task.
        with sdk2.parameters.Group('Advanced'):
            # Container parameter built by the shared sandbox_ci `parameters` module.
            _container = parameters.environment_container()

        # Node.js parameters; presumably the source of `self.Parameters.node_js_version`
        # read in `on_execute` — verify against `parameters.NodeJsParameters`.
        node_js = parameters.NodeJsParameters

    # Step name -> shell command. `on_execute` calls `self.lifecycle('npm_install')`, which
    # presumably looks the command up here (verify against the lifecycle manager).
    # The command installs a pinned version of the `@yandex-int/messier` package.
    lifecycle_steps = {
        'npm_install': 'npm i --production --registry=http://npm.yandex-team.ru @yandex-int/messier@0.5.0',
    }

    # Lifecycle manager requires the following props/methods: `project_name`, `project_dir`, `working_path`, `project_path`.
    @property
    def project_name(self):
        """
        Project name; this task always reports an empty one.

        :rtype: str
        """
        return ''

    @property
    def project_dir(self):
        """
        Project directory; it is the same location `working_path()` returns when called without arguments.
        """
        project_root = self.working_path()
        return project_root

    def working_path(self, *args):
        """
        Builds a path by delegating to the task's `path` method.

        :param args: path segments forwarded to `self.path`
        """
        resolved = self.path(*args)
        return resolved

    def project_path(self, *args):
        """
        Builds a path relative to `project_dir`.

        :param args: path segments joined to the project directory
        """
        base = self.project_dir
        return base.joinpath(*args)

    def on_execute(self):
        """
        Finds report resources that have not been processed yet, uploads them in parallel and
        stores the counters (`reports`, `upload_success`, `upload_errors`, `execution_time`)
        in the task context.

        When there is nothing to process, the method returns early and leaves the counters untouched.
        """
        self._init_environ()

        started_at = time.time()

        reports = self._prepare_report_resources()
        if not reports:
            logging.info('No suitable resources found')
            return

        logging.info('Found {} report resources'.format(len(reports)))

        with Node(self.Parameters.node_js_version):
            # Prevent cache collisions on npx call.
            self.lifecycle('npm_install')

            results = flow.parallel(self._safe_upload_report, reports, MAX_PARALLEL)

        self.Context.reports = len(reports)
        # Exact type match is used on purpose: skipped results are counted in neither bucket.
        succeeded = [r for r in results if type(r) is UploadSuccessResult]
        self.Context.upload_success = len(succeeded)
        failed = [r for r in results if type(r) is UploadErrorResult]
        self.Context.upload_errors = len(failed)
        self.Context.execution_time = int(time.time() - started_at)

    def on_finish(self, prev_status, status):
        """
        Sends monitoring signals for the given task status.

        :param prev_status: previous task status (not used here)
        :param status: task status forwarded to `_send_signals`
        """
        self._send_signals(status)

    def on_break(self, prev_status, status):
        """
        Sends monitoring signals for the given task status.

        :param prev_status: previous task status (not used here)
        :param status: task status forwarded to `_send_signals`
        """
        self._send_signals(status)

    def _prepare_report_resources(self):
        """
        Finds ready report resources marked with `collect_stats=true` that were created after
        the resource remembered by the latest successful run, and remembers the new position.

        :rtype: list of sdk2.Resource
        """
        query = {
            'type': REPORT_RESOURCE_TYPES,
            'state': ctr.State.READY,
            'attrs': {'collect_stats': 'true'},
        }
        last_processed = self._get_last_processed_resource()

        if last_processed:
            created_from = last_processed.created.isoformat()
            created_to = datetime.datetime.utcnow().isoformat()
            query['created'] = '{}..{}'.format(created_from, created_to)

        found = load_all_in_chunks(sdk2.Resource.find(**query))

        if last_processed:
            # The last processed resource itself is dropped from the result.
            found = [r for r in found if r.id != last_processed.id]

        if found:
            self._store_last_processed_resource(found)
        elif last_processed:
            # Nothing new: carry the previous position over to this run's context.
            self._store_last_processed_resource([last_processed])

        return found

    def _safe_upload_report(self, report):
        """
        Uploads the report and converts any raised exception into an error result,
        so a single failure does not propagate to the caller.

        :param report: sdk2.Resource
        :rtype: UploadResult
        """
        try:
            outcome = self._upload_report(report)
        except Exception as error:
            logging.exception('Error occurred while processing report resource #%s', report.id)
            outcome = UploadErrorResult(error)

        return outcome

    def _upload_report(self, report):
        """
        Fetches the databases of the report and passes them to `messier upload`.

        Reports whose `report_description` is `merged` and reports without databases are skipped.

        :param report: sdk2.Resource
        :rtype: UploadResult
        """
        report_id = report.id
        logging.info('Process report resource #%s', report_id)

        if getattr(report, 'report_description', None) == 'merged':
            logging.info('Skip merged report resource #%s', report.id)
            return UploadSkippedResult()

        db_paths = ReportFetcher(report, base_path=str(self.working_path())).fetch_databases()

        if not db_paths:
            logging.info('Skip report resource #%s without databases', report.id)
            return UploadSkippedResult()

        logging.info('Processing report resource #%s database', report.id)

        command = ['npx', '@yandex-int/messier', 'upload']
        command.append('--reports={}'.format(' '.join(db_paths)))
        command.append('--report-url=https://proxy.sandbox.yandex-team.ru/{}/index.html'.format(report.id))
        # Task id preference: `sandbox_task_id` attribute, then `build_id`, then the resource's own task.
        command.append('--sandbox-task-id={}'.format(
            getattr(report, 'sandbox_task_id', getattr(report, 'build_id', report.task_id)),
        ))
        command.append('--project={}'.format(report.project))
        command.append('--tool={}'.format(report.tool))
        command.append('--build-context={}'.format(report.build_context))
        command.append('--commit-sha={}'.format(report.commit_sha))
        command.append('--branch={}'.format(report.branch))
        run_process(command)

        return UploadSuccessResult()

    def _init_environ(self):
        """
        Exports ClickHouse connection settings (the password is taken from the vault) and logs the environment.
        """
        secrets = self.vault.read_all_env()

        clickhouse_env = {
            'CLICKHOUSE_HOSTS': CLICKHOUSE_HOSTS,
            'CLICKHOUSE_USER': CLICKHOUSE_USER,
            'CLICKHOUSE_DATABASE': CLICKHOUSE_DATABASE,
            'CLICKHOUSE_PASSWORD': secrets['MESSIER_CLICKHOUSE_PASSWORD'],
        }
        env.export(clickhouse_env)

        env.log(self)

    def _store_last_processed_resource(self, resources):
        """
        Saves the greatest id among the given resources to the task context as `last_resource_id`.

        :param resources: resources
        :type resources: list of sdk2.Resource
        """
        resource_ids = [int(resource) for resource in resources]
        self.Context.last_resource_id = max(resource_ids)

    def _get_last_processed_resource(self):
        """
        Returns the resource remembered by the most recent successful run of this task type.

        `None` is returned when there is no such run or its context holds no integer `last_resource_id`.

        :rtype: sdk2.Resource or None
        """
        successful_runs = SandboxCiHermioneReportCollector.find(status=ctt.Status.SUCCESS)
        latest_run = successful_runs.order(-sdk2.Task.id).first()

        if not latest_run:
            return None

        last_id = latest_run.Context.last_resource_id
        if type(last_id) is not int:
            return None

        return sdk2.Resource.find(id=last_id).first()

    def _send_signals(self, status):
        """
        Pushes the task status and the counters stored in the context to yasm.

        Any exception raised on the way is logged and suppressed.

        :param status: task status
        :type status: str
        """
        try:
            def ctx_value(name, fallback):
                # Missing or falsy context values are replaced with the fallback.
                stored = getattr(self.Context, name)
                if stored is ctm.NotExists:
                    return fallback
                return stored or fallback

            signals = {
                'status_{}_mmmm'.format(status.lower()): 1,
                'reports_mmmm': ctx_value('reports', 0),
                'upload_success_mmmm': ctx_value('upload_success', 0),
                'upload_errors_mmmm': ctx_value('upload_errors', 0),
                'execution_time_min_hgram': [ctx_value('execution_time', 0) / 60, 1],
            }
            tags = {
                'itype': 'sandboxci',
                'prj': 'sandboxci',
                'build_context': 'default',
                'service_name': 'hermione_report_collector',
                'task_type': str(self.type),
            }

            push_api.push_signals(signals=signals, tags=tags)
        except Exception:
            logging.exception('Exception while sending status signal to yasm')
