# -*- coding: utf-8 -*-

import datetime
import logging
import os
import shutil
import tempfile
import time

from sandbox import sdk2
from sandbox.sdk2.helpers import process
from sandbox.projects import resource_types
from sandbox.projects.common.arcadia import sdk as arcadiasdk
from sandbox.projects.common.constants import constants as sdk_constants


def build_project_from_trunk(target, output_directory):
    """
    Build a single Arcadia target from trunk and put the results into a directory.

    :param target: target path passed to the build as the only entry of ``targets``
    :param output_directory: directory passed to the build as ``results_dir``
    """
    logging.info('Building {} into {}'.format(target, output_directory))
    trunk_url = sdk2.svn.Arcadia.ARCADIA_TRUNK_URL
    # The mounted trunk is only needed while the build itself is running.
    with arcadiasdk.mount_arc_path(trunk_url) as mounted_root:
        build_options = dict(
            build_system=sdk_constants.DISTBUILD_BUILD_SYSTEM,
            source_root=mounted_root,
            targets=[target],
            results_dir=output_directory,
            clear_build=False,
        )
        arcadiasdk.do_build(**build_options)
    logging.info('Finished building ' + target)


class MakeSamohodQueries(sdk2.Task):
    """
    Extract search/factor/snippet stage queries of prod Samohod from an eventlog table on YT.

    The task builds the ``parse_int_eventlog`` tool from trunk, computes a timestamp range,
    runs the tool over the eventlog table of the matching day and marks three
    PLAIN_TEXT_QUERIES resources as ready.

    Task assumptions:
    - eventlog table name looks like YYYYMMDD
    - MakeSamohodQueries runs in the same time zone as eventlog dumper
    """

    # Name of the tool; used both as the build target leaf and as the binary name.
    parse_int_eventlog_util = 'parse_int_eventlog'
    # Seconds that must pass after min_timestamp before the task agrees to run.
    event_log_delivery_delay = 7200

    class Requirements(sdk2.Requirements):
        cores = 1
        ram = 6 * 1024
        disk_space = 10 * 1024

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        yt_proxy = sdk2.parameters.String('YT proxy', required=True)
        event_log_table = sdk2.parameters.String('Eventlog', description='path to eventlog table on YT', required=True)
        save_yt_output = sdk2.parameters.String('Tmp table', description='path to temporary YT table (can be empty)')
        n_queries = sdk2.parameters.Integer('N queries', description='number of queries needed (of each type)', default=1000)
        include_re = sdk2.parameters.List('Include regexp', description='grep -P')
        exclude_re = sdk2.parameters.List('Exclude regexp', description='grep -P -v')
        auto_sample = sdk2.parameters.Bool('auto sample')
        exact_sample = sdk2.parameters.Float('exact sample factor', default=1.0)
        timestamp_offset = sdk2.parameters.Float('timestamp offset (hours)', description='timestamp offset relative to rtyserver backup timestamp, or to 3:00AM today', default=0)
        # NOTE(review): the label says hours and the value is multiplied by 3600 when the
        # range is computed, so the default of 3600 means a 3600-hour window -- confirm
        # this is intended and not a leftover value in seconds.
        timestamp_interval = sdk2.parameters.Float('timestamp interval (hours)', default=3600)
        search_rtyserver_backup = sdk2.parameters.Bool('search rtyserver backup', default=True, description='if set then timestamp offset is relative to the latest backup')
        rtyserver_service = sdk2.parameters.String('rtyserver service name')
        rtyserver_shard = sdk2.parameters.String('rtysever shard range')

    def on_execute(self):
        """
        Build the tool, compute the timestamp range and run the tool.

        :raises Exception: if backup search is enabled but service/shard is not set,
            if no backup resource is found, or if the range starts too recently
        """
        logging.info('Starting...')
        tmp_dir = tempfile.mkdtemp()
        try:
            build_project_from_trunk('saas/tools/' + self.parse_int_eventlog_util, tmp_dir)
            parse_int_eventlog_path = os.path.join(tmp_dir, 'saas/tools', self.parse_int_eventlog_util, self.parse_int_eventlog_util)

            logging.info('Calculate timestamp range')
            min_timestamp, max_timestamp = self._timestamp_range(time.time())
            table_date = datetime.datetime.fromtimestamp(min_timestamp)
            logging.info('Timestamp range calculated: [{}, {})'.format(min_timestamp, max_timestamp))

            logging.info('Creating output resources...')
            search_queries = self.create_queries('search', 'Search stage requests to prod Samohod', min_timestamp, max_timestamp)
            factor_queries = self.create_queries('factor', 'Factor stage requests to prod Samohod', min_timestamp, max_timestamp)
            snippet_queries = self.create_queries('snippet', 'Snippet stage requests to prod Samohod', min_timestamp, max_timestamp)
            logging.info('Created output resources')

            cmd = self._build_command(
                parse_int_eventlog_path, table_date, min_timestamp, max_timestamp,
                search_queries, factor_queries, snippet_queries,
            )
            self._run_tool(cmd)
        finally:
            # mkdtemp() never removes the directory on its own; drop the build output
            # whether the run succeeded or not.
            shutil.rmtree(tmp_dir, ignore_errors=True)

        search_queries.ready()
        factor_queries.ready()
        snippet_queries.ready()
        logging.info('Done.')

    def _timestamp_range(self, now):
        """
        Compute the half-open [min_timestamp, max_timestamp) range in epoch seconds.

        :param now: current time as returned by ``time.time()``
        :return: tuple ``(min_timestamp, max_timestamp)`` of ints
        :raises Exception: on missing service/shard, missing backup resource,
            or when ``min_timestamp + event_log_delivery_delay`` is still in the future
        """
        params = self.Parameters
        if params.search_rtyserver_backup:
            if not params.rtyserver_service:
                raise Exception('rtyserver service is not specified')
            if not params.rtyserver_shard:
                raise Exception('rtyserver shard is not specified')
            db_resource = self.find_db_resource()
            if db_resource is None:
                # Fail with a readable message instead of an AttributeError on None.
                raise Exception('rtyserver backup is not found, service={}, shard={}'.format(
                    params.rtyserver_service, params.rtyserver_shard))
            base_timestamp = int(db_resource.detach_timestamp)
            logging.info('Found RTYSERVER_SEARCH_DATABASE id={}, ts={}'.format(db_resource.id, base_timestamp))
        else:
            # Round down to a multiple of 86400 seconds, i.e. 00:00 UTC of the current day
            # (presumably the "3:00AM today" of the parameter description in UTC+3).
            base_timestamp = (now // 86400) * 86400

        min_timestamp = int(base_timestamp + params.timestamp_offset * 3600)
        max_timestamp = int(min_timestamp + params.timestamp_interval * 3600)
        if min_timestamp + self.event_log_delivery_delay > now:
            raise Exception('It is too early to get queries for {} timestamp'.format(min_timestamp))
        return min_timestamp, max_timestamp

    def _build_command(self, tool_path, table_date, min_timestamp, max_timestamp,
                       search_queries, factor_queries, snippet_queries):
        """Assemble the parse_int_eventlog command line from task parameters and output resources."""
        params = self.Parameters
        cmd = [
            tool_path,
            '--verbose',
            '--yt', params.yt_proxy,
            # The table of the day is addressed as <event_log_table>/YYYYMMDD.
            '--eventlog', params.event_log_table + table_date.strftime('/%Y%m%d'),
            '--save-yt-output', params.save_yt_output,
            '--limit', str(params.n_queries),
            '--sample', str(params.exact_sample),
            '--min-timestamp', str(min_timestamp),
            '--max-timestamp', str(max_timestamp),
            '--search-queries', str(search_queries.path),
            '--factor-queries', str(factor_queries.path),
            '--snippet-queries', str(snippet_queries.path),
        ]
        if params.auto_sample:
            cmd.append('--auto-sample')
        for pattern in params.include_re:
            cmd += ['--include', pattern]
        for pattern in params.exclude_re:
            cmd += ['--exclude', pattern]
        return cmd

    def _run_tool(self, cmd):
        """Export the YT token to the environment and run the command, raising on non-zero exit."""
        logging.info('Read YT token from the vault')
        os.environ['YT_TOKEN'] = sdk2.Vault.data('SAAS-ROBOT', 'YT_TOKEN_ARNOLD')
        logging.info('YT token successfully read from the vault')

        with sdk2.helpers.ProcessLog(self, logger=self.parse_int_eventlog_util) as pl:
            pl.logger.propagate = 1
            process.subprocess.check_call(cmd, stdout=pl.stdout, stderr=process.subprocess.STDOUT)
        logging.info('Finished parse_int_eventlog')

    def find_db_resource(self):
        """
        Look up one RTYSERVER_SEARCH_DATABASE resource for the configured service and shard.

        :return: the first resource of the query (callers must handle a missing result)
        """
        service = self.Parameters.rtyserver_service
        shard = self.Parameters.rtyserver_shard
        logging.info('Searching for rtyserver backup, service={}, shard={}'.format(service, shard))
        query = sdk2.Resource['RTYSERVER_SEARCH_DATABASE'].find(attrs=dict(service=service, shard=shard))
        return query.limit(1).first()

    def create_queries(self, search_stage, description, min_timestamp, max_timestamp):
        """
        Create a PLAIN_TEXT_QUERIES resource for one search stage.

        :param search_stage: stage name; also used as the ``<stage>_queries`` file name
        :param description: resource description
        :param min_timestamp: stored as the ``min_timestamp`` resource attribute
        :param max_timestamp: stored as the ``max_timestamp`` resource attribute
        :return: ``sdk2.ResourceData`` wrapping the new resource
        """
        name = search_stage + '_queries'
        resource = resource_types.PLAIN_TEXT_QUERIES(
            self,
            description,
            path=str(self.path(name)),
            search_stage=search_stage,
            basesearch_type='samohod',
            min_timestamp=min_timestamp,
            max_timestamp=max_timestamp
        )
        return sdk2.ResourceData(resource)
