# -*- coding: utf-8 -*-

import os
import json
import logging
from datetime import date, timedelta

from sandbox import common
from sandbox import sdk2
from sandbox.projects.common import environments
from sandbox.sdk2.helpers import subprocess

from sandbox.projects.websearch.begemot import resources as br


class ExtractBegemotProductionRequests(sdk2.Task):
    """
    Extract production apphost requests to begemot/wizard from YT tables
    """
    class Requirements(sdk2.Requirements):
        # Pip packages installed for the task; 'yandex-yt' is pinned to 0.10.8
        # (the task imports `yt.wrapper` in _prepare_yt_client).
        # The trailing comma is essential: without it the parentheses are plain
        # grouping and the value is a single PipEnvironment, not a tuple.
        environments = (environments.PipEnvironment('yandex-yt', version='0.10.8'),)

    class Parameters(sdk2.Parameters):
        # Reading from YT stops once this many requests have been fetched;
        # a zero value makes on_execute return without fetching anything.
        number_of_requests_to_extract = sdk2.parameters.Integer(
            'Number of requests to include into result', default=10000, required=True
        )

        # Selects the data source; on_execute dispatches on this value.
        with sdk2.parameters.RadioGroup('What to fetch') as what_to_fetch:
            what_to_fetch.values['wizard'] = what_to_fetch.Value('wizard', default=True)
            what_to_fetch.values['begemot_worker_bravo'] = what_to_fetch.Value('begemot_worker_bravo')
            what_to_fetch.values['plain_service'] = what_to_fetch.Value('plain_service')
            # Apphost dispatcher requests (plain json apphost input context)
            what_to_fetch.values['apphost_dispatcher_requests'] = what_to_fetch.Value('apphost_dispatcher_requests')

        # Parameters attached to the 'wizard' choice.
        with what_to_fetch.value['wizard']:
            # '{date}' is substituted with the date in YYYYMMDD form (see bin_make_yt_table_name).
            yt_table_name_template = sdk2.parameters.String(
                'YT table name template',
                default='//home/begemot/eventlogdata/production_wizard_web/{date}'
            )

        # Parameters attached to the 'begemot_worker_bravo' choice.
        with what_to_fetch.value['begemot_worker_bravo']:
            # '{date}' is substituted with the date in YYYYMMDD form (see bin_make_yt_table_name).
            yt_table_name_template_2 = sdk2.parameters.String(
                'YT table name template',
                default='//home/begemot/eventlogdata/begemot_worker_bravo_production/{date}'
            )

        # Parameters shared by the 'wizard' and 'begemot_worker_bravo' choices.
        with what_to_fetch.value['wizard'], what_to_fetch.value['begemot_worker_bravo']:
            # Optional: when not set, bin_read_data writes the fetched lines without conversion.
            ah_converter_binary = sdk2.parameters.Resource(
                'Apphost request converter binary', resource_type=[br.BEGEMOT_AH_CONVERTER]
            )
            # YT cluster used by bin_yt_read_data.
            yt_proxy = sdk2.parameters.String(
                'YT Proxy', default='banach', required=True
            )

        # Parameters attached to the 'plain_service' choice.
        with what_to_fetch.value['plain_service']:
            # '{date}' and '{hour}' are substituted by plain_make_yt_table_name.
            yt_table_name_template_3 = sdk2.parameters.String(
                'YT table name template',
                default='//home/apphost_event_log_filter/1h/{date}T{hour}:00:00.d/begemot-workers.at.revision/input-dumps_success'
            )
            # The default must be a plain string: the value is parsed with
            # str.split(',') in _parse_hours_to_fetch, which a list does not support.
            hours_to_fetch = sdk2.parameters.String(
                'From which hours of day requests will be fetched (00,02,03-05,23)', default='00-23', required=True
            )

        # Parameters attached to the 'apphost_dispatcher_requests' choice.
        with what_to_fetch.value['apphost_dispatcher_requests']:
            # web5 graph input context
            # May not have today and yesterday dumps
            yt_table_name_template_4 = sdk2.parameters.String(
                'YT table name template',
                default='//home/apphost_event_log_filter/1d/{date}.d/web5b/input-dumps_success'
            )

        # Parameters shared by the 'plain_service' and 'apphost_dispatcher_requests' choices.
        with what_to_fetch.value['plain_service'], what_to_fetch.value['apphost_dispatcher_requests']:
            # YT cluster used by plain_yt_read_data and dispatcher_reqs_yt_read_data.
            yt_proxy_2 = sdk2.parameters.String(
                'YT Proxy', default='hahn', required=True
            )

        # Vault owner/name pair from which the YT token is read (see _prepare_yt_client).
        yt_vault_token_owner = sdk2.parameters.String(
            'Vault owner for YT token', default='SEARCH-RELEASERS', required=True
        )
        yt_vault_token_name = sdk2.parameters.String(
            'Vault name for YT token', default='yt_token_for_testenv', required=True
        )
        # Date selection: an explicit date, when given, takes precedence over
        # the yesterday/today switch (see _make_table_name).
        fetch_yesterday_requests = sdk2.parameters.Bool(
            'Fetch yesterday requests table (default = today)', default=False, required=False
        )
        fetch_requests_for_a_specified_date = sdk2.parameters.String(
            'Fetch requests for a specified date (date must be in YYYY-MM-DD format)', required=False
        )
        # Optional resource whose lines are copied to the output by add_extra_requests.
        extra_requests = sdk2.parameters.Resource(
            'Additional requests that will be added to extracted requests',
            resource_type=br.BEGEMOT_APPHOST_QUERIES
        )

    def _make_table_name(self, template, date_transform=None, **additional_kwargs):
        if self.Parameters.fetch_requests_for_a_specified_date:
            date_ = self.Parameters.fetch_requests_for_a_specified_date
        else:
            yesterday = self.Parameters.fetch_yesterday_requests
            date_ = str((date.today() - timedelta(1)) if yesterday else date.today())
        if date_transform:
            date_ = date_transform(date_)
        table_name = template.format(date=date_, **additional_kwargs)
        logging.info('Will fetch data from ' + table_name)
        return table_name

    def _prepare_yt_client(self, proxy):
        """
        Create a YT client for the given proxy.

        The token is read from the vault entry named by the
        ``yt_vault_token_owner`` / ``yt_vault_token_name`` parameters.

        :param proxy: YT proxy passed to ``YtClient``
        :return: ``yt.wrapper.YtClient`` instance
        """
        # Imported here rather than at module level, as in the rest of this task
        import yt.wrapper as yt

        vault_owner = self.Parameters.yt_vault_token_owner
        vault_name = self.Parameters.yt_vault_token_name
        yt_token = sdk2.Vault.data(vault_owner, name=vault_name)
        return yt.YtClient(proxy=proxy, token=yt_token)

    def add_extra_requests(self, output):
        """
        Copy the lines of the optional ``extra_requests`` resource into ``output``.

        Does nothing when the ``extra_requests`` parameter is not set.

        :param output: writable file-like object that receives one request per line
        """
        extra_requests = self.Parameters.extra_requests
        if not extra_requests:
            return
        extra_requests_path = str(sdk2.ResourceData(extra_requests).path)
        with open(extra_requests_path, 'r') as input_file:
            for line in input_file:
                # Lines yielded by file iteration keep their trailing '\n';
                # strip it first so every request ends with exactly one newline
                # (the last line of the file may have none).
                output.write(line.rstrip('\n') + '\n')

    def ah_convert(self, ah_converter_path, requests, output, errors):
        """
        Run every fetched request through the apphost converter binary.

        The converter is started once per request with ``-i service_request``,
        the stripped request goes to its stdin, and the ``answers`` member of
        the JSON it prints is written to the output file as one line.
        Extra requests are copied to the output before any conversion.

        :param ah_converter_path: path to the converter executable
        :param requests: path to the file with one raw request per line
        :param output: path to the file that receives converted requests
        :param errors: path to the file that receives converter stderr and parse errors
        :raises TaskFailure: when the converter exits with a non-zero code
        """
        command = [ah_converter_path, '-i', 'service_request']
        with open(requests, 'r') as source, open(output, 'w') as destination, open(errors, 'w') as error_log:
            self.add_extra_requests(destination)
            for raw_request in source:
                converter = subprocess.Popen(
                    command,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                stdout_data, stderr_data = converter.communicate(raw_request.strip())
                if not stderr_data:
                    try:
                        answers = json.loads(stdout_data)['answers']
                    except Exception as exc:
                        error_log.write('Python exception: {0.__class__.__name__}: {0}\n'.format(exc))
                        self.Context.lines_error += 1
                    else:
                        json.dump(answers, destination)
                        destination.write('\n')
                else:
                    # Anything printed to stderr counts as a failed request
                    error_log.write(stderr_data + '\n')
                    self.Context.lines_error += 1
                exit_code = converter.wait()
                if exit_code != 0:
                    raise common.errors.TaskFailure('Apphost converter exited with code {}'.format(exit_code))

    def bin_yt_convert_and_write(self, requests, output):
        """
        Convert the raw requests file with the ``ah_converter_binary`` resource.

        Converter errors are collected in ``apphost_converter.err.txt`` inside the task log directory.

        :param requests: path to the file with raw requests
        :param output: path to the file that receives converted requests
        """
        log_dir = str(self.log_path())
        error_log_path = os.path.join(log_dir, 'apphost_converter.err.txt')
        converter_resource = sdk2.ResourceData(self.Parameters.ah_converter_binary)
        self.ah_convert(str(converter_resource.path), requests, output, error_log_path)

    def bin_process_request_line(self, line):
        try:
            return line.split('\t')[0].strip()
        except IndexError:
            raise common.errors.TaskFailure('Error occurred during request extraction. Original field: %s', line)

    def bin_make_yt_table_name(self, yt_table_name_template):
        return self._make_table_name(yt_table_name_template, date_transform=lambda d: d.replace('-', ''))

    def bin_yt_read_data(self, output, yt_table_name_template, row_filter):
        """
        Read event-log rows from YT and write the extracted requests, one per line.

        Rows rejected by ``row_filter`` are skipped. A row whose extracted
        request is empty is counted in ``Context.lines_error``. Reading stops
        once ``number_of_requests_to_extract`` requests have been written.

        :param output: path to the file that receives the requests
        :param yt_table_name_template: table name template with a '{date}' placeholder
        :param row_filter: callable taking a row dict and returning a truthy value for rows to keep
        """
        yt_client = self._prepare_yt_client(self.Parameters.yt_proxy)
        with open(output, 'w') as destination:
            table_name = self.bin_make_yt_table_name(yt_table_name_template)
            for record in yt_client.read_table(table_name, format='json'):
                if row_filter(record):
                    request = self.bin_process_request_line(record['event_data'])
                    if request:
                        destination.write(request + '\n')
                        self.Context.lines_fetched += 1
                        if self.Context.lines_fetched >= self.Parameters.number_of_requests_to_extract:
                            break
                    else:
                        self.Context.lines_error += 1

    def bin_read_data(self, output, yt_table_name_template, row_filter):
        """
        Fetch event-log requests and, if a converter binary is given, convert them.

        Without ``ah_converter_binary`` the fetched lines go straight to ``output``.
        With it they are first stored in ``requests_tmp.txt`` and then converted into ``output``.

        :param output: path to the resulting requests file
        :param yt_table_name_template: table name template with a '{date}' placeholder
        :param row_filter: callable selecting the rows to extract
        """
        if not self.Parameters.ah_converter_binary:
            self.bin_yt_read_data(output, yt_table_name_template, row_filter)
            return

        raw_requests_path = str(self.path('requests_tmp.txt'))
        self.bin_yt_read_data(raw_requests_path, yt_table_name_template, row_filter)
        self.bin_yt_convert_and_write(requests=raw_requests_path, output=output)

    @staticmethod
    def _parse_hours_to_fetch(hours):
        for i in hours.split(','):
            j = i.split('-', 2)
            if len(j) > 2:
                for hour in xrange(int(j[0]), int(j[1]) + 1):
                    yield '{:02d}'.format(hour)
            else:
                yield '{:02d}'.format(int(j[0]))

    def plain_make_yt_table_name(self, hour):
        return self._make_table_name(self.Parameters.yt_table_name_template_3, hour=hour)

    def plain_yt_read_data(self, output):
        """
        Write 'plain_service' input dumps from the hourly YT tables, one JSON document per line.

        Extra requests are copied first. Then, for every hour listed in
        ``hours_to_fetch``, the ``['dict']['Data']`` value of each
        ``TInputDump`` row is dumped. Reading stops once
        ``number_of_requests_to_extract`` rows have been written.

        :param output: path to the file that receives the requests
        """
        yt_client = self._prepare_yt_client(self.Parameters.yt_proxy_2)
        with open(output, mode='w') as destination:
            self.add_extra_requests(destination)
            for hour in self._parse_hours_to_fetch(self.Parameters.hours_to_fetch):
                table_name = self.plain_make_yt_table_name(hour)
                for record in yt_client.read_table(table_name, format='json'):
                    if record['event_name'] == 'TInputDump':
                        json.dump(record['dict']['Data'], destination)
                        destination.write('\n')
                        self.Context.lines_fetched += 1
                        if self.Context.lines_fetched >= self.Parameters.number_of_requests_to_extract:
                            return

    def dispatcher_reqs_make_yt_table_name(self):
        return self._make_table_name(self.Parameters.yt_table_name_template_4)

    def dispatcher_reqs_yt_read_data(self, output):
        """
        Writes json-deserializable apphost input contexts

        Extra requests are copied first. Then the ``['dict']['Data']`` value
        of each ``TInputDump`` row is dumped as one JSON line, until
        ``number_of_requests_to_extract`` rows have been written.

        :param output: path to the file that receives the requests
        """
        yt_client = self._prepare_yt_client(self.Parameters.yt_proxy_2)
        with open(output, 'w') as destination:
            self.add_extra_requests(destination)
            table_name = self.dispatcher_reqs_make_yt_table_name()
            for record in yt_client.read_table(table_name, format='json'):
                if record['event_name'] != 'TInputDump':
                    continue
                json.dump(record['dict']['Data'], destination)
                destination.write('\n')
                self.Context.lines_fetched += 1
                if self.Context.lines_fetched >= self.Parameters.number_of_requests_to_extract:
                    break

    def on_enqueue(self):
        """
        Create the output BEGEMOT_APPHOST_QUERIES resource ('output.txt') and store its id in the context.
        """
        output_resource = br.BEGEMOT_APPHOST_QUERIES(self, 'Apphost requests to web5b', 'output.txt')
        self.Context.out_resource_id = output_resource.id

    def on_execute(self):
        """
        Reset the counters and fetch requests from the source selected by ``what_to_fetch``.

        Returns without fetching when ``number_of_requests_to_extract`` is zero or unset.
        """
        self.Context.lines_fetched = 0
        self.Context.lines_error = 0
        output = str(sdk2.Resource[self.Context.out_resource_id].path)
        if not self.Parameters.number_of_requests_to_extract:
            return

        def is_wizard_apphost_request(row):
            if row['event_type'] != 'ReqWizardRequestReceived':
                return False
            return '\tapphost/' in row['event_data']

        def is_begemot_request(row):
            return row['event_type'] == 'TRequestReceived'

        source = self.Parameters.what_to_fetch
        if source == 'wizard':
            self.bin_read_data(output, self.Parameters.yt_table_name_template, is_wizard_apphost_request)
        elif source == 'begemot_worker_bravo':
            self.bin_read_data(output, self.Parameters.yt_table_name_template_2, is_begemot_request)
        elif source == 'plain_service':
            self.plain_yt_read_data(output)
        else:
            # Any other value is treated as 'apphost_dispatcher_requests'
            self.dispatcher_reqs_yt_read_data(output)
