import json
import time
import urllib
import urllib2
import logging

from sandbox.projects.common import utils

from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.errors import SandboxTaskFailureError


# Required string parameter 'proxy_name': the YT proxy the input table is read
# from. The task uses it when no 'cluster' entry comes from event parameters.
class ProxyName(sp.SandboxStringParameter):
    group = 'Input data parameters'
    required = True
    name = 'proxy_name'
    description = 'YT proxy name'


# Required string parameter 'input_table': path of the YT table to dump.
# The task uses it when no 'path' entry comes from event parameters.
class InputTable(sp.SandboxStringParameter):
    group = 'Input data parameters'
    required = True
    name = 'input_table'
    description = 'Input table path'


# Required string parameter 'key_column': name of the input-table column whose
# value becomes the 'Subkey_Owner' field of every released record.
class KeyColumn(sp.SandboxStringParameter):
    group = 'Input data parameters'
    required = True
    name = 'key_column'
    description = 'Key column'


# Required string parameter 'value_column': name of the input-table column
# holding the value; only rows where it is greater than 0.0 are released.
class ValueColumn(sp.SandboxStringParameter):
    group = 'Input data parameters'
    required = True
    name = 'value_column'
    description = 'Value column'


# Required string parameter 'namespace': sent as the 'Namespace' field of the
# SaaS table description. The task uses it when no 'group' entry comes from
# event parameters.
class Namespace(sp.SandboxStringParameter):
    name = 'namespace'
    description = 'Data namespace'
    required = True
    # The other parameter with a default in this file (Timestamp) declares it
    # through `default_value`; presumably that is the attribute the SDK reads,
    # so the default is declared under that name as well.
    default_value = 'domain_has_metrika'
    # Original spelling, kept as an alias so anything reading `default` still works.
    default = default_value
    group = 'SaaS data parameters'


# Required integer parameter 'timestamp': the data timestamp sent to SaaS.
# _release_saas_data multiplies it by 10 ** 6 before sending and falls back to
# the current time when the resolved value is None.
class Timestamp(sp.SandboxIntegerParameter):
    name = 'timestamp'
    description = 'Data timestamp'
    required = True
    # NOTE(review): this expression runs once, when the class body is executed
    # at module import, so the default is the import time of this module rather
    # than the moment a task is created or run. It also means the
    # `timestamp is None` fallback in _release_saas_data is presumably never
    # reached through the default -- confirm the intended behaviour.
    default_value = int(time.time())
    # Presumably keeps the value from being carried over when a task is
    # copied -- confirm against the SDK.
    do_not_copy = True
    group = 'SaaS data parameters'


# Parameter stored under the 'event_params' context key and treated as a
# mapping by the task: when it is non-empty, the 'cluster', 'path' and 'group'
# entries are looked up in it instead of the regular task parameters.
# It is not listed in the task's input_parameters, so it is presumably filled
# in by whatever triggers the task -- confirm with the caller.
class EventParams(sp.DictRepeater, sp.SandboxStringParameter):
    name = 'event_params'


class ReleaseDomainHasMetrikaTrie(SandboxTask):
    """
        Build trie with metrika factors
    """

    # Flow: read (key, value) rows from a YT table, keep the rows with a
    # positive value, write them to a temporary YT table and ask the SaaS
    # ferryman service to ingest that table, then poll until the batch is
    # reported as 'final' or 'searchable'.

    type = 'RELEASE_DOMAIN_HAS_METRIKA_TRIE'

    input_parameters = (
        ProxyName,
        InputTable,
        KeyColumn,
        ValueColumn,
        Namespace,
        Timestamp,
    )

    environment = (
        environments.PipEnvironment('yandex-yt', use_wheel=True),
    )

    def on_execute(self):
        """Dump the input table and release the resulting records to SaaS."""
        logging.info('Dump table')
        records = self._dump_table(
            self._get_event_or_context_param('cluster', ProxyName),
            self._get_event_or_context_param('path', InputTable),
            utils.get_or_default(self.ctx, KeyColumn),
            utils.get_or_default(self.ctx, ValueColumn),
        )

        # The original logged 'Dump table' here as well (copy-paste), which
        # made the two stages indistinguishable in the task log.
        logging.info('Release SaaS data')
        self._release_saas_data(
            records,
            self._get_event_or_context_param('group', Namespace),
        )

    def _get_event_or_context_param(self, event_param_name, context_param):
        """Resolve a setting from event parameters or, failing that, the context.

        :param event_param_name: key to look up in the event parameters mapping.
        :param context_param: parameter class used when no event parameters
            are present.
        :return: the event parameter value when event parameters are set,
            otherwise the context value (or default) of ``context_param``.
        :raises SandboxTaskFailureError: when event parameters are set but do
            not contain ``event_param_name``.
        """
        event_params = utils.get_or_default(self.ctx, EventParams)
        if not event_params:
            return utils.get_or_default(self.ctx, context_param)

        if event_param_name not in event_params:
            # Interpolate explicitly: exception constructors do not apply
            # %-formatting to their arguments the way logging calls do.
            raise SandboxTaskFailureError(
                'Malformed event parameters, "%s" is absent: %r' % (event_param_name, event_params)
            )
        return event_params[event_param_name]

    def _fetch_json(self, url):
        """Perform a GET request and return the decoded JSON body.

        The response object is always closed so that the long polling loop
        does not accumulate open connections.
        """
        response = urllib2.urlopen(url)
        try:
            return json.load(response)
        finally:
            response.close()

    def _release_saas_data(self, records, namespace):
        """Upload ``records`` to a temporary YT table and feed it to SaaS.

        :param records: iterable of row dicts to write to the temporary table.
        :param namespace: value sent as 'Namespace' in the table description.

        Blocks until the ferryman service reports the batch status as
        'final' or 'searchable'.
        """
        import yt.wrapper as yt
        yt.config['proxy']['url'] = 'banach'  # SaaS works with banach only
        yt.config['token'] = self.get_vault_data(self.owner, 'yt_token')

        ferryman_url = 'http://rankingmid.ferryman.n.yandex-team.ru'

        # Everything, including the polling, happens inside the TempTable
        # context; presumably so the table still exists while SaaS reads it.
        with yt.TempTable() as temp:
            yt.write_table(temp, records, format=yt.JsonFormat())

            timestamp = utils.get_or_default(self.ctx, Timestamp)
            if timestamp is None:
                timestamp = int(time.time())

            table_info = [{
                'Path': temp,
                'Namespace': namespace,
                # Seconds scaled by 10 ** 6, i.e. microseconds.
                'Timestamp': timestamp * 10 ** 6,
            }]
            args = urllib.urlencode({'tables': json.dumps(table_info)})
            req = ferryman_url + '/add-full-tables?' + args
            logging.info('SaaS request: "{}"'.format(req))

            resp = self._fetch_json(req)
            logging.info('SaaS response: "{}"'.format(resp))

            args = urllib.urlencode({'batch': resp['batch']})
            req = ferryman_url + '/get-batch-status?' + args
            logging.info('SaaS request: "{}"'.format(req))

            # NOTE(review): the loop has no upper bound of its own; it
            # presumably relies on the task-level time limit -- confirm.
            while True:
                resp = self._fetch_json(req)
                logging.info('SaaS response: "{}"'.format(resp))

                if resp['status'] in ('final', 'searchable'):
                    break
                time.sleep(60)

    def _dump_table(self, proxy, input_table, key_column, value_column):
        """Read the input table and build SaaS records from its positive rows.

        :param proxy: YT proxy to read from.
        :param input_table: path of the table to read.
        :param key_column: column whose value goes to 'Subkey_Owner'.
        :param value_column: column whose value goes to 'Data_TSKV'.
        :return: list of dicts with 'Subkey_Owner' and 'Data_TSKV' keys, one
            per row whose value is greater than 0.0.
        """
        import yt.wrapper as yt
        yt.config['proxy']['url'] = proxy
        yt.config['token'] = self.get_vault_data(self.owner, 'yt_token')

        records = []
        for row in yt.read_table(input_table, format=yt.JsonFormat()):
            domain = row[key_column]
            has_metrika = row[value_column]
            if has_metrika > 0.0:
                records.append({'Subkey_Owner': domain, 'Data_TSKV': 'val:s={}'.format(has_metrika)})

        return records


# Module-level reference to the task class defined in this file; presumably
# the name the Sandbox task loader looks up -- confirm against the SDK.
__TASK__ = ReleaseDomainHasMetrikaTrie
