import logging
import urllib2
import os
import sys

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import SandboxStringParameter, SandboxBoolParameter

from sandbox.sandboxsdk.process import get_process_info
from sandbox.sandboxsdk import environments

from sandbox.projects.common.wizard.providers import EntitySearchProvider
from sandbox.projects.common.wizard.current_production import get_current_production_resource_id
from sandbox.projects import resource_types
from sandbox.projects.common.utils import get_or_default
from sandbox.projects.EntitySearch import resource_types as es_resource_types


def collect_pairs(token, cluster, table, columns):
    """
        Read a YT table and return a list of 2-tuples built from two of its columns.

        :param token: token handed to ``YtClient`` as its second positional argument
        :param cluster: cluster/proxy handed to ``YtClient`` as its first positional argument
        :param table: path of the table to read
        :param columns: sequence whose first two items are the column names to extract
        :return: list of ``(row[columns[0]], row[columns[1]])`` in table order
    """
    from yt.wrapper import YtClient

    yt_client = YtClient(cluster, token)
    # raw=False makes the client yield parsed rows, so columns can be looked up by name.
    parsed_rows = yt_client.read_table(table, format='dsv', raw=False)
    return [(row[columns[0]], row[columns[1]]) for row in parsed_rows]


def get_prod_res_id(res_type, nanny_token):
    """
        Return the current production resource id of ``res_type``.

        Thin wrapper over ``get_current_production_resource_id`` that always asks about
        the fixed 'sas-production-entitysearch-yp' identifier.

        :param res_type: resource type to look up
        :param nanny_token: token forwarded unchanged to the lookup helper
        :return: whatever ``get_current_production_resource_id`` returns
    """
    target_name = 'sas-production-entitysearch-yp'
    return get_current_production_resource_id(target_name, res_type, nanny_token)


class UploadGeosearchEntities(SandboxTask):
    """
        Gathers all EntitySearch objects corresponding to geosearch organizations and loads to SaaS snippets.

        The task reads (ontoid, id) pairs from YT, asks an EntitySearch instance for every
        pair in a fixed set of languages, stores the answers in a local dsv dump, publishes
        the dump as a resource and writes it to the output YT table.
    """
    type = 'UPLOAD_GEOSEARCH_ENTITIES'

    # Table read by collect_permalinks(); must contain 'ontoid' and 'ysr_id' columns.
    class InputTableParameter(SandboxStringParameter):
        description = 'Input table with objects from entity search'
        name = 'entity_table'
        default_value = '//home/geosearch/entity_search_permalinks'

    # YT cluster used both for reading the input tables and for writing the result.
    class InputClusterParameter(SandboxStringParameter):
        description = 'YT cluster'
        name = 'cluster'
        default_value = 'hahn.yt.yandex.net'

    # NOTE(review): when this flag is set, on_execute() stops right after resolving the
    # EntitySearch resource ids and produces nothing; that does not obviously match the
    # description below - confirm the intended semantics.
    class DumpOnly(SandboxBoolParameter):
        description = 'Just dump to SaaS'
        name = 'dump_only'
        hidden = False
        default_value = False

    # Optional table read by collect_toponyms(); must contain 'ontoid' and 'source_id'
    # columns. An empty value (the default) disables toponym processing.
    class ToponymEntitiesTableParameter(SandboxStringParameter):
        description = 'Table with source_id <-> onto_id mapping'
        name = 'toponym_entities_table'
        default_value = ''

    # Destination YT table for the generated dsv dump.
    class OutputTableParameter(SandboxStringParameter):
        description = 'Table with prepared snippets'
        name = 'snippets_table'
        default_value = '//home/geosearch/mesherin/entities_temp'

    input_parameters = (
        InputClusterParameter,
        InputTableParameter,
        DumpOnly,
        ToponymEntitiesTableParameter,
        OutputTableParameter,
    )
    environment = (environments.PipEnvironment('yandex-yt'), environments.PipEnvironment('saaspy'),)

    @staticmethod
    def collect_permalinks(token, cluster, table):
        """
            Return a list of (ontoid, ysr_id) pairs read from ``table``.
        """
        return collect_pairs(token, cluster, table, ['ontoid', 'ysr_id'])

    @staticmethod
    def collect_toponyms(token, cluster, table):
        """
            Return a list of (ontoid, source_id) pairs read from ``table``.
        """
        return collect_pairs(token, cluster, table, ['ontoid', 'source_id'])

    @staticmethod
    def compose_query(host, port, eid, lang):
        """
            Build the URL of a ``/get`` request for object ``eid`` in language ``lang``.

            The values are substituted as is, without URL quoting.
        """
        return 'http://{host}:{port}/get?obj={eid}&lang={lang}'.format(host=host, port=port, eid=eid, lang=lang)

    @staticmethod
    def upload_to_saas(oid, data, saas):
        """
            Send a single 'update' index request for document ``oid`` to SaaS.

            Nothing in this class calls this method.

            :param oid: document id passed to ``SaasDocument``
            :param data: text that is XML-escaped and wrapped into an <EntitySearch> element
            :param saas: first argument of ``SaasClient`` (presumably the SaaS endpoint - confirm)
        """
        from xml.sax.saxutils import escape
        from saaspy import SaasDocument, SaasClient
        tmpl = u'''<EntitySearch>{}</EntitySearch>'''
        res = tmpl.format(escape(data))
        service = 'maps'
        # NOTE(review): this looks like a hard-coded service credential; consider reading
        # it from the vault the same way on_execute() obtains its tokens.
        key = '125eacfe0f6b45c6a7476d7575527d72'
        client = SaasClient(saas, None, service, key)
        doc = SaasDocument(oid)
        doc.add_property('entity/1.x', res)
        client.send_index_request('update', [doc.to_json()], prefix=1, n_retries=5)

    def _dump_entities(self, es_dump, records, key_template):
        """
            Query EntitySearch for every record and language and append the answers to ``es_dump``.

            :param es_dump: writable file object receiving one dsv line per (record, language)
            :param records: iterable of (ontoid, id) pairs; the ontoid goes into the request,
                            the id goes into the snippet key
            :param key_template: format string taking the id as {0} and the language as {1}
        """
        from contextlib import closing

        for onto_id, oid in records:
            for lang in ['ru', 'en', 'uk', 'be', 'tr', 'tt', 'uz', 'kz']:
                # Presumably the instance started by EntitySearchProvider listens on this
                # port - TODO confirm; the provider object itself is not consulted.
                req = self.compose_query('localhost', str(8895), onto_id, lang)
                # Close every response explicitly: this loop issues one request per
                # record and language, so leaked connections would pile up quickly.
                with closing(urllib2.urlopen(req)) as response:
                    resp = response.read()
                # Drop backslash-escaped double quotes from the answer.
                resp = resp.replace('\\"', '')
                key = key_template.format(oid, lang)
                es_dump.write('key={0}\tvalue=<EntitySearch><![CDATA[{1}]]></EntitySearch>\n'
                              .format(key, resp.strip()))

    def on_execute(self):
        """
            Task entry point: build the snippets dump and publish it.

            Steps: read tokens from the vault, collect the (ontoid, id) pairs from YT,
            resolve the production EntitySearch resource ids, then, unless ``dump_only``
            is set, query EntitySearch for every pair, save the dump as an OTHER_RESOURCE
            resource and write it to the ``snippets_table`` YT table.

            :raises Exception: any error raised while writing to YT is logged and re-raised
        """
        from yt.wrapper import YtClient

        nanny_token = self.get_vault_data('GEOMETA-SEARCH', 'nanny_token')
        yt_token = self.get_vault_data('GEOMETA-SEARCH', 'yt-token')
        cluster = self.ctx['cluster']
        client = YtClient(proxy=cluster, token=yt_token)

        os.environ['MR_RUNTIME'] = 'YT'
        os.environ['YT_TOKEN'] = yt_token

        entity_to_permalink = self.collect_permalinks(yt_token, cluster, self.ctx['entity_table'])
        entity_to_source_id = []
        if self.ctx['toponym_entities_table']:
            entity_to_source_id = self.collect_toponyms(yt_token, cluster, self.ctx['toponym_entities_table'])

        es_binary = get_prod_res_id(es_resource_types.ENTITY_SEARCH_EXECUTABLE, nanny_token)
        es_config = get_prod_res_id(es_resource_types.ENTITY_SEARCH_CONFIG, nanny_token)
        es_data = get_prod_res_id(es_resource_types.ENTITY_SEARCH_DATA, nanny_token)
        es_fresh = get_prod_res_id(es_resource_types.ENTITY_SEARCH_FRESH, nanny_token)
        es_ner = get_prod_res_id(es_resource_types.ENTITY_SEARCH_NER_DATA, nanny_token)

        file_dump = './es_dump'

        # NOTE(review): with dump_only set the task ends here without producing any output;
        # this mirrors the original control flow - confirm it is intended.
        if get_or_default(self.ctx, self.DumpOnly):
            return

        with open(file_dump, 'w') as es_dump:
            with EntitySearchProvider(
                fresh_id=es_fresh,
                ner_id=es_ner,
                data_id=es_data,
                binary_id=es_binary,
                config_id=es_config,
                start_timeout=60 * 60,
                use_mmap=True,
            ) as provider:
                logging.info('ps info: {0}'.format(get_process_info(provider.process.pid)))
                # Organizations are keyed by the bare id, toponyms get a 'geocoder_id_' prefix.
                self._dump_entities(es_dump, entity_to_permalink, '{0}~{1}')
                self._dump_entities(es_dump, entity_to_source_id, 'geocoder_id_{0}~{1}')

        self.create_resource('saas_dump', file_dump, resource_types.OTHER_RESOURCE)
        try:
            # Keep the dump open only for the duration of the upload so the descriptor
            # is released deterministically; lines are still streamed one by one.
            with open(file_dump) as dump_lines:
                client.write_table(self.ctx['snippets_table'], (line for line in dump_lines), format="dsv", raw=True)
        except Exception:
            # logging.exception records the traceback; the bare raise re-raises the
            # original exception without losing it.
            logging.exception('Cannot dump to yt')
            raise


# Module-level alias of the task class (presumably the name the Sandbox loader looks up - confirm).
__Task__ = UploadGeosearchEntities
