import codecs
import json
import logging

from sandbox import common
import sandbox.projects.resource_types.releasers as resource_releasers
from sandbox import sdk2
from sandbox.common.types.client import Tag
#from projects.BnoAppHostToAppBuild.resource_types import BNO_APPS_TRIE, BNO_APPS_TRIE_EXECUTABLE
from sandbox.projects.common import utils
from sandbox.projects.common.nanny import nanny
from sandbox.projects.resource_types import BNO_HOST_TO_APP, BNO_GEMINICL_EXECUTABLE, PLAIN_TEXT
from sandbox.sandboxsdk import environments
from sandbox.sdk2.helpers import subprocess

logger = logging.getLogger(__name__)


# Resource type declared by this module (subclass of sdk2.Resource).
# NOTE(review): nothing in the visible part of this file creates a resource of
# this type -- join_docids_with_apps publishes docids2app.txt as
# BNO_HOST_TO_APP. Confirm whether this type is used elsewhere.
class BNO_DOCID_TO_APP(sdk2.Resource):
    # Resource attributes; their exact semantics are defined by sdk2.Resource.
    releasable = True
    any_arch = False
    executable = False
    auto_backup = True
    # Releasers list is shared via sandbox.projects.resource_types.releasers.
    releasers = resource_releasers.bno_releasers


def is_dict(obj):
    """Return True if ``obj`` is a mapping (``dict`` or any other ``Mapping``).

    The ABC is imported from ``collections.abc`` when available: the
    ``collections.Mapping`` alias was removed in Python 3.10.  Python 2 has no
    ``collections.abc`` module, so it falls back to ``collections``.
    """
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping
    return isinstance(obj, Mapping)


def is_iter(obj):
    """Return True if ``obj`` is iterable but is not a string.

    Strings are excluded so that callers can treat "a collection of values"
    and "a single text value" differently.

    Works on both Python 2 and Python 3: the ``Iterable`` ABC lives in
    ``collections.abc`` on Python 3 (the ``collections.Iterable`` alias was
    removed in 3.10), and ``basestring`` exists only on Python 2.
    """
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2
        from collections import Iterable
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:  # Python 3: text and byte strings are separate types
        string_types = (str, bytes)
    return isinstance(obj, Iterable) and not isinstance(obj, string_types)


class BuildBnoAppsTrie(sdk2.Task, nanny.ReleaseToNannyTask):
    """Build a docid -> mobile apps mapping from a YT table of hosts.

    Steps (see ``on_execute``):

    1. read ``(host, gplay, itunes)`` rows from a YT table;
    2. expand every host into an ``http://`` and an ``https://`` URL;
    3. run the gemini tool over those URLs;
    4. join gemini's answers with the app ids and write ``docids2app.txt``.

    The output of every step is also published as a task resource.
    """

    class Requirements(sdk2.Requirements):
        client_tags = Tag.LINUX_PRECISE
        disk_space = 10 * 1024
        ram = 8 * 1024
        cores = 1
        # Needed for the ``yt.wrapper`` import in yt_read_table (presumably
        # provided by the "yandex-yt" pip package).
        environments = sdk2.Requirements.environments.default + (environments.PipEnvironment("yandex-yt"),)

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        # Expected form is '<proxy>:<table path>'; see yt_read_table.
        yt_path = sdk2.parameters.String(
            'Yt path to bno apps table',
            default='hahn://home/extdata/release/bno/new_base/ru',
            required=True)
        # Name of the vault record holding the token, not the token itself
        # (it is resolved with sdk2.Vault.data in on_execute).
        yt_token = sdk2.parameters.String(
            'Yt token name',
            description='vault secret name',
            default='mrtoken',
            required=True
        )
        # 3600 * 3 == 10800 (three hours if the unit is seconds -- TODO confirm).
        kill_timeout = 3600 * 3

    def on_execute(self):
        """Run the pipeline: read table, expand hosts, canonize, join.

        Each step returns ``(value, resource)``; only the value is needed here
        because the resource has already been published by the step itself.
        """
        yt_table = str(self.Parameters.yt_path)
        yt_token = sdk2.Vault.data(self.owner, self.Parameters.yt_token)

        logger.info('Read table')
        host2apps, _ = self.read_table(yt_token, yt_table)

        logger.info('Expand hosts')
        urls2app, _ = self.expand_hosts(host2apps)

        logger.info('Cannonize urls')
        urls_normalized_json_path, _ = self.canonize_urls(urls2app)

        logger.info('Join docids with apps')
        self.join_docids_with_apps(urls2app, urls_normalized_json_path)

        # logger.info('Build bno.trie'):
        # self.build_bno_trie()

    @staticmethod
    def yt_read_table(token, table):
        """Open a YT table for reading.

        :param token: token passed to ``YtClient``.
        :param table: ``'<proxy>:<path>'``, e.g. ``'hahn://home/some/table'``.
            Only the first ``':'`` separates the proxy from the path, so the
            path itself may contain colons.
        :return: the result of ``YtClient.read_table(path, format='dsv', raw=False)``.
        :raises ValueError: if ``table`` contains no ``':'`` separator.
        """
        from yt.wrapper import YtClient
        proxy, separator, path = table.partition(':')
        if not separator:
            raise ValueError("expected '<proxy>:<path>', got %r" % (table,))
        client = YtClient(proxy, token)
        return client.read_table(path, format='dsv', raw=False)

    def read_table(self, token, table):
        """Read host/app rows from YT and publish them as ``host2apps.txt``.

        :param token: YT token, forwarded to ``yt_read_table``.
        :param table: ``'<proxy>:<path>'`` of the source table.
        :return: ``(rows, resource)``; ``rows`` is a list of
            ``[host, gplay, itunes]`` lists of unicode strings decoded from
            UTF-8, ``resource`` is the value returned by ``_save_resource``.
        """
        path = self.path('host2apps.txt')
        # NOTE(review): 'itunes_iphon' is used exactly as in the original code;
        # confirm the column name against the table schema.
        columns = ('host', 'gplay', 'itunes_iphon')
        data = [
            [unicode(row[column], 'utf-8') for column in columns]
            for row in self.yt_read_table(token, table)
        ]
        return data, self._save_resource(data=data, path=path)

    def expand_hosts(self, data):
        """Prefix every row with its URL, once per scheme.

        :param data: list of ``[host, gplay, itunes]`` rows.
        :return: ``(rows, resource)``; ``rows`` holds
            ``[url, host, gplay, itunes]`` lists -- all ``http://`` rows first,
            then all ``https://`` rows.  The rows are also published as
            ``urls2apps.txt``.
        """
        # A unicode format string is required: on Python 2 a byte-string
        # ``'...'.format(host)`` raises UnicodeEncodeError for non-ASCII hosts.
        expanded = [
            [u'{}://{}'.format(scheme, item[0])] + item
            for scheme in ('http', 'https')
            for item in data
        ]
        return expanded, self._save_resource(data=expanded, path=self.path('urls2apps.txt'))

    def canonize_urls(self, urls2apps):
        """Run the gemini tool over the URLs and store its stdout as JSON lines.

        :param urls2apps: rows whose first element is the URL to canonize.
        :return: ``(path, resource)`` where ``path`` is the path object of
            ``urls_normalized.json`` and ``resource`` is the value returned by
            ``_save_resource``.
        :raises common.errors.TaskFailure: if gemini exits with a non-zero code.
        """
        gemini_tool = utils.sync_last_stable_resource(BNO_GEMINICL_EXECUTABLE, arch='linux')

        urls_file = self.path('urls.txt')
        with codecs.open(urls_file.as_posix(), 'w', encoding='utf-8') as f:
            for item in urls2apps:
                f.write(item[0])  # 'http[s]://<host>'
                f.write('\n')

        urls_normalized_json_path = self.path('urls_normalized.json')
        cmd = [gemini_tool, '--format', 'json', '--type', 'search_doc_id', '--quota', '50',
               '-f', urls_file.as_posix()]
        # Popen is given an argv list and no shell, so a '>' element would be
        # passed to gemini as a plain argument instead of redirecting output.
        # The output file is therefore opened here and handed over as stdout;
        # stderr still goes to the task's process log.
        with sdk2.helpers.ProcessLog(self, logger='gemini') as pl, \
                open(urls_normalized_json_path.as_posix(), 'wb') as gemini_out:
            ret_code = subprocess.Popen(cmd, stdout=gemini_out, stderr=pl.stdout).wait()
        if ret_code != 0:
            logger.error('gemini exit code: %s', ret_code)
            raise common.errors.TaskFailure('gemini failed')

        return urls_normalized_json_path, self._save_resource(path=urls_normalized_json_path)

    def join_docids_with_apps(self, urls2apps_list, urls_normalized_json_path):
        """Join gemini answers with app ids and publish ``docids2app.txt``.

        Output is one tab-separated line per usable gemini record:
        ``docid, original url, canonized url, host, gplay, itunes``.

        Records without a canonized URL or without a docid are logged and
        skipped; blank input lines are ignored.

        :param urls2apps_list: rows of ``[url, host, gplay, itunes]``.
        :param urls_normalized_json_path: path object of the gemini output,
            one JSON document per line.
        :raises KeyError: if gemini reports an ``OriginalUrl`` that is not
            among the URLs in ``urls2apps_list``.
        """
        # url -> (host, gplay, itunes); a later duplicate url overrides an earlier one.
        apps_by_url = {}
        for row in urls2apps_list:
            apps_by_url[row[0]] = (row[1], row[2], row[3])

        docids2apps = self.path('docids2app.txt')
        with codecs.open(urls_normalized_json_path.as_posix(), 'r', encoding='utf-8') as f, \
                codecs.open(docids2apps.as_posix(), 'w', encoding='utf-8') as out:
            # Stream line by line instead of loading every response into memory.
            for line in f:
                if not line.strip():
                    continue
                response = json.loads(line).get('Response', {})
                # todo: handle errors from gemini

                original_url = response.get('OriginalUrl')
                canonized_url = response.get('CanonizedUrl')
                if not canonized_url:
                    # Lazy %-formatting: a byte-string ``.format()`` with a
                    # non-ASCII unicode url raises on Python 2.
                    logger.error('No canonized url for %s', original_url)
                    continue

                # The first element of 'MainUrl' is what gets written as the
                # docid (presumably the canonized url's docid -- the gemini
                # output schema is not visible here).  A missing or empty
                # 'MainUrl' must not crash the whole task.
                main_url = response.get('MainUrl') or [None]
                canonized_url_docid = main_url[0]
                if not canonized_url_docid:
                    logger.error('No docid for %s', original_url)
                    continue

                host, gplay_app, itunes_app = apps_by_url[original_url]
                out.write(u'\t'.join(
                    (canonized_url_docid, original_url, canonized_url, host, gplay_app, itunes_app)))
                out.write(u'\n')

        self._save_resource(path=docids2apps, resource_type=BNO_HOST_TO_APP)

    # def build_bno_trie(self):
    #     tool = utils.sync_last_stable_resource(BNO_APPS_TRIE_EXECUTABLE, arch='linux')
    #     run_cmd([tool, self.path('docids2app.txt')])
    #     self._save_resource(path=self.path('bno.trie'), resource_type=BNO_APPS_TRIE)
    #
    #     release_to = ['stable']
    #     logger.info('release BNO_APPS_TRIE to: {}'.format(release_to))
    #
    #     for release_type in release_to:
    #         release_params = {
    #             'release_comments': 'automatic release',
    #             'release_subject': '',
    #             'release_status': release_type,
    #             'email_notifications': {
    #                 'cc': [],
    #                 'to': []
    #             },
    #             'releaser': self.owner
    #         }
    #         nanny.ReleaseToNannyTask.on_release(self, release_params)

    def _save_resource(self, data=None, path=None, resource_type=PLAIN_TEXT):
        """Optionally write ``data`` to a file, then publish the file as a resource.

        :param data: if not None, it is written to ``path`` as UTF-8 text:
            a mapping becomes ``key<TAB>value`` lines, any other non-string
            iterable becomes one line per item (iterable values/items are
            tab-joined), anything else is written as is.
        :param path: path object of the file to publish; when omitted and
            ``data`` is given, a temporary name inside the task directory is
            used.
        :param resource_type: resource class to instantiate.
        :return: the ``sdk2.ResourceData`` object, after ``ready()`` was called.
        :raises ValueError: if neither ``data`` nor ``path`` is given, or if
            the file to publish does not exist.
        """
        import os
        import tempfile

        def as_text(value):
            # Collections are flattened into a single tab-separated line.
            if is_iter(value):
                return '\t'.join(map(unicode, value))
            return value

        path = path.as_posix() if path else None
        if data is not None:
            # NOTE: tempfile.mktemp is deprecated and race-prone; it is kept
            # because mkstemp would create the file with mode 0600, which
            # would change the permissions of the published file.
            path = path or tempfile.mktemp(dir=self.path().as_posix())
            with codecs.open(path, 'w', encoding='utf-8') as f:
                if is_dict(data):
                    for key, val in data.iteritems():
                        f.write(key)
                        f.write('\t')
                        f.write(as_text(val))
                        f.write('\n')
                elif is_iter(data):
                    for item in data:
                        f.write(as_text(item))
                        f.write('\n')
                else:
                    f.write(data)

        if not path:
            raise ValueError("path or data is required")
        if not os.path.exists(path):
            # Distinct message: a path was supplied but nothing was produced
            # there (e.g. an external tool did not write its output).
            raise ValueError("file to publish does not exist: %r" % (path,))

        file_name = os.path.basename(path)
        resource = sdk2.ResourceData(resource_type(self, file_name, file_name, ttl='inf'))
        resource.ready()
        return resource
