import codecs
import json
import logging

import sandbox.common.types.misc as ctm
import sandbox.sandboxsdk.parameters as sdk_parameters
from sandbox.projects.BnoAppHostToAppBuild.resource_types import BNO_URL_TO_APP
from sandbox.projects.common import utils
from sandbox.projects.common.bno.resources import save_resource
from sandbox.projects.common.bno.utils import run_cmd
from sandbox.projects.common.nanny import nanny
from sandbox.projects.resource_types import BNO_HOST_TO_APP, BNO_GEMINICL_EXECUTABLE
from resource_types import BNO_APPS_TRIE_EXECUTABLE, BNO_APPS_TRIE
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.task import SandboxTask

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class MrPathParam(sdk_parameters.SandboxStringParameter):
    """Required string parameter with the path of the source Mapreduce table.

    The value is stored in the task context under the key ``'path'``.
    """

    required = True
    name = 'path'
    default_value = 'hahn://home/extdata/spk3/bno/TEST/new_base_DO_NOT_REMOVE'
    description = 'Mapreduce table path'


class VaultTokenItemNameParam(sdk_parameters.SandboxStringParameter):
    """Required string parameter naming the vault item that holds the mr token.

    The value is stored in the task context under the key ``'vault_item'``.
    """

    required = True
    name = 'vault_item'
    default_value = 'mrtoken'
    description = 'Vault item name for mr token'


def iread_file(path):
    """Lazily iterate over the rows of a UTF-8, tab-separated text file.

    Each line is stripped of surrounding whitespace and split on tabs.

    This is a generator function: the file is opened on the first iteration
    and stays open until the generator is exhausted or closed.  Yielding from
    inside the ``with`` block is what keeps the file readable; returning a
    lazy generator expression from inside it would hand the caller an
    iterator over an already closed file.

    :param path: path of the file to read.
    :return: generator yielding one list of unicode fields per line.
    """
    with codecs.open(path, 'r', encoding="utf-8") as f:
        for line in f:
            yield line.strip().split('\t')


class BnoAppHostToAppBuildTask(SandboxTask, nanny.ReleaseToNannyTask):
    """Sandbox task that builds ``bno.trie`` from a YT table and releases it.

    ``on_execute`` runs four steps:

    1. read the ``host`` / app-id columns of the YT table;
    2. expand every host into an ``http://`` and an ``https://`` URL row;
    3. run the ``BNO_GEMINICL_EXECUTABLE`` tool over those URLs and save the
       intermediate files, including ``docids2app.txt``;
    4. run the ``BNO_APPS_TRIE_EXECUTABLE`` tool on ``docids2app.txt``, save
       ``bno.trie`` and release it to testing, prestable and stable.
    """

    type = 'BNO_APP_HOST_TO_APP_BUILD'
    dns = ctm.DnsType.DNS64
    input_parameters = [MrPathParam, VaultTokenItemNameParam]
    environment = (
        environments.PipEnvironment("yandex-yt"),
    )

    # Value passed to the gemini tool's ``--quota`` option.
    _GEMINI_QUOTA = '50'

    def __init__(self, task_id=0):
        # Only the SandboxTask initialiser is invoked here.
        SandboxTask.__init__(self, task_id)

    def on_execute(self):
        """Run the whole pipeline: read, normalize, canonize, build the trie."""
        vault_item = self.ctx[VaultTokenItemNameParam.name]
        yt_token = self.get_vault_data(vault_item)
        yt_table = self.ctx[MrPathParam.name]

        with self.current_action('Read table'):
            host2app, _ = self.read_table(yt_token, yt_table)
        with self.current_action('Normalize data'):
            urls2app, _ = self.normalize_data(host2app)
        with self.current_action('Cannonize urls'):
            self.canonize_urls(urls2app)
        with self.current_action('Build bno.trie'):
            self.build_bno_trie()

    @staticmethod
    def yt_read_table(token, table):
        """Read a YT table given as ``<proxy>:<path>``.

        :param token: token passed to ``YtClient``.
        :param table: string such as ``hahn://home/...``; the part before the
            first colon is the proxy, everything after it is the table path.
        :return: whatever ``YtClient.read_table`` returns for ``format='dsv'``
            and ``raw=False``.
        """
        # Imported lazily: the package comes from the task's PipEnvironment.
        from yt.wrapper import YtClient
        # Split on the first colon only, so a colon inside the table path
        # (e.g. a timestamp in a table name) does not truncate it.
        parts = table.split(':', 1)
        client = YtClient(parts[0], token)
        return client.read_table(parts[1], format='dsv', raw=False)

    def read_table(self, token, table):
        """Load the host/app columns of *table* and save them as a resource.

        :return: tuple ``(rows, result of save_resource)`` where every row is
            ``[host, gplay, itunes_iphon, itunes_iPad]`` decoded from UTF-8.
        """
        path = self.path('host2app.txt')
        # NOTE(review): 'itunes_iphon' looks like a truncated 'itunes_iphone';
        # it is kept as is because it must match the table's column name.
        columns = ('host', 'gplay', 'itunes_iphon', 'itunes_iPad')
        data = [
            [unicode(row[column], 'utf-8') for column in columns]
            for row in self.yt_read_table(token, table)
        ]
        return data, save_resource(self, data, path=path)

    def normalize_data(self, data):
        """Prefix every row with an ``http://`` and an ``https://`` host URL.

        :param data: rows whose first element is the host.
        :return: tuple ``(rows, result of save_resource)``; all ``http`` rows
            come first, followed by all ``https`` rows.
        """
        # Unicode templates: formatting a non-ASCII unicode host into a byte
        # string template raises UnicodeEncodeError under Python 2.
        http_data = [[u'http://{}'.format(item[0])] + item for item in data]
        https_data = [[u'https://{}'.format(item[0])] + item for item in data]
        data = http_data + https_data
        return data, save_resource(self, data, self.path('urls2app.txt'))

    @staticmethod
    def _read_responses(path):
        """Return the ``Response`` object of every JSON line in *path*.

        Lines without a ``Response`` key contribute an empty dict.
        """
        with codecs.open(path, 'r', encoding='utf-8') as f:
            return [json.loads(line).get('Response', {}) for line in f]

    def _run_gemini(self, tool, query_type, input_path, output_path):
        """Run the gemini tool of the given ``--type`` over *input_path*.

        NOTE(review): the ``'>'`` element is handed to ``run_cmd`` as a plain
        argument; this presumably relies on ``run_cmd`` executing the command
        through a shell so that output lands in *output_path* - confirm.
        """
        run_cmd([tool, '--format', 'json', '--type', query_type, '--quota', self._GEMINI_QUOTA,
                 '-f', input_path, '>', output_path])

    def canonize_urls(self, urls2app):
        """Run the URLs through the gemini tool and save the derived files.

        :param urls2app: rows produced by ``normalize_data``
            (``[url, host, app ids...]``).
        :return: tuple ``(rows, result of save_resource)`` for
            ``docids2app.txt``; each row is the value looked up for the URL
            (``'#'`` when there is none) followed by the row's app ids.
        """
        file_name = self.path('urls.txt')
        with codecs.open(file_name, 'w', encoding='utf-8') as f:
            for item in urls2app:
                print >> f, item[0]

        tool = utils.sync_last_stable_resource(BNO_GEMINICL_EXECUTABLE, arch='linux')

        urls_normalized_json_path = self.path('urls_normalized.json')
        self._run_gemini(tool, 'strong', file_name, urls_normalized_json_path)

        # Main URL -> original URL.  Responses without a main URL are skipped;
        # when several original URLs share a main URL the last one read wins.
        urls = {}
        for response in self._read_responses(urls_normalized_json_path):
            main_url = response.get('MainUrl', [None])[0]
            if main_url:
                urls[main_url] = response.get('OriginalUrl')
        save_resource(self, path=urls_normalized_json_path)

        urls_normalized_txt_path = self.path('urls_normalized.txt')
        with codecs.open(urls_normalized_txt_path, 'w', encoding='utf-8') as f:
            for url in urls:
                print >> f, url
        path = self.path('gemini.json')
        save_resource(self, path=urls_normalized_txt_path)

        self._run_gemini(tool, 'search_doc_id', urls_normalized_txt_path, path)
        save_resource(self, path=path)

        # gemini.json feeds both mappings below, so it is parsed only once.
        responses = self._read_responses(path)

        canonized = [(urls[item.get('OriginalUrl')], item.get('CanonizedUrl')) for item in responses]
        save_resource(self, canonized, self.path('url2canonized.txt'))
        canonized = dict(canonized)
        # item[1] is the bare host and is dropped; item[2:] are the app ids.
        data = [[canonized.get(item[0], '#')] + item[2:] for item in urls2app]
        save_resource(self, data, self.path('doc_url2app.txt'), BNO_URL_TO_APP)

        docids = [(urls[item.get('OriginalUrl')], item.get('MainUrl', [None])[0]) for item in responses]
        save_resource(self, docids, self.path('docids.txt'))
        docids = dict(docids)
        data = [[docids.get(item[0], '#')] + item[2:] for item in urls2app]
        return data, save_resource(self, data, self.path('docids2app.txt'), BNO_HOST_TO_APP)

    def build_bno_trie(self):
        """Build ``bno.trie`` from ``docids2app.txt`` and release it.

        The trie tool is run on ``docids2app.txt``; ``bno.trie`` is then saved
        as a ``BNO_APPS_TRIE`` resource (presumably written there by the tool
        - confirm) and ``ReleaseToNannyTask.on_release`` is called once per
        release status.
        """
        tool = utils.sync_last_stable_resource(BNO_APPS_TRIE_EXECUTABLE, arch='linux')
        run_cmd([tool, self.path('docids2app.txt')])
        save_resource(self, path=self.path('bno.trie'), resource_type=BNO_APPS_TRIE)

        release_to = ['testing', 'prestable', 'stable']
        logger.info('release BNO_APPS_TRIE to: %s', release_to)

        for release_type in release_to:
            release_params = {
                'release_comments': 'automatic release',
                'release_subject': '',
                'release_status': release_type,
                'email_notifications': {
                    'cc': [],
                    'to': []
                },
                'releaser': self.owner
            }
            nanny.ReleaseToNannyTask.on_release(self, release_params)


# Module-level alias of the task class (presumably the name the Sandbox
# task loader looks up - confirm).
__Task__ = BnoAppHostToAppBuildTask
