import hashlib
import json
import logging
import os

from sandbox.sandboxsdk import channel
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import task
from sandbox.sandboxsdk.parameters import SandboxStringParameter

from sandbox.projects.common.utils import get_or_default

from sandbox.projects.geosuggest.common import parameters


class LastYTTableRevision(SandboxStringParameter):
    """Optional string parameter carrying a previously seen YT table revision."""

    # Optional input; the default is the string '0'.
    required = False
    default_value = '0'

    # Context key and the human-readable text attached to the parameter.
    name = 'geosuggest_last_yt_revision'
    description = 'Revision, if not empty, the task will download only if new table is available'


class LastYTTableResource(SandboxStringParameter):
    """Optional string parameter carrying the id of a previously created resource."""

    # Optional input; empty by default.
    required = False
    default_value = ''

    # Context key and the human-readable text attached to the parameter.
    name = 'geosuggest_last_yt_table_resource'
    description = 'Resource id, if not empty, the task will not upload resource if md5 has not changed'


class ResourceTypeNameParameter(SandboxStringParameter):
    """Required string parameter naming the type of the resource to create."""

    # Mandatory input; the default is an empty string.
    required = True
    default_value = ''

    # Context key and the human-readable text attached to the parameter.
    name = 'geosuggest_resource_type_name'
    description = 'Type of resource'


class IgnoreZeroFieldParameter(SandboxStringParameter):
    """Optional string parameter naming the record field used to drop zero rows."""

    # Optional input; empty by default.
    required = False
    default_value = ''

    # Context key and the human-readable text attached to the parameter.
    name = 'geosuggest_ignore_zero_field'
    description = 'Field name, all records with 0 in this field will be ignored'


class GeoSuggestDownloadYTTable(task.SandboxTask):
    """
        Download table from YT to a file, compare with "previous" file and
        create resource if table has been updated

        The download is skipped when the table revision is not greater than
        the revision given as a parameter or stored on the previous resource.
        No resource is created when the md5 of the downloaded rows equals the
        'geosuggest_hash' attribute of the previous resource.
    """

    type = "GEO_SUGGEST_DOWNLOAD_YT_TABLE"

    input_parameters = [
        parameters.GeoSuggestYTProxyParameter,
        parameters.GeoSuggestYTTablePath,
        parameters.GeoSuggestYTFormat,
        parameters.GeoSuggestYTRaw,
        ResourceTypeNameParameter,
        LastYTTableRevision,
        LastYTTableResource,
        IgnoreZeroFieldParameter,
        parameters.SandboxVaultOwner,
        parameters.SandboxVaultYtTokenName,
    ]

    environment = (
        environments.PipEnvironment('yandex-yt', "0.7.34-0"),
        environments.PipEnvironment("yandex-yt-yson-bindings-skynet"),
    )

    TIMEOUT = 24 * 3600
    execution_space = 512 * 1024
    disk_space = 512 * 1024

    @staticmethod
    def _is_revision_stale(revision, previous_revision):
        """
            Tell whether the table revision is not newer than a known one.

            :param revision: current table revision as int
            :param previous_revision: earlier revision (string or number);
                None and '' mean "unknown" and never make the table stale
            :return: True if revision <= previous_revision (the fact is
                logged), False otherwise
            :raises ValueError: if previous_revision is not numeric
        """
        # An unset or missing value must not reach int(): int('') raises
        # ValueError and int(None) raises TypeError.
        if previous_revision is None or previous_revision == '':
            return False
        if revision > int(previous_revision):
            return False
        logging.info('Table has old revision, will not downloaded: ({0} <= revision parameter {1})'.format(revision, previous_revision))
        return True

    def download_table_to_file(self):
        """
            Read the YT table into '<table basename>.txt' in the task directory.

            :return: {'downloaded': False} when the table revision is not
                newer than the known one or the data hash is unchanged;
                otherwise a dict with 'downloaded': True, 'filename',
                'revision' and 'hash' (md5 hex digest of the kept rows)
        """
        import yt.wrapper

        yt_proxy = get_or_default(self.ctx, parameters.GeoSuggestYTProxyParameter)
        vault_owner = get_or_default(self.ctx, parameters.SandboxVaultOwner)
        vault_name = get_or_default(self.ctx, parameters.SandboxVaultYtTokenName)
        yt_token = self.get_vault_data(vault_owner, vault_name)
        yt_table = get_or_default(self.ctx, parameters.GeoSuggestYTTablePath)
        ignore_zero_field = get_or_default(self.ctx, IgnoreZeroFieldParameter)

        client = yt.wrapper.YtClient(proxy=yt_proxy, token=yt_token)
        revision = int(client.get_attribute(yt_table, 'revision'))

        # '0' is the parameter default and stands for "no known revision".
        previous_revision = get_or_default(self.ctx, LastYTTableRevision)
        if previous_revision != '0' and self._is_revision_stale(revision, previous_revision):
            return {'downloaded': False}

        resource_id = get_or_default(self.ctx, LastYTTableResource)
        previous_geosuggest_hash = None
        if resource_id:
            sandbox = channel.channel.sandbox
            previous_geosuggest_hash = sandbox.get_resource_attribute(resource_id, 'geosuggest_hash')
            resource_revision = sandbox.get_resource_attribute(resource_id, 'geosuggest_yt_revision')
            if self._is_revision_stale(revision, resource_revision):
                return {'downloaded': False}

        lines = client.read_table(
            yt_table,
            format=str(get_or_default(self.ctx, parameters.GeoSuggestYTFormat)),
            raw=get_or_default(self.ctx, parameters.GeoSuggestYTRaw)
        )
        filename = os.path.join(os.path.realpath(self.abs_path()), os.path.basename(os.path.normpath(yt_table)) + '.txt')
        geosuggest_hash = hashlib.md5()
        # The filter setting does not change between rows, so decide once.
        skip_zero_records = bool(ignore_zero_field)
        # NOTE(review): every row is handled as a JSON text line (json.loads,
        # rstrip), so this presumably expects raw=True with a JSON format -
        # confirm against the task configuration.
        with open(filename, 'w') as outfile:
            for line in lines:
                # Rows lacking the field are dropped too: the lookup falls
                # back to 0.
                if skip_zero_records and json.loads(line).get(ignore_zero_field, 0) == 0:
                    continue
                outfile.write("{0}\n".format(line.rstrip('\n')))
                # Hash the row exactly as received so that the digest stays
                # comparable with hashes stored on earlier resources.
                geosuggest_hash.update(str(line))

        digest = geosuggest_hash.hexdigest()
        if previous_geosuggest_hash == digest:
            logging.info('Data hash is the same as in last resource ({0} == {1})'.format(previous_geosuggest_hash, digest))
            return {'downloaded': False}

        return {
            'downloaded': True,
            'filename': filename,
            'revision': revision,
            'hash': digest,
        }

    def on_execute(self):
        """
            Download the table and, if new data was obtained, create a
            resource of the configured type tagged with the table revision
            and the data hash.
        """
        result = self.download_table_to_file()

        if result.get('downloaded', False):
            self.create_resource(
                self.descr,
                result['filename'],
                get_or_default(self.ctx, ResourceTypeNameParameter),
                attributes={
                    'geosuggest_yt_revision': result.get('revision', 0),
                    'geosuggest_hash': result.get('hash', ''),
                }
            )


# Module-level alias for the task class defined above.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = GeoSuggestDownloadYTTable
