# -*- coding: utf-8 -*-

import os
import tarfile
from sandbox import sdk2

from sandbox.projects import resource_types


class IMAGES_TAGS_DICT_MODELS_BUNDLE(sdk2.Resource):
    """
       Bundle of models consumed by IMAGES_TAGS_DICT_UTIL_EXECUTABLE:
       - lang recognizer
       - word/sentence tagger
       - set of regex filters
    """
    # Storage settings: infinite time-to-live, automatic backup enabled.
    ttl = 'inf'
    auto_backup = True

    # The bundle is not flagged as an executable and not flagged as any-arch.
    executable = False
    any_arch = False

    # Release settings: who may release the resource and who is subscribed
    # to its releases (both lists are defined in resource_types).
    releasable = True
    releasers = resource_types.images_tags_releasers
    release_subscribers = resource_types.images_tags_subscribers


class ImagesBuildTagsDictModelsBundle(sdk2.Task):
    """
        Gathers models needed to filter by POS during index construction.

        Unpacks the POS taggers archive and the regex/whitewords archive into
        the current working directory, then packs a fixed set of files from
        there together with the lang recognizer ``data`` entry into a single
        IMAGES_TAGS_DICT_MODELS_BUNDLE resource named ``dict_models.tar``.
    """

    class Requirements(sdk2.Task.Requirements):
        disk_space = 5 * 1024  # 5 Gb
        ram = 2 * 1024  # 2 Gb
        cores = 1

        # No caches are declared for this task.
        # NOTE(review): presumably kept empty on purpose so the task can be
        # scheduled on lightweight hosts - confirm against Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        pos_taggers_models_resource = sdk2.parameters.Resource(
            'POS taggers resource. Should contain word and sentence taggers for russian and english.',
            required=True,
            default_value=607664006,
        )
        lang_recognizer_resource = sdk2.parameters.Resource(
            'Lang recognizer model.',
            required=True,
            default_value=515468179,
        )
        regex_resourse = sdk2.parameters.Resource(
            'RegEx and whitewords fpr POS filtering.',
            required=True,
            default_value=612890394,
        )

    def _unpack_to_cwd(self, resource):
        """
            Resolve the local path of a tar archive resource and extract it
            into the current working directory.

            :param resource: resource whose data is a tar archive
            :return: local path of the archive as a string
        """
        archive_path = str(sdk2.ResourceData(resource).path)
        # NOTE(review): extractall() trusts member paths stored in the
        # archive; the inputs are assumed to be trusted resources.
        with tarfile.open(archive_path) as tar:
            tar.extractall()
        return archive_path

    def on_execute(self):
        """
            Build the ``dict_models.tar`` bundle resource from the input
            resources given in the task parameters.
        """
        # Files expected in the working directory once both input archives
        # are unpacked; each is stored in the bundle under the same name.
        bundled_files = (
            'en.sent-pos.tag',
            'en.word-pos.txt',
            'ru.lemmer',
            'ru.sent-pos.tag',
            'pos_filtering_regex.txt',
            'pos_filtering_whitewords.txt',
        )

        result_resource = IMAGES_TAGS_DICT_MODELS_BUNDLE(
            self,
            '',
            'dict_models.tar'
        )
        result_resource_data = str(sdk2.ResourceData(result_resource).path)

        pos_taggers_models_data_path = self._unpack_to_cwd(self.Parameters.pos_taggers_models_resource)
        self._unpack_to_cwd(self.Parameters.regex_resourse)

        # The lang recognizer resource is used as-is: only its 'data' entry
        # goes into the bundle.
        lang_recognizer_data_path = os.path.join(
            str(sdk2.ResourceData(self.Parameters.lang_recognizer_resource).path),
            'data',
        )

        # A single pre-formatted argument keeps the output identical under
        # both the Python 2 print statement and the Python 3 print function
        # (two arguments in parentheses would print a tuple on Python 2).
        print('pos_taggers_models_data_path  {}'.format(pos_taggers_models_data_path))
        print('lang_recognizer_data_path  {}'.format(lang_recognizer_data_path))
        print('self.Context.dict_models_id: {}'.format(result_resource.id))

        # 'w|gz' writes a gzip-compressed tar stream.
        with tarfile.open(result_resource_data, 'w|gz') as tar:
            tar.add(lang_recognizer_data_path, os.path.basename(lang_recognizer_data_path))
            for file_name in bundled_files:
                tar.add(file_name, file_name)
