# -*- coding: utf-8 -*-

import os.path

import sandbox.common.types.client as ctc

from sandbox.projects import resource_types
import sandbox.projects.yane.common as yane
from sandbox.sandboxsdk.parameters import SandboxStringParameter, LastReleasedResource


class YaneParseFreebase(yane.YaneTaskBase):
    """
        Parses Freebase sources and generates MR-table

        The work is delegated to the ``freebasefeatures`` tool, which is
        given the MR server, the Freebase source named in the config, the
        output path ``<target>/object.data`` and the synced trie resource.
    """

    type = 'YANE_PARSE_FREEBASE'

    # Sizing note from the original author: tools - 5Gb, resource - 1Gb.
    # NOTE(review): the value below is presumably in megabytes -- confirm.
    execution_space = 20 * 1024
    client_tags = ctc.Tag.LINUX_PRECISE

    # --- input resources -------------------------------------------------

    class Config(LastReleasedResource):
        # Read in do_execute() via self.get_config('config').
        name = 'config'
        description = 'Config'
        resource_type = resource_types.YANE_CONFIG
        group = yane.GROUP_IN

    class ObjectsTrie(LastReleasedResource):
        # Synced in do_execute() and handed to the tool as its '-e' argument.
        name = 'trie'
        description = 'Objects external IDs trie'
        resource_type = resource_types.OTHER_RESOURCE
        group = yane.GROUP_IN

    # --- output location -------------------------------------------------

    class Target(SandboxStringParameter):
        # Folder that 'object.data' is joined onto for the '-o' argument.
        name = 'target'
        description = 'Target MR folder'
        default_value = 'home/dict/yane/db.NNN/freebase'
        group = yane.GROUP_OUT
        required = True

    # Parameter order: shared base parameters, this task's own parameters,
    # then the MR parameters.
    input_parameters = (
        yane.get_base_params().params
        + [Config, ObjectsTrie, Target]
        + yane.get_mr_params().params
    )

    def __init__(self, task_id=0):
        """
            Initialises the base task and stores the kill timeout in the context
        """
        yane.YaneTaskBase.__init__(self, task_id)
        # 3 * 60 * 60 == 10800; presumably seconds (i.e. three hours) -- confirm.
        hours = 3
        self.ctx['kill_timeout'] = hours * 60 * 60

    def do_execute(self):
        """
            Builds the argument list and runs the 'freebasefeatures' tool
        """
        config = self.get_config('config')

        # Values are looked up in the same order as the flags are listed.
        tool_args = [
            '-s', self.ctx['mr_server'],
            '-i', config['freebase']['source'],
            '-o', os.path.join(self.ctx['target'], 'object.data'),
            '-e', self.sync_resource(self.ctx['trie']),
        ]
        mr_env = self.get_mr_env()

        self.run_tool('freebasefeatures', tool_args, mr_env)

# Module-level alias pointing at the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up to
# discover the task -- confirm against the loader.
__Task__ = YaneParseFreebase
