# -*- coding: utf-8 -*-

import os.path

import sandbox.common.types.client as ctc
from sandbox.projects import resource_types
import sandbox.projects.yane.common as yane
from sandbox.sandboxsdk.parameters import SandboxStringParameter, LastReleasedResource


class YaneParseWikilinks(yane.YaneTaskBase):
    """
        Parses Wiki sources and generates MR-table.

        The task drives the ``wikilinksfeatures`` tool in two steps:

        1. parse: the sources listed under ``wikilinks -> source`` in the
           config are processed with the synced trie (``-d``) and written
           to ``<target>/relative.links``;
        2. merge: ``<target>/relative.links`` and the ``source_onto`` table
           are combined (``-m``) into ``<target>/object.data``.
    """

    type = 'YANE_PARSE_WIKILINKS'

    # Original sizing note: tools - 5Gb, resource - 1Gb
    # (units of the value below are presumably megabytes - TODO confirm)
    execution_space = 20 * 1024

    # ---- input parameters -------------------------------------------------

    class Config(LastReleasedResource):
        # Resource read back through self.get_config('config') in _parse().
        name = 'config'
        description = 'Config'
        resource_type = resource_types.YANE_CONFIG
        group = yane.GROUP_IN

    class ObjectsTrie(LastReleasedResource):
        # Resource synced in do_execute() and handed to the tool via '-d'.
        name = 'trie'
        description = 'Objects external IDs trie'
        resource_type = resource_types.OTHER_RESOURCE
        group = yane.GROUP_IN

    class SourceOntoTable(SandboxStringParameter):
        # Second input of the merge step (see _merge()).
        name = 'source_onto'
        description = 'Source table with OntoDB related objects'
        default_value = 'home/dict/yane/db.NNN/cards/related.objects'
        group = yane.GROUP_IN
        required = True

    # ---- output parameters ------------------------------------------------

    class Target(SandboxStringParameter):
        # Folder under which 'relative.links' and 'object.data' are written.
        name = 'target'
        description = 'Target MR folder'
        default_value = 'home/dict/yane/db.NNN/relobject'
        group = yane.GROUP_OUT
        required = True

    # Base parameters first, then this task's own, then the MR parameters.
    input_parameters = (
        yane.get_base_params().params
        + [Config, ObjectsTrie, SourceOntoTable, Target]
        + yane.get_mr_params().params
    )

    # All MR-clusters are linux-only
    client_tags = ctc.Tag.LINUX_PRECISE

    def __init__(self, task_id=0):
        """
            Initialise the base task and store the kill timeout in the context.
        """
        yane.YaneTaskBase.__init__(self, task_id)
        # 3 * 60 * 60 = 10800 (three hours, assuming the value is in seconds)
        hours = 3
        self.ctx['kill_timeout'] = hours * 60 * 60

    def _run_wikilinksfeatures(self, sources, target, merge, **kwargs):
        """
            Run the ``wikilinksfeatures`` tool with the MR environment.

            sources -- iterable of strings, comma-joined into the ``-i`` value
            target  -- value passed with ``-o``
            merge   -- when truthy the ``-m`` flag is added; otherwise
                       ``-d <trie>`` is added
            kwargs  -- must contain ``trie`` when ``merge`` is falsy
                       (a missing key raises KeyError)
        """
        tool_args = [
            '-s', self.ctx['mr_server'],
            '-i', ','.join(sources),
            '-o', target,
        ]
        # The two modes are mutually exclusive: either merge, or parse with a trie.
        tool_args += ['-m'] if merge else ['-d', kwargs['trie']]
        self.run_tool('wikilinksfeatures', tool_args, self.get_mr_env())

    # NOTE(review): run_once presumably keeps a step from being repeated
    # when the task is restarted - confirm in sandbox.projects.yane.common.
    @yane.run_once
    def _parse(self, trie, rel_links):
        """
            Parse step: config-listed wiki sources -> ``rel_links`` (uses ``trie``).
        """
        wiki_sources = self.get_config('config')['wikilinks']['source']
        self._run_wikilinksfeatures(wiki_sources, rel_links, False, trie=trie)

    @yane.run_once
    def _merge(self, rel_links):
        """
            Merge step: ``rel_links`` + ``source_onto`` -> ``<target>/object.data``.
        """
        merge_inputs = [rel_links, self.ctx['source_onto']]
        merged_table = os.path.join(self.ctx['target'], 'object.data')
        self._run_wikilinksfeatures(merge_inputs, merged_table, True)

    def do_execute(self):
        """
            Task entry point: sync the trie resource, then parse and merge.
        """
        target_dir = self.ctx['target']
        rel_links = os.path.join(target_dir, 'relative.links')
        synced_trie = self.sync_resource(self.ctx['trie'])
        self._parse(synced_trie, rel_links)
        self._merge(rel_links)


# Module-level alias exposing the task class under the name `__Task__`
# (presumably the hook the Sandbox task loader looks up - TODO confirm).
__Task__ = YaneParseWikilinks
