# coding: utf-8

import datetime
import os
from sandbox import sdk2
from sandbox.projects.answers import resources
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.answers.common.utils import get_last_dump_dir
from sandbox.sdk2 import parameters

# One suggest record per line: query, an empty second column, weight,
# a trailing tab and the line feed.
TEMPLATE = '{query}\t\t{weight}\t\n'
# Name of the produced file, created in the task's working directory.
FILENAME = 'suggest.tsv'
# Name of the tags table inside a dump directory.
TAGS_TABLENAME = 'answers__tags'


class BuildAnswersTagsSuggestions(sdk2.Task):
    """Build the tags suggest file from a YT dump and publish it as a resource.

    The task reads the ``TAGS_TABLENAME`` table from the dump directory
    returned by ``get_last_dump_dir``, keeps the tags accepted by
    ``get_data``, writes them to ``FILENAME`` in the working directory and
    registers that file as an ``AnswersSuggestTagsData`` resource.
    """

    class Parameters(sdk2.Parameters):
        # Directory passed to ``get_last_dump_dir`` to locate the dump to read.
        yt_dumps_directory = parameters.String('YT Directory With Dumps', required=True)
        # Used as the ``proxy`` of the YT client.
        yt_cluster = parameters.String('YT Cluster', required=True, default='banach')
        # Name of the Vault entry holding the YT token.
        yt_token_secret = parameters.String('YT Token Secret', required=True)
        # Stored as the ``env`` attribute of the produced resource.
        env = parameters.String(
            'Database environment',
            choices=[
                ('dev', resources.Environments.DEV),
                ('prod', resources.Environments.PROD),
                ('prestable', resources.Environments.PRESTABLE),
            ],
            required=True,
        )

    class Requirements(sdk2.Task.Requirements):
        environments = (
            PipEnvironment('yandex-yt'),
            PipEnvironment('yandex-yt-yson-bindings-skynet'),
        )

    def get_yt_client(self):
        """Return a ``YtClient`` for the configured cluster.

        The token is read from Vault using the ``yt_token_secret`` parameter.
        """
        # Imported here rather than at module level; presumably the package
        # is only available once the PipEnvironment requirements are set up.
        from yt.wrapper.client import YtClient

        return YtClient(
            proxy=self.Parameters.yt_cluster,
            token=sdk2.Vault.data(self.Parameters.yt_token_secret),
        )

    def get_data(self, yt_client, table_path, temp_table_path):
        """Collect suggest entries from the tags table.

        The source table is sorted by ``tag`` into ``temp_table_path`` and the
        rows are read back from that sorted copy.

        :param yt_client: YT client used to run the sort and read the rows.
        :param table_path: path of the source tags table.
        :param temp_table_path: path that receives the sorted copy.
        :return: list of ``{'query': <tag>, 'weight': <questions_count>}``
            dicts, one per accepted row.
        """
        yt_client.run_sort(table_path, temp_table_path, sort_by='tag')

        result = []
        for row in yt_client.read_table(temp_table_path):
            # Only rows with status 0 and at least 3 questions are kept.
            # NOTE(review): status 0 presumably means an active tag - confirm
            # against the table schema.
            if row['status'] != 0 or row['questions_count'] < 3:
                continue

            # Every record must occupy exactly one tab-separated line, so any
            # field or line separator inside the tag becomes a space. The
            # carriage return is included: readers that treat '\r' as a line
            # break would otherwise split the record in two.
            tag = row['tag']
            for separator in ('\t', '\n', '\r'):
                tag = tag.replace(separator, ' ')

            if not tag.strip():
                # Nothing but whitespace is left - not a usable suggestion.
                continue

            result.append({'query': tag, 'weight': row['questions_count']})
        return result

    def on_execute(self):
        """Read the tags, write the suggest file and publish it as a resource."""
        yt_client = self.get_yt_client()
        dump_dir = get_last_dump_dir(yt_client, self.Parameters.yt_dumps_directory)
        tags_table_path = os.path.join(dump_dir, TAGS_TABLENAME)

        # The temporary table is only needed while the rows are being read;
        # ``get_data`` returns a fully materialized list.
        with yt_client.TempTable() as temp_table:
            data = self.get_data(yt_client, tags_table_path, temp_table)

        result_path = os.path.join(os.getcwd(), FILENAME)
        with open(result_path, 'w') as f:
            f.writelines(TEMPLATE.format(**item) for item in data)

        resource_metadata = resources.AnswersSuggestTagsData(
            self,
            "Znatoki tags suggest map at {}".format(
                datetime.datetime.utcnow().isoformat()
            ),
            result_path,
            env=self.Parameters.env,
        )
        resource_data = sdk2.ResourceData(resource_metadata)
        resource_data.ready()
