from sandbox import sdk2
from sandbox.common.types import resource as ctr
from sandbox.projects.common import binary_task

# Yav secret id; the task reads the YT token from this secret's 'yt-token' key.
YAV_ID_ROBOT_PCODE_YT_TOKEN = 'sec-01cv39zerwcz9zk1wjxght1zb8'


class PcodeTopPagesDict(sdk2.Resource):
    """
    Resource containing the page id -> partner name mapping for top page ids.

    It is produced by PcodeTopPagesDictBuilder as a tab-separated file
    (``dict.tsv``) with one ``page id``/``partner name`` pair per line.
    """
    # The attributes below are resource settings interpreted by sdk2.Resource;
    # their exact semantics are defined by the Sandbox framework, not by this file.
    releasable = True
    any_arch = True
    auto_backup = True
    ttl = 30  # NOTE(review): presumably days - confirm against the sdk2.Resource docs
    group = 'PCODE'
    releasers = ['PCODE']
    release_subscribers = ["PCODE"]


class PcodeTopPagesDictBuilder(binary_task.LastBinaryTaskRelease, sdk2.Task):
    """
    Build a PcodeTopPagesDict resource from the YT table //home/yabs/dict/TopPages.

    The task reads the ``PageID`` and ``Name`` columns, transliterates partner
    names, keeps the pages of the first 50 distinct partner names encountered
    and writes them to ``dict.tsv`` as ``<page id><TAB><partner name>`` lines.
    """
    name = "PCODE_TOP_PAGES_DICT_BUILDER"

    class Parameters(sdk2.Task.Parameters):
        yt_cluster = sdk2.parameters.String('YT Cluster', default_value="hahn")
        ext_params = binary_task.binary_release_parameters(stable=True)

    @property
    def binary_executor_query(self):
        """
        Return the resource search query for this task's binary.

        The query matches READY resources whose ``task_type`` attribute equals
        this task's name and whose ``released`` attribute equals the selected
        release type.
        NOTE(review): presumably consumed by binary_task.LastBinaryTaskRelease
        to locate the executable - confirm.
        """
        return {
            "attrs": {"task_type": PcodeTopPagesDictBuilder.name,
                      "released": self.Parameters.binary_executor_release_type},
            "state": [ctr.State.READY]
        }

    def on_execute(self):
        """
        Read the top pages table from YT and write the TSV dictionary resource.

        Raises whatever the Yav, YT or filesystem calls raise; nothing is
        swallowed here.
        """
        # Kept as function-level imports, as in the original code.
        # NOTE(review): presumably these packages are only available inside
        # the task binary - confirm before moving them to module level.
        import yt.wrapper as yt
        from transliterate import translit
        from collections import defaultdict

        max_partners = 50        # number of distinct partner names to keep
        max_name_length = 200    # transliterated names are cut to this length

        yav_secrets = sdk2.yav.Secret(YAV_ID_ROBOT_PCODE_YT_TOKEN).data()
        yt_token = yav_secrets['yt-token']
        yt.config['proxy']['url'] = self.Parameters.yt_cluster
        yt.config['token'] = yt_token

        resource = PcodeTopPagesDict(self, "pcode top page ids", "dict.tsv")
        rows = yt.read_table(yt.TablePath('//home/yabs/dict/TopPages', columns=['PageID', 'Name']),
                             format=yt.JsonFormat(encoding="utf-8"))

        # Group page ids by transliterated partner name. Once max_partners
        # distinct names have been seen, rows with new names are dropped while
        # rows of already known partners are still accepted.
        partner_ids = defaultdict(list)
        for row in rows:
            name = translit(row['Name'], 'ru', reversed=True)[:max_name_length]
            if name in partner_ids or len(partner_ids) < max_partners:
                partner_ids[name].append(row['PageID'])

        # Invert to page id -> partner name; a page id listed under several
        # partners keeps the partner that is iterated last.
        result_dict = {}
        for partner, page_ids in partner_ids.items():
            for page_id in page_ids:
                result_dict[page_id] = partner

        # Build the text with unicode literals and encode it explicitly:
        # write_bytes() requires bytes, and names may still contain non-ASCII
        # characters after transliteration.
        result = u'\n'.join(
            u'{}\t{}'.format(page_id, partner) for page_id, partner in result_dict.items()
        )
        resource.path.write_bytes(result.encode('utf-8'))
