# coding=utf-8

import json

from sandbox.projects.Strm.StrmBuildClickhouseDictionary.common import StrmBuildClickhouseDictionaryBase


class StrmBuildClickhouseDictionaryContentTop(StrmBuildClickhouseDictionaryBase):
    """
    Build Clickhouse dictionary from Gorshok API and upload it to S3

    This subclass only overrides parameter defaults for the ``content_top``
    dictionary and provides ``parse_lines``, which converts the JSON payload
    into dictionary rows.
    NOTE(review): fetching the source and uploading the result are presumably
    done by the base class, which is not visible in this file -- confirm there.
    """

    class Parameters(StrmBuildClickhouseDictionaryBase.Parameters):
        # Same parameters as the base task, with defaults for "content_top".
        dictionary_name = StrmBuildClickhouseDictionaryBase.Parameters.dictionary_name(default="content_top")
        output_name = StrmBuildClickhouseDictionaryBase.Parameters.output_name(
            default="content_top.csv",
        )

        source_url = StrmBuildClickhouseDictionaryBase.Parameters.source_url(
            default="http://content-top.strm.yandex.net/content_top",
        )

    @staticmethod
    def parse_lines(lines):
        """
        Parse a JSON document given as lines and yield dictionary rows.

        :param lines: iterable of text chunks that together form one JSON
            document; the document must be an object with a ``"vod"`` list
            whose items have ``"content"`` and ``"size"`` keys.
        :return: generator of ``(content, cumulative_size)`` tuples, where
            ``cumulative_size`` is the running sum of ``"size"`` over all
            entries up to and including the current one, in payload order.
        :raises ValueError: if the joined text is not valid JSON.
        :raises KeyError: if ``"vod"``, ``"content"`` or ``"size"`` is missing.
        """
        # Join with a real newline. The previous "/n" separator inserted the
        # literal characters "/" and "n" between lines, which corrupted any
        # payload spanning more than one line. A newline between JSON tokens
        # is insignificant whitespace, so this is safe even when the incoming
        # lines already carry their own line endings.
        data = json.loads("\n".join(lines))
        cumulative_size = 0

        for entry in data["vod"]:
            cumulative_size += entry["size"]
            yield entry["content"], cumulative_size
