import yt.wrapper
import yt.wrapper.schema as schema
from itertools import accumulate


@yt.wrapper.yt_dataclass
class TTableStatsRow:
    """One row of the consumption report.

    The same row type is written to both export tables in this script: once
    per table found under the scanned root, and once per ancestor directory
    with values aggregated over the tables beneath it.
    """

    # Cypress path of the table, or of the directory for aggregated rows.
    TablePath: str
    # Taken from the `node_count` field of the node's `resource_usage`.
    NodeCount: schema.Int64
    # Taken from the `chunk_count` field of the node's `resource_usage`.
    Chunks: schema.Int64
    # `disk_space` from `resource_usage`, expressed in units of 1024**3 bytes
    # and stored as a whole number.
    DiskSpaceGB: schema.Int64
    # Value of the node's `access_time` attribute; for aggregated rows the
    # maximum over the contributing tables.
    LastAccess: str
    # Value of the node's `modification_time` attribute; for aggregated rows
    # the maximum over the contributing tables.
    LastModification: str


# Destination table for the per-table rows (one row per table found).
single_table_export_path = '//home/webmaster/prod/analytics/consumption_yt/table_stats'
# Destination table for the per-directory rows (aggregated over nested tables).
directories_export_path = '//home/webmaster/prod/analytics/consumption_yt/directory_stats'
# Cypress subtree that is searched for tables.
root_path = '//home/webmaster/prod'


def get_path_prefixes(table_path):
    """Return the cumulative '/'-joined prefixes of a path's trailing components.

    The path is split on '/', the first four fields are discarded, and for
    the remaining components every leading run is joined back with '/'.
    For example '//home/webmaster/prod/a/b' yields
    ['prod', 'prod/a', 'prod/a/b']. A path with four or fewer fields yields
    an empty list.
    """
    components = table_path.split('/')[4:]
    return [
        '/'.join(components[:depth])
        for depth in range(1, len(components) + 1)
    ]

if __name__ == "__main__":
    # Size in bytes of one unit of the DiskSpaceGB column.
    bytes_per_gb = 1024 ** 3

    # Find every table under root_path and request the attributes the
    # report is built from.
    table_infos = yt.wrapper.search(
        root_path,
        node_type=['table'],
        attributes=['resource_usage', 'modification_time', 'access_time']
    )

    table_stats = []
    path_to_stat = {}
    # Exact byte totals per directory. Summing the already-truncated
    # per-table gigabyte values would drop up to 1 GB per table, so the
    # directory figure is derived from bytes and converted only once.
    path_to_bytes = {}

    # The search results are fully consumed here, before anything is removed
    # or written, and all in-memory aggregation happens outside the
    # transaction so that the transaction only spans the Cypress operations.
    for table in table_infos:
        usage = table.attributes['resource_usage']
        disk_space = usage['disk_space']
        row = TTableStatsRow(
            table,
            usage['node_count'],
            usage['chunk_count'],
            # Integer floor division: exact for any size, no float round-trip.
            disk_space // bytes_per_gb,
            table.attributes['access_time'],
            table.attributes['modification_time']
        )
        table_stats.append(row)

        # The last prefix is the table itself; only its ancestors get
        # directory rows.
        for relative_prefix in get_path_prefixes(row.TablePath)[:-1]:
            prefix = '//home/webmaster/' + relative_prefix
            stat = path_to_stat.get(prefix)
            if stat is None:
                # Seed the timestamps from the first table seen under this
                # directory; later tables can only raise them via max().
                stat = TTableStatsRow(prefix, 0, 0, 0, row.LastAccess, row.LastModification)
                path_to_stat[prefix] = stat
                path_to_bytes[prefix] = 0
            stat.NodeCount += row.NodeCount
            stat.Chunks += row.Chunks
            path_to_bytes[prefix] += disk_space
            stat.LastAccess = max(stat.LastAccess, row.LastAccess)
            stat.LastModification = max(stat.LastModification, row.LastModification)

    # Convert the exact per-directory byte totals to whole gigabytes.
    for prefix, stat in path_to_stat.items():
        stat.DiskSpaceGB = path_to_bytes[prefix] // bytes_per_gb

    # Replace both export tables within a single transaction.
    with yt.wrapper.Transaction():
        print("Removing")
        yt.wrapper.remove(single_table_export_path, force=True)
        yt.wrapper.remove(directories_export_path, force=True)
        print("Creating")
        yt.wrapper.write_table_structured(
            single_table_export_path,
            TTableStatsRow,
            table_stats)
        yt.wrapper.write_table_structured(
            directories_export_path,
            TTableStatsRow,
            list(path_to_stat.values())
        )
        print("Sorting")
        yt.wrapper.run_sort(
            directories_export_path,
            sort_by=['TablePath', 'Chunks', 'LastAccess', 'LastModification']
        )
        yt.wrapper.run_sort(
            single_table_export_path,
            sort_by=['TablePath', 'Chunks', 'LastAccess', 'LastModification']
        )
