#!/usr/bin/env python
# -*- coding: utf-8 -*-

# There are problems with running YT operations from SandBox tasks directly, so we run them from a subprocess

import sys
import json
import logging
import subprocess
import yt.wrapper
from sandbox.projects.modadvert.rm.constants import AUTOMODERATOR_OBJECT_FIELDS, KEY_COLUMNS
from sandbox.projects.modadvert.common.ytutils import yt_connect


# Target table size (in rows) used by TableCutter when thinning out big tables.
ROWS_LIMIT = 10 ** 6


class TypeSplitter(object):
    """Mapper that routes each row to the output table matching its ``type``.

    ``object_types`` fixes the order of the output tables: a row whose
    ``row['type']`` equals ``object_types[i]`` is emitted into output table
    ``i``. Rows with any other type are dropped.
    """

    def __init__(self, object_types):
        # Map every known object type to the index of its output table.
        self.type_index = dict(
            (name, position) for position, name in enumerate(object_types)
        )

    def __call__(self, row):
        object_type = row['type']
        if object_type not in self.type_index:
            # Unknown type: emit nothing for this row.
            return
        yield yt.wrapper.create_table_switch(self.type_index[object_type])
        yield row


@yt.wrapper.with_context
class TableCutter(object):
    """Mapper that thins every input table down to at most ROWS_LIMIT rows.

    ``tables_row_count[i]`` must hold the total number of rows of input
    table ``i``. Each row is written back to the output table with the same
    index as the input table it came from.

    For a table with ``rows_count`` rows the sampling step is
    ``rows_count // ROWS_LIMIT``:

    * step == 0 (fewer than ROWS_LIMIT rows): every row is kept;
    * otherwise only rows whose index is a multiple of the step are kept,
      and only the first ROWS_LIMIT of them.

    The job must be started with row and table indexes enabled in the
    control attributes, since ``context.row_index`` and
    ``context.table_index`` are read for every row.
    """

    def __init__(self, tables_row_count):
        self.tables_row_count = tables_row_count

    def __call__(self, row, context):
        rows_count = self.tables_row_count[context.table_index]
        step = rows_count // ROWS_LIMIT
        # Strict "<": multiples of the step are numbered from 0, so allowing
        # the quotient to reach ROWS_LIMIT itself would keep ROWS_LIMIT + 1 rows.
        keep_row = step == 0 or (
            context.row_index % step == 0
            and context.row_index // step < ROWS_LIMIT
        )
        if keep_row:
            yield yt.wrapper.create_table_switch(context.table_index)
            yield row


def create_temp_tables(yt_client, object_types, tmp_directory, expiration_timeout, attributes=None):
    """Create one temporary table per entry of ``object_types``.

    Every table is created through ``yt_client.create_temp_table`` with
    ``tmp_directory`` as prefix and the given ``expiration_timeout`` and
    ``attributes``. Returns the list of values returned by those calls, in
    the same order as ``object_types``.
    """
    created = []
    for _ in object_types:
        table = yt_client.create_temp_table(
            prefix=tmp_directory,
            expiration_timeout=expiration_timeout,
            attributes=attributes,
        )
        created.append(table)
    return created


def prepare_tables(params):
    """Split the source tables by object type and thin the results out.

    Steps, in order:

    1. create one schematized temporary table per object type;
    2. run a map with TypeSplitter from ``params['src_tables']`` into them;
    3. count the rows of every temporary table;
    4. run a map with TableCutter over the temporary tables in place;
    5. create one more (schema-less) temporary table per object type.

    Returns a JSON string with two keys: ``src_tables`` (the tables filled in
    steps 1-4) and ``dst_tables`` (the empty tables from step 5).
    """
    yt_client = yt_connect(params['yt_proxy_url'], params['yt_token'])

    object_types = params['object_types']
    tmp_directory = params['tmp_directory']
    expiration_timeout = params['expiration_timeout']

    # Key columns must form a prefix of schema
    key_part = [
        {'name': name, 'type': AUTOMODERATOR_OBJECT_FIELDS[name], 'sort_order': 'ascending'}
        for name in KEY_COLUMNS
    ]
    value_part = [
        {'name': name, 'type': field_type}
        for name, field_type in AUTOMODERATOR_OBJECT_FIELDS.items()
        if name not in KEY_COLUMNS
    ]
    src_tables = create_temp_tables(
        yt_client=yt_client,
        object_types=object_types,
        tmp_directory=tmp_directory,
        expiration_timeout=expiration_timeout,
        attributes={'schema': key_part + value_part, 'dynamic': False},
    )

    # Split objects by type
    splitter = TypeSplitter(object_types)
    yt_client.run_map(splitter, params['src_tables'], src_tables, ordered=True)

    # Count table rows
    tables_row_count = []
    for table_name in src_tables:
        tables_row_count.append(yt_client.row_count(table_name))

    # Cut tables; the cutter reads row and table indexes from the job context
    cutter = TableCutter(tables_row_count)
    index_attributes = {'enable_row_index': True, 'enable_table_index': True}
    yt_client.run_map(
        cutter,
        src_tables,
        src_tables,
        job_io={'control_attributes': index_attributes},
        ordered=True,
    )

    dst_tables = create_temp_tables(
        yt_client=yt_client,
        object_types=object_types,
        tmp_directory=tmp_directory,
        expiration_timeout=expiration_timeout,
    )
    return json.dumps({'src_tables': src_tables, 'dst_tables': dst_tables})


def concatenate_results(params):
    """Concatenate ``params['result_tables']`` into ``params['dst_table']``.

    Connects with ``params['yt_proxy_url']`` / ``params['yt_token']`` and
    delegates to the client's ``concatenate``. Returns None.
    """
    client = yt_connect(params['yt_proxy_url'], params['yt_token'])
    sources = params['result_tables']
    destination = params['dst_table']
    client.concatenate(sources, destination)


def run(**kwargs):
    """Execute this file in a child interpreter and return its stdout.

    The keyword arguments are serialized to JSON and written to the child's
    stdin. Whatever the child prints to stderr is logged at ERROR level.

    Returns:
        The raw stdout of the child process.

    Raises:
        Exception: if the child exits with a non-zero return code.
    """
    child = subprocess.Popen(
        [sys.executable, __file__],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # The pipes are opened in binary mode, so the payload has to be bytes;
    # encoding keeps this working on Python 3 and is a no-op on Python 2.
    payload = json.dumps(kwargs).encode('utf-8')
    out, err = child.communicate(input=payload)
    if err:
        # Skip the log record entirely when the child wrote nothing to stderr.
        logging.error(err)
    if child.returncode:
        raise Exception("run exited with code: {:d}".format(child.returncode))
    return out


if __name__ == "__main__":
    # Child-process entry point: run() in the parent process writes a JSON
    # object with the parameters to stdin.
    input_params = json.load(sys.stdin)
    # input_params['task'] names a module-level callable, resolved through
    # globals(); it is called with the whole parameter dict and its return
    # value is printed to stdout, which the parent captures.
    # NOTE(review): every module-level name is reachable through this lookup,
    # so the value of 'task' is assumed to come from trusted callers only.
    print(globals()[input_params['task']](input_params))
