import logging
import re
from sandbox import sdk2

from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.modadvert.common import modadvert


class ModadvertChunkMerger(modadvert.ModadvertBaseYtTask):
    """Sandbox task that runs an in-place YT merge over tables in a directory.

    Every table found under ``Parameters.directory`` whose path matches
    ``Parameters.tables_pattern`` is merged onto itself with
    ``combine_chunks`` enabled.
    """

    class Requirements(modadvert.ModadvertBaseYtTask.Requirements):
        # The YT client library is installed through pip for this task.
        environments = (
            PipEnvironment('yandex-yt'),
        )

    class Parameters(modadvert.ModadvertBaseYtTask.Parameters):
        # Root node handed to ``yt_client.search``.
        directory = sdk2.parameters.String('YT directory')
        # Regular expression applied with ``match`` semantics (anchored at the
        # start) to the path that ``search`` passes to the filter.
        # NOTE(review): despite the label, the filter receives a path, not a
        # bare table name -- confirm which form callers are expected to use.
        tables_pattern = sdk2.parameters.String('Table name regexp pattern', default='.*')

    def on_execute_inner(self):
        """Find the matching tables and merge each one onto itself.

        Tables are processed one after another; an exception raised by the
        search or by a merge propagates to the caller and stops the loop.

        Raises:
            re.error: if ``Parameters.tables_pattern`` is not a valid regular
                expression (raised before any YT request is made).
        """
        # Imported inside the method rather than at module level; presumably
        # because the package only becomes available through the
        # PipEnvironment requirement at run time -- TODO confirm.
        import yt.wrapper

        # Compile once up front: the filter is called for every candidate
        # path, and an invalid pattern should fail before talking to YT.
        pattern = re.compile(self.Parameters.tables_pattern)

        yt_client = yt.wrapper.YtClient(self.Parameters.yt_proxy_url, self.get_yt_token())
        tables = yt_client.search(
            self.Parameters.directory,
            node_type='table',
            # Limits how deep the search descends below the root
            # (presumably direct children only -- confirm against YT docs).
            depth_bound=1,
            path_filter=lambda path: pattern.match(path) is not None,
        )
        for table in tables:
            logging.info('Processing table %s', table)
            # Source and destination are the same table, so the merge
            # rewrites the table in place.
            yt_client.run_merge(
                table,
                table,
                mode='ordered',
                spec={
                    'combine_chunks': True,
                    'schema_inference_mode': 'from_output',
                    'job_io': {
                        'table_writer': {
                            # Target size of the chunks written by the merge
                            # (presumably in bytes, i.e. 200 MB -- TODO confirm).
                            'desired_chunk_size': 200000000
                        }
                    },
                }
            )
            logging.info('Finished %s', table)
