from datacloud.dev_utils.yt.yt_utils import get_yt_client
from datacloud.dev_utils.yql.yql_helpers import create_yql_client
from datacloud.dev_utils.yt import features
from datacloud.config.yt import GREP_ROOT


def get_log_tables(date_str):
    """Return the cleaned daily user_sessions table paths for ``date_str``.

    One path is produced per log, in a fixed order: ``watch_log_tskv``
    first, then ``spy_log``.
    """
    tables = []
    for log_name in ('watch_log_tskv', 'spy_log'):
        table_path = '//user_sessions/pub/{}/daily/{}/clean'.format(
            log_name, date_str
        )
        tables.append(table_path)
    return tables


def run_grep(log_date, yql_client=None, yt_client=None, use_cloud_nodes=True):
    """Grep ``user-region`` values from the daily logs into a YT table.

    Reduces the tables returned by ``get_log_tables(log_date)`` by ``key``,
    writes the rows to ``<GREP_ROOT>/region_log/<log_date>``, then sorts that
    table by ``yuid`` and merges it with ``combine_chunks=True``. The three
    operations run inside a single ``yt_client.Transaction()``.

    Args:
        log_date: Date string used in the input table paths, the result
            table path, the operation titles and the ``log_date`` column.
        yql_client: Optional YQL client; one is created from ``yt_client``
            when omitted. NOTE(review): the client is not used anywhere in
            this function - confirm whether it is still needed.
        yt_client: Optional YT client; ``get_yt_client()`` is called when
            omitted.
        use_cloud_nodes: Forwarded to ``features.cloud_nodes_spec`` to build
            the spec shared by all three operations.

    Raises:
        ValueError: If the result table already exists.
    """
    yt_client = yt_client or get_yt_client()
    yql_client = yql_client or create_yql_client(yt_client=yt_client)
    spec = features.cloud_nodes_spec(use_cloud_nodes)

    log_tables = get_log_tables(log_date)
    result_table_path = '{}/region_log/{}'.format(GREP_ROOT, log_date)

    if yt_client.exists(result_table_path):
        # Interpolate the path into the message; passing it as a second
        # argument to ValueError leaves the '%s' placeholder unformatted.
        raise ValueError(
            ' Table {} already exist'.format(result_table_path)
        )

    result_table = yt_client.TablePath(
        result_table_path,
        schema=[
            {'name': 'yuid', 'type': 'string'},
            {'name': 'log_date', 'type': 'string'},
            {'name': 'user_region', 'type': 'uint32'},
        ]
    )

    # Tab-separated field prefix that precedes the region id inside 'value'.
    marker = '\tuser-region='
    marker_len = len(marker)

    def reduce_user_region(key, recs):
        # Emits at most one row per key: the first record whose user-region
        # field consists only of digits. Records without the field, or with
        # a non-numeric value, are skipped.
        for rec in recs:
            value = rec['value']
            start = value.find(marker)
            if start == -1:
                continue
            # The field value runs up to the next tab (or the end of string).
            user_region = value[start + marker_len:].split('\t', 1)[0]
            if user_region.isdigit():
                yield {
                    # Drops the first character of the key; presumably a
                    # one-character id prefix - TODO confirm.
                    'yuid': rec['key'][1:],
                    'log_date': log_date,
                    'user_region': int(user_region)
                }
                break

    with yt_client.Transaction():
        yt_client.run_reduce(
            reduce_user_region,
            log_tables,
            result_table,
            reduce_by=['key'],
            spec=dict(
                title=' grep user-region for date {}'.format(log_date),
                **spec
            )
        )
        # Sort in place so the result table is ordered by yuid.
        yt_client.run_sort(
            result_table,
            result_table,
            sort_by=['yuid'],
            spec=dict(
                title=' sort user-region for date {}'.format(log_date),
                **spec
            )
        )
        # In-place merge with combine_chunks enabled.
        yt_client.run_merge(
            result_table,
            result_table,
            spec=dict(
                title=' merge user-region for date {}'.format(log_date),
                combine_chunks=True,
                **spec
            )
        )
