import logging
import os
import subprocess


from sandbox import sdk2
from sandbox.common import errors

import yql_scripts

# Module-wide logger; getLogger() without a name returns the root logger.
logger = logging.getLogger()

# Minimum number of rows the filtered golden-set table must contain;
# filter_golden_set_table raises TaskError when the table has fewer rows.
MIN_GS_SIZE_ROWS = 3
# Constant added to the golden-set sampling percentage computed in
# sample_golden_set (before the result is capped at 100).
SAMPLING_PERCENT_ADDITIVE = 2


class YtPathContext:
    """Wrapper around a YT directory path that can derive nested paths.

    ``str()`` of an instance yields the wrapped path unchanged.
    """

    def __init__(self, yt_dir):
        self._base_path = yt_dir

    def subpath(self, path):
        """Return a new context for ``path`` joined onto the wrapped path.

        An empty ``path`` yields a new context for the very same base path;
        it is handled separately because joining an empty component would
        append a trailing separator.
        """
        target = self._base_path if path == '' else os.path.join(self._base_path, path)
        return YtPathContext(target)

    def __str__(self):
        return self._base_path


def run_binary_cmd(binary_resource_id, cmd, n_retries=5):
    """Run a binary taken from a Sandbox resource, retrying on failure.

    The binary path is obtained via ``sdk2.ResourceData`` for the resource
    with the given id, the file is made executable and then invoked with
    the arguments from ``cmd``. stderr is merged into the captured output.

    Args:
        binary_resource_id: id used to look up the resource in
            ``sdk2.Resource``.
        cmd: list of string arguments passed to the binary.
        n_retries: maximum number of attempts.

    Returns:
        The combined stdout/stderr output of the first successful attempt.
        If ``n_retries`` is zero or negative the command is never run and
        ``None`` is returned.

    Raises:
        subprocess.CalledProcessError: if ``chmod`` fails, or if the last
            attempt exits with a non-zero status.
    """
    binary = str(sdk2.ResourceData(sdk2.Resource[binary_resource_id]).path)
    # Pass an argument list instead of a shell string so that a path with
    # spaces or shell metacharacters is handled verbatim.
    subprocess.check_call(['chmod', '+x', binary])
    full_cmd = [binary] + cmd
    logger.info('running binary command: %s', ' '.join(full_cmd))

    for attempt in range(n_retries):
        try:
            cmd_output = subprocess.check_output(full_cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            logger.error('command output:\n%s', e.output)
            logger.debug('retry %d for binary command failed', attempt)

            # Out of attempts: propagate the failure of the last run.
            if attempt + 1 == n_retries:
                raise
        else:
            logger.info('command output:\n%s', cmd_output)
            return cmd_output


def run_yql_query(template, **kwargs):
    """Render a YQL query template and execute it, waiting for completion.

    Args:
        template: query text with ``str.format`` placeholders.
        **kwargs: values substituted into the template.

    Raises:
        errors.TaskError: if the request does not report success.
    """
    # Imported lazily, inside the function, as in the rest of this module.
    from yql.api.v1.client import YqlClient

    rendered_query = template.format(**kwargs)
    client = YqlClient()
    yql_request = client.query(rendered_query, syntax_version=1)
    yql_request.run()
    yql_request.get_results(wait=True)
    if yql_request.is_success:
        return
    raise errors.TaskError('YQL query failed')


def filter_golden_set_table(table, condition):
    """Filter a golden-set table with a YQL condition and check its size.

    Runs ``yql_scripts.FILTER_TABLE_TEMPLATE`` with the given table and
    condition, then reads the row count of ``<table>_filtered``
    (presumably the table the template writes -- the template itself is
    defined in ``yql_scripts``).

    Args:
        table: path of the table to filter; formatted with ``str.format``.
        condition: condition text substituted into the query template.

    Raises:
        errors.TaskError: if the query fails or the filtered table has
            fewer than ``MIN_GS_SIZE_ROWS`` rows.
    """
    run_yql_query(yql_scripts.FILTER_TABLE_TEMPLATE, table=table, condition=condition)

    import yt.wrapper as yt
    yt.config['proxy']['url'] = 'hahn.yt.yandex.net'

    filtered_table = '{}_filtered'.format(table)
    filtered_nrows = yt.get('{}/@row_count'.format(filtered_table))
    if filtered_nrows < MIN_GS_SIZE_ROWS:
        raise errors.TaskError('Too small golden set')


def sample_golden_set(
    task_yt_dir,
    data_table,
    gs_table,
    sample_percent,
    condition=None,
):
    """Sample a golden set from an existing golden-set table.

    The sampling percentage passed to the query is scaled so that the
    sample is proportional to the data table:
    ``data_rows * sample_percent / gs_rows``, plus
    ``SAMPLING_PERCENT_ADDITIVE``, capped at 100.

    Args:
        task_yt_dir: task directory; passed to the query as ``task_dir``
            and must provide ``subpath()`` when ``condition`` is given.
        data_table: path of the data table; only its row count is read.
        gs_table: path of the existing golden-set table to sample from.
        sample_percent: desired golden-set share relative to the data
            table, in percent.
        condition: optional filter condition; when not ``None`` the
            ``golden_results`` subpath of ``task_yt_dir`` is filtered with
            it via ``filter_golden_set_table``.

    Raises:
        errors.TaskError: if the golden-set table is empty, a query fails,
            or the filtered golden set is too small.
    """
    import yt.wrapper as yt
    yt.config['proxy']['url'] = 'hahn.yt.yandex.net'

    logger.info('sampling golden set from existing data')

    data_nrows = yt.get('{}/@row_count'.format(data_table))
    gs_nrows = yt.get('{}/@row_count'.format(gs_table))

    # An empty golden set cannot be sampled; fail with a task-level error
    # instead of an opaque ZeroDivisionError from the formula below.
    if gs_nrows == 0:
        raise errors.TaskError('Golden set table {} is empty'.format(gs_table))

    # Force true division: with integer operands Python 2 would floor the
    # quotient and under-sample by up to one percent.
    gs_sample_percent = float(data_nrows) * sample_percent / gs_nrows + SAMPLING_PERCENT_ADDITIVE

    run_yql_query(
        yql_scripts.SAMPLE_GS_QUERY_TEMPLATE,
        task_dir=task_yt_dir,
        gs_table=gs_table,
        sample_percent=min(100, gs_sample_percent),
    )

    if condition is not None:
        filter_golden_set_table(task_yt_dir.subpath('golden_results'), condition)


def separate_golden_set(task_yt_dir, data_table, sample_percent):
    """Run the query that splits the data table into gs / non-gs tables.

    Args:
        task_yt_dir: task directory, passed to the query as ``task_dir``.
        data_table: input table, passed to the query as ``input_table``.
        sample_percent: passed to the query as ``sample_percent``.

    Raises:
        errors.TaskError: if the YQL query fails.
    """
    logger.info('splitting data into gs/non-gs tables')

    query_params = {
        'task_dir': task_yt_dir,
        'input_table': data_table,
        'sample_percent': sample_percent,
    }
    run_yql_query(yql_scripts.SPLIT_GS_QUERY_TEMPLATE, **query_params)
