import argparse
import base64
import json
import logging
import os
import random
import time
import yt.wrapper as yt


# Minimum len() of a row's PostBody for a cards request to be kept by
# heavy_requests_mapper. The length is taken from the stored value, before
# prepare_request base64-decodes it.
min_data_length = 70000
# Upper bound for "global_count" in the requests config; main() raises
# RuntimeError when it is exceeded.
max_requests_count = 500000
# Divisor applied to the sampling rate computed in main().
# NOTE(review): presumably the share of input rows that are cards requests,
# expressed as a fraction despite the name -- confirm.
cards_requests_percentage = 0.987

def remove_param(request, param_name):
    """Remove the first ``param_name=value`` pair from a request string.

    The pair is recognised only when ``param_name`` begins a parameter, i.e.
    it sits at the very start of ``request`` or directly after ``?`` or ``&``.
    Longer names that merely end with ``param_name`` (for example
    ``my_exp_name`` when removing ``exp_name``) are left intact.

    Args:
        request: Request path with CGI parameters, or a bare query string.
        param_name: Name of the parameter to drop, without the trailing ``=``.

    Returns:
        ``request`` without the first matching pair and without the ``&``
        that follows it. The separator in front of the pair is kept, so
        removing the last parameter leaves a trailing ``&`` or ``?``.
        ``request`` is returned unchanged when nothing matches.
    """
    param_key = param_name + '='
    search_from = 0
    while True:
        param_start_pos = request.find(param_key, search_from)
        if param_start_pos == -1:
            return request
        preceding = request[param_start_pos - 1:param_start_pos]
        if param_start_pos == 0 or preceding in ('?', '&'):
            break
        # The hit is the tail of a longer parameter name (or part of a
        # value); keep scanning for a real parameter boundary.
        search_from = param_start_pos + 1

    param_end_pos = request.find('&', param_start_pos + len(param_key))
    if param_end_pos == -1:
        param_end_pos = len(request)
    else:
        # Swallow the '&' that followed the removed pair.
        param_end_pos += 1

    return request[:param_start_pos] + request[param_end_pos:]


def simplify(request):
    """Return ``request`` with ``exp_name`` and ``return_profiles`` removed.

    Each parameter is stripped with ``remove_param``, ``exp_name`` first.
    """
    stripped = request
    for param_name in ("exp_name", "return_profiles"):
        stripped = remove_param(stripped, param_name)
    return stripped


def make_deterministic(request):
    """Append a fixed ``seed`` and a ``timestamp`` parameter to ``request``.

    The timestamp is the current time truncated to whole seconds and then
    multiplied by 1000.
    """
    timestamp = int(time.time()) * 1000
    suffix = "&seed=test_seed&timestamp=" + str(timestamp)
    return request + suffix


def normalize_request(request):
    """Apply ``simplify`` to ``request`` and then ``make_deterministic``."""
    simplified = simplify(request)
    return make_deterministic(simplified)


def make_default_row(row):
    """Convert an input row into the ``data``/``request`` row used downstream.

    ``data`` is the row's ``PostBody``; ``request`` is ``HttpPath`` and
    ``CgiParams`` joined with ``?``.
    """
    body = row['PostBody']
    url = row['HttpPath'] + '?' + row['CgiParams']
    return dict(data=body, request=url)


def heavy_requests_mapper(row):
    """Map function that keeps only cards requests with a large body.

    A row is emitted when its request contains
    ``/api/card_recommender/cards?`` and the length of its ``data`` is at
    least ``min_data_length``. Emitted rows get a normalized request and a
    ``data_length`` column holding the negated length, so an ascending sort
    by that column puts the longest bodies first.

    Args:
        row: Input row with ``PostBody``, ``HttpPath`` and ``CgiParams``.

    Yields:
        At most one row with ``data``, ``request`` and ``data_length``.
    """
    new_row = make_default_row(row)
    if '/api/card_recommender/cards?' not in new_row['request']:
        return

    # The body may be missing (prepare_request handles None as well); treat
    # it as empty instead of failing on len(None).
    data = new_row['data']
    data_length = 0 if data is None else len(data)
    if data_length < min_data_length:
        return

    new_row['request'] = normalize_request(new_row['request'])
    new_row['data_length'] = -data_length
    yield new_row

def cards_requests_mapper(row):
    """Map function that writes every request to table 0 and cards requests to table 1.

    Table 0 receives the row as produced by ``make_default_row``. When the
    request contains ``/api/card_recommender/cards?``, table 1 additionally
    receives a copy of the row whose request has been normalized.

    Args:
        row: Input row with ``PostBody``, ``HttpPath`` and ``CgiParams``.

    Yields:
        Table switches followed by the rows for the selected output table.
    """
    new_row = make_default_row(row)
    yield yt.create_table_switch(0)
    yield new_row
    if '/api/card_recommender/cards?' in new_row['request']:
        # Emit a separate dict: the row already yielded for table 0 must not
        # change if the consumer has not serialized it yet.
        cards_row = dict(new_row, request=normalize_request(new_row['request']))
        yield yt.create_table_switch(1)
        yield cards_row


def prepare_request(row):
    """Render one row as a length-prefixed HTTP/1.0 POST request.

    ``row['data']`` is base64-decoded to obtain the body; ``None`` means an
    empty body. The result is ``<length>\\n<request>\\r\\n`` where ``<length>``
    is the length of the request text (headers plus body).
    """
    encoded_body = row['data']
    body = '' if encoded_body is None else base64.b64decode(encoded_body)

    head = (
        'POST {} HTTP/1.0\r\n'.format(row['request'])
        + 'Content-Length: {}\r\n'.format(len(body))
        + 'User-Agent: Yandex.Tank (sandbox)\r\n\r\n'
    )
    payload = head + body

    return '{}\n{}\r\n'.format(len(payload), payload)


def create_requests_file(table, file_path, count):
    """Write up to ``count`` prepared requests from a YT table to a file.

    Rows are read with ``yt.read_table`` and rendered with
    ``prepare_request``. The file is opened only after reading has finished.

    Args:
        table: YT table to read rows from.
        file_path: Destination path; the file is overwritten.
        count: Maximum number of requests to write. A non-positive value
            means no limit, and a fractional value stops at the first row
            number that reaches it.
    """
    # Collect the pieces in a list instead of growing one string with +=.
    chunks = []
    for progress, row in enumerate(yt.read_table(table), 1):
        chunks.append(prepare_request(row))
        if progress % 100 == 0:
            logging.info(progress)
        # ">=" rather than "==" so that a non-integer count still stops the
        # loop instead of silently dumping the whole table.
        if 0 < count <= progress:
            break

    with open(file_path, 'w') as f:
        f.writelines(chunks)


def create_file(table, configs, name, count):
    """Create a requests file for every plan config called ``name``.

    Walks ``configs["plan_configs"]`` and, for each entry whose ``name``
    matches, passes ``table``, the entry's ``file_path`` and ``count`` to
    ``create_requests_file``.
    """
    for plan in configs["plan_configs"]:
        if plan["name"] != name:
            continue
        create_requests_file(table, plan["file_path"], count)


def main():
    """Build request files from a YT table of logged requests.

    Command-line arguments:
        --input_table: YT table to read requests from.
        --server: YT proxy passed to ``yt.config.set_proxy``.
        --pool: YT pool used in the operation specs.
        --requests_configs: JSON object with ``global_count`` and a
            ``plan_configs`` list of ``{"name": ..., "file_path": ...}``.

    Three files are produced for the plan configs named ``general``,
    ``perf`` and ``heavy_perf``; the last one is limited to a tenth of
    ``global_count``.

    Raises:
        RuntimeError: If ``global_count`` exceeds ``max_requests_count``.
    """
    # The root logger defaults to WARNING, which would drop every
    # logging.info call made by this script.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_table', dest='input_table', required=True)
    parser.add_argument('--server', dest='yt_server', required=True)
    parser.add_argument('--pool', dest='yt_pool', required=True)
    parser.add_argument('--requests_configs', dest='requests_configs', required=True)
    args = parser.parse_args()

    args.requests_configs = json.loads(args.requests_configs)
    count = int(args.requests_configs["global_count"])

    if count > max_requests_count:
        raise RuntimeError("global_count should not be more than 500.000")

    yt.config.set_proxy(args.yt_server)
    # NOTE(review): the options below are set through item access while the
    # token is set as an attribute -- confirm the wrapper honours this form.
    yt.config.token = os.environ.get('YT_TOKEN')
    yt.config['pickling']['force_using_py_instead_of_pyc'] = True
    yt.config["pickling"]["ignore_yson_bindings_for_incompatible_platforms"] = False
    yt.config['pickling']['python_binary'] = '/skynet/python/bin/python'

    logging.info('Start preparing requests.')

    with yt.TempTable() as all_requests_table, yt.TempTable() as cards_requests_table, yt.TempTable() as heavy_requests_table:
        # NOTE(review): the factor of 100 makes the rate 100 times larger
        # than count / row_count / cards_requests_percentage -- confirm this
        # oversampling is intended. The rate is capped at 1.
        sampling_rate = min(1, float(count) / yt.row_count(args.input_table) * 100 / cards_requests_percentage)
        default_spec = {'pool': args.yt_pool}

        # Started asynchronously so it runs while the heavy-requests map
        # below is in progress.
        cards_mp = yt.run_map(
            cards_requests_mapper,
            args.input_table,
            [all_requests_table, cards_requests_table],
            spec={'sampling': {'sampling_rate': sampling_rate}, 'pool': args.yt_pool},
            sync=False
        )

        yt.run_map(
            heavy_requests_mapper,
            args.input_table,
            heavy_requests_table,
            spec=default_spec
        )

        # Sort in place by the negated body length, i.e. longest bodies first.
        heavy_requests_sorter = yt.run_sort(
            heavy_requests_table,
            heavy_requests_table,
            sort_by=['data_length'],
            spec=default_spec,
            sync=False
        )

        cards_mp.wait()
        create_file(all_requests_table, args.requests_configs, "general", count)
        create_file(cards_requests_table, args.requests_configs, "perf", count)

        heavy_requests_sorter.wait()
        # Floor division keeps the limit an integer under true division too.
        create_file(heavy_requests_table, args.requests_configs, "heavy_perf", count // 10)

    logging.info('Finish preparing requests')


# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
