import argparse
import base64
import json
import logging
import os
import random
import time
import yt.wrapper as yt


def get_parameter(request, param_name):
    """Extract one CGI parameter from a request string as ``&name=value``.

    The parameter is searched for as ``&name=`` anywhere in ``request``.
    If that is not found, it is also accepted as the first parameter of
    the query string, i.e. directly after the first ``?``. A lookup that
    only knows about the ``&`` prefix silently drops a parameter in that
    position.

    Args:
        request: Request string to search.
        param_name: Parameter name, without ``&`` or ``=``.

    Returns:
        ``"&<param_name>=<value>"`` (always with a leading ``&`` so that
        results can be concatenated), or ``""`` if the parameter is absent.
    """
    param_key = param_name + '='

    amp_pos = request.find('&' + param_key)
    if amp_pos != -1:
        # Skip the '&' so name_pos points at the parameter name itself.
        name_pos = amp_pos + 1
    else:
        # Fallback: the parameter may be the first one, right after '?'.
        query_pos = request.find('?')
        if query_pos == -1 or not request.startswith(param_key, query_pos + 1):
            return ""
        name_pos = query_pos + 1

    # The value runs up to the next '&' or to the end of the request.
    value_end = request.find('&', name_pos + len(param_key))
    if value_end == -1:
        value_end = len(request)

    return '&' + request[name_pos:value_end]


def make_clear_request(request, i, requests_configs):
    """Rebuild ``request`` for plan ``i`` from white-listed CGI parameters only.

    The result starts with ``/pdb/master_item_recommender/<plan name>?`` and
    is followed by whatever ``get_parameter`` returns for each name in the
    optional top-level ``"global_cgi_white_list"`` and then for each name in
    the plan's optional ``"cgi_white_list"``.

    Args:
        request: Original request string the parameters are taken from.
        i: Index into ``requests_configs["plan_configs"]``.
        requests_configs: Configuration dict holding ``"plan_configs"`` and,
            optionally, ``"global_cgi_white_list"``.

    Returns:
        The rebuilt request string.
    """
    plan_config = requests_configs["plan_configs"][i]
    pieces = ["/pdb/master_item_recommender/" + plan_config["name"] + "?"]

    # Global white list first, then the plan-specific one.
    if "global_cgi_white_list" in requests_configs:
        for cgi_param in requests_configs["global_cgi_white_list"]:
            pieces.append(get_parameter(request, cgi_param))

    if "cgi_white_list" in plan_config:
        for cgi_param in plan_config["cgi_white_list"]:
            pieces.append(get_parameter(request, cgi_param))

    return "".join(pieces)


def make_deterministic(request):
    """Return ``request`` with the fixed ``&seed=test_seed`` parameter appended."""
    seed_suffix = "&seed=test_seed"
    return "".join((request, seed_suffix))


def normalize_request(request, i, requests_configs):
    """Clean ``request`` for plan ``i`` and append the fixed seed parameter.

    Runs ``make_clear_request`` and passes its result to
    ``make_deterministic``.

    Args:
        request: Original request string.
        i: Index into ``requests_configs["plan_configs"]``.
        requests_configs: Configuration dict forwarded to
            ``make_clear_request``.

    Returns:
        The normalized request string.
    """
    cleared = make_clear_request(request, i, requests_configs)
    return make_deterministic(cleared)


class Mapper(object):
    """Callable passed to ``yt.run_map`` that splits requests by plan.

    For each input row with status 200, every plan whose ``"name"`` occurs
    in the row's request gets a table switch for the plan's index followed
    by a row holding the normalized request and the original one.
    """

    def __init__(self, configs):
        """Store the requests configuration (must contain ``"plan_configs"``)."""
        self.requests_configs = configs

    def __call__(self, row):
        """Yield a table switch and an output row for each matching plan.

        Args:
            row: Input row with ``'status'`` (convertible to int) and
                ``'request'`` (string).

        Yields:
            ``yt.create_table_switch(i)`` followed by a dict with keys
            ``'request'`` (normalized for plan ``i``) and ``'real_request'``
            (the untouched input request), for each matching plan ``i``.
        """
        if int(row['status']) != 200:
            return

        # Always match and normalize against the original request. Reusing
        # one mutable row would make later plans see the request already
        # rewritten for an earlier plan, and would mutate rows that were
        # already yielded.
        original_request = row['request']
        for i, plan_config in enumerate(self.requests_configs["plan_configs"]):
            if original_request.find(plan_config["name"]) == -1:
                continue
            out_row = {
                'request': normalize_request(original_request, i, self.requests_configs),
                'real_request': original_request,
            }
            yield yt.create_table_switch(i)
            yield out_row


def prepare_request(row):
    """Render a row's request as a length-prefixed HTTP/1.0 GET.

    Args:
        row: Mapping with a ``'request'`` entry used as the request target.

    Returns:
        ``"<len>\\n<http request>\\r\\n"`` where ``<http request>`` is the
        GET line plus a ``User-Agent`` header and a blank line, and
        ``<len>`` is ``len()`` of that request text.
    """
    payload = ''.join((
        'GET {} HTTP/1.0\r\n'.format(row['request']),
        'User-Agent: Yandex.Tank (sandbox)\r\n\r\n',
    ))
    payload_size = len(payload)
    return '{}\n{}\r\n'.format(payload_size, payload)


def create_requests_file(table, file_path, count):
    """Write formatted requests from a YT table to a local file.

    Rows are read with ``yt.read_table``, formatted by ``prepare_request``
    and written to ``file_path`` in one go after reading has finished.

    Args:
        table: Table to read rows from.
        file_path: Path of the file to (over)write.
        count: Reading stops once exactly this many rows have been
            processed. If the running counter never equals ``count``
            (e.g. the table is shorter), all rows are written.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    chunks = []
    for progress, row in enumerate(yt.read_table(table), 1):
        chunks.append(prepare_request(row))
        if progress % 100 == 0:
            logging.info(progress)
        if progress == count:
            break

    with open(file_path, 'w') as f:
        f.write(''.join(chunks))


def main():
    """Prepare per-plan request files from a YT table.

    Steps, in order:
      1. Parse command-line arguments; ``--requests_configs`` is a JSON
         string with a ``"plan_configs"`` list.
      2. Configure the YT client (proxy, token from ``YT_TOKEN``, pickling).
      3. Run ``Mapper`` over the input table into one temp table per plan.
      4. For each plan, sample its temp table in place with an unordered
         merge and write up to ``count`` requests to the plan's
         ``file_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_table', dest='input_table', required=True)
    parser.add_argument('--server', dest='yt_server', required=True)
    parser.add_argument('--pool', dest='yt_pool', required=True)
    parser.add_argument('--requests_configs', dest='requests_configs', required=True)
    args = parser.parse_args()

    args.requests_configs = json.loads(args.requests_configs)
    plan_configs = args.requests_configs["plan_configs"]

    yt.config.set_proxy(args.yt_server)
    yt.config.token = os.environ.get('YT_TOKEN')
    yt.config['pickling']['force_using_py_instead_of_pyc'] = True
    yt.config["pickling"]["ignore_yson_bindings_for_incompatible_platforms"] = False
    yt.config['pickling']['python_binary'] = '/skynet/python/bin/python'

    logging.info('Start preparing requests.')

    # One output table per plan, in the same order as plan_configs.
    temp_tables = [yt.create_temp_table() for _ in plan_configs]
    yt.run_map(
        Mapper(args.requests_configs),
        args.input_table,
        temp_tables,
        spec={'pool': args.yt_pool}
    )

    # The input table is not modified here, so query its size once rather
    # than once per plan.
    input_row_count = yt.row_count(args.input_table)

    for temp_table, config in zip(temp_tables, plan_configs):
        count = int(config["count"])
        # NOTE(review): the rate is based on the input table's row count
        # (not the filtered temp table's) and scaled by 100, then capped
        # at 1 -- presumably an oversampling margin; confirm.
        sampling_rate = min(1, float(count) / input_row_count * 100)
        yt.run_merge(
            temp_table,
            temp_table,
            mode="unordered",
            spec={'sampling': {'sampling_rate': sampling_rate}, 'pool': args.yt_pool}
        )
        create_requests_file(
            temp_table,
            config["file_path"],
            count
        )

    logging.info('Finish preparing requests')

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
