import argparse
import logging
import os
import yt.wrapper as yt


def create_request(row):
    """Build the request string for one logged feed request.

    Starts from ``row['HttpRequest']``, appends ``&update_feed=false`` and
    then one ``&<param>=<value>`` pair for every non-empty identifier in
    ``row['UserIds']`` (``DeviceId``, ``Uuid``, ``YandexUid``, in that order).

    Args:
        row: Mapping with an ``HttpRequest`` string and a ``UserIds`` entry
            that is either ``None`` or a mapping of identifier name to value.

    Returns:
        The request string with the extra query parameters appended.

    Raises:
        KeyError: If ``row`` has no ``HttpRequest`` or ``UserIds`` key.
    """
    parts = [row['HttpRequest'], '&update_feed=false']

    ids = row['UserIds']
    if ids is None:
        return ''.join(parts)

    id_params = (
        ('deviceid', 'DeviceId'),
        ('uuid', 'Uuid'),
        ('yandexuid', 'YandexUid'),
    )
    for param, key in id_params:
        # .get() plus a truthiness check skips identifiers that are absent,
        # None or empty instead of failing on len(None) / a missing key.
        value = ids.get(key)
        if value:
            # NOTE(review): values are appended verbatim, without URL-encoding;
            # presumably they are already URL-safe -- confirm against the data.
            parts.append('&' + param + '=' + value)

    return ''.join(parts)


def mapper(row):
    """Emit a prepared request for rows that hit one of the hot-feed handlers.

    A row is kept when its ``HttpRequest`` contains ``/pdb/hot_feed`` or
    ``/pdb/heavy_hot_feed``; every other row produces no output.

    Args:
        row: Input table row; ``row['HttpRequest']`` is searched for the
            handler paths and the whole row is passed to ``create_request``.

    Yields:
        A single ``{'request': <str>}`` record for each matching row.
    """
    http_request = row['HttpRequest']
    feed_paths = ('/pdb/hot_feed', '/pdb/heavy_hot_feed')

    matches_feed = any(http_request.find(path) != -1 for path in feed_paths)
    if not matches_feed:
        return

    yield {'request': create_request(row)}


def create_requests_file(table, file_path, count):
    """Write the ``request`` column of a YT table to a text file, one per line.

    Rows are read with ``yt.read_table`` until ``count`` rows have been taken
    or the table ends. The row counter starts at 1, so a ``count`` that is
    never reached (for example 0, or more than the table holds) results in
    every row being written. The running row number is logged every 100 rows.

    The output file is opened only after reading has finished, so a failure
    while reading the table leaves ``file_path`` untouched.

    Args:
        table: Table (path or temp-table handle) passed to ``yt.read_table``.
        file_path: Destination file; it is overwritten.
        count: Number of rows after which reading stops.
    """
    # Collect lines in a list instead of growing one string with ``+=``,
    # which can degrade to quadratic time on large outputs.
    lines = []
    for progress, row in enumerate(yt.read_table(table), 1):
        lines.append(row['request'])
        if progress % 100 == 0:
            logging.info(progress)
        if progress == count:
            break

    with open(file_path, 'w') as f:
        f.writelines(line + '\n' for line in lines)


def _compute_sampling_rate(requests_count, row_count, filtering_coefficient):
    """Return the fraction of input rows to sample, capped at 1."""
    expected_matches = row_count * filtering_coefficient
    if expected_matches <= 0:
        # Empty table or non-positive coefficient: the ratio is undefined
        # (division by zero) or meaningless, so process every row.
        return 1
    return min(float(requests_count) / expected_matches, 1)


def main():
    """Sample feed requests from a YT table and dump them to a local file.

    Command-line arguments (all required):
        --table: Source YT table.
        --cluster: YT proxy to connect to.
        --requests_file: Path of the output file with one request per line.
        --requests_count: Number of requests to write.
        --filtering_coefficient: Multiplied by the table's row count to
            estimate how many rows pass the mapper's filter; used to derive
            the sampling rate of the map operation.

    The YT token is taken from the ``YT_TOKEN`` environment variable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--table', dest='table', required=True)
    parser.add_argument('--cluster', dest='cluster', required=True)
    parser.add_argument('--requests_file', dest='requests_file', required=True)
    parser.add_argument('--requests_count', dest='requests_count', type=int, required=True)
    parser.add_argument('--filtering_coefficient', dest='filtering_coefficient', type=float, required=True)
    args = parser.parse_args()

    # The root logger defaults to WARNING, which would silently drop every
    # logging.info() call made by this script.
    logging.basicConfig(level=logging.INFO)

    yt.config.set_proxy(args.cluster)
    yt.config.token = os.environ.get('YT_TOKEN')
    yt.config['pickling']['python_binary'] = '/skynet/python/bin/python'

    row_count = yt.row_count(args.table)
    sampling_rate = _compute_sampling_rate(
        args.requests_count, row_count, args.filtering_coefficient
    )

    logging.info('Start preparing requests.')

    # The temporary table exists only inside this ``with`` block, so the
    # requests file has to be produced before leaving it.
    with yt.TempTable() as requests_table:
        yt.run_map(
            mapper,
            args.table,
            requests_table,
            spec={"sampling": {"sampling_rate": sampling_rate}},
        )

        create_requests_file(requests_table, args.requests_file, args.requests_count)

    logging.info('Finish preparing requests')

# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
