#!/usr/bin/env python2.7
# coding=utf-8


from optparse import OptionParser
import optparse
from collections import defaultdict, Counter
import random


def get_options():
    """Parse the command-line flags and return the resulting values object.

    Four flags are registered, each taking one string argument:
    ``-i/--inputData``, ``-o/--outputData``, ``-s/--step`` and
    ``-l/--sampleLen``.  Values are returned unconverted (as strings), and
    a flag that was not supplied is left at optparse's default of ``None``.
    Positional arguments are parsed but discarded.
    """
    parser = OptionParser()
    flag_pairs = (
        ("-i", "--inputData"),
        ("-o", "--outputData"),
        ("-s", "--step"),
        ("-l", "--sampleLen"),
    )
    for short_flag, long_flag in flag_pairs:
        parser.add_option(short_flag, long_flag, type=str)
    parsed_values, _positional = parser.parse_args()
    return parsed_values


def _read_samples(path, sample_len):
    """Collect up to *sample_len* URLs from the TSV file at *path*, by host.

    Each usable row contributes its fourth column (the URL) to the list
    keyed by its third column (the host).  Rows whose first column is the
    literal ``'ceil'`` are skipped and do not count towards the limit
    (presumably header/marker rows -- confirm against the data producer).
    Rows with fewer than four columns (e.g. blank lines) are skipped too.
    """
    urls_by_host = defaultdict(list)
    taken = 0
    with open(path) as source:
        for line in source:
            if taken >= sample_len:
                break
            # Strip the line terminator first: otherwise a URL sitting in
            # the last column would carry '\n' into every output row.
            fields = line.rstrip('\r\n').split('\t')
            if fields[0] == 'ceil' or len(fields) < 4:
                continue
            urls_by_host[fields[2]].append(fields[3])
            taken += 1
    return urls_by_host


def _build_rows(urls_by_host, step):
    """Return the output rows, keeping at most *step* URLs per host.

    A host with no more than *step* URLs keeps all of them with weight
    ``1``.  A larger host is reduced to *step* randomly chosen URLs, each
    weighted by ``len(urls) / step`` so the sample still represents the
    host's full volume.
    """
    rows = []
    for urls in urls_by_host.values():
        if len(urls) <= step:
            chosen, weight = urls, 1
        else:
            # random.sample picks without replacement and leaves the
            # original list untouched (no in-place shuffle needed).
            chosen = random.sample(urls, step)
            weight = float(len(urls)) / step
        for url in chosen:
            # NOTE(review): the URL is embedded in the query string
            # verbatim (no percent-encoding), as in the original output
            # format -- confirm that downstream consumers expect this.
            turbo_url = ('https://yandex.ru/turbo?text=' + str(url)
                         + '&exp_flags=adv-disabled')
            rows.append('\t'.join((str(url), turbo_url, str(weight))))
    return rows


def main():
    """Sample URLs per host from the input TSV and write weighted rows.

    Reads at most ``--sampleLen`` usable rows from ``--inputData``, groups
    their URLs by host, keeps at most ``--step`` URLs per host, and writes
    one line per kept URL to ``--outputData`` in the form
    ``<url>\\t<turbo url>\\t<weight>``.

    Raises:
        SystemExit: if any of the four command-line options is missing.
        ValueError: if ``--step`` or ``--sampleLen`` is not an integer, or
            if ``--step`` is smaller than 1.
    """
    options = get_options()

    required = ('inputData', 'outputData', 'step', 'sampleLen')
    missing = [name for name in required if getattr(options, name) is None]
    if missing:
        # Fail with a readable message instead of a TypeError from int(None).
        raise SystemExit('missing required option(s): '
                         + ', '.join('--' + name for name in missing))

    # Convert once, up front, rather than on every loop iteration.
    step = int(options.step)
    sample_len = int(options.sampleLen)
    if step < 1:
        # Zero would silently produce no output for large hosts; a negative
        # value would yield negative weights.
        raise ValueError('--step must be a positive integer, got %d' % step)

    urls_by_host = _read_samples(options.inputData, sample_len)
    rows = _build_rows(urls_by_host, step)

    with open(options.outputData, 'w') as res:
        res.writelines(row + '\n' for row in rows)

# Run the sampler only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or file I/O.
if __name__ == '__main__':
    main()
