#include "pre_describe_filter.h"
#include "queue_item.h"

#include <library/cpp/bloom_filter/bloomfilter.h>

#include <util/generic/queue.h>
#include <util/random/random.h>

using namespace NCrypta;
using namespace NCrypta::NSiberia;

// Folds every row supplied by `reader` into at most one TIdsToDescribe row.
//
// While reading:
//   * ids are pushed into a bounded priority queue that never holds more than
//     State->GetSampleSize() entries once a push/pop step has completed;
//   * ActualSegmentSize values are summed;
//   * GroupID is copied from a row only while the output has none yet;
//   * Bloom filters, when a row has one, are unioned into a single filter
//     created with the options of the first row that carries a filter.
//
// If no ids were retained, nothing is written to `writer`.
void TPreDescribeFilter::Do(TReader* reader, TWriter* writer) {
    // Read the limit once instead of on every id.
    // NOTE(review): assumes State is not modified while Do() is running — confirm.
    const auto sampleSize = State->GetSampleSize();

    i32 actualSegmentSize = 0;

    TMaybe<TBloomFilter> unionFilter;

    TIdsToDescribe output;

    // Each id gets a random key; whenever the queue grows past the limit its
    // top element is dropped.
    // NOTE(review): presumably TQueueItem is ordered by Random, which would make
    // this a uniform random sample — verify against queue_item.h.
    TPriorityQueue<TQueueItem> ids;

    for (; reader->IsValid(); reader->Next()) {
        const auto& row = reader->GetRow();

        for (const auto& id: row.GetIds().GetIds()) {
            ids.push({.Random=RandomNumber<ui32>(), .Id=id});

            if (ids.size() > sampleSize) {
                ids.pop();
            }
        }

        actualSegmentSize += row.GetActualSegmentSize();

        if (!output.HasGroupID()) {
            output.SetGroupID(row.GetGroupID());
        }

        if (row.HasFilter()) {
            if (!unionFilter.Defined()) {
                // The first filter seen defines the options of the union filter
                // and of the output row.
                const auto& options = row.GetFilter().GetOptions();
                *output.MutableFilter()->MutableOptions() = options;
                unionFilter = TBloomFilter(options.GetCapacity(), options.GetErrorRate());
            }

            // Deserialize this row's filter and merge it into the accumulated one.
            TBloomFilter filter;
            TStringInput stringFilter(row.GetFilter().GetBloomFilter());
            filter.Load(&stringFilter);
            unionFilter->Union(filter);
        }
    }

    // No ids retained: emit nothing, even if filters or sizes were accumulated.
    if (ids.empty()) {
        return;
    }

    if (unionFilter.Defined()) {
        TStringStream stream;
        unionFilter->Save(&stream);
        output.MutableFilter()->SetBloomFilter(stream.Str());
    }

    output.SetActualSegmentSize(actualSegmentSize);

    // Drain the queue into the output in the queue's pop order.
    while (!ids.empty()) {
        *output.MutableIds()->MutableIds()->Add() = ids.top().Id;
        ids.pop();
    }

    writer->AddRow(output);
}
