#include "dict_filter.h"

#include <dict/dictutil/str.h>
#include <util/charset/wide.h>
#include <util/stream/file.h>
#include <util/string/vector.h>

// Builds the filter from a list of dictionary files.
//
// Every non-empty line of every file in `dicts` is treated as a regular
// expression and compiled into its own Pire scanner, appended to Scanners.
// Before compilation each '%' in the line is replaced by a group that matches
// either a run of separator characters (space, tab, '.', '-', '@', '\', '/')
// or the '^' anchor.
TDictFilter::TDictFilter(const TVector<TString>& dicts) {
    for (const TString& path : dicts) {
        // Buffered input: line-by-line reading through an unbuffered stream
        // hits the file descriptor for tiny chunks of data.
        TFileInput in(path);
        TString line;
        while (in.ReadLine(line)) {
            // An empty pattern wrapped by Surround() would match any text and
            // make Reject() return true for every input, so blank lines are
            // skipped.
            if (line.empty()) {
                continue;
            }
            ReplaceAll(line, "%", "([ \\t\\.\\-@\\\\/]+|^)");
            // The lexer consumes UCS-4 code points, so decode the UTF-8 line first.
            std::vector<wchar32> ucs4;
            NPire::NEncodings::Utf8().FromLocal(line.data(), line.data() + line.size(), std::back_inserter(ucs4));
            Scanners.push_back(NPire::TLexer(ucs4.begin(), ucs4.end())
                .SetEncoding(NPire::NEncodings::Utf8())
                .Parse()
                .Surround()
                .Compile<NPire::TNonrelocScanner>());
        }
    }
}

bool TDictFilter::Reject(const TUtf16String& text) const {
    TString str = WideToUTF8(text);
    for (size_t i = 0; i < Scanners.size(); i++) {
        if (NPire::Runner(Scanners[i]).Begin().Run(str).End()) {
            return true;
        }
    }
    return false;
}
