package ru.yandex.analyzer;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;

import java.util.regex.Pattern;

/**
 * A {@link TokenFilter} that performs regular expression substitution on
 * the term text of each token, similar to {@code sed 's/pattern/replacement/g'}.
 *
 * <p>Every match of the pattern inside a term is rewritten with
 * {@link java.util.regex.Matcher#replaceAll(String)}, so the replacement
 * string may contain group references such as {@code $1}. Tokens whose
 * text becomes empty after substitution are dropped from the stream.
 *
 * <p>Only the {@link CharTermAttribute} is modified; dropped tokens are
 * skipped without adjusting any other attribute of the following token.
 */
public class SedFilter extends TokenFilter {
    /** Compiled expression matched against every incoming term. */
    private final Pattern pattern;
    /** Replacement text, in {@code Matcher.replaceAll} syntax. */
    private final String replacement;
    /** Term attribute shared with the wrapped stream. */
    private final CharTermAttribute termAtt;

    /**
     * Creates a filter that rewrites the terms produced by {@code in}.
     *
     * @param in the token stream to read tokens from
     * @param pattern the regular expression to search for in each term
     * @param replacement the text substituted for every match of
     *     {@code pattern}
     */
    public SedFilter(final TokenStream in, final Pattern pattern,
        final String replacement)
    {
        super(in);
        this.pattern = pattern;
        this.replacement = replacement;
        termAtt = addAttribute(CharTermAttribute.class);
    }

    /**
     * Advances to the next token whose substituted text is non-empty and
     * stores that text in the term attribute.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *     wrapped stream is exhausted
     * @throws IOException if the wrapped stream fails to advance
     */
    @Override
    public final boolean incrementToken() throws IOException {
        // Iterate instead of recursing: a long run of tokens that all
        // reduce to the empty string must not grow the call stack.
        while (input.incrementToken()) {
            final String str =
                pattern.matcher(termAtt).replaceAll(replacement);
            if (!str.isEmpty()) {
                termAtt.setEmpty();
                termAtt.append(str);
                return true;
            }
            // Substitution erased the whole term: skip this token.
        }
        return false;
    }
}
