package ru.yandex.mail.so.factors.extractors;

import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.List;

import org.apache.http.concurrent.FutureCallback;

import ru.yandex.io.AllocationlessWriter;
import ru.yandex.io.StringBuilderWriter;
import ru.yandex.io.TrimmingWriter;
import ru.yandex.mail.so.factors.SoFactor;
import ru.yandex.mail.so.factors.SoFunctionInputs;
import ru.yandex.mail.so.factors.types.HtmlSoFactorType;
import ru.yandex.mail.so.factors.types.SoFactorType;
import ru.yandex.mail.so.factors.types.StringSoFactorType;
import ru.yandex.parser.config.ConfigException;
import ru.yandex.sanitizer2.HtmlCDataTag;
import ru.yandex.sanitizer2.HtmlNode;
import ru.yandex.sanitizer2.HtmlNodeVisitor;
import ru.yandex.sanitizer2.HtmlTag;
import ru.yandex.sanitizer2.HtmlText;

/**
 * Extractor which takes a single {@link HtmlSoFactorType#HTML} input and
 * produces a single {@link StringSoFactorType#STRING} output containing the
 * text collected from the HTML node tree.
 *
 * <p>The tree is traversed with {@link HtmlTextCollector}; everything the
 * collector emits passes through a {@link TrimmingWriter} before it lands in
 * the resulting string.
 */
public enum HtmlTextExtractor implements SoFactorsExtractor {
    INSTANCE;

    private static final List<SoFactorType<?>> INPUTS =
        Collections.singletonList(HtmlSoFactorType.HTML);
    private static final List<SoFactorType<?>> OUTPUTS =
        Collections.singletonList(StringSoFactorType.STRING);

    /**
     * Runs the text collector over the given node and returns whatever
     * reached the underlying buffer.
     *
     * @param root node to start the traversal from
     * @return collected text, possibly empty
     * @throws IOException if any of the writers in the chain reports one
     */
    private static String collectText(final HtmlNode root)
        throws IOException
    {
        final StringBuilder buffer = new StringBuilder();
        final Writer sink = new TrimmingWriter(new StringBuilderWriter(buffer));
        root.accept(new HtmlTextCollector(sink));
        return buffer.toString();
    }

    /** Nothing to release, this extractor holds no resources. */
    @Override
    public void close() {
    }

    /** @return single-element list with the HTML factor type */
    @Override
    public List<SoFactorType<?>> inputs() {
        return INPUTS;
    }

    /** @return single-element list with the string factor type */
    @Override
    public List<SoFactorType<?>> outputs() {
        return OUTPUTS;
    }

    /**
     * Collects text from the HTML input and reports it through the callback.
     *
     * <p>The callback is completed with {@code NULL_RESULT} when the HTML
     * input is {@code null} or when no text has been collected (the latter
     * case is also logged). Otherwise it is completed with a single string
     * factor. An {@link IOException} raised during the traversal is passed
     * to {@code callback.failed}.
     *
     * @param context extraction context, used here only for its logger
     * @param inputs function inputs, the HTML node is read from position 0
     * @param callback receiver of the result or of the failure
     */
    @Override
    public void extract(
        final SoFactorsExtractorContext context,
        final SoFunctionInputs inputs,
        final FutureCallback<? super List<SoFactor<?>>> callback)
    {
        final HtmlNode root = inputs.get(0, HtmlSoFactorType.HTML);
        if (root == null) {
            callback.completed(NULL_RESULT);
            return;
        }

        final String text;
        try {
            text = collectText(root);
        } catch (IOException e) {
            // not expected for an in-memory target, reported nevertheless
            callback.failed(e);
            return;
        }

        if (text.isEmpty()) {
            context.logger().info("No text found");
            callback.completed(NULL_RESULT);
            return;
        }

        callback.completed(
            Collections.singletonList(
                StringSoFactorType.STRING.createFactor(text)));
    }

    /**
     * Registers the factor types used by this extractor, HTML first and
     * string second, in the types registry of the given extractors registry.
     *
     * @param registry extractors registry providing the types registry
     * @throws ConfigException declared by the interface contract
     */
    @Override
    public void registerInternals(final SoFactorsExtractorsRegistry registry)
        throws ConfigException
    {
        registry.typesRegistry()
            .registerSoFactorType(HtmlSoFactorType.HTML);
        registry.typesRegistry()
            .registerSoFactorType(StringSoFactorType.STRING);
    }

    /**
     * Visitor which writes the textual content of an HTML node tree to a
     * writer. Tags and CDATA sections are surrounded with line feeds, plain
     * text nodes are written without any additions.
     */
    private static class HtmlTextCollector
        implements HtmlNodeVisitor<Void, IOException>
    {
        private final AllocationlessWriter writer;

        /**
         * @param writer destination, wrapped into an
         *     {@link AllocationlessWriter}
         */
        HtmlTextCollector(final Writer writer) {
            this.writer = new AllocationlessWriter(writer);
        }

        /** Writes a single line feed character to the output. */
        private void lineFeed() throws IOException {
            writer.write('\n');
        }

        /**
         * Writes a line feed, visits every child of the tag in index order
         * and writes one more line feed.
         */
        @Override
        public Void visit(final HtmlTag tag) throws IOException {
            lineFeed();
            // children count is read once, before the traversal starts
            final int children = tag.size();
            int index = 0;
            while (index < children) {
                tag.get(index).accept(this);
                ++index;
            }
            lineFeed();
            return null;
        }

        /** Writes the CDATA text surrounded with line feeds. */
        @Override
        public Void visit(final HtmlCDataTag cdata) throws IOException {
            lineFeed();
            writer.append(cdata.text());
            lineFeed();
            return null;
        }

        /** Writes the text of the node as is. */
        @Override
        public Void visit(final HtmlText text) throws IOException {
            writer.write(text.text());
            return null;
        }
    }
}

