package ru.yandex.parser.html;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Table of entity names mapped to the text they expand to.
 *
 * <p>The table is filled once, in the constructor, from a comma-separated
 * text description (see {@link #HtmlEntities(InputStream)} for the line
 * format) and is exposed read-only through {@link #entities()}.
 */
public class HtmlEntities {
    // Declared ahead of DEFAULT so that everything the default instance
    // needs is visibly initialised before it is built.
    private static final String DEFAULT_RESOURCE = "entities.txt";
    private static final int CODES_START = 3;
    private static final int HEX_RADIX = 16;

    /**
     * Table parsed from the {@code entities.txt} class-path resource,
     * resolved relative to this class.
     */
    public static final HtmlEntities DEFAULT = loadDefault();

    private final Map<String, String> entities = new HashMap<>();
    // Created once: entities() hands out this same read-only wrapper
    // instead of allocating a new one on every call.
    private final Map<String, String> view =
        Collections.unmodifiableMap(entities);

    /**
     * Parses an entity table from {@code in}, decoding it as UTF-8.
     *
     * <p>Each line is split on {@code ','}. Empty lines and lines whose
     * first character is {@code '#'} are skipped. The first field, trimmed,
     * is the map key. The value is built from the first field at index
     * {@value #CODES_START} or later that starts with {@code " U+"}: every
     * {@code " U+"}-prefixed hexadecimal number in that field is appended as
     * one code point. Lines without such a field are skipped. When a key
     * occurs more than once, the last occurrence wins.
     *
     * <p>The stream is closed before this constructor returns.
     *
     * @param in source of the table, must not be {@code null}
     * @throws IOException if reading from {@code in} fails
     * @throws NullPointerException if {@code in} is {@code null}
     * @throws NumberFormatException if a code point is not valid
     *     hexadecimal
     * @throws IllegalArgumentException if a parsed number is not a valid
     *     Unicode code point
     */
    public HtmlEntities(final InputStream in) throws IOException {
        if (in == null) {
            throw new NullPointerException("in must not be null");
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(in, StandardCharsets.UTF_8)))
        {
            // One builder reused for every line; decode() resets it.
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                String[] components = line.split(",");
                String codesLine = findCodes(components);
                if (codesLine == null) {
                    continue;
                }
                entities.put(components[0].trim(), decode(codesLine, sb));
            }
        }
    }

    /**
     * Returns a read-only view of the parsed table.
     *
     * @return unmodifiable map from entity name to its expansion
     */
    public Map<String, String> entities() {
        return view;
    }

    /**
     * Builds the {@link #DEFAULT} instance from the bundled resource.
     *
     * <p>{@code getResourceAsStream} returns {@code null} for a missing
     * resource; that case is reported by name here rather than left to
     * surface as a message-less {@code NullPointerException} from the
     * reader.
     */
    private static HtmlEntities loadDefault() {
        InputStream in =
            HtmlEntities.class.getResourceAsStream(DEFAULT_RESOURCE);
        if (in == null) {
            throw new IllegalStateException(
                "Resource " + DEFAULT_RESOURCE + " not found for "
                + HtmlEntities.class.getName());
        }
        try {
            return new HtmlEntities(in);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the first field at index {@code CODES_START} or later that
     * starts with {@code " U+"}, or {@code null} if there is none.
     */
    private static String findCodes(final String[] components) {
        for (int i = CODES_START; i < components.length; ++i) {
            if (components[i].startsWith(" U+")) {
                return components[i];
            }
        }
        return null;
    }

    /**
     * Converts a field of {@code " U+"}-prefixed hexadecimal numbers into
     * the string made of those code points, using {@code sb} as scratch
     * space.
     */
    private static String decode(
        final String codesLine,
        final StringBuilder sb)
    {
        String[] codes = codesLine.split(" U\\+");
        sb.setLength(0);
        // Element 0 is whatever precedes the first " U+" marker, so the
        // numbers start at index 1.
        for (int i = 1; i < codes.length; ++i) {
            sb.appendCodePoint(
                Integer.parseInt(codes[i].trim(), HEX_RADIX));
        }
        return sb.toString();
    }
}

