package ru.yandex.calendar.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.mozilla.universalchardet.UniversalDetector;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.misc.lang.StringUtils;

/**
 * Static helpers for guessing the charset of raw bytes and for converting
 * strings to and from an ASCII-only form that uses decimal numeric character
 * references ({@code &#NNN;}).
 *
 * @author gutman
 */
public class CharsetUtils {

    /** Upper bound on the number of leading bytes handed to the charset detector. */
    private static final int MAX_DETECTION_BYTES = 10000;

    /** Charset name returned when the detector reports nothing. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Decimal numeric character reference, e.g. {@code &#1234;}; compiled once and reused. */
    private static final Pattern NUMERIC_ENTITY = Pattern.compile("&#(\\d+);");

    /**
     * Guesses the charset of the given bytes using {@link UniversalDetector}.
     * Only the first {@value #MAX_DETECTION_BYTES} bytes (or fewer, if the
     * array is shorter) are examined.
     *
     * @param bytes raw data to inspect; must not be {@code null}
     * @return the charset name reported by the detector, or {@code "UTF-8"}
     *         when the detector's result is empty
     */
    public static String guessEncoding(byte[] bytes) {
        UniversalDetector detector = new UniversalDetector(null);
        int sampleLength = Math.min(bytes.length, MAX_DETECTION_BYTES);
        detector.handleData(bytes, 0, sampleLength);
        detector.dataEnd();
        String detected = detector.getDetectedCharset();
        return StringUtils.isNotEmpty(detected) ? detected : DEFAULT_ENCODING;
    }

    /**
     * Replaces every {@code char} with a value of 0x80 or above by a decimal
     * reference {@code &#NNN;}; chars below 0x80 are copied as is.
     * The conversion works on UTF-16 code units, so a supplementary character
     * is written as two references (one per surrogate).
     *
     * @param s text to encode
     * @return the encoded text, or {@code s} itself when
     *         {@code StringUtils.isEmpty(s)} holds
     */
    public static String toAscii(String s) {
        if (StringUtils.isEmpty(s)) { return s; }
        int length = s.length();
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; ++i) {
            char c = s.charAt(i);
            if (c < 0x80) {
                sb.append(c);
            } else {
                sb.append("&#").append((int) c).append(';');
            }
        }
        return sb.toString();
    }

    /**
     * Replaces every decimal reference {@code &#NNN;} in the input by the
     * character it denotes. References produced by {@link #toAscii(String)}
     * (including surrogate halves) are restored to the original chars.
     *
     * @param s text possibly containing {@code &#NNN;} references
     * @return the decoded text, or {@code s} itself when
     *         {@code StringUtils.isEmpty(s)} holds or no reference is present
     */
    public static String fromAscii(String s) {
        if (StringUtils.isEmpty(s)) { return s; }
        Matcher m = NUMERIC_ENTITY.matcher(s);
        if (!m.find()) {
            return s;
        }
        // StringBuffer rather than StringBuilder: the StringBuilder overloads of
        // appendReplacement/appendTail only exist since Java 9.
        StringBuffer sb = new StringBuffer(s.length());
        do {
            // The decoded text may be '$' or '\', which appendReplacement would
            // otherwise interpret as a group reference / escape and throw.
            m.appendReplacement(sb, Matcher.quoteReplacement(decodeReference(m.group(1))));
        } while (m.find());
        m.appendTail(sb);
        return sb.toString();
    }

    /**
     * Turns the digits of one numeric reference into text. A valid
     * supplementary code point becomes a surrogate pair; any other value is
     * narrowed to a single {@code char}. Digits that cannot be parsed fall
     * back to 0 and therefore yield U+0000.
     */
    private static String decodeReference(String digits) {
        int code = Cf.Integer.parseSafe(digits).getOrElse(0).intValue();
        if (Character.isSupplementaryCodePoint(code)) {
            return new String(Character.toChars(code));
        }
        return String.valueOf((char) code);
    }

    /**
     * Removes ASCII control characters from the input: everything below 32
     * except tab (9) and line feed (10), plus DEL (127). Note that carriage
     * return (13) is removed as well.
     *
     * @param s text to clean; may be {@code null}
     * @return the text without those characters, or {@code null} when
     *         {@code s} is {@code null}
     */
    public static String cutUselessAsciiControls(String s) {
        if (s == null) {
            return null;
        }
        int length = s.length();
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; ++i) {
            char ch = s.charAt(i);
            if (!isUselessAsciiControl(ch)) {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /** Tells whether {@code ch} is a control character that {@link #cutUselessAsciiControls} drops. */
    private static boolean isUselessAsciiControl(char ch) {
        return ch < 9 || (ch > 10 && ch < 32) || ch == 127;
    }

    /**
     * Checks whether the array starts with the three-byte UTF-8 byte order
     * mark {@code EF BB BF}.
     *
     * @param bytes data to inspect; must not be {@code null}
     * @return {@code true} if the array has at least three bytes and the first
     *         three are the UTF-8 BOM
     */
    public static boolean hasUtf8Bom(byte[] bytes) {
        return bytes.length >= 3
            && bytes[0] == (byte) 0xEF
            && bytes[1] == (byte) 0xBB
            && bytes[2] == (byte) 0xBF;
    }

}
