package ru.yandex.util.unicode;

import java.nio.charset.StandardCharsets;

import ru.yandex.unsafe.NativeMemory2;

/**
 * Static helpers that encode UTF-16 text as UTF-8 and render integers as
 * ASCII decimal digits directly into caller-supplied storage, without
 * allocating intermediate {@code String} or {@code byte[]} objects.
 *
 * <p>None of the methods check the capacity of the destination; the caller
 * must size it beforehand (see {@link #utf8Length(CharSequence)} and
 * {@link #stringSize(int)}).
 */
public final class UnicodeUtil {
    /** First UTF-16 high (leading) surrogate code unit. */
    public static final int UNI_SUR_HIGH_START = 0xD800;
    /** Last UTF-16 high (leading) surrogate code unit. */
    public static final int UNI_SUR_HIGH_END = 0xDBFF;
    /** First UTF-16 low (trailing) surrogate code unit. */
    public static final int UNI_SUR_LOW_START = 0xDC00;
    /** Last UTF-16 low (trailing) surrogate code unit. */
    public static final int UNI_SUR_LOW_END = 0xDFFF;
    /** U+FFFD, emitted (as EF BF BD) in place of an unpaired surrogate. */
    public static final int UNI_REPLACEMENT_CHAR = 0xFFFD;

    /** Number of payload bits carried by each half of a surrogate pair. */
    private static final int HALF_SHIFT = 10;
    /** ASCII text of {@code Integer.MIN_VALUE}, which cannot be negated. */
    private static final byte[] INT_MIN =
        Integer.toString(Integer.MIN_VALUE).getBytes(StandardCharsets.US_ASCII);
    /** ASCII text of {@code Long.MIN_VALUE}, which cannot be negated. */
    private static final byte[] LONG_MIN =
        Long.toString(Long.MIN_VALUE).getBytes(StandardCharsets.US_ASCII);

    /**
     * Constant chosen so that
     * {@code (high << HALF_SHIFT) + low + SURROGATE_OFFSET} is the code point
     * encoded by the surrogate pair {@code (high, low)}.
     */
    private static final int SURROGATE_OFFSET =
        Character.MIN_SUPPLEMENTARY_CODE_POINT
        - (UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;

    /** Largest value having 1, 2, ... 10 decimal digits. */
    private static final int[] SIZE_TABLE =
        {9, 99, 999, 9999, 99999, 999999, 9999999,
        99999999, 999999999, Integer.MAX_VALUE};

    /** ASCII digit for a value; only indices 0..9 are used by this class. */
    private static final byte[] DIGITS = {
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
        (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f', (byte) 'g', (byte) 'h',
        (byte) 'i', (byte) 'j', (byte) 'k', (byte) 'l', (byte) 'm', (byte) 'n',
        (byte) 'o', (byte) 'p', (byte) 'q', (byte) 'r', (byte) 's', (byte) 't',
        (byte) 'u', (byte) 'v', (byte) 'w', (byte) 'x', (byte) 'y', (byte) 'z'};

    /** Tens digit (ASCII) of every value in 0..99. */
    private static final byte[] DIGIT_TENS = {
        (byte) '0', (byte) '0', (byte) '0', (byte) '0', (byte) '0', (byte) '0',
        (byte) '0', (byte) '0', (byte) '0', (byte) '0',
        (byte) '1', (byte) '1', (byte) '1', (byte) '1', (byte) '1', (byte) '1',
        (byte) '1', (byte) '1', (byte) '1', (byte) '1',
        (byte) '2', (byte) '2', (byte) '2', (byte) '2', (byte) '2', (byte) '2',
        (byte) '2', (byte) '2', (byte) '2', (byte) '2',
        (byte) '3', (byte) '3', (byte) '3', (byte) '3', (byte) '3', (byte) '3',
        (byte) '3', (byte) '3', (byte) '3', (byte) '3',
        (byte) '4', (byte) '4', (byte) '4', (byte) '4', (byte) '4', (byte) '4',
        (byte) '4', (byte) '4', (byte) '4', (byte) '4',
        (byte) '5', (byte) '5', (byte) '5', (byte) '5', (byte) '5', (byte) '5',
        (byte) '5', (byte) '5', (byte) '5', (byte) '5',
        (byte) '6', (byte) '6', (byte) '6', (byte) '6', (byte) '6', (byte) '6',
        (byte) '6', (byte) '6', (byte) '6', (byte) '6',
        (byte) '7', (byte) '7', (byte) '7', (byte) '7', (byte) '7', (byte) '7',
        (byte) '7', (byte) '7', (byte) '7', (byte) '7',
        (byte) '8', (byte) '8', (byte) '8', (byte) '8', (byte) '8', (byte) '8',
        (byte) '8', (byte) '8', (byte) '8', (byte) '8',
        (byte) '9', (byte) '9', (byte) '9', (byte) '9', (byte) '9', (byte) '9',
        (byte) '9', (byte) '9', (byte) '9', (byte) '9'};

    /** Ones digit (ASCII) of every value in 0..99. */
    private static final byte[] DIGIT_ONES = {
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9',
        (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
        (byte) '6', (byte) '7', (byte) '8', (byte) '9'};

    private UnicodeUtil() {
    }

    /**
     * Encodes the whole of {@code s} as UTF-8 into {@code out}.
     *
     * @param s text to encode
     * @param out destination array
     * @param outOffset index in {@code out} of the first byte to write
     */
    public static void toUtf8(
        final CharSequence s,
        final byte[] out,
        final int outOffset)
    {
        toUtf8(s, 0, s.length(), out, outOffset);
    }

    //CSOFF: MagicNumber
    //CSOFF: ParameterNumber
    /**
     * Encodes {@code length} chars of {@code s}, starting at {@code offset},
     * as UTF-8 into {@code out}. A valid surrogate pair becomes one 4-byte
     * sequence; an unpaired or out-of-order surrogate is replaced by U+FFFD.
     * At most three bytes are produced per input char.
     *
     * @param s text to encode
     * @param offset index of the first char to encode
     * @param length number of chars to encode
     * @param out destination array
     * @param outOffset index in {@code out} of the first byte to write
     * @return index in {@code out} just past the last byte written
     */
    public static int toUtf8(
        final CharSequence s,
        final int offset,
        final int length,
        final byte[] out,
        final int outOffset)
    {
        final int end = offset + length;
        int upto = outOffset;

        for (int i = offset; i < end; i++) {
            final int code = (int) s.charAt(i);

            if (code < 0x80) {
                out[upto++] = (byte) code;
            } else if (code < 0x800) {
                out[upto++] = (byte) (0xC0 | (code >> 6));
                out[upto++] = (byte) (0x80 | (code & 0x3F));
            } else if (code < 0xD800 || code > 0xDFFF) {
                out[upto++] = (byte) (0xE0 | (code >> 12));
                out[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
                out[upto++] = (byte) (0x80 | (code & 0x3F));
            } else {
                // surrogate: must be a high surrogate with a char after it
                if (code < 0xDC00 && (i < end - 1)) {
                    final int low = (int) s.charAt(i + 1);
                    // confirm valid low surrogate and write pair
                    if (low >= 0xDC00 && low <= 0xDFFF) {
                        final int utf32 =
                            (code << HALF_SHIFT) + low + SURROGATE_OFFSET;
                        i++;
                        out[upto++] = (byte) (0xF0 | (utf32 >> 18));
                        out[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
                        out[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
                        out[upto++] = (byte) (0x80 | (utf32 & 0x3F));
                        continue;
                    }
                }
                // replace unpaired surrogate or out-of-order low surrogate
                // with substitution character
                out[upto++] = (byte) 0xEF;
                out[upto++] = (byte) 0xBF;
                out[upto++] = (byte) 0xBD;
            }
        }
        return upto;
    }

    //CSOFF: MagicNumber
    //CSOFF: ParameterNumber
    /**
     * Encodes {@code length} chars of the array {@code s}, starting at
     * {@code offset}, as UTF-8 into {@code out}. A valid surrogate pair
     * becomes one 4-byte sequence; an unpaired or out-of-order surrogate is
     * replaced by U+FFFD.
     *
     * @param s chars to encode
     * @param offset index of the first char to encode
     * @param length number of chars to encode
     * @param out destination array
     * @param outOffset index in {@code out} of the first byte to write
     * @return index in {@code out} just past the last byte written
     */
    public static int toUtf8(
        final char[] s,
        final int offset,
        final int length,
        final byte[] out,
        final int outOffset)
    {
        final int end = offset + length;
        int upto = outOffset;

        for (int i = offset; i < end; i++) {
            final int code = (int) s[i];

            if (code < 0x80) {
                out[upto++] = (byte) code;
            } else if (code < 0x800) {
                out[upto++] = (byte) (0xC0 | (code >> 6));
                out[upto++] = (byte) (0x80 | (code & 0x3F));
            } else if (code < 0xD800 || code > 0xDFFF) {
                out[upto++] = (byte) (0xE0 | (code >> 12));
                out[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
                out[upto++] = (byte) (0x80 | (code & 0x3F));
            } else {
                // surrogate: must be a high surrogate with a char after it
                if (code < 0xDC00 && (i < end - 1)) {
                    final int low = (int) s[i + 1];
                    // confirm valid low surrogate and write pair
                    if (low >= 0xDC00 && low <= 0xDFFF) {
                        final int utf32 =
                            (code << HALF_SHIFT) + low + SURROGATE_OFFSET;
                        i++;
                        out[upto++] = (byte) (0xF0 | (utf32 >> 18));
                        out[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
                        out[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
                        out[upto++] = (byte) (0x80 | (utf32 & 0x3F));
                        continue;
                    }
                }
                // replace unpaired surrogate or out-of-order low surrogate
                // with substitution character
                out[upto++] = (byte) 0xEF;
                out[upto++] = (byte) 0xBF;
                out[upto++] = (byte) 0xBD;
            }
        }
        return upto;
    }

    /**
     * Encodes the whole of {@code s} as UTF-8 into {@code out}.
     *
     * @param s text to encode
     * @param out destination memory
     * @param outOffset position passed to {@code out.setByte} for the first
     *     byte
     */
    public static void toUtf8(
        final CharSequence s,
        final NativeMemory2 out,
        final int outOffset)
    {
        toUtf8(s, 0, s.length(), out, outOffset);
    }

    /**
     * Encodes {@code length} chars of {@code s}, starting at {@code offset},
     * as UTF-8, handing each byte to {@code out.setByte(position, value)}
     * with consecutive positions beginning at {@code outOffset}. Surrogates
     * are handled as in the {@code byte[]} variants.
     *
     * <p>NOTE(review): {@code setByte} is presumably an indexed store into
     * the memory region; confirm against {@code NativeMemory2}.
     *
     * @param s text to encode
     * @param offset index of the first char to encode
     * @param length number of chars to encode
     * @param out destination memory
     * @param outOffset position of the first byte to write
     */
    public static void toUtf8(
        final CharSequence s,
        final int offset,
        final int length,
        final NativeMemory2 out,
        final int outOffset)
    {
        final int end = offset + length;
        int upto = outOffset;

        for (int i = offset; i < end; i++) {
            final int code = (int) s.charAt(i);

            if (code < 0x80) {
                out.setByte(upto++, (byte) code);
            } else if (code < 0x800) {
                out.setByte(upto++, (byte) (0xC0 | (code >> 6)));
                out.setByte(upto++, (byte) (0x80 | (code & 0x3F)));
            } else if (code < 0xD800 || code > 0xDFFF) {
                out.setByte(upto++, (byte) (0xE0 | (code >> 12)));
                out.setByte(upto++, (byte) (0x80 | ((code >> 6) & 0x3F)));
                out.setByte(upto++, (byte) (0x80 | (code & 0x3F)));
            } else {
                // surrogate: must be a high surrogate with a char after it
                if (code < 0xDC00 && (i < end - 1)) {
                    final int low = (int) s.charAt(i + 1);
                    // confirm valid low surrogate and write pair
                    if (low >= 0xDC00 && low <= 0xDFFF) {
                        final int utf32 =
                            (code << HALF_SHIFT) + low + SURROGATE_OFFSET;
                        i++;
                        out.setByte(
                            upto++,
                            (byte) (0xF0 | (utf32 >> 18)));
                        out.setByte(
                            upto++,
                            (byte) (0x80 | ((utf32 >> 12) & 0x3F)));
                        out.setByte(
                            upto++,
                            (byte) (0x80 | ((utf32 >> 6) & 0x3F)));
                        out.setByte(
                            upto++,
                            (byte) (0x80 | (utf32 & 0x3F)));
                        continue;
                    }
                }
                // replace unpaired surrogate or out-of-order low surrogate
                // with substitution character
                out.setByte(upto++, (byte) 0xEF);
                out.setByte(upto++, (byte) 0xBF);
                out.setByte(upto++, (byte) 0xBD);
            }
        }
    }

    /**
     * Encodes the whole of {@code s} as UTF-8 starting at {@code out}.
     *
     * @param s text to encode
     * @param out value passed to {@code NativeMemory2.unboxedSetByte} for
     *     the first byte
     */
    public static void toUtf8(final CharSequence s, final long out) {
        toUtf8(s, 0, s.length(), out);
    }

    /**
     * Encodes {@code length} chars of {@code s}, starting at {@code offset},
     * as UTF-8, handing each byte to
     * {@code NativeMemory2.unboxedSetByte(position, value)} with consecutive
     * positions beginning at {@code out}. Surrogates are handled as in the
     * {@code byte[]} variants.
     *
     * <p>NOTE(review): {@code out} is presumably a raw native address and
     * nothing here validates it; confirm against {@code NativeMemory2}.
     *
     * @param s text to encode
     * @param offset index of the first char to encode
     * @param length number of chars to encode
     * @param out position of the first byte to write
     */
    public static void toUtf8(
        final CharSequence s,
        final int offset,
        final int length,
        final long out)
    {
        final int end = offset + length;
        long outPtr = out;

        for (int i = offset; i < end; i++) {
            final int code = (int) s.charAt(i);

            if (code < 0x80) {
                NativeMemory2.unboxedSetByte(outPtr++, (byte) code);
            } else if (code < 0x800) {
                NativeMemory2.unboxedSetByte(
                    outPtr++,
                    (byte) (0xC0 | (code >> 6)));
                NativeMemory2.unboxedSetByte(
                    outPtr++,
                    (byte) (0x80 | (code & 0x3F)));
            } else if (code < 0xD800 || code > 0xDFFF) {
                NativeMemory2.unboxedSetByte(
                    outPtr++,
                    (byte) (0xE0 | (code >> 12)));
                NativeMemory2.unboxedSetByte(
                    outPtr++,
                    (byte) (0x80 | ((code >> 6) & 0x3F)));
                NativeMemory2.unboxedSetByte(
                    outPtr++,
                    (byte) (0x80 | (code & 0x3F)));
            } else {
                // surrogate: must be a high surrogate with a char after it
                if (code < 0xDC00 && (i < end - 1)) {
                    final int low = (int) s.charAt(i + 1);
                    // confirm valid low surrogate and write pair
                    if (low >= 0xDC00 && low <= 0xDFFF) {
                        final int utf32 =
                            (code << HALF_SHIFT) + low + SURROGATE_OFFSET;
                        i++;
                        NativeMemory2.unboxedSetByte(
                            outPtr++,
                            (byte) (0xF0 | (utf32 >> 18)));
                        NativeMemory2.unboxedSetByte(
                            outPtr++,
                            (byte) (0x80 | ((utf32 >> 12) & 0x3F)));
                        NativeMemory2.unboxedSetByte(
                            outPtr++,
                            (byte) (0x80 | ((utf32 >> 6) & 0x3F)));
                        NativeMemory2.unboxedSetByte(
                            outPtr++,
                            (byte) (0x80 | (utf32 & 0x3F)));
                        continue;
                    }
                }
                // replace unpaired surrogate or out-of-order low surrogate
                // with substitution character
                NativeMemory2.unboxedSetByte(outPtr++, (byte) 0xEF);
                NativeMemory2.unboxedSetByte(outPtr++, (byte) 0xBF);
                NativeMemory2.unboxedSetByte(outPtr++, (byte) 0xBD);
            }
        }
    }

    /**
     * Writes the decimal ASCII representation of {@code val} (with a leading
     * {@code '-'} for negative values) into {@code buf} starting at index
     * {@code off}.
     *
     * <p>Unlike the text encoders above, the result is the byte count, not
     * the end index.
     *
     * @param val value to format
     * @param buf destination array
     * @param off index in {@code buf} of the first byte to write
     * @return number of bytes written
     */
    public static int toUtf8(
        final int val,
        final byte[] buf,
        final int off)
    {
        if (val == Integer.MIN_VALUE) {
            // -Integer.MIN_VALUE overflows, so copy the precomputed text
            System.arraycopy(INT_MIN, 0, buf, off, INT_MIN.length);
            return INT_MIN.length;
        }
        int q;
        int r;
        int stringSize;
        if (val < 0) {
            stringSize = stringSize(-val) + 1;
        } else {
            stringSize = stringSize(val);
        }
        // Digits come out least significant first, so fill backwards from
        // one past the last byte of the output region.
        int charPos = off + stringSize;
        int i = val;
        byte sign = 0;

        if (i < 0) {
            sign = (byte) '-';
            i = -i;
        }
        // Two digits per iteration while the value is large
        while (i >= 65536) {
            q = i / 100;
            // really: r = i - (q * 100);
            r = i - ((q << 6) + (q << 5) + (q << 2));
            i = q;
            buf[--charPos] = DIGIT_ONES[r];
            buf[--charPos] = DIGIT_TENS[r];
        }
        // One digit per iteration; for i < 65536 the multiply-and-shift
        // below equals i / 10 exactly
        for (;;) {
            q = (i * 52429) >>> (16 + 3);
            // really: r = i - (q * 10);
            r = i - ((q << 3) + (q << 1));
            buf[--charPos] = DIGITS[r];
            i = q;
            if (i == 0) {
                break;
            }
        }
        if (sign != 0) {
            buf[--charPos] = sign;
        }
        return stringSize;
    }

    /**
     * Writes the decimal ASCII representation of {@code val} (with a leading
     * {@code '-'} for negative values) into {@code buf} starting at index
     * {@code off}.
     *
     * <p>Unlike the text encoders above, the result is the byte count, not
     * the end index.
     *
     * @param val value to format
     * @param buf destination array
     * @param off index in {@code buf} of the first byte to write
     * @return number of bytes written
     */
    public static int toUtf8(
        final long val,
        final byte[] buf,
        final int off)
    {
        if (val == Long.MIN_VALUE) {
            // -Long.MIN_VALUE overflows, so copy the precomputed text
            System.arraycopy(LONG_MIN, 0, buf, off, LONG_MIN.length);
            return LONG_MIN.length;
        }
        int stringSize;
        if (val < 0) {
            stringSize = stringSize(-val) + 1;
        } else {
            stringSize = stringSize(val);
        }
        long q;
        int r;
        // Digits come out least significant first, so fill backwards from
        // one past the last byte of the output region.
        int charPos = off + stringSize;
        byte sign = 0;

        long i = val;
        if (i < 0) {
            sign = (byte) '-';
            i = -i;
        }

        // 64-bit division only while the value does not fit in an int
        while (i > Integer.MAX_VALUE) {
            q = i / 100;
            // really: r = i - (q * 100);
            r = (int) (i - ((q << 6) + (q << 5) + (q << 2)));
            i = q;
            buf[--charPos] = DIGIT_ONES[r];
            buf[--charPos] = DIGIT_TENS[r];
        }

        int q2;
        int i2 = (int) i;
        while (i2 >= 65536) {
            q2 = i2 / 100;
            // really: r = i2 - (q2 * 100);
            r = i2 - ((q2 << 6) + (q2 << 5) + (q2 << 2));
            i2 = q2;
            buf[--charPos] = DIGIT_ONES[r];
            buf[--charPos] = DIGIT_TENS[r];
        }

        // Fall thru to fast mode for smaller numbers: for i2 < 65536 the
        // multiply-and-shift below equals i2 / 10 exactly
        for (;;) {
            q2 = (i2 * 52429) >>> (16 + 3);
            // really: r = i2 - (q2 * 10);
            r = i2 - ((q2 << 3) + (q2 << 1));
            buf[--charPos] = DIGITS[r];
            i2 = q2;
            if (i2 == 0) {
                break;
            }
        }
        if (sign != 0) {
            buf[--charPos] = sign;
        }
        return stringSize;
    }

    /**
     * Writes the decimal ASCII representation of {@code val} into
     * {@code buf} starting at index {@code off}.
     *
     * @param val value to format
     * @param buf destination array
     * @param off index in {@code buf} of the first byte to write
     * @return number of bytes written
     */
    public static int toUtf8(
        final short val,
        final byte[] buf,
        final int off)
    {
        return toUtf8((int) val, buf, off);
    }

    /**
     * Writes the decimal ASCII representation of {@code val} into
     * {@code buf} starting at index {@code off}.
     *
     * @param val value to format
     * @param buf destination array
     * @param off index in {@code buf} of the first byte to write
     * @return number of bytes written
     */
    public static int toUtf8(
        final byte val,
        final byte[] buf,
        final int off)
    {
        return toUtf8((int) val, buf, off);
    }

    /**
     * Returns the number of bytes the {@code toUtf8} text encoders produce
     * for the whole of {@code s}.
     *
     * @param s text to measure
     * @return encoded length in bytes
     */
    public static int utf8Length(final CharSequence s) {
        return utf8Length(s, 0, s.length());
    }

    /**
     * Returns the number of bytes the {@code toUtf8} text encoders produce
     * for {@code length} chars of {@code s} starting at {@code offset}:
     * 1 to 3 bytes per ordinary char, 4 per valid surrogate pair and 3 (the
     * replacement character) per unpaired surrogate.
     *
     * @param s text to measure
     * @param offset index of the first char to measure
     * @param length number of chars to measure
     * @return encoded length in bytes
     */
    public static int utf8Length(
        final CharSequence s,
        final int offset,
        final int length)
    {
        final int end = offset + length;

        int upto = 0;
        for (int i = offset; i < end; i++) {
            final int code = (int) s.charAt(i);

            if (code < 0x80) {
                upto++;
            } else if (code < 0x800) {
                upto += 2;
            } else if (code < 0xD800 || code > 0xDFFF) {
                upto += 3;
            } else {
                // surrogate: must be a high surrogate with a char after it
                if (code < 0xDC00 && (i < end - 1)) {
                    final int low = (int) s.charAt(i + 1);
                    // confirm valid low surrogate and count the pair
                    if (low >= 0xDC00 && low <= 0xDFFF) {
                        i++;
                        upto += 4;
                        continue;
                    }
                }
                // unpaired surrogate or out-of-order low surrogate is
                // counted as the substitution character
                upto += 3;
            }
        }
        return upto;
    }

    /**
     * Returns the number of decimal digits in {@code x}. Intended for
     * non-negative input; any negative {@code x} yields 1.
     *
     * @param x value to measure
     * @return digit count, from 1 to 10
     */
    public static int stringSize(final int x) {
        for (int i = 0;; i++) {
            if (x <= SIZE_TABLE[i]) {
                return i + 1;
            }
        }
    }

    /**
     * Returns the number of decimal digits in {@code x}. Intended for
     * non-negative input; any negative {@code x} yields 1.
     *
     * @param x value to measure
     * @return digit count, from 1 to 19
     */
    public static int stringSize(final long x) {
        long p = 10;
        for (int i = 1; i < 19; i++) {
            if (x < p) {
                return i;
            }
            p = 10 * p;
        }
        return 19;
    }
}
