package ru.yandex.solomon.math.stat;

import javax.annotation.Nullable;

import com.google.common.primitives.Doubles;

import ru.yandex.monlib.metrics.histogram.Histograms;

/**
 * This class represents pre-parsed label. It contains original label and its pattern.
 * The pattern is similar to the original label except:
 * - it is lower-case (for better matching)
 * - all numerical character sequences in the original labels are replaced with single-character zero.
 *
 * thus multiple labels representing different values in the same format will have the same pattern and thus can be
 * grouped by this pattern.
 *
 * Also this class remembers last parsed integer value - it is the value we take as this metric's bucket.
 * If label contained no numerical value then this value will be null.
 *
 * We only remember the last numerical part because we want to standardize how we parse the label.
 * It looks like a good approach (most likely it will be either the only value in the label or last value in
 * something like 'host_12_bucket_34_apples'). Using some heuristics is a bad idea because if we have
 * 10 servers pushing data into 10 buckets we won't be able to determine what field the data is:
 * server number or value. Thus let's just parse the data from the latest field.
 *
 * @author Maksim Leonov
 */
public class LabelValueNumberPattern {
    /**
     * Converts a label value into a numeric bound.
     *
     * <p>Resolution order:
     * <ol>
     *   <li>if the label starts with the whole word "inf" (any letter case), {@link Histograms#INF_BOUND}
     *       is returned;</li>
     *   <li>otherwise the whole label is tried as a number via {@link Doubles#tryParse(String)};</li>
     *   <li>otherwise the last numerical part embedded in the label is used
     *       (e.g. {@code 34} for {@code host_12_bucket_34_apples}).</li>
     * </ol>
     *
     * @param labelValue label value to parse, must not be {@code null}
     * @return the parsed finite value, or {@link Histograms#INF_BOUND} for an "inf" label
     * @throws NumberFormatException if the label contains no numerical part, or the parsed value is
     *                               not finite (NaN or infinity)
     */
    public static double parse(String labelValue) throws NumberFormatException {
        if (isInf(labelValue)) {
            return Histograms.INF_BOUND;
        }

        // Fast path: the label as a whole is a number.
        Double doubleValue = Doubles.tryParse(labelValue);
        if (doubleValue == null) {
            // Slow path: look for a number embedded into arbitrary text.
            doubleValue = parseBoundsFromString(labelValue);
        }
        if (doubleValue == null) {
            throw new NumberFormatException(labelValue);
        }
        if (!Double.isFinite(doubleValue)) {
            throw new NumberFormatException(doubleValue.toString());
        }
        return doubleValue;
    }

    /**
     * Matches given label value to a set of strings "inf", "Inf", "INF", etc.
     *
     * <p>Only a whole word matches: the label must either be exactly three characters long or have
     * a space right after the first three characters.
     */
    private static boolean isInf(String labelValue) {
        int len = labelValue.length();
        if (len < 3) {
            return false;
        }

        char c1 = labelValue.charAt(0);
        if (c1 != 'i' && c1 != 'I') {
            return false;
        }
        char c2 = labelValue.charAt(1);
        if (c2 != 'n' && c2 != 'N') {
            return false;
        }
        char c3 = labelValue.charAt(2);
        if (c3 != 'f' && c3 != 'F') {
            return false;
        }

        // match only whole word
        return len == 3 || labelValue.charAt(3) == ' ';
    }

    /**
     * Extracts the last numerical part of the label.
     *
     * <p>A numerical part is a run of ASCII digits optionally containing a single decimal point that is
     * directly followed by a digit (so {@code "12."} yields {@code 12} and {@code "1.2.3"} is split into
     * {@code 1.2} and {@code 3}). Only the last run is converted to a number; earlier runs are skipped
     * without being parsed, so they can never make the parsing fail.
     *
     * @param label label value to scan
     * @return value of the last numerical part, or {@code null} if the label contains no ASCII digits
     */
    @Nullable
    private static Double parseBoundsFromString(String label) {
        int length = label.length();
        int lastStart = -1;
        int lastEnd = -1;

        int idx = 0;
        while (idx < length) {
            if (!isAsciiDigit(label.charAt(idx))) {
                idx++;
                continue;
            }

            // Found the beginning of a numerical run, consume it up to its end.
            int start = idx++;
            boolean seenPoint = false;
            while (idx < length) {
                char c = label.charAt(idx);
                if (isAsciiDigit(c)) {
                    idx++;
                } else if (c == '.' && !seenPoint && idx + 1 < length && isAsciiDigit(label.charAt(idx + 1))) {
                    // Accept one decimal point only, otherwise Double.valueOf would reject the run.
                    seenPoint = true;
                    idx += 2;
                } else {
                    break;
                }
            }

            lastStart = start;
            lastEnd = idx;
        }

        if (lastStart < 0) {
            return null;
        }
        return Double.valueOf(label.substring(lastStart, lastEnd));
    }

    /**
     * Tells whether the character is one of '0'..'9'. Unlike {@link Character#isDigit(char)} it rejects
     * non-ASCII Unicode digits, which {@link Double#valueOf(String)} is unable to parse.
     */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }
}
