package ru.yandex.market.logshatter.parser.auto;

import com.google.common.base.Strings;
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.logshatter.parser.*;
import ru.yandex.market.logshatter.config.ParserConfig;
import ru.yandex.market.logshatter.config.TimestampFormat;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Config-driven {@link LogParser}: collects named string values from a log line and writes
 * them as one row.
 *
 * <p>Values are gathered into a name-to-value map in this order: the built-in {@code host} and
 * {@code environment} entries, then either separator-split columns or TSKV {@code key=value}
 * pairs, then pattern captures, condition results, requirement checks and aliases. The row time
 * is taken from the {@code timestamp} value or, failing that, from the {@code date} value.
 *
 * <p>NOTE(review): the {@link SimpleDateFormat} instances held by this class are not
 * thread-safe; confirm that a parser instance is never used by several threads at once.
 *
 * @author Tatiana Litvinenko <a href="mailto:tanlit@yandex-team.ru"></a>
 * @date 03.06.2015
 */
public class AutoParser implements LogParser {

    private static final Logger log = LogManager.getLogger();

    /** Finds {@code %{name}} placeholders inside a pattern template. */
    private static final Pattern VALUE_NAME_PATTERN = Pattern.compile("%\\{(.+?)\\}");
    /** Matches a condition of the form {@code %{name} = "value"} or {@code %{name} != "value"}. */
    private static final Pattern CONDITION_PATTERN = Pattern.compile("%\\{(.+?)\\} ?(!=|=) ?\"(.+?)\"");
    /** Pattern key meaning "apply the pattern to the whole input line". */
    private static final String WHOLE_LINE = "message";
    private static final int VALUE_NAME_GROUP = 1;
    private static final int SYMBOL_GROUP = 2;
    private static final int VALUE_GROUP = 3;
    private static final String TIMESTAMP_COLUMN_NAME = "timestamp";
    private static final String TIMESTAMP_MICROS_COLUMN_NAME = "timestamp_micros";
    private static final int UNIXTIME_DOT_MICROS_FRACTIONAL_PART_LENGTH = 6;
    /** Regex fragment substituted for every {@code %{name}} placeholder. */
    private static final String PLACEHOLDER_REGEX = "(.+?)";

    /** Default date format; also used to parse the {@code date} value of a row. */
    private final DateFormat dateFormat;
    /** Per-column date formats keyed by column name; columns not listed use {@link #dateFormat}. */
    private final Map<String, DateFormat> dateFormats;

    private final ParserConfig parserConfig;
    private final List<Column> columns;

    private final Map<String, String> tskvMatch;
    /** Target names of the TSKV renames; empty when no TSKV mapping was configured. */
    private final Set<String> tskvMatchReplacedFields;

    /**
     * Compiled pattern templates keyed by the template text, so that each template is scanned and
     * compiled once instead of once per parsed line.
     */
    private final ConcurrentMap<String, CompiledTemplate> compiledTemplates = new ConcurrentHashMap<>();

    private final EnvironmentMapper environmentMapper = new EnvironmentMapper(EnvironmentMapper.LOGBROKER_PROTOCOL_PREFIX);

    /**
     * Creates a parser for the given configuration.
     *
     * @param parserConfig configuration that supplies the table description, date formats and
     *                     all parsing rules
     */
    public AutoParser(ParserConfig parserConfig) {
        this.parserConfig = parserConfig;
        this.columns = parserConfig.getTableDescription().getColumns();
        this.dateFormat = buildDateFormat(parserConfig.getDateFormat());
        this.dateFormats = parserConfig.getDateFormats().entrySet()
            .stream()
            .collect(Collectors.toMap(Map.Entry::getKey, entry -> buildDateFormat(entry.getValue())));
        this.tskvMatch = parserConfig.getTskvMatch();
        // An empty set instead of null keeps the TSKV branch of parse() free of null checks.
        this.tskvMatchReplacedFields = (tskvMatch != null) ? new HashSet<>(tskvMatch.values()) : new HashSet<>();
    }

    /** Builds an English-locale date format for the given {@link SimpleDateFormat} pattern. */
    private static SimpleDateFormat buildDateFormat(String pattern) {
        return new SimpleDateFormat(pattern, Locale.ENGLISH);
    }

    /**
     * Parses one log line and writes the resulting row to {@code context}.
     *
     * <p>Nothing is written when patterns are configured, none of them matched and
     * {@code ignoreNoMatches} is set, or when neither a {@code timestamp} nor a {@code date}
     * value is available and {@code ignoreNoMatches} is set.
     *
     * @param line    raw log line
     * @param context source of host/file information and sink for the parsed row
     * @throws ParserException if the line has too few separated columns, a value violates a
     *                         requirement, a column has neither a value nor a default, or no
     *                         time value is found while {@code ignoreNoMatches} is off
     * @throws Exception       any other failure raised while converting values
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        Map<String, String> values = new HashMap<>(columns.size());
        values.put("host", context.getHost());
        values.put("environment", environmentMapper.getEnvironment(context).toString());

        // By default an empty string is treated like a missing value. In TSKV mode this can be
        // narrowed to "missing only" through the useDefaultOnEmpty setting.
        Predicate<String> shouldGetDefault = StringUtils::isEmpty;
        if (parserConfig.hasColumns()) {
            readSeparatedColumns(line, context, values);
        } else if (parserConfig.hasTskvMatch()) {
            if (!parserConfig.isUseDefaultOnEmpty()) {
                shouldGetDefault = Objects::isNull;
            }
            readTskvFields(line, values);
        }

        if (parserConfig.hasPatterns() && !applyPatterns(line, values) && parserConfig.isIgnoreNoMatches()) {
            return;
        }
        if (parserConfig.hasConditions()) {
            applyConditions(values);
        }
        if (parserConfig.hasRequirement()) {
            checkRequirements(values);
        }
        if (parserConfig.hasAliases()) {
            applyAliases(values);
        }

        // The first two table columns are not part of the value array handed to context.write.
        // NOTE(review): presumably they are the row time columns filled by the writer - confirm.
        Object[] resultValues = new Object[columns.size() - 2];
        for (int i = 2; i < columns.size(); i++) {
            Column column = columns.get(i);
            resultValues[i - 2] = getValueOrDefault(values.get(column.getName()), column, shouldGetDefault);
        }

        // Null checks rather than containsKey: the map may hold null values (for example an alias
        // whose source is missing), and those must not be passed on to the number/date parsers.
        String timestampValue = values.get(TIMESTAMP_COLUMN_NAME);
        if (timestampValue != null) {
            int timestamp = Integer.parseInt(timestampValue);
            context.write(timestamp, resultValues);
            return;
        }

        String dateValue = values.get("date");
        if (dateValue != null) {
            Date date = dateFormat.parse(dateValue);
            context.write(date, resultValues);
            return;
        }

        if (!parserConfig.isIgnoreNoMatches()) {
            throw new ParserException("Didn't find 'date' or 'timestamp' column: " + values.keySet());
        }
    }

    /**
     * Splits the line by the configured separator and stores the parts under the configured
     * field names, in order. Parts beyond the configured fields are ignored.
     *
     * @throws ParserException if the line yields fewer parts than there are configured fields
     */
    private void readSeparatedColumns(String line, ParserContext context, Map<String, String> values)
        throws ParserException {

        List<String> parserColumns = parserConfig.getFields();
        // Limit -1 keeps trailing empty parts so that trailing empty columns are still counted.
        String[] splits = line.split(parserConfig.getSeparator(), -1);
        if (splits.length < parserColumns.size()) {
            throw new ParserException(
                "Got " + splits.length + " splits for line from " + context.getFile() + ". Required at least " +
                    parserColumns.size() + ". Separator: " + parserConfig.getSeparator()
            );
        }
        for (int i = 0; i < parserColumns.size(); i++) {
            values.put(parserColumns.get(i), splits[i]);
        }
    }

    /**
     * Reads {@code key=value} parts of a TSKV line. Keys present in the TSKV mapping are stored
     * under their mapped name (the mapped {@code timestamp} goes through
     * {@link #parseTimestamp}); other keys are stored under their own name. Parts without
     * {@code =} are skipped.
     */
    private void readTskvFields(String line, Map<String, String> values) throws ParserException {
        Map<String, String> match = parserConfig.getTskvMatch();
        for (String part : line.split(parserConfig.getSeparator())) {
            int eqIndex = part.indexOf("=");
            if (eqIndex < 0) {
                continue;
            }
            String fieldName = part.substring(0, eqIndex);
            String value = part.substring(eqIndex + 1);

            if (match.containsKey(fieldName)) {
                String matchedFieldName = match.get(fieldName);
                if (matchedFieldName.equals(TIMESTAMP_COLUMN_NAME)) {
                    parseTimestamp(value, values);
                } else {
                    values.put(matchedFieldName, value);
                }
            } else if (!tskvMatchReplacedFields.contains(fieldName) || !values.containsKey(fieldName)) {
                // A raw field whose name is also a rename target must not overwrite a value
                // that has already been stored under that name.
                values.put(fieldName, value);
            }
        }
    }

    /**
     * Applies every configured pattern to its source: the value already stored under the
     * pattern's key or, for the {@code message} key, the whole line.
     *
     * @return {@code true} if at least one pattern matched
     */
    private boolean applyPatterns(String line, Map<String, String> values) {
        boolean wasApplied = false;
        for (Map.Entry<String, String> entry : parserConfig.getPatterns().entrySet()) {
            String valueName = entry.getKey();
            String pattern = entry.getValue();

            // Non-short-circuit |= on purpose: every pattern is applied even after a match.
            if (values.containsKey(valueName)) {
                wasApplied |= applyPattern(pattern, values.get(valueName), values);
            } else if (valueName.equals(WHOLE_LINE)) {
                wasApplied |= applyPattern(pattern, line, values);
            }
        }
        return wasApplied;
    }

    /**
     * Evaluates every configured condition and merges its "then" or "else" values into
     * {@code values}. Conditions that cannot be evaluated are skipped.
     */
    private void applyConditions(Map<String, String> values) {
        for (ParserConfig.Condition condition : parserConfig.getConditions()) {
            Boolean result = checkCondition(condition.getCondition(), values);
            if (result == null) {
                continue;
            }
            values.putAll(result ? condition.getThenValuesMap() : condition.getElseValuesMap());
        }
    }

    /**
     * Verifies that every present value with a configured requirement is one of the allowed
     * values. Absent values are not checked.
     *
     * @throws ParserException if a present value is not in its allowed list
     */
    private void checkRequirements(Map<String, String> values) throws ParserException {
        for (Map.Entry<String, List<String>> entry : parserConfig.getRequiredMap().entrySet()) {
            String valueName = entry.getKey();
            String value = values.get(valueName);
            if (value != null && !entry.getValue().contains(value)) {
                throw new ParserException("Column '" + valueName + "' doesn't fit requirement: " + value);
            }
        }
    }

    /**
     * Copies values to their alias names (alias name is the entry key, source name the entry
     * value). A missing source stores {@code null} under the alias.
     */
    private void applyAliases(Map<String, String> values) {
        for (Map.Entry<String, String> entry : parserConfig.getAliases().entrySet()) {
            values.put(entry.getKey(), values.get(entry.getValue()));
        }
    }

    /**
     * Converts a raw string to the column's type, or returns the column default when
     * {@code shouldGetDefault} accepts the raw value.
     *
     * @param value            raw value, possibly {@code null}
     * @param column           target column
     * @param shouldGetDefault decides whether the raw value counts as absent
     * @return the parsed value or the column default
     * @throws ParserException if the value counts as absent and the column has no default
     */
    private Object getValueOrDefault(String value, Column column, Predicate<String> shouldGetDefault)
        throws ParserException {

        if (shouldGetDefault.test(value)) {
            Object defaultObject = column.getDefaultObject();
            if (defaultObject == null) {
                throw new ParserException("Can't find value for column '" + column.getName() + "'");
            }
            return defaultObject;
        }
        return column.getType().parseValue(value, dateFormats.getOrDefault(column.getName(), dateFormat));
    }

    /**
     * Stores a TSKV timestamp according to the configured {@link TimestampFormat}.
     *
     * <p>{@code UNIXTIME} values are stored as they are. {@code UNIXTIME_DOT_MICROS} values are
     * split at the dot: the integer part becomes {@code timestamp}, and the integer part followed
     * by the fraction padded with zeros or cut to six digits becomes {@code timestamp_micros}.
     *
     * @throws ParserException if a {@code UNIXTIME_DOT_MICROS} value has no dot or the format is
     *                         not supported
     */
    private void parseTimestamp(String value, Map<String, String> values) throws ParserException {
        TimestampFormat timestampFormat = parserConfig.getTimestampFormat();
        switch (timestampFormat) {
            case UNIXTIME:
                values.put(TIMESTAMP_COLUMN_NAME, value);
                break;
            case UNIXTIME_DOT_MICROS: {
                int dotIndex = value.indexOf(".");
                if (dotIndex < 0) {
                    throw new ParserException(
                        "Unable to find dot in timestamp value " +
                            value + ", expected unixtime with decimal microseconds"
                    );
                }

                String integerPart = value.substring(0, dotIndex);
                String fractionalPart = value.substring(dotIndex + 1);
                String micros = Strings
                    .padEnd(fractionalPart, UNIXTIME_DOT_MICROS_FRACTIONAL_PART_LENGTH, '0')
                    .substring(0, UNIXTIME_DOT_MICROS_FRACTIONAL_PART_LENGTH);

                values.put(TIMESTAMP_COLUMN_NAME, integerPart);
                values.put(TIMESTAMP_MICROS_COLUMN_NAME, integerPart + micros);
                break;
            }
            default:
                throw new ParserException("Unexpected timestamp format: " + timestampFormat);
        }
    }

    @Override
    public TableDescription getTableDescription() {
        return parserConfig.getTableDescription();
    }

    /**
     * Evaluates a condition of the form {@code %{name} = "value"} or {@code %{name} != "value"}.
     *
     * @return the comparison result, or {@code null} when the condition text does not have the
     *         expected form or the referenced value is absent
     */
    private Boolean checkCondition(String condition, Map<String, String> values) {
        Matcher matcher = CONDITION_PATTERN.matcher(condition);
        if (!matcher.matches()) {
            return null;
        }
        String value = values.get(matcher.group(VALUE_NAME_GROUP));
        if (value == null) {
            return null;
        }

        String expected = matcher.group(VALUE_GROUP);
        switch (matcher.group(SYMBOL_GROUP)) {
            case "!=":
                return !value.equals(expected);
            case "=":
                return value.equals(expected);
            default:
                return null;
        }
    }

    /**
     * Matches {@code line} against a pattern template and, on a full match, stores each captured
     * group under the name of the corresponding {@code %{name}} placeholder.
     *
     * @param pattern template: a regular expression with {@code %{name}} placeholders
     * @param line    text to match; {@code null} never matches
     * @param values  map that receives the captured values
     * @return {@code true} if the whole text matched and the values were stored
     */
    private boolean applyPattern(String pattern, String line, Map<String, String> values) {
        if (line == null) {
            return false;
        }
        CompiledTemplate template = compiledTemplates.computeIfAbsent(pattern, AutoParser::compileTemplate);
        Matcher valuesMatcher = template.regex.matcher(line);
        if (!valuesMatcher.matches() || valuesMatcher.groupCount() < template.valueNames.size()) {
            return false;
        }
        for (int i = 0; i < template.valueNames.size(); i++) {
            values.put(template.valueNames.get(i), valuesMatcher.group(i + 1));
        }
        return true;
    }

    /**
     * Turns a template into a compiled regex by replacing every {@code %{name}} placeholder with
     * a reluctant capturing group, remembering the placeholder names in order of appearance.
     */
    private static CompiledTemplate compileTemplate(String template) {
        Matcher placeholderMatcher = VALUE_NAME_PATTERN.matcher(template);
        List<String> valueNames = new ArrayList<>();
        String regex = template;
        while (placeholderMatcher.find()) {
            regex = regex.replace(placeholderMatcher.group(), PLACEHOLDER_REGEX);
            valueNames.add(placeholderMatcher.group(VALUE_NAME_GROUP));
        }
        return new CompiledTemplate(Pattern.compile(regex), valueNames);
    }

    /** A compiled pattern template together with the value names of its capturing groups. */
    private static final class CompiledTemplate {
        private final Pattern regex;
        private final List<String> valueNames;

        private CompiledTemplate(Pattern regex, List<String> valueNames) {
            this.regex = regex;
            this.valueNames = valueNames;
        }
    }

}
