package ru.yandex.market.logshatter.parser.direct;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.clickhouse.ddl.engine.EngineType;
import ru.yandex.market.clickhouse.ddl.engine.MergeTree;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.TableDescription;

/**
 * Parses message-log lines and writes one row per message to the {@link ParserContext}.
 *
 * <p>A line has the shape
 * {@code <date>:<time>[.<nanos>] <host>,<service>/<method>,<traceId>:<parentId>:<spanId>}
 * followed by one of three tails (see {@link #PARSE_REGEX}):
 * <ul>
 *   <li>{@code #bulk [<json array>]} - every array element becomes its own row;</li>
 *   <li>{@code  [<prefix>] <logLevel> <className> - <message>};</li>
 *   <li>an optional {@code  [<prefix>]} followed by {@code  <message>}.</li>
 * </ul>
 * Messages longer than {@link #MAX_STRING_SIZE} characters are cut and prepended with a notice.
 */
public class MessagesLogParser implements LogParser {
    public static final String DATE_TIME_PATTERN = "yyyy-MM-dd:HH:mm:ss";
    // SimpleDateFormat is not synchronized; this is an instance field, so each parser instance
    // has its own formatter. NOTE(review): confirm a parser instance is never shared across threads.
    public final SimpleDateFormat dateTimeFormat = new SimpleDateFormat(DATE_TIME_PATTERN);


    public static final Column DATE_COLUMN = new Column("log_date", ColumnType.Date);
    public static final Column TIMESTAMP_COLUMN = new Column("log_time", ColumnType.DateTime);

    private static final String sampleName = "intHash64(span_id)";

    private static final Column SERVICE = new Column("service", ColumnType.String);
    private static final Column METHOD = new Column("method", ColumnType.String);

    /** Maximum message length in characters: 2 MB assuming two-byte characters. */
    private static final int MAX_STRING_SIZE = 1048576;
    /**
     * Number of characters subtracted from {@link #MAX_STRING_SIZE} when a message is cut;
     * presumably leaves room for the {@link #MSG_SPLIT} notice - TODO confirm.
     */
    private static final int TRUNCATION_RESERVE = 150;
    private static final String MSG_SPLIT = "<Строка обрезана. Изначальная длина: %d. Полная версия доступна в YT> ";

    // Declaration order matters below: primaryKeys is computed from additionalPrimaryKeys
    // during static initialization.
    public static List<String> additionalPrimaryKeys = Arrays.asList(sampleName);
    private static final List<String> primaryKeys = getPrimary();
    private static final String partName = DATE_COLUMN.getName();

    private static final MergeTree
        DEFAULT_ENGINE = new MergeTree("toYYYYMM(" + partName + ")", primaryKeys, sampleName, 8192);

    private static final Gson GSON = new Gson();

    /** Parser-specific columns, in the order the values are passed to {@code context.write}. */
    private static final List<Column> columnsStruct = new ArrayList<>(Arrays.asList(
        new Column("log_time_nanos", ColumnType.UInt32),
        new Column("host", ColumnType.String),
        new Column("trace_id", ColumnType.UInt64),
        new Column("parent_id", ColumnType.UInt64),
        new Column("span_id", ColumnType.UInt64),
        new Column("prefix", ColumnType.String),
        new Column("log_level", ColumnType.String),
        new Column("class_name", ColumnType.String),
        new Column("message", ColumnType.String, null, null, "ZSTD(5)")
    ));

    /**
     * Builds the primary key: date, service, method, timestamp, then
     * {@link #additionalPrimaryKeys} as they are at class-initialization time.
     */
    private static List<String> getPrimary() {
        List<String> aggrKeys = new ArrayList<>();
        aggrKeys.add(DATE_COLUMN.getName());
        aggrKeys.add(SERVICE.getName());
        aggrKeys.add(METHOD.getName());
        aggrKeys.add(TIMESTAMP_COLUMN.getName());
        aggrKeys.addAll(additionalPrimaryKeys);
        return aggrKeys;
    }

    /**
     * Creates a table description whose columns are the four common columns
     * (date, timestamp, service, method) followed by {@code columns}.
     *
     * @param engineType table engine to use
     * @param columns    columns appended after the common ones
     * @return the resulting table description
     */
    public static TableDescription create(EngineType engineType, List<Column> columns) {
        List<Column> allColumns = new ArrayList<>(columns.size() + 4);
        allColumns.add(DATE_COLUMN);
        allColumns.add(TIMESTAMP_COLUMN);
        allColumns.add(SERVICE);
        allColumns.add(METHOD);
        allColumns.addAll(columns);
        return new TableDescription(allColumns, engineType);
    }

    private static final TableDescription TABLE_DESCRIPTION = create(DEFAULT_ENGINE, columnsStruct);

    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }

    private static final String DATE =
        "((?<date>\\d\\d\\d\\d-\\d\\d-\\d\\d):(?<time>\\d\\d:\\d\\d:\\d\\d)(\\.(?<nanos>\\d+))?)";
    private static final String HOST = "(?<host>[^,]+)";
    private static final String SERVICE_REGEX = "(?<service>[^\\/]*)";
    private static final String METHOD_REGEX = "(?<method>[^,]*)";
    private static final String TRACE_ID = "(?<traceId>\\d+)";
    private static final String PARENT_ID = "(?<parentId>\\d+)";
    private static final String SPAN_ID = "(?<spanId>\\d+)";
    private static final String BULK_FLAG = "(?<bulkFlag>#bulk)";
    private static final String MESSAGES = "(?<messages>\\[.*\\])";
    private static final String PREFIX_PERL = "\\[(?<prefixPerl>[^\\]]*)\\]";
    private static final String PREFIX_JAVA = "\\[(?<prefixJava>[^\\]]*)\\]";
    private static final String LOG_LEVEL = "(?<logLevel>[_a-zA-Z0-9.\\-]*)";
    private static final String CLASS_NAME = "(?<className>[_a-zA-Z0-9.\\-$]*)";
    private static final String MESSAGE_PERL = "(?<messagePerl>.*)";
    private static final String MESSAGE_JAVA = "(?<messageJava>.*)";
    // Alternatives are tried in order: bulk payload, then the "java" layout
    // ([prefix] level class - message), then the "perl" layout ([prefix]? message).
    private static final String PARSE_REGEX = "^" + DATE + "\\s" + HOST + "," + SERVICE_REGEX + "/" + METHOD_REGEX + "," +
        TRACE_ID + ":" + PARENT_ID + ":" + SPAN_ID +
        "(" + BULK_FLAG + "\\s" + MESSAGES +
        "|(\\s" + PREFIX_JAVA + "\\s" + LOG_LEVEL + "\\s+" + CLASS_NAME + "\\s-\\s" + MESSAGE_JAVA + ")" +
        "|((\\s" + PREFIX_PERL + ")?\\s" + MESSAGE_PERL + "))" + "$";

    private static final Pattern PATTERN = Pattern.compile(PARSE_REGEX);

    /**
     * Parses one log line and writes one row per message it contains.
     *
     * <p>For a {@code #bulk} line the JSON array is decoded and each element is written as a
     * separate row sharing the header fields of the line; otherwise a single row is written.
     *
     * @param line    raw log line
     * @param context sink that receives the parsed rows
     * @throws IllegalArgumentException if the line does not match the expected format
     * @throws Exception                on failures while parsing individual fields
     *                                  (numeric ids, timestamp, bulk JSON payload)
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        Matcher matcher = PATTERN.matcher(line);
        if (!matcher.find()) {
            throw new IllegalArgumentException("Can't parse line: " + line);
        }

        int logTimeNanos = DirectParserUtils.parseNanos(matcher.group("nanos"));

        String host = matcher.group("host");
        String service = matcher.group("service");
        String method = matcher.group("method");
        // NOTE(review): the target columns are UInt64 but the ids are parsed as signed longs,
        // so values above Long.MAX_VALUE fail with NumberFormatException - confirm this is intended.
        long traceId = Long.parseLong(matcher.group("traceId"));
        long parentId = Long.parseLong(matcher.group("parentId"));
        long spanId = Long.parseLong(matcher.group("spanId"));

        String prefixPerl = matcher.group("prefixPerl");
        String prefix = prefixPerl != null ? prefixPerl : orEmpty(matcher.group("prefixJava"));
        String logLevel = orEmpty(matcher.group("logLevel"));
        String className = orEmpty(matcher.group("className"));

        // The timestamp is identical for every row produced from this line, so parse it once.
        Date timestamp = dateTimeFormat.parse(matcher.group("date") + ":" + matcher.group("time"));

        boolean bulk = matcher.group("bulkFlag") != null;
        JsonArray bulkMessages = bulk ? GSON.fromJson(matcher.group("messages"), JsonArray.class) : null;
        String singleMessage = null;
        if (!bulk) {
            String messagePerl = matcher.group("messagePerl");
            singleMessage = messagePerl != null ? messagePerl : matcher.group("messageJava");
        }

        int rowCount = bulk ? bulkMessages.size() : 1;
        for (int i = 0; i < rowCount; i++) {
            // Bulk elements are converted lazily, one per iteration, so rows preceding a
            // malformed element are still written before the failure propagates.
            String message = bulk ? toMessage(bulkMessages.get(i)) : singleMessage;

            context.write(
                timestamp,
                service,
                method,
                logTimeNanos,
                host,
                traceId,
                parentId,
                spanId,
                prefix,
                logLevel,
                className,
                truncate(message)
            );
        }
    }

    /** Returns {@code value}, or an empty string when it is {@code null}. */
    private static String orEmpty(String value) {
        return value != null ? value : "";
    }

    /** Renders one bulk element: primitives as their string value, anything else as a JSON object. */
    private static String toMessage(JsonElement element) {
        return element.isJsonPrimitive() ? element.getAsString() : element.getAsJsonObject().toString();
    }

    /**
     * Cuts messages longer than {@link #MAX_STRING_SIZE} characters, prepending a notice
     * that carries the original length; shorter messages are returned unchanged.
     */
    private static String truncate(String message) {
        if (message.length() <= MAX_STRING_SIZE) {
            return message;
        }
        return String.format(MSG_SPLIT, message.length())
            + message.substring(0, MAX_STRING_SIZE - TRUNCATION_RESERVE);
    }
}
