package ru.yandex.market.logshatter.parser.direct;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;

import com.google.common.base.CharMatcher;
import org.apache.commons.io.IOUtils;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.clickhouse.ddl.engine.MergeTree;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.TableDescription;

import static ru.yandex.market.logshatter.parser.direct.DirectParserUtils.nvl;


/**
 * Parser for log lines that carry a compressed Java thread dump.
 *
 * <p>Each input line consists of exactly five whitespace-separated fields:
 * <ol>
 *   <li>timestamp in {@link #DATETIME_FORMAT};</li>
 *   <li>host;</li>
 *   <li>pid (parsed as {@code long});</li>
 *   <li>service;</li>
 *   <li>the thread dump text: UTF-8, gzip-compressed, then Base64-encoded.</li>
 * </ol>
 *
 * <p>The decoded dump is cut into a global header (everything before the first line that
 * starts with a double quote), a footer (everything from the first line starting with
 * {@code JNI}) and per-thread chunks. One row is written per input line; per-thread values
 * are written as parallel lists.
 */
public class JavaThreaddumpLogParser implements LogParser {
    public static final String DATETIME_FORMAT = "yyyy-MM-dd:HH:mm:ss";

    /** Number of whitespace-separated fields expected in an input line. */
    private static final int FIELDS_COUNT = 5;

    /** Marker stored in the name/state column when the corresponding line cannot be parsed. */
    private static final String PARSE_ERROR = "_parse_error";

    // NOTE(review): SimpleDateFormat is not thread-safe. This is a per-instance field, so it is
    // only safe if a parser instance is never used from several threads at once — confirm.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat(DATETIME_FORMAT);

    /** Splits the dump body before every line that starts with a double quote (a thread header). */
    private static final Pattern THREAD_SPLIT_RE = Pattern.compile("\n+(?=\")");
    private static final Pattern LINE_SPLIT_RE = Pattern.compile("\\s");

    private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();

    /** First line of a thread chunk: quoted name, optional #id, optional "daemon", key=value pairs, status. */
    private static final Pattern THREAD_DATA_HEADER_RE = Pattern.compile("^\"(?<name>[^\"]+)\"" +
        "(?:\\s+#(?<id>\\d+))?" +
        "(?:\\s+(?<daemon>daemon))?" +
        "(?:\\s+\\S+=\\S*)*" + // prio=8 os_prio=0 cpu=26.41ms elapsed=517100.70s
        "\\s+(?<status>[^\n\\[]+?)" +
        "(?:\\s+\\[\\S+\\])?" + // [0x00007fd3a8ff3000]
        "$");

    /** Second line of a thread chunk; dots are escaped so only the literal prefix matches. */
    private static final Pattern THREAD_DATA_STATE_RE = Pattern.compile(
        "^\\s+java\\.lang\\.Thread\\.State: (?<state>[^\n]+?)$"
    );

    /**
     * Describes the destination table: date/time columns, host, pid, service, the dump's global
     * header and footer, and the {@code thread.*} array columns holding per-thread values.
     *
     * @return a new table description on every call
     */
    @Override
    public TableDescription getTableDescription() {
        Column dateColumn = new Column("log_date", ColumnType.Date);
        Column timestampColumn = new Column("log_time", ColumnType.DateTime);

        return new TableDescription(
            new ArrayList<>(Arrays.asList(
                dateColumn,
                timestampColumn,

                new Column("host", ColumnType.String),
                new Column("pid", ColumnType.Int64),
                new Column("service", ColumnType.String),

                new Column("header", ColumnType.String),
                new Column("footer", ColumnType.String),

                new Column("thread.id", ColumnType.ArrayInt64),
                new Column("thread.name", ColumnType.ArrayString),
                new Column("thread.is_daemon", ColumnType.ArrayUInt8),
                new Column("thread.status", ColumnType.ArrayString),
                new Column("thread.state", ColumnType.ArrayString),
                new Column("thread.header", ColumnType.ArrayString),
                new Column("thread.stacktrace", ColumnType.ArrayString)
            )),
            // NOTE(review): arguments look like (partition expression, key columns, sampling,
            // index granularity) — confirm against the MergeTree constructor.
            new MergeTree(
                String.format("toYYYYMM(%s)", dateColumn.getName()),
                new ArrayList<>(Arrays.asList(dateColumn.getName(), timestampColumn.getName())),
                null,
                1024
            ));
    }

    /**
     * Parses one log line and writes a single row to {@code context}.
     *
     * @param line    input line with five whitespace-separated fields (see class description)
     * @param context receiver of the parsed row
     * @throws IllegalStateException    if the line does not contain exactly five fields
     * @throws IllegalArgumentException if the payload is not valid Base64 or cannot be gunzipped
     * @throws Exception                on other parse failures (for example
     *                                  {@link java.text.ParseException} for the timestamp or
     *                                  {@link NumberFormatException} for the pid)
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        String[] split = LINE_SPLIT_RE.split(line);
        if (split.length != FIELDS_COUNT) {
            throw new IllegalStateException("Incorrect fields num");
        }

        Date date = dateFormat.parse(split[0]);
        String host = split[1];
        long pid = Long.parseLong(split[2]);
        String service = split[3];
        String base64Data = split[4];

        String data = new String(
            gzipDecompress(Base64.getDecoder().decode(base64Data)),
            StandardCharsets.UTF_8);

        // Footer: everything from the first line that starts with "JNI" to the end of the dump.
        String footer = "";
        int footerStart = data.indexOf("\nJNI");
        if (footerStart != -1) {
            footer = data.substring(footerStart + 1);
            data = data.substring(0, footerStart);
        }

        // Global header: everything before the first line that starts with a double quote.
        // If the dump already starts with a thread header there is no global header, and
        // searching for "\n\"" would wrongly swallow the first thread.
        String globalHeader = "";
        if (!data.startsWith("\"")) {
            int headerEnd = data.indexOf("\n\"");
            if (headerEnd != -1) {
                globalHeader = data.substring(0, headerEnd);
                data = data.substring(headerEnd + 1);
            }
        }

        List<Long> threadIds = new ArrayList<>();
        List<String> threadNames = new ArrayList<>();
        List<Integer> threadIsDaemon = new ArrayList<>();
        List<String> threadStatuses = new ArrayList<>();
        List<String> threadStates = new ArrayList<>();
        List<String> threadHeaders = new ArrayList<>();
        List<String> threadStacktraces = new ArrayList<>();

        for (String threadData : THREAD_SPLIT_RE.split(data)) {
            String trimmed = WHITESPACE_MATCHER.trimFrom(threadData);
            // String.split never yields an empty array for a trimmed string ("" gives [""]),
            // so emptiness has to be checked on the chunk itself.
            if (trimmed.isEmpty()) {
                continue;
            }
            String[] lines = trimmed.split("\n");

            long id = 0L;
            String name = PARSE_ERROR;
            int isDaemon = 0;
            String status = "";
            String state = "";
            String header = lines[0];
            String stacktrace = "";

            Matcher headerMatcher = THREAD_DATA_HEADER_RE.matcher(header);
            if (headerMatcher.find()) {
                // The "#id" part is optional in the header; a missing id is stored as 0.
                id = Long.parseLong(nvl(headerMatcher.group("id"), "0"));
                name = headerMatcher.group("name");
                isDaemon = "daemon".equals(headerMatcher.group("daemon")) ? 1 : 0;
                status = headerMatcher.group("status");
            }

            // A chunk with only a header line keeps an empty state; a second line that is not
            // a "java.lang.Thread.State: ..." line is reported as a parse error.
            if (lines.length > 1) {
                Matcher stateMatcher = THREAD_DATA_STATE_RE.matcher(lines[1]);
                state = stateMatcher.find() ? stateMatcher.group("state") : PARSE_ERROR;
            }

            // Everything after the header and state lines is kept verbatim as the stack trace.
            if (lines.length > 2) {
                stacktrace = Arrays.stream(lines).skip(2).collect(Collectors.joining("\n"));
            }

            threadIds.add(id);
            threadNames.add(name);
            threadIsDaemon.add(isDaemon);
            threadStatuses.add(status);
            threadStates.add(state);
            threadHeaders.add(header);
            threadStacktraces.add(stacktrace);
        }

        context.write(
            date,

            host,
            pid,
            service,

            globalHeader,
            footer,

            threadIds,
            threadNames,
            threadIsDaemon,
            threadStatuses,
            threadStates,
            threadHeaders,
            threadStacktraces
        );
    }

    /**
     * Decompresses a gzip-compressed byte array fully into memory.
     *
     * @param compressed gzip-compressed bytes
     * @return the decompressed bytes
     * @throws IllegalArgumentException if the input cannot be read as gzip data; the original
     *                                  {@link IOException} is attached as the cause
     */
    public static byte[] gzipDecompress(byte[] compressed) {
        try (
            ByteArrayInputStream bis = new ByteArrayInputStream(compressed);
            GZIPInputStream gis = new GZIPInputStream(bis)
        ) {
            return IOUtils.toByteArray(gis);
        } catch (IOException e) {
            throw new IllegalArgumentException("Can't decompress data", e);
        }
    }
}
