package ru.yandex.market.logshatter.parser.direct;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.clickhouse.ddl.engine.EngineType;
import ru.yandex.market.clickhouse.ddl.engine.MergeTree;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.TableDescription;

import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.base.Splitter;


/**
 * Parses Balance log lines into rows of a ClickHouse table.
 *
 * <p>A line is expected to look like
 * {@code [<134>1 ]<yyyy-MM-ddTHH:mm:ss>... [key=value,key=value,...] <payload>}.
 * The bracketed section is split into comma-separated {@code key=value} pairs, from which
 * {@code method}, {@code reqid}, {@code pid} and {@code data_type} are read; everything after
 * the bracketed section is stored as {@code param}.
 *
 * <p>NOTE(review): {@link #dateFormat} is a {@link SimpleDateFormat}, which is not synchronized.
 * Confirm that a single parser instance is never used from several threads at once.
 */
public class BalanceLogParser implements LogParser {

    /** Layout of the timestamp captured by the {@code date} group of the line pattern. */
    public static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";

    /** Per-instance formatter used to turn the captured timestamp into a {@link Date}. */
    public final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);

    /** First column of every row: the log date. */
    public static final Column DATE_COLUMN = new Column("log_date", ColumnType.Date);

    /** Second column of every row: the log timestamp. */
    public static final Column TIMESTAMP_COLUMN = new Column("log_time", ColumnType.DateTime);

    /** Expression passed to the engine as its sampling argument and appended to the primary key. */
    private static final String SAMPLING_KEY = "intHash64(reqid)";

    /** Keys appended to the primary key after the date and timestamp columns. */
    public static List<String> additionalPrimaryKeys = Arrays.asList(SAMPLING_KEY);

    // Must be initialised after DATE_COLUMN, TIMESTAMP_COLUMN and additionalPrimaryKeys,
    // because getPrimary() reads all three.
    private static final List<String> PRIMARY_KEYS = getPrimary();

    /** Name of the column the partition expression is built from. */
    private static final String PARTITION_COLUMN = DATE_COLUMN.getName();

    // Engine arguments: monthly expression over the date column, the primary key list,
    // the sampling expression and 8192 (presumably the index granularity - TODO confirm
    // against the MergeTree constructor).
    private static final MergeTree DEFAULT_ENGINE =
        new MergeTree("toYYYYMM(" + PARTITION_COLUMN + ")", PRIMARY_KEYS, SAMPLING_KEY, 8192);

    /**
     * Data columns that follow the date and timestamp columns.
     * The order must match the argument order of {@code context.write(...)} in {@link #parse}.
     */
    private static final List<Column> DATA_COLUMNS = new ArrayList<Column>(Arrays.asList(
        new Column("method", ColumnType.String),
        new Column("param", ColumnType.String),
        new Column("reqid", ColumnType.Int64),
        new Column("host", ColumnType.String),
        new Column("proc_id", ColumnType.Int64),
        new Column("data_type", ColumnType.String),
        new Column("source", ColumnType.String)
    ));

    /**
     * Builds a table description whose columns are {@link #DATE_COLUMN}, {@link #TIMESTAMP_COLUMN}
     * and then the given columns, in that order.
     *
     * @param engineType engine to use for the table
     * @param columns    data columns to append after the two leading date/time columns
     * @return the resulting table description
     */
    public static TableDescription create(EngineType engineType, List<Column> columns) {
        List<Column> allColumns = new ArrayList<>(columns.size() + 2);
        allColumns.add(DATE_COLUMN);
        allColumns.add(TIMESTAMP_COLUMN);
        allColumns.addAll(columns);
        return new TableDescription(allColumns, engineType);
    }

    private static final TableDescription TABLE_DESCRIPTION = create(DEFAULT_ENGINE, DATA_COLUMNS);

    /**
     * Assembles the primary key: date column name, timestamp column name, then
     * {@link #additionalPrimaryKeys}.
     */
    private static List<String> getPrimary() {
        List<String> keys = new ArrayList<>();
        keys.add(DATE_COLUMN.getName());
        keys.add(TIMESTAMP_COLUMN.getName());
        keys.addAll(additionalPrimaryKeys);
        return keys;
    }

    // Optional "<134>1 " prefix, a timestamp, any non-']' characters up to '[',
    // the bracketed metadata, whitespace, and the rest of the line as the payload.
    private static final String DATA = "^(<134>1\\s+)?(?<date>\\d+-\\d+-\\d+T\\d+:\\d+:\\d+)[^\\]]*\\[(?<metadata>[^\\]]*)\\]\\s+(?<data>.*)$";
    private static final Pattern DATA_REGEX = Pattern.compile(DATA);

    // Lines that do not match DATA but contain one of these messages are dropped silently
    // instead of being reported as unparsable.
    private static final String IGNORE = "^.*(create_new_client_balance:\\s+save\\s+ClientID|Balance.CreateClient\\s+fault|Balance.\\w+\\s+died\\s+-\\s+500|Balance.\\w+\\s+fault).*";
    private static final Pattern IGNORE_REGEX = Pattern.compile(IGNORE);

    /**
     * Parses one log line and writes one row to {@code context}.
     *
     * <p>Empty lines and lines starting with a space or a tab are skipped (presumably they are
     * continuations of a previous multi-line record - TODO confirm). Lines that do not match the
     * expected layout are skipped when they match the ignore pattern and rejected otherwise.
     *
     * @param line    raw log line
     * @param context supplies the host and file name and receives the parsed row
     * @throws IllegalArgumentException if the line matches neither the data nor the ignore pattern
     * @throws NumberFormatException    if {@code reqid} or {@code pid} is missing from the metadata
     *                                  or is not a decimal number
     * @throws Exception                any other failure, e.g. from timestamp parsing or from
     *                                  splitting the metadata into key/value pairs
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        if (line.isEmpty() || line.startsWith(" ") || line.startsWith("\t")) {
            return;
        }

        Matcher matcher = DATA_REGEX.matcher(line);
        if (!matcher.matches()) {
            if (IGNORE_REGEX.matcher(line).matches()) {
                return;
            }
            throw new IllegalArgumentException(String.format("Can't parse line: '%s'", line));
        }

        String host = context.getHost();
        String source = context.getFile().toString();

        Date date = dateFormat.parse(matcher.group("date"));
        String param = matcher.group("data");
        String metadata = matcher.group("metadata");

        Map<String, String> metaMap = Splitter.on(",").withKeyValueSeparator("=").split(metadata);
        String method = metaMap.get("method");
        // Kept boxed so the argument types handed to context.write stay exactly as before.
        Long reqid = requiredLong(metaMap, "reqid", line);
        Long procId = requiredLong(metaMap, "pid", line);
        String dataType = metaMap.get("data_type");

        // Argument order after the date mirrors DATA_COLUMNS.
        context.write(date, method, param, reqid, host, procId, dataType, source);
    }

    /**
     * Reads a mandatory decimal metadata value, failing with a message that names the key and
     * the offending line instead of the bare message produced by {@link Long#parseLong}.
     */
    private static long requiredLong(Map<String, String> metaMap, String key, String line) {
        String value = metaMap.get(key);
        if (value == null) {
            throw new NumberFormatException(
                String.format("Metadata field '%s' is missing in line: '%s'", key, line));
        }
        try {
            return Long.parseLong(value, 10);
        } catch (NumberFormatException e) {
            NumberFormatException detailed = new NumberFormatException(
                String.format("Metadata field '%s' has non-numeric value '%s' in line: '%s'", key, value, line));
            detailed.initCause(e);
            throw detailed;
        }
    }

    /** Returns the table description built from the default engine and the data columns. */
    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }
}
