package ru.yandex.market.logshatter.parser.front;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.logshatter.parser.TableDescription;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.front.logformat.LogField;
import ru.yandex.market.logshatter.parser.front.logformat.LogFormat;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;

/**
 * Parser for XScript error log lines.
 * <p>
 * Each line is matched against {@code LOG_FORMAT}. When the format yields a field map, one row
 * is written to the table described by {@code TABLE_DESCRIPTION}; when it yields {@code null},
 * the line is skipped without writing anything.
 *
 * @author Insaf Latypov <a href="mailto:saferif@yandex-team.ru"></a>
 * @date 22/09/15
 */
public class XScriptErrorLogParser implements LogParser {

    /** Output columns, in the exact order in which {@link #parse} passes values to the context. */
    private static final TableDescription TABLE_DESCRIPTION = TableDescription.createDefault(
        new Column("host", ColumnType.String),
        new Column("vhost", ColumnType.String),
        new Column("level", ColumnType.String),
        new Column("ip", ColumnType.String),
        new Column("info", ColumnType.String),
        new Column("path", ColumnType.String),
        new Column("query", ColumnType.String),
        new Column("xpath", ColumnType.String),
        new Column("uid", ColumnType.String),
        new Column("request_id", ColumnType.String)
    );

    /** Line layout shared by all instances; built once when the class is initialized. */
    private static final LogFormat LOG_FORMAT = buildLogFormat();

    /**
     * Timestamp format of the bracketed date at the start of a line.
     * <p>
     * Kept per instance rather than static: {@link SimpleDateFormat} is not thread-safe.
     * NOTE(review): this still assumes a single parser instance is never used by several
     * threads at once - confirm against the code that creates parsers.
     */
    private final DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");

    /**
     * Assembles the line template and the named fields it refers to.
     * <p>
     * NOTE(review): backslashes inside the field patterns are escaped twice, unlike those in
     * the template itself. Presumably the patterns are substituted into the template in a way
     * that consumes one level of escaping - confirm in LogFormat/LogField before touching them.
     * The optional third LogField argument is presumably a default value - confirm as well.
     *
     * @return the format used to split a log line into named fields
     */
    private static LogFormat buildLogFormat() {
        // Leading part of the line: timestamp, severity, client address, free-form message.
        LogField dateField = new LogField("date", ".*?");
        LogField levelField = new LogField("level", "\\\\w+?");
        LogField ipField = new LogField("ip", "[abcdef\\\\d\\\\.\\\\:]+");
        LogField descriptionField = new LogField("description", "[\\\\s\\\\S]*", "");

        // Page URL; the "page" pattern is composed from the host, path and query fields.
        LogField hostField = new LogField("host", "[^\\\\/]*");
        LogField pathField = new LogField("path", "[^\\\\?\\\\s\\\\:]*", "/");
        LogField queryField = new LogField("query", "[\\\\S]*", "");
        LogField pageField = new LogField("page", "https?:\\\\/\\\\/<<host>><<path>>(?:\\\\?<<query>>)?");

        // Trailing part of the line.
        LogField locationField = new LogField("location", ".*?");
        LogField prevField = new LogField("prev", ".*?");
        LogField uidField = new LogField("uid", "\\\\d*", "");
        LogField requestIdField = new LogField("requestid", "[\\\\/\\\\w]*", "");

        String template =
            "^\\[<<date>>\\]\\s+warn(ing)?:\\s+bugaga\\s*:\\s*<<level>>\\s+<<ip>>\\s*:\\s*<<description>>\\s*" +
            "<<page>>\\s*:\\s*<<location>>\\s+prev:\\s*<<prev>>\\s+(?:uid:\\s*<<uid>>)?\\s*(?:request_id:\\s*<<requestid>>)?";

        return new LogFormat(
            template,
            dateField, levelField, ipField, descriptionField,
            hostField, pathField, queryField, pageField,
            locationField, prevField, uidField, requestIdField
        );
    }

    /**
     * @return the description of the table this parser writes to
     */
    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }

    /**
     * Parses one log line and, if it matches the expected format, writes a single row.
     *
     * @param line    raw log line
     * @param context supplies the source host and receives the resulting row
     * @throws Exception if the extracted date cannot be parsed, or if the format or the
     *                   context fails
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        Map<String, String> parsed = LOG_FORMAT.parse(line);
        if (parsed == null) {
            // The format produced no fields for this line: nothing to write.
            return;
        }

        // Parse the timestamp first so that a malformed date aborts before anything else is read.
        Date timestamp = dateFormat.parse(parsed.get("date"));

        String host = context.getHost();
        String vhost = parsed.get("host");
        String level = parsed.get("level");
        String ip = parsed.get("ip");
        String info = parsed.get("description");   // stored in the "info" column
        String path = parsed.get("path");
        String query = parsed.get("query");
        String xpath = parsed.get("location");     // stored in the "xpath" column
        String uid = parsed.get("uid");
        String requestId = parsed.get("requestid");

        context.write(timestamp, host, vhost, level, ip, info, path, query, xpath, uid, requestId);
    }

}
