package ru.yandex.market.logshatter.parser.l7balancer;

import com.google.common.base.Strings;
import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.logshatter.parser.TableDescription;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses tab-separated L7 balancer access log lines and writes one row per line.
 *
 * <p>Fields are read positionally, in this order:
 * <ol>
 *   <li>source address as {@code host[:port]};</li>
 *   <li>timestamp, e.g. {@code 2016-12-05T12:00:00.123456+0300};</li>
 *   <li>request line {@code "METHOD URL PROTOCOL"} (double quotes are dropped);</li>
 *   <li>duration in seconds followed by one trailing character that is cut off
 *       (presumably a unit suffix such as {@code s} - confirm against real logs);</li>
 *   <li>referer (double quotes are dropped);</li>
 *   <li>request host as {@code host[:port]} (double quotes are dropped);</li>
 *   <li>optional processing trace, matched against {@code PATTERN}.</li>
 * </ol>
 * Missing ports are written as {@code -1}; missing trace parts as empty strings.
 *
 * @author Ilya Sapachev <a href="mailto:sid-hugo@yandex-team.ru"></a>
 * @date 05.12.16
 */
public class L7BalancerLogParser implements LogParser {
    private static final TableDescription TABLE_DESCRIPTION = TableDescription.createDefault(
        new Column("host", ColumnType.String),
        new Column("source_host", ColumnType.String),
        new Column("source_port", ColumnType.Int32),
        new Column("http_method", ColumnType.String),
        new Column("url", ColumnType.String),
        new Column("protocol", ColumnType.String),
        new Column("duration_ms", ColumnType.Int32),
        new Column("referer", ColumnType.String),
        new Column("request_host", ColumnType.String),
        new Column("request_port", ColumnType.Int32),
        new Column("regexp", ColumnType.String),
        new Column("backend_host", ColumnType.String),
        new Column("backend_port", ColumnType.Int32),
        new Column("response_code", ColumnType.String)
    );

    /** Value written to port columns when the log line carries no port. */
    private static final int UNKNOWN_PORT = -1;

    /**
     * Millisecond-precision timestamp format. The input is cut down to three fraction digits
     * by {@link #truncateToMillis(String)} before it reaches this formatter.
     *
     * <p>Kept per instance because {@link SimpleDateFormat} is not thread-safe.
     * NOTE(review): this is only safe if a parser instance is never used by several threads
     * at once - confirm against the caller.
     */
    private final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ");

    /** Matches a fraction of a second that is longer than three digits; group 1 is {@code .mmm}. */
    private static final Pattern SUB_MILLIS_PATTERN = Pattern.compile("(\\.\\d{3})\\d+");

    private static final Pattern PATTERN = Pattern.compile(
        "\\[regexp (\\S+)*(?:.* \\[(?:proxy (\\S+):(\\d*).* )*(\\S[^\\]]+)\\])?\\]"
    );

    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }

    /**
     * Parses one log line and writes the resulting row to {@code context}.
     *
     * @param line    raw tab-separated log line
     * @param context receives the parsed row; also supplies the value of the {@code host} column
     * @throws Exception if a mandatory field is missing or malformed (for example
     *                   {@link ArrayIndexOutOfBoundsException} for a short line,
     *                   {@link NumberFormatException} for a bad number, or a parse failure
     *                   of the timestamp)
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        String[] fields = line.split("\t");
        int i = 0;

        String[] source = fields[i++].split(":");
        String sourceHost = source[0];
        int sourcePort = parsePort(source);

        // SimpleDateFormat treats 'S' as a millisecond count, so a six-digit (microsecond)
        // fraction would be added as up to ~1000 extra seconds; drop the sub-millisecond digits.
        Date handleDate = dateFormat.parse(truncateToMillis(fields[i++]));

        String[] request = fields[i++].replace("\"", "").split(" ");
        String httpMethod = request[0];
        String url = request[1];
        String protocol = request[2];

        // Drop the single trailing character before converting seconds to milliseconds.
        String rawDuration = fields[i++];
        String seconds = rawDuration.substring(0, rawDuration.length() - 1);
        int durationMs = Math.round(Float.parseFloat(seconds) * TimeUnit.SECONDS.toMillis(1));

        String referer = fields[i++].replace("\"", "");

        String[] requestAddress = fields[i++].replace("\"", "").split(":");
        String requestHost = requestAddress[0];
        int requestPort = parsePort(requestAddress);

        String regexp = "";
        String backendHost = "";
        int backendPort = UNKNOWN_PORT;
        String responseCode = "";

        // The processing trace is optional: absent or non-matching traces keep the defaults.
        if (i < fields.length) {
            Matcher matcher = PATTERN.matcher(fields[i]);
            if (matcher.find()) {
                regexp = Strings.nullToEmpty(matcher.group(1));
                backendHost = Strings.nullToEmpty(matcher.group(2));
                String port = matcher.group(3);
                // Group 3 is "\d*", so it can match an empty string as well as be absent.
                if (port != null && !port.isEmpty()) {
                    backendPort = Integer.parseInt(port);
                }
                responseCode = Strings.nullToEmpty(matcher.group(4));
            }
        }

        context.write(
            handleDate, context.getHost(), sourceHost, sourcePort, httpMethod,
            url, protocol, durationMs, referer, requestHost, requestPort,
            regexp, backendHost, backendPort, responseCode
        );
    }

    /**
     * Returns the port from an address already split on {@code ':'}.
     *
     * @param hostAndPort address parts; element 1, when present, is the port
     * @return the parsed port, or {@link #UNKNOWN_PORT} when the address has no second part
     * @throws NumberFormatException if the second part is not a valid integer
     */
    private static int parsePort(String[] hostAndPort) {
        return hostAndPort.length > 1 ? Integer.parseInt(hostAndPort[1]) : UNKNOWN_PORT;
    }

    /**
     * Cuts a fraction of a second that has more than three digits down to its first three.
     * Timestamps with three or fewer fraction digits are returned unchanged.
     *
     * @param timestamp raw timestamp text from the log line
     * @return the timestamp with at most millisecond precision
     */
    private static String truncateToMillis(String timestamp) {
        return SUB_MILLIS_PATTERN.matcher(timestamp).replaceFirst("$1");
    }
}
