package ru.yandex.market.logshatter.parser.marketout;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.ParserException;
import ru.yandex.market.logshatter.parser.TableDescription;

import java.util.regex.Pattern;

/**
 * Parses TSKV nginx access-log lines of the report service and writes one row per accepted request.
 *
 * <p>Requests are filtered before writing: see {@link #parse(String, ParserContext)} for the rules.
 *
 * @author Oleg Makovski <a href="mailto:omakovski@yandex-team.ru"></a>
 * @date 04/05/17
 */
public class ReportNginxLogParser implements LogParser {

    /**
     * Matches a desktop market host of the form {@code market.yandex.<segment>}, where the last
     * segment contains no dots. The dots are escaped so they match only a literal {@code '.'}.
     */
    private static final Pattern MARKET_DESKTOP_DOMAIN_REGEX = Pattern.compile("market\\.yandex\\.[^.]+");

    // Column order must stay in sync with the argument order of context.write(...) in parse().
    private static final TableDescription TABLE_DESCRIPTION = TableDescription.createDefault(
        new Column("host", ColumnType.String),
        new Column("upstream_resp_time_ms", ColumnType.Int32),
        new Column("req_time_ms", ColumnType.Int32),
        new Column("upstream_header_time_ms", ColumnType.Int32),
        new Column("place", ColumnType.String),
        new Column("status", ColumnType.UInt16),
        new Column("request_length", ColumnType.UInt32),
        new Column("bytes_sent", ColumnType.UInt32),
        new Column("env", ColumnType.String),
        new Column("cluster", ColumnType.UInt8),
        new Column("bot", ColumnType.String),
        new Column("client", ColumnType.String)
    );

    /**
     * Parses a single log line and writes it to the context unless it is filtered out.
     *
     * <p>A line is skipped when:
     * <ul>
     *   <li>the HTTP method is {@code GET} and the URL does not start with {@code /yandsearch};</li>
     *   <li>the resolved place is {@code consistency_check} or {@code report_status};</li>
     *   <li>the user agent starts with {@code pinger-report}.</li>
     * </ul>
     * For {@code POST} requests the place is always reported as {@code parallel}; for every other
     * method it is taken from the log entry.
     *
     * @param line    raw log line
     * @param context parser context used to obtain the page matcher and host and to write the row
     * @throws Exception if the log entry cannot be built or one of its fields cannot be read
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        ReportNginxTskvLogEntry logEntry = new ReportNginxTskvLogEntry(line, context.getPageMatcher());
        // Keep only /yandsearch GETs; requests with any other method (e.g. POST) are not filtered here.
        if (logEntry.getHttpMethod().equals("GET") && !logEntry.getUrl().startsWith("/yandsearch")) {
            return;
        }
        final String place = logEntry.getHttpMethod().equals("POST") ? "parallel" : logEntry.getPlace();
        if (place.equals("consistency_check") || place.equals("report_status")) {
            return;
        }
        final String userAgent = logEntry.getUserAgent();
        if (userAgent.startsWith("pinger-report")) {
            return;
        }
        context.write(
            logEntry.getDateTime(), context.getHost(),
            logEntry.getRespTimeMillis(), logEntry.getClientReqTimeMillis(), logEntry.getHeaderTimeMillis(),
            place, logEntry.getHttpCode(), logEntry.getRequestLength(), logEntry.getBytesSent(),
            logEntry.getEnv(), logEntry.getCluster(), BotDetector.detect(userAgent).name(),
            getClientType(logEntry)
        );
    }

    /**
     * @return the description of the table this parser writes to
     */
    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }

    /**
     * Resolves the value of the {@code client} column for a log entry.
     *
     * <p>Checks are applied in order, first match wins:
     * <ol>
     *   <li>{@code "TOUCH"} if the entry's touch value is non-zero;</li>
     *   <li>{@code "DESKTOP"} if the entry's base fully matches {@code market.yandex.<segment>};</li>
     *   <li>the entry's client string if it is not empty;</li>
     *   <li>{@code "OTHER"} otherwise.</li>
     * </ol>
     *
     * @param logEntry parsed log entry
     * @return the client type label
     * @throws ParserException if a required field cannot be read from the entry
     */
    private static String getClientType(ReportNginxTskvLogEntry logEntry) throws ParserException {
        if (logEntry.getTouch().longValue() != 0) {
            return "TOUCH";
        }

        if (MARKET_DESKTOP_DOMAIN_REGEX.matcher(logEntry.getBase()).matches()) {
            return "DESKTOP";
        }

        final String client = logEntry.getClient();
        if (!client.isEmpty()) {
            return client;
        }

        return "OTHER";
    }
}
