package ru.yandex.market.logshatter.parser.direct;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.clickhouse.ddl.engine.EngineType;
import ru.yandex.market.clickhouse.ddl.engine.MergeTree;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.TableDescription;

/**
 * Parses API log lines of the form
 * {@code [syslog prefix] yyyy-MM-dd HH:mm:ss {json}} and writes one row per line.
 *
 * <p>The JSON payload is read field by field. Optional fields fall back to an empty
 * value ({@code ""}, {@code 0} or an empty list) both when the key is missing and when
 * its value is JSON {@code null}. Mandatory fields ({@code runtime}, {@code http_status},
 * {@code reqid}, {@code uid}, {@code host}, {@code proc_id}) cause an
 * {@link IllegalArgumentException} when they are missing or {@code null}.
 */
public class LogApiParser implements LogParser {
    /** Pattern used to parse the concatenated {@code "<date> <time>"} part of a line. */
    public static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Formatter for the line timestamp. {@link SimpleDateFormat} is not safe for
     * concurrent use, which is why it is kept per parser instance.
     * NOTE(review): assumes a single parser instance is never used by several
     * threads at once - confirm against the caller.
     */
    public final SimpleDateFormat dateTimeFormat = new SimpleDateFormat(DATE_PATTERN);

    public static final Column DATE_COLUMN = new Column("log_date", ColumnType.Date);
    public static final Column TIMESTAMP_COLUMN = new Column("log_time", ColumnType.DateTime);

    private static final String SAMPLING_KEY = "intHash64(reqid)";

    /** Key expressions appended after the date and timestamp columns in the primary key. */
    public static List<String> additionalPrimaryKeys = Arrays.asList(SAMPLING_KEY);

    // Must stay below additionalPrimaryKeys: getPrimary() reads it during class initialization.
    private static final List<String> PRIMARY_KEYS = getPrimary();
    private static final String PARTITION_COLUMN = DATE_COLUMN.getName();

    private static final MergeTree DEFAULT_ENGINE =
        new MergeTree("toYYYYMM(" + PARTITION_COLUMN + ")", PRIMARY_KEYS, SAMPLING_KEY, 8192);

    private static final String CID_COLUMN_NAME = "cid";
    private static final String BID_COLUMN_NAME = "bid";
    private static final String IP_COLUMN_NAME = "ip";
    private static final String CMD_COLUMN_NAME = "cmd";
    private static final String RUNTIME_COLUMN_NAME = "runtime";
    private static final String PARAM_COLUMN_NAME = "param";
    private static final String HTTP_STATUS_COLUMN_NAME = "http_status";
    private static final String CLUID_COLUMN_NAME = "cluid";
    private static final String REQID_COLUMN_NAME = "reqid";
    private static final String UID_COLUMN_NAME = "uid";
    private static final String HOST_COLUMN_NAME = "host";
    private static final String PROC_ID_COLUMN_NAME = "proc_id";
    private static final String FULLTIME_COLUMN_NAME = "fulltime";
    private static final String ERROR_DETAIL_COLUMN_NAME = "error_detail";
    private static final String UNITS_COLUMN_NAME = "units";
    private static final String UNITS_STATS_COLUMN_NAME = "units_stats";
    private static final String API_VERSION_COLUMN_NAME = "api_version";
    private static final String INTERFACE_COLUMN_NAME = "interface";
    private static final String APPLICATION_ID_COLUMN_NAME = "application_id";
    private static final String RESPONSE_COLUMN_NAME = "response";
    private static final String RESPONSE_IDS_COLUMN_NAME = "response_ids";
    private static final String SOURCE_COLUMN_NAME = "source";
    private static final String UNITS_SPENDING_USER_CLIENT_ID_COLUMN_NAME = "units_spending_user_client_id";
    private static final String ERROR_OBJECT_COUNT = "error_object_count";
    private static final String WARNING_OBJECT_COUNT = "warning_object_count";

    private static final String CAMPAIGN_ID = "CampaignID";

    // The order here must match the order of the values passed to context.write() in parse().
    private static final List<Column> COLUMNS = new ArrayList<>(Arrays.asList(
        new Column(CID_COLUMN_NAME, ColumnType.ArrayInt64),
        new Column(BID_COLUMN_NAME, ColumnType.ArrayInt64),
        new Column(IP_COLUMN_NAME, ColumnType.String),
        new Column(CMD_COLUMN_NAME, ColumnType.String),
        new Column(RUNTIME_COLUMN_NAME, ColumnType.Float32),
        new Column(PARAM_COLUMN_NAME, ColumnType.String, null, null, "ZSTD(5)"),
        new Column(HTTP_STATUS_COLUMN_NAME, ColumnType.Int64),
        new Column(CLUID_COLUMN_NAME, ColumnType.ArrayInt64),
        new Column(REQID_COLUMN_NAME, ColumnType.Int64),
        new Column(UID_COLUMN_NAME, ColumnType.Int64),
        new Column(HOST_COLUMN_NAME, ColumnType.String),
        new Column(PROC_ID_COLUMN_NAME, ColumnType.Int64),
        new Column(FULLTIME_COLUMN_NAME, ColumnType.Float32),
        new Column(ERROR_DETAIL_COLUMN_NAME, ColumnType.String),
        new Column(UNITS_COLUMN_NAME, ColumnType.Int64),
        new Column(UNITS_STATS_COLUMN_NAME, ColumnType.String),
        new Column(API_VERSION_COLUMN_NAME, ColumnType.UInt8),
        new Column(INTERFACE_COLUMN_NAME, ColumnType.String),
        new Column(APPLICATION_ID_COLUMN_NAME, ColumnType.String),
        new Column(RESPONSE_COLUMN_NAME, ColumnType.String, null, null, "ZSTD(5)"),
        new Column(RESPONSE_IDS_COLUMN_NAME, ColumnType.ArrayInt64),
        new Column(SOURCE_COLUMN_NAME, ColumnType.String),
        new Column(UNITS_SPENDING_USER_CLIENT_ID_COLUMN_NAME, ColumnType.Int64),
        new Column(ERROR_OBJECT_COUNT, ColumnType.Int64),
        new Column(WARNING_OBJECT_COUNT, ColumnType.Int64)
    ));

    private static final TableDescription TABLE_DESCRIPTION = create(DEFAULT_ENGINE, COLUMNS);

    private static final Gson GSON = new Gson();

    private static final String SYSLOG_PREFIX = "(<\\d+>\\d+\\s+\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}\\s+\\S+\\s+\\S+\\s+\\d+\\s+-\\s+-\\s+)?";
    private static final String DATE_TIME = "(?<logDate>\\d{4}-\\d{2}-\\d{2})[ T:](?<logTime>\\d{2}:\\d{2}:\\d{2})";
    private static final String DATA = "(?<data>\\{.*})";

    private static final String REGEX = "^" + SYSLOG_PREFIX + DATE_TIME + "\\s+" + DATA + "\\s*$";
    private static final Pattern PATTERN = Pattern.compile(REGEX, Pattern.DOTALL);

    /**
     * Builds a table description whose columns are {@link #DATE_COLUMN},
     * {@link #TIMESTAMP_COLUMN} and then the given columns, in that order.
     *
     * @param engineType engine passed through to the table description
     * @param columns    columns to append after the date and timestamp columns
     * @return the assembled table description
     */
    public static TableDescription create(EngineType engineType, List<Column> columns) {
        List<Column> allColumns = new ArrayList<>(columns.size() + 2);
        allColumns.add(DATE_COLUMN);
        allColumns.add(TIMESTAMP_COLUMN);
        allColumns.addAll(columns);
        return new TableDescription(allColumns, engineType);
    }

    /** Returns the date column, the timestamp column and {@link #additionalPrimaryKeys}, in that order. */
    private static List<String> getPrimary() {
        List<String> aggrKeys = new ArrayList<>();
        aggrKeys.add(DATE_COLUMN.getName());
        aggrKeys.add(TIMESTAMP_COLUMN.getName());
        aggrKeys.addAll(additionalPrimaryKeys);
        return aggrKeys;
    }

    /** Returns the table description built once at class initialization. */
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }

    /**
     * Parses one log line and writes the extracted row to {@code context}.
     *
     * @param line    raw log line: optional syslog prefix, date, time and a JSON object
     * @param context receives the parsed row; its file is recorded in the {@code source} column
     * @throws IllegalArgumentException if the line does not match the expected layout or a
     *                                  mandatory JSON field is missing or {@code null}
     * @throws IllegalStateException    if {@code bid}, {@code units_stats} or {@code response}
     *                                  has an unsupported JSON type
     * @throws Exception                any other failure raised while reading the JSON payload
     *                                  or the timestamp
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        Matcher matcher = PATTERN.matcher(line);
        if (!matcher.matches()) {
            throw new IllegalArgumentException("Can't parse line: " + line);
        }

        String logDate = matcher.group("logDate");
        String logTime = matcher.group("logTime");
        JsonObject data = GSON.fromJson(matcher.group("data"), JsonObject.class);

        List<Long> bid = readBid(data, line);

        String ip = primitiveStringOrEmpty(data, IP_COLUMN_NAME);
        String cmd = primitiveStringOrEmpty(data, CMD_COLUMN_NAME);

        List<Long> cid = new ArrayList<>();
        if (optional(data, CID_COLUMN_NAME) != null) {
            data.getAsJsonArray(CID_COLUMN_NAME).forEach(element -> cid.add(element.getAsLong()));
        }

        String param = "";
        JsonElement jsonParam = optional(data, PARAM_COLUMN_NAME);
        if (jsonParam != null) {
            param = jsonParam.toString();
            // Some commands carry campaign ids only inside the request parameters.
            cid.addAll(addCidsFromParam(data, cmd));
        }

        float runtime = required(data, RUNTIME_COLUMN_NAME, line).getAsFloat();
        long httpStatus = required(data, HTTP_STATUS_COLUMN_NAME, line).getAsLong();

        // cluid arrives as a comma-separated list of numbers inside a single value.
        JsonElement jsonCluid = optional(data, CLUID_COLUMN_NAME);
        List<Long> cluid = jsonCluid != null && !jsonCluid.getAsString().isEmpty()
            ? Arrays.stream(jsonCluid.getAsString().split(","))
                .map(String::trim)
                .map(Long::parseLong)
                .collect(Collectors.toList())
            : new ArrayList<>();

        long reqid = required(data, REQID_COLUMN_NAME, line).getAsLong();
        long uid = required(data, UID_COLUMN_NAME, line).getAsLong();
        String host = required(data, HOST_COLUMN_NAME, line).getAsString();
        long procId = required(data, PROC_ID_COLUMN_NAME, line).getAsLong();

        JsonElement jsonFulltime = optional(data, FULLTIME_COLUMN_NAME);
        float fulltime = jsonFulltime != null ? jsonFulltime.getAsFloat() : 0f;

        String errorDetail = stringOrEmpty(data, ERROR_DETAIL_COLUMN_NAME);
        long units = longOrZero(data, UNITS_COLUMN_NAME);
        String unitsStats = readUnitsStats(data, line);

        short apiVersion = 0;
        JsonElement jsonApiVersion = optional(data, API_VERSION_COLUMN_NAME);
        if (jsonApiVersion != null) {
            apiVersion = jsonApiVersion.getAsShort();
        }

        String interfaceColumn = stringOrEmpty(data, INTERFACE_COLUMN_NAME);
        String applicationId = stringOrEmpty(data, APPLICATION_ID_COLUMN_NAME);
        String response = readResponse(data, line);

        List<Long> responseIds = new ArrayList<>();
        if (optional(data, RESPONSE_IDS_COLUMN_NAME) != null) {
            data.getAsJsonArray(RESPONSE_IDS_COLUMN_NAME)
                .forEach(jsonElement -> responseIds.add(!jsonElement.isJsonNull() ? jsonElement.getAsLong() : 0L));
        }

        String source = "file:" + context.getFile().toString();

        long unitsSpendingUserClientId = longOrZero(data, UNITS_SPENDING_USER_CLIENT_ID_COLUMN_NAME);
        long errorObjectCount = longOrZero(data, ERROR_OBJECT_COUNT);
        long warningObjectCount = longOrZero(data, WARNING_OBJECT_COUNT);

        context.write(dateTimeFormat.parse(logDate + " " + logTime),
            cid,
            bid,
            ip,
            cmd,
            runtime,
            param,
            httpStatus,
            cluid,
            reqid,
            uid,
            host,
            procId,
            fulltime,
            errorDetail,
            units,
            unitsStats,
            apiVersion,
            interfaceColumn,
            applicationId,
            response,
            responseIds,
            source,
            unitsSpendingUserClientId,
            errorObjectCount,
            warningObjectCount
        );
    }

    /** Returns the member {@code name}, or {@code null} when it is missing or JSON null. */
    private static JsonElement optional(JsonObject data, String name) {
        JsonElement element = data.get(name);
        return element == null || element.isJsonNull() ? null : element;
    }

    /** Returns the member {@code name}; rejects the line when it is missing or JSON null. */
    private static JsonElement required(JsonObject data, String name, String line) {
        JsonElement element = optional(data, name);
        if (element == null) {
            throw new IllegalArgumentException("Missing required field '" + name + "' in line: " + line);
        }
        return element;
    }

    /** Reads {@code name} as a string; a missing or null member yields {@code ""}. */
    private static String stringOrEmpty(JsonObject data, String name) {
        JsonElement element = optional(data, name);
        return element != null ? element.getAsString() : "";
    }

    /** Like {@link #stringOrEmpty} but additionally insists that the member is a JSON primitive. */
    private static String primitiveStringOrEmpty(JsonObject data, String name) {
        return optional(data, name) != null ? data.getAsJsonPrimitive(name).getAsString() : "";
    }

    /** Reads {@code name} as a long; a missing or null member yields {@code 0}. */
    private static long longOrZero(JsonObject data, String name) {
        JsonElement element = optional(data, name);
        return element != null ? element.getAsLong() : 0L;
    }

    /** Reads {@code bid}, which may be a single value or an array; a missing key yields an empty list. */
    private static List<Long> readBid(JsonObject data, String line) {
        List<Long> bid = new ArrayList<>();
        if (!data.has(BID_COLUMN_NAME)) {
            return bid;
        }
        JsonElement jsonBid = data.get(BID_COLUMN_NAME);
        if (jsonBid.isJsonArray()) {
            jsonBid.getAsJsonArray().forEach(jsonElement -> bid.add(jsonElement.getAsLong()));
        } else if (jsonBid.isJsonPrimitive()) {
            bid.add(jsonBid.getAsLong());
        } else {
            throw new IllegalStateException("Unsupported type of bid in line: " + line);
        }
        return bid;
    }

    /** Reads {@code units_stats} as text: primitives as their string value, arrays as JSON. */
    private static String readUnitsStats(JsonObject data, String line) {
        JsonElement jsonUnitsStats = optional(data, UNITS_STATS_COLUMN_NAME);
        if (jsonUnitsStats == null) {
            return "";
        }
        if (jsonUnitsStats.isJsonPrimitive()) {
            return jsonUnitsStats.getAsString();
        }
        if (jsonUnitsStats.isJsonArray()) {
            return jsonUnitsStats.getAsJsonArray().toString();
        }
        throw new IllegalStateException("Unsupported type of units_stats in line: " + line);
    }

    /**
     * Reads {@code response} as JSON text. The value may be an object or a string that
     * itself contains a serialized JSON object; the string form is re-parsed and
     * re-serialized.
     */
    private static String readResponse(JsonObject data, String line) {
        JsonElement jsonResponse = optional(data, RESPONSE_COLUMN_NAME);
        if (jsonResponse == null) {
            return "";
        }
        if (jsonResponse.isJsonObject()) {
            return jsonResponse.getAsJsonObject().toString();
        }
        if (jsonResponse.isJsonPrimitive()) {
            // Gson returns null for an empty document, so guard before calling toString().
            JsonObject parsed = GSON.fromJson(jsonResponse.getAsString(), JsonObject.class);
            return parsed != null ? parsed.toString() : "";
        }
        throw new IllegalStateException("Unsupported type of response in line: " + line);
    }

    /**
     * Collects campaign ids carried inside the {@code param} member.
     *
     * <ul>
     *   <li>{@code GetBalance}: every element of the {@code param} array;</li>
     *   <li>{@code TransferMoney}: {@code CampaignID} of every object in
     *       {@code FromCampaigns} and {@code ToCampaigns};</li>
     *   <li>any command: a top-level {@code CampaignID} of an object-valued {@code param}.</li>
     * </ul>
     *
     * <p>A JSON null (or, for {@code CampaignID}, missing) id is recorded as {@code 0}.
     * A {@code param} whose structure does not match the command contributes nothing;
     * values that are present but not numeric still raise an exception.
     *
     * @param data parsed JSON payload of the log line
     * @param cmd  value of the {@code cmd} member, never {@code null}
     * @return the ids found, possibly empty
     */
    private static List<Long> addCidsFromParam(JsonObject data, String cmd) {
        List<Long> res = new ArrayList<>();

        JsonElement jsonParam = optional(data, PARAM_COLUMN_NAME);
        if (jsonParam == null) {
            return res;
        }

        if (cmd.equals("GetBalance") && jsonParam.isJsonArray()) {
            for (JsonElement jsonCid : jsonParam.getAsJsonArray()) {
                res.add(!jsonCid.isJsonNull() ? jsonCid.getAsLong() : 0L);
            }
        }

        if (cmd.equals("TransferMoney") && jsonParam.isJsonObject()) {
            JsonObject transfer = jsonParam.getAsJsonObject();
            addCampaignIds(transfer.get("FromCampaigns"), res);
            addCampaignIds(transfer.get("ToCampaigns"), res);
        }

        if (jsonParam.isJsonObject() && jsonParam.getAsJsonObject().has(CAMPAIGN_ID)) {
            res.add(campaignIdOrZero(jsonParam.getAsJsonObject()));
        }

        return res;
    }

    /** Appends the {@code CampaignID} of every object in {@code campaigns}; ignores anything that is not an array. */
    private static void addCampaignIds(JsonElement campaigns, List<Long> out) {
        if (campaigns == null || !campaigns.isJsonArray()) {
            return;
        }
        for (JsonElement campaign : campaigns.getAsJsonArray()) {
            if (campaign.isJsonObject()) {
                out.add(campaignIdOrZero(campaign.getAsJsonObject()));
            }
        }
    }

    /** Returns the {@code CampaignID} of {@code holder}, or {@code 0} when it is missing or JSON null. */
    private static long campaignIdOrZero(JsonObject holder) {
        JsonElement jsonCampaignId = optional(holder, CAMPAIGN_ID);
        return jsonCampaignId != null ? jsonCampaignId.getAsLong() : 0L;
    }
}
