package ru.yandex.market.logshatter.parser.direct;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.clickhouse.ddl.engine.EngineType;
import ru.yandex.market.clickhouse.ddl.engine.MergeTree;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.TableDescription;

import com.google.common.primitives.UnsignedLong;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

/**
 * Log parser that converts a log line carrying a JSON payload into one table row.
 *
 * <p>A line is accepted in one of two shapes (see {@link #strPattern}): either a syslog-like
 * prefix (priority/version, ISO timestamp, host, app-name, pid, msgid, structured data id)
 * followed by {@code date time}, or a bare {@code date time}; in both cases the rest of the
 * line must be a JSON object. The row timestamp is taken from the {@code date time} part and
 * the remaining columns are extracted from the JSON object.
 *
 * <p>The instance holds a {@link SimpleDateFormat}, which is not synchronized, so a single
 * parser instance must not be used from several threads at once.
 */
public class PpcCmdLogParser implements LogParser {
    /**
     * Regular expression of a supported line. Named groups {@code date}/{@code time} are set for
     * the prefixed form, {@code date2}/{@code time2} for the bare form, and {@code json} always.
     */
    public static final String strPattern = "((^\\s*<\\d+>\\d+"                       //(pri version
        + "\\s\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d[+-]\\d\\d:\\d\\d"       //timestamp
        + "\\s(?<host>\\S+)"                                                          //host
        + "\\s(?<type>\\S+)"                                                          //app-name (logtype)
        + "\\s\\d+"                                                                   //pid
        + "\\s\\S+"                                                                   //msgid
        + "\\s\\S+"                                                                   //structured data id
        + "\\s(?<date>\\d\\d\\d\\d-\\d\\d-\\d\\d)"                                    //date
        + "\\s(?<time>\\d\\d:\\d\\d:\\d\\d)(?:\\.\\d+)?)|"                            //time) or
        + "(^\\s*(?<date2>\\d\\d\\d\\d-\\d\\d-\\d\\d)"                                //(date2
        + "\\s(?<time2>\\d\\d:\\d\\d:\\d\\d)(?:\\.\\d+)?))"                           //time2)
        + "\\s(?<json>\\{.*\\})";                                                     //json

    /** Compiled, fully anchored form of {@link #strPattern}; DOTALL lets the JSON span newlines. */
    public static final Pattern pattern = Pattern.compile("^" + strPattern + "$", Pattern.DOTALL);

    /** Format of the {@code date time} pair extracted from the line. */
    public static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Per-instance formatter for {@link #DATE_PATTERN}; not safe for concurrent use. */
    public final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);

    public static final Column DATE_COLUMN = new Column("log_date", ColumnType.Date);
    public static final Column TIMESTAMP_COLUMN = new Column("log_time", ColumnType.DateTime);

    private static final String SAMPLING_EXPRESSION = "intHash64(reqid)";

    /**
     * Key expressions appended after the date and timestamp columns in the primary key.
     * Read once during class initialization; later reassignment does not affect the engine.
     */
    public static List<String> additionalPrimaryKeys = Arrays.asList(SAMPLING_EXPRESSION);

    // NOTE: the declaration order of the static fields below matters, each one depends on
    // the fields initialized above it.
    private static final List<String> PRIMARY_KEYS = getPrimary();
    private static final String PARTITION_COLUMN = DATE_COLUMN.getName();

    private static final MergeTree DEFAULT_ENGINE =
        new MergeTree("toYYYYMM(" + PARTITION_COLUMN + ")", PRIMARY_KEYS, SAMPLING_EXPRESSION, 8192);

    // Payload columns, in exactly the order the values are passed to context.write() in parse().
    private static final List<Column> COLUMNS = new ArrayList<>(Arrays.asList(
        new Column("cid", ColumnType.ArrayInt64),
        new Column("bid", ColumnType.ArrayInt64),
        new Column("pid", ColumnType.ArrayInt64),
        new Column("ip", ColumnType.String),
        new Column("cmd", ColumnType.String),
        new Column("runtime", ColumnType.Float32),
        new Column("service", ColumnType.String),
        // presumably the last argument is the column compression codec - TODO confirm
        new Column("param", ColumnType.String, null, null, "ZSTD(5)"),
        new Column("response", ColumnType.String),
        new Column("http_status", ColumnType.Int16),
        new Column("cluid", ColumnType.ArrayInt64),
        new Column("client_id", ColumnType.ArrayUInt64),
        new Column("trace_id", ColumnType.UInt64),
        new Column("reqid", ColumnType.Int64),
        new Column("uid", ColumnType.Int64),
        new Column("host", ColumnType.String),
        new Column("proc_id", ColumnType.Int64),
        new Column("fulltime", ColumnType.Float32),
        new Column("cpu_user_time", ColumnType.Float32),
        new Column("yandexuid", ColumnType.String),
        new Column("role", ColumnType.String),
        new Column("source", ColumnType.String),
        new Column("tvm_service_id", ColumnType.Int32)
    ));

    /**
     * Builds a table description whose columns are {@link #DATE_COLUMN}, {@link #TIMESTAMP_COLUMN}
     * and then the given columns, in that order.
     *
     * @param engineType table engine to use
     * @param columns    payload columns to append after the date and timestamp columns
     * @return the resulting table description
     */
    public static TableDescription create(EngineType engineType, List<Column> columns) {
        List<Column> allColumns = new ArrayList<>(columns.size() + 2);
        allColumns.add(DATE_COLUMN);
        allColumns.add(TIMESTAMP_COLUMN);
        allColumns.addAll(columns);
        return new TableDescription(allColumns, engineType);
    }

    private static final TableDescription TABLE_DESCRIPTION = create(DEFAULT_ENGINE, COLUMNS);

    private static final Gson GSON = new Gson();

    /** Returns the primary key: date column, timestamp column, then {@link #additionalPrimaryKeys}. */
    private static List<String> getPrimary() {
        List<String> keys = new ArrayList<>();
        keys.add(DATE_COLUMN.getName());
        keys.add(TIMESTAMP_COLUMN.getName());
        keys.addAll(additionalPrimaryKeys);
        return keys;
    }

    /**
     * Converts every element of a JSON array to a {@code Long} via its string form.
     * Unlike the comma-separated path, a non-numeric element throws {@link NumberFormatException}.
     */
    private List<Long> jsonArrayToLongList(JsonArray jsonArray) {
        List<Long> result = new ArrayList<>(jsonArray.size());
        for (JsonElement item : jsonArray) {
            result.add(Long.valueOf(item.getAsString()));
        }
        return result;
    }

    /**
     * Reads {@code field} as a list of longs.
     *
     * <p>The value may be a JSON array or a scalar holding a comma-separated list (spaces are
     * ignored, empty tokens are dropped, unparsable tokens become 0).
     *
     * @param element object to read from; {@code null} is treated as "field absent"
     * @param field   member name
     * @return the parsed values, or an empty list when the object or the field is absent or null
     */
    private List<Long> getLongListFromJson(JsonObject element, String field) {
        if (element == null) {
            return Collections.emptyList();
        }
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return Collections.emptyList();
        }
        if (subElement.isJsonArray()) {
            return jsonArrayToLongList(subElement.getAsJsonArray());
        }
        // parseLong() never throws, so no NumberFormatException handling is needed here.
        return Arrays.stream(getStringFromJson(element, field).replaceAll(" ", "").split(","))
            .filter(token -> !token.isEmpty())
            .map(this::parseLong)
            .collect(Collectors.toList());
    }

    /** Parses a decimal long; returns 0 when the text is not a valid long. */
    private long parseLong(String number) {
        try {
            return Long.parseLong(number);
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Converts every element of a JSON array to an {@code UnsignedLong} via its string form. */
    private List<UnsignedLong> jsonArrayToUnsignedLongList(JsonArray jsonArray) {
        List<UnsignedLong> result = new ArrayList<>(jsonArray.size());
        for (JsonElement item : jsonArray) {
            result.add(UnsignedLong.valueOf(item.getAsString()));
        }
        return result;
    }

    /**
     * Reads {@code field} as a list of unsigned longs.
     *
     * <p>The value may be a JSON array or a scalar holding a comma-separated list (spaces are
     * ignored, empty tokens are dropped). An unparsable token throws {@link NumberFormatException}.
     *
     * @return the parsed values, or an empty list when the field is absent or null
     */
    private List<UnsignedLong> getUnsignedLongListFromJson(JsonObject element, String field) {
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return Collections.emptyList();
        }
        if (subElement.isJsonArray()) {
            return jsonArrayToUnsignedLongList(subElement.getAsJsonArray());
        }
        return Arrays.stream(getStringFromJson(element, field).replaceAll(" ", "").split(","))
            // "".split(",") yields a single empty token; skip it the same way the Long variant does.
            .filter(token -> !token.isEmpty())
            .map(UnsignedLong::valueOf)
            .collect(Collectors.toList());
    }

    /** Returns a new mutable list with duplicates removed; element order is unspecified. */
    private <T> List<T> makeListUnique(List<T> list) {
        Set<T> uniqueElements = new HashSet<>(list);
        return new ArrayList<>(uniqueElements);
    }

    /** Returns a new list holding the distinct elements of {@code values} in ascending order. */
    private <T extends Comparable<? super T>> List<T> sortedUnique(List<T> values) {
        List<T> unique = makeListUnique(values);
        Collections.sort(unique);
        return unique;
    }

    /**
     * Reads {@code field} as a string.
     *
     * @return the scalar's string value, the JSON text for values without a string form
     *         (e.g. objects), or an empty string when the field is absent or null
     */
    private String getStringFromJson(JsonObject element, String field) {
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return "";
        }
        try {
            return subElement.getAsString();
        } catch (UnsupportedOperationException ex) {
            // Not a scalar: fall back to the serialized JSON.
            return subElement.toString();
        }
    }

    /** Reads {@code field} as an int, or returns {@code null} when it is absent or null. */
    private Integer getIntegerFromJson(JsonObject element, String field) {
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return null;
        }
        return subElement.getAsInt();
    }

    /** Reads {@code field} as a long, or returns {@code null} when it is absent or null. */
    private Long getLongFromJson(JsonObject element, String field) {
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return null;
        }
        return subElement.getAsLong();
    }

    /** Reads {@code field} as an unsigned long, or returns {@code null} when it is absent or null. */
    private UnsignedLong getUnsignedLongFromJson(JsonObject element, String field) {
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return null;
        }
        return UnsignedLong.valueOf(subElement.getAsString());
    }

    /** Reads {@code field} as a float, or returns {@code null} when it is absent or null. */
    private Float getFloatFromJson(JsonObject element, String field) {
        JsonElement subElement = element.get(field);
        if (subElement == null || subElement.isJsonNull()) {
            return null;
        }
        return subElement.getAsFloat();
    }

    /**
     * Collects ids stored under {@code field} and {@code field + "s"}, both at the top level of
     * the record and inside its {@code param} object.
     *
     * @param field     singular member name, e.g. {@code "cid"}
     * @param element   top-level record
     * @param paramJson the record's {@code param} object, or {@code null} when there is none
     * @return the distinct ids in ascending order
     */
    private List<Long> makeFieldList(String field, JsonObject element, JsonObject paramJson) {
        String pluralField = field + "s";
        List<Long> collected = new ArrayList<>();
        collected.addAll(getLongListFromJson(element, field));
        collected.addAll(getLongListFromJson(element, pluralField));
        collected.addAll(getLongListFromJson(paramJson, field));
        collected.addAll(getLongListFromJson(paramJson, pluralField));
        return sortedUnique(collected);
    }

    /**
     * Parses one log line and writes a single row to {@code context}.
     *
     * @param line    raw log line
     * @param context sink receiving the row; values are written in the table's column order
     * @throws IllegalArgumentException if the line does not match {@link #pattern}
     * @throws Exception                if the date or the JSON payload cannot be parsed
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        Matcher matcher = pattern.matcher(line);
        if (!matcher.find()) {
            throw new IllegalArgumentException(String.format("Can't parse line: %s", line));
        }

        // The prefixed alternative fills date/time, the bare one fills date2/time2.
        String strDate = matcher.group("date");
        String strTime = matcher.group("time");
        if (strDate == null || strTime == null) {
            strDate = matcher.group("date2");
            strTime = matcher.group("time2");
        }
        Date date = dateFormat.parse(strDate + " " + strTime);

        JsonObject element = GSON.fromJson(matcher.group("json"), JsonObject.class);
        // "param" may be missing or may not be an object; in that case it contributes no ids
        // instead of failing the whole line.
        JsonElement rawParam = element.get("param");
        JsonObject paramJson = rawParam != null && rawParam.isJsonObject()
            ? rawParam.getAsJsonObject()
            : null;

        // Missing list fields produce an empty list.
        // Missing string fields produce an empty string.
        // Of the numeric fields only uid, proc_id, fulltime and trace_id are expected to be absent:
        // uid, proc_id and fulltime are replaced with zero,
        // trace_id is replaced with reqid.
        List<Long> cid = makeFieldList("cid", element, paramJson);
        List<Long> bid = makeFieldList("bid", element, paramJson);
        List<Long> pid = makeFieldList("pid", element, paramJson);
        String ip = getStringFromJson(element, "ip");
        String cmd = getStringFromJson(element, "cmd");
        Float runtime = getFloatFromJson(element, "runtime");
        String service = getStringFromJson(element, "service");
        String param = getStringFromJson(element, "param");
        String response = getStringFromJson(element, "response");
        Integer httpStatus = getIntegerFromJson(element, "http_status");
        List<Long> cluid = sortedUnique(getLongListFromJson(element, "cluid"));
        List<UnsignedLong> clientId = sortedUnique(getUnsignedLongListFromJson(element, "client_id"));
        Long reqid = getLongFromJson(element, "reqid");
        UnsignedLong traceId = getUnsignedLongFromJson(element, "trace_id");
        if (traceId == null) {
            // reqid is expected to be present; a missing reqid fails here with NullPointerException.
            traceId = UnsignedLong.valueOf(reqid);
        }
        Long uid = getLongFromJson(element, "uid");
        if (uid == null) {
            uid = 0L;
        }
        String host = getStringFromJson(element, "host");
        Long procId = getLongFromJson(element, "proc_id");
        if (procId == null) {
            procId = 0L;
        }
        Float fulltime = getFloatFromJson(element, "fulltime");
        if (fulltime == null) {
            fulltime = 0F;
        }
        Float cpuUserTime = getFloatFromJson(element, "cpu_user_time");
        String yandexuid = getStringFromJson(element, "yandexuid");
        String role = getStringFromJson(element, "role");
        // NOTE(review): "source" is never read from the record and is always written empty - confirm intent.
        String source = "";
        Integer tvmServiceId = getIntegerFromJson(element, "tvm_service_id");
        if (tvmServiceId == null) {
            tvmServiceId = 0;
        }

        context.write(date, cid, bid, pid, ip, cmd, runtime, service, param, response, httpStatus,
            cluid, clientId, traceId, reqid, uid, host, procId, fulltime, cpuUserTime, yandexuid,
            role, source, tvmServiceId);
    }

    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }
}
