package ru.yandex.market.logshatter.parser.front.errorBooster.csp;

import com.google.common.primitives.UnsignedLong;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.curator.shaded.com.google.common.net.InternetDomainName;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.json.JSONArray;
import ru.yandex.market.clickhouse.ddl.Column;
import ru.yandex.market.clickhouse.ddl.ColumnType;
import ru.yandex.market.clickhouse.ddl.engine.MergeTree;
import ru.yandex.market.clickhouse.ddl.enums.EnumColumnType;
import ru.yandex.market.logshatter.parser.LogParser;
import ru.yandex.market.logshatter.parser.ParseUtils;
import ru.yandex.market.logshatter.parser.ParserContext;
import ru.yandex.market.logshatter.parser.ParserException;
import ru.yandex.market.logshatter.parser.TableDescription;
import ru.yandex.market.logshatter.parser.TskvSplitter;
import ru.yandex.market.logshatter.parser.front.errorBooster.Platform;

import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Parses TSKV log lines that carry Content-Security-Policy violation reports and writes
 * one row per report into the table described by {@link #TABLE_DESCRIPTION}.
 *
 * <p>A report is taken from the {@code csp-report} object inside the {@code content} field
 * when that field holds JSON; additional attributes (project, platform, request id, ...)
 * are taken from the query string of the request path.
 */
public class CSPLogParser implements LogParser {

    private static final Column YANDEXUID_COLUMN = new Column("yandexuid", ColumnType.UInt64);
    private static final Column FROM_COLUMN = new Column("from", ColumnType.String);
    private static final Column PROJECT_COLUMN = new Column("project", ColumnType.String);

    /** A directive name is accepted only if it consists of ASCII letters and dashes. */
    private static final Pattern DIRECTIVE_NAME = Pattern.compile("[a-zA-Z-]+");

    /** Prefix stripped from the path before its remainder is parsed as a query string. */
    private static final String CSP_QUERY_PREFIX = "/csp?";

    /** Query parameters that are mapped to dedicated columns and therefore not copied to kv_keys/kv_values. */
    private static final List<String> PARSED_URL_PARAMS = Arrays.asList(
        "url",
        "reqid",
        "reqId",
        "showid",
        "uid",
        "yandexuid",
        "from",
        "project",
        "platform",
        "page"
    );

    /**
     * Query parameters that are dropped entirely.
     * NOTE(review): presumably excluded to keep logins out of the table - confirm.
     */
    private static final List<String> IGNORED_URL_PARAMS = Arrays.asList(
        "yandex_login",
        "login",
        "date"
    );

    public static final TableDescription TABLE_DESCRIPTION = TableDescription.create(
        new MergeTree(
            "toYYYYMMDD(" + TableDescription.DATE_COLUMN.getName() + ")",
            Arrays.asList(
                PROJECT_COLUMN.getName(),
                FROM_COLUMN.getName(),
                TableDescription.TIMESTAMP_COLUMN.getName(),
                YANDEXUID_COLUMN.getName()
            ),
            YANDEXUID_COLUMN.getName()
        ),
        Arrays.asList(
            new Column("user_agent", ColumnType.String),
            new Column("origin", ColumnType.String),
            new Column("line", ColumnType.UInt32),
            new Column("col", ColumnType.UInt32),
            new Column("source_file", ColumnType.String),
            new Column("referer", ColumnType.String),
            new Column("blocked_uri", ColumnType.String),
            new Column("violated_directive", ColumnType.String),
            new Column("effective_directive", ColumnType.String),
            new Column("disposition", ColumnType.String),
            new Column("document_uri", ColumnType.String),
            new Column("script_sample", ColumnType.String),
            new Column("status_code", ColumnType.UInt16),
            FROM_COLUMN,
            YANDEXUID_COLUMN,
            new Column("request_id", ColumnType.String),
            new Column("blocked_uri_scheme", ColumnType.String),
            new Column("blocked_uri_host", ColumnType.String),
            new Column("blocked_uri_is_yandex", ColumnType.UInt8),
            new Column("kv_keys", ColumnType.ArrayString),
            new Column("kv_values", ColumnType.ArrayString),
            PROJECT_COLUMN,
            new Column("platform", EnumColumnType.enum8(Platform.class)),
            new Column("page", ColumnType.String)
        )
    );

    @Override
    public TableDescription getTableDescription() {
        return TABLE_DESCRIPTION;
    }

    /**
     * Builds the event time from the {@code timestamp} and {@code timezone} fields of the line.
     *
     * <p>The space in {@code timestamp} is replaced with {@code T} so the value can be read by
     * {@link LocalDateTime#parse(CharSequence)}; {@code timezone} is interpreted by
     * {@link ZoneOffset#of(String)}.
     *
     * @param splitter splitter positioned on the current TSKV line
     * @return the timestamp with the offset taken from the line
     * @throws ParserException if the splitter fails to provide a field
     */
    public OffsetDateTime getDateTime(TskvSplitter splitter) throws ParserException {
        String date = splitter.getString("timestamp").replace(" ", "T");
        String timezone = splitter.getString("timezone");
        LocalDateTime dateTime = LocalDateTime.parse(date);

        return dateTime.atOffset(ZoneOffset.of(timezone));
    }

    /**
     * Extracts the {@code csp-report} object from a JSON-escaped string.
     *
     * @param str raw value of the {@code content} field, may be {@code null}
     * @return the {@code csp-report} object, or {@code null} when the input is not JSON,
     *         cannot be parsed, or has no such object
     */
    private JsonObject getJsonObjectContent(String str) {
        if (str == null || !str.contains("{")) {
            return null;
        }

        try {
            JsonElement root = new JsonParser().parse(StringEscapeUtils.unescapeJson(str));
            return root.getAsJsonObject().getAsJsonObject("csp-report");
        } catch (Exception ignored) {
            // Best effort: malformed or unexpectedly shaped JSON is treated as "no report";
            // the caller then falls back to the query-string based attributes.
            return null;
        }
    }

    /**
     * Cuts a URL down to its first three {@code /}-separated segments
     * (for {@code scheme://host/path} that is {@code scheme://host}).
     *
     * @return the shortened URL, or an empty string when the input is {@code null}
     *         or has fewer than three segments
     */
    private String getSchemeAndHost(String url) {
        if (url == null) {
            return "";
        }

        String[] urlSplits = url.split("/");
        if (urlSplits.length < 3) {
            return "";
        }

        return String.join("/", Arrays.copyOfRange(urlSplits, 0, 3));
    }

    /**
     * Parses the scheme-and-host prefix of a URL.
     *
     * @return the parsed URI, or {@code null} when the prefix is not a syntactically valid URI
     */
    private URI parseSchemeAndHost(String url) {
        try {
            return new URI(getSchemeAndHost(url));
        } catch (URISyntaxException ignored) {
            // An unparsable URL simply has no scheme/host as far as the table is concerned.
            return null;
        }
    }

    /**
     * Returns the host of the given URL.
     *
     * @param url URL to inspect
     * @return the host, or an empty string when it cannot be determined
     */
    public String getHost(String url) {
        URI uri = parseSchemeAndHost(url);
        if (uri == null || uri.getHost() == null) {
            return "";
        }

        return uri.getHost();
    }

    /**
     * Returns the scheme of the given URL.
     *
     * @param url URL to inspect
     * @return the scheme, or an empty string when it cannot be determined
     */
    public String getScheme(String url) {
        URI uri = parseSchemeAndHost(url);
        if (uri == null || uri.getScheme() == null) {
            return "";
        }

        return uri.getScheme();
    }

    /**
     * Checks whether the top private domain of {@code host} equals one of the given entries.
     *
     * @param host        host name to check
     * @param yandexHosts array of domains to compare against
     * @return {@code true} on a match; {@code false} otherwise, including when the host has no
     *         resolvable top private domain or the array cannot be read
     */
    public Boolean isYandexHost(String host, JSONArray yandexHosts) {
        try {
            String topLevelHost = InternetDomainName.from(host).topPrivateDomain().toString();

            for (int i = 0; i < yandexHosts.length(); i++) {
                if (StringUtils.equals(yandexHosts.get(i).toString(), topLevelHost)) {
                    return true;
                }
            }
        } catch (Exception ignored) {
            // Invalid host names (empty, IP literals, unknown suffixes) and unreadable
            // array entries are all treated as "not a match".
            return false;
        }

        return false;
    }

    /**
     * Normalizes a CSP directive value to a bare directive name.
     *
     * <p>Everything from the first space on is dropped, values containing characters other
     * than ASCII letters and dashes are rejected, and a trailing {@code -attr} and then a
     * trailing {@code -elem} suffix are removed.
     *
     * @param directive raw directive value from the report
     * @return the normalized directive name, or an empty string when the value is
     *         {@code null} or not a valid name
     */
    public String prepareDirective(String directive) {
        if (directive == null) {
            return "";
        }

        String name = directive;
        int spaceIndex = name.indexOf(' ');
        if (spaceIndex > 0) {
            name = name.substring(0, spaceIndex);
        }

        if (!DIRECTIVE_NAME.matcher(name).matches()) {
            return "";
        }

        name = StringUtils.removeEnd(name, "-attr");
        return StringUtils.removeEnd(name, "-elem");
    }

    /**
     * Collapses {@code data:} URLs to the literal {@code "data"}; other URLs are returned as is.
     *
     * @param url URL from the report or the query string
     * @return the prepared URL, or an empty string for {@code null} input
     */
    public String prepareUrl(String url) {
        if (url == null) {
            return "";
        }

        return url.startsWith("data:") ? "data" : url;
    }

    /**
     * Reads a scalar member of the report as a string.
     *
     * @return the member's string value, or an empty string when the member is absent,
     *         JSON {@code null}, or not a primitive (object/array)
     */
    private static String stringField(JsonObject report, String key) {
        JsonElement value = report.get(key);
        if (value == null || !value.isJsonPrimitive()) {
            return "";
        }

        return value.getAsString();
    }

    /**
     * Reads a member of the report via {@link ParseUtils#parseUnsignedInt}, with 0 as the fallback.
     *
     * @return the parsed value, or 0 when the member is absent, empty or not a primitive
     */
    private static Integer unsignedIntField(JsonObject report, String key) {
        String raw = stringField(report, key);
        return raw.isEmpty() ? 0 : ParseUtils.parseUnsignedInt(raw, 0);
    }

    /**
     * Reads a scalar member of the report as an int.
     *
     * @return the member's int value, or 0 when the member is absent, not a primitive,
     *         or not a number
     */
    private static Integer intField(JsonObject report, String key) {
        JsonElement value = report.get(key);
        if (value == null || !value.isJsonPrimitive()) {
            return 0;
        }

        try {
            return value.getAsInt();
        } catch (NumberFormatException ignored) {
            // Same fallback as for line/column numbers: one bad field must not drop the row.
            return 0;
        }
    }

    /**
     * Extracts the first non-empty query parameter among {@code names}, tried in order.
     *
     * @return the first non-empty value, or the (empty) value of the last name tried
     */
    private static String firstNonEmptyParam(String path, String... names) {
        String value = "";
        for (String name : names) {
            value = ParseUtils.extractDecodedStringParam(path, name);
            if (!StringUtils.isEmpty(value)) {
                return value;
            }
        }

        return value;
    }

    /**
     * Copies every query parameter of {@code path} that is neither mapped to a dedicated
     * column nor ignored into the parallel {@code keys}/{@code values} lists.
     */
    private static void collectExtraParams(String path, List<String> keys, List<String> values) {
        String queryString = StringUtils.removeStart(path, CSP_QUERY_PREFIX);

        // Decode as UTF-8 explicitly so the result does not depend on the JVM default charset.
        for (NameValuePair item : URLEncodedUtils.parse(queryString, StandardCharsets.UTF_8)) {
            String name = item.getName();
            String value = item.getValue();
            if (name == null || value == null) {
                continue;
            }
            if (PARSED_URL_PARAMS.contains(name) || IGNORED_URL_PARAMS.contains(name)) {
                continue;
            }
            keys.add(name);
            values.add(value);
        }
    }

    /**
     * Parses one log line and writes at most one row to {@code context}.
     *
     * <p>Lines that do not start with {@code tskv} are skipped, as are lines for which the
     * blocked URI, the source file and the document URI are all empty.
     *
     * @param line    raw log line
     * @param context parser context; its {@code yandexHosts} parameter must hold a JSON array
     * @throws Exception if the line or the context parameter cannot be processed
     */
    @Override
    public void parse(String line, ParserContext context) throws Exception {
        if (!line.startsWith("tskv")) {
            return;
        }
        TskvSplitter splitter = new TskvSplitter(line);
        // NOTE(review): the host list is re-parsed for every line; consider caching it if
        // parser instances are confirmed to be long-lived and not shared across threads.
        JSONArray yandexHosts = new JSONArray(context.getParam("yandexHosts"));

        String userAgent = splitter.getString("user-agent");
        String origin = splitter.getString("origin");
        if (StringUtils.equals(origin, "null")) {
            origin = "";
        }

        JsonObject report = getJsonObjectContent(splitter.getString("content"));

        Integer columnNumber = 0;
        Integer lineNumber = 0;
        String blockedUri = "";
        String documentUri = "";
        String effectiveDirective = "";
        String violatedDirective = "";
        String disposition = "";
        String referrer = "";
        String scriptSample = "";
        String sourceFile = "";
        Integer statusCode = 0;
        String path;

        if (report != null) {
            columnNumber = unsignedIntField(report, "column-number");
            lineNumber = unsignedIntField(report, "line-number");
            blockedUri = prepareUrl(stringField(report, "blocked-uri"));
            documentUri = prepareUrl(stringField(report, "document-uri"));
            effectiveDirective = prepareDirective(stringField(report, "effective-directive"));
            violatedDirective = prepareDirective(stringField(report, "violated-directive"));
            disposition = stringField(report, "disposition");
            referrer = stringField(report, "referrer");
            scriptSample = stringField(report, "script-sample");
            sourceFile = prepareUrl(stringField(report, "source-file"));
            statusCode = intField(report, "status-code");
            path = StringEscapeUtils.unescapeJava(splitter.getString("path"));
        } else {
            // Without a JSON report the content field itself is used as the path/query string.
            path = StringEscapeUtils.unescapeJava(splitter.getString("content"));
        }

        String from = ParseUtils.extractDecodedStringParam(path, "from");
        UnsignedLong yandexuid = ParseUtils.parseUnsignedLong(firstNonEmptyParam(path, "yandexuid", "uid"));

        String project = ParseUtils.extractDecodedStringParam(path, "project");
        if (StringUtils.isEmpty(project)) {
            project = "unknown";
        }
        Platform platform = Platform.fromString(ParseUtils.extractDecodedStringParam(path, "platform"));
        String page = ParseUtils.extractDecodedStringParam(path, "page");

        String requestId = firstNonEmptyParam(path, "reqid", "showid", "reqId");

        if (StringUtils.isEmpty(blockedUri)) {
            blockedUri = prepareUrl(ParseUtils.extractDecodedStringParam(path, "url"));
        }

        // Nothing identifies the violation: skip before doing any further derivation work.
        if (StringUtils.isEmpty(blockedUri) && StringUtils.isEmpty(sourceFile) && StringUtils.isEmpty(documentUri)) {
            return;
        }

        String blockedUriScheme = getScheme(blockedUri);
        String blockedUriHost = getHost(blockedUri);
        boolean blockedUriIsYandex = isYandexHost(blockedUriHost, yandexHosts);

        List<String> kvKeys = new ArrayList<>();
        List<String> kvValues = new ArrayList<>();
        collectExtraParams(path, kvKeys, kvValues);

        context.write(
            getDateTime(splitter),
            userAgent,
            origin,
            lineNumber,
            columnNumber,
            sourceFile,
            referrer,
            blockedUri,
            violatedDirective,
            effectiveDirective,
            disposition,
            documentUri,
            scriptSample,
            statusCode,
            from,
            yandexuid,
            requestId,
            blockedUriScheme,
            blockedUriHost,
            blockedUriIsYandex,
            kvKeys,
            kvValues,
            project,
            platform,
            page
        );
    }
}
