package ru.yandex.msearch.proxy.socheck;

import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;

import java.net.URLEncoder;
import java.net.SocketTimeoutException;

import java.nio.charset.StandardCharsets;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeoutException;
import java.util.regex.Pattern;

import ru.yandex.http.util.BadResponseException;

import ru.yandex.msearch.proxy.HttpServer;
import ru.yandex.msearch.proxy.logger.Logger;

/**
 * {@link SoCheck} implementation that builds an SO check request from the
 * parameters of a search request and sends it through a
 * {@link SoCheckFactory}.
 *
 * <p>The first request is sent from the constructor; {@link #result(int)}
 * waits for its outcome and then sends the second request with the number of
 * found documents appended.
 */
public class HttpSoCheck implements SoCheck {
    /** Maximum number of characters of a value kept before URL-encoding. */
    private static final int MAX_LENGTH = 4096;

    /** Address used when the request carries no {@code remote_ip}. */
    private static final String DEFAULT_IP = "127.0.0.1";

    /** Characters which are replaced with a space before normalization. */
    private static final Pattern QUERY_SYNTAX =
        Pattern.compile("[*\\\\(){}\\[\\]'?~\u00a0\t\r\n\f+:!^-]");

    /** One or more consecutive spaces. */
    private static final Pattern SPACE_RUN = Pattern.compile(" +");

    private final SoCheckFactory soCheckFactory;
    private final HttpServer.RequestContext ctx;
    /** Creation time in milliseconds; hashed into so_uid and used for timing. */
    private final long start;
    /** Query string tail sent to SO, or {@code null} if the check is skipped. */
    private final String request;
    /** Pending first-step response, or {@code null} if the check is skipped. */
    private final Future<Result> callback;

    /**
     * URL-encodes the value as UTF-8.
     *
     * @param value value to encode
     * @return encoded value
     */
    private static String encode(final String value) {
        try {
            return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
        } catch (UnsupportedEncodingException e) {
            // UTF-8 support is mandatory for every JVM, so this is
            // unreachable; fail loudly rather than send a raw value
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Truncates the value to at most {@code len} characters and URL-encodes
     * the result as UTF-8.
     *
     * @param value value to truncate and encode
     * @param len maximum number of characters kept before encoding
     * @return encoded, possibly truncated value
     */
    private static String encodeAndTrim(String value, final int len) {
        if (value.length() > len) {
            int end = len;
            // Do not split a surrogate pair: the dangling half can't be
            // encoded and would be sent as a replacement character
            if (end > 0
                && Character.isHighSurrogate(value.charAt(end - 1))
                && Character.isLowSurrogate(value.charAt(end)))
            {
                --end;
            }
            value = value.substring(0, end);
        }
        return encode(value);
    }

    /**
     * Keeps letters and digits of the word and joins the groups separated by
     * any other characters with a single dot. Leading and trailing separators
     * are dropped.
     *
     * @param word space-free token of the request
     * @return normalized word, empty if it has no letters or digits
     */
    private static String normalizeWord(final String word) {
        StringBuilder sb = new StringBuilder(word.length());
        boolean separatorPending = false;
        for (int i = 0; i < word.length(); ++i) {
            char c = word.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                // Emit a separator only between two kept characters
                if (separatorPending && sb.length() > 0) {
                    sb.append('.');
                }
                separatorPending = false;
                sb.append(c);
            } else {
                separatorPending = true;
            }
        }
        return sb.toString();
    }

    /**
     * Appends the distinct items of a sorted list, each followed by a space.
     *
     * @param out destination
     * @param sorted sorted items, equal items are adjacent
     * @param quote whether to wrap every item in double quotes
     */
    private static void appendDistinct(
        final StringBuilder out,
        final List<String> sorted,
        final boolean quote)
    {
        String prev = null;
        for (String item: sorted) {
            if (item.equals(prev)) {
                continue;
            }
            prev = item;
            if (quote) {
                out.append('"');
                out.append(item);
                out.append('"');
            } else {
                out.append(item);
            }
            out.append(' ');
        }
    }

    /**
     * Builds a canonical form of a search request text.
     *
     * <p>The text is lowercased, a fixed set of special characters is
     * replaced with spaces and the result is split on double quotes: segments
     * at odd positions are quoted phrases, the rest are plain words. In every
     * word only letters and digits are kept; groups separated by other
     * characters are joined with a dot (with a space inside phrases). The
     * output lists the distinct phrases in double quotes, sorted, followed by
     * the distinct words, sorted, all separated by a single space.
     *
     * <p>For example {@code "Hello, World" foo FOO bar} becomes
     * {@code "hello world" bar foo}.
     *
     * @param text request text
     * @return normalized request, possibly empty
     */
    public static String normalizeRequest(final String text) {
        List<String> quoted = new ArrayList<>();
        List<String> unquoted = new ArrayList<>();
        String lowered = text.toLowerCase(Locale.ENGLISH);
        String[] parts =
            QUERY_SYNTAX.matcher(lowered).replaceAll(" ").split("\"");
        StringBuilder phraseBuilder = new StringBuilder();
        for (int i = 0; i < parts.length; ++i) {
            boolean insideQuotes = (i & 1) == 1;
            phraseBuilder.setLength(0);
            // Leading, trailing and repeated spaces only produce empty
            // tokens, which normalize to empty words and are skipped
            for (String token: parts[i].split(" ")) {
                String word = normalizeWord(token);
                if (word.isEmpty()) {
                    continue;
                }
                if (insideQuotes) {
                    phraseBuilder.append(word.replace('.', ' '));
                    phraseBuilder.append(' ');
                } else {
                    unquoted.add(word);
                }
            }
            if (insideQuotes && phraseBuilder.length() > 0) {
                // Drop the space appended after the last word
                phraseBuilder.setLength(phraseBuilder.length() - 1);
                quoted.add(phraseBuilder.toString());
            }
        }
        Collections.sort(quoted);
        Collections.sort(unquoted);
        StringBuilder norm = new StringBuilder();
        appendDistinct(norm, quoted, true);
        appendDistinct(norm, unquoted, false);
        if (norm.length() > 0) {
            // Drop the space appended after the last item
            norm.setLength(norm.length() - 1);
        }
        return norm.toString();
    }

    /**
     * Creates the check and, unless it has to be skipped, sends the first
     * request.
     *
     * <p>The check is skipped when {@code user}, {@code db} or {@code text}
     * is missing or the text is blank, when the request is the fake one
     * (user {@code 0} with text {@code $$$}), or when {@code nosocheck=1} is
     * passed. In that case {@link #result(int)} returns
     * {@code Result.IGNORED}.
     *
     * @param params parameters of the search request
     * @param ctx request context used for logging and passed to the factory
     * @param soCheckFactory factory used to send requests
     */
    public HttpSoCheck(
        final HttpServer.HttpParams params,
        final HttpServer.RequestContext ctx,
        final SoCheckFactory soCheckFactory)
    {
        this.soCheckFactory = soCheckFactory;
        this.ctx = ctx;
        start = System.currentTimeMillis();
        String user = params.get("user");
        String mdb = params.get("db");
        String text = params.get("text");
        if (user == null || mdb == null || text == null
            || text.trim().isEmpty())
        {
            ctx.log.info("socheck: Request is incomplete, skipping SO check");
            request = null;
        } else if ("0".equals(user) && "$$$".equals(text)) {
            ctx.log.debug("socheck: This is a fake request, skipping");
            request = null;
        } else if ("1".equals(params.get("nosocheck"))) {
            ctx.log.debug("socheck: 'nosocheck' requested, skipping");
            request = null;
        } else {
            request = buildRequest(params, user, mdb, text);
        }
        if (request == null) {
            callback = null;
        } else {
            callback = soCheckFactory.sendStep1Request(request, ctx);
        }
    }

    /**
     * Builds the query string tail of the SO request.
     *
     * <p>All values are URL-encoded, so that a parameter value can not add
     * or override other parameters of the request. The so_uid hash is
     * computed from the raw values.
     *
     * @param params parameters of the search request
     * @param user value of the {@code user} parameter, not null
     * @param mdb value of the {@code db} parameter, not null
     * @param text value of the {@code text} parameter, not null
     * @return query string tail starting with {@code &so_ip=}
     */
    private String buildRequest(
        final HttpServer.HttpParams params,
        final String user,
        final String mdb,
        final String text)
    {
        StringBuilder sb = new StringBuilder();
        Hasher hasher = Hashing.murmur3_128().newHasher();
        String ip = params.get("remote_ip");
        // XXX: Start of MPROTO-1469 workaround. Delete after fix
        if (ip == null) {
            ip = DEFAULT_IP;
        }
        // XXX: End of MPROTO-1469 workaround
        sb.append("&so_ip=");
        sb.append(encode(ip));
        hasher.putString(ip, StandardCharsets.UTF_8);
        String side = params.get("side");
        if (side != null) {
            sb.append("&side=");
            sb.append(encode(side));
        }
        sb.append("&text=");
        sb.append(encodeAndTrim(text, MAX_LENGTH));
        hasher.putString(text, StandardCharsets.UTF_8);
        hasher.putString(soCheckFactory.hostname(), StandardCharsets.UTF_8);
        if ("pg".equals(mdb)) {
            sb.append("&uid=");
        } else {
            sb.append("&suid=");
        }
        sb.append(encode(user));
        sb.append("&so_mdb=");
        sb.append(encode(mdb));
        hasher.putString(user, StandardCharsets.UTF_8);
        hasher.putLong(start);
        sb.append("&so_uid=");
        sb.append(hasher.hash().toString());
        sb.append("&imap=");
        String normalized;
        if ("1".equals(params.get("imap"))) {
            if (params.get("user_request") == null) {
                sb.append('1');
            } else {
                sb.append('2');
            }
            // Only collapse runs of spaces here. This must be a regex
            // replacement: String.replace(" +", " ") would look for a
            // literal space followed by a plus sign
            normalized = SPACE_RUN.matcher(text).replaceAll(" ").trim();
        } else {
            sb.append('0');
            normalized = normalizeRequest(text);
        }
        sb.append("&norm=");
        sb.append(encodeAndTrim(normalized, MAX_LENGTH));
        String offset = params.get("offset");
        if (offset == null) {
            offset = "0";
        }
        sb.append("&so_offset=");
        sb.append(encode(offset));
        ctx.log.debug(
            "socheck: For user " + user + '@' + mdb
            + " request normalized to: " + normalized);
        return sb.toString();
    }

    /**
     * Returns the timeout taken from the factory configuration.
     *
     * @return configured timeout
     */
    public int timeout() {
        return soCheckFactory.config().timeout();
    }

    /**
     * Waits for the outcome of the first request and, if it has completed
     * normally, sends the second request with {@code found} appended.
     *
     * @param found number of found documents reported to SO
     * @return {@code Result.IGNORED} if the check was skipped, the received
     *     result on success, otherwise a result describing the failure
     */
    @Override
    public Result result(final int found) {
        if (callback == null) {
            return Result.IGNORED;
        }
        try {
            Result result = callback.get();
            ctx.log.debug("socheck: Response received: " + result);
            soCheckFactory.sendStep2Request(
                request + "&found=" + found,
                ctx);
            return result;
        } catch (InterruptedException e) {
            ctx.log.warn("socheck: Request interrupted: "
                + Logger.exception(e));
            // Keep the interrupt visible to the caller, the exception
            // has cleared the flag
            Thread.currentThread().interrupt();
            return Result.INTERRUPTED;
        } catch (ExecutionException e) {
            Throwable error = e.getCause();
            if (error instanceof BadResponseException) {
                ctx.log.warn("socheck: Error received from SO: "
                    + Logger.exception(e));
                return Result.FAILED;
            } else if (error instanceof SocketTimeoutException) {
                ctx.log.warn("socheck: Request timed out: "
                    + Logger.exception(e));
                return Result.TIMEOUT;
            } else if (error instanceof TimeoutException) {
                ctx.log.warn("socheck: No available connections to SO: "
                    + Logger.exception(e));
                return Result.UNKNOWN;
            } else {
                ctx.log.warn("socheck: Request failed: "
                    + Logger.exception(e));
                return Result.REQUEST_FAILED;
            }
        } finally {
            ctx.log.debug("socheck: Total SO request execution time: "
                + (System.currentTimeMillis() - start));
        }
    }

    /**
     * Reads request texts from standard input, one per line, and prints every
     * text together with its normalized form.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(final String... args) throws IOException {
        try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(System.in, StandardCharsets.UTF_8)))
        {
            String line = reader.readLine();
            while (line != null) {
                System.out.println("*text: " + line);
                System.out.println("*norm: " + normalizeRequest(line));
                System.out.println();
                line = reader.readLine();
            }
        }
    }
}

