package ru.yandex.msearch.proxy.api.async.suggest.contact;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import ru.yandex.logger.PrefixedLogger;

import ru.yandex.msearch.proxy.Synonyms;
import ru.yandex.msearch.proxy.api.async.mail.Side;
import ru.yandex.msearch.proxy.api.async.suggest.contact.TokenizeRule.RuleComparator;
import ru.yandex.msearch.proxy.api.suggest.Translit;

/**
 * Static helpers for contact suggest: splitting a user request into
 * {@link TokenizeRule}s, rendering those rules as search requests and
 * stripping well-known mail domains / TLDs from strings.
 */
public class Util {
    /**
     * Once the rule list built for a request token reaches this size, no
     * further synonyms are expanded for that token.
     */
    private static final int MAX_RULES_SIZE = 60;

    /**
     * Comma-separated names of popular mail providers (without TLD). Passed
     * verbatim to {@code filter_by_contacts} and used by
     * {@link #removePopularDomains(String)}.
     */
    private static final String EMAIL_DOMAINS =
        "yandex,ya,gmail,yahoo,mail,hotmail,aol,msn,live,outlook,googlemail,"
        + "rambler,inbox,list,bk";
    private static final String[] EMAIL_DOMAINS_ARRAY =
        EMAIL_DOMAINS.split(",");
    /** Top-level domains recognised by {@link #removeTLD(String)}. */
    private static final String[] TLDS = new String[]{
        "ru", "com", "net", "org", "info", "int", "edu", "ua", "by",
        "kz", "de", "cn", "uk", "nl", "su", "eu", "tk", "us", "tr",
        "рф"};
    /**
     * Characters accepted as a word boundary right after a domain name or
     * a TLD.
     */
    private static final String PUNCT_MARKS =
        "./\\:;!#$%&()*+=[]^`{|}~_-,<>@\"' \n\t\f\r";

    /** Token separators for non-searchable rules; '@' stays in tokens. */
    private static final Pattern REQUEST_SEPARATORS =
        Pattern.compile("[._\\-, \\t]");
    /** Token separators for searchable rules; '@' also splits tokens. */
    private static final Pattern SEARCHABLE_SEPARATORS =
        Pattern.compile("[._\\-@, \\t]");
    /** Trailing characters that force a single request token to finish. */
    private static final String FINISHING_SEPARATORS = " ._-@";

    /**
     * Checks that every token is matched by some element of {@code parts}.
     * A finished token must be equal to a part, an unfinished token must be
     * a prefix of a part.
     *
     * @param parts words to match against
     * @param tokens tokens that all have to match
     * @return {@code true} if {@code tokens} is not empty and every token
     *     matches; {@code false} otherwise (including empty {@code tokens})
     */
    public static boolean containsAllTokens(
        final Set<String> parts,
        final Set<Token> tokens)
    {
        if (tokens.isEmpty()) {
            return false;
        }
        for (Token token: tokens) {
            boolean matched;
            if (token.finished) {
                matched = parts.contains(token.str);
            } else {
                matched = hasPartWithPrefix(parts, token.str);
            }
            if (!matched) {
                return false;
            }
        }
        return true;
    }

    /** Tells whether at least one part starts with the given prefix. */
    private static boolean hasPartWithPrefix(
        final Set<String> parts,
        final String prefix)
    {
        for (String part: parts) {
            if (part.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Cuts a leading popular mail provider name off the string. The name is
     * removed only when it is followed by the end of the string or by one
     * of {@link #PUNCT_MARKS}; the boundary character itself is kept, e.g.
     * {@code "gmail.com"} becomes {@code ".com"}.
     *
     * @param domain string to process, may be {@code null} or empty
     * @return the remainder after the first matching provider name, or
     *     {@code domain} itself if nothing matched
     */
    public static String removePopularDomains(final String domain) {
        if (domain == null || domain.isEmpty()) {
            return domain;
        }
        for (String popular: EMAIL_DOMAINS_ARRAY) {
            if (!domain.startsWith(popular)) {
                continue;
            }
            int end = popular.length();
            if (end == domain.length()
                || PUNCT_MARKS.indexOf(domain.charAt(end)) != -1)
            {
                return domain.substring(end);
            }
        }
        return domain;
    }

    /**
     * Replaces every dot with a space and drops a known TLD that directly
     * follows a dot, provided the TLD is followed by the end of the string
     * or by one of {@link #PUNCT_MARKS}. For example {@code "user@mail.ru"}
     * becomes {@code "user@mail "}.
     *
     * @param emailStr string to process, may be {@code null}
     * @return processed string, or {@code null} for {@code null} input
     *     (same convention as {@link #removePopularDomains(String)})
     */
    public static String removeTLD(final String emailStr) {
        if (emailStr == null) {
            return null;
        }
        int length = emailStr.length();
        StringBuilder sb = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = emailStr.charAt(i++);
            if (c == '.') {
                sb.append(' ');
                // i already points past the dot, skip the TLD if any
                i += tldLengthAt(emailStr, i);
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Returns the length of the first known TLD that starts at {@code pos}
     * and ends at a word boundary, or 0 if there is none.
     */
    private static int tldLengthAt(final String str, final int pos) {
        for (String tld: TLDS) {
            // startsWith(prefix, offset) is false when the prefix does
            // not fit, so no explicit bounds check is required
            if (!str.startsWith(tld, pos)) {
                continue;
            }
            int end = pos + tld.length();
            if (end == str.length()
                || PUNCT_MARKS.indexOf(str.charAt(end)) != -1)
            {
                return tld.length();
            }
        }
        return 0;
    }

    /**
     * Renders searchable rules as a search request of the form
     * {@code header:(t1 AND t2*) OR header:(...)}. Unfinished tokens get a
     * trailing {@code '*'}, every {@code '@'} is removed from token text.
     * Non-searchable rules, rules without tokens and rules whose
     * {@link RuleComparator} was already seen are skipped.
     *
     * @param rules rules to render
     * @param header field name put before every group
     * @return request text, empty if no rule was rendered
     */
    public static String generateRequest(
        final Collection<TokenizeRule> rules,
        final String header)
    {
        Set<RuleComparator> seen = new HashSet<>();
        StringBuilder text = new StringBuilder();
        for (TokenizeRule rule: rules) {
            if (!rule.searchable() || rule.tokens().isEmpty()) {
                continue;
            }
            // add() returns false for a duplicate, no separate lookup
            if (!seen.add(new RuleComparator(rule.tokens()))) {
                continue;
            }
            StringBuilder andRequest = new StringBuilder();
            for (Token token: rule.tokens()) {
                if (andRequest.length() > 0) {
                    andRequest.append(" AND ");
                }
                // literal replacement, regex is not needed here
                andRequest.append(token.str.replace("@", ""));
                if (!token.finished) {
                    andRequest.append('*');
                }
            }
            if (text.length() > 0) {
                text.append(" OR ");
            }
            text.append(header).append(":(").append(andRequest).append(')');
        }
        return text.toString();
    }

    /**
     * Builds a {@code filter_by_contacts(header domains group group ...)}
     * expression, where {@code domains} is {@link #EMAIL_DOMAINS} and each
     * group is the comma-joined token texts of one searchable rule.
     *
     * @param rules rules to render
     * @param header field name, first argument of the expression
     * @return the expression, or an empty string if {@code rules} is
     *     empty, {@code header} is {@code null} or empty, or no rule
     *     produced a non-empty group
     */
    public static String generateFilterByContactsDP(
        final Collection<TokenizeRule> rules,
        final String header)
    {
        if (rules.isEmpty() || header == null || header.isEmpty()) {
            return "";
        }
        StringBuilder groups = new StringBuilder();
        for (TokenizeRule rule: rules) {
            if (!rule.searchable() || rule.tokens().isEmpty())  {
                continue;
            }
            StringBuilder group = new StringBuilder();
            for (Token token: rule.tokens()) {
                group.append(token.str).append(',');
            }
            // tokens are not empty here, so a trailing comma always exists
            group.setLength(group.length() - 1);
            if (group.length() > 0) {
                groups.append(' ').append(group);
            }
        }
        if (groups.length() == 0) {
            return "";
        }
        return "filter_by_contacts(" + header + ' ' + EMAIL_DOMAINS
            + groups + ')';
    }

    /**
     * Wraps the whole string into a single unfinished token of a
     * non-searchable rule.
     */
    private static TokenizeRule tokenizeEntire(final String str) {
        Set<Token> tokens = Collections.singleton(new Token(str, false));
        return new TokenizeRule(tokens, false);
    }

    /**
     * Builds rules from already split tokens. Blank tokens are skipped.
     * Every token except the last one is finished; the last one is
     * finished only when it is the only token and {@code str} ends with a
     * separator. Each token (and, while the limit allows, each of its
     * synonyms) is combined with all rules built for the previous tokens.
     *
     * @param str string the tokens were split from, used only to detect a
     *     trailing separator
     * @param synonymService source of synonyms
     * @param logger request logger
     * @param stringTokens tokens of {@code str}
     * @param searchable searchable flag for the created rules
     * @return rules containing one token variant per non-blank position
     */
    private static List<TokenizeRule> tokenize(
        final String str,
        final Synonyms synonymService,
        final PrefixedLogger logger,
        final String[] stringTokens,
        final boolean searchable)
    {
        //Single token followed by a separator should be finalized
        boolean forceFinishToken =
            stringTokens.length == 1 && endsWithSeparator(str);
        int lastIndex = stringTokens.length - 1;

        List<TokenizeRule> rules = new ArrayList<>();
        for (int i = 0; i < stringTokens.length; i++) {
            String tokenStr = stringTokens[i];
            if (tokenStr.trim().length() == 0) {
                continue;
            }
            logger.info("Request token[" + i + "]: " + tokenStr);

            //Not the last token - should finalize
            boolean finishToken = forceFinishToken || i < lastIndex;

            logger.info("Request tokens suggest: " + tokenStr + " ("
                + finishToken + ")");

            List<TokenizeRule> updated = new ArrayList<>();
            addTokenToRules(
                new Token(tokenStr, finishToken),
                rules,
                updated,
                searchable);
            if (updated.size() >= MAX_RULES_SIZE) {
                logger.info("Rules over limit: " + updated.size()
                    + ", ignore all synonyms");
            } else {
                Set<Token> synonyms =
                    getSynonyms(tokenStr, synonymService, finishToken);
                for (Token synonym: synonyms) {
                    addTokenToRules(synonym, rules, updated, searchable);
                    if (updated.size() >= MAX_RULES_SIZE) {
                        logger.info("Rules over limit: " + updated.size()
                            + ", ignore some synonyms");
                        break;
                    }
                }
            }
            rules = updated;
        }
        return rules;
    }

    /** Tells whether the string ends with a token-finishing separator. */
    private static boolean endsWithSeparator(final String str) {
        if (str.isEmpty()) {
            return false;
        }
        char last = str.charAt(str.length() - 1);
        return FINISHING_SEPARATORS.indexOf(last) != -1;
    }

    /**
     * Appends to {@code newRules} a copy of every rule from {@code rules}
     * extended with {@code token}; if {@code rules} is empty, appends one
     * rule holding only {@code token}. {@code rules} is not modified.
     */
    private static void addTokenToRules(
        final Token token,
        final List<TokenizeRule> rules,
        final List<TokenizeRule> newRules,
        final boolean searchable)
    {
        if (rules.isEmpty()) {
            TokenizeRule single =
                new TokenizeRule(new HashSet<>(), searchable);
            single.tokens().add(token);
            newRules.add(single);
            return;
        }
        for (TokenizeRule rule: rules) {
            TokenizeRule extended =
                new TokenizeRule(new HashSet<>(rule.tokens()), searchable);
            extended.tokens().add(token);
            newRules.add(extended);
        }
    }

    /**
     * Collects synonyms of a token together with their transliterations,
     * all as finished tokens. Tokens shorter than three characters have no
     * synonyms.
     *
     * @param token token text
     * @param synonymService source of synonyms
     * @param finishToken whether the token is finished; its negation is
     *     passed to {@code Synonyms.suggest} (presumably it enables prefix
     *     lookup - confirm against Synonyms)
     * @return synonym tokens, possibly empty
     */
    private static Set<Token> getSynonyms(
        final String token,
        final Synonyms synonymService,
        final boolean finishToken)
    {
        if (token.length() < 3) {
            return Collections.emptySet();
        }
        Set<Token> result = new HashSet<>();
        for (String syn: synonymService.suggest(token, !finishToken)) {
            result.add(new Token(syn, true));
            result.add(new Token(Translit.translit(syn), true));
        }
        return result;
    }

    /**
     * Builds non-searchable rules; '@' is not a separator here, so it
     * stays inside tokens.
     */
    private static List<TokenizeRule> tokenizeRequest(
        final String str,
        final Synonyms synonymService,
        final PrefixedLogger logger)
    {
        // Locale.ROOT keeps case folding independent of the JVM locale
        String[] stringTokens =
            REQUEST_SEPARATORS.split(str.toLowerCase(Locale.ROOT));
        return tokenize(str, synonymService, logger, stringTokens, false);
    }

    /** Builds searchable rules; '@' is one of the token separators. */
    private static List<TokenizeRule> tokenizeSearchable(
        final String str,
        final Synonyms synonymService,
        final PrefixedLogger logger)
    {
        // Locale.ROOT keeps case folding independent of the JVM locale
        String[] stringTokens =
            SEARCHABLE_SEPARATORS.split(str.toLowerCase(Locale.ROOT));
        return tokenize(str, synonymService, logger, stringTokens, true);
    }

    /**
     * Turns a user request into rules. The request is lower-cased and
     * expanded with {@code Translit.suggestSet}; for every variant the
     * result gets a whole-string rule, the non-searchable token rules and
     * the searchable token rules, in that order.
     *
     * @param synonymService source of synonyms
     * @param requestStr raw request, must not be {@code null}
     * @param logger request logger
     * @param side passed through to {@code Translit.suggestSet}
     * @return all rules built for all variants of the request
     */
    public static List<TokenizeRule> parseRequestString(
        final Synonyms synonymService,
        final String requestStr,
        final PrefixedLogger logger,
        final Side side)
    {
        // Locale.ROOT: the default locale must not affect matching
        // (e.g. Turkish locale lower-cases 'I' to dotless 'ı')
        String str = requestStr.toLowerCase(Locale.ROOT);
        List<TokenizeRule> rules = new ArrayList<>();
        for (String variant: Translit.suggestSet(str, side)) {
            rules.add(tokenizeEntire(variant));
            rules.addAll(tokenizeRequest(variant, synonymService, logger));
            rules.addAll(
                tokenizeSearchable(variant, synonymService, logger));
        }
        return rules;
    }
}
