package ru.yandex.market.logshatter.parser.marketout;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

// Simple bot detector based on the User-Agent string

/**
 * Classifies a client by its User-Agent string: a known crawler, a script/HTTP
 * library, an unrecognised bot, or not a bot at all.
 *
 * <p>The check is a plain heuristic built on two regular expressions and a list
 * of substrings; it does not validate the user agent in any other way.
 */
public class BotDetector {
    /** Result of {@link #detect(String)}; {@code NONE} means "not considered a bot". */
    enum BotType { GOOGLE, BING, YANDEX, MAIL_RU, MARKET, SCRIPT, OTHER, NONE }

    /**
     * Matches a user agent that starts with some text followed by a non-empty
     * parenthesised group, e.g. {@code "Product/1.0 (details)"}.
     */
    private static final Pattern POSSIBLY_NOT_BOT_REGEX = Pattern.compile("^[^(]+\\([^)]+\\)");

    /** Matches an {@code http://} or {@code https://} prefix anywhere in the user agent. */
    private static final Pattern POSSIBLY_HAS_URL_REGEX = Pattern.compile("https?://");

    /**
     * Substring-to-type table, checked in insertion order; the first substring
     * contained in the user agent decides the result.
     */
    private static final Map<String, BotType> BOT_DEFINITIONS = buildBotDefinitions();

    /**
     * Detects the bot type for the given user agent.
     *
     * @param userAgent raw User-Agent value; {@code null} is treated like an empty string
     * @return {@code NONE} for a null/empty value or for a browser-looking value
     *         that carries no URL; otherwise the type of the first matching entry
     *         of the definitions table, or {@code OTHER} when nothing matches
     */
    public static BotType detect(String userAgent) {
        // A missing header must not blow up the caller: handle it like an empty one.
        if (userAgent == null || userAgent.isEmpty()) {
            return BotType.NONE;
        }

        boolean looksLikeBrowser = POSSIBLY_NOT_BOT_REGEX.matcher(userAgent).find()
                || userAgent.startsWith("Mozilla/");
        // Browser-shaped agents are only inspected further when they embed a URL.
        if (looksLikeBrowser && !POSSIBLY_HAS_URL_REGEX.matcher(userAgent).find()) {
            return BotType.NONE;
        }

        for (Map.Entry<String, BotType> definition : BOT_DEFINITIONS.entrySet()) {
            if (userAgent.contains(definition.getKey())) {
                return definition.getValue();
            }
        }
        return BotType.OTHER;
    }

    /** Builds the substring table; a LinkedHashMap keeps the declared matching order. */
    private static Map<String, BotType> buildBotDefinitions() {
        Map<String, BotType> definitions = new LinkedHashMap<String, BotType>();
        definitions.put("Googlebot", BotType.GOOGLE);
        definitions.put("AdsBot-Google", BotType.GOOGLE);
        definitions.put("http://www.google.com/bot.html", BotType.GOOGLE);
        definitions.put("http://www.google.com/adsbot.html", BotType.GOOGLE);
        definitions.put("http://www.google.com/mobile/adsbot.html", BotType.GOOGLE);
        definitions.put("bingbot", BotType.BING);
        definitions.put("http://www.bing.com/bingbot.htm", BotType.BING);
        definitions.put("http://yandex.com/bots", BotType.YANDEX);
        definitions.put("Mail.RU_Bot", BotType.MAIL_RU);
        definitions.put("http://go.mail.ru/help/robots", BotType.MAIL_RU);
        definitions.put("Yandex-Market-Cache-Warmer", BotType.MARKET);
        definitions.put("YMarketTarantino", BotType.MARKET);
        definitions.put("python-requests/", BotType.SCRIPT);
        definitions.put("Python-urllib/", BotType.SCRIPT);
        definitions.put("libtorrent/", BotType.SCRIPT);
        definitions.put("Java/", BotType.SCRIPT);
        definitions.put("curl/", BotType.SCRIPT);
        definitions.put("WinHTTP", BotType.SCRIPT);
        return definitions;
    }
}
