--[[Creating new features:
existence of a linked phone number (1 or 0), network parameters (such as the use of VPN, hosting, Tor, organization name),
the entropy of the login and of its larger and smaller parts (when the login contains a dash, a dot or an at sign), and conversion of the time to local time.
The model was also trained on a new set of uids with karma 100.]]

local uatraits = require 'uatraits';
local luajava = require 'luajava';
local util = require('util');


local Instant = luajava.bindClass("java.time.Instant");
local LocalDateTime = luajava.bindClass("java.time.LocalDateTime");
local ZoneId = luajava.bindClass("java.time.ZoneId");


-- Shannon entropy of the bytes of string X, computed with the natural
-- logarithm, multiplied by 10000 and floored.
-- X: string to measure (indexed byte by byte via X:sub).
-- Returns a number; an empty string or a string made of one repeated byte
-- yields zero.
local function entropy (X)
    local total = X:len()

    -- Histogram: how many times each single-byte substring occurs.
    local occurrences = {}
    for pos = 1, total do
        local ch = X:sub(pos, pos)
        occurrences[ch] = (occurrences[ch] or 0) + 1
    end

    -- Accumulate p * ln(p); the sign is flipped once at the end.
    local acc = 0
    for _, hits in pairs(occurrences) do
        local share = hits / total
        acc = acc + share * math.log(share)
    end

    return math.floor(-acc * 10000)
end

-- Selects the longest and the shortest string of a list.
-- list_of_strings: sequence of strings.
-- Returns two values, in this order: the longest string, then the shortest.
-- On equal lengths the earliest entry wins. The shortest string is only
-- tracked when the list holds more than one entry (otherwise it is ""), and
-- only strings shorter than 10000000 bytes can be picked as the shortest.
local function min_max_len(list_of_strings)
    local total = #list_of_strings
    local track_shortest = total > 1

    local shortest, shortest_len = "", 10000000
    local longest, longest_len = "", 0

    for idx = 1, total do
        local word = list_of_strings[idx]
        local len = #word
        if track_shortest and len < shortest_len then
            shortest, shortest_len = word, len
        end
        if len > longest_len then
            longest, longest_len = word, len
        end
    end

    return longest, shortest
end



-- Table of functions exported by this file; it is returned at the end.
local module = {}

-- Time zone used when the request carries no (or an empty) account time zone.
local DEFAULT_TIMEZONE = "Europe/London";

-- Separators tried, in priority order, when splitting a login into parts.
local LOGIN_SEPARATORS = {"-", ".", "@"};

-- Aggregate counters copied into num_features (missing values become 0).
local AGGREGATE_NUM_FEATURES = {
    "ip_countries_cnt_d",
    "ip_foreign_uids_cnt_d",
    "ip_tnx_cnt_d",
    "ip_uids_cnt_d",
    "ip_uids_cnt_m",
    "mail_ips_cnt_w",
    "mail_tnx_cnt_w",
    "mail_uids_cnt_w",
    "uid_countries_cnt_d",
    "uid_foreign_ip_tnx_cnt_d",
    "uid_foreign_ips_cnt_d",
    "uid_ips_cnt_d",
    "uid_tnx_cnt_d",
};

-- Request fields copied into num_features (missing values become 0).
local REQUEST_NUM_FEATURES = {
    "captchacount",
    "lcheck",
    "v2_captcha_check_count",
    "v2_cookie_my_block_count",
    "v2_login_validation_count",
    "v2_password_quality",
    "v2_password_validation_count",
    "v2_phone_bindings_count",
    "v2_phone_confirmation_confirms_count",
    "v2_phone_confirmation_sms_count",
    "v2_sanitize_phone_count",
    "v2_suggest_login_length",
};

-- Request fields copied into cat_features (missing values become "").
local REQUEST_STR_FEATURES = {
    "action",
    "consumer",
    "host",
    "ip",
    "lang",
    "passwd",
    "passwdex",
    "v2_account_country",
    "v2_account_language",
    "v2_account_timezone",
    "v2_application",
    "v2_cell_provider",
    "v2_hardware_model",
    "v2_image_captcha_type",
    "xcountry",
};

-- Request fields copied into cat_features (missing values become -1).
local REQUEST_FLAG_FEATURES = {
    "is_suggested_login",
    "proxyvalue",
    "v2_has_cookie_l",
    "v2_has_cookie_my",
    "v2_has_cookie_yandex_login",
    "v2_has_cookie_yp",
    "v2_has_cookie_ys",
    "v2_phone_confirmation_send_count_limit_reached",
    "v2_phone_confirmation_confirms_count_limit_reached",
    "v2_phone_confirmation_send_ip_limit_reached",
    "v2_phone_validation_changes",
    "v2_phone_validation_error",
};

-- Request fields reduced to 1 (present and not "") or 0 in cat_features.
local REQUEST_PRESENCE_FEATURES = {
    "v2_app_uuid",
    "v2_cookie_l_login",
    "v2_hardware_id",
};

-- Copies source[name] into target[name] for every listed name, substituting
-- `default` when the source value is nil or false.
local function copy_with_default(target, source, names, default)
    for _, name in ipairs(names) do
        target[name] = source[name] or default
    end
end

-- Maps the string "true" to 1 and anything else (including nil) to 0.
local function true_flag(value)
    return (value == "true" and 1) or 0
end

-- Splits a login on the first separator (in LOGIN_SEPARATORS order) that it
-- contains; a login without any separator is passed to util.split with ".".
local function split_login(login)
    local separator = ".";
    for _, candidate in ipairs(LOGIN_SEPARATORS) do
        if string.find(login, candidate, 1, true) then
            separator = candidate;
            break
        end
    end
    return util.split(login, separator)
end

-- Builds the model features for one request.
-- context: table read through the fields
--   aggrs  - aggregate counters (optional; treated as empty when missing),
--   src    - request data (required; src.t is passed to Instant:ofEpochMilli),
--   raw_bb - account data with `login` and `phones` (optional),
--   rbl    - holder of infos.geobase network/region data (optional).
-- Returns two tables: num_features (numeric values) and cat_features
-- (categorical values), both keyed by feature name.
-- Sections whose input is missing keep their "unknown" defaults:
-- -1 for flags, "" for strings, 0 for entropies.
function module.make_features(context)
    local aggregates = context.aggrs or {};
    local request_data = context.src;
    local bb = context.raw_bb;
    local rbl = context.rbl;

    local num_features = {};
    local cat_features = {};

    -- bb section (login entropy and linked phones)
    local entropy_login = 0;
    local entropy_big_login_part = 0;
    local entropy_small_login_part = 0;
    local with_phones = -1;

    if bb then
        -- next() raises on non-table values, so only inspect a real table.
        if type(bb.phones) == "table" then
            with_phones = (next(bb.phones) == nil and 0) or 1;
        end

        local login = bb.login;
        if type(login) == "string" then
            local big_login_part, small_login_part = min_max_len(split_login(login));
            entropy_login = entropy(login);
            entropy_big_login_part = entropy(big_login_part);
            entropy_small_login_part = entropy(small_login_part);
        end
    end

    -- ip section
    local country_id = "";
    local region_id = "";
    local city_id = "";
    local provider = "";
    local asset = "";

    local is_hosting = -1;
    local is_tor = -1;
    local is_yandex_net = -1;
    local is_yandex_turbo = -1;
    local is_vpn = -1;
    local org_name = "";
    local isp_name = "";

    local infos = rbl and rbl.infos;
    local geobase = infos and infos.geobase;
    if geobase then
        -- network part
        local ip_traits = geobase.ip_traits;
        if ip_traits then
            local asn_list = ip_traits.asn_list or {};

            is_hosting = true_flag(ip_traits.is_hosting);
            is_tor = true_flag(ip_traits.is_tor);
            is_yandex_net = true_flag(ip_traits.is_yandex_net);
            is_yandex_turbo = true_flag(ip_traits.is_yandex_turbo);
            is_vpn = true_flag(ip_traits.is_vpn);
            org_name = ip_traits.org_name or "";
            isp_name = ip_traits.isp_name or "";
            -- NOTE(review): "provider" carries the same value as "org_name";
            -- kept as is because both feature names are emitted.
            provider = ip_traits.org_name or "";
            asset = asn_list[1] or "";
        end

        -- region part
        local region_info = geobase.region_info;
        if region_info then
            country_id = region_info.country_id or "";
            region_id = region_info.id or "";
            city_id = region_info.city_id or "";
        end
    end

    -- time section: convert the request time to the account's local time
    local tmz = request_data.v2_account_timezone;
    if not tmz or tmz == "" then
        -- "" is truthy in Lua, so it has to be checked explicitly;
        -- ZoneId:of("") would raise instead of falling back.
        tmz = DEFAULT_TIMEZONE;
    end
    local now = LocalDateTime:ofInstant(Instant:ofEpochMilli(request_data.t), ZoneId:of(tmz));

    local hour = now:getHour();
    -- NOTE(review): plain division, so day_part can be fractional
    -- (e.g. 13 / 6); confirm this matches the values the model was trained on.
    local day_part = hour / 6;

    -- user_agent section
    local is_mobile = -1;
    local browser_name = "";
    local browser_version = "";
    local os_family = "";
    local os_version = "";

    if request_data.user_agent then
        local parts = uatraits.parse(request_data.user_agent);

        is_mobile = true_flag(parts.isMobile);
        browser_name = parts.BrowserName or "";
        browser_version = parts.BrowserVersion or "";
        os_family = parts.OSFamily or "";
        os_version = parts.OSVersion or "";
    end

    -- passwd && passwdex section: dot-separated values split into parts
    local passwd = {"0", "0", "0", "0"};
    if request_data.passwd then
        passwd = util.split(request_data.passwd, ".")
    end
    local passwdex = {"0", "0", "0", "0"};
    if request_data.passwdex then
        passwdex = util.split(request_data.passwdex, ".")
    end

    -- features copied straight from the inputs
    copy_with_default(num_features, aggregates, AGGREGATE_NUM_FEATURES, 0);
    copy_with_default(num_features, request_data, REQUEST_NUM_FEATURES, 0);
    copy_with_default(cat_features, request_data, REQUEST_STR_FEATURES, "");
    copy_with_default(cat_features, request_data, REQUEST_FLAG_FEATURES, -1);

    for _, name in ipairs(REQUEST_PRESENCE_FEATURES) do
        local value = request_data[name];
        cat_features[name] = (value and value ~= "" and 1) or 0
    end
    -- Unlike the fields above, an empty string counts as present here.
    cat_features["v2_page_loading_info"] = (request_data.v2_page_loading_info and 1) or 0

    -- passwd_0..passwd_3 / passwdex_0..passwdex_3 hold the first four parts
    for part = 1, 4 do
        cat_features["passwd_" .. (part - 1)] = passwd[part]
        cat_features["passwdex_" .. (part - 1)] = passwdex[part]
    end

    -- geo / network features
    cat_features["country_id"] = country_id
    cat_features["region_id"] = region_id
    cat_features["city_id"] = city_id
    cat_features["provider"] = provider
    cat_features["asset"] = asset
    cat_features["is_hosting"] = is_hosting
    cat_features["is_tor"] = is_tor
    cat_features["is_yandex_net"] = is_yandex_net
    cat_features["is_yandex_turbo"] = is_yandex_turbo
    cat_features["is_vpn"] = is_vpn
    cat_features["org_name"] = org_name
    cat_features["isp_name"] = isp_name

    -- time features
    cat_features["day_part"] = day_part
    cat_features["hour"] = hour
    -- Disabled features (not emitted):
    --cat_features["weekday"] = now:getDayOfWeek():getValue()
    --cat_features["month"] = now:getMonthValue()

    -- user agent features
    cat_features["is_mobile"] = is_mobile
    cat_features["browser_name"] = browser_name
    cat_features["browser_version"] = browser_version
    cat_features["os_family"] = os_family
    cat_features["os_version"] = os_version

    -- login / phone features
    num_features["entropy_login"] = entropy_login
    num_features["entropy_small_login_part"] = entropy_small_login_part
    num_features["entropy_big_login_part"] = entropy_big_login_part
    cat_features["with_phones"] = with_phones

    return num_features, cat_features;
end

-- Hand the module table (with make_features) back to require().
return module;
