#!/bin/bash
# Settings for building the geobase "geodata6.bin" data file.
#
# Shell options:
#   -a           every variable/function defined from here on is exported
#   -e           exit on the first unhandled command failure
#   -E           ERR traps are inherited by functions and subshells; set here
#                instead of in the shebang so it also applies when the script
#                is started as `bash <script>`
#   -o pipefail  a pipeline fails if any of its stages fails
set -aeEo pipefail # x

# Real directory of this script (symlinks resolved); quoted so that paths
# containing whitespace work.
script_dir="$(dirname "$(readlink -f "$0")")"
# Shared helpers/settings. NOTE(review): data_path and helpers such as
# check_file are presumably defined there - confirm.
. "${script_dir}/common.sh"

geodata_format="6.0.0"
# Space-separated package list; consumers rely on word-splitting of this string.
required_debs_list="curl yandex-internal-root-ca geobase5-checker=5.3-14 geobase6-generator=6.0-41 geobase6-utils=6.0-40 libgeobase6-python=6.0-34 python-ipaddr python-netaddr awscli"

geogen_tool="/opt/yandex/geobase/geobase6-generator"
geochecker_tool="/opt/yandex/geobase/ipreg-checker"
geoutils_tool="geobase6-util"
geobin_datafile="${data_path}/geodata6.bin"

borders_file="reverse_borders_world.bin"
borders_datafile="${data_path}/${borders_file}"
borders_datafile_src_url="NO-DATA"

# Runs check_file on every input file the geodata build depends on.
# Globals:   eu_regions_fname, data_path (read)
# Arguments: none
# NOTE(review): the numeric second argument of check_file is presumably a
# minimum size/line-count threshold - confirm against its definition.
# The trailing "full N" notes are kept from the original.
check_resources() {
    echo "${FUNCNAME[0]}"

    check_file "${eu_regions_fname}" 7000

    check_file "${data_path}/GeoIP2-Anonymous-IP-Blocks-IPv4.csv" 550000
    check_file "${data_path}/GeoIP2-Anonymous-IP-Blocks-IPv6.csv" 7900
    check_file "${data_path}/GeoIP2-ISP-Blocks-IPv4.csv" 525000
    check_file "${data_path}/GeoIP2-ISP-Blocks-IPv6.csv" 40000

    check_file "${data_path}/IPREG-yandex.patch" 450
    check_file "${data_path}/IPREG-as.patch.orig.json" 200000
    check_file "${data_path}/IPREG-as.patch.list" 60000
    check_file "${data_path}/IPREG-mmisp.patch.names" 70000
    check_file "${data_path}/IPREG-mobile.patch" 400000 # full 500000
    check_file "${data_path}/IPREG-traits.list" 225000 # full 485000

    check_file "${data_path}/IPREG.json_wo_reliabilities" 2250000 # full 5000000
    check_file "${data_path}/IPREG.final" 2250000 # full 5000000
}

# Runs the geobase generator to produce ${geobin_datafile}.
# Globals:   db_traits_datafile, geobin_datafile, geogen_tool,
#            geodata_check_flag, geodata_format, data_path,
#            borders_datafile (read)
# Arguments: any extra generator options; each one is forwarded as-is.
# NOTE(review): db_host/db_user/db_pswd/db_port/db_name are presumably set by
# the sourced ${db_traits_datafile} - confirm.
# NOTE(review): the DB password is passed on the command line and is therefore
# visible in the process list; left unchanged here.
generate_geodata6_bin() {
    echo "${FUNCNAME[0]}"

    . "${db_traits_datafile}"

    echo "'${geobin_datafile}' generation..."
    # geodata_check_flag is left unquoted on purpose: it may be empty (then it
    # must vanish) or hold several flags. The format option is the version
    # with dots removed ("6.0.0" -> "600"). "$@" keeps every caller argument
    # as a single word even when it contains whitespace.
    # shellcheck disable=SC2086
    "${geogen_tool}" \
        ${geodata_check_flag} \
        --format="${geodata_format//./}" \
        --output="${geobin_datafile}" \
        --regions-section=@DB \
        --regions-locales-section=@DB \
        --dbhost="${db_host}" \
        --dbuser="${db_user}" \
        --dbpass="${db_pswd}" \
        --dbport="${db_port}" \
        --dbname="${db_name}" \
        --as-section "${data_path}/IPREG-as.patch.list" \
        --isp-names-section "${data_path}/IPREG-mmisp.patch.names" \
        --ip-traits-section "${data_path}/IPREG-traits.list" \
        --ipreg-section "${data_path}/IPREG.final" \
        --borders-section="${borders_datafile}" \
        "$@"
}

# Checks the generated geodata binary against the source data sets.
# Every check is optional: it runs only when its input file exists under
# ${data_path}, and the input is fed to the matching checker on stdin.
# Globals:   data_path, geobin_datafile, geochecker_tool, script_dir (read)
# Arguments: $1 - optional extra argument for the MaxMind anonymous-IP
#                 processor (logged as the "json data mode")
test_ipreg_data() {
    local json_data_mode=$1
    local isp_file
    local -a isp_files=()

    echo "${FUNCNAME[0]}"

    if [ -f "${data_path}/IPREG-yandex.patch" ]; then
        echo "check YANDEX..."
        # Rewrite `":1` values to `":true` before handing the patch to the checker.
        sed 's/":1/":true/g' "${data_path}/IPREG-yandex.patch" \
        | "${geochecker_tool}" --geodata="${geobin_datafile}" --data-version=6 --crash-on-error --full-check --attrs-match=users:yandex_staff,turbo:yandex_turbo,yandex:yandex_net
    fi

    if [ -f "${data_path}/IPREG-mobile.patch" ]; then
        echo "check MOBILE..."
        sed 's/":1/":true/g' "${data_path}/IPREG-mobile.patch" \
        | "${geochecker_tool}" --geodata="${geobin_datafile}" --data-version=6 --full-check
    fi

    if [ -f "${data_path}/IPREG-as.patch.orig.json" ]; then
        echo "check AS..."
        GEOBASE_IP6_TO_IP4=0 "${geochecker_tool}" --data-version=6 --geodata="${geobin_datafile}" --full-check --attrs-match=as:asn_list \
            <"${data_path}/IPREG-as.patch.orig.json"
    fi

    if [ -f "${data_path}/GeoIP2-Anonymous-IP-Blocks-IP46.csv" ]; then
        echo "check MaxMind-ANON... [${json_data_mode}]"
        # json_data_mode stays unquoted so that an empty value adds no argument.
        # shellcheck disable=SC2086
        "${script_dir}/maxmind_anon_data_processor.py" --mode=check --geodata="${geobin_datafile}" ${json_data_mode} \
            <"${data_path}/GeoIP2-Anonymous-IP-Blocks-IP46.csv"
    fi

    # Collect the ISP block files that actually exist. The guard must look at
    # the same files that are fed to the ISP processor; a `[ -f <glob> ]` test
    # is unreliable because the glob may match zero or several files.
    for isp_file in "${data_path}"/GeoIP2-ISP-Blocks-IPv?.csv; do
        if [ -f "${isp_file}" ]; then
            isp_files+=("${isp_file}")
        fi
    done
    if [ "${#isp_files[@]}" -gt 0 ]; then
        echo "check MaxMind-ISP..."
        cat "${isp_files[@]}" \
        | "${script_dir}/maxmind_isp_data_processor.py" --mode=check --geodata="${geobin_datafile}" --output="${geobin_datafile}.mm_isp.check_warns"
    fi

    if [ -f "${data_path}/IPREG.json_wo_reliabilities" ]; then
        echo "check IPREG (IP-only)..."
        "${script_dir}/ipreg-fmt" --split-mix-ranges <"${data_path}/IPREG.json_wo_reliabilities" \
        | "${geochecker_tool}" --geodata="${geobin_datafile}" --data-version=6
    fi
}

# Uploads ${geobin_datafile} to YT on every listed proxy.
# The file is first written to "<path>.tmp" on all proxies and only then moved
# over the final path, so the substitution starts after every upload succeeded.
# Globals:   geobin_datafile, yt_traits_datafile (read)
# Arguments: none
# NOTE(review): yt_token is presumably defined by the sourced
# ${yt_traits_datafile} - confirm.
upload_yt_export() {
    echo "${FUNCNAME[0]}"

    check_file "${geobin_datafile}"
    check_file "${yt_traits_datafile}"
    . "${yt_traits_datafile}"

    local -a yt_proxies=(hahn arnold)
    local yt_proxy_name
    local yt_file_path
    yt_file_path="//home/geotargeting/public/geobase/$(basename "${geobin_datafile}")"

    # upload to tmp-table
    for yt_proxy_name in "${yt_proxies[@]}"; do
        echo "${yt_proxy_name} (tmp) - ${yt_file_path}..."
        YT_PROXY="${yt_proxy_name}" YT_TOKEN="${yt_token}" yt write-file --destination "${yt_file_path}.tmp" \
            <"${geobin_datafile}"
    done

    # file-subst
    for yt_proxy_name in "${yt_proxies[@]}"; do
        YT_PROXY="${yt_proxy_name}" YT_TOKEN="${yt_token}" yt move --force "${yt_file_path}.tmp" "${yt_file_path}"
    done
}

# Runs the regions-centers check script against ${geobin_datafile}, stores its
# stdout in ./check-regions.log (current directory) and prints how many log
# lines start with each distinct first field.
# Globals:   script_dir, geobin_datafile (read)
# Arguments: none
check_regs_centers() {
    echo "${FUNCNAME[0]}"

    local regions_log="check-regions.log"

    # The script's stdout goes to the log file (the original `1>` was a
    # redirection, not a value for --verbose-log).
    "${script_dir}/LAAS-1981.check-regions-centers.py" \
        --geodata "${geobin_datafile}" \
        --countries-ids all \
        --verbose-log >"${regions_log}"
    awk '{ print $1 }' "${regions_log}" | sort | uniq -c
}

# Full build pipeline: environment preparation, input preparation and checks,
# geodata generation, post-generation checks, then publishing of the result.
# The steps are order-dependent and run strictly in sequence.
# Globals:   required_debs_list, script_dir, data_path, iso_alpha3_fname,
#            eu_regions_fname (read)
# Arguments: none
build() {
    debug_info
    view_remote_content
    # required_debs_list is a space-separated string; it is split into one
    # argument per package on purpose.
    # shellcheck disable=SC2086
    install_required_debs ${required_debs_list}
    check_awscli

    prepare_eu_list
    prepare_iso_alpha3_list
    download_borders_section
    "${script_dir}/prepare_full_patch.sh" "${data_path}" yandex
    check_resources
    check_file "${iso_alpha3_fname}" 210  # was extracted, because xurma.bin
    generate_geodata6_bin "--new-fields-data=${eu_regions_fname},${iso_alpha3_fname}"
    check_geodata_bin
    check_regs_centers
    test_ipreg_data
    check_eu_list
    check_iso_alpha3_list

    move_result_file_if_required
    upload_s3mds
}
