#!/bin/bash

### BEGIN INIT INFO
# Provides:          balancer_irqtuning
# Required-Start:    $remote_fs $network
# Required-Stop:     $remote_fs $network
# Default-Start:     2 3 4 5
# Default-Stop:      0 1 6
# Short-Description: start/stop tuning irq balance
# Description:       start/stop tuning irq balance
### END INIT INFO

#### Begin of Global variables
# Search path for the external tools called by this script.
# PATH should only include /usr/* if it runs after the mountnfs.sh script
PATH="/sbin:/usr/sbin:/bin:/usr/bin"
# Service name; used as the prefix of the start/stop status messages.
NAME='balancer_irqtuning'
# DESC="tuning smp affinity by "
# Path of this init script; printed in the usage message.
SCRIPTNAME="/etc/init.d/${NAME}"
#### End of Global variables


#### NETWORK CARDS
# Device ids in the form read from /sys/class/net/<iface>/device/device.
# The tuning functions compare an interface's id against these values to
# choose model-specific settings.
INTEL_82574='0x10d3'
INTEL_82599='0x1557'
MELLANOX_MT27500='0x1003'

# sanity checks
# The affinity helper is invoked by both the start and the stop action, so
# give up early (diagnostic on stderr) when it is missing or not executable.
if [ ! -x /usr/sbin/set_irq_affinity ]; then
    echo 'There is no set_irq_affinity script' >&2
    exit 1
fi

# Warn when an irqbalance process is found. Testing pgrep's exit status
# (instead of comparing its printed count) also copes with pgrep being absent.
# The warning is deliberately non-fatal: the exit below is disabled.
if pgrep irqbalance > /dev/null 2>&1; then
    echo 'Please, switch off irqbalance demon' >&2
    # exit 1
fi
# end of sanity checks

# Print the RPS CPU mask for a group width of $1: $1 set bits, $1 clear bits,
# $1 set bits, rendered as lowercase hex in comma-separated 32-bit words.
# A width of 0 yields "0". Returns 1 when $1 is not a non-negative integer.
_rps_cpu_mask () {
    local count="$1"
    local ones="" zeros="" bits hex="" mask="" digit
    local -i k

    case "${count}" in
        ''|*[!0-9]*) return 1 ;;
    esac
    # Force base 10 so a value such as "08" is not parsed as octal.
    count=$(( 10#${count} ))

    for (( k = 0; k < count; k++ )); do
        ones+="1"
        zeros+="0"
    done
    bits="${ones}${zeros}${ones}"

    # Left-pad to whole 4-bit groups, then translate each group to a hex digit.
    # Working on the string keeps this correct beyond 64 bits.
    while (( ${#bits} % 4 != 0 )); do
        bits="0${bits}"
    done
    for (( k = 0; k < ${#bits}; k += 4 )); do
        printf -v digit '%x' "$(( 2#${bits:k:4} ))"
        hex+="${digit}"
    done
    [ -n "${hex}" ] || hex="0"

    # The kernel's bitmap parser takes at most 32 bits (8 hex digits) per
    # comma-separated word, so split from the least significant end.
    while (( ${#hex} > 8 )); do
        mask=",${hex: -8}${mask}"
        hex="${hex:0:${#hex}-8}"
    done

    printf '%s\n' "${hex}${mask}"
}

# Write the RPS CPU mask to every rx queue of an interface.
# Arguments:
#   $1 - interface name
#   $2 - group width used to build the mask (see _rps_cpu_mask);
#        the mask is meant to select only physical cores of both cpus
# Returns: 0 on success (also when the interface has no rx queues),
#          1 on an invalid width or when a write fails.
set_rps () {
    local iface="$1" mask queue
    local -i rc=0

    if ! mask=$(_rps_cpu_mask "$2"); then
        echo "set_rps: invalid core count '$2'" >&2
        return 1
    fi

    for queue in /sys/class/net/"${iface}"/queues/rx-*/rps_cpus; do
        # An unmatched glob stays literal; skip it rather than write to a
        # path that does not exist.
        [ -e "${queue}" ] || continue
        echo "${mask}" > "${queue}" || rc=1
    done

    return "${rc}"
}

# Disable RPS on an interface by writing 0 to the rps_cpus file of every
# rx queue.
# Arguments:
#   $1 - interface name
# Returns: 0 on success (also when the interface has no rx queues),
#          1 when a write fails.
unset_rps () {
    local queue
    local -i rc=0

    for queue in /sys/class/net/"$1"/queues/rx-*/rps_cpus; do
        # An unmatched glob stays literal; skip it rather than write to a
        # path that does not exist.
        [ -e "${queue}" ] || continue
        echo 0 > "${queue}" || rc=1
    done

    return "${rc}"
}

# Enable RFS: size the global socket flow table and the per-queue flow
# tables of an interface.
# Globals:
#   INTEL_82574 (read)
# Arguments:
#   $1 - interface name
#   $2 - multiplier; the global table gets $2 * 4096 entries
# Returns: 0 on success, 1 on an invalid multiplier or when a write fails.
set_rfs () {
    local iface="$1" cpus="$2"
    local entries table_size device_id queue
    local -i rc=0

    case "${cpus}" in
        ''|*[!0-9]*)
            echo "set_rfs: invalid multiplier '${cpus}'" >&2
            return 1
            ;;
    esac

    # Force base 10 so a value such as "08" is not parsed as octal.
    entries=$(( 10#${cpus} * 4096 ))
    echo "${entries}" > /proc/sys/net/core/rps_sock_flow_entries || rc=1

    # Interfaces without a readable device id simply yield an empty string
    # here and take the default branch.
    device_id=$(cat "/sys/class/net/${iface}/device/device" 2>/dev/null)

    # The 82574 gets the whole table on each queue; every other card gets a
    # fixed 4096 entries per queue.
    if [ "${device_id}" == "${INTEL_82574}" ]; then
        table_size="${entries}"
    else
        table_size="4096"
    fi

    for queue in /sys/class/net/"${iface}"/queues/rx-*/rps_flow_cnt; do
        # An unmatched glob stays literal; skip it rather than write to a
        # path that does not exist.
        [ -e "${queue}" ] || continue
        echo "${table_size}" > "${queue}" || rc=1
    done

    return "${rc}"
}

# Disable RFS: zero the global socket flow table size and the flow table
# size of every rx queue of an interface.
# Arguments:
#   $1 - interface name
# Returns: 0 on success, 1 when a write fails.
unset_rfs () {
    local queue
    local -i rc=0

    echo 0 > /proc/sys/net/core/rps_sock_flow_entries || rc=1

    for queue in /sys/class/net/"$1"/queues/rx-*/rps_flow_cnt; do
        # An unmatched glob stays literal; skip it rather than write to a
        # path that does not exist.
        [ -e "${queue}" ] || continue
        echo 0 > "${queue}" || rc=1
    done

    return "${rc}"
}

# Request new rx/tx ring sizes for an interface through ethtool.
# All ethtool output is discarded; the function's status is ethtool's status.
# Arguments:
#   $1 - interface name
#   $2 - rx ring size
#   $3 - tx ring size
set_ring_buffer() {
    local iface="$1"
    local rx_size="$2"
    local tx_size="$3"

    ethtool -G "${iface}" rx "${rx_size}" tx "${tx_size}" > /dev/null 2>&1
}

# Set the number of rx and/or combined channels of an interface, touching
# only the channel kinds the card reports a non-zero maximum for and only
# when the current value differs from the requested one.
# Globals:
#   INTEL_82574 (read)
# Arguments:
#   $1 - interface name
#   $2 - requested channel count
# Returns: 0 when nothing had to be done or every change succeeded,
#          1 on an invalid count, otherwise the status of the failed
#          ethtool -L call.
set_channels_num() {
    local iface="$1" wanted="$2"
    local device_id channel_info key value
    local max_rx="" cur_rx="" max_combined="" cur_combined=""
    local -i rc=0

    # Skip for 82574L
    device_id=$(cat "/sys/class/net/${iface}/device/device" 2>/dev/null)
    if [ "${device_id}" == "${INTEL_82574}" ]; then
        return 0
    fi

    if ! [[ "${wanted}" =~ ^[0-9]+$ ]]; then
        echo "set_channels_num: invalid channel count '${wanted}'" >&2
        return 1
    fi

    # Query the card once. The report lists the pre-set maximums first and
    # the current settings second, so for each key the first occurrence is
    # the maximum and the last one is the current value.
    channel_info=$(ethtool -l "${iface}" 2>/dev/null) || return 0
    while read -r key value _; do
        case "${key}" in
            RX:)
                [ -n "${max_rx}" ] || max_rx="${value}"
                cur_rx="${value}"
                ;;
            Combined:)
                [ -n "${max_combined}" ] || max_combined="${value}"
                cur_combined="${value}"
                ;;
        esac
    done <<< "${channel_info}"

    # Values that are missing or not numeric (e.g. "n/a") mean the channel
    # kind is unsupported; leave it alone instead of tripping over the
    # integer comparison.
    if [[ "${max_rx}" =~ ^[0-9]+$ && "${cur_rx}" =~ ^[0-9]+$ ]]; then
        if [ "${max_rx}" -ne 0 ] && [ "${cur_rx}" -ne "${wanted}" ]; then
            ethtool -L "${iface}" rx "${wanted}" || rc=$?
        fi
    fi

    if [[ "${max_combined}" =~ ^[0-9]+$ && "${cur_combined}" =~ ^[0-9]+$ ]]; then
        if [ "${max_combined}" -ne 0 ] && [ "${cur_combined}" -ne "${wanted}" ]; then
            ethtool -L "${iface}" combined "${wanted}" || rc=$?
        fi
    fi

    return "${rc}"
}

# Apply the full tuning sequence to each interface: ring buffers, channel
# count, RPS (82574 only), RFS and finally IRQ affinity.
# Globals:
#   INTEL_82574, INTEL_82599, MELLANOX_MT27500 (read)
# Arguments:
#   $1 - whitespace-separated list of interface names
set_ntune() {
    local iface device_id ring_info key value rx_ring tx_ring
    local -i ncores pcores aff=0

    # count number of cores
    ncores=$(nproc)
    # count number of physical cores (taken as half of the logical CPUs);
    # never let it drop to 0, which would yield a channel count of 0 and
    # the CPU range "0--1" on a single-CPU machine.
    pcores=$(( ncores / 2 ))
    if (( pcores < 1 )); then
        pcores=1
    fi

    # $1 is split on whitespace on purpose: it carries the interface list.
    for iface in $1; do
        device_id=$(cat "/sys/class/net/${iface}/device/device" 2>/dev/null)

        # Set ring buffer size: the first RX:/TX: lines of the ethtool -g
        # report are used as the sizes to request.
        rx_ring=""
        tx_ring=""
        ring_info=$(ethtool -g "${iface}" 2>/dev/null)
        while read -r key value _; do
            case "${key}" in
                RX:) [ -n "${rx_ring}" ] || rx_ring="${value}" ;;
                TX:) [ -n "${tx_ring}" ] || tx_ring="${value}" ;;
            esac
        done <<< "${ring_info}"
        # Without both values there is nothing sensible to request.
        if [ -n "${rx_ring}" ] && [ -n "${tx_ring}" ]; then
            set_ring_buffer "${iface}" "${rx_ring}" "${tx_ring}"
        fi

        set_channels_num "${iface}" "${pcores}"

        if [ "${device_id}" == "${INTEL_82574}" ]; then
            set_rps "${iface}" "$(( pcores / 2 ))"
        fi

        set_rfs "${iface}" "${ncores}"

        # NOTE(review): presumably gives the driver time to settle after
        # the changes above - confirm before removing.
        sleep 1
        # Counts every processed interface, including those pinned to the
        # fixed range below, so the local/remote alternation is by position
        # in the list.
        aff=$(( aff + 1 ))

        case "${device_id}" in
            "${INTEL_82599}"|"${MELLANOX_MT27500}")
                # These cards get their IRQs on the first pcores CPUs.
                /usr/sbin/set_irq_affinity -x "0-$(( pcores - 1 ))" "${iface}"
                ;;
            *)
                # Alternate: even positions "local", odd positions "remote".
                if (( aff % 2 == 0 )); then
                    /usr/sbin/set_irq_affinity -x local "${iface}"
                else
                    /usr/sbin/set_irq_affinity -x remote "${iface}"
                fi
                ;;
        esac
    done
}

# Revert the tuning on each interface: disable RFS, disable RPS (82574
# only) and call the affinity helper with "-X one".
# Globals:
#   INTEL_82574 (read)
# Arguments:
#   $1 - whitespace-separated list of interface names
unset_ntune() {
    local iface device_id

    # $1 is split on whitespace on purpose: it carries the interface list.
    for iface in $1; do
        unset_rfs "${iface}"

        # Interfaces without a readable device id yield an empty string
        # and are simply not treated as an 82574.
        device_id=$(cat "/sys/class/net/${iface}/device/device" 2>/dev/null)
        if [ "${device_id}" == "${INTEL_82574}" ]; then
            unset_rps "${iface}"
        fi

        /usr/sbin/set_irq_affinity -X one "${iface}"
    done
}

# Print the names of the active network interfaces, space-separated on one
# line. An interface qualifies when its name matches the pattern below, its
# operstate is "up" and its carrier is 1.
# Blocks until at least one interface qualifies: after each unsuccessful
# scan it logs an error through logger and sleeps 1, 2, 4, ... 128 seconds,
# then starts the back-off over.
# Outputs: the interface list on stdout.
get_active_interfaces() {
    local -a interfaces=()
    local -i attempt=0
    local path name state carrier
    # Extended regex, matched anywhere in the name.
    # NOTE(review): as a regex "enx*" means "en" followed by any number of
    # "x", so every name containing "en" matches - confirm whether only
    # "enx..." names were intended.
    local -r name_re='eth[0-9]|enx*'

    while true; do
        # Scan first, so a healthy system neither logs an error nor waits.
        for path in /sys/class/net/*; do
            name="${path##*/}"
            [[ "${name}" =~ ${name_re} ]] || continue
            state=$(cat "${path}/operstate" 2>/dev/null)
            [ "${state}" == 'up' ] || continue
            carrier=$(cat "${path}/carrier" 2>/dev/null)
            [ "${carrier}" == '1' ] || continue
            interfaces+=( "${name}" )
        done

        if (( ${#interfaces[@]} > 0 )); then
            break
        fi

        logger "[$0]: ERROR - there are NO active interfaces at all; Wait and repeat ... "
        sleep $(( 2 ** attempt ))
        # Plain arithmetic increment: "+=" on a non-integer variable would
        # append digits as text and blow up the exponent.
        attempt=$(( attempt + 1 ))
        if (( attempt >= 8 )); then
            attempt=0
        fi
    done

    printf '%s\n' "${interfaces[*]}"
}

# Entry point: run the init action named by the first command-line argument.
#   start                 - tune every active interface
#   stop                  - revert the tuning on every active interface
#   restart|force-reload  - refused with an explanation, exit status 0
#   anything else         - usage message on stderr, exit status 3
main() {
    local action="$1"

    case "${action}" in
      restart|force-reload)
        printf '%s\n' "
    Applying changes (for most drivers) will bring network interface down and then bring it back up;
    Connections to this interface will be interrupted;
    So, restart action is prohibited;
    If you want to change irq affinity in runtime use set_irq_affinity script directly.
    "
        exit 0
        ;;
      start)
        printf '%s\n' "[${NAME}] Start irq tuning"
        set_ntune "$(get_active_interfaces)"
        ;;
      stop)
        printf '%s\n' "[${NAME}] Stop irq tuning"
        unset_ntune "$(get_active_interfaces)"
        ;;
      *)
        printf '%s\n' "Usage: ${SCRIPTNAME} {start|stop}" >&2
        exit 3
        ;;
    esac
}

main "$@"
