#!/bin/bash
#
# Copyright (c) 2014, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of Intel Corporation nor the names of its contributors
#       may be used to endorse or promote products derived from this software
#       without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Affinitize interrupts to cores
#
# typical usage is (as root):
# set_irq_affinity -x local eth1 <eth2> <eth3>
#
# to get help:
# set_irq_affinity

usage()
{
    # Show the command synopsis and a few example invocations, then leave
    # the script with status 1. Also serves -h/--help.
    printf '%s\n' \
        "" \
        "Usage: $0 [-x|-X] {all|local|remote|one|custom} [ethX] <[ethY]>" \
        "  options: -x     Configure XPS as well as smp_affinity" \
        "  options: -X     Disable XPS but set smp_affinity" \
        "  options: {remote|one} can be followed by a specific node number" \
        "  Ex: $0 local eth0" \
        "  Ex: $0 remote 1 eth0" \
        "  Ex: $0 custom eth0 eth1" \
        "  Ex: $0 0-7,16-23 eth0" \
        ""
    exit 1
}

usageX()
{
    # -x and -X contradict each other: say so and leave with status 1.
    printf '%s\n' "options -x and -X cannot both be specified, pick one"
    exit 1
}

# Parse the command line into the globals used by the rest of the script:
#   XPS_ENA  ""  = leave XPS alone, 1 = program XPS (-x), 2 = clear XPS (-X)
#   AFF      affinity mode (all|local|remote|one|custom) or an explicit
#            core list such as 0-7,16-23
#   rnode    optional number given after "remote"
#   cnt      optional number given after "one"
#   IFACES   space-separated list of interfaces to process
#
# Start from a clean slate so that same-named variables exported in the
# caller's environment cannot leak into the option state or interface list.
XPS_ENA=""
XPS_DIS=""
IFACES=""
rnode=""
cnt=""

# Leading -x / -X flags, in either order; combining them is an error.
while [ "$1" == "-x" ] || [ "$1" == "-X" ]; do
    if [ "$1" == "-x" ]; then
        [ -n "$XPS_DIS" ] && usageX
        XPS_ENA=1
    else
        [ -n "$XPS_ENA" ] && usageX
        XPS_DIS=2
    fi
    shift
done

# -X on its own is carried in XPS_ENA as the value 2
if [ -z "$XPS_ENA" ]; then
    XPS_ENA=$XPS_DIS
fi

num='^[0-9]+$'
# Vars
AFF=$1
shift

case "$AFF" in
    remote) [[ $1 =~ $num ]] && rnode=$1 && shift ;;
    one)    [[ $1 =~ $num ]] && cnt=$1 && shift ;;
    all)    ;;
    local)  ;;
    custom) ;;
    [0-9]*) ;;
    -h|--help)  usage ;;
    "")     usage ;;
    -*)     usage ;;                    # unknown option, not an interface name
    *)      IFACES=$AFF && AFF=all ;;   # Backwards compat mode
esac

# append the interfaces listed to the string with spaces
while [ "$#" -ne 0 ]; do
    IFACES+=" $1"
    shift
done

# for now the user must specify interfaces (usage exits the script)
if [ -z "$IFACES" ]; then
    usage
fi

# support functions

# Pin a single interrupt to a single CPU and, when XPS handling was
# requested, program the transmit queue that goes with it.
#
# Takes no arguments; the caller passes everything through globals:
#   core     CPU number to pin the interrupt to
#   IRQ      interrupt number, as named under /proc/irq
#   IFACE    interface name, used for reporting and for the XPS sysfs path
#   n        1-based vector counter; the XPS queue touched is tx-(n-1)
#   XPS_ENA  1 = write the CPU mask to xps_cpus, 2 = write 0 to xps_cpus,
#            anything else = leave XPS untouched
#
# Prints one "IFACE CORE MASK -> FILE" line on stdout per file it targets.
set_affinity()
{
    # Keep all scratch variables local so the caller's variables (notably
    # its loop variable "i") are never overwritten.
    local word_idx=$((core / 32))
    local bit=$((core % 32))
    local mask xps_mask tx_dir w

    # The mask is a comma-separated list of 32-bit hex words: the word that
    # holds the CPU's bit comes first, followed by one all-zero word for
    # every full group of 32 CPUs below it (CPU 32 -> "1,00000000").
    printf -v mask '%X' $((1 << bit))
    for ((w = 0; w < word_idx; w++)); do
        mask+=",00000000"
    done

    printf '%s' "$mask" > /proc/irq/"$IRQ"/smp_affinity
    printf '%s %d %s -> /proc/irq/%s/smp_affinity\n' "$IFACE" "$core" "$mask" "$IRQ"

    case "$XPS_ENA" in
    1) xps_mask=$mask ;;
    2) xps_mask=0 ;;
    *) return 0 ;;
    esac

    tx_dir=/sys/class/net/$IFACE/queues/tx-$((n - 1))
    printf '%s %d %s -> %s/xps_cpus\n' "$IFACE" "$core" "$xps_mask" "$tx_dir"
    # only write when the queue directory actually exists
    test -d "$tx_dir" &&
    printf '%s' "$xps_mask" > "$tx_dir/xps_cpus"
}

# Expand a CPU list specification into individual CPU numbers.
#
# Arguments: one or more specifications such as "0-7,16-23", "3" or "0..3".
#            Items are separated by commas or blanks; a range is written
#            with "-" or ".." and may carry a step ("0-6-2" -> 0 2 4 6).
#            A range whose start is larger than its end counts downwards.
# Output:    the expanded numbers on a single line on stdout, each one
#            preceded by a space.
#
# Items that are neither a number nor a range are reported on stderr and
# skipped. Nothing from the input is ever evaluated as shell code.
parse_range () {
    local spec="$*"
    local -a items=()
    local item first last step c
    local list=""
    local range_re='^([0-9]+)\.\.([0-9]+)(\.\.([0-9]+))?$'
    local num_re='^[0-9]+$'

    spec=${spec//,/ }
    spec=${spec//-/..}
    spec=${spec//$'\n'/ }
    # read -a splits on blanks without exposing the items to globbing
    read -r -a items <<< "$spec"

    for item in "${items[@]}"; do
        if [[ $item =~ $range_re ]]; then
            # 10# forces decimal so zero-padded input is not read as octal
            first=$((10#${BASH_REMATCH[1]}))
            last=$((10#${BASH_REMATCH[2]}))
            step=$((10#${BASH_REMATCH[4]:-1}))
            ((step > 0)) || step=1
            if ((first <= last)); then
                for ((c = first; c <= last; c += step)); do
                    list+=" $c"
                done
            else
                for ((c = first; c >= last; c -= step)); do
                    list+=" $c"
                done
            fi
        elif [[ $item =~ $num_re ]]; then
            list+=" $((10#$item))"
        else
            echo "Error: ignoring invalid core specification '$item'" >&2
        fi
    done
    echo "$list"
}

# Spread the interrupts of one interface over a list of CPUs, round-robin.
#
# Takes no arguments; works on globals:
#   IFACE         (read)    interface whose interrupts are looked up
#   CORES         (read and written) core specification; it is replaced by
#                 the expanded list returned by parse_range
#   IRQ, core, n  (written) handed to set_affinity for every interrupt: the
#                 interrupt number, the CPU chosen for it and the 1-based
#                 position of the interrupt in the list. n keeps counting
#                 when the core list wraps around, because set_affinity
#                 derives the tx queue index from it.
#
# Returns non-zero, without calling set_affinity, when the core list is
# empty or no interrupts can be found for the interface.
setaff()
{
    local queues irqs f ncores
    local -a core_list=()

    CORES=$(parse_range "$CORES")
    read -r -a core_list <<< "${CORES//$'\n'/ }"
    ncores=${#core_list[@]}
    if [ "$ncores" -eq 0 ]; then
        echo "Error: no cores to assign the interrupts of $IFACE to" >&2
        return 1
    fi

    # this script only supports interrupt vectors in pairs,
    # modification would be required to support a single Tx or Rx queue
    # per interrupt vector

    queues="(${IFACE}-.*TxRx|mlx[0-9]-[0-9]+)"

    irqs=$(grep -E -- "$queues" /proc/interrupts | cut -f1 -d:)
    [ -z "$irqs" ] && irqs=$(grep -- "$IFACE" /proc/interrupts | cut -f1 -d:)
    if [ -z "$irqs" ]; then
        # Last resort: take the MSI vector numbers listed for the device in
        # sysfs and keep those whose /proc/interrupts line mentions TxRx.
        irqs=$(for f in /sys/class/net/"$IFACE"/device/msi_irqs/*; do
                   [ -e "$f" ] || continue
                   grep -E "^ *${f##*/}:.*TxRx" /proc/interrupts | grep -v fdir | cut -f1 -d:
               done | sort -n)
    fi
    if [ -z "$irqs" ]; then
        echo "Error: Could not find interrupts for $IFACE" >&2
        return 1
    fi

    echo "IFACE CORE MASK -> FILE"
    echo "======================="
    n=1
    for IRQ in $irqs; do
        # wrap around the core list, but let n keep counting vectors
        core=${core_list[(n - 1) % ncores]}
        set_affinity
        ((n++))
    done
    return 0
}

# now the actual useful bits of code
# Default core list: every online CPU as reported by sysfs, falling back to
# the processor numbers listed in /proc/cpuinfo.
ALL_CORES=$(</sys/devices/system/cpu/online)
[ "$ALL_CORES" ] || ALL_CORES=$(grep ^proc /proc/cpuinfo | cut -f2 -d:)
CORES=$ALL_CORES

# Core list for each node from sysfs
node_dir=/sys/devices/system/node
for d in "$node_dir"/node*; do
    idx=${d##*node}
    # skips the unexpanded pattern when there are no node directories
    [[ $idx =~ ^[0-9]+$ ]] || continue
    corelist[$idx]=$(<"$d"/cpulist)
done

for IFACE in $IFACES; do
    dev_dir=/sys/class/net/$IFACE/device
    # Reset for every interface so that a device without a numa_node file
    # does not inherit the node of the previous interface.
    node=""
    [ -e "$dev_dir/numa_node" ] && node=$(<"$dev_dir/numa_node")
    # empty or non-positive values are treated as node 0
    if [ -z "$node" ] || [ "$node" -le 0 ]; then
        node=0
    fi

    case "$AFF" in
    local)
        CORES=${corelist[$node]}
    ;;
    remote)
        # A node given on the command line wins. Otherwise pick "the other"
        # node for this interface only, so the choice made for one
        # interface is not reused for the next.
        if [ -n "$rnode" ]; then
            target=$rnode
        elif [ "$node" -eq 0 ]; then
            target=1
        else
            target=0
        fi
        CORES=${corelist[$target]}
    ;;
    one)
        [ -n "$cnt" ] || cnt=0
        CORES=$cnt
    ;;
    all)
        CORES=$ALL_CORES
    ;;
    custom)
        echo -n "Input cores for $IFACE (ex. 0-7,15-23): "
        read -r CORES
    ;;
    [0-9]*)
        CORES=$AFF
    ;;
    *)
        usage
        exit 1
    ;;
    esac

    if [ -z "$CORES" ]; then
        echo "Error: no cores available for $IFACE (mode: $AFF)" >&2
        continue
    fi

    # call the worker function
    setaff
done
