My hardware is an AMD Epyc 7532 with a Supermicro H12SSL-C. SR-IOV is enabled in bios.
So on a fresh install of proxmox 8.4:
I have the following kernels installed :
6.5.13-6-pve (tried on this)
6.8.12-13-pve (currently using this)
6.8.12-9-pve
I have installed proxmox headers for both 6.8 and 6.5 kernels.
- set up the basic GPU passthrough steps (amd_iommu=pt) (added vfio modules...)
- installed openvswitch-switch and openvswitch-switch-dpdk (for dpdk helper functions)
- installed the latest doca-networking package, version 3.0, from the NVIDIA website (I was pinned on kernel 6.5 when I installed this)
The config is based on the post I saw on STH forum here:
Bash:
# systemd unit that runs the SR-IOV / OVS-DOCA setup script once at boot.
[Unit]
Description=Script to enable SR-IOV on boot
# Start only after the Open vSwitch daemon unit has been started.
# NOTE(review): the script invoked below polls openibd.service before it
# continues; consider also ordering this unit after openibd.service - confirm.
After=ovs-vswitchd.service
# networking.service needs interfaces that we create here
Before=networking.service
[Service]
# oneshot: systemd waits for the ExecStart command to exit before it treats
# the unit as started.
Type=oneshot
# Init SR-IOV
ExecStart=/usr/bin/bash -c '/opt/ovs-doca-config/ovs-doca.sh'
[Install]
# Pulled in by both the multi-user and network-online targets when enabled.
WantedBy=multi-user.target network-online.target
Code:
#!/bin/bash
# Trace each command as it executes so every step shows up in the log.
set -x

# Interface names: the primary device and the second device that is bonded
# with it further down in this script.
DEVNAME="enp198s0f0np0"
DEVNAME2="enp198s0f1np1"

# PCI address prefixes (domain:bus:device); the ".<function>" suffix is
# appended at the point of use.
DEVPCIBASE="0000:c6:00"
DEVPCIBASE2="0000:c6:01"
DEVPCIBASE3="0000:c6:02"

# Number of one-second polls wait_for_openibd performs before giving up.
MAX_WAIT="30"
#######################################
# Block until openibd.service is reported active, polling once per second.
# Globals:  MAX_WAIT (read) - number of polls allowed before giving up
# Outputs:  progress and error messages on stdout
# Exits:    terminates the calling shell with status 1 on timeout
#######################################
wait_for_openibd() {
  local elapsed=0

  echo "Waiting for openibd.service to complete..."
  until systemctl is-active --quiet openibd.service; do
    # Give up once the poll budget is exhausted.
    if [ "${elapsed}" -ge "${MAX_WAIT}" ]; then
      echo "Error: openibd.service did not become active within $MAX_WAIT seconds"
      exit 1
    fi
    echo "openibd.service is not yet active, waiting..."
    sleep 1
    elapsed=$((elapsed + 1))
  done
  echo "openibd.service is active"
}
# ---------------------------------------------------------------------------
# Main provisioning sequence: create the SR-IOV VFs on ${DEVNAME}, switch the
# eSwitch of both PCI functions .0 and .1 to switchdev mode, then build the
# OVS bridge and bond on top of them.
# Reads DEVNAME, DEVNAME2 and DEVPCIBASE and calls wait_for_openibd, all of
# which are defined earlier in this script.
# ---------------------------------------------------------------------------

# Mount hugetlbfs on /hugepages and request 4096 pages of 2048kB on node0.
setup_hugepages() {
  mkdir -p /hugepages
  # This helper runs twice; without the guard the second call would stack
  # another hugetlbfs mount on the same directory.
  if ! mountpoint -q /hugepages; then
    mount -t hugetlbfs hugetlbfs /hugepages
  fi
  echo 4096 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
}

# Rename eth0..eth3 to ovs-sw1pf0vf0..ovs-sw1pf0vf3.
rename_vf_interfaces() {
  local idx
  for idx in 0 1 2 3; do
    /usr/bin/ip link set dev "eth${idx}" name "ovs-sw1pf0vf${idx}"
  done
}

# Print, one per line, every netdev name found under
# /sys/class/net/${DEVNAME}/device/net/ whose name does not contain ${DEVNAME}.
list_other_netdevs() {
  local entry name
  for entry in "/sys/class/net/${DEVNAME}/device/net"/*; do
    # An unmatched glob stays literal; skip it instead of emitting "*".
    [[ -e "${entry}" ]] || continue
    name=${entry##*/}
    if [[ "${name}" != *"${DEVNAME}"* ]]; then
      printf '%s\n' "${name}"
    fi
  done
}

systemctl stop openvswitch-switch.service
setup_hugepages

# Add SR-IOV virtual functions.
/usr/bin/echo 4 > "/sys/class/net/${DEVNAME}/device/sriov_numvfs"

# Set MAC addresses for the virtual functions (BC:24:11:5A:F6:00 .. :03).
for vf in 0 1 2 3; do
  printf -v vf_mac 'BC:24:11:5A:F6:%02X' "${vf}"
  /usr/bin/ip link set "${DEVNAME}" vf "${vf}" mac "${vf_mac}"
done

# Rename interfaces.
rename_vf_interfaces

# Enable spoofchk.
for vf in 0 1 2 3; do
  /usr/bin/ip link set "${DEVNAME}" vf "${vf}" spoofchk on
done
#for i in `ls /sys/class/net/${DEVNAME}/device/sriov/`; do
# echo ON | tee /sys/class/net/${DEVNAME}/device/sriov/${i}/trust
#done;

# Unbind the virtual functions (PCI functions .2 to .5) from mlx5_core before
# the eSwitch mode is changed.
for fn in 2 3 4 5; do
  /usr/bin/echo "${DEVPCIBASE}.${fn}" > /sys/bus/pci/drivers/mlx5_core/unbind
done

# Enable the eSwitch on both functions. The command for function .0 used to
# be fused into the comment line and therefore never ran.
/usr/sbin/devlink dev eswitch set "pci/${DEVPCIBASE}.0" mode switchdev
/usr/sbin/devlink dev eswitch set "pci/${DEVPCIBASE}.1" mode switchdev
#echo switchdev > /sys/class/net/${DEVNAME}/compat/devlink/mode
#echo switchdev > /sys/class/net/${DEVNAME2}/compat/devlink/mode

wait_for_openibd

# Ensure vfio-pci module is loaded. modprobe's own exit status is used
# because it also succeeds when the driver is built into the kernel, which
# an lsmod-based check would report as a failure.
if ! modprobe vfio-pci; then
  echo "Error: Failed to load vfio-pci module"
  exit 1
fi

# Bind VF to host.
for fn in 2 3 4 5; do
  /usr/bin/echo "${DEVPCIBASE}.${fn}" > /sys/bus/pci/drivers/mlx5_core/bind
done

# Setup huge pages.
setup_hugepages

# Set up OpenVSwitch.
systemctl start openvswitch-switch.service
#/usr/bin/ovs-vsctl add-br vmbr0
/usr/bin/ovs-vsctl --no-wait set Open_vSwitch . other_config:doca-init=true
/usr/bin/ovs-vsctl set Open_vSwitch . other_config:hw-offload=true
/usr/bin/ovs-vsctl set Open_vSwitch . other_config:lacp-fallback-ab=true
/usr/bin/ovs-vsctl set Open_vSwitch . other_config:tc-policy=skip_sw
systemctl restart openvswitch-switch.service
/usr/bin/ovs-vsctl set Open_vSwitch . other_config:default-datapath-type=netdev

# --may-exist keeps these commands from failing when the bridge, bond or
# ports are already present in the OVS database from an earlier run.
/usr/bin/ovs-vsctl --may-exist add-br vmbr0
/usr/bin/ovs-vsctl --may-exist add-bond vmbr0 bond0 "${DEVNAME}" "${DEVNAME2}" \
  lacp=active bond_mode=balance-tcp vlan_mode=native-untagged tag=1 \
  other_config:lacp-time=fast

# Rename interfaces again: the VFs were unbound and re-bound above.
rename_vf_interfaces

# Add the first network port as well as the representer devices.
while IFS= read -r port; do
  /usr/bin/ovs-vsctl --may-exist add-port vmbr0 "${port}"
done < <(list_other_netdevs)

# Bring up the switch, the physical port, and the representer devices.
# The first command previously read "usr/bin/ip" (no leading slash) and only
# resolved when the working directory happened to be "/".
/usr/bin/ip link set dev "${DEVNAME}" up
/usr/bin/ip link set dev "${DEVNAME2}" up
/usr/bin/ip link set dev vmbr0 up
while IFS= read -r port; do
  /usr/bin/ip link set dev "${port}" up
done < <(list_other_netdevs)
I checked the openvswitch logs
and I see these lines: