Repository: ayufan/pve-helpers
Branch: master
Commit: 0c8ced3b5e5b
Files: 16
Total size: 27.9 KB

Directory structure:
gitextract_lw33zvzd/
├── .gitignore
├── Makefile
├── README.md
├── VERSION
├── old-helpers/
│   ├── Makefile
│   ├── README.md
│   ├── VERSION
│   ├── root/
│   │   ├── lib/
│   │   │   └── systemd/
│   │   │       └── system-sleep/
│   │   │           └── suspend-resume-all-vms
│   │   └── usr/
│   │       ├── lib/
│   │       │   └── pve-helpers/
│   │       │       ├── qemu-server-hooks.sh
│   │       │       ├── resume-all-vms.sh
│   │       │       └── suspend-all-vms.sh
│   │       └── sbin/
│   │           └── pin-vcpus.sh
│   └── scripts/
│       └── pve-qemu-hooks.service
└── root/
    ├── etc/
    │   └── systemd/
    │       └── system/
    │           └── pve-guests.service.d/
    │               └── manual-start.conf
    ├── lib/
    │   └── systemd/
    │       └── system-sleep/
    │           └── restart-vms
    └── var/
        └── lib/
            └── vz/
                └── snippets/
                    └── exec-cmds

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.deb

================================================
FILE: Makefile
================================================
export RELEASE_START_SHA ?= $(shell git rev-list -1 HEAD VERSION)
export RELEASE ?= $(shell git rev-list $(RELEASE_START_SHA).. --count)
export RELEASE_NAME ?= $(shell cat VERSION)-$(RELEASE)
export RELEASE_VERSION ?= $(RELEASE_NAME)-g$(shell git rev-parse --short HEAD)

PACKAGE_FILE ?= pve-helpers-$(RELEASE_VERSION)_all.deb
TARGET_HOST ?= fill-me.home

all: pve-helpers

.PHONY: pve-helpers
pve-helpers: $(PACKAGE_FILE)

$(PACKAGE_FILE):
	fpm \
		--input-type dir \
		--output-type deb \
		--name pve-helpers \
		--version $(RELEASE_VERSION) \
		--package $@ \
		--architecture all \
		--category admin \
		--url https://gitlab.com/ayufan/pve-helpers-build \
		--description "Proxmox VE Helpers" \
		--vendor "Kamil Trzciński" \
		--maintainer "Kamil Trzciński " \
		--license "MIT" \
		--deb-priority optional \
		--depends inotify-tools \
		--depends qemu-server \
		--depends expect \
		--depends util-linux \
		--deb-compression gz \
		root/=/

install: pve-helpers
	dpkg -i $(PACKAGE_FILE)

deploy: pve-helpers
	scp $(PACKAGE_FILE) $(TARGET_HOST):
	ssh $(TARGET_HOST) dpkg -i $(PACKAGE_FILE)

clean:
	rm -f $(PACKAGE_FILE)

================================================
FILE: README.md
================================================
# Proxmox VE Helpers

This repository is a set of scripts that improve handling of some Proxmox VE functions:

- automatically restart VMs on host suspend,
- allow CPU pinning,
- allow setting the FIFO scheduler,
- allow setting an affinity mask for vfio devices.

Why do CPU pinning?

- Usually it is not needed as long as you don't use SMT.
- If you use SMT, not every vCPU is equal; CPU pinning ensures that VMs receive real threads.
- For good and predictable performance it is not necessary to pin to exact cores; Linux balances threads very well.
- In general, the less we configure, the better it works. These settings are hints that define affinity masks for resources.

## Installation

Clone and compile the repository:

```bash
# install dependencies
sudo apt-get install -f ruby ruby-dev rubygems build-essential
sudo gem install fpm
```

```bash
# compile pve-helpers
git clone https://github.com/ayufan/pve-helpers
cd pve-helpers
sudo make install
```

## Usage

### 1. Enable snippet

You need to configure each machine to enable the hookscript. The snippet is installed by default in `/var/lib/vz`, which Proxmox exposes as the `local` storage.

```bash
qm set 204 --hookscript=local:snippets/exec-cmds
```
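To confirm the hookscript was registered, you can inspect the VM configuration. This is only a quick sanity check, not part of the original instructions; VMID `204` is just the example used above:

```bash
qm config 204 | grep hookscript
# expected output:
# hookscript: local:snippets/exec-cmds
```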
### 2. Configure VM

Edit the VM description and add a new line with one or more of these commands.

### 2.1. `cpu_taskset`

For the best performance you want to assign the VM to physical cores, not a mix of physical and virtual cores. For example, on an `i7-8700` each core has two threads (CPU pairs 0-6, 1-7, 2-8, and so on). You can easily check that with `lscpu -e`, looking at which core index appears twice:

```bash
CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE MAXMHZ    MINMHZ
0   0    0      0    0:0:0:0       yes    4600.0000 800.0000
1   0    0      1    1:1:1:0       yes    4600.0000 800.0000
2   0    0      2    2:2:2:0       yes    4600.0000 800.0000
3   0    0      3    3:3:3:0       yes    4600.0000 800.0000
4   0    0      4    4:4:4:0       yes    4600.0000 800.0000
5   0    0      5    5:5:5:0       yes    4600.0000 800.0000
6   0    0      0    0:0:0:0       yes    4600.0000 800.0000
7   0    0      1    1:1:1:0       yes    4600.0000 800.0000
8   0    0      2    2:2:2:0       yes    4600.0000 800.0000
9   0    0      3    3:3:3:0       yes    4600.0000 800.0000
10  0    0      4    4:4:4:0       yes    4600.0000 800.0000
11  0    0      5    5:5:5:0       yes    4600.0000 800.0000
```

It is advisable to assign one core less than the number of physical cores. For the `i7-8700` that means 5 cores. You can then assign those 5 cores (pinning to a CPU set, not to specific threads) to the VM:

```text
cpu_taskset 7-11
```

This assigns the VM to the second thread of physical cores 1-5. We deliberately choose not to assign `CORE 0`.

If you have two VMs running concurrently, you can assign one VM to the first threads and the other VM to the second threads, like this:

```text
VM 1: cpu_taskset 1-5
VM 2: cpu_taskset 7-11
```

### 2.2. Use `vendor-reset` to fix the AMD Radeon reset bug

Instead of `pci_unbind` and `pci_rescan`, install the DKMS module from https://github.com/gnif/vendor-reset:

```bash
apt install dkms
git clone https://github.com/gnif/vendor-reset.git /usr/src/vendor-reset-0.1.1
dkms build vendor-reset/0.1.1
dkms install vendor-reset/0.1.1
echo vendor-reset >> /etc/modules
modprobe vendor-reset
```

### 2.3. `set_halt_poll`

This setting changes the value of the kvm parameter `halt_poll_ns` in `/sys/module/kvm/parameters/halt_poll_ns`. Different configurations benefit from different settings. The default value is `20000`. In theory, a larger value would be beneficial for the performance/latency of a VM. In practice, most Ryzen systems work best with `halt_poll_ns` set to `0`.

Usage example:

```yaml
cat /etc/pve/qemu-server/110.conf

##Set halt_poll_ns
#set_halt_poll 0
...
```

### 2.4. `assign_interrupts`

`assign_interrupts [--sleep=10s] [cpu cores] [--all] [interrupt name] [interrupt name...]`

This setting aims to simplify assigning interrupts to the correct CPU cores in order to get the best performance when passing through a GPU, USB controller, or audio controller. The goal is to have the cores assigned to the VM via `cpu_taskset` also be responsible for the interrupts generated by the devices that are fully passed through to the VM. This is very important for achieving the lowest possible latency and eliminating random latency spikes inside the VM.

Ideally, you would also use something like irqbalance to move all other interrupts away from the VM-assigned CPU cores and onto your other hypervisor-reserved cores. The same CPU mask can be used with irqbalance to ban the VM CPU cores from receiving any other interrupts.

Note: isolating CPU cores with `isolcpus`, while it has its own small benefits, is not required to get these latency improvements.

An optional `--sleep=10s` can be passed to modify the default `30s` wait duration. The `--all` flag can be used to automatically assign the interrupts of all configured `hostpci` devices.
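Before pinning interrupts, you can check which interrupt lines the passed-through devices actually expose on the host and where they currently run. This is only a quick host-side check (assuming the devices are bound to `vfio-pci`, so their interrupts show up with `vfio` in their name); it is not required by the hookscript:

```bash
# list interrupts belonging to vfio-bound (passed-through) devices
grep vfio /proc/interrupts

# show the current CPU affinity of one of them, e.g. IRQ 98 (example number)
cat /proc/irq/98/smp_affinity_list
```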
Usage example:

```yaml
cat /etc/pve/qemu-server/110.conf

##CPU pinning
#cpu_taskset 1-5
#assign_interrupts --sleep=10s 1-5 --all
...
```

As another example, on an 8-core 3700X where cores `2,3,4,5,6,7` are assigned to the VM, all interrupts with `vfio` in their name get assigned to cores `4,12,5,13,6,14,7,15,2,10,3,11`, which in turn correspond to cores `2-7` and their SMT equivalents `10-15`. In other words, the VM cores also handle all of the interrupts from the GPU, the onboard USB controller, and the onboard audio controller.

### 2.5. `qm_conflict` and `qm_depends`

Sometimes VMs conflict with each other because they depend on the same resources, like disks or VGA. There are helper commands to shut down (`qm_conflict`) or start (`qm_depends`) another VM when the main machine is being started.

```yaml
cat /etc/pve/qemu-server/204.conf

# qm_conflict 204
# qm_depends 207
...
```

The `qm_conflict` will shut down the VM with VMID 204 before starting the current one, and `qm_depends` will also start VMID 207, which might be a sibling VM.

I use `qm_conflict` or `qm_depends` to run a Linux VM sometimes with VGA passthrough and sometimes as a sibling VM without a graphics card passed through, running in console mode.

Be careful if you use `pci_unbind` and `pci_rebind`: they should come after the `qm_*` commands.

### 2.6. `pci_unbind` and `pci_rebind`

It might be desirable to bind the VGA to the VM, but as soon as the VM finishes, unbind it and allow it to be used on the host. The `--all` flag can be used to unbind all devices.

The simplest approach is to ensure that the VGA can render output on the host before starting, then instruct Proxmox VE to unbind and rebind devices:

```yaml
cat /etc/pve/qemu-server/204.conf

## Rebind VGA to host
#pci_unbind 02 00 0
#pci_unbind 02 00 1
#pci_unbind --all
#pci_rebind
```

### 3. Legacy features

These are features that are no longer really needed to achieve good latency in a VM.

### 3.1. `cpu_chrt`

**no longer needed, outdated**

Running a virtualized environment always results in fairly random latency due to the amount of other work being done. This is also because the Linux hypervisor balances all threads, which has bad effects on `DPC` and `ISR` execution times. Latency in a Windows VM can be measured with https://www.resplendence.com/latencymon. Ideally, we want a latency of `< 300us`.

To improve the latency you can switch to the `FIFO` scheduler. This has catastrophic effects on everything else that is not your VM, but that is likely acceptable for gaming / daily use of passthrough VMs.

Configure the VM description with:

```text
cpu_chrt fifo 1
```

> Note:
> It seems that if Hyper-V enlightenments are enabled (they are enabled for `ostype: win10`), this is no longer needed.
> I now have amazing performance without using `cpu_chrt`.

### 3.2. `pci_unbind` and `pci_rescan`

**no longer needed, outdated**

Just use `vendor-reset`.

There are multiple approaches to handling Radeon graphics cards. To make it stable, I found that:

1. the VGA BIOS needs to be exported, put in `/usr/share/kvm`, and passed as the `romfile` of `hostpci*`,
2. a PCIe unbind/rescan needs to happen.

Exporting the BIOS should ideally happen when running "natively", so with the graphics card available, ideally on Windows with `GPU-Z`. Once the BIOS is exported, you should ensure that it contains a UEFI section: https://pve.proxmox.com/wiki/Pci_passthrough#How_to_known_if_card_is_UEFI_.28ovmf.29_compatible. Sometimes the BIOS can be found on https://www.techpowerup.com/vgabios/. Ensure that you find the exact one for the `vid:pid` of your graphics card.
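If you cannot boot the card natively to use `GPU-Z`, the ROM can sometimes also be dumped from the host via sysfs. This is an alternative approach not covered by the original instructions, it does not work for every card, and the PCI address `0000:02:00.0` and output path below are only examples; the card must not be in use by a VM at that moment:

```bash
cd /sys/bus/pci/devices/0000:02:00.0
echo 1 > rom                                  # enable reading the ROM
cat rom > /usr/share/kvm/vbios-dump.rom       # dump it to a file
echo 0 > rom                                  # disable it again
```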
This is how my config looks once a BIOS file is put in the correct place:

```yaml
cat /etc/pve/qemu-server/204.conf

## Fix VGA
#pci_rescan
#pci_unbind 02 00 0
#pci_unbind 02 00 1
...
hookscript: local:snippets/exec-cmds
...
hostpci0: 02:00,pcie=1,romfile=215895.rom,x-vga=1
...
machine: q35
...
```

The comments define the commands to execute in order to unbind and rebind the graphics card for the VM. In cases where there are bugs getting the VM up, the `suspend/resume` cycle of Proxmox helps: `systemctl suspend`.

### 4. Suspend/resume

There's a set of scripts that try to restart machines when the Proxmox VE host goes to sleep.

First, you might be interested in triggering `suspend` with the power button. Edit `/etc/systemd/logind.conf` and set:

```text
HandlePowerKey=suspend
```

Then run `systemctl restart systemd-logind.service` or reboot Proxmox VE.

After that, each of your machines should restart alongside a Proxmox VE suspend and thus be able to handle the restart of PCI passthrough devices, like a GPU.

**Ensure that each of your machines supports the Qemu Guest Agent**. This function will not work if you don't have the Qemu Guest Agent installed and running.
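You can verify that a guest's agent is actually reachable with the same `qm guest cmd ... ping` check used by the bundled suspend scripts; `204` here is just an example VMID:

```bash
# exits successfully only when the guest agent answers
qm guest cmd 204 ping && echo "guest agent is responding"
```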
### 5. My setup

Here's a quick rundown of the environment that I currently use with the above quirks.

#### 5.1. Hardware

- i7-8700
- 48GB DDR4
- Intel iGPU used by Proxmox VE
- AMD RX560 2GB used by the Linux VM
- GeForce RTX 2080 Super used by the Windows VM
- Audio is output by both VMs to the shared speakers connected to the motherboard audio card
- Each VM has its own dedicated USB controller
- Each VM has a dedicated amount of memory using 1G hugepages
- Each VM does not use SMT; instead it is assigned to thread 0 (Linux) or thread 1 (Windows) of each core, so only 5 vCPUs are available to the VM

#### 5.2. Kernel config

```text
GRUB_CMDLINE_LINUX=""
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX pci_stub.ids=10de:1e81,10de:10f8,10de:1ad8,10de:1ad9,10de:13c2,10de:0fbb,1002:67ef,1002:aae0"
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX intel_iommu=on kvm_intel.ept=Y kvm_intel.nested=Y i915.enable_hd_vgaarb=1 pcie_acs_override=downstream vfio-pci.disable_idle_d3=1"
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX cgroup_enable=memory swapaccount=1"
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX intel_pstate=disable"
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX hugepagesz=1G hugepages=42"
```

#### 5.3. Linux VM

I use Linux for regular daily development work. My Proxmox VE config looks like this:

```text
## CPU PIN
#cpu_taskset 0-5
#assign_interrupts 0-5 --all
#
## Conflict (207 shares disks, 208 shares VGA)
#qm_conflict 207
#qm_conflict 208

agent: 1
args: -audiodev id=alsa,driver=alsa,out.period-length=100000,out.frequency=48000,out.channels=2,out.try-poll=off,out.dev=swapped -soundhw hda
balloon: 0
bios: ovmf
boot: dcn
bootdisk: scsi0
cores: 5
cpu: host
hookscript: local:snippets/exec-cmds
hostpci0: 02:00,romfile=215895.rom,x-vga=1
hostpci1: 04:00
hugepages: 1024
ide2: none,media=cdrom
memory: 32768
name: ubuntu19-vga
net0: virtio=32:13:40:C7:31:4C,bridge=vmbr0
numa: 1
onboot: 1
ostype: l26
scsi0: nvme-thin:vm-206-disk-1,discard=on,iothread=1,size=200G,ssd=1
scsi1: ssd:vm-206-disk-0,discard=on,iothread=1,size=100G,ssd=1
scsi10: ssd:vm-206-disk-1,iothread=1,replicate=0,size=32G,ssd=1
scsihw: virtio-scsi-pci
serial0: socket
sockets: 1
usb0: host=1050:0406
vga: none
```

#### 5.4. Windows VM

I use Windows for gaming. It has the dedicated RTX 2080 Super.

```text
## CPU PIN
#cpu_taskset 6-11
#assign_interrupts 6-11 --all

agent: 1
args: -audiodev id=alsa,driver=alsa,out.period-length=100000,out.frequency=48000,out.channels=2,out.try-poll=off,out.dev=swapped -soundhw hda
balloon: 0
bios: ovmf
boot: dc
bootdisk: scsi0
cores: 5
cpu: host
cpuunits: 10000
efidisk0: nvme-thin:vm-204-disk-1,size=4M
hookscript: local:snippets/exec-cmds
hostpci0: 01:00,pcie=1,x-vga=1,romfile=Gigabyte.RTX2080Super.8192.190820.rom
hugepages: 1024
ide2: none,media=cdrom
machine: pc-q35-3.1
memory: 10240
name: win10-vga
net0: e1000=3E:41:0E:4D:3D:14,bridge=vmbr0
numa: 1
onboot: 1
ostype: win10
runningmachine: pc-q35-3.1
scsi0: ssd:vm-204-disk-2,discard=on,iothread=1,size=64G,ssd=1
scsi1: ssd:vm-204-disk-0,backup=0,discard=on,iothread=1,replicate=0,size=921604M
scsi3: nvme-thin:vm-204-disk-0,backup=0,discard=on,iothread=1,replicate=0,size=100G
scsihw: virtio-scsi-pci
sockets: 1
vga: none
```

#### 5.5. Switching between VMs

To switch between VMs:

1. Both VMs always run concurrently.
1. I change the monitor input.
1. Audio is output by both VMs by default, so there is no need to switch it.
1. I use Barrier (previously Synergy) most of the time.
1. In other cases I have a Logitech multi-device keyboard and mouse, so I switch it on the keyboard.
1. I also have a physical switch that I use to change lighting and monitor inputs.
1. My monitor has PBP and PIP, so I can watch Windows updating while doing development work on Linux.

## Author, License

Kamil Trzciński, 2019-2021, MIT

================================================
FILE: VERSION
================================================
0.6.0

================================================
FILE: old-helpers/Makefile
================================================
export RELEASE_START_SHA ?= $(shell git rev-list -1 HEAD VERSION)
export RELEASE ?= $(shell git rev-list $(RELEASE_START_SHA).. --count)
export RELEASE_NAME ?= $(shell cat VERSION)-$(RELEASE)
export RELEASE_VERSION ?= $(RELEASE_NAME)-g$(shell git rev-parse --short HEAD)

PACKAGE_FILE ?= pve-helpers-$(RELEASE_VERSION)_all.deb
TARGET_HOST ?= fill-me.home

all: pve-helpers

.PHONY: pve-helpers
pve-helpers: $(PACKAGE_FILE)

$(PACKAGE_FILE):
	fpm \
		--input-type dir \
		--output-type deb \
		--name pve-helpers \
		--version $(RELEASE_VERSION) \
		--package $@ \
		--architecture all \
		--category admin \
		--url https://gitlab.com/ayufan/pve-helpers-build \
		--description "Proxmox VE Helpers" \
		--vendor "Kamil Trzciński" \
		--maintainer "Kamil Trzciński " \
		--license "MIT" \
		--deb-priority optional \
		--depends inotify-tools \
		--depends qemu-server \
		--depends expect \
		--depends util-linux \
		--deb-compression bzip2 \
		--deb-systemd scripts/pve-qemu-hooks.service \
		root/=/

install: pve-helpers
	dpkg -i $(PACKAGE_FILE)

deploy: pve-helpers
	scp $(PACKAGE_FILE) $(TARGET_HOST):
	ssh $(TARGET_HOST) dpkg -i $(PACKAGE_FILE)

clean:
	rm -f $(PACKAGE_FILE)

================================================
FILE: old-helpers/README.md
================================================
# Proxmox VE Qemu Helpers

This repository is a set of scripts that improve handling of some Proxmox VE functions:

- automatically suspend/resume VMs on host suspend,
- allow CPU pinning,
- allow running actions on VM bootup.

## Installation

Clone and compile the repository:

```bash
git clone https://github.com/ayufan/pve-helpers
cd pve-helpers
sudo make install
```

## Usage

### 1. Enable CPU pinning (`/usr/sbin/pin-vcpus.sh`)

CPU pinning is enabled only when you add the `CPUPIN` keyword to the VM notes.
It will pin each vCPU thread to one physical thread. The pinning omits CORE0, as it assumes you use it for the host machine.

For the best performance you should configure the core specification exactly the way it is on your host machine: matching the number of threads per core. Currently, Proxmox VE does not allow you to configure `threads`, so you have to do it manually:

```bash
qm set VMID -args -smp 10,cores=5,threads=2
```

The above assumes that you use a CPU with SMT, which has two threads per core. The CPU pinning method will properly assign each virtual thread to a physical thread, taking into account the CPU affinity mask as produced by `lscpu -e`.

To ensure that CPU pinning works, you can try it from the command line as the `root` user:

```bash
pin-vcpus.sh VMID
```

#### 1.1. Using `isolcpus`

The above option should be used in conjunction with the kernel's `isolcpus`. This is a way to keep CPU cores from being used by the hypervisor, making it possible to assign cores exclusively to the VMs. To do that, edit `/etc/default/grub` and add:

```bash
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX isolcpus=1-5,7-11"
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX nohz_full=1-5,7-11"
GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX rcu_nocbs=1-5,7-11"
```

Where `1-5,7-11` matches the cores that Proxmox VE should not use. You really want to omit everything that is on CORE0. The above specification is valid for the `i7-8700` CPU:

```bash
CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE MAXMHZ    MINMHZ
0   0    0      0    0:0:0:0       yes    4600.0000 800.0000
1   0    0      1    1:1:1:0       yes    4600.0000 800.0000
2   0    0      2    2:2:2:0       yes    4600.0000 800.0000
3   0    0      3    3:3:3:0       yes    4600.0000 800.0000
4   0    0      4    4:4:4:0       yes    4600.0000 800.0000
5   0    0      5    5:5:5:0       yes    4600.0000 800.0000
6   0    0      0    0:0:0:0       yes    4600.0000 800.0000
7   0    0      1    1:1:1:0       yes    4600.0000 800.0000
8   0    0      2    2:2:2:0       yes    4600.0000 800.0000
9   0    0      3    3:3:3:0       yes    4600.0000 800.0000
10  0    0      4    4:4:4:0       yes    4600.0000 800.0000
11  0    0      5    5:5:5:0       yes    4600.0000 800.0000
```

For Ryzen CPUs you will instead see CORE0 assigned to CPU0 and CPU1, so your specification will look like `2-11`.

After editing the configuration, run `update-grub` and reboot Proxmox VE.

### 2. Suspend/resume

There's a set of scripts that try to suspend machines when the Proxmox VE host goes to sleep.

First, you might be interested in triggering `suspend` with the power button. Edit `/etc/systemd/logind.conf` and set:

```
HandlePowerKey=suspend
```

Then run `systemctl restart systemd-logind.service` or reboot Proxmox VE.

After that, each of your machines should suspend alongside a Proxmox VE suspend and thus be able to support suspend/resume of PCI passthrough devices, like a GPU.

**Ensure that each of your machines supports the Qemu Guest Agent**. This function will not work if you don't have the Qemu Guest Agent installed and running.

### 3. Run hooks on machine start and stop

You can add a script `/etc/qemu-server-hooks/VMID.up` that will be executed when the machine starts, and a script `/etc/qemu-server-hooks/VMID.down` that will be executed when the machine stops.
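For example, a minimal `.up` hook could just log the event to the system journal. The VMID `204` and the message are purely illustrative; the only requirements implied by the watcher are that the file name matches the VMID and that it is executable (`chmod +x`):

```bash
#!/bin/bash
# /etc/qemu-server-hooks/204.up (illustrative example)
# runs right after VM 204 is detected as started by qemu-server-hooks.sh
logger -t pve-helpers "VM 204 is up"
```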
## Author, License

Kamil Trzciński, 2019, MIT

================================================
FILE: old-helpers/VERSION
================================================
0.2.0

================================================
FILE: old-helpers/root/lib/systemd/system-sleep/suspend-resume-all-vms
================================================
#!/bin/bash

if [[ "$1" == "pre" ]]; then
  /usr/lib/pve-helpers/suspend-all-vms.sh
elif [[ "$1" == "post" ]]; then
  /usr/lib/pve-helpers/resume-all-vms.sh
else
  echo "invalid: $@"
  exit 1
fi

================================================
FILE: old-helpers/root/usr/lib/pve-helpers/qemu-server-hooks.sh
================================================
#!/bin/bash

hooks=/etc/qemu-server-hooks
watch=/var/run/qemu-server

mkdir -p "$hooks" "$watch"

pin_vcpus() {
  /usr/sbin/pin-vcpus.sh "$@"
}

while read file; do
  VMID=$(basename "$file" .pid)

  # ignore non-pid matches
  if [[ "$file" == "$VMID" ]]; then
    continue
  fi

  if [[ -e "$watch/$file" ]]; then
    echo "$VMID: Did start."
    [[ -f "$hooks/$VMID.up" ]] && "$hooks/$VMID.up"
    pin_vcpus "$VMID" &
  else
    echo "$VMID: Did stop."
    [[ -f "$hooks/$VMID.down" ]] && "$hooks/$VMID.down"
  fi
done < <(/usr/bin/inotifywait -mq -e create,delete --format "%f" "$watch")

================================================
FILE: old-helpers/root/usr/lib/pve-helpers/resume-all-vms.sh
================================================
#!/bin/bash

resume_vm() {
  local VMID="$1"
  local VMSTATUS=$(qm status "$VMID")
  local VMCONFIG=$(qm config "$VMID")

  # We need to reset only when hostpci.*:
  if grep -q ^hostpci <(echo "$VMCONFIG"); then
    if [[ "$VMSTATUS" == "status: running" ]]; then
      echo "$VMID: Resetting as it has 'hostpci*:' devices..."
      qm reset "$VMID"
      return 1
    fi
  fi

  if [[ ! -e "/var/run/qemu-server/$VMID.suspended" ]]; then
    echo "$VMID: Nothing to do, due to missing: $VMID.suspended."
    return 0
  fi

  rm -f "/var/run/qemu-server/$VMID.suspended"

  if [[ "$VMSTATUS" == "status: stopped" ]]; then
    echo "$VMID: Starting (stopped)..."
    qm start "$VMID"
  fi

  echo "$VMID: Resuming..."
  qm resume "$VMID"

  for i in $(seq 1 30); do
    VMSTATUS=$(qm status "$VMID")
    if [[ "$VMSTATUS" == "status: running" ]]; then
      echo "$VMID: Resumed."
      return 0
    fi

    echo "$VMID: Waiting for resume: $VMSTATUS..."
    sleep 1s
  done

  echo "$VMID: Failed to resume: $VMSTATUS."
  qm reset "$VMID"
  return 1
}

for i in /etc/pve/nodes/$(hostname)/qemu-server/*.conf; do
  VMID=$(basename "$i" .conf)
  resume_vm "$VMID" &
done

wait

================================================
FILE: old-helpers/root/usr/lib/pve-helpers/suspend-all-vms.sh
================================================
#!/bin/bash

suspend_vm_action() {
  local VMID="$1"
  local ACTION="$2"

  if ! qm guest cmd "$VMID" ping; then
    return 1
  fi

  echo "$VMID: Suspending ($ACTION)..."
  qm guest cmd "$VMID" "$ACTION"

  for i in $(seq 1 30); do
    local VMSTATUS=$(qm status "$VMID")
    if [[ "$VMSTATUS" == "status: suspended" ]] || [[ "$VMSTATUS" == "status: stopped" ]]; then
      echo "$VMID: Suspended."
      touch "/var/run/qemu-server/$VMID.suspended"
      return 0
    fi

    echo "$VMID: Waiting for suspend: $VMSTATUS..."
    sleep 1s
  done

  echo "$VMID: Failed to suspend: $VMSTATUS."
  return 1
}

suspend_vm() {
  local VMID="$1"
  local VMSTATUS=$(qm status "$VMID")
  local VMCONFIG=$(qm config "$VMID")

  if [[ "$VMSTATUS" != "status: running" ]]; then
    echo "$VMID: Nothing to do, due to: $VMSTATUS."
    return 0
  fi
  if ! grep -q ^hostpci <(echo "$VMCONFIG"); then
    echo "$VMID: VM does not use PCI-passthrough"
    return 0
  fi

  # if suspend_vm_action "$VMID" suspend-disk; then
  #   return 0
  # fi
  # echo "$VMID: VM does not support suspend-disk via Guest Agent, using shutdown."

  if qm shutdown "$VMID"; then
    touch "/var/run/qemu-server/$VMID.suspended"
    return 0
  fi

  echo "$VMID: Failed to suspend or shutdown."
  return 1
}

for i in /etc/pve/nodes/$(hostname)/qemu-server/*.conf; do
  VMID=$(basename "$i" .conf)
  suspend_vm "$VMID" &
done

wait

================================================
FILE: old-helpers/root/usr/sbin/pin-vcpus.sh
================================================
#!/bin/bash

set -eo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <VMID>"
  exit 1
fi

VMID="$1"

if ! VMCONFIG=$(qm config "$VMID"); then
  echo "$VMID: Does not exist."
  exit 1
fi

if ! grep -q CPUPIN <(echo "$VMCONFIG"); then
  echo "$VMID: Does not have CPUPIN defined."
  exit 1
fi

# ask the QEMU monitor for the host thread id of each vCPU
vm_cpu_tasks() {
  expect <<EOF | sed -n 's/^.* thread_id=\(.*\)$/\1/p' | tr -d '\r' || true
spawn qm monitor $VMID
expect ">"
send "info cpus\r"
expect ">"
EOF
}

# this function returns a list of CPU cores
# in the order of their HT threads,
# mapping Intel cpus to Qemu emulated cpus
cores() {
  # tail -n+2: ignore header
  # sort -n -k4: sort by core-index vs threads
  # ignore core-0: assuming that it is assigned to host with isolcpus
  while read CPU NODE SOCKET CORE REST; do
    if [[ "$CORE" == "0" ]]; then
      # We assume that $CORE is assigned to host (always)
      continue
    fi

    echo "$CPU"
  done < <(lscpu -e | tail -n+2 | sort -n -k4)
}

echo "$VMID: Checking..."

for i in $(seq 1 10); do
  VMSTATUS=$(qm status $VMID)
  if [[ "$VMSTATUS" != "status: running" ]]; then
    echo "$VMID: VM is not running: $VMSTATUS"
    exit 1
  fi

  VCPUS=($(vm_cpu_tasks))
  VCPU_COUNT="${#VCPUS[@]}"

  if [[ $VCPU_COUNT -gt 0 ]]; then
    break
  fi

  echo "* No VCPUS for $VMID"
  sleep 3s
done

if [[ $VCPU_COUNT -eq 0 ]]; then
  exit 1
fi

echo "$VMID: Detected VCPU ${#VCPUS[@]} threads..."

for CPU_INDEX in "${!VCPUS[@]}"; do
  CPU_TASK="${VCPUS[$CPU_INDEX]}"

  if read CPU_INDEX; then
    echo "$VMID: Assigning $CPU_INDEX to $CPU_TASK..."
    taskset -pc "$CPU_INDEX" "$CPU_TASK"
  else
    echo "$VMID: No CPU to assign to $CPU_TASK"
  fi
done < <(cores)

================================================
FILE: old-helpers/scripts/pve-qemu-hooks.service
================================================
[Unit]
Description = PVE Qemu Server Hooks

[Service]
Type = simple
ExecStart = /usr/lib/pve-helpers/qemu-server-hooks.sh

[Install]
WantedBy = multi-user.target

================================================
FILE: root/etc/systemd/system/pve-guests.service.d/manual-start.conf
================================================
[Unit]
RefuseManualStart=false
RefuseManualStop=false

================================================
FILE: root/lib/systemd/system-sleep/restart-vms
================================================
#!/bin/bash

if [[ "$1" == "pre" ]]; then
  /bin/systemctl stop pve-guests.service
elif [[ "$1" == "post" ]]; then
  /bin/systemctl start pve-guests.service
else
  echo "invalid: $@"
  exit 1
fi

================================================
FILE: root/var/lib/vz/snippets/exec-cmds
================================================
#!/bin/bash

VMID="$1"
ACTION="$2"
SLEPT=""

vmpid() {
  cat "/var/run/qemu-server/$VMID.pid"
}

if_action() {
  if [[ "$ACTION" == "$1" ]]; then
    shift
    eval "$@"
  fi
}

sleep_once() {
  if [[ -z "$SLEPT" ]]; then
    sleep 1s
    SLEPT=1
  fi
}

hostpci_ids() {
  grep '^hostpci[0-9]:.*0000' "/etc/pve/qemu-server/$VMID.conf" | awk '{print $2}' | awk -F, '{print $1}'
}

exec_pci_rescan() {
  echo "Running PCI rescan for $VMID..."
  echo 1 > /sys/bus/pci/rescan
}

exec_set_haltpoll() {
  echo "Setting haltpoll for $VMID..."
  echo $1 > /sys/module/kvm/parameters/halt_poll_ns
}

exec_assign_interrupts() {
  local SLEEP="30s"
  if [[ $1 == --sleep=* ]]; then
    SLEEP="${1#--sleep=}"
    shift
  fi

  echo "Waiting $SLEEP for all vfio-gpu interrupts to show up..."
  sleep "$SLEEP"

  MASK="$1"
  shift

  if [[ "$1" == "--all" ]]; then
    set -- $(hostpci_ids)
  fi

  for interrupt; do
    # decode %-escaped characters in the interrupt name (e.g. %3a => :)
    interrupt=$(printf '%b' "${interrupt//%/\\x}")
    echo "Moving '$interrupt' interrupts to CPU cores $MASK for $VMID..."
    grep "$interrupt" /proc/interrupts | cut -d ":" -f 1 | while read -r i; do
      echo "- IRQ: $(grep "^\s*$i:" /proc/interrupts)"
      echo "$MASK" > /proc/irq/$i/smp_affinity_list
    done
  done
}

exec_pci_unbind() {
  if [[ "$1" == "--all" ]]; then
    set -- $(hostpci_ids)
  else
    set -- "0000:$1:$2.$3"
  fi

  for devid; do
    if [[ -e "/sys/bus/pci/devices/$devid" ]]; then
      echo "Running PCI unbind of '$devid' for $VMID..."
      echo 1 > "/sys/bus/pci/devices/$devid/remove"
    elif [[ -e "/sys/bus/pci/devices/$devid.0" ]]; then
      echo "Running PCI unbind of '$devid.0' for $VMID..."
      echo 1 > "/sys/bus/pci/devices/$devid.0/remove"
    else
      echo "The '$devid' not found in '/sys/bus/pci/devices'"
    fi
  done
}

exec_cpu_taskset() {
  sleep_once

  echo "Running taskset with $1 for $(vmpid)..."
  taskset -a -p -c "$1" "$(vmpid)"
  echo ""
}

exec_cpu_chrt() {
  sleep_once

  echo "Running chrt with $1:$2 for $(vmpid)..."
  chrt -v "--$1" -a -p "$2" "$(vmpid)"
  echo ""
}

exec_qm_conflict() {
  echo "Conflicting with other VM$1, shutdown just in case..."
  qm shutdown "$1"
}

exec_qm_depends() {
  echo "VM$1 is required, ensure that it is started..."
  qm start "$1"
}

exec_cmds() {
  while read CMD ARG1 ARG2 ARG3 REST; do
    case "$CMD" in
      "#pci_rescan")
        if_action pre-start exec_pci_rescan
        ;;

      "#cpu_taskset")
        if_action post-start exec_cpu_taskset "$ARG1"
        ;;

      "#set_halt_poll")
        if_action post-start exec_set_haltpoll "$ARG1"
        ;;

      "#assign_interrupts")
        if_action post-start exec_assign_interrupts "$ARG1" "$ARG2" "$ARG3" $REST
        ;;

      "#cpu_chrt")
        if_action post-start exec_cpu_chrt "${ARG1:-fifo}" "${ARG2:-1}"
        ;;

      "#qm_depends")
        if_action post-start exec_qm_depends "$ARG1"
        ;;

      "#pci_unbind")
        if_action post-stop exec_pci_unbind "$ARG1" "$ARG2" "$ARG3"
        ;;

      "#pci_unbind_all")
        # `exec_pci_unbind_all` is not defined anywhere; reuse `exec_pci_unbind --all`
        if_action post-stop exec_pci_unbind --all
        ;;

      "#pci_rebind")
        if_action post-stop exec_pci_rescan
        ;;

      "#qm_conflict")
        if_action pre-start exec_qm_conflict "$ARG1"
        ;;

      "#qm_"*|"#cpu_"*|"#pci_"*|"#set_"*|"#assign_"*)
        echo "exec-cmds: command is unknown '$CMD'"
        ;;
    esac
  done
}

echo "Running exec-cmds for $VMID on $ACTION..."

exec_cmds < "/etc/pve/qemu-server/$VMID.conf"

exit 0