From 3a4e0144744aca1d961864fb76ae84cfd70c8f7d Mon Sep 17 00:00:00 2001 From: Fiona Ebner <f.ebner@proxmox.com> Date: Mon, 29 Sep 2025 14:56:50 +0200 Subject: [PATCH] cfg2cmd: turn off hpet for Linux VMs running at least kernel 2.6 and machine type >= 10.1 Recent enough Linux versions already use 'kvm-clock' rather than 'hpet' as the default clock source [0][1]. Changes in QEMU [2] led to slightly increased CPU usage when using hpet [3][4]: > the timer must be kept running even if not enabled, in > order to set the ISR flag, so writes to HPET_TN_CFG must > not call hpet_del_timer() Upstream suggested to not use hpet if possible [5][6]: > That said, if you can disable the HPET timer by default without > problems with e.g. live migration I strongly suggest you do. And in > the mean time you can also revert these patches, they were actually > reported as bugs but it's not clear what guest OS was affected. > No, the bug reports are really just for corner cases and there are no > huge issues. However, both Linux and Windows give the HPET a > relatively high priority that it probably does not deserve. :) There were more changes in QEMU, so it would require more reverts. Thus, disable the timer. People having a Linux VM pinned to an older machine version or using other os types will see the increased usage again if installing the new QEMU 10.1 binary, but that seems like a fair trade-off for reducing CPU load for everybody else and being able to move forward. The is_linux() helper does not include the 'l24' os type by default, because all but one of the existing checks, as well as the newly introduced check, are specifically for 'l26' and most future features are not worth considering for 'l24' either. Users of Linux 2.6.x before v2.6.26 might need to pin the machine version or manually enable hpet if they want to continue using HPET. Otherwise, there is acpi_pm since v2.6.18 that should be automatically picked. 
[0]: /sys/devices/system/clocksource/clocksource0/current_clocksource [1]: Kernel commit 790c73f6289a ("x86: KVM guest: paravirtualized clocksource") in v2.6.26+ [2]: QEMU commit f0ccf77078 ("hpet: fix and cleanup persistence of interrupt status") [3]: https://lore.kernel.org/qemu-devel/8183674f-a9cc-4727-bb52-fe3d3e44804b@proxmox.com/ [4]: https://forum.proxmox.com/threads/161849/post-756793 [5]: https://lore.kernel.org/qemu-devel/CABgObfaKJ5NFVKmYLFmu4C0iZZLJJtcWksLCzyA0tBoz0koZ4A@mail.gmail.com/ [6]: https://lore.kernel.org/qemu-devel/CABgObfYnOzg=BPeG5BjSmGEV_Q0pR7xGg6L3XNQCONtU_GiuGA@mail.gmail.com/ Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Link: https://lore.proxmox.com/20250929125852.102343-6-f.ebner@proxmox.com --- src/PVE/QemuServer.pm | 2 +- src/PVE/QemuServer/Cfg2Cmd.pm | 30 ++++++++++++++++++++++++++--- src/PVE/QemuServer/Cfg2Cmd/Timer.pm | 2 ++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm index f1025cfa..45daa06c 100644 --- a/src/PVE/QemuServer.pm +++ b/src/PVE/QemuServer.pm @@ -3382,7 +3382,7 @@ sub config_to_command { } # For now, handles only specific parts, but the final goal is to cover everything. 
- my $cfg2cmd = PVE::QemuServer::Cfg2Cmd->new($conf, $defaults); + my $cfg2cmd = PVE::QemuServer::Cfg2Cmd->new($conf, $defaults, $version_guard); my $generated = $cfg2cmd->generate(); push $cmd->@*, '-global', $_ for ($generated->global_flags() // [])->@*; push $machineFlags->@*, ($generated->machine_flags() // [])->@*; diff --git a/src/PVE/QemuServer/Cfg2Cmd.pm b/src/PVE/QemuServer/Cfg2Cmd.pm index 6b26ab23..c7ee0165 100644 --- a/src/PVE/QemuServer/Cfg2Cmd.pm +++ b/src/PVE/QemuServer/Cfg2Cmd.pm @@ -7,15 +7,16 @@ use PVE::QemuServer::Cfg2Cmd::Timer; use PVE::QemuServer::Helpers; sub new { - my ($class, $conf, $defaults) = @_; + my ($class, $conf, $defaults, $version_guard) = @_; my $self = bless { conf => $conf, defaults => $defaults, + 'version-guard' => $version_guard, }, $class; - my $ostype = $self->get_prop('ostype'); - $self->{'windows-version'} = PVE::QemuServer::Helpers::windows_version($ostype); + $self->{ostype} = $self->get_prop('ostype'); + $self->{'windows-version'} = PVE::QemuServer::Helpers::windows_version($self->{ostype}); return $self; } @@ -74,12 +75,35 @@ sub rtc_flags { return $self->{'rtc-flags'}; } +=head3 is_linux + + if ($self->is_linux()) { + do_something_for_linux_vms(); + } + +Check if the virtual machine is configured for running Linux. Does not include the C<l24> os type +by default. Specify C<$include_l24> if that is desired. 
+ +=cut + +sub is_linux { + my ($self, $include_l24) = @_; + + return $self->{ostype} eq 'l26' || ($include_l24 && $self->{ostype} eq 'l24'); +} + sub windows_version { my ($self) = @_; return $self->{'windows-version'}; } +sub version_guard { + my ($self, $major, $minor, $pve) = @_; + + $self->{'version-guard'}->($major, $minor, $pve); +} + sub generate { my ($self) = @_; diff --git a/src/PVE/QemuServer/Cfg2Cmd/Timer.pm b/src/PVE/QemuServer/Cfg2Cmd/Timer.pm index d4b16af0..452c15b2 100644 --- a/src/PVE/QemuServer/Cfg2Cmd/Timer.pm +++ b/src/PVE/QemuServer/Cfg2Cmd/Timer.pm @@ -21,6 +21,8 @@ sub generate { if ($cfg2cmd->windows_version() >= 6) { $cfg2cmd->add_global_flag('kvm-pit.lost_tick_policy=discard'); $cfg2cmd->add_machine_flag('hpet=off'); + } elsif ($cfg2cmd->is_linux() && $cfg2cmd->version_guard(10, 1, 0)) { + $cfg2cmd->add_machine_flag('hpet=off'); } $cfg2cmd->add_rtc_flag('driftfix=slew') if $time_drift_fix;