其他分享
首页 > 其他分享> > 虚机运行时重启主机容易panic

虚机运行时重启主机容易panic

作者:互联网

一:问题描述

最近有遇到重启主机时产生panic的情况,异常的堆栈如下:

[2847164.482478] kvm: exiting hardware virtualization
[2847164.482504] kvm: exiting hardware virtualization
[2847164.482528] ------------[ cut here ]------------
[2847164.482530] kernel BUG at /root/kvm/source/x86/x86.c:388!
[2847164.482554] invalid opcode: 0000 [#1] SMP NOPTI
[2847164.482569] CPU: 37 PID: 2538414 Comm: CPU 22/KVM Kdump: loaded Tainted: G           OE     4.19.117.bsk.5-amd64 #4.19.117.bsk.5
[2847164.482598] Hardware name: FOXCONN R-5111/GROOT, BIOS IC1B123F 07/16/2020
[2847164.482635] RIP: 0010:kvm_spurious_fault+0x5/0x10 [kvm_intel_0]
[2847164.482652] Code: 41 5c 41 5d 41 5e c3 5b b8 01 00 00 00 5d 41 5c 41 5d 41 5e c3 b8 00 04 00 00 e9 7c ff ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 <0f> 0b 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 45 31 c0 31 c9 31
[2847164.482695] RSP: 0018:ffffbcf877c77cf0 EFLAGS: 00010046
[2847164.482710] RAX: 0000000000000000 RBX: ffff9546d35d0000 RCX: ffff9546cb23b000
[2847164.482728] RDX: 0000000000000810 RSI: 0000000000000000 RDI: 00000000000000ef
[2847164.482746] RBP: ffffbcf877c77dd0 R08: 0000000000000040 R09: 0000000000005e80
[2847164.482763] R10: 00000000000044a8 R11: ffff957429769880 R12: 0000000000000000
[2847164.482781] R13: ffff9546d35d44a8 R14: ffffbcf8792c2bb8 R15: 0000000000000000
[2847164.482798] FS:  00007f8205ffb700(0000) GS:ffff95753f540000(0000) knlGS:0000000000000000
[2847164.482818] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[2847164.482833] CR2: 000000c00040831c CR3: 0000003171efc004 CR4: 00000000007606e0
[2847164.482851] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[2847164.482869] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[2847164.482887] PKRU: 55555554
[2847164.482895] Call Trace:
[2847164.482924]  vmx_set_rvi+0x1a/0x70 [kvm_intel_0]
[2847164.482949]  vmx_sync_pir_to_irr+0xb0/0x100 [kvm_intel_0]
[2847164.482977]  kvm_arch_vcpu_ioctl_run+0x12e5/0x1ab0 [kvm_intel_0]
[2847164.483002]  ? kvm_vcpu_ioctl+0x38b/0x5d0 [kvm_intel_0]
[2847164.483024]  kvm_vcpu_ioctl+0x38b/0x5d0 [kvm_intel_0]
[2847164.483042]  do_vfs_ioctl+0x9e/0x610
[2847164.483064]  ? __audit_syscall_entry+0x103/0x130
[2847164.483081]  ? syscall_trace_enter+0x1ae/0x2c0
[2847164.483094]  ksys_ioctl+0x70/0x80
[2847164.483106]  ? exit_to_usermode_loop+0xd3/0xf0
[2847164.483124]  __x64_sys_ioctl+0x16/0x20
[2847164.483138]  do_syscall_64+0x5d/0x110
[2847164.483150]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[2847164.483167] RIP: 0033:0x7fda91a10017

二:分析

异常时执行的汇编:

0xffffffffc2c7ad30 <vmcs_load>: nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc2c7ad35 <vmcs_load+5>:       sub    $0x10,%rsp
0xffffffffc2c7ad39 <vmcs_load+9>:       mov    %gs:0x28,%rax
0xffffffffc2c7ad42 <vmcs_load+18>:      mov    %rax,0x8(%rsp)
0xffffffffc2c7ad47 <vmcs_load+23>:      xor    %eax,%eax
0xffffffffc2c7ad49 <vmcs_load+25>:      mov    $0x80000000,%eax
0xffffffffc2c7ad4e <vmcs_load+30>:      add    %rdi,%rax
0xffffffffc2c7ad51 <vmcs_load+33>:      jb     0xffffffffc2c7adc9 <vmcs_load+153>
0xffffffffc2c7ad53 <vmcs_load+35>:      mov    $0xffffffff80000000,%rdx
0xffffffffc2c7ad5a <vmcs_load+42>:      sub    -0x34db5c69(%rip),%rdx        # 0xffffffff8dec50f8
0xffffffffc2c7ad61 <vmcs_load+49>:      add    %rdx,%rax
0xffffffffc2c7ad64 <vmcs_load+52>:      mov    %rax,(%rsp)
0xffffffffc2c7ad68 <vmcs_load+56>:      nopl   0x0(%rax,%rax,1)
0xffffffffc2c7ad6d <vmcs_load+61>:      mov    %rsp,%rax
0xffffffffc2c7ad70 <vmcs_load+64>:      vmptrld (%rax)    <-- 触发异常的指令

指令执行异常后,最终进入 kvm_spurious_fault:
0xffffffffc2c45f00 <kvm_spurious_fault>:        nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc2c45f05 <kvm_spurious_fault+5>:      ud2    <-- 该指令触发 panic(对应日志中的 kernel BUG / invalid opcode)

对应的 C 代码(vmcs_load)如下:
static void vmcs_load(struct vmcs *vmcs)
{
asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
                      : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
                      : "memory");
}

下面是 __ex 对应的宏定义(__ex 展开为 __kvm_handle_fault_on_reboot):

指令执行触发异常后,如果当前不在重启流程中(kvm_rebooting 为 0),会调用 kvm_spurious_fault 并触发 panic;如果正在重启,则忽略该指令。

/*
 * Hardware virtualization extension instructions may fault if a
 * reboot turns off virtualization while processes are running.
 * Usually after catching the fault we just panic; during reboot
 * instead the instruction is ignored.
 *
 * Layout of the generated assembly:
 *   666: the wrapped instruction @insn; when it does not fault, execution
 *        jumps straight to 668 and nothing else happens.
 *   667: call kvm_spurious_fault (reached only from the fixup code).
 *   668: common continuation point.
 *   700: fixup code placed in the .fixup section and registered with
 *        _ASM_EXTABLE(666b, 700b) as the handler for a fault at 666.
 *        It runs @cleanup_insn, then tests kvm_rebooting:
 *          kvm_rebooting == 0 -> je 667b  -> kvm_spurious_fault
 *          kvm_rebooting != 0 -> jmp 668b -> the instruction is skipped
 *
 * @insn:         string literal holding the instruction to protect.
 * @cleanup_insn: string literal with extra assembly to run in the fixup
 *                path before kvm_rebooting is tested ("" for none).
 */
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)	\
	"666: \n\t"						\
	insn "\n\t"						\
	"jmp 668f \n\t"						\
	"667: \n\t"						\
	"call kvm_spurious_fault \n\t"				\
	"668: \n\t"						\
	".pushsection .fixup, \"ax\" \n\t"			\
	"700: \n\t"						\
	cleanup_insn "\n\t"					\
	"cmpb $0, kvm_rebooting\n\t"				\
	"je 667b \n\t"						\
	"jmp 668b \n\t"						\
	".popsection \n\t"					\
	_ASM_EXTABLE(666b, 700b)

/* Variant without any cleanup instruction in the fixup path. */
#define __kvm_handle_fault_on_reboot(insn)			\
	____kvm_handle_fault_on_reboot(insn, "")

kvm_rebooting在如下函数中设置:

static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                      void *v)
{
        /*
         * Some (well, at least mine) BIOSes hang on reboot if
         * in vmx root mode.
         *
         * And Intel TXT required VMX off for all cpu when system shutdown.
         */
        pr_info("kvm: exiting hardware virtualization\n");该日志在下图有打印
        kvm_rebooting = true;
        on_each_cpu(hardware_disable_nolock, NULL, 1);
        return NOTIFY_OK;
}

三:结论

vmptrld属于虚拟化扩展指令。如果虚机进程还在运行,而reboot流程已经关闭了硬件虚拟化,此时再执行虚拟化相关指令就会失败并触发异常。异常被fixup代码捕获后会检查kvm_rebooting:如果为0(不在重启流程中),就调用kvm_spurious_fault,内核产生panic;如果不为0(正在重启),则忽略该指令继续执行。也就是说,判断发生在指令执行出错之后,而不是执行之前。内核注释的解释如下:

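/*
 * Hardware virtualization extension instructions may fault if a
 * reboot turns off virtualization while processes are running.
 * Usually after catching the fault we just panic; during reboot
 * instead the instruction is ignored.
 */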

panic前有日志显示硬件虚拟化已经退出(kvm: exiting hardware virtualization)。所以panic的发生符合逻辑,这更像是内核的一种选择,不属于问题。

附指令说明:

 

 

标签:00,主机,kvm,reboot,虚机,fault,panic,rax
来源: https://blog.csdn.net/py199122/article/details/120484836