GSoC 2012 week4&5:week3のバグを直して擬似BIOS動くようにした
これが動いていなかったので、直した。
IntelのマニュアルのVol.3、section 9.1.1 "Processor State After Reset" と section 26.3 "CHECKING AND LOADING GUEST STATE" を読みながら、VMCSの初期値のつじつまを合わせた。
int vm_setup_bios_registers(struct vmctx *vmctx, int vcpu) { int error; uint64_t rip, cr0, cr3, cr4, efer, rflags, rax, rbx, rcx, rdx; uint64_t rsi, rdi, rbp, rsp, desc_base; uint32_t desc_access, desc_limit; uint16_t gsel; #if 0 rip = 0xfff0; #endif rip = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) goto done; rflags = 0x2; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags)) != 0) goto done; cr0 = 0x60000010; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; cr3 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, cr3)) != 0) goto done; cr4 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; #if 0 desc_base = 0xffff0000; #endif desc_base = 0x0; desc_limit = 0xffff; /* PRESENT | DESC_TYPE_CODEDATA | SEG_TYPE_DATA_RW_ACCESSED */ desc_access = 0x00000093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); if (error) goto done; #if 0 gsel = 0xf000; #endif gsel = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0) goto done; desc_base = 0x0; desc_limit = 0xffff; /* PRESENT | DESC_TYPE_CODEDATA | SEG_TYPE_DATA_RW_ACCESSED */ desc_access = 0x00000093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); if (error) goto done; gsel = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0) goto done; /* same as SS */ error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); if (error) goto done; /* same as SS */ if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0) goto done; /* same as SS */ error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); if (error) goto done; /* same as SS */ if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0) goto done; /* same as SS */ error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, desc_base, 
desc_limit, desc_access); if (error) goto done; /* same as SS */ if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0) goto done; /* same as SS */ error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); if (error) goto done; /* same as SS */ if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0) goto done; rdx = 0xf00; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) goto done; rax = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, rax)) != 0) goto done; rbx = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, rbx)) != 0) goto done; rcx = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, rcx)) != 0) goto done; rsi = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, rsi)) != 0) goto done; rdi = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, rdi)) != 0) goto done; rbp = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, rbp)) != 0) goto done; rsp = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, rsp)) != 0) goto done; desc_base = 0x0; desc_limit = 0xffff; /* PRESENT | DESC_TYPE_CODEDATA | SEG_TYPE_DATA_RW */ desc_access = 0x00000092; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; /* same as GDTR */ error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; desc_base = 0x0; desc_limit = 0xffff; /* PRESENT | SEG_TYPE_16BIT_BUSY_TSS */ desc_access = 0x00000083; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, desc_base, desc_limit, desc_access); if (error) goto done; gsel = 0x0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, gsel)) != 0) goto done; desc_base = 0x0; desc_limit = 0xffff; /* PRESENT | SEG_TYPE_LDT */ desc_access = 0x00000082; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); if (error) goto 
done; /* same as TR */ if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, gsel)) != 0) goto done; efer = 0x9; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, efer)) != 0) goto done; error = 0; done: return (error); }
ここまではユーザランドから出来たのだが、どうもカーネル側にも手を加えないといけないと気づいたので、こんな感じに変更した。
(real modeでVMLAUNCHするには、VM_ENTRY_LOAD_EFERとVM_ENTRY_GUEST_LMAをクリアしないとならんという話。)
Index: sys/amd64/vmm/intel/vmx.c =================================================================== --- sys/amd64/vmm/intel/vmx.c (revision 238294) +++ sys/amd64/vmm/intel/vmx.c (revision 238295) @@ -1652,10 +1655,22 @@ break; case VM_CAP_UNRESTRICTED_GUEST: if (cap_unrestricted_guest) { + uint64_t ctls; + retval = 0; baseval = procbased_ctls2; flag = PROCBASED2_UNRESTRICTED_GUEST; reg = VMCS_SEC_PROC_BASED_CTLS; + error = vmcs_getreg(vmcs, + VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); + if (error == 0) { + ctls &= ~(VM_ENTRY_LOAD_EFER | VM_ENTRY_GUEST_LMA); + vmcs_setreg(vmcs, + VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); + }else{ + printf("%s vmcs_getreg returns %d\n", + __func__, error); + } } break; default:
これでBHyVeを実行すると、アドレス0000:0000からリアルモードでプログラムが実行され、0000:0000に置いたVMCALL命令が無事実行されて/usr/sbin/bhyveで命令がハンドル出来た。
$ sudo bhyvebiosload -m 128 -M 256 -h /usr/bhyve-guest/ vm0
$ sudo bhyve -m 128 -M 256 -b vm0
VMCALL handled
とてもおもしろいのだが、正直セグメント周りが今でもよく分からなくて、それが原因で本来考えていたFFFF:0000からのエントリは今でもできていない(´・ω・`)
ちなみに、マトモに全diffを読みたい人は、
svn diff -r238292:238305 https://socsvn.freebsd.org/socsvn/soc2012/syuu/bhyve-bios
してください。