@@ -2,6 +2,7 @@
 
 #include "mmu.h"
 #include "mmu_internal.h"
+#include "mmutrace.h"
 #include "tdp_iter.h"
 #include "tdp_mmu.h"
 #include "spte.h"
@@ -271,6 +272,10 @@ static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 #define tdp_root_for_each_pte(_iter, _root, _start, _end) \
 	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)
 
+#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \
+	for_each_tdp_pte(_iter, __va(_mmu->root_hpa), \
+			 _mmu->shadow_root_level, _start, _end)
+
 /*
  * Flush the TLB if the process should drop kvm->mmu_lock.
  * Return whether the caller still needs to flush the tlb.
@@ -355,3 +360,133 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 }
+
+/*
+ * Installs a last-level SPTE to handle a TDP page fault.
+ * (NPT/EPT violation/misconfiguration)
+ */
+static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
+					   int map_writable,
+					   struct tdp_iter *iter,
+					   kvm_pfn_t pfn, bool prefault)
+{
+	u64 new_spte;
+	int ret = 0;
+	int make_spte_ret = 0;
+
+	if (unlikely(is_noslot_pfn(pfn))) {
+		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
+		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
+	} else
+		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
+					  pfn, iter->old_spte, prefault, true,
+					  map_writable, !shadow_accessed_mask,
+					  &new_spte);
+
+	if (new_spte == iter->old_spte)
+		ret = RET_PF_SPURIOUS;
+	else
+		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);
+
+	/*
+	 * If the page fault was caused by a write but the page is write
+	 * protected, emulation is needed. If the emulation was skipped,
+	 * the vCPU would have the same fault again.
+	 */
+	if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
+		if (write)
+			ret = RET_PF_EMULATE;
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+	}
+
+	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
+	if (unlikely(is_mmio_spte(new_spte)))
+		ret = RET_PF_EMULATE;
+
+	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
+	if (!prefault)
+		vcpu->stat.pf_fixed++;
+
+	return ret;
+}
+
+/*
+ * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
+ * page tables and SPTEs to translate the faulting guest physical address.
+ */
+int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+		    int map_writable, int max_level, kvm_pfn_t pfn,
+		    bool prefault)
+{
+	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
+	bool write = error_code & PFERR_WRITE_MASK;
+	bool exec = error_code & PFERR_FETCH_MASK;
+	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	struct tdp_iter iter;
+	struct kvm_mmu_page *sp;
+	u64 *child_pt;
+	u64 new_spte;
+	int ret;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	int level;
+	int req_level;
+
+	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
+		return RET_PF_RETRY;
+	if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
+		return RET_PF_RETRY;
+
+	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
+					huge_page_disallowed, &req_level);
+
+	trace_kvm_mmu_spte_requested(gpa, level, pfn);
+	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+		if (nx_huge_page_workaround_enabled)
+			disallowed_hugepage_adjust(iter.old_spte, gfn,
+						   iter.level, &pfn, &level);
+
+		if (iter.level == level)
+			break;
+
+		/*
+		 * If there is an SPTE mapping a large page at a higher level
+		 * than the target, that SPTE must be cleared and replaced
+		 * with a non-leaf SPTE.
+		 */
+		if (is_shadow_present_pte(iter.old_spte) &&
+		    is_large_pte(iter.old_spte)) {
+			tdp_mmu_set_spte(vcpu->kvm, &iter, 0);
+
+			kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
+					KVM_PAGES_PER_HPAGE(iter.level));
+
+			/*
+			 * The iter must explicitly re-read the spte here
+			 * because the new value informs the !present
+			 * path below.
+			 */
+			iter.old_spte = READ_ONCE(*iter.sptep);
+		}
+
+		if (!is_shadow_present_pte(iter.old_spte)) {
+			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
+			list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
+			child_pt = sp->spt;
+			clear_page(child_pt);
+			new_spte = make_nonleaf_spte(child_pt,
+						     !shadow_accessed_mask);
+
+			trace_kvm_mmu_get_page(sp, true);
+			tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
+		}
+	}
+
+	if (WARN_ON(iter.level != level))
+		return RET_PF_RETRY;
+
+	ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
+					      pfn, prefault);
+
+	return ret;
+}
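
For context, a minimal sketch of the caller side this handler assumes: the direct TDP fault path resolves the pfn and then hands faults on a TDP MMU root to kvm_tdp_mmu_map() instead of the legacy direct-map path. The mmu.c side is not shown in this diff, so the caller name (direct_page_fault()), the use of __direct_map() for the fallback, and the exact dispatch below are assumptions for illustration, not part of this patch:

	/*
	 * Illustrative only: in the direct fault path, once the pfn has been
	 * resolved and mmu_lock is held, route faults on a TDP MMU root to
	 * the new handler and leave everything else on the existing path.
	 */
	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
		r = kvm_tdp_mmu_map(vcpu, gpa, error_code, map_writable,
				    max_level, pfn, prefault);
	else
		r = __direct_map(vcpu, gpa, error_code, map_writable,
				 max_level, pfn, prefault);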