@@ -28,6 +28,8 @@ LIBKVM_x86 += lib/x86/sev.c
LIBKVM_x86 += lib/x86/svm.c
LIBKVM_x86 += lib/x86/ucall.c
LIBKVM_x86 += lib/x86/vmx.c
+LIBKVM_x86 += lib/x86/tdx/tdx_util.c
+LIBKVM_x86 += lib/x86/tdx/td_boot.S
LIBKVM_arm64 += lib/arm64/gic.c
LIBKVM_arm64 += lib/arm64/gic_v3.c
@@ -79,6 +79,7 @@ enum kvm_mem_region_type {
MEM_REGION_DATA,
MEM_REGION_PT,
MEM_REGION_TEST_DATA,
+ MEM_REGION_TDX_BOOT_PARAMS,
NR_MEM_REGIONS,
};
@@ -985,6 +986,9 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
+ uint32_t nr_runnable_vcpus,
+ uint64_t extra_mem_pages);
static inline unsigned int
vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
@@ -1150,6 +1154,8 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
*/
void kvm_selftest_arch_init(void);
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+
void kvm_arch_vm_post_create(struct kvm_vm *vm);
bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
@@ -19,6 +19,7 @@ struct kvm_vm_arch {
uint64_t s_bit;
int sev_fd;
bool is_pt_protected;
+ bool has_protected_regs;
};
static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
new file mode 100644
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_TDX_TD_BOOT_H
+#define SELFTEST_TDX_TD_BOOT_H
+
+#include <stdint.h>
+
+#include "tdx/td_boot_asm.h"
+
+/*
+ * Layout for boot section (not to scale)
+ *
+ * GPA
+ * _________________________________ 0x1_0000_0000 (4GB)
+ * | Boot code trampoline |
+ * |___________________________|____ 0x0_ffff_fff0: Reset vector (16B below 4GB)
+ * | Boot code |
+ * |___________________________|____ td_boot will be copied here, so that the
+ * | | jmp to td_boot is exactly at the reset vector
+ * | Empty space |
+ * | |
+ * |───────────────────────────|
+ * | |
+ * | |
+ * | Boot parameters |
+ * | |
+ * | |
+ * |___________________________|____ 0x0_ffff_0000: TD_BOOT_PARAMETERS_GPA
+ */
+#define FOUR_GIGABYTES_GPA (4ULL << 30)
+
+/*
+ * The exact memory layout for LGDT or LIDT instructions.
+ */
+struct __packed td_boot_parameters_dtr {
+ uint16_t limit;
+ uint32_t base;
+};
+
+/*
+ * The exact layout in memory required for a ljmp, including the selector for
+ * changing code segment.
+ */
+struct __packed td_boot_parameters_ljmp_target {
+ uint32_t eip_gva;
+ uint16_t code64_sel;
+};
+
+/*
+ * Allows each vCPU to be initialized with different eip and esp.
+ */
+struct __packed td_per_vcpu_parameters {
+ uint32_t esp_gva;
+ struct td_boot_parameters_ljmp_target ljmp_target;
+};
+
+/*
+ * Boot parameters for the TD.
+ *
+ * Unlike with a regular VM, KVM cannot set registers such as esp and eip
+ * for a TD before boot, so to run selftests these registers have to be
+ * initialized by the TD boot code itself.
+ *
+ * This struct is loaded in TD private memory at TD_BOOT_PARAMETERS_GPA.
+ *
+ * The TD boot code will read off parameters from this struct and set up the
+ * vCPU for executing selftests.
+ */
+struct __packed td_boot_parameters {
+ uint32_t cr0;
+ uint32_t cr3;
+ uint32_t cr4;
+ struct td_boot_parameters_dtr gdtr;
+ struct td_boot_parameters_dtr idtr;
+ struct td_per_vcpu_parameters per_vcpu[];
+};
+
+void td_boot(void);
+void reset_vector(void);
+void td_boot_code_end(void);
+
+#define TD_BOOT_CODE_SIZE (td_boot_code_end - td_boot)
+
+#endif /* SELFTEST_TDX_TD_BOOT_H */
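Since td_boot.S (below) hard-codes byte offsets into these packed structs, a compile-time cross-check is a cheap way to keep the two in sync. A sketch of what such a check could look like (not part of this patch; the values mirror the TD_BOOT_PARAMETERS_* and SIZEOF_TD_PER_VCPU_PARAMETERS constants in td_boot.S):

#include <stddef.h>

#include "tdx/td_boot.h"

/* Sketch only: mirrors the offsets hard-coded in td_boot.S. */
_Static_assert(offsetof(struct td_boot_parameters, cr0) == 0, "CR0 offset");
_Static_assert(offsetof(struct td_boot_parameters, cr3) == 4, "CR3 offset");
_Static_assert(offsetof(struct td_boot_parameters, cr4) == 8, "CR4 offset");
_Static_assert(offsetof(struct td_boot_parameters, gdtr) == 12, "GDT offset");
_Static_assert(offsetof(struct td_boot_parameters, idtr) == 18, "IDT offset");
_Static_assert(offsetof(struct td_boot_parameters, per_vcpu) == 24, "per-vCPU array offset");
_Static_assert(sizeof(struct td_per_vcpu_parameters) == 10, "per-vCPU entry size");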
new file mode 100644
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_TDX_TD_BOOT_ASM_H
+#define SELFTEST_TDX_TD_BOOT_ASM_H
+
+/*
+ * GPA where TD boot parameters will be loaded.
+ *
+ * TD_BOOT_PARAMETERS_GPA is arbitrarily chosen to
+ *
+ * + be within the 4GB address space
+ * + provide enough contiguous memory for the struct td_boot_parameters such
+ * that there is one struct td_per_vcpu_parameters for KVM_MAX_VCPUS
+ */
+#define TD_BOOT_PARAMETERS_GPA 0xffff0000
+
+#endif // SELFTEST_TDX_TD_BOOT_ASM_H
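For intuition on why this GPA is sufficient: it leaves 64 KiB below 4 GiB, the boot code only occupies the top of that range, and each vCPU needs only a 10-byte struct td_per_vcpu_parameters entry, so even a few thousand vCPUs fit with room to spare. A hypothetical sanity check (not part of the patch; assumes the boot code takes at most one page):

#include <assert.h>
#include <stdint.h>

#include "tdx/td_boot.h"	/* struct td_boot_parameters, FOUR_GIGABYTES_GPA */

/*
 * Hypothetical helper: boot parameters for max_vcpus vCPUs must fit between
 * TD_BOOT_PARAMETERS_GPA and the boot code page just below 4 GiB.
 */
static void check_boot_params_fit(uint32_t max_vcpus)
{
	uint64_t params_end = TD_BOOT_PARAMETERS_GPA +
			      sizeof(struct td_boot_parameters) +
			      (uint64_t)max_vcpus * sizeof(struct td_per_vcpu_parameters);

	assert(params_end <= FOUR_GIGABYTES_GPA - 4096);
}

For example, 4096 vCPUs need 24 + 4096 * 10 = 40984 bytes, ending around GPA 0xffffa018, well below the boot code.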
new file mode 100644
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_TDX_KVM_UTIL_H
+#define SELFTESTS_TDX_KVM_UTIL_H
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+
+void tdx_filter_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid_data);
+void __tdx_mask_cpuid_features(struct kvm_cpuid_entry2 *entry);
+
+struct kvm_vcpu *td_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code);
+
+struct kvm_vm *td_create(void);
+void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t attributes);
+void td_finalize(struct kvm_vm *vm);
+
+#endif // SELFTESTS_TDX_KVM_UTIL_H
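A TDX selftest built on these helpers follows a create, initialize, add-vCPUs, finalize flow. A minimal sketch of a caller (the tdx_util.h include path, attribute value, and guest payload are placeholders, and run-loop/ucall handling is omitted):

#include "kvm_util.h"
#include "ucall_common.h"
#include "tdx/tdx_util.h"	/* assumed include path for the declarations above */

static void guest_code(void)
{
	/* TD guest payload under test goes here. */
	GUEST_DONE();
}

int main(void)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = td_create();
	td_initialize(vm, VM_MEM_SRC_ANONYMOUS, 0 /* attributes */);
	vcpu = td_vcpu_add(vm, 0, guest_code);
	td_finalize(vm);

	vcpu_run(vcpu);

	kvm_vm_free(vm);
	return 0;
}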
@@ -373,9 +373,9 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
return vm;
}
-static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
- uint32_t nr_runnable_vcpus,
- uint64_t extra_mem_pages)
+uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
+ uint32_t nr_runnable_vcpus,
+ uint64_t extra_mem_pages)
{
uint64_t page_size = vm_guest_mode_params[mode].page_size;
uint64_t nr_pages;
@@ -590,7 +590,7 @@ void sync_exception_handlers_to_guest(struct kvm_vm *vm)
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
-static void vm_init_descriptor_tables(struct kvm_vm *vm)
+void vm_init_descriptor_tables(struct kvm_vm *vm)
{
extern void *idt_handlers;
struct kvm_segment seg;
@@ -697,16 +697,19 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
vcpu = __vm_vcpu_add(vm, vcpu_id);
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- vcpu_init_sregs(vm, vcpu);
- vcpu_init_xcrs(vm, vcpu);
vcpu->initial_stack_addr = stack_vaddr;
- /* Setup guest general purpose registers */
-	vcpu_regs_get(vcpu, &regs);
- regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr;
-	vcpu_regs_set(vcpu, &regs);
+ if (!vm->arch.has_protected_regs) {
+ vcpu_init_sregs(vm, vcpu);
+ vcpu_init_xcrs(vm, vcpu);
+
+ /* Setup guest general purpose registers */
+		vcpu_regs_get(vcpu, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr;
+		vcpu_regs_set(vcpu, &regs);
+ }
/* Setup the MP state */
mp_state.mp_state = 0;
new file mode 100644
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "tdx/td_boot_asm.h"
+
+/* Offsets for reading struct td_boot_parameters. */
+#define TD_BOOT_PARAMETERS_CR0 0
+#define TD_BOOT_PARAMETERS_CR3 4
+#define TD_BOOT_PARAMETERS_CR4 8
+#define TD_BOOT_PARAMETERS_GDT 12
+#define TD_BOOT_PARAMETERS_IDT 18
+#define TD_BOOT_PARAMETERS_PER_VCPU 24
+
+/* Offsets for reading struct td_per_vcpu_parameters. */
+#define TD_PER_VCPU_PARAMETERS_ESP_GVA 0
+#define TD_PER_VCPU_PARAMETERS_LJMP_TARGET 4
+
+#define SIZEOF_TD_PER_VCPU_PARAMETERS 10
+
+.code32
+
+.globl td_boot
+td_boot:
+ /* In this procedure, edi is used as a temporary register. */
+ cli
+
+ /* Paging is off. */
+
+ movl $TD_BOOT_PARAMETERS_GPA, %ebx
+
+ /*
+ * Find the address of struct td_per_vcpu_parameters for this
+ * vCPU based on esi (TDX spec: initialized with vCPU id). Put
+ * struct address into register for indirect addressing.
+ */
+ movl $SIZEOF_TD_PER_VCPU_PARAMETERS, %eax
+ mul %esi
+ leal TD_BOOT_PARAMETERS_PER_VCPU(%ebx), %edi
+ addl %edi, %eax
+
+ /* Setup stack. */
+ movl TD_PER_VCPU_PARAMETERS_ESP_GVA(%eax), %esp
+
+ /* Setup GDT. */
+ leal TD_BOOT_PARAMETERS_GDT(%ebx), %edi
+ lgdt (%edi)
+
+ /* Setup IDT. */
+ leal TD_BOOT_PARAMETERS_IDT(%ebx), %edi
+ lidt (%edi)
+
+ /*
+	 * Set up control registers (no instruction can move a value from
+	 * memory directly into a control register, so use edi as scratch).
+ */
+ movl TD_BOOT_PARAMETERS_CR4(%ebx), %edi
+ movl %edi, %cr4
+ movl TD_BOOT_PARAMETERS_CR3(%ebx), %edi
+ movl %edi, %cr3
+ movl TD_BOOT_PARAMETERS_CR0(%ebx), %edi
+ movl %edi, %cr0
+
+	/* Paging is on once CR0.PG (the most significant bit of cr0) is set. */
+
+ /*
+ * Jump to selftest guest code. Far jumps read <segment
+ * selector:new eip> from <addr+4:addr>. This location has
+ * already been set up in boot parameters, and boot parameters can
+ * be read because boot code and boot parameters are loaded so
+ * that GVA and GPA are mapped 1:1.
+ */
+ ljmp *TD_PER_VCPU_PARAMETERS_LJMP_TARGET(%eax)
+
+.globl reset_vector
+reset_vector:
+ jmp td_boot
+ /*
+ * Pad reset_vector to its full size of 16 bytes so that this
+ * can be loaded with the end of reset_vector aligned to GPA=4G.
+ */
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+ int3
+
+/* Leave marker so size of td_boot code can be computed. */
+.globl td_boot_code_end
+td_boot_code_end:
+
+/* Disable executable stack. */
+.section .note.GNU-stack,"",%progbits
new file mode 100644
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/kvm.h>
+#include <errno.h>
+#include <linux/kvm.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "tdx/td_boot.h"
+#include "test_util.h"
+
+uint64_t tdx_s_bit;
+
+/*
+ * TDX ioctls
+ */
+
+static char *tdx_cmd_str[] = {
+ "KVM_TDX_CAPABILITIES",
+ "KVM_TDX_INIT_VM",
+ "KVM_TDX_INIT_VCPU",
+ "KVM_TDX_INIT_MEM_REGION",
+ "KVM_TDX_FINALIZE_VM",
+ "KVM_TDX_GET_CPUID"
+};
+
+#define TDX_MAX_CMD_STR (ARRAY_SIZE(tdx_cmd_str))
+
+static int _tdx_ioctl(int fd, int ioctl_no, uint32_t flags, void *data)
+{
+ struct kvm_tdx_cmd tdx_cmd;
+
+	TEST_ASSERT(ioctl_no < TDX_MAX_CMD_STR, "Unknown TDX CMD: %d",
+ ioctl_no);
+
+ memset(&tdx_cmd, 0x0, sizeof(tdx_cmd));
+ tdx_cmd.id = ioctl_no;
+ tdx_cmd.flags = flags;
+ tdx_cmd.data = (uint64_t)data;
+
+ return ioctl(fd, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+}
+
+static void tdx_ioctl(int fd, int ioctl_no, uint32_t flags, void *data)
+{
+ int r;
+
+ r = _tdx_ioctl(fd, ioctl_no, flags, data);
+ TEST_ASSERT(r == 0, "%s failed: %d %d", tdx_cmd_str[ioctl_no], r,
+ errno);
+}
+
+static struct kvm_tdx_capabilities *tdx_read_capabilities(struct kvm_vm *vm)
+{
+ struct kvm_tdx_capabilities *tdx_cap = NULL;
+ int nr_cpuid_configs = 4;
+ int rc = -1;
+ int i;
+
+ do {
+ nr_cpuid_configs *= 2;
+
+ tdx_cap = realloc(tdx_cap, sizeof(*tdx_cap) +
+ sizeof(tdx_cap->cpuid) +
+ (sizeof(struct kvm_cpuid_entry2) * nr_cpuid_configs));
+ TEST_ASSERT(tdx_cap,
+ "Could not allocate memory for tdx capability nr_cpuid_configs %d\n",
+ nr_cpuid_configs);
+
+ tdx_cap->cpuid.nent = nr_cpuid_configs;
+ rc = _tdx_ioctl(vm->fd, KVM_TDX_CAPABILITIES, 0, tdx_cap);
+ } while (rc < 0 && errno == E2BIG);
+
+ TEST_ASSERT(rc == 0, "KVM_TDX_CAPABILITIES failed: %d %d",
+ rc, errno);
+
+ pr_debug("tdx_cap: supported_attrs: 0x%016llx\n"
+ "tdx_cap: supported_xfam 0x%016llx\n",
+ tdx_cap->supported_attrs, tdx_cap->supported_xfam);
+
+ for (i = 0; i < tdx_cap->cpuid.nent; i++) {
+ const struct kvm_cpuid_entry2 *config = &tdx_cap->cpuid.entries[i];
+
+ pr_debug("cpuid config[%d]: leaf 0x%x sub_leaf 0x%x eax 0x%08x ebx 0x%08x ecx 0x%08x edx 0x%08x\n",
+ i, config->function, config->index,
+ config->eax, config->ebx, config->ecx, config->edx);
+ }
+
+ return tdx_cap;
+}
+
+static struct kvm_cpuid_entry2 *tdx_find_cpuid_config(struct kvm_tdx_capabilities *cap,
+ uint32_t leaf, uint32_t sub_leaf)
+{
+ struct kvm_cpuid_entry2 *config;
+ uint32_t i;
+
+ for (i = 0; i < cap->cpuid.nent; i++) {
+ config = &cap->cpuid.entries[i];
+
+ if (config->function == leaf && config->index == sub_leaf)
+ return config;
+ }
+
+ return NULL;
+}
+
+#define XFEATURE_MASK_CET (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)
+
+static void tdx_apply_cpuid_restrictions(struct kvm_cpuid2 *cpuid_data)
+{
+ for (int i = 0; i < cpuid_data->nent; i++) {
+ struct kvm_cpuid_entry2 *e = &cpuid_data->entries[i];
+
+ if (e->function == 0xd && e->index == 0) {
+ /*
+			 * The TDX module requires XTILE_{CFG, DATA} to be set
+			 * together; both bits are needed for AMX to be
+			 * functional. Clear both unless both are supported.
+ */
+ if ((e->eax & XFEATURE_MASK_XTILE) !=
+ XFEATURE_MASK_XTILE) {
+ e->eax &= ~XFEATURE_MASK_XTILE;
+ }
+ }
+ if (e->function == 0xd && e->index == 1) {
+ /*
+			 * TDX doesn't support arch LBR yet. Clear its bit
+			 * from the supported IA32_XSS bits in this sub-leaf.
+ */
+ e->ecx &= ~XFEATURE_MASK_LBR;
+ /*
+			 * The TDX module requires both CET_{U, S} to be set
+			 * even if only one is supported.
+ */
+ if (e->ecx & XFEATURE_MASK_CET)
+ e->ecx |= XFEATURE_MASK_CET;
+ }
+ }
+}
+
+#define KVM_MAX_CPUID_ENTRIES 256
+
+#define CPUID_EXT_VMX BIT(5)
+#define CPUID_EXT_SMX BIT(6)
+#define CPUID_PSE36 BIT(17)
+#define CPUID_7_0_EBX_TSC_ADJUST BIT(1)
+#define CPUID_7_0_EBX_SGX BIT(2)
+#define CPUID_7_0_EBX_INTEL_PT BIT(25)
+#define CPUID_7_0_ECX_SGX_LC BIT(30)
+#define CPUID_APM_INVTSC BIT(8)
+#define CPUID_8000_0008_EBX_WBNOINVD BIT(9)
+#define CPUID_EXT_PDCM BIT(15)
+
+#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
+ (1U << KVM_FEATURE_PV_UNHALT) | \
+ (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
+ (1U << KVM_FEATURE_PV_SEND_IPI) | \
+ (1U << KVM_FEATURE_POLL_CONTROL) | \
+ (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
+ (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
+
+void __tdx_mask_cpuid_features(struct kvm_cpuid_entry2 *entry)
+{
+ /*
+ * Only entries with sub-leaf zero need to be masked, but some of these
+ * leaves have other sub-leaves defined. Bail on any non-zero sub-leaf,
+ * so they don't get unintentionally modified.
+ */
+ if (entry->index)
+ return;
+
+ switch (entry->function) {
+ case 0x1:
+ entry->ecx &= ~(CPUID_EXT_VMX | CPUID_EXT_SMX);
+ entry->edx &= ~CPUID_PSE36;
+ break;
+ case 0x7:
+ entry->ebx &= ~(CPUID_7_0_EBX_TSC_ADJUST | CPUID_7_0_EBX_SGX);
+ entry->ebx &= ~CPUID_7_0_EBX_INTEL_PT;
+ entry->ecx &= ~CPUID_7_0_ECX_SGX_LC;
+ break;
+ case 0x40000001:
+ entry->eax &= TDX_SUPPORTED_KVM_FEATURES;
+ break;
+ case 0x80000007:
+ entry->edx |= CPUID_APM_INVTSC;
+ break;
+ case 0x80000008:
+ entry->ebx &= CPUID_8000_0008_EBX_WBNOINVD;
+ break;
+ default:
+ break;
+ }
+}
+
+static void tdx_mask_cpuid_features(struct kvm_cpuid2 *cpuid_data)
+{
+ for (int i = 0; i < cpuid_data->nent; i++)
+ __tdx_mask_cpuid_features(&cpuid_data->entries[i]);
+}
+
+void tdx_filter_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid_data)
+{
+ struct kvm_tdx_capabilities *tdx_cap;
+ struct kvm_cpuid_entry2 *config;
+ struct kvm_cpuid_entry2 *e;
+ int i;
+
+ tdx_cap = tdx_read_capabilities(vm);
+
+ i = 0;
+ while (i < cpuid_data->nent) {
+ e = cpuid_data->entries + i;
+ config = tdx_find_cpuid_config(tdx_cap, e->function, e->index);
+
+ if (!config) {
+ int left = cpuid_data->nent - i - 1;
+
+ if (left > 0)
+ memmove(cpuid_data->entries + i,
+ cpuid_data->entries + i + 1,
+ sizeof(*cpuid_data->entries) * left);
+ cpuid_data->nent--;
+ continue;
+ }
+
+ e->eax &= config->eax;
+ e->ebx &= config->ebx;
+ e->ecx &= config->ecx;
+ e->edx &= config->edx;
+
+ i++;
+ }
+
+ free(tdx_cap);
+}
+
+static void tdx_td_init(struct kvm_vm *vm, uint64_t attributes)
+{
+ struct kvm_tdx_init_vm *init_vm;
+ const struct kvm_cpuid2 *tmp;
+ struct kvm_cpuid2 *cpuid;
+
+ tmp = kvm_get_supported_cpuid();
+
+ cpuid = allocate_kvm_cpuid2(KVM_MAX_CPUID_ENTRIES);
+ memcpy(cpuid, tmp, kvm_cpuid2_size(tmp->nent));
+ tdx_mask_cpuid_features(cpuid);
+
+ init_vm = calloc(1, sizeof(*init_vm) +
+ sizeof(init_vm->cpuid.entries[0]) * cpuid->nent);
+ TEST_ASSERT(init_vm, "vm allocation failed");
+
+ memcpy(&init_vm->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ free(cpuid);
+
+ init_vm->attributes = attributes;
+
+ tdx_apply_cpuid_restrictions(&init_vm->cpuid);
+ tdx_filter_cpuid(vm, &init_vm->cpuid);
+
+ tdx_ioctl(vm->fd, KVM_TDX_INIT_VM, 0, init_vm);
+ free(init_vm);
+}
+
+static void tdx_td_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid;
+
+ cpuid = allocate_kvm_cpuid2(KVM_MAX_CPUID_ENTRIES);
+ tdx_ioctl(vcpu->fd, KVM_TDX_GET_CPUID, 0, cpuid);
+ vcpu_init_cpuid(vcpu, cpuid);
+ free(cpuid);
+ tdx_ioctl(vcpu->fd, KVM_TDX_INIT_VCPU, 0, NULL);
+ /*
+ * Refresh CPUID to get KVM's "runtime" updates which are done by
+ * KVM_TDX_INIT_VCPU.
+ */
+ vcpu_get_cpuid(vcpu);
+}
+
+static void tdx_init_mem_region(struct kvm_vm *vm, void *source_pages,
+ uint64_t gpa, uint64_t size)
+{
+ uint32_t metadata = KVM_TDX_MEASURE_MEMORY_REGION;
+ struct kvm_tdx_init_mem_region mem_region = {
+ .source_addr = (uint64_t)source_pages,
+ .gpa = gpa,
+ .nr_pages = size / PAGE_SIZE,
+ };
+ struct kvm_vcpu *vcpu;
+
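+	/*
+	 * KVM_TDX_INIT_MEM_REGION is issued on a vCPU fd, so grab the first
+	 * vCPU (at least one vCPU must have been added by this point).
+	 */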
+ vcpu = list_first_entry_or_null(&vm->vcpus, struct kvm_vcpu, list);
+
+ TEST_ASSERT((mem_region.nr_pages > 0) &&
+ ((mem_region.nr_pages * PAGE_SIZE) == size),
+ "Cannot add partial pages to the guest memory.\n");
+ TEST_ASSERT(((uint64_t)source_pages & (PAGE_SIZE - 1)) == 0,
+ "Source memory buffer is not page aligned\n");
+ tdx_ioctl(vcpu->fd, KVM_TDX_INIT_MEM_REGION, metadata, &mem_region);
+}
+
+static void tdx_td_finalize_mr(struct kvm_vm *vm)
+{
+ tdx_ioctl(vm->fd, KVM_TDX_FINALIZE_VM, 0, NULL);
+}
+
+/*
+ * TD creation/setup/finalization
+ */
+
+static void tdx_enable_capabilities(struct kvm_vm *vm)
+{
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X2APIC_API);
+ TEST_ASSERT(rc, "TDX: KVM_CAP_X2APIC_API is not supported!");
+ rc = kvm_check_cap(KVM_CAP_SPLIT_IRQCHIP);
+ TEST_ASSERT(rc, "TDX: KVM_CAP_SPLIT_IRQCHIP is not supported!");
+
+ vm_enable_cap(vm, KVM_CAP_X2APIC_API,
+ KVM_X2APIC_API_USE_32BIT_IDS |
+ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK);
+ vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+}
+
+static void tdx_apply_cr4_restrictions(struct kvm_sregs *sregs)
+{
+ /* TDX spec 11.6.2: CR4 bit MCE is fixed to 1 */
+ sregs->cr4 |= X86_CR4_MCE;
+
+ /* Set this because UEFI also sets this up, to handle XMM exceptions */
+ sregs->cr4 |= X86_CR4_OSXMMEXCPT;
+
+ /* TDX spec 11.6.2: CR4 bit VMXE and SMXE are fixed to 0 */
+ sregs->cr4 &= ~(X86_CR4_VMXE | X86_CR4_SMXE);
+}
+
+static void load_td_boot_code(struct kvm_vm *vm)
+{
+ void *boot_code_hva = addr_gpa2hva(vm, FOUR_GIGABYTES_GPA - TD_BOOT_CODE_SIZE);
+
+ TEST_ASSERT(td_boot_code_end - reset_vector == 16,
+ "The reset vector must be 16 bytes in size.");
+ memcpy(boot_code_hva, td_boot, TD_BOOT_CODE_SIZE);
+}
+
+static void load_td_per_vcpu_parameters(struct td_boot_parameters *params,
+ struct kvm_sregs *sregs,
+ struct kvm_vcpu *vcpu,
+ void *guest_code)
+{
+ struct td_per_vcpu_parameters *vcpu_params = ¶ms->per_vcpu[vcpu->id];
+
+ TEST_ASSERT(vcpu->initial_stack_addr != 0,
+ "initial stack address should not be 0");
+ TEST_ASSERT(vcpu->initial_stack_addr <= 0xffffffff,
+ "initial stack address must fit in 32 bits");
+ TEST_ASSERT((uint64_t)guest_code <= 0xffffffff,
+ "guest_code must fit in 32 bits");
+ TEST_ASSERT(sregs->cs.selector != 0, "cs.selector should not be 0");
+
+ vcpu_params->esp_gva = (uint32_t)(uint64_t)vcpu->initial_stack_addr;
+ vcpu_params->ljmp_target.eip_gva = (uint32_t)(uint64_t)guest_code;
+ vcpu_params->ljmp_target.code64_sel = sregs->cs.selector;
+}
+
+static void load_td_common_parameters(struct td_boot_parameters *params,
+ struct kvm_sregs *sregs)
+{
+ /* Set parameters! */
+ params->cr0 = sregs->cr0;
+ params->cr3 = sregs->cr3;
+ params->cr4 = sregs->cr4;
+ params->gdtr.limit = sregs->gdt.limit;
+ params->gdtr.base = sregs->gdt.base;
+ params->idtr.limit = sregs->idt.limit;
+ params->idtr.base = sregs->idt.base;
+
+ TEST_ASSERT(params->cr0 != 0, "cr0 should not be 0");
+ TEST_ASSERT(params->cr3 != 0, "cr3 should not be 0");
+ TEST_ASSERT(params->cr4 != 0, "cr4 should not be 0");
+ TEST_ASSERT(params->gdtr.base != 0, "gdt base address should not be 0");
+ TEST_ASSERT(params->idtr.base != 0, "idt base address should not be 0");
+}
+
+static void load_td_boot_parameters(struct td_boot_parameters *params,
+ struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_sregs sregs;
+
+ /* Assemble parameters in sregs */
+ memset(&sregs, 0, sizeof(struct kvm_sregs));
+ vcpu_setup_mode_sregs(vcpu->vm, &sregs);
+ tdx_apply_cr4_restrictions(&sregs);
+
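+	/*
+	 * Common parameters only need to be loaded once; cr0 is non-zero
+	 * once the first vCPU has been added and they have been set up.
+	 */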
+ if (!params->cr0)
+ load_td_common_parameters(params, &sregs);
+
+ load_td_per_vcpu_parameters(params, &sregs, vcpu, guest_code);
+}
+
+/*
+ * Adds a vCPU to a TD (Trust Domain) with minimal defaults. It does not set
+ * up any general purpose registers, as they will be initialized by the TDX
+ * module. In TDX, each vCPU's RIP is set to 0xFFFFFFF0. See the Intel TDX EAS
+ * section "Initial State of Guest GPRs" for more information on a vCPU's
+ * initial register values when entering the TD for the first time.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpu_id - The id of the vCPU to add to the VM.
+ *   guest_code - Guest entry point the vCPU will jump to after TD boot.
+ */
+struct kvm_vcpu *td_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+
+ vm->arch.has_protected_regs = true;
+ vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+ tdx_td_vcpu_init(vcpu);
+
+ load_td_boot_parameters(addr_gpa2hva(vm, TD_BOOT_PARAMETERS_GPA),
+ vcpu, guest_code);
+
+ return vcpu;
+}
+
+static void load_td_memory_region(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ const struct sparsebit *pages = region->protected_phy_pages;
+ const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+ const uint64_t hva_base = region->region.userspace_addr;
+ const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+
+ sparsebit_idx_t i;
+ sparsebit_idx_t j;
+
+ if (!sparsebit_any_set(pages))
+ return;
+
+ sparsebit_for_each_set_range(pages, i, j) {
+ const uint64_t size_to_load = (j - i + 1) * vm->page_size;
+ const uint64_t offset =
+ (i - lowest_page_in_region) * vm->page_size;
+ const uint64_t hva = hva_base + offset;
+ const uint64_t gpa = gpa_base + offset;
+ void *source_addr;
+
+ /*
+		 * The KVM_TDX_INIT_MEM_REGION ioctl cannot encrypt memory in
+		 * place, so copy the contents into a separate source buffer.
+ */
+ source_addr = mmap(NULL, size_to_load, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+		TEST_ASSERT(source_addr != MAP_FAILED,
+ "Could not allocate memory for loading memory region");
+
+ memcpy(source_addr, (void *)hva, size_to_load);
+
+ tdx_init_mem_region(vm, source_addr, gpa, size_to_load);
+
+ munmap(source_addr, size_to_load);
+ }
+}
+
+static void load_td_private_memory(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
+ load_td_memory_region(vm, region);
+ }
+}
+
+struct kvm_vm *td_create(void)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_TDX_VM,
+ };
+
+ return ____vm_create(shape);
+}
+
+static void td_setup_boot_code(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type)
+{
+ size_t boot_code_allocation = round_up(TD_BOOT_CODE_SIZE, PAGE_SIZE);
+ vm_paddr_t boot_code_base_gpa = FOUR_GIGABYTES_GPA - boot_code_allocation;
+ size_t npages = DIV_ROUND_UP(boot_code_allocation, PAGE_SIZE);
+ vm_vaddr_t addr;
+
+ vm_userspace_mem_region_add(vm, src_type, boot_code_base_gpa, 1, npages,
+ KVM_MEM_GUEST_MEMFD);
+ vm->memslots[MEM_REGION_CODE] = 1;
+ addr = vm_vaddr_identity_alloc(vm, boot_code_allocation,
+ boot_code_base_gpa, MEM_REGION_CODE);
+ TEST_ASSERT_EQ(addr, boot_code_base_gpa);
+
+ load_td_boot_code(vm);
+}
+
+static size_t td_boot_parameters_size(void)
+{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ size_t total_per_vcpu_parameters_size =
+ max_vcpus * sizeof(struct td_per_vcpu_parameters);
+
+ return sizeof(struct td_boot_parameters) + total_per_vcpu_parameters_size;
+}
+
+static void td_setup_boot_parameters(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type)
+{
+ size_t boot_params_size = td_boot_parameters_size();
+ int npages = DIV_ROUND_UP(boot_params_size, PAGE_SIZE);
+ size_t total_size = npages * PAGE_SIZE;
+ vm_vaddr_t addr;
+
+ vm_userspace_mem_region_add(vm, src_type, TD_BOOT_PARAMETERS_GPA, 2,
+ npages, KVM_MEM_GUEST_MEMFD);
+ vm->memslots[MEM_REGION_TDX_BOOT_PARAMS] = 2;
+ addr = vm_vaddr_identity_alloc(vm, total_size, TD_BOOT_PARAMETERS_GPA,
+ MEM_REGION_TDX_BOOT_PARAMS);
+ TEST_ASSERT_EQ(addr, TD_BOOT_PARAMETERS_GPA);
+}
+
+void td_initialize(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t attributes)
+{
+ uint64_t nr_pages_required;
+
+ tdx_enable_capabilities(vm);
+
+ tdx_td_init(vm, attributes);
+
+ nr_pages_required = vm_nr_pages_required(VM_MODE_DEFAULT, 1, 0);
+
+ /*
+	 * Add memory (the 0th memslot) for the TD. This will be used to set
+	 * up the vCPUs (provide stack space) and to load the ELF file.
+ */
+ vm_userspace_mem_region_add(vm, src_type, 0, 0, nr_pages_required,
+ KVM_MEM_GUEST_MEMFD);
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+ tdx_s_bit = vm->arch.s_bit;
+ sync_global_to_guest(vm, tdx_s_bit);
+
+ vm_init_descriptor_tables(vm);
+
+ td_setup_boot_code(vm, src_type);
+ td_setup_boot_parameters(vm, src_type);
+}
+
+void td_finalize(struct kvm_vm *vm)
+{
+ sync_exception_handlers_to_guest(vm);
+
+ load_td_private_memory(vm);
+
+ tdx_td_finalize_mr(vm);
+}