mbox series

[v2,0/9] KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg

Message ID 20221012205609.2811294-1-scgl@linux.ibm.com
Headers show
Series KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg | expand

Message

Janis Schoetterl-Glausch Oct. 12, 2022, 8:56 p.m. UTC
User space can use the MEM_OP ioctl to make storage key checked reads
and writes to the guest. However, it has no way of performing atomic,
key checked accesses to the guest.
Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg
mode. For now, support this mode for absolute accesses only.

This mode can be used, for example, to set the device-state-change
indicator and the adapter-local-summary indicator atomically.

This series also contains some fixes/changes for the memop selftest that
are independent of the cmpxchg changes.

v1 -> v2
 * get rid of xrk instruction for cmpxchg byte and short implementation
 * pass old parameter via pointer instead of in mem_op struct
 * indicate failure of cmpxchg due to wrong old value by special return
   code
 * picked up R-b's (thanks Thomas)

Janis Schoetterl-Glausch (9):
  s390/uaccess: Add storage key checked cmpxchg access to user space
  KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg
  Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG
  KVM: s390: selftest: memop: Pass mop_desc via pointer
  KVM: s390: selftest: memop: Replace macros by functions
  KVM: s390: selftest: memop: Add cmpxchg tests
  KVM: s390: selftest: memop: Add bad address test
  KVM: s390: selftest: memop: Fix typo
  KVM: s390: selftest: memop: Fix wrong address being used in test

 Documentation/virt/kvm/api.rst            |  21 +-
 include/uapi/linux/kvm.h                  |   5 +
 arch/s390/include/asm/uaccess.h           | 189 ++++++
 arch/s390/kvm/gaccess.h                   |   4 +
 arch/s390/kvm/gaccess.c                   |  57 ++
 arch/s390/kvm/kvm-s390.c                  |  35 +-
 tools/testing/selftests/kvm/s390x/memop.c | 674 +++++++++++++++++-----
 7 files changed, 833 insertions(+), 152 deletions(-)

Range-diff against v1:
 1:  7b4392170faa !  1:  58adf2b7688a s390/uaccess: Add storage key checked cmpxchg access to user space
    @@ arch/s390/include/asm/uaccess.h: do {									\
     +						    unsigned __int128 *old_p,
     +						    unsigned __int128 new, u8 access_key)
     +{
    -+	u32 shift, mask, old_word, new_word, align_mask, tmp, diff;
    ++	u32 shift, mask, old_word, new_word, align_mask, tmp;
     +	u64 aligned;
     +	int ret = -EFAULT;
     +
    @@ arch/s390/include/asm/uaccess.h: do {									\
     +		new_word = ((u8)new) << shift;
     +		break;
     +	}
    ++	tmp = old_word; /* don't modify *old_p on fault */
     +	asm volatile(
     +		       "spka	0(%[access_key])\n"
     +		"	sacf	256\n"
     +		"0:	l	%[tmp],%[aligned]\n"
    -+		"1:	nr	%[tmp],%[hole_mask]\n"
    ++		"1:	nr	%[tmp],%[mask]\n"
    ++		"	xilf	%[mask],0xffffffff\n"
     +		"	or	%[new_word],%[tmp]\n"
    -+		"	or	%[old_word],%[tmp]\n"
    -+		"	lr	%[tmp],%[old_word]\n"
    -+		"2:	cs	%[tmp],%[new_word],%[aligned]\n"
    -+		"3:	jnl	4f\n"
    -+		"	xrk	%[diff],%[tmp],%[old_word]\n"
    -+		"	nr	%[diff],%[hole_mask]\n"
    -+		"	xr	%[new_word],%[diff]\n"
    -+		"	xr	%[old_word],%[diff]\n"
    -+		"	xrk	%[diff],%[tmp],%[old_word]\n"
    ++		"	or	%[tmp],%[old_word]\n"
    ++		"2:	lr	%[old_word],%[tmp]\n"
    ++		"3:	cs	%[tmp],%[new_word],%[aligned]\n"
    ++		"4:	jnl	5f\n"
    ++		/* We'll restore old_word before the cs, use reg for the diff */
    ++		"	xr	%[old_word],%[tmp]\n"
    ++		/* Apply diff assuming only bits outside target byte(s) changed */
    ++		"	xr	%[new_word],%[old_word]\n"
    ++		/* If prior assumption false we exit loop, so not an issue */
    ++		"	nr	%[old_word],%[mask]\n"
     +		"	jz	2b\n"
    -+		"4:	ipm	%[ret]\n"
    ++		"5:	ipm	%[ret]\n"
     +		"	srl	%[ret],28\n"
    -+		"5:	sacf	768\n"
    ++		"6:	sacf	768\n"
     +		"	spka	%[default_key]\n"
    -+		EX_TABLE(0b, 5b) EX_TABLE(1b, 5b)
    -+		EX_TABLE(2b, 5b) EX_TABLE(3b, 5b)
    ++		EX_TABLE(0b, 6b) EX_TABLE(1b, 6b)
    ++		EX_TABLE(3b, 6b) EX_TABLE(4b, 6b)
     +		: [old_word] "+&d" (old_word),
     +		  [new_word] "+&d" (new_word),
    -+		  [tmp] "=&d" (tmp),
    ++		  [tmp] "+&d" (tmp),
     +		  [aligned] "+Q" (*(u32 *)aligned),
    -+		  [diff] "=&d" (diff),
     +		  [ret] "+d" (ret)
     +		: [access_key] "a" (access_key << 4),
    -+		  [hole_mask] "d" (~mask),
    ++		  [mask] "d" (~mask),
     +		  [default_key] "J" (PAGE_DEFAULT_KEY)
     +		: "cc"
     +	);
    @@ arch/s390/include/asm/uaccess.h: do {									\
     + * cmpxchg_user_key_size() - cmpxchg with user space target, honoring storage keys
     + * @size: Size of the value being cmpxchg'ed, one of 1,2,4,8,16.
     + * @address: User space address of value to compare to *@old_p and exchange with
    -+ *           *@new. Must be aligned to @size.
    ++ *           @new. Must be aligned to @size.
     + * @old_p: Pointer to old value. Interpreted as a @size byte integer and compared
     + *         to the content pointed to by @address in order to determine if the
     + *         exchange occurs. The value read from @address is written back to *@old_p.
 2:  80e3fda3d2af !  2:  c6731b0063ab KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg
    @@ Commit message
         Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
     
      ## include/uapi/linux/kvm.h ##
    -@@ include/uapi/linux/kvm.h: struct kvm_translation {
    - struct kvm_s390_mem_op {
    - 	/* in */
    - 	__u64 gaddr;		/* the guest address */
    -+	/* in & out */
    - 	__u64 flags;		/* flags */
    -+	/* in */
    - 	__u32 size;		/* amount of bytes */
    - 	__u32 op;		/* type of operation */
    - 	__u64 buf;		/* buffer in userspace */
     @@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op {
      		struct {
      			__u8 ar;	/* the access register number */
      			__u8 key;	/* access key, ignored if flag unset */
    -+			/* in & out */
    -+			__u64 old[2];	/* ignored if flag unset */
    ++			__u8 pad1[6];	/* ignored */
    ++			__u64 old_p;	/* ignored if flag unset */
      		};
      		__u32 sida_offset; /* offset into the sida */
      		__u8 reserved[32]; /* ignored */
    @@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op {
      #define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
      #define KVM_S390_MEMOP_F_SKEY_PROTECTION	(1ULL << 2)
     +#define KVM_S390_MEMOP_F_CMPXCHG		(1ULL << 3)
    ++/* Non program exception return codes (pgm codes are 16 bit) */
    ++#define KVM_S390_MEMOP_R_NO_XCHG		((1 << 16) + 0)
      
      /* for KVM_INTERRUPT */
      struct kvm_interrupt {
    @@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l
     +	if (kvm_is_error_hva(hva))
     +		return PGM_ADDRESSING;
     +	/*
    -+	 * Check if it's a ro memslot, even tho that can't occur (they're unsupported).
    ++	 * Check if it's a read-only memslot, even though that cannot occur
    ++	 * since those are unsupported.
     +	 * Don't try to actually handle that case.
     +	 */
     +	if (!writable)
    @@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l
     +	ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key);
     +	mark_page_dirty_in_slot(kvm, slot, gfn);
     +	/*
    -+	 * Assume that the fault is caused by key protection, the alternative
    -+	 * is that the user page is write protected.
    ++	 * Assume that the fault is caused by protection, either key protection
    ++	 * or user page write protection.
     +	 */
     +	if (ret == -EFAULT)
     +		ret = PGM_PROTECTION;
    @@ arch/s390/kvm/kvm-s390.c: int kvm_vm_ioctl_check_extension(struct kvm *kvm, long
      		r = MEM_OP_MAX_SIZE;
      		break;
     +	case KVM_CAP_S390_MEM_OP_EXTENSION:
    ++		/*
    ++		 * Flag bits indicating which extensions are supported.
    ++		 * The first extension doesn't use a flag, but pretend it does,
    ++		 * this way that can be changed in the future.
    ++		 */
     +		r = 0x3;
     +		break;
      	case KVM_CAP_NR_VCPUS:
      	case KVM_CAP_MAX_VCPUS:
      	case KVM_CAP_MAX_VCPU_ID:
     @@ arch/s390/kvm/kvm-s390.c: static bool access_key_invalid(u8 access_key)
    - 	return access_key > 0xf;
    - }
    - 
    --static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
    -+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop, bool *modified)
    + static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
      {
      	void __user *uaddr = (void __user *)mop->buf;
    -+	unsigned __int128 old;
    ++	void __user *old_p = (void __user *)mop->old_p;
     +	union {
     +		unsigned __int128 quad;
     +		char raw[sizeof(unsigned __int128)];
    -+	} new = { .quad = 0 };
    ++	} old = { .quad = 0}, new = { .quad = 0 };
    ++	unsigned int off_in_quad = sizeof(unsigned __int128) - mop->size;
      	u64 supported_flags;
      	void *tmpbuf = NULL;
      	int r, srcu_idx;
      
    -+	*modified = false;
      	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
     -			  | KVM_S390_MEMOP_F_CHECK_ONLY;
     +			  | KVM_S390_MEMOP_F_CHECK_ONLY
    @@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct
     +	if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) {
     +		if (mop->size > sizeof(new))
     +			return -EINVAL;
    -+		if (copy_from_user(&new.raw[sizeof(new) - mop->size], uaddr, mop->size))
    ++		/* off_in_quad has been validated */
    ++		if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
    ++			return -EFAULT;
    ++		if (copy_from_user(&old.raw[off_in_quad], old_p, mop->size))
     +			return -EFAULT;
    -+		memcpy(&old, mop->old, sizeof(old));
     +	}
      	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
      		tmpbuf = vmalloc(mop->size);
    @@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct
      			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
     +		} else if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) {
     +			r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size,
    -+						       &old, new.quad, mop->key);
    -+			if (!r) {
    -+				mop->flags &= ~KVM_S390_MEMOP_F_CMPXCHG;
    -+			} else if (r == 1) {
    -+				memcpy(mop->old, &old, sizeof(old));
    -+				r = 0;
    ++						       &old.quad, new.quad, mop->key);
    ++			if (r == 1) {
    ++				r = KVM_S390_MEMOP_R_NO_XCHG;
    ++				if (copy_to_user(old_p, &old.raw[off_in_quad], mop->size))
    ++					r = -EFAULT;
     +			}
    -+			*modified = true;
      		} else {
      			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
      				r = -EFAULT;
    -@@ arch/s390/kvm/kvm-s390.c: long kvm_arch_vm_ioctl(struct file *filp,
    - 	}
    - 	case KVM_S390_MEM_OP: {
    - 		struct kvm_s390_mem_op mem_op;
    -+		bool modified;
    - 
    --		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
    --			r = kvm_s390_vm_mem_op(kvm, &mem_op);
    --		else
    -+		r = copy_from_user(&mem_op, argp, sizeof(mem_op));
    -+		if (r) {
    - 			r = -EFAULT;
    -+			break;
    -+		}
    -+		r = kvm_s390_vm_mem_op(kvm, &mem_op, &modified);
    -+		if (r)
    -+			break;
    -+		if (modified) {
    -+			r = copy_to_user(argp, &mem_op, sizeof(mem_op));
    -+			if (r) {
    -+				r = -EFAULT;
    -+				break;
    -+			}
    -+		}
    - 		break;
    - 	}
    - 	case KVM_S390_ZPCI_OP: {
 3:  cf036cd58aff <  -:  ------------ Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG
 -:  ------------ >  3:  6cb32b244899 Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG
 4:  e1d25110a983 !  4:  5f1217ad9d31 KVM: s390: selftest: memop: Pass mop_desc via pointer
    @@ Commit message
         The struct is quite large, so this seems nicer.
     
         Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
    +    Reviewed-by: Thomas Huth <thuth@redhat.com>
     
      ## tools/testing/selftests/kvm/s390x/memop.c ##
     @@ tools/testing/selftests/kvm/s390x/memop.c: struct mop_desc {
 5:  e02924290577 =  5:  86a15b53846a KVM: s390: selftest: memop: Replace macros by functions
 7:  de6ac5a125e2 !  6:  49e67d7559de KVM: s390: selftest: memop: Add cmpxchg tests
    @@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_fr
      	}
     +	if (desc->old) {
     +		ksmo.flags |= KVM_S390_MEMOP_F_CMPXCHG;
    -+		switch (ksmo.size) {
    -+		case 1:
    -+			ksmo.old[1] = *(uint8_t *)desc->old;
    -+			break;
    -+		case 2:
    -+			ksmo.old[1] = *(uint16_t *)desc->old;
    -+			break;
    -+		case 4:
    -+			ksmo.old[1] = *(uint32_t *)desc->old;
    -+			break;
    -+		case 8:
    -+			ksmo.old[1] = *(uint64_t *)desc->old;
    -+			break;
    -+		case 16:
    -+			memcpy(ksmo.old, desc->old, sizeof(ksmo.old));
    -+			break;
    -+		}
    ++		ksmo.old_p = (uint64_t)desc->old;
     +	}
      	if (desc->_ar)
      		ksmo.ar = desc->ar;
      	else
    -@@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc)
    - 	return ksmo;
    - }
    - 
    -+static void cmpxchg_write_back(struct kvm_s390_mem_op *ksmo, struct mop_desc *desc)
    -+{
    -+	if (desc->old) {
    -+		switch (ksmo->size) {
    -+		case 1:
    -+			*(uint8_t *)desc->old = ksmo->old[1];
    -+			break;
    -+		case 2:
    -+			*(uint16_t *)desc->old = ksmo->old[1];
    -+			break;
    -+		case 4:
    -+			*(uint32_t *)desc->old = ksmo->old[1];
    -+			break;
    -+		case 8:
    -+			*(uint64_t *)desc->old = ksmo->old[1];
    -+			break;
    -+		case 16:
    -+			memcpy(desc->old, ksmo->old, sizeof(ksmo->old));
    -+			break;
    -+		}
    -+	}
    -+	if (desc->cmpxchg_success)
    -+		*desc->cmpxchg_success = !(ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG);
    -+}
    -+
    - struct test_info {
    - 	struct kvm_vm *vm;
    - 	struct kvm_vcpu *vcpu;
     @@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksm
      		printf("ABSOLUTE, WRITE, ");
      		break;
      	}
     -	printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u",
     -	       ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key);
    -+	printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old[0]=%llu, old[1]=%llu",
    ++	printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_p=%llx",
     +	       ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
    -+	       ksmo->old[0], ksmo->old[1]);
    ++	       ksmo->old_p);
      	if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
      		printf(", CHECK_ONLY");
      	if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
    @@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vc
      }
      
     -static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo)
    -+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
    -+			struct mop_desc *desc)
    ++static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
    ++			   struct mop_desc *desc)
      {
      	struct kvm_vcpu *vcpu = info.vcpu;
      
    -@@ tools/testing/selftests/kvm/s390x/memop.c: static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo)
    - 		vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
    + 	if (!vcpu)
    +-		vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
    ++		return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
      	else
    - 		vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
    -+	cmpxchg_write_back(ksmo, desc);
    +-		vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
    ++		return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
      }
      
     -static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo)
    -+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
    -+			   struct mop_desc *desc)
    ++static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
    ++			struct mop_desc *desc)
      {
    - 	struct kvm_vcpu *vcpu = info.vcpu;
    +-	struct kvm_vcpu *vcpu = info.vcpu;
     +	int r;
    ++
    ++	r = err_memop_ioctl(info, ksmo, desc);
    ++	if (ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG) {
    ++		if (desc->cmpxchg_success)
    ++			*desc->cmpxchg_success = !r;
    ++		if (r == KVM_S390_MEMOP_R_NO_XCHG)
    ++			r = 0;
    ++	}
    ++	TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
      
    - 	if (!vcpu)
    +-	if (!vcpu)
     -		return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
    -+		r = __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
    - 	else
    +-	else
     -		return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
    -+		r = __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
    -+	cmpxchg_write_back(ksmo, desc);
    -+	return r;
      }
      
      #define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...)	\
 6:  f4ce20cd7eff =  7:  faad9cf03ea6 KVM: s390: selftest: memop: Add bad address test
 8:  0bad86fd6183 !  8:  8070036aa89a KVM: s390: selftest: memop: Fix typo
    @@ Metadata
      ## Commit message ##
         KVM: s390: selftest: memop: Fix typo
     
    +    "acceeded" isn't a word, should be "exceeded".
    +
         Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
    +    Reviewed-by: Thomas Huth <thuth@redhat.com>
     
      ## tools/testing/selftests/kvm/s390x/memop.c ##
     @@ tools/testing/selftests/kvm/s390x/memop.c: static void test_errors_key_fetch_prot_override_enabled(void)
 9:  7a1e9cb79bbb =  9:  18c423e4e3ad KVM: s390: selftest: memop: Fix wrong address being used in test

base-commit: 4fe89d07dcc2804c8b562f6c7896a45643d34b2f