Message ID | 20180211205848.4568-8-richard.henderson@linaro.org
---|---
State | New
Series | target/arm: More SVE prep work
On 11 February 2018 at 20:58, Richard Henderson <richard.henderson@linaro.org> wrote:
> As an implementation choice, widening VL has zeroed the
> previously inaccessible portion of the sve registers.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu.h     |  2 ++
>  linux-user/syscall.c | 20 +++++++++++++++++
>  target/arm/cpu64.c   | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 83 insertions(+)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 51a3e16275..8e1016cfd6 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -842,6 +842,8 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
>  #ifdef TARGET_AARCH64
>  int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
>  int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
> +unsigned aarch64_get_sve_vlen(CPUARMState *env);
> +unsigned aarch64_set_sve_vlen(CPUARMState *env, unsigned vlen);
>  #endif
>
>  target_ulong do_arm_semihosting(CPUARMState *env);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 82b35a6bdf..4840bf502f 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -10659,6 +10659,26 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
>              break;
>          }
>  #endif
> +#ifdef TARGET_AARCH64
> +        case 50: /* PR_SVE_SET_VL */

Could you put an

#ifndef PR_SVE_SET_VL
#define PR_SVE_SET_VL 50
#endif

(ditto for PR_SVE_GET_VL) in somewhere suitable rather than using
hard-coded constants, please?

> +            /* We cannot support either PR_SVE_SET_VL_ONEXEC
> +               or PR_SVE_VL_INHERIT.  Therefore, anything above
> +               ARM_MAX_VQ results in EINVAL.  */
> +            if (!arm_feature(cpu_env, ARM_FEATURE_SVE)
> +                || arg2 > ARM_MAX_VQ * 16 || arg2 & 15) {
> +                ret = -TARGET_EINVAL;
> +            } else {
> +                ret = aarch64_set_sve_vlen(cpu_env, arg2);
> +            }
> +            break;
> +        case 51: /* PR_SVE_GET_VL */
> +            if (arm_feature(cpu_env, ARM_FEATURE_SVE)) {
> +                ret = aarch64_get_sve_vlen(cpu_env);
> +            } else {
> +                ret = -TARGET_EINVAL;
> +            }

Seems a bit odd to write the if() with the working case first for one
of these and with the error case first for the other.

> +            break;
> +#endif /* AARCH64 */
>          case PR_GET_SECCOMP:
>          case PR_SET_SECCOMP:
>              /* Disable seccomp to prevent the target disabling syscalls we
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index 1c330adc28..6dee78f006 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -363,3 +363,64 @@ static void aarch64_cpu_register_types(void)
>  }
>
>  type_init(aarch64_cpu_register_types)
> +
> +/* Return the current cumulative SVE VLEN.  */
> +unsigned aarch64_get_sve_vlen(CPUARMState *env)
> +{
> +    return ((env->vfp.zcr_el[1] & 0xf) + 1) * 16;

If this is supposed to also work for system-emulation mode it needs
to look at zcr_el[2] and [3]. If it's user-emulation mode only I think
we should #ifdef it and add a comment so that's clear. Similarly with
_set_. In fact if it's user-emulation only then it probably belongs
in linux-user/arm/ ?

> +}
> +
> +/* Set the cumulative ZCR.EL to VLEN, or the nearest supported value.
> +   Return the new value.  */
> +unsigned aarch64_set_sve_vlen(CPUARMState *env, unsigned vl)
> +{
> +    unsigned vq = vl / 16;
> +    unsigned old_vq = (env->vfp.zcr_el[1] & 0xf) + 1;
> +
> +    if (vq < 1) {
> +        vq = 1;
> +    } else if (vq > ARM_MAX_VQ) {
> +        vq = ARM_MAX_VQ;
> +    }
> +    env->vfp.zcr_el[1] = vq - 1;
> +
> +    /* The manual sez that when SVE is enabled and VL is widened the

"says". "sez" will probably get picked up and fixed next time
somebody runs a spellcheck over the codebase, so we might as well
save them the work.

> +     * implementation is allowed to zero the previously inaccessible
> +     * portion of the registers.  The corollary to that is that when
> +     * SVE is enabled and VL is narrowed we are also allowed to zero
> +     * the now inaccessible portion of the registers.
> +     *
> +     * The intent of this is that no predicate bit beyond VL is ever set.
> +     * Which means that some operations on predicate registers themselves
> +     * may operate on full uint64_t or even unrolled across the maximum
> +     * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
> +     * may well be cheaper than conditionals to restrict to the operation

"restrict the operation" ?

> +     * to the relevant portion of a uint16_t[16].
> +     *
> +     * ??? Need to move this somewhere else, so that it applies to
> +     * changes to the real system registers and EL state changes.
> +     */
> +    if (vq < old_vq) {
> +        unsigned i, j;
> +        uint64_t pmask;
> +
> +        /* Zap the high bits of the zregs.  */
> +        for (i = 0; i < 32; i++) {
> +            memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
> +        }
> +
> +        /* Zap the high bits of the pregs and ffr.  */
> +        pmask = 0;
> +        if (vq & 3) {
> +            pmask = ~(-1ULL << (16 * (vq & 3)));
> +        }
> +        for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
> +            for (i = 0; i < 17; ++i) {
> +                env->vfp.pregs[i].p[j] &= pmask;
> +            }
> +            pmask = 0;
> +        }
> +    }
> +
> +    return vq * 16;
> +}
> --
> 2.14.3

thanks
-- PMM
As an implementation choice, widening VL has zeroed the
previously inaccessible portion of the sve registers.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h     |  2 ++
 linux-user/syscall.c | 20 +++++++++++++++++
 target/arm/cpu64.c   | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 51a3e16275..8e1016cfd6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -842,6 +842,8 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
 #ifdef TARGET_AARCH64
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+unsigned aarch64_get_sve_vlen(CPUARMState *env);
+unsigned aarch64_set_sve_vlen(CPUARMState *env, unsigned vlen);
 #endif

 target_ulong do_arm_semihosting(CPUARMState *env);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 82b35a6bdf..4840bf502f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -10659,6 +10659,26 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             break;
         }
 #endif
+#ifdef TARGET_AARCH64
+        case 50: /* PR_SVE_SET_VL */
+            /* We cannot support either PR_SVE_SET_VL_ONEXEC
+               or PR_SVE_VL_INHERIT.  Therefore, anything above
+               ARM_MAX_VQ results in EINVAL.  */
+            if (!arm_feature(cpu_env, ARM_FEATURE_SVE)
+                || arg2 > ARM_MAX_VQ * 16 || arg2 & 15) {
+                ret = -TARGET_EINVAL;
+            } else {
+                ret = aarch64_set_sve_vlen(cpu_env, arg2);
+            }
+            break;
+        case 51: /* PR_SVE_GET_VL */
+            if (arm_feature(cpu_env, ARM_FEATURE_SVE)) {
+                ret = aarch64_get_sve_vlen(cpu_env);
+            } else {
+                ret = -TARGET_EINVAL;
+            }
+            break;
+#endif /* AARCH64 */
         case PR_GET_SECCOMP:
         case PR_SET_SECCOMP:
             /* Disable seccomp to prevent the target disabling syscalls we
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 1c330adc28..6dee78f006 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -363,3 +363,64 @@ static void aarch64_cpu_register_types(void)
 }

 type_init(aarch64_cpu_register_types)
+
+/* Return the current cumulative SVE VLEN.  */
+unsigned aarch64_get_sve_vlen(CPUARMState *env)
+{
+    return ((env->vfp.zcr_el[1] & 0xf) + 1) * 16;
+}
+
+/* Set the cumulative ZCR.EL to VLEN, or the nearest supported value.
+   Return the new value.  */
+unsigned aarch64_set_sve_vlen(CPUARMState *env, unsigned vl)
+{
+    unsigned vq = vl / 16;
+    unsigned old_vq = (env->vfp.zcr_el[1] & 0xf) + 1;
+
+    if (vq < 1) {
+        vq = 1;
+    } else if (vq > ARM_MAX_VQ) {
+        vq = ARM_MAX_VQ;
+    }
+    env->vfp.zcr_el[1] = vq - 1;
+
+    /* The manual sez that when SVE is enabled and VL is widened the
+     * implementation is allowed to zero the previously inaccessible
+     * portion of the registers.  The corollary to that is that when
+     * SVE is enabled and VL is narrowed we are also allowed to zero
+     * the now inaccessible portion of the registers.
+     *
+     * The intent of this is that no predicate bit beyond VL is ever set.
+     * Which means that some operations on predicate registers themselves
+     * may operate on full uint64_t or even unrolled across the maximum
+     * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
+     * may well be cheaper than conditionals to restrict to the operation
+     * to the relevant portion of a uint16_t[16].
+     *
+     * ??? Need to move this somewhere else, so that it applies to
+     * changes to the real system registers and EL state changes.
+     */
+    if (vq < old_vq) {
+        unsigned i, j;
+        uint64_t pmask;
+
+        /* Zap the high bits of the zregs.  */
+        for (i = 0; i < 32; i++) {
+            memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
+        }
+
+        /* Zap the high bits of the pregs and ffr.  */
+        pmask = 0;
+        if (vq & 3) {
+            pmask = ~(-1ULL << (16 * (vq & 3)));
+        }
+        for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
+            for (i = 0; i < 17; ++i) {
+                env->vfp.pregs[i].p[j] &= pmask;
+            }
+            pmask = 0;
+        }
+    }
+
+    return vq * 16;
+}
--
2.14.3
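[To make the predicate-masking arithmetic easy to check, here is a
small standalone program (not part of the patch) that reproduces the
pmask computation from aarch64_set_sve_vlen(). It relies only on the
layout the code above assumes: each uint64_t in pregs[].p[] holds the
predicate bits for four 128-bit quadwords, 16 bits per quadword, so
for a vq that is not a multiple of 4 the word at index vq / 4 keeps
its low 16 * (vq & 3) bits and every later word is cleared:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    unsigned vq;

    for (vq = 1; vq <= 8; vq++) {
        /* Same computation as in aarch64_set_sve_vlen().  */
        uint64_t pmask = 0;
        if (vq & 3) {
            pmask = ~(-1ULL << (16 * (vq & 3)));
        }
        /* p[vq / 4] is ANDed with pmask; all later words with 0.  */
        printf("vq=%u: p[%u] &= 0x%016" PRIx64 "\n", vq, vq / 4, pmask);
    }
    return 0;
}

For example vq = 6 (a 768-bit VL) gives pmask = 0x00000000ffffffff
applied to p[1]: quadwords 4 and 5 stay live, 6 and 7 are cleared, and
p[2] onward is zeroed outright. Note also the round trip through
ZCR_EL1.LEN: aarch64_set_sve_vlen() stores vq - 1 in the low four
bits, and aarch64_get_sve_vlen() recovers (LEN + 1) * 16 bytes.]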