@@ -733,27 +733,33 @@ typedef struct CPUArchState {
uint64_t scxtnum_el[4];
- /*
- * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
- * as we do with vfp.zregs[]. This corresponds to the architectural ZA
- * array, where ZA[N] is in the least-significant bytes of env->zarray[N].
- * When SVL is less than the architectural maximum, the accessible
- * storage is restricted, such that if the SVL is X bytes the guest can
- * see only the bottom X elements of zarray[], and only the least
- * significant X bytes of each element of the array. (In other words,
- * the observable part is always square.)
- *
- * The ZA storage can also be considered as a set of square tiles of
- * elements of different sizes. The mapping from tiles to the ZA array
- * is architecturally defined, such that for tiles of elements of esz
- * bytes, the Nth row (or "horizontal slice") of tile T is in
- * ZA[T + N * esz]. Note that this means that each tile is not contiguous
- * in the ZA storage, because its rows are striped through the ZA array.
- *
- * Because this is so large, keep this toward the end of the reset area,
- * to keep the offsets into the rest of the structure smaller.
- */
- ARMVectorReg zarray[ARM_MAX_VQ * 16];
+ struct {
+ /*
+ * SME ZA storage -- 256 x 256 byte array, with bytes in host
+ * word order, as we do with vfp.zregs[]. This corresponds to
+ * the architectural ZA array, where ZA[N] is in the least
+ * significant bytes of env->za_state.za[N].
+ *
+ * When SVL is less than the architectural maximum, the accessible
+ * storage is restricted, such that if the SVL is X bytes the guest
+ * can see only the bottom X elements of za[], and only the least
+ * significant X bytes of each element of the array. (In other words,
+ * the observable part is always square.)
+ *
+ * The ZA storage can also be considered as a set of square tiles of
+ * elements of different sizes. The mapping from tiles to the ZA array
+ * is architecturally defined, such that for tiles of elements of esz
+ * bytes, the Nth row (or "horizontal slice") of tile T is in
+ * ZA[T + N * esz]. Note that this means that each tile is not
+ * contiguous in the ZA storage, because its rows are striped through
+ * the ZA array.
+ *
+ * Because this is so large, keep this toward the end of the
+ * reset area, to keep the offsets into the rest of the structure
+ * smaller.
+ */
+ ARMVectorReg za[ARM_MAX_VQ * 16];
+ } za_state;
#endif
struct CPUBreakpoint *cpu_breakpoint[16];
@@ -248,7 +248,7 @@ static void target_setup_za_record(struct target_za_context *za,
for (i = 0; i < vl; ++i) {
uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
for (j = 0; j < vq * 2; ++j) {
- __put_user_e(env->zarray[i].d[j], z + j, le);
+ __put_user_e(env->za_state.za[i].d[j], z + j, le);
}
}
}
@@ -397,7 +397,7 @@ static bool target_restore_za_record(CPUARMState *env,
for (i = 0; i < vl; ++i) {
uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
for (j = 0; j < vq * 2; ++j) {
- __get_user_e(env->zarray[i].d[j], z + j, le);
+ __get_user_e(env->za_state.za[i].d[j], z + j, le);
}
}
return true;
@@ -1369,8 +1369,8 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i);
for (j = zcr_len; j >= 0; --j) {
qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c",
- env->zarray[i].d[2 * j + 1],
- env->zarray[i].d[2 * j],
+ env->za_state.za[i].d[2 * j + 1],
+ env->za_state.za[i].d[2 * j],
j ? ':' : '\n');
}
}
@@ -6438,7 +6438,7 @@ void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask)
* when disabled either.
*/
if (change & new & R_SVCR_ZA_MASK) {
- memset(env->zarray, 0, sizeof(env->zarray));
+ memset(&env->za_state, 0, sizeof(env->za_state));
}
if (tcg_enabled()) {
@@ -315,7 +315,7 @@ static const VMStateDescription vmstate_za = {
.minimum_version_id = 1,
.needed = za_needed,
.fields = (const VMStateField[]) {
- VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0,
+ VMSTATE_STRUCT_ARRAY(env.za_state.za, ARMCPU, ARM_MAX_VQ * 16, 0,
vmstate_vreg, ARMVectorReg),
VMSTATE_END_OF_LIST()
}
@@ -39,12 +39,12 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
uint32_t i;
/*
- * Special case clearing the entire ZA space.
+ * Special case clearing the entire ZArray.
* This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
* parts of the ZA storage outside of SVL.
*/
if (imm == 0xff) {
- memset(env->zarray, 0, sizeof(env->zarray));
+ memset(env->za_state.za, 0, sizeof(env->za_state.za));
return;
}
@@ -54,7 +54,7 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
*/
for (i = 0; i < svl; i++) {
if (imm & (1 << (i % 8))) {
- memset(&env->zarray[i], 0, svl);
+ memset(&env->za_state.za[i], 0, svl);
}
}
}
@@ -92,7 +92,7 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
offset = tile * sizeof(ARMVectorReg);
/* Include the byte offset of zarray to make this relative to env. */
- offset += offsetof(CPUARMState, zarray);
+ offset += offsetof(CPUARMState, za_state.za);
tcg_gen_addi_i32(tmp, tmp, offset);
/* Add the byte offset to env to produce the final pointer. */
@@ -112,7 +112,7 @@ static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
TCGv_ptr addr = tcg_temp_new_ptr();
int offset;
- offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray);
+ offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, za_state.za);
tcg_gen_addi_ptr(addr, tcg_env, offset);
return addr;
The whole ZA state will also contain ZT0. Make things easier in aarch64_set_svcr to zero both by wrapping them in a common structure. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/cpu.h | 48 +++++++++++++++++++--------------- linux-user/aarch64/signal.c | 4 +-- target/arm/cpu.c | 4 +-- target/arm/helper.c | 2 +- target/arm/machine.c | 2 +- target/arm/tcg/sme_helper.c | 6 ++--- target/arm/tcg/translate-sme.c | 4 +-- 7 files changed, 38 insertions(+), 32 deletions(-)