diff mbox series

[v3,09/51] target/arm: Add the SME ZA storage to CPUARMState

Message ID 20220620175235.60881-10-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Scalable Matrix Extension | expand

Commit Message

Richard Henderson June 20, 2022, 5:51 p.m. UTC
Place this late in the resettable section of the structure,
to keep the most common element offsets from being > 64k.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h     |  8 ++++++++
 target/arm/machine.c | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

Comments

Peter Maydell June 21, 2022, 8:24 p.m. UTC | #1
On Mon, 20 Jun 2022 at 18:52, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Place this late in the resettable section of the structure,
> to keep the most common element offsets from being > 64k.
>
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> +
> +    /*
> +     * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
> +     * as we do with vfp.zregs[].  Because this is so large, keep this toward
> +     * the end of the reset area, to keep the offsets into the rest of the
> +     * structure smaller.
> +     */
> +    ARMVectorReg zarray[ARM_MAX_VQ * 16];

Suggested more detailed comment:

    /*
     * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
     * as we do with vfp.zregs[]. This corresponds to the architectural ZA
     * array, where ZA[N] is in the least-significant bytes of env->zarray[N].
     * When SVL is less than the architectural maximum, the accessible
     * storage is restricted, such that if the SVL is X bytes the guest can
     * see only the bottom X elements of zarray[], and only the least
     * significant X bytes of each element of the array. (In other words,
     * the observable part is always square.)
     *
     * The ZA storage can also be considered as a set of square tiles of
     * elements of different sizes. The mapping from tiles to the ZA array
     * is architecturally defined, such that for tiles of elements of esz
     * bytes, the Nth row (or "horizontal slice") of tile T is in
     * ZA[T + N * esz]. Note that this means that each tile is not
     * contiguous in the ZA storage because its rows are striped through
     * the ZA array.
     *
     * Because the ZA storage is so large, keep this toward the end of the reset
     * area, to keep the offsets into the rest of the structure smaller.
     */

Arguably para 2 is repeating architectural information, but I think it's
helpful as a brief summary (compare the comment earlier in this file about
mappings between S, D and Q views of the vector registers).

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 05d369e690..c3c7ec697d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -694,6 +694,14 @@  typedef struct CPUArchState {
     } keys;
 
     uint64_t scxtnum_el[4];
+
+    /*
+     * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
+     * as we do with vfp.zregs[].  Because this is so large, keep this toward
+     * the end of the reset area, to keep the offsets into the rest of the
+     * structure smaller.
+     */
+    ARMVectorReg zarray[ARM_MAX_VQ * 16];
 #endif
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 285e387d2c..54c5c62433 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -167,6 +167,39 @@  static const VMStateDescription vmstate_sve = {
         VMSTATE_END_OF_LIST()
     }
 };
+
+static const VMStateDescription vmstate_vreg = {
+    .name = "vreg",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64_ARRAY(d, ARMVectorReg, ARM_MAX_VQ * 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool za_needed(void *opaque)
+{
+    ARMCPU *cpu = opaque;
+
+    /*
+     * When ZA storage is disabled, its contents are discarded.
+     * It will be zeroed when ZA storage is re-enabled.
+     */
+    return FIELD_EX64(cpu->env.svcr, SVCR, ZA);
+}
+
+static const VMStateDescription vmstate_za = {
+    .name = "cpu/sme",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = za_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0,
+                             vmstate_vreg, ARMVectorReg),
+        VMSTATE_END_OF_LIST()
+    }
+};
 #endif /* AARCH64 */
 
 static bool serror_needed(void *opaque)
@@ -884,6 +917,7 @@  const VMStateDescription vmstate_arm_cpu = {
         &vmstate_m_security,
 #ifdef TARGET_AARCH64
         &vmstate_sve,
+        &vmstate_za,
 #endif
         &vmstate_serror,
         &vmstate_irq_line_state,