@@ -141,6 +141,69 @@ DO_LD_PRIM_3(ld1dqu_le, ld1dd_le)
#define sve_st1dq_be_tlb sve_st1dd_be_tlb
#define sve_st1dq_le_tlb sve_st1dd_le_tlb
+/*
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
+ * corresponds to storing the two 64-bit pieces in little-endian order.
+ */
+/* FIXME: Nothing in this file makes any effort at atomicity. */
+
+static inline void sve_ld1qq_be_host(void *vd, intptr_t reg_off, void *host)
+{ /* 128-bit big-endian load: memory holds the high half first. */
+ sve_ld1dd_be_host(vd, reg_off + 8, host); /* high half */
+ sve_ld1dd_be_host(vd, reg_off, host + 8); /* low half */
+}
+
+static inline void sve_ld1qq_le_host(void *vd, intptr_t reg_off, void *host)
+{ /* 128-bit little-endian load: memory holds the low half first. */
+ sve_ld1dd_le_host(vd, reg_off, host); /* low half */
+ sve_ld1dd_le_host(vd, reg_off + 8, host + 8); /* high half */
+}
+
+static inline void
+sve_ld1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong addr, uintptr_t ra)
+{ /* 128-bit big-endian load at addr: the high half comes first. */
+ sve_ld1dd_be_tlb(env, vd, reg_off + 8, addr, ra); /* high half */
+ sve_ld1dd_be_tlb(env, vd, reg_off, addr + 8, ra); /* low half */
+}
+
+static inline void
+sve_ld1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong addr, uintptr_t ra)
+{ /* 128-bit little-endian load at addr: the low half comes first. */
+ sve_ld1dd_le_tlb(env, vd, reg_off, addr, ra); /* low half */
+ sve_ld1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra); /* high half */
+}
+
+static inline void sve_st1qq_be_host(void *vd, intptr_t reg_off, void *host)
+{ /* 128-bit big-endian store: the high half goes to the lower address. */
+ sve_st1dd_be_host(vd, reg_off + 8, host); /* high half */
+ sve_st1dd_be_host(vd, reg_off, host + 8); /* low half */
+}
+
+static inline void sve_st1qq_le_host(void *vd, intptr_t reg_off, void *host)
+{ /* 128-bit little-endian store: the low half goes to the lower address. */
+ sve_st1dd_le_host(vd, reg_off, host); /* low half */
+ sve_st1dd_le_host(vd, reg_off + 8, host + 8); /* high half */
+}
+
+static inline void
+sve_st1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong addr, uintptr_t ra)
+{ /* 128-bit big-endian store at addr: the high half comes first. */
+ sve_st1dd_be_tlb(env, vd, reg_off + 8, addr, ra); /* high half */
+ sve_st1dd_be_tlb(env, vd, reg_off, addr + 8, ra); /* low half */
+}
+
+static inline void
+sve_st1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong addr, uintptr_t ra)
+{ /* 128-bit little-endian store at addr: the low half comes first. */
+ sve_st1dd_le_tlb(env, vd, reg_off, addr, ra); /* low half */
+ sve_st1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra); /* high half */
+}
+
#undef DO_LD_TLB
#undef DO_ST_TLB
#undef DO_LD_HOST
@@ -408,54 +408,22 @@ static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
TLB(env, useronly_clean_ptr(addr), val, ra); \
}
-/*
- * The ARMVectorReg elements are stored in host-endian 64-bit units.
- * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
- * corresponds to storing the two 64-bit pieces in little-endian order.
- */
-#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
-static inline void HNAME##_host(void *za, intptr_t off, void *host) \
-{ \
- uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
- uint64_t *ptr = za + off; \
- ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
-} \
+#define DO_LDQ(HNAME, VNAME) /* VNAME##_v_{host,tlb}: call HNAME at vslice offset */ \
static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
{ \
HNAME##_host(za, tile_vslice_offset(off), host); \
} \
-static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
- target_ulong addr, uintptr_t ra) \
-{ \
- uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
- uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
- uint64_t *ptr = za + off; \
- ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
-} \
static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
target_ulong addr, uintptr_t ra) \
{ \
HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
}
-#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
-static inline void HNAME##_host(void *za, intptr_t off, void *host) \
-{ \
- uint64_t *ptr = za + off; \
- HOST(host, ptr[BE]); \
- HOST(host + 8, ptr[!BE]); \
-} \
+#define DO_STQ(HNAME, VNAME) \
static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
{ \
HNAME##_host(za, tile_vslice_offset(off), host); \
} \
-static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
- target_ulong addr, uintptr_t ra) \
-{ \
- uint64_t *ptr = za + off; \
- TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
- TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
-} \
static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
target_ulong addr, uintptr_t ra) \
{ \
@@ -470,8 +438,8 @@ DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
-DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
-DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be) /* defines sme_ld1q_be_v_host/_v_tlb */
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le) /* defines sme_ld1q_le_v_host/_v_tlb */
DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
@@ -481,8 +449,8 @@ DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
-DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
-DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
+DO_STQ(sve_st1qq_be, sme_st1q_be) /* defines sme_st1q_be_v_host/_v_tlb */
+DO_STQ(sve_st1qq_le, sme_st1q_le) /* defines sme_st1q_le_v_host/_v_tlb */
#undef DO_LD
#undef DO_ST
Move the sve_{ld1,st1}qq_{be,le}_{host,tlb} helpers from sme_helper.c to the shared header sve_ldst_internal.h, so that DO_LDQ and DO_STQ no longer generate them. Add a comment noting the lack of atomicity. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/sve_ldst_internal.h | 63 ++++++++++++++++++++++++++++++ target/arm/tcg/sme_helper.c | 44 +++------------------ 2 files changed, 69 insertions(+), 38 deletions(-)