@@ -1658,6 +1658,14 @@ DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1722,6 +1730,14 @@ DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1946,6 +1962,14 @@ DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1998,6 +2022,14 @@ DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -6744,6 +6744,10 @@ DO_LDN_2(2, dd, MO_64)
DO_LDN_2(3, dd, MO_64)
DO_LDN_2(4, dd, MO_64)
+DO_LDN_2(2, qq, MO_128)
+DO_LDN_2(3, qq, MO_128)
+DO_LDN_2(4, qq, MO_128)
+
#undef DO_LDN_1
#undef DO_LDN_2
@@ -7310,6 +7314,10 @@ DO_STN_2(4, dd, MO_64, MO_64)
DO_STN_2(1, sq, MO_128, MO_32)
DO_STN_2(1, dq, MO_128, MO_64)
+DO_STN_2(2, qq, MO_128, MO_128)
+DO_STN_2(3, qq, MO_128, MO_128)
+DO_STN_2(4, qq, MO_128, MO_128)
+
#undef DO_STN_1
#undef DO_STN_2
@@ -89,7 +89,7 @@ static inline int expand_imm_sh8u(DisasContext *s, int x)
*/
static inline int msz_dtype(DisasContext *s, int msz)
{
- static const uint8_t dtype[4] = { 0, 5, 10, 15 };
+ static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 }; /* 18 is MO_128 */
return dtype[msz];
}
@@ -4775,23 +4775,25 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
*/
/* The memory mode of the dtype. */
-static const MemOp dtype_mop[18] = {
+static const MemOp dtype_mop[19] = {
MO_UB, MO_UB, MO_UB, MO_UB,
MO_SL, MO_UW, MO_UW, MO_UW,
MO_SW, MO_SW, MO_UL, MO_UL,
MO_SB, MO_SB, MO_SB, MO_UQ,
- MO_UL, MO_UQ,
+ /* Artificial values used by decode: dtypes 16, 17, 18 */
+ MO_UL, MO_UQ, MO_128
};
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
/* The vector element size of dtype. */
-static const uint8_t dtype_esz[18] = {
+static const uint8_t dtype_esz[19] = {
0, 1, 2, 3,
3, 1, 2, 3,
3, 2, 2, 3,
3, 2, 1, 3,
- 4, 4,
+ /* Artificial values used by decode: dtypes 16-18 (128-bit elements) */
+ 4, 4, 4,
};
uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
@@ -4842,7 +4844,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
}
/* Indexed by [mte][be][dtype][nreg] */
-static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = {
+static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = {
{ /* mte inactive, little-endian */
{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
@@ -4870,6 +4872,8 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = {
{ gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL },
+ { NULL, gen_helper_sve_ld2qq_le_r,
+ gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r },
},
/* mte inactive, big-endian */
@@ -4899,6 +4903,8 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = {
{ gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL },
+ { NULL, gen_helper_sve_ld2qq_be_r,
+ gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r },
},
},
@@ -4937,6 +4943,10 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = {
{ gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL },
+ { NULL,
+ gen_helper_sve_ld2qq_le_r_mte,
+ gen_helper_sve_ld3qq_le_r_mte,
+ gen_helper_sve_ld4qq_le_r_mte },
},
/* mte active, big-endian */
@@ -4974,6 +4984,10 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = {
{ gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL },
+ { NULL,
+ gen_helper_sve_ld2qq_be_r_mte,
+ gen_helper_sve_ld3qq_be_r_mte,
+ gen_helper_sve_ld4qq_be_r_mte },
},
},
};
@@ -4998,16 +5012,26 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
return false;
}
- /* dtypes 16 and 17 are artificial, representing 128-bit element */
- if (a->dtype < 16) {
+ /* dtypes 16-18 are artificial, representing 128-bit element */
+ switch (a->dtype) {
+ case 0 ... 15:
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
- } else {
+ break;
+ case 16: case 17:
if (!dc_isar_feature(aa64_sve2p1, s)) {
return false;
}
s->is_nonstreaming = true;
+ break;
+ case 18:
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
if (sve_access_check(s)) {
@@ -5021,16 +5045,26 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
- /* dtypes 16 and 17 are artificial, representing 128-bit element */
- if (a->dtype < 16) {
+ /* dtypes 16-18 are artificial, representing 128-bit element */
+ switch (a->dtype) {
+ case 0 ... 15:
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
- } else {
+ break;
+ case 16: case 17:
if (!dc_isar_feature(aa64_sve2p1, s)) {
return false;
}
s->is_nonstreaming = true;
+ break;
+ case 18:
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
if (sve_access_check(s)) {
@@ -5542,55 +5576,67 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1dd_be_r_mte,
gen_helper_sve_st1dq_be_r_mte } } },
};
- static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
+ static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = {
{ { { gen_helper_sve_st2bb_r,
gen_helper_sve_st2hh_le_r,
gen_helper_sve_st2ss_le_r,
- gen_helper_sve_st2dd_le_r },
+ gen_helper_sve_st2dd_le_r,
+ gen_helper_sve_st2qq_le_r },
{ gen_helper_sve_st3bb_r,
gen_helper_sve_st3hh_le_r,
gen_helper_sve_st3ss_le_r,
- gen_helper_sve_st3dd_le_r },
+ gen_helper_sve_st3dd_le_r,
+ gen_helper_sve_st3qq_le_r },
{ gen_helper_sve_st4bb_r,
gen_helper_sve_st4hh_le_r,
gen_helper_sve_st4ss_le_r,
- gen_helper_sve_st4dd_le_r } },
+ gen_helper_sve_st4dd_le_r,
+ gen_helper_sve_st4qq_le_r } },
{ { gen_helper_sve_st2bb_r,
gen_helper_sve_st2hh_be_r,
gen_helper_sve_st2ss_be_r,
- gen_helper_sve_st2dd_be_r },
+ gen_helper_sve_st2dd_be_r,
+ gen_helper_sve_st2qq_be_r },
{ gen_helper_sve_st3bb_r,
gen_helper_sve_st3hh_be_r,
gen_helper_sve_st3ss_be_r,
- gen_helper_sve_st3dd_be_r },
+ gen_helper_sve_st3dd_be_r,
+ gen_helper_sve_st3qq_be_r },
{ gen_helper_sve_st4bb_r,
gen_helper_sve_st4hh_be_r,
gen_helper_sve_st4ss_be_r,
- gen_helper_sve_st4dd_be_r } } },
+ gen_helper_sve_st4dd_be_r,
+ gen_helper_sve_st4qq_be_r } } },
{ { { gen_helper_sve_st2bb_r_mte,
gen_helper_sve_st2hh_le_r_mte,
gen_helper_sve_st2ss_le_r_mte,
- gen_helper_sve_st2dd_le_r_mte },
+ gen_helper_sve_st2dd_le_r_mte,
+ gen_helper_sve_st2qq_le_r_mte },
{ gen_helper_sve_st3bb_r_mte,
gen_helper_sve_st3hh_le_r_mte,
gen_helper_sve_st3ss_le_r_mte,
- gen_helper_sve_st3dd_le_r_mte },
+ gen_helper_sve_st3dd_le_r_mte,
+ gen_helper_sve_st3qq_le_r_mte },
{ gen_helper_sve_st4bb_r_mte,
gen_helper_sve_st4hh_le_r_mte,
gen_helper_sve_st4ss_le_r_mte,
- gen_helper_sve_st4dd_le_r_mte } },
+ gen_helper_sve_st4dd_le_r_mte,
+ gen_helper_sve_st4qq_le_r_mte } },
{ { gen_helper_sve_st2bb_r_mte,
gen_helper_sve_st2hh_be_r_mte,
gen_helper_sve_st2ss_be_r_mte,
- gen_helper_sve_st2dd_be_r_mte },
+ gen_helper_sve_st2dd_be_r_mte,
+ gen_helper_sve_st2qq_be_r_mte },
{ gen_helper_sve_st3bb_r_mte,
gen_helper_sve_st3hh_be_r_mte,
gen_helper_sve_st3ss_be_r_mte,
- gen_helper_sve_st3dd_be_r_mte },
+ gen_helper_sve_st3dd_be_r_mte,
+ gen_helper_sve_st3qq_be_r_mte },
{ gen_helper_sve_st4bb_r_mte,
gen_helper_sve_st4hh_be_r_mte,
gen_helper_sve_st4ss_be_r_mte,
- gen_helper_sve_st4dd_be_r_mte } } },
+ gen_helper_sve_st4dd_be_r_mte,
+ gen_helper_sve_st4qq_be_r_mte } } },
};
gen_helper_gvec_mem *fn;
int be = s->be_data == MO_BE;
@@ -5619,12 +5665,17 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
}
break;
case MO_128:
- assert(a->msz < a->esz);
- assert(a->nreg == 0);
- if (!dc_isar_feature(aa64_sve2p1, s)) {
- return false;
+ if (a->nreg == 0) {
+ assert(a->msz < a->esz);
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ } else {
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
}
- s->is_nonstreaming = true;
break;
default:
g_assert_not_reached();
@@ -5651,12 +5702,17 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
}
break;
case MO_128:
- assert(a->msz < a->esz);
- assert(a->nreg == 0);
- if (!dc_isar_feature(aa64_sve2p1, s)) {
- return false;
+ if (a->nreg == 0) {
+ assert(a->msz < a->esz);
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ } else {
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
}
- s->is_nonstreaming = true;
break;
default:
g_assert_not_reached();
@@ -229,6 +229,9 @@
@rprr_load_dt ....... dtype:4 rm:5 ... pg:3 rn:5 rd:5 &rprr_load
@rpri_load_dt ....... dtype:4 . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load
+@rprr_load ....... .... rm:5 ... pg:3 rn:5 rd:5 &rprr_load
+@rpri_load ....... .... . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load
+
@rprr_load_msz ....... .... rm:5 ... pg:3 rn:5 rd:5 \
&rprr_load dtype=%msz_dtype
@rpri_load_msz ....... .... . imm:s4 ... pg:3 rn:5 rd:5 \
@@ -1267,12 +1270,26 @@ LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0
# SVE load multiple structures (scalar plus scalar)
# LD2B, LD2H, LD2W, LD2D; etc.
LD_zprr 1010010 .. nreg:2 ..... 110 ... ..... ..... @rprr_load_msz
+# LD[234]Q
+LD_zprr 1010010 01 01 ..... 100 ... ..... ..... \
+ @rprr_load dtype=18 nreg=1
+LD_zprr 1010010 10 01 ..... 100 ... ..... ..... \
+ @rprr_load dtype=18 nreg=2
+LD_zprr 1010010 11 01 ..... 100 ... ..... ..... \
+ @rprr_load dtype=18 nreg=3
# SVE contiguous non-temporal load (scalar plus immediate)
# LDNT1B, LDNT1H, LDNT1W, LDNT1D
# SVE load multiple structures (scalar plus immediate)
# LD2B, LD2H, LD2W, LD2D; etc.
LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz
+# LD[234]Q
+LD_zpri 1010010 01 001 .... 111 ... ..... ..... \
+ @rpri_load dtype=18 nreg=1
+LD_zpri 1010010 10 001 .... 111 ... ..... ..... \
+ @rpri_load dtype=18 nreg=2
+LD_zpri 1010010 11 001 .... 111 ... ..... ..... \
+ @rpri_load dtype=18 nreg=3
# SVE load and broadcast quadword (scalar plus scalar)
LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \
@@ -1383,11 +1400,25 @@ ST_zprr 1110010 11 10 ..... 010 ... ..... ..... \
# SVE store multiple structures (scalar plus immediate) (nreg != 0)
ST_zpri 1110010 .. nreg:2 1.... 111 ... ..... ..... \
@rpri_store msz=%size_23 esz=%size_23
+# ST[234]Q
+ST_zpri 11100100 01 00 .... 000 ... ..... ..... \
+ @rpri_store msz=4 esz=4 nreg=1
+ST_zpri 11100100 10 00 .... 000 ... ..... ..... \
+ @rpri_store msz=4 esz=4 nreg=2
+ST_zpri 11100100 11 00 .... 000 ... ..... ..... \
+ @rpri_store msz=4 esz=4 nreg=3
# SVE contiguous non-temporal store (scalar plus scalar) (nreg == 0)
# SVE store multiple structures (scalar plus scalar) (nreg != 0)
ST_zprr 1110010 .. nreg:2 ..... 011 ... ..... ..... \
@rprr_store msz=%size_23 esz=%size_23
+# ST[234]Q
+ST_zprr 11100100 01 1 ..... 000 ... ..... ..... \
+ @rprr_store msz=4 esz=4 nreg=1
+ST_zprr 11100100 10 1 ..... 000 ... ..... ..... \
+ @rprr_store msz=4 esz=4 nreg=2
+ST_zprr 11100100 11 1 ..... 000 ... ..... ..... \
+ @rprr_store msz=4 esz=4 nreg=3
# SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
# Require msz > 0 && msz <= esz.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/helper-sve.h    |  32 +++++++++
 target/arm/tcg/sve_helper.c    |   8 +++
 target/arm/tcg/translate-sve.c | 126 ++++++++++++++++++++++++---------
 target/arm/tcg/sve.decode      |  31 ++++++++
 4 files changed, 162 insertions(+), 35 deletions(-)