@@ -606,23 +606,23 @@ DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sdot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_sdot_idx_4b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_udot_idx_4b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_sdot_idx_4h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_udot_idx_4h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_sudot_idx_4b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_usdot_idx_4b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
@@ -6118,9 +6118,9 @@ static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
return true;
}
-TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
-TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
-TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
+TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b)
+TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b)
+TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b)
TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
@@ -6880,12 +6880,12 @@ static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
return true;
}
-TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
-TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
+TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b)
+TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b)
TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
- gen_helper_gvec_sudot_idx_b)
+ gen_helper_gvec_sudot_idx_4b)
TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
- gen_helper_gvec_usdot_idx_b)
+ gen_helper_gvec_usdot_idx_4b)
TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
gen_helper_gvec_bfdot_idx)
@@ -271,7 +271,7 @@ static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
return false;
}
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_sdot_b);
+ gen_helper_gvec_sdot_4b);
}
static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
@@ -280,7 +280,7 @@ static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
return false;
}
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_udot_b);
+ gen_helper_gvec_udot_4b);
}
static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
@@ -289,7 +289,7 @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
return false;
}
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_usdot_b);
+ gen_helper_gvec_usdot_4b);
}
static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
@@ -356,7 +356,7 @@ static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
return false;
}
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
- gen_helper_gvec_sdot_idx_b);
+ gen_helper_gvec_sdot_idx_4b);
}
static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
@@ -365,7 +365,7 @@ static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
return false;
}
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
- gen_helper_gvec_udot_idx_b);
+ gen_helper_gvec_udot_idx_4b);
}
static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
@@ -374,7 +374,7 @@ static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
return false;
}
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
- gen_helper_gvec_usdot_idx_b);
+ gen_helper_gvec_usdot_idx_4b);
}
static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
@@ -383,7 +383,7 @@ static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
return false;
}
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
- gen_helper_gvec_sudot_idx_b);
+ gen_helper_gvec_sudot_idx_4b);
}
static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
@@ -3385,8 +3385,8 @@ DO_ZZI(UMIN, umin)
#undef DO_ZZI
static gen_helper_gvec_4 * const dot_fns[2][2] = {
- { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
- { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
+ { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h },
+ { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
@@ -3396,18 +3396,18 @@ TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
*/
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_sdot_idx_b, a)
+ gen_helper_gvec_sdot_idx_4b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_sdot_idx_h, a)
+ gen_helper_gvec_sdot_idx_4h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_udot_idx_b, a)
+ gen_helper_gvec_udot_idx_4b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_udot_idx_h, a)
+ gen_helper_gvec_udot_idx_4h, a)
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_sudot_idx_b, a)
+ gen_helper_gvec_sudot_idx_4b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_usdot_idx_b, a)
+ gen_helper_gvec_usdot_idx_4b, a)
#define DO_SVE2_RRX(NAME, FUNC) \
TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
@@ -7106,7 +7106,7 @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
- a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
+ a->esz == 2 ? gen_helper_gvec_usdot_4b : NULL, a, 0)
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
gen_helper_crypto_aesmc, a->rd, a->rd, 0)
@@ -825,11 +825,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
-DO_DOT(gvec_sdot_b, int32_t, int8_t, int8_t)
-DO_DOT(gvec_udot_b, uint32_t, uint8_t, uint8_t)
-DO_DOT(gvec_usdot_b, uint32_t, uint8_t, int8_t)
-DO_DOT(gvec_sdot_h, int64_t, int16_t, int16_t)
-DO_DOT(gvec_udot_h, uint64_t, uint16_t, uint16_t)
+DO_DOT(gvec_sdot_4b, int32_t, int8_t, int8_t)
+DO_DOT(gvec_udot_4b, uint32_t, uint8_t, uint8_t)
+DO_DOT(gvec_usdot_4b, uint32_t, uint8_t, int8_t)
+DO_DOT(gvec_sdot_4h, int64_t, int16_t, int16_t)
+DO_DOT(gvec_udot_4h, uint64_t, uint16_t, uint16_t)
#define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
@@ -865,12 +865,12 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
-DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4)
-DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4)
-DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4)
-DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4)
-DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8)
-DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8)
+DO_DOT_IDX(gvec_sdot_idx_4b, int32_t, int8_t, int8_t, H4)
+DO_DOT_IDX(gvec_udot_idx_4b, uint32_t, uint8_t, uint8_t, H4)
+DO_DOT_IDX(gvec_sudot_idx_4b, int32_t, int8_t, uint8_t, H4)
+DO_DOT_IDX(gvec_usdot_idx_4b, int32_t, uint8_t, int8_t, H4)
+DO_DOT_IDX(gvec_sdot_idx_4h, int64_t, int16_t, int16_t, H8)
+DO_DOT_IDX(gvec_udot_idx_4h, uint64_t, uint16_t, uint16_t, H8)
void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
float_status *fpst, uint32_t desc)
Emphasize that these are 4-way dot products. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/helper.h | 22 +++++++++++----------- target/arm/tcg/translate-a64.c | 14 +++++++------- target/arm/tcg/translate-neon.c | 14 +++++++------- target/arm/tcg/translate-sve.c | 18 +++++++++--------- target/arm/tcg/vec_helper.c | 22 +++++++++++----------- 5 files changed, 45 insertions(+), 45 deletions(-)