@@ -3158,6 +3158,11 @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
* the same thing as the current security state of the processor!
*/
FIELD(TBFLAG_A32, NS, 10, 1)
+/*
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
+ * This requires an SME trap from AArch32 mode when using NEON.
+ * rebuild_hflags_a32() only sets this at EL0 beneath an AArch64 EL1.
+ */
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
/*
* Bit usage when in AArch32 state, for M-profile only.
@@ -3195,6 +3200,8 @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
FIELD(TBFLAG_A64, SVL, 24, 4)
+/*
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
+ * Only assigned while PSTATE.SM is set; when true, non-streaming insns
+ * raise an SME Streaming trap from fp_access_check().
+ */
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
/*
* Helpers for using the above.
@@ -102,6 +102,10 @@ typedef struct DisasContext {
bool pstate_sm;
/* True if PSTATE.ZA is set. */
bool pstate_za;
+ /* True if non-streaming insns should raise an SME Streaming exception. */
+ bool sme_trap_nonstreaming;
+ /* True if the current instruction is non-streaming. */
+ bool is_nonstreaming;
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
bool mve_no_pred;
/*
new file mode 100644
@@ -0,0 +1,90 @@
+# AArch64 SME allowed instruction decoding
+#
+# Copyright (c) 2022 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
+# Arm Architecture Reference Manual Supplement,
+# The Scalable Matrix Extension (SME), for Armv9-A
+
+{
+ [
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
+ ]
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
+}
+
+{
+ [
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
+ ]
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
+}
+
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
+
+# This is the "avoidance of doubt" final table of the Illegal Advanced SIMD
+# instructions section.  Its entries need no patterns, as the default is OK.
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
+
+FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR
+FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA
+FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT
+FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS
+FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR
+FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP
+FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result)
+FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA
+FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL
+FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD
+FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA
+FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA
+FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions
+FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar)
+FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm)
+FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector)
+FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm)
+FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector)
+FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector)
+FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector)
+FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar)
+FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm)
+FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar)
+FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm)
+FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch
+FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar)
+FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar)
+FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector)
+FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc)
@@ -6098,6 +6098,32 @@ int sme_exception_el(CPUARMState *env, int el)
return 0;
}
+/*
+ * Report whether the full A64 instruction set is available in Streaming
+ * SVE mode at exception level @el; this corresponds to the ARM pseudocode
+ * function IsFullA64Enabled().  FEAT_SME_FA64 must be implemented and the
+ * FA64 bit must be set in every SMCR_ELx that is consulted for @el.
+ */
+static bool sme_fa64(CPUARMState *env, int el)
+{
+    if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
+        return false;
+    }
+
+    /* SMCR_EL1 is consulted for EL0/EL1 unless el_is_in_host() says otherwise. */
+    if (el <= 1 && !el_is_in_host(env, el)
+        && !FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
+        return false;
+    }
+
+    /* SMCR_EL2 is consulted for EL2 and below whenever EL2 is enabled. */
+    if (el <= 2 && arm_is_el2_enabled(env)
+        && !FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
+        return false;
+    }
+
+    /* SMCR_EL3 is consulted for every EL whenever EL3 is implemented. */
+    return !arm_feature(env, ARM_FEATURE_EL3)
+        || FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64);
+}
+
+
/*
* Given that SVE is enabled, return the vector length for EL.
*/
@@ -10801,6 +10827,20 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
}
+ /*
+ * The SME exception we are testing for is raised via
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
+ * AArch32.CheckAdvSIMDOrFPEnabled().
+ */
+ if (el == 0
+ && FIELD_EX64(env->svcr, SVCR, SM)
+ && (!arm_is_el2_enabled(env)
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
+ && arm_el_is_aa64(env, 1)
+ && !sme_fa64(env, el)) {
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
+ }
+
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
}
@@ -10850,6 +10890,7 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
}
if (FIELD_EX64(env->svcr, SVCR, SM)) {
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
}
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
}
@@ -1155,7 +1155,7 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
* unallocated-encoding checks (otherwise the syndrome information
* for the resulting exception will be incorrect).
*/
-static bool fp_access_check(DisasContext *s)
+static bool fp_access_check_only(DisasContext *s)
{
if (s->fp_excp_el) {
assert(!s->fp_access_checked);
@@ -1170,6 +1170,19 @@ static bool fp_access_check(DisasContext *s)
return true;
}
+/*
+ * Check FP/AdvSIMD access as fp_access_check_only() does and, in addition,
+ * raise an SME Streaming trap when the current insn has been marked as
+ * non-streaming while such insns must trap.  Returns true if translation
+ * of the insn may proceed; false if an exception has been generated.
+ */
+static bool fp_access_check(DisasContext *s)
+{
+    bool must_trap;
+
+    if (!fp_access_check_only(s)) {
+        return false;
+    }
+
+    must_trap = s->is_nonstreaming && s->sme_trap_nonstreaming;
+    if (!must_trap) {
+        return true;
+    }
+
+    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+                       syn_smetrap(SME_ET_Streaming, false));
+    return false;
+}
+
+
/* Check that SVE access is enabled. If it is, return true.
* If not, emit code to generate an appropriate exception and return false.
*/
@@ -1994,7 +2007,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
default:
g_assert_not_reached();
}
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
return;
} else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
return;
@@ -14530,6 +14543,23 @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
}
}
+/*
+ * Include the generated SME FA64 decoder.
+ */
+
+#include "decode-sme-fa64.c.inc"
+
+/* Decoder callback for an "OK" pattern: accept the insn, record nothing. */
+static bool trans_OK(DisasContext *s, arg_OK *a)
+{
+    /* s->is_nonstreaming keeps the value it was given before decoding. */
+    return true;
+}
+
+/*
+ * Decoder callback for a "FAIL" pattern: mark the current insn as
+ * non-streaming so that fp_access_check() can raise the SME trap.
+ * Always returns true, i.e. the pattern is treated as matched.
+ *
+ * The argument type is the pattern's own arg_FAIL typedef, matching the
+ * prototype that decodetree generates for trans_FAIL().
+ */
+static bool trans_FAIL(DisasContext *s, arg_FAIL *a)
+{
+    s->is_nonstreaming = true;
+    return true;
+}
+
/**
* is_guarded_page:
* @env: The cpu environment
@@ -14657,6 +14687,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
@@ -14805,6 +14836,11 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
}
}
+ s->is_nonstreaming = false;
+ if (s->sme_trap_nonstreaming) {
+ disas_sme_fa64(s, insn);
+ }
+
switch (extract32(insn, 25, 4)) {
case 0x0:
if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
@@ -234,6 +234,18 @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
return false;
}
+    /*
+     * Note that rebuild_hflags_a32 has already accounted for being in EL0
+     * and for the higher EL being in A64 mode, etc.  Unlike A64 mode, no
+     * insn that touches VFP appears to be allowed.
+     */
+ if (s->sme_trap_nonstreaming) {
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_smetrap(SME_ET_Streaming,
+ s->base.pc_next - s->pc_curr == 2));
+ return false;
+ }
+
if (!s->vfp_enabled && !ignore_vfp_enabled) {
assert(!arm_dc_feature(s, ARM_FEATURE_M));
unallocated_encoding(s);
@@ -9378,6 +9378,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
}
+ dc->sme_trap_nonstreaming =
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
}
dc->cp_regs = cpu->cp_regs;
dc->features = env->features;
@@ -1,6 +1,7 @@
gen = [
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),