diff mbox series

[4/4] tests/tcg/aarch64: Extend MTE gdbstub tests to system mode

Message ID 20240722160709.1677430-5-gustavo.romero@linaro.org
State New
Headers show
Series gdbstub: Add support for MTE in system mode | expand

Commit Message

Gustavo Romero July 22, 2024, 4:07 p.m. UTC
Extend MTE gdbstub tests to also run in system mode (share tests
between user mode and system mode). The tests will only run if a version
of GDB that supports MTE is available in the test environment and if
the available compiler supports the 'memtag' flag (-march=armv8.5-a+memtag).

For the tests running in system mode, a page that supports MTE
operations is necessary. Therefore, an MTE-enabled page is made
available (mapped) in the third 2 MB chunk of the second 1 GB space in
the flat mapping set in boot.S. A new binary is also introduced (mte.c)
for the test. It's linked against boot.S and run by QEMU in system mode.

Also, in boot.S bits ATA[43] and TCF[40] are set in SCTLR_EL1 to enable
access to allocation tags at EL1 and enable MTE_SYNC exceptions in case
of Tag Check Faults, and bit TBI0[37] is set in TCR_EL1 so the top byte
of the addresses is ignored in the translation and used for tagged
addresses.

Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
---
 tests/tcg/aarch64/Makefile.softmmu-target | 36 +++++++++++++++++--
 tests/tcg/aarch64/Makefile.target         |  3 +-
 tests/tcg/aarch64/gdbstub/test-mte.py     | 44 +++++++++++++++--------
 tests/tcg/aarch64/system/boot.S           | 26 ++++++++++++--
 tests/tcg/aarch64/system/kernel.ld        |  5 +++
 tests/tcg/aarch64/system/mte.c            | 40 +++++++++++++++++++++
 6 files changed, 133 insertions(+), 21 deletions(-)
 create mode 100644 tests/tcg/aarch64/system/mte.c

Comments

Richard Henderson July 25, 2024, 11:17 p.m. UTC | #1
On 7/23/24 02:07, Gustavo Romero wrote:
>   def run_test():
> -    gdb.execute("break 95", False, True)
> +    if mode == "system":
> +        # Break address: where to break before performing the tests
> +        # Addresss is the last insn. before 'main' returns. See mte.c
> +        ba = "*main+52"

Ugly.  You can add labels in your inline asm block instead.

> diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
> index 501685d0ec..a12393d00b 100644
> --- a/tests/tcg/aarch64/system/boot.S
> +++ b/tests/tcg/aarch64/system/boot.S
> @@ -135,11 +135,22 @@ __start:
>   	orr	x1, x1, x3
>   	str	x1, [x2]			/* 2nd 2mb (.data & .bss)*/
>   
> +        /* Third block: .mte_page */
> +	adrp	x1, .mte_page
> +	add	x1, x1, :lo12:.mte_page
> +	bic	x1, x1, #(1 << 21) - 1
> +	and 	x4, x1, x5
> +	add	x2, x0, x4, lsr #(21 - 3)
> +	ldr	x3, =(3 << 53) | 0x401 | 1 << 2	/* attr(AF, NX, block, AttrIndx=Attr1) */
> +	orr	x1, x1, x3
> +	str	x1, [x2]
> +
>   	/* Setup/enable the MMU.  */
>   
>   	/*
>   	 * TCR_EL1 - Translation Control Registers
>   	 *
> +	 * TBI0[37] = 0b1 => Top Byte ignored and used for tagged addresses
>   	 * IPS[34:32] = 40-bit PA, 1TB
>   	 * TG0[14:15] = b00 => 4kb granuale
>   	 * ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
> @@ -152,16 +163,22 @@ __start:
>   	 * with at least 1gb range to see RAM. So we start with a
>   	 * level 1 lookup.
>   	 */
> -	ldr	x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
> +	ldr	x0, = (1 << 37) | (2 << 32) | 25 | (3 << 10) | (3 << 8)
>   	msr	tcr_el1, x0
>   
> -	mov	x0, #0xee			/* Inner/outer cacheable WB */
> +	/*
> +	 * Attr0: Normal, Inner/outer cacheable WB
> +	 * Attr1: Tagged Normal (MTE)
> +	 */
> +	mov	x0, #0xf0ee

Up to here, I think we're fine, no matter the emulated cpu model.

>   	msr	mair_el1, x0
>   	isb
>   
>   	/*
>   	 * SCTLR_EL1 - System Control Register
>   	 *
> +	 * ATA[43] = 1 = enable access to allocation tags at EL1
> +	 * TCF[40] = 1 = Tag Check Faults cause a synchronous exception
>   	 * WXN[19] = 0 = no effect, Write does not imply XN (execute never)
>   	 * I[12] = Instruction cachability control
>   	 * SA[3] = SP alignment check
> @@ -169,7 +186,8 @@ __start:
>   	 * M[0] = 1, enable stage 1 address translation for EL0/1
>   	 */
>   	mrs	x0, sctlr_el1
> -	ldr	x1, =0x100d			/* bits I(12) SA(3) C(2) M(0) */
> +	/* Bits set: ATA(43) TCF(40) I(12) SA(3) C(2) M(0) */
> +	ldr	x1, =(0x100d | 1 << 43 | 1 << 40)

But here, it's only legal to run this modified boot.S on -cpu max.
We should check for MTE enabled before setting those, or
set them elsewhere, e.g. in main of the specific MTE test.

> @@ -239,3 +257,5 @@ ttb_stage2:
>   stack:
>   	.space 65536, 0
>   stack_end:
> +
> +	.section .mte_page

Why?

> diff --git a/tests/tcg/aarch64/system/kernel.ld b/tests/tcg/aarch64/system/kernel.ld
> index 7b3a76dcbf..7c00c1c378 100644
> --- a/tests/tcg/aarch64/system/kernel.ld
> +++ b/tests/tcg/aarch64/system/kernel.ld
> @@ -18,6 +18,11 @@ SECTIONS
>       .bss : {
>           *(.bss)
>       }
> +    /* align MTE section to next (third) 2mb */
> +    . = ALIGN(1 << 22);
> +    .mte : {
> +        *(.mte_page)
> +    }

Why?

>       /DISCARD/ : {
>           *(.ARM.attributes)
>       }
> diff --git a/tests/tcg/aarch64/system/mte.c b/tests/tcg/aarch64/system/mte.c
> new file mode 100644
> index 0000000000..58a5ac31ff
> --- /dev/null
> +++ b/tests/tcg/aarch64/system/mte.c
> @@ -0,0 +1,40 @@
> +#include <inttypes.h>
> +
> +int main(void)
> +{
> +    uint8_t *addr;
> +
> +    /*
> +     * Third 2MB chunk in the second 1GB block.
> +     * See .mte_page section in kernel.ld.
> +     */
> +    addr = (void *)((1UL << 30) | (1UL << 22));

... because you're not using .mte_page here, just computing it.

> +
> +    asm (
> +        /*
> +         * Set GCR for randon tag generation. 0xA5 is just a random value to set

random

> +         * GCR != 0 so the tag generated by 'irg' is not zero.
> +         */
> +        "ldr x1, =0xA5;"
> +        "msr gcr_el1, x1;"

I think it might be easier to split the asm:

   asm volatile("msr gcr_el1, %0" : : "r"(0xA5));

> +
> +         /* Generate a logical tag and put it in 'addr' pointer. */
> +         "irg %[addr], %[addr];"

   asm("irg %0,%0" : "+r"(addr));

> +
> +         /*
> +          * Store the generated tag to memory region pointed to by 'addr', i.e.
> +          * set the allocation tag for the memory region.
> +          */
> +         "stg %[addr], [%[addr]];"

Storing addr into addr is a titch confusing, clearer with zero?

   asm("stg xzr,[%0]" : : "r"(addr));

> +
> +         /*
> +          * Store a random value (0xdeadbeef) to *addr. This must not cause any
> +          * Tag Check Fault since logical and allocation tags are set the same.
> +          */
> +	 "ldr x1, =0xdeadbeef;"
> +	 "str x1, [x0];"

Where does x0 come from?  Certainly not "addr"...
Can you use "addr" directly in the gdb script?


r~
Gustavo Romero Aug. 8, 2024, 5:08 a.m. UTC | #2
Hi Richard,

On 7/25/24 8:17 PM, Richard Henderson wrote:
> On 7/23/24 02:07, Gustavo Romero wrote:
>>   def run_test():
>> -    gdb.execute("break 95", False, True)
>> +    if mode == "system":
>> +        # Break address: where to break before performing the tests
>> +        # Addresss is the last insn. before 'main' returns. See mte.c
>> +        ba = "*main+52"
> 
> Ugly.  You can add labels in your inline asm block instead.

I forgot about that possibility. Thanks. This motivated me to
convert mte.c to mte.S. I think there is no benefit in keeping
the test in C with a bunch of inline asm blocks. Please see v2.


>> diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
>> index 501685d0ec..a12393d00b 100644
>> --- a/tests/tcg/aarch64/system/boot.S
>> +++ b/tests/tcg/aarch64/system/boot.S
>> @@ -135,11 +135,22 @@ __start:
>>       orr    x1, x1, x3
>>       str    x1, [x2]            /* 2nd 2mb (.data & .bss)*/
>> +        /* Third block: .mte_page */
>> +    adrp    x1, .mte_page
>> +    add    x1, x1, :lo12:.mte_page
>> +    bic    x1, x1, #(1 << 21) - 1
>> +    and     x4, x1, x5
>> +    add    x2, x0, x4, lsr #(21 - 3)
>> +    ldr    x3, =(3 << 53) | 0x401 | 1 << 2    /* attr(AF, NX, block, AttrIndx=Attr1) */
>> +    orr    x1, x1, x3
>> +    str    x1, [x2]
>> +
>>       /* Setup/enable the MMU.  */
>>       /*
>>        * TCR_EL1 - Translation Control Registers
>>        *
>> +     * TBI0[37] = 0b1 => Top Byte ignored and used for tagged addresses
>>        * IPS[34:32] = 40-bit PA, 1TB
>>        * TG0[14:15] = b00 => 4kb granuale
>>        * ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
>> @@ -152,16 +163,22 @@ __start:
>>        * with at least 1gb range to see RAM. So we start with a
>>        * level 1 lookup.
>>        */
>> -    ldr    x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
>> +    ldr    x0, = (1 << 37) | (2 << 32) | 25 | (3 << 10) | (3 << 8)
>>       msr    tcr_el1, x0
>> -    mov    x0, #0xee            /* Inner/outer cacheable WB */
>> +    /*
>> +     * Attr0: Normal, Inner/outer cacheable WB
>> +     * Attr1: Tagged Normal (MTE)
>> +     */
>> +    mov    x0, #0xf0ee
> 
> Up to here, I think we're fine, no matter the emulated cpu model.
> 
>>       msr    mair_el1, x0
>>       isb
>>       /*
>>        * SCTLR_EL1 - System Control Register
>>        *
>> +     * ATA[43] = 1 = enable access to allocation tags at EL1
>> +     * TCF[40] = 1 = Tag Check Faults cause a synchronous exception
>>        * WXN[19] = 0 = no effect, Write does not imply XN (execute never)
>>        * I[12] = Instruction cachability control
>>        * SA[3] = SP alignment check
>> @@ -169,7 +186,8 @@ __start:
>>        * M[0] = 1, enable stage 1 address translation for EL0/1
>>        */
>>       mrs    x0, sctlr_el1
>> -    ldr    x1, =0x100d            /* bits I(12) SA(3) C(2) M(0) */
>> +    /* Bits set: ATA(43) TCF(40) I(12) SA(3) C(2) M(0) */
>> +    ldr    x1, =(0x100d | 1 << 43 | 1 << 40)
> 
> But here, it's only legal to run this modified boot.S on -cpu max.
> We should check for MTE enabled before setting those, or
> set them elsewhere, e.g. in main of the specific MTE test.

Right. I moved all these bits for MTE to mte.S, under the main function,
and just left the page table setting in boot.S, since we need to
set it before the MMU is turned on.


>> @@ -239,3 +257,5 @@ ttb_stage2:
>>   stack:
>>       .space 65536, 0
>>   stack_end:
>> +
>> +    .section .mte_page
> 
> Why?
> 
>> diff --git a/tests/tcg/aarch64/system/kernel.ld b/tests/tcg/aarch64/system/kernel.ld
>> index 7b3a76dcbf..7c00c1c378 100644
>> --- a/tests/tcg/aarch64/system/kernel.ld
>> +++ b/tests/tcg/aarch64/system/kernel.ld
>> @@ -18,6 +18,11 @@ SECTIONS
>>       .bss : {
>>           *(.bss)
>>       }
>> +    /* align MTE section to next (third) 2mb */
>> +    . = ALIGN(1 << 22);
>> +    .mte : {
>> +        *(.mte_page)
>> +    }
> 
> Why?
> 
>>       /DISCARD/ : {
>>           *(.ARM.attributes)
>>       }
>> diff --git a/tests/tcg/aarch64/system/mte.c b/tests/tcg/aarch64/system/mte.c
>> new file mode 100644
>> index 0000000000..58a5ac31ff
>> --- /dev/null
>> +++ b/tests/tcg/aarch64/system/mte.c
>> @@ -0,0 +1,40 @@
>> +#include <inttypes.h>
>> +
>> +int main(void)
>> +{
>> +    uint8_t *addr;
>> +
>> +    /*
>> +     * Third 2MB chunk in the second 1GB block.
>> +     * See .mte_page section in kernel.ld.
>> +     */
>> +    addr = (void *)((1UL << 30) | (1UL << 22));
> 
> ... because you're not using .mte_page here, just computing it.

.mte_page is used in boot.S for setting the page table entry, in:

         /* Third block: .mte_page */
         adrp    x1, .mte_page
         add     x1, x1, :lo12:.mte_page
	[...]

That's why it's being computed in kernel.ld. That said, you're
right that it's better to also tie 'addr' to it instead of having
a hard-coded value.

I'm using the .mte_page address in the new mte.S test in v2, so
now the value for 'addr' used in the test is tied to that computed
value.


>> +
>> +    asm (
>> +        /*
>> +         * Set GCR for randon tag generation. 0xA5 is just a random value to set
> 
> random
> 
>> +         * GCR != 0 so the tag generated by 'irg' is not zero.
>> +         */
>> +        "ldr x1, =0xA5;"
>> +        "msr gcr_el1, x1;"
> 
> I think it might be easier to split the asm:
> 
>    asm volatile("msr gcr_el1, %0" : : "r"(0xA5));

hmm, all the tests are built with optimizations disabled,
so no need to use volatile? Anyways, in v2 I'm using mov
instead of ldr, and since it's .S, no more inline asm.


>> +
>> +         /* Generate a logical tag and put it in 'addr' pointer. */
>> +         "irg %[addr], %[addr];"
> 
>    asm("irg %0,%0" : "+r"(addr));
> 
>> +
>> +         /*
>> +          * Store the generated tag to memory region pointed to by 'addr', i.e.
>> +          * set the allocation tag for the memory region.
>> +          */
>> +         "stg %[addr], [%[addr]];"
> 
> Storing addr into addr is a titch confusing, clearer with zero?
> 
>    asm("stg xzr,[%0]" : : "r"(addr))

This is an invalid insn. The xzr reg. can't be used as operand 1 in 'stg',
because register 31 is encoded as sp, not xzr.

However, I don't want to zero the allocation tag. Actually, it's
the opposite, I want it != 0, that's why I set gcr_el1 != 0: to
get a non-zero tag from 'irg'.

I agree it's a tad confusing, but the thing is, 'stg' expects a
tagged address in the source operand, not a "pure" tag, hence the
use of 'addr' in operand 1. But I think it could be:

stg tagged_addr, [addr]

In v2 I put a comment that stg actually extracts the tag to be
stored in [addr] from the address in 'tagged_addr'


>> +
>> +         /*
>> +          * Store a random value (0xdeadbeef) to *addr. This must not cause any
>> +          * Tag Check Fault since logical and allocation tags are set the same.
>> +          */
>> +     "ldr x1, =0xdeadbeef;"
>> +     "str x1, [x0];"
> 
> Where does x0 come from?  Certainly not "addr"...
> Can you use "addr" directly in the gdb script?

Yep, it should be 'addr' instead of x0 here. It's a leftover.
Fixed in v2. Thanks.


Cheers,
Gustavo
diff mbox series

Patch

diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index dd6d595830..225a073e79 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -2,7 +2,9 @@ 
 # Aarch64 system tests
 #
 
-AARCH64_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/aarch64/system
+AARCH64_SRC=$(SRC_PATH)/tests/tcg/aarch64
+AARCH64_SYSTEM_SRC=$(AARCH64_SRC)/system
+
 VPATH+=$(AARCH64_SYSTEM_SRC)
 
 # These objects provide the basic boot code and helper functions for all tests
@@ -21,7 +23,8 @@  LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 
 config-cc.mak: Makefile
 	$(quiet-@)( \
-	    $(call cc-option,-march=armv8.3-a, CROSS_CC_HAS_ARMV8_3)) 3> config-cc.mak
+	    $(call cc-option,-march=armv8.3-a, CROSS_CC_HAS_ARMV8_3); \
+	    $(call cc-option,-march=armv8.5-a+memtag, CROSS_CC_HAS_ARMV8_MTE)) 3> config-cc.mak
 -include config-cc.mak
 
 # building head blobs
@@ -88,3 +91,32 @@  pauth-3:
 run-pauth-3:
 	$(call skip-test, "RUN of pauth-3", "not built")
 endif
+
+ifneq ($(CROSS_CC_HAS_ARMV8_MTE),)
+QEMU_MTE_ENABLED_MACHINE=-M virt,mte=on -cpu max -display none
+QEMU_OPTS_WITH_MTE_ON = $(QEMU_MTE_ENABLED_MACHINE) $(QEMU_BASE_ARGS) -kernel
+mte: CFLAGS+=-march=armv8.5-a+memtag
+
+run-mte: QEMU_OPTS=$(QEMU_OPTS_WITH_MTE_ON)
+run-mte: mte
+
+ifeq ($(GDB_HAS_MTE),y)
+run-gdbstub-mte: QEMU_OPTS=$(QEMU_OPTS_WITH_MTE_ON)
+run-gdbstub-mte: mte
+	$(call run-test, $@, $(GDB_SCRIPT) \
+		--gdb $(GDB) \
+		--qemu $(QEMU) --qargs "-chardev file$(COMMA)path=$<.out$(COMMA)id=output $(QEMU_OPTS)" \
+		--bin $< --test $(AARCH64_SRC)/gdbstub/test-mte.py --test-args mode=\"system\", \
+	gdbstub MTE support)
+
+EXTRA_RUNS += run-gdbstub-mte
+else # !GDB_HAS_MTE
+run-gdbstub-mte:
+	$(call skip-test, "RUN of gdbstub-mte", "GDB without MTE support")
+endif
+else # !CROSS_CC_HAS_ARMV8_MTE
+mte:
+	$(call skip-test, "BUILD of $@", "missing compiler support")
+run-mte:
+	$(call skip-test, "RUN of mte", "not built")
+endif
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index 8cc62eb456..2504517176 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -138,7 +138,8 @@  run-gdbstub-mte: mte-8
 	$(call run-test, $@, $(GDB_SCRIPT) \
 		--gdb $(GDB) \
 		--qemu $(QEMU) --qargs "$(QEMU_OPTS)" \
-		--bin $< --test $(AARCH64_SRC)/gdbstub/test-mte.py, \
+		--bin $< --test $(AARCH64_SRC)/gdbstub/test-mte.py \
+		--test-args mode=\"user\", \
 	gdbstub MTE support)
 
 EXTRA_RUNS += run-gdbstub-mte
diff --git a/tests/tcg/aarch64/gdbstub/test-mte.py b/tests/tcg/aarch64/gdbstub/test-mte.py
index 2db0663c1a..727999f277 100644
--- a/tests/tcg/aarch64/gdbstub/test-mte.py
+++ b/tests/tcg/aarch64/gdbstub/test-mte.py
@@ -1,13 +1,13 @@ 
 from __future__ import print_function
 #
 # Test GDB memory-tag commands that exercise the stubs for the qIsAddressTagged,
-# qMemTag, and QMemTag packets. Logical tag-only commands rely on local
-# operations, hence don't exercise any stub.
+# qMemTag, and QMemTag packets, which are used for manipulating allocation tags.
+# Logical tags-related commands rely on local operations, hence don't exercise
+# any stub and so are not used in this test.
 #
-# The test consists in breaking just after a atag() call (which sets the
-# allocation tag -- see mte-8.c for details) and setting/getting tags in
-# different memory locations and ranges starting at the address of the array
-# 'a'.
+# The test consists in breaking just after a tag is set in a specific memory
+# chunk, and then using the GDB 'memory-tagging' subcommands to set/get tags in
+# different memory locations and ranges in the MTE-enabled memory chunk.
 #
 # This is launched via tests/guest-debug/run-test.py
 #
@@ -23,12 +23,26 @@ 
 
 
 def run_test():
-    gdb.execute("break 95", False, True)
+    if mode == "system":
+        # Break address: where to break before performing the tests
+        # Address is the last insn. before 'main' returns. See mte.c
+        ba = "*main+52"
+        # Tagged address: the start of the MTE-enabled memory chunk to be tested
+        # Address is in the x0 register
+        ta = "$x0"
+    else: # mode="user"
+        # Line 95 in mte-8.c
+        ba = "95"
+        # 'a' is an array defined in C code. See mte-8.c
+        ta = "a"
+
+    gdb.execute(f"break {ba}", False, True)
     gdb.execute("continue", False, True)
+
     try:
         # Test if we can check correctly that the allocation tag for
         # array 'a' matches the logical tag after atag() is called.
-        co = gdb.execute("memory-tag check a", False, True)
+        co = gdb.execute(f"memory-tag check {ta}", False, True)
         tags_match = re.findall(PATTERN_0, co, re.MULTILINE)
         if tags_match:
             report(True, f"{tags_match[0]}")
@@ -40,19 +54,19 @@  def run_test():
 
         # Set the allocation tag for the first granule (16 bytes) of
         # address starting at 'a' address to a known value, i.e. 0x04.
-        gdb.execute("memory-tag set-allocation-tag a 1 04", False, True)
+        gdb.execute(f"memory-tag set-allocation-tag {ta} 1 04", False, True)
 
         # Then set the allocation tag for the second granule to a known
         # value, i.e. 0x06. This tests that contiguous tag granules are
         # set correct and don't run over each other.
-        gdb.execute("memory-tag set-allocation-tag a+16 1 06", False, True)
+        gdb.execute(f"memory-tag set-allocation-tag {ta}+16 1 06", False, True)
 
         # Read the known values back and check if they remain the same.
 
-        co = gdb.execute("memory-tag print-allocation-tag a", False, True)
+        co = gdb.execute(f"memory-tag print-allocation-tag {ta}", False, True)
         first_tag = re.match(PATTERN_1, co)[1]
 
-        co = gdb.execute("memory-tag print-allocation-tag a+16", False, True)
+        co = gdb.execute(f"memory-tag print-allocation-tag {ta}+16", False, True)
         second_tag = re.match(PATTERN_1, co)[1]
 
         if first_tag == "0x4" and second_tag == "0x6":
@@ -61,15 +75,15 @@  def run_test():
             report(False, "Can't set/print allocation tags!")
 
         # Now test fill pattern by setting a whole page with a pattern.
-        gdb.execute("memory-tag set-allocation-tag a 4096 0a0b", False, True)
+        gdb.execute(f"memory-tag set-allocation-tag {ta} 4096 0a0b", False, True)
 
         # And read back the tags of the last two granules in page so
         # we also test if the pattern is set correctly up to the end of
         # the page.
-        co = gdb.execute("memory-tag print-allocation-tag a+4096-32", False, True)
+        co = gdb.execute(f"memory-tag print-allocation-tag {ta}+4096-32", False, True)
         tag = re.match(PATTERN_1, co)[1]
 
-        co = gdb.execute("memory-tag print-allocation-tag a+4096-16", False, True)
+        co = gdb.execute(f"memory-tag print-allocation-tag {ta}+4096-16", False, True)
         last_tag = re.match(PATTERN_1, co)[1]
 
         if tag == "0xa" and last_tag == "0xb":
diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
index 501685d0ec..a12393d00b 100644
--- a/tests/tcg/aarch64/system/boot.S
+++ b/tests/tcg/aarch64/system/boot.S
@@ -135,11 +135,22 @@  __start:
 	orr	x1, x1, x3
 	str	x1, [x2]			/* 2nd 2mb (.data & .bss)*/
 
+        /* Third block: .mte_page */
+	adrp	x1, .mte_page
+	add	x1, x1, :lo12:.mte_page
+	bic	x1, x1, #(1 << 21) - 1
+	and 	x4, x1, x5
+	add	x2, x0, x4, lsr #(21 - 3)
+	ldr	x3, =(3 << 53) | 0x401 | 1 << 2	/* attr(AF, NX, block, AttrIndx=Attr1) */
+	orr	x1, x1, x3
+	str	x1, [x2]
+
 	/* Setup/enable the MMU.  */
 
 	/*
 	 * TCR_EL1 - Translation Control Registers
 	 *
+	 * TBI0[37] = 0b1 => Top Byte ignored and used for tagged addresses
 	 * IPS[34:32] = 40-bit PA, 1TB
 	 * TG0[14:15] = b00 => 4kb granuale
 	 * ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
@@ -152,16 +163,22 @@  __start:
 	 * with at least 1gb range to see RAM. So we start with a
 	 * level 1 lookup.
 	 */
-	ldr	x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
+	ldr	x0, = (1 << 37) | (2 << 32) | 25 | (3 << 10) | (3 << 8)
 	msr	tcr_el1, x0
 
-	mov	x0, #0xee			/* Inner/outer cacheable WB */
+	/*
+	 * Attr0: Normal, Inner/outer cacheable WB
+	 * Attr1: Tagged Normal (MTE)
+	 */
+	mov	x0, #0xf0ee
 	msr	mair_el1, x0
 	isb
 
 	/*
 	 * SCTLR_EL1 - System Control Register
 	 *
+	 * ATA[43] = 1 = enable access to allocation tags at EL1
+	 * TCF[40] = 1 = Tag Check Faults cause a synchronous exception
 	 * WXN[19] = 0 = no effect, Write does not imply XN (execute never)
 	 * I[12] = Instruction cachability control
 	 * SA[3] = SP alignment check
@@ -169,7 +186,8 @@  __start:
 	 * M[0] = 1, enable stage 1 address translation for EL0/1
 	 */
 	mrs	x0, sctlr_el1
-	ldr	x1, =0x100d			/* bits I(12) SA(3) C(2) M(0) */
+	/* Bits set: ATA(43) TCF(40) I(12) SA(3) C(2) M(0) */
+	ldr	x1, =(0x100d | 1 << 43 | 1 << 40)
 	bic	x0, x0, #(1 << 1)		/* clear bit A(1) */
 	bic	x0, x0, #(1 << 19)		/* clear WXN */
 	orr	x0, x0, x1			/* set bits */
@@ -239,3 +257,5 @@  ttb_stage2:
 stack:
 	.space 65536, 0
 stack_end:
+
+	.section .mte_page
diff --git a/tests/tcg/aarch64/system/kernel.ld b/tests/tcg/aarch64/system/kernel.ld
index 7b3a76dcbf..7c00c1c378 100644
--- a/tests/tcg/aarch64/system/kernel.ld
+++ b/tests/tcg/aarch64/system/kernel.ld
@@ -18,6 +18,11 @@  SECTIONS
     .bss : {
         *(.bss)
     }
+    /* align MTE section to next (third) 2mb */
+    . = ALIGN(1 << 22);
+    .mte : {
+        *(.mte_page)
+    }
     /DISCARD/ : {
         *(.ARM.attributes)
     }
diff --git a/tests/tcg/aarch64/system/mte.c b/tests/tcg/aarch64/system/mte.c
new file mode 100644
index 0000000000..58a5ac31ff
--- /dev/null
+++ b/tests/tcg/aarch64/system/mte.c
@@ -0,0 +1,40 @@ 
+#include <inttypes.h>
+
+int main(void)
+{
+    uint8_t *addr;
+
+    /*
+     * Third 2MB chunk in the second 1GB block.
+     * See .mte_page section in kernel.ld.
+     */
+    addr = (void *)((1UL << 30) | (1UL << 22));
+
+    asm volatile (
+        /*
+         * Set GCR for random tag generation. 0xA5 is just an arbitrary value
+         * to set GCR != 0 so the tag generated by 'irg' is not zero.
+         */
+        "ldr x1, =0xA5;"
+        "msr gcr_el1, x1;"
+
+        /* Generate a logical tag and put it in 'addr' pointer. */
+        "irg %[addr], %[addr];"
+
+        /*
+         * 'stg' takes the tag from its first (tagged address) operand and
+         * stores it as the allocation tag of the granule at [addr].
+         */
+        "stg %[addr], [%[addr]];"
+
+        /*
+         * Store an arbitrary value (0xdeadbeef) to *addr. This must not cause
+         * a Tag Check Fault since logical and allocation tags are the same.
+         */
+        "ldr x1, =0xdeadbeef;"
+        "str x1, [%[addr]];"
+        : [addr] "+r" (addr)
+        :
+        : "x1", "memory"
+    );
+}