[edk2,v3] ArmPkg/AArch64Mmu: disable MMU during page table manipulations

Message ID 1460444266-10166-1-git-send-email-ard.biesheuvel@linaro.org
State Superseded
Headers show

Commit Message

Ard Biesheuvel April 12, 2016, 6:57 a.m.
On ARM, manipulating live page tables is cumbersome since the architecture
mandates the use of break-before-make, i.e., replacing a block entry with
a table entry requires an intermediate step via an invalid entry, or TLB
conflicts may occur.

Since it is not generally feasible to decide in the page table manipulation
routines whether such an invalid entry will result in those routines
themselves becoming unavailable, use a function that is callable with
the MMU off (i.e., a leaf function that does not access the stack) to
perform the change of a block entry into a table entry.

Note that the opposite should never occur, i.e., table entries are never
coalesced into block entries.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

---
 ArmPkg/Include/Library/ArmLib.h                       |  6 +++
 ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf          |  5 +-
 ArmPkg/Library/ArmLib/AArch64/AArch64LibConstructor.c | 36 +++++++++++++
 ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c            | 17 +++++-
 ArmPkg/Library/ArmLib/AArch64/AArch64Support.S        | 57 ++++++++++++++++++++
 5 files changed, 119 insertions(+), 2 deletions(-)

-- 
2.5.0

_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel

Comments

Mark Rutland April 14, 2016, 11:17 a.m. | #1
On Tue, Apr 12, 2016 at 08:57:46AM +0200, Ard Biesheuvel wrote:
> On ARM, manipulating live page tables is cumbersome since the architecture

> mandates the use of break-before-make, i.e., replacing a block entry with

> a table entry requires an intermediate step via an invalid entry, or TLB

> conflicts may occur.

> 

> Since it is not generally feasible to decide in the page table manipulation

> routines whether such an invalid entry will result in those routines

> themselves to become unavailable, use a function that is callable with

> the MMU off (i.e., a leaf function that does not access the stack) to

> perform the change of a block entry into a table entry.

> 

> Note that the opposite should never occur, i.e., table entries are never

> coalesced into block entries.

> 

> Contributed-under: TianoCore Contribution Agreement 1.0

> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> ---

>  ArmPkg/Include/Library/ArmLib.h                       |  6 +++

>  ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf          |  5 +-

>  ArmPkg/Library/ArmLib/AArch64/AArch64LibConstructor.c | 36 +++++++++++++

>  ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c            | 17 +++++-

>  ArmPkg/Library/ArmLib/AArch64/AArch64Support.S        | 57 ++++++++++++++++++++

>  5 files changed, 119 insertions(+), 2 deletions(-)



> +RETURN_STATUS

> +EFIAPI

> +AArch64LibConstructor (

> +  VOID

> +  )

> +{

> +  extern UINT32 ArmReplaceLiveTranslationEntrySize;

> +

> +  //

> +  // The ArmReplaceLiveTranslationEntry () helper function may be invoked

> +  // with the MMU off so we have to ensure that it gets cleaned to the PoC

> +  //

> +  WriteBackDataCacheRange (ArmReplaceLiveTranslationEntry,

> +    ArmReplaceLiveTranslationEntrySize);

> +

> +  return RETURN_SUCCESS;

> +}


I take it that ArmReplaceLiveTranslationEntry isn't part of the static
image (e.g. it gets relocated or copied after the MMU is enabled for the
first time)?

Otherwise I'm not following why the code wouldn't be clean to the PoC.

> +  // write an entry and invalidate it in the caches

> +  .macro __set_entry, reg

> +  str   \reg, [x0]

> +  dmb   sy

> +  dc    ivac, x0

> +  dsb   sy

> +  .endm

> +

> +  .macro __replace_entry, el

> +  mrs   x8, sctlr_el\el

> +  bic   x9, x8, #CTRL_M_BIT  // Clear MMU enable bit

> +  msr   sctlr_el\el, x9

> +  isb

> +

> +  __set_entry xzr    // write invalid entry


The MMU is off, and thus there are no TLB walks, so I don't see the
point in storing+invalidating the invalid entry here.

It might be better to hoist the DC CIVAC from
ArmReplaceLiveTranslationEntry to here, to keep the maintenance
together, and avoid the redundant invalidate.

With the __set_entry macro folded in here, it would be a little clearer,
too.

Other than that, the general strategy seems sound to me.

Thanks,
Mark.

> +  .if   \el == 1

> +  tlbi  vmalle1

> +  .else

> +  tlbi  alle\el

> +  .endif

> +  dsb   sy

> +  __set_entry x1     // write updated entry

> +  msr   sctlr_el\el, x8

> +  isb

> +  .endm

> +

> +//VOID

> +//ArmReplaceLiveTranslationEntry (

> +//  IN  UINT64  *Entry,

> +//  IN  UINT64  Value

> +//  )

> +ASM_PFX(ArmReplaceLiveTranslationEntry):

> +

> +  // disable interrupts

> +  mrs   x2, daif

> +  msr   daifset, #0xf

> +  isb

> +

> +  // clean and invalidate first so that we don't clobber adjacent entries

> +  // that are dirty in the caches

> +  dc    civac, x0

> +  dsb   ish

> +

> +  EL1_OR_EL2_OR_EL3(x3)

> +1:__replace_entry 1

> +  b     4f

> +2:__replace_entry 2

> +  b     4f

> +3:__replace_entry 3

> +4:msr   daif, x2

> +  ret

> +

> +ASM_PFX(ArmReplaceLiveTranslationEntrySize):

> +   .long    . - ArmReplaceLiveTranslationEntry

> +

>  ASM_FUNCTION_REMOVE_IF_UNREFERENCED

> -- 

> 2.5.0

> 

_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel
Ard Biesheuvel April 14, 2016, 11:36 a.m. | #2
On 14 April 2016 at 13:17, Mark Rutland <mark.rutland@arm.com> wrote:
> On Tue, Apr 12, 2016 at 08:57:46AM +0200, Ard Biesheuvel wrote:

>> On ARM, manipulating live page tables is cumbersome since the architecture

>> mandates the use of break-before-make, i.e., replacing a block entry with

>> a table entry requires an intermediate step via an invalid entry, or TLB

>> conflicts may occur.

>>

>> Since it is not generally feasible to decide in the page table manipulation

>> routines whether such an invalid entry will result in those routines

>> themselves to become unavailable, use a function that is callable with

>> the MMU off (i.e., a leaf function that does not access the stack) to

>> perform the change of a block entry into a table entry.

>>

>> Note that the opposite should never occur, i.e., table entries are never

>> coalesced into block entries.

>>

>> Contributed-under: TianoCore Contribution Agreement 1.0

>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

>> ---

>>  ArmPkg/Include/Library/ArmLib.h                       |  6 +++

>>  ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf          |  5 +-

>>  ArmPkg/Library/ArmLib/AArch64/AArch64LibConstructor.c | 36 +++++++++++++

>>  ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c            | 17 +++++-

>>  ArmPkg/Library/ArmLib/AArch64/AArch64Support.S        | 57 ++++++++++++++++++++

>>  5 files changed, 119 insertions(+), 2 deletions(-)

>

>

>> +RETURN_STATUS

>> +EFIAPI

>> +AArch64LibConstructor (

>> +  VOID

>> +  )

>> +{

>> +  extern UINT32 ArmReplaceLiveTranslationEntrySize;

>> +

>> +  //

>> +  // The ArmReplaceLiveTranslationEntry () helper function may be invoked

>> +  // with the MMU off so we have to ensure that it gets cleaned to the PoC

>> +  //

>> +  WriteBackDataCacheRange (ArmReplaceLiveTranslationEntry,

>> +    ArmReplaceLiveTranslationEntrySize);

>> +

>> +  return RETURN_SUCCESS;

>> +}

>

> I take it that ArmReplaceLiveTranslationEntry isn't part of the static

> image (e.g. it gets relocated or copied after the MMU is enabled for the

> first time)?

>

> Otherwise I'm not following why the code wouldn't be clean to the PoC.

>


Yes, the various DXE modules are loaded from a compressed firmware
volume by a PE/COFF loader which may clean to the PoU only.

>> +  // write an entry and invalidate it in the caches

>> +  .macro __set_entry, reg

>> +  str   \reg, [x0]

>> +  dmb   sy

>> +  dc    ivac, x0

>> +  dsb   sy

>> +  .endm

>> +

>> +  .macro __replace_entry, el

>> +  mrs   x8, sctlr_el\el

>> +  bic   x9, x8, #CTRL_M_BIT  // Clear MMU enable bit

>> +  msr   sctlr_el\el, x9

>> +  isb

>> +

>> +  __set_entry xzr    // write invalid entry

>

> The MMU is off, and thus there are no TLB walks, so I don't see the

> point in storing+invalidating the invalid entry here.

>


Do you mean storing is sufficient? Or that we don't need bbm in the
first place when disabling the MMU? Because it seems to me that one
implies the other: if we can flush the TLBs without the risk of them
refetching the stale entries, we can just write the new value here and
be done with it.

> It might be better to hoist the DC CIVAC from

> ArmReplaceLiveTranslationEntry to here, to keep the maintenance

> together, and avoid the redundant invalidate.

>

> With the __set_entry macro folded in here, it would be a little clearer,

> too.

>

> Other than that, the general strategy seems sound to me.

>


Thanks.

>> +  .if   \el == 1

>> +  tlbi  vmalle1

>> +  .else

>> +  tlbi  alle\el

>> +  .endif

>> +  dsb   sy

>> +  __set_entry x1     // write updated entry

>> +  msr   sctlr_el\el, x8

>> +  isb

>> +  .endm

>> +

>> +//VOID

>> +//ArmReplaceLiveTranslationEntry (

>> +//  IN  UINT64  *Entry,

>> +//  IN  UINT64  Value

>> +//  )

>> +ASM_PFX(ArmReplaceLiveTranslationEntry):

>> +

>> +  // disable interrupts

>> +  mrs   x2, daif

>> +  msr   daifset, #0xf

>> +  isb

>> +

>> +  // clean and invalidate first so that we don't clobber adjacent entries

>> +  // that are dirty in the caches

>> +  dc    civac, x0

>> +  dsb   ish

>> +

>> +  EL1_OR_EL2_OR_EL3(x3)

>> +1:__replace_entry 1

>> +  b     4f

>> +2:__replace_entry 2

>> +  b     4f

>> +3:__replace_entry 3

>> +4:msr   daif, x2

>> +  ret

>> +

>> +ASM_PFX(ArmReplaceLiveTranslationEntrySize):

>> +   .long    . - ArmReplaceLiveTranslationEntry

>> +

>>  ASM_FUNCTION_REMOVE_IF_UNREFERENCED

>> --

>> 2.5.0

>>

_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel
Mark Rutland April 14, 2016, 12:52 p.m. | #3
On Thu, Apr 14, 2016 at 01:36:00PM +0200, Ard Biesheuvel wrote:
> On 14 April 2016 at 13:17, Mark Rutland <mark.rutland@arm.com> wrote:

> > On Tue, Apr 12, 2016 at 08:57:46AM +0200, Ard Biesheuvel wrote:

> > I take it that ArmReplaceLiveTranslationEntry isn't part of the static

> > image (e.g. it gets relocated or copied after the MMU is enabled for the

> > first time)?

> 

> Yes, the various DXE modules are loaded from a compressed firmware

> volume by a PE/COFF loader which may clean to the PoU only.


Ok.

> >> +  // write an entry and invalidate it in the caches

> >> +  .macro __set_entry, reg

> >> +  str   \reg, [x0]

> >> +  dmb   sy

> >> +  dc    ivac, x0

> >> +  dsb   sy

> >> +  .endm

> >> +

> >> +  .macro __replace_entry, el

> >> +  mrs   x8, sctlr_el\el

> >> +  bic   x9, x8, #CTRL_M_BIT  // Clear MMU enable bit

> >> +  msr   sctlr_el\el, x9

> >> +  isb

> >> +

> >> +  __set_entry xzr    // write invalid entry

> >

> > The MMU is off, and thus there are no TLB walks, so I don't see the

> > point in storing+invalidating the invalid entry here.

> 

> Do you mean storing is sufficient? Or that we don't need bbm in the

> first place when disabling the MMU?


With the MMU off we don't need to follow a strict BBM sequence.

> Because it seems to me that one implies the other: if we can flush the

> TLBs without the risk of them refetching the stale entries, we can

> just write the new value here and be done with it.


Yes, provided we invalidate the TLBs while the MMU is off, and have the
appropriate cache maintenance.

Thanks,
Mark.
_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel

Patch

diff --git a/ArmPkg/Include/Library/ArmLib.h b/ArmPkg/Include/Library/ArmLib.h
index 15f610d82e1d..1689f0072db6 100644
--- a/ArmPkg/Include/Library/ArmLib.h
+++ b/ArmPkg/Include/Library/ArmLib.h
@@ -613,4 +613,10 @@  ArmClearMemoryRegionReadOnly (
   IN  UINT64                    Length
   );
 
+VOID
+ArmReplaceLiveTranslationEntry (
+  IN  UINT64  *Entry,
+  IN  UINT64  Value
+  );
+
 #endif // __ARM_LIB__
diff --git a/ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf b/ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf
index dd585dea91fb..58684e8492f2 100644
--- a/ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf
+++ b/ArmPkg/Library/ArmLib/AArch64/AArch64Lib.inf
@@ -17,9 +17,10 @@  [Defines]
   INF_VERSION                    = 0x00010005
   BASE_NAME                      = AArch64Lib
   FILE_GUID                      = ef20ddf5-b334-47b3-94cf-52ff44c29138
-  MODULE_TYPE                    = DXE_DRIVER
+  MODULE_TYPE                    = BASE
   VERSION_STRING                 = 1.0
   LIBRARY_CLASS                  = ArmLib
+  CONSTRUCTOR                    = AArch64LibConstructor
 
 [Sources.AARCH64]
   AArch64Lib.c
@@ -31,6 +32,7 @@  [Sources.AARCH64]
 
   ../Common/AArch64/ArmLibSupport.S
   ../Common/ArmLib.c
+  AArch64LibConstructor.c
 
 [Packages]
   ArmPkg/ArmPkg.dec
@@ -38,6 +40,7 @@  [Packages]
 
 [LibraryClasses]
   MemoryAllocationLib
+  CacheMaintenanceLib
 
 [Protocols]
   gEfiCpuArchProtocolGuid
diff --git a/ArmPkg/Library/ArmLib/AArch64/AArch64LibConstructor.c b/ArmPkg/Library/ArmLib/AArch64/AArch64LibConstructor.c
new file mode 100644
index 000000000000..d2d0d3c15ee3
--- /dev/null
+++ b/ArmPkg/Library/ArmLib/AArch64/AArch64LibConstructor.c
@@ -0,0 +1,36 @@ 
+#/* @file
+#
+#  Copyright (c) 2016, Linaro Limited. All rights reserved.
+#
+#  This program and the accompanying materials
+#  are licensed and made available under the terms and conditions of the BSD License
+#  which accompanies this distribution.  The full text of the license may be found at
+#  http://opensource.org/licenses/bsd-license.php
+#
+#  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#*/
+
+#include <Base.h>
+
+#include <Library/ArmLib.h>
+#include <Library/CacheMaintenanceLib.h>
+
+RETURN_STATUS
+EFIAPI
+AArch64LibConstructor (
+  VOID
+  )
+{
+  extern UINT32 ArmReplaceLiveTranslationEntrySize;
+
+  //
+  // The ArmReplaceLiveTranslationEntry () helper function may be invoked
+  // with the MMU off so we have to ensure that it gets cleaned to the PoC
+  //
+  WriteBackDataCacheRange (ArmReplaceLiveTranslationEntry,
+    ArmReplaceLiveTranslationEntrySize);
+
+  return RETURN_SUCCESS;
+}
diff --git a/ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c b/ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c
index b7d23c6f3286..2cc6fc45aecf 100644
--- a/ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c
+++ b/ArmPkg/Library/ArmLib/AArch64/AArch64Mmu.c
@@ -169,6 +169,20 @@  GetRootTranslationTableInfo (
 
 STATIC
 VOID
+ReplaceLiveEntry (
+  IN  UINT64  *Entry,
+  IN  UINT64  Value
+  )
+{
+  if (!ArmMmuEnabled ()) {
+    *Entry = Value;
+  } else {
+    ArmReplaceLiveTranslationEntry (Entry, Value);
+  }
+}
+
+STATIC
+VOID
 LookupAddresstoRootTable (
   IN  UINT64  MaxAddress,
   OUT UINTN  *T0SZ,
@@ -330,7 +344,8 @@  GetBlockEntryListFromAddress (
         }
 
         // Fill the BlockEntry with the new TranslationTable
-        *BlockEntry = ((UINTN)TranslationTable & TT_ADDRESS_MASK_DESCRIPTION_TABLE) | TableAttributes | TT_TYPE_TABLE_ENTRY;
+        ReplaceLiveEntry (BlockEntry,
+          ((UINTN)TranslationTable & TT_ADDRESS_MASK_DESCRIPTION_TABLE) | TableAttributes | TT_TYPE_TABLE_ENTRY);
       }
     } else {
       if (IndexLevel != PageLevel) {
diff --git a/ArmPkg/Library/ArmLib/AArch64/AArch64Support.S b/ArmPkg/Library/ArmLib/AArch64/AArch64Support.S
index 1a3023b79487..9cb51d4fa283 100644
--- a/ArmPkg/Library/ArmLib/AArch64/AArch64Support.S
+++ b/ArmPkg/Library/ArmLib/AArch64/AArch64Support.S
@@ -56,6 +56,8 @@  GCC_ASM_EXPORT (ArmReadIdPfr1)
 GCC_ASM_EXPORT (ArmWriteHcr)
 GCC_ASM_EXPORT (ArmReadHcr)
 GCC_ASM_EXPORT (ArmReadCurrentEL)
+GCC_ASM_EXPORT (ArmReplaceLiveTranslationEntry)
+GCC_ASM_EXPORT (ArmReplaceLiveTranslationEntrySize)
 
 .set CTRL_M_BIT,      (1 << 0)
 .set CTRL_A_BIT,      (1 << 1)
@@ -481,4 +483,59 @@  ASM_PFX(ArmReadCurrentEL):
   mrs   x0, CurrentEL
   ret
 
+  // write an entry and invalidate it in the caches
+  .macro __set_entry, reg
+  str   \reg, [x0]
+  dmb   sy
+  dc    ivac, x0
+  dsb   sy
+  .endm
+
+  .macro __replace_entry, el
+  mrs   x8, sctlr_el\el
+  bic   x9, x8, #CTRL_M_BIT  // Clear MMU enable bit
+  msr   sctlr_el\el, x9
+  isb
+
+  __set_entry xzr    // write invalid entry
+  .if   \el == 1
+  tlbi  vmalle1
+  .else
+  tlbi  alle\el
+  .endif
+  dsb   sy
+  __set_entry x1     // write updated entry
+  msr   sctlr_el\el, x8
+  isb
+  .endm
+
+//VOID
+//ArmReplaceLiveTranslationEntry (
+//  IN  UINT64  *Entry,
+//  IN  UINT64  Value
+//  )
+ASM_PFX(ArmReplaceLiveTranslationEntry):
+
+  // disable interrupts
+  mrs   x2, daif
+  msr   daifset, #0xf
+  isb
+
+  // clean and invalidate first so that we don't clobber adjacent entries
+  // that are dirty in the caches
+  dc    civac, x0
+  dsb   ish
+
+  EL1_OR_EL2_OR_EL3(x3)
+1:__replace_entry 1
+  b     4f
+2:__replace_entry 2
+  b     4f
+3:__replace_entry 3
+4:msr   daif, x2
+  ret
+
+ASM_PFX(ArmReplaceLiveTranslationEntrySize):
+   .long    . - ArmReplaceLiveTranslationEntry
+
 ASM_FUNCTION_REMOVE_IF_UNREFERENCED