diff mbox

[edk2] ArmVirtualizationPkg: Xen: shuffle init order to deal with incoherency

Message ID 1427386519-1882-1-git-send-email-ard.biesheuvel@linaro.org
State New
Headers show

Commit Message

Ard Biesheuvel March 26, 2015, 4:15 p.m. UTC
Hi all,

This supersedes the patches I sent before: simply deferring the use
of the Xen console solves most of the problems, although we obviously
need to tread carefully, hence this patch.

I am still seeing an issue where the debug build hangs on some seemingly
unrelated error condition unless i clean the data cache every time I
print something to the console, so I may need to followup with some
additional fixes.

------------------------->8------------------------
In order to prevent incoherency issues caused by the fact that, under
virtualization, the guest is incoherent with the hypervisor's view of
memory, this patch reshuffles the init sequence so that we only modify
memory that is covered by the static footprint of the binary image, or
that has explicitly been cleaned by virtual address beforehand.

NOTE: according to the ARM ARM, cache invalidate operations are demoted
to clean and invalidate when running under virtualization, which is the
reason why we need to be careful not to invalidate regions that we are
using with the caches off.

We rely on the fact that the arm64 boot protocol stipulates that the
entire Image should be clean to the PoC, and any modifications we
make to it should not suddenly disappear when the caches are enabled.
There is no such guarantee for any other memory regions, hence the need
for this approach.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 .../PrePi/AArch64/ModuleEntryPoint.S               | 133 ++++++---------------
 .../PrePi/ArmVirtPrePiUniCoreRelocatable.inf       |   1 +
 ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.c  |  66 +++++-----
 ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.h  |   3 +-
 4 files changed, 72 insertions(+), 131 deletions(-)
diff mbox

Patch

diff --git a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/AArch64/ModuleEntryPoint.S b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/AArch64/ModuleEntryPoint.S
index 568d0086d662..9e6b483ecf3a 100644
--- a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/AArch64/ModuleEntryPoint.S
+++ b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/AArch64/ModuleEntryPoint.S
@@ -26,8 +26,6 @@  GCC_ASM_IMPORT(ArmPlatformPeiBootAction)
 GCC_ASM_IMPORT(ArmPlatformStackSet)
 GCC_ASM_EXPORT(_ModuleEntryPoint)
 
-StartupAddr:        .8byte ASM_PFX(CEntryPoint)
-
 ASM_PFX(_ModuleEntryPoint):
   //
   // We are built as a ET_DYN PIE executable, so we need to process all
@@ -63,118 +61,57 @@  ASM_PFX(_ModuleEntryPoint):
   b     .Lreloc_loop
 .Lreloc_done:
 
+  //
+  // Use a statically allocated stack as long as we are still running
+  // with the MMU off. This prevents surprises when there are stale
+  // dirty cachelines shadowing our main stack when we enable the cache.
+  //
+  ldr   x8, TempStack
+  mov   sp, x8
+
   // Do early platform specific actions
   bl    ASM_PFX(ArmPlatformPeiBootAction)
 
-  // Get ID of this CPU in Multicore system
-  bl    ASM_PFX(ArmReadMpidr)
-  // Keep a copy of the MpId register value
-  mov   x10, x0
-
-// Check if we can install the stack at the top of the System Memory or if we need
-// to install the stacks at the bottom of the Firmware Device (case the FD is located
-// at the top of the DRAM)
-_SetupStackPosition:
-  // Compute Top of System Memory
   ldr   x1, PcdGet64 (PcdSystemMemoryBase)
   ldr   x2, PcdGet64 (PcdSystemMemorySize)
-  sub   x2, x2, #1
+  ldr   w3, PcdGet32 (PcdSystemMemoryUefiRegionSize)
   add   x1, x1, x2      // x1 = SystemMemoryTop = PcdSystemMemoryBase + PcdSystemMemorySize
+  sub   x25, x1, x3     // x25 = UefiMemoryBase
 
-  // Calculate Top of the Firmware Device
-  ldr   x2, PcdGet64 (PcdFdBaseAddress)
-  ldr   w3, PcdGet32 (PcdFdSize)
-  sub   x3, x3, #1
-  add   x3, x3, x2      // x3 = FdTop = PcdFdBaseAddress + PcdFdSize
+  ldr   w3, PcdGet32 (PcdCPUCorePrimaryStackSize)
+  sub   x26, x1, x3     // x26 = StacksBase
 
-  // UEFI Memory Size (stacks are allocated in this region)
-  LoadConstantToReg (FixedPcdGet32(PcdSystemMemoryUefiRegionSize), x4)
+  ldr   w3, PcdGet32 (PcdPeiGlobalVariableSize)
+  sub   x27, x1, x3     // x27 = GlobalVariableBase
+  and   x27, x27, ~0xf
 
-  //
-  // Reserve the memory for the UEFI region (contain stacks on its top)
-  //
+  // Get ID of this CPU in Multicore system
+  bl    ASM_PFX(ArmReadMpidr)
 
-  // Calculate how much space there is between the top of the Firmware and the Top of the System Memory
-  subs  x0, x1, x3   // x0 = SystemMemoryTop - FdTop
-  b.mi  _SetupStack  // Jump if negative (FdTop > SystemMemoryTop). Case when the PrePi is in XIP memory outside of the DRAM
-  cmp   x0, x4
-  b.ge  _SetupStack
-
-  // Case the top of stacks is the FdBaseAddress
-  mov   x1, x2
-
-_SetupStack:
-  // x1 contains the top of the stack (and the UEFI Memory)
-
-  // Because the 'push' instruction is equivalent to 'stmdb' (decrement before), we need to increment
-  // one to the top of the stack. We check if incrementing one does not overflow (case of DRAM at the
-  // top of the memory space)
-  adds  x11, x1, #1
-  b.cs  _SetupOverflowStack
-
-_SetupAlignedStack:
-  mov   x1, x11
-  b     _GetBaseUefiMemory
-
-_SetupOverflowStack:
-  // Case memory at the top of the address space. Ensure the top of the stack is EFI_PAGE_SIZE
-  // aligned (4KB)
-  LoadConstantToReg (EFI_PAGE_MASK, x11)
-  and   x11, x11, x1
-  sub   x1, x1, x11
-
-_GetBaseUefiMemory:
-  // Calculate the Base of the UEFI Memory
-  sub   x11, x1, x4
-
-_GetStackBase:
-  // r1 = The top of the Mpcore Stacks
-  // Stack for the primary core = PrimaryCoreStack
-  LoadConstantToReg (FixedPcdGet32(PcdCPUCorePrimaryStackSize), x2)
-  sub   x12, x1, x2
-
-  // Stack for the secondary core = Number of Cores - 1
-  LoadConstantToReg (FixedPcdGet32(PcdCoreCount), x0)
-  sub   x0, x0, #1
-  LoadConstantToReg (FixedPcdGet32(PcdCPUCoreSecondaryStackSize), x1)
-  mul   x1, x1, x0
-  sub   x12, x12, x1
-
-  // x12 = The base of the MpCore Stacks (primary stack & secondary stacks)
-  mov   x0, x12
-  mov   x1, x10
-  //ArmPlatformStackSet(StackBase, MpId, PrimaryStackSize, SecondaryStackSize)
-  LoadConstantToReg (FixedPcdGet32(PcdCPUCorePrimaryStackSize), x2)
-  LoadConstantToReg (FixedPcdGet32(PcdCPUCoreSecondaryStackSize), x3)
-  bl    ASM_PFX(ArmPlatformStackSet)
-
-  // Is it the Primary Core ?
-  mov   x0, x10
-  bl    ASM_PFX(ArmPlatformIsPrimaryCore)
-  cmp   x0, #1
-  bne   _PrepareArguments
-
-_ReserveGlobalVariable:
-  LoadConstantToReg (FixedPcdGet32(PcdPeiGlobalVariableSize), x0)
-  // InitializePrimaryStack($GlobalVariableSize, $Tmp1, $Tmp2)
-  InitializePrimaryStack(x0, x1, x2)
-
-_PrepareArguments:
-  mov   x0, x10
-  mov   x1, x11
-  mov   x2, x12
-  mov   x3, sp
-
-  // Move sec startup address into a data register
-  // Ensure we're jumping to FV version of the code (not boot remapped alias)
-  ldr   x4, StartupAddr
+  mov   x1, x25
+  mov   x2, x26
+  mov   x3, x27
 
   // Jump to PrePiCore C code
   //    x0 = MpId
   //    x1 = UefiMemoryBase
   //    x2 = StacksBase
   //    x3 = GlobalVariableBase
-  blr   x4
+  bl    CEntryPoint
+
+  mov   x0, x25
+  mov   x1, x26
+  mov   x2, x27
+  mov   sp, x27
+  bl    PrePiMain
 
 _NeverReturn:
   b _NeverReturn
+
+  .align  3
+TempStack:
+  .quad   0f
+  .bss
+  .align  4
+  .skip   4096
+0:
diff --git a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/ArmVirtPrePiUniCoreRelocatable.inf b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/ArmVirtPrePiUniCoreRelocatable.inf
index 86cf870b79ac..16023382a679 100755
--- a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/ArmVirtPrePiUniCoreRelocatable.inf
+++ b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/ArmVirtPrePiUniCoreRelocatable.inf
@@ -55,6 +55,7 @@ 
   PrePiHobListPointerLib
   PlatformPeiLib
   MemoryInitPeiLib
+  CacheMaintenanceLib
 
 [Ppis]
   gArmMpCoreInfoPpiGuid
diff --git a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.c b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.c
index 0772805890f2..0dbaedc19fce 100755
--- a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.c
+++ b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.c
@@ -20,6 +20,7 @@ 
 #include <Library/PrePiHobListPointerLib.h>
 #include <Library/TimerLib.h>
 #include <Library/PerformanceLib.h>
+#include <Library/CacheMaintenanceLib.h>
 
 #include <Ppi/GuidedSectionExtraction.h>
 #include <Ppi/ArmMpCoreInfo.h>
@@ -89,18 +90,23 @@  VOID
 PrePiMain (
   IN  UINTN                     UefiMemoryBase,
   IN  UINTN                     StacksBase,
-  IN  UINTN                     GlobalVariableBase,
-  IN  UINT64                    StartTimeStamp
+  IN  UINTN                     GlobalVariableBase
   )
 {
-  EFI_HOB_HANDOFF_INFO_TABLE*   HobList;
   EFI_STATUS                    Status;
+  UINT64                        StartTimeStamp;
   CHAR8                         Buffer[100];
   UINTN                         CharCount;
   UINTN                         StacksSize;
 
-  // Initialize the architecture specific bits
-  ArchInitialize ();
+  if (PerformanceMeasurementEnabled ()) {
+    // Initialize the Timer Library to setup the Timer HW controller
+    TimerConstructor ();
+    // We cannot call yet the PerformanceLib because the HOB List has not been initialized
+    StartTimeStamp = GetPerformanceCounter ();
+  } else {
+    StartTimeStamp = 0;
+  }
 
   // Initialize the Serial Port
   SerialPortInitialize ();
@@ -108,19 +114,6 @@  PrePiMain (
     (CHAR16*)PcdGetPtr(PcdFirmwareVersionString), __TIME__, __DATE__);
   SerialPortWrite ((UINT8 *) Buffer, CharCount);
 
-  // Declare the PI/UEFI memory region
-  HobList = HobConstructor (
-    (VOID*)UefiMemoryBase,
-    FixedPcdGet32 (PcdSystemMemoryUefiRegionSize),
-    (VOID*)UefiMemoryBase,
-    (VOID*)StacksBase  // The top of the UEFI Memory is reserved for the stacks
-    );
-  PrePeiSetHobList (HobList);
-
-  // Initialize MMU and Memory HOBs (Resource Descriptor HOBs)
-  Status = MemoryPeim (UefiMemoryBase, FixedPcdGet32 (PcdSystemMemoryUefiRegionSize));
-  ASSERT_EFI_ERROR (Status);
-
   // Create the Stacks HOB (reserve the memory for all stacks)
   StacksSize = PcdGet32 (PcdCPUCorePrimaryStackSize);
   BuildStackHob (StacksBase, StacksSize);
@@ -170,20 +163,15 @@  CEntryPoint (
   IN  UINTN                     GlobalVariableBase
   )
 {
-  UINT64   StartTimeStamp;
+  EFI_HOB_HANDOFF_INFO_TABLE*   HobList;
+  EFI_STATUS                    Status;
+
+  // Initialize the architecture specific bits
+  ArchInitialize ();
 
   // Initialize the platform specific controllers
   ArmPlatformInitialize (MpId);
 
-  if (PerformanceMeasurementEnabled ()) {
-    // Initialize the Timer Library to setup the Timer HW controller
-    TimerConstructor ();
-    // We cannot call yet the PerformanceLib because the HOB List has not been initialized
-    StartTimeStamp = GetPerformanceCounter ();
-  } else {
-    StartTimeStamp = 0;
-  }
-
   // Data Cache enabled on Primary core when MMU is enabled.
   ArmDisableDataCache ();
   // Invalidate Data cache
@@ -193,11 +181,27 @@  CEntryPoint (
   // Enable Instruction Caches on all cores.
   ArmEnableInstructionCache ();
 
+  //
+  // Invalidate the PI/UEFI memory region explicitly before populating it.
+  // This operation will be demoted to a clean+invalidate if we are running
+  // under virtualization, and cleaning the cache while it is off can clobber
+  // our data.
+  //
+  InvalidateDataCacheRange((VOID*)UefiMemoryBase, FixedPcdGet32 (PcdSystemMemoryUefiRegionSize));
+
   // Define the Global Variable region
   mGlobalVariableBase = GlobalVariableBase;
 
-  PrePiMain (UefiMemoryBase, StacksBase, GlobalVariableBase, StartTimeStamp);
+  // Declare the PI/UEFI memory region
+  HobList = HobConstructor (
+    (VOID*)UefiMemoryBase,
+    FixedPcdGet32 (PcdSystemMemoryUefiRegionSize),
+    (VOID*)UefiMemoryBase,
+    (VOID*)StacksBase  // The top of the UEFI Memory is reserved for the stacks
+    );
+  PrePeiSetHobList (HobList);
 
-  // DXE Core should always load and never return
-  ASSERT (FALSE);
+  // Initialize MMU and Memory HOBs (Resource Descriptor HOBs)
+  Status = MemoryPeim (UefiMemoryBase, FixedPcdGet32 (PcdSystemMemoryUefiRegionSize));
+  ASSERT_EFI_ERROR (Status);
 }
diff --git a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.h b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.h
index 517429fab9a4..88476224f8a8 100644
--- a/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.h
+++ b/ArmPlatformPkg/ArmVirtualizationPkg/PrePi/PrePi.h
@@ -39,8 +39,7 @@  VOID
 PrePiMain (
   IN  UINTN                     UefiMemoryBase,
   IN  UINTN                     StacksBase,
-  IN  UINTN                     GlobalVariableBase,
-  IN  UINT64                    StartTimeStamp
+  IN  UINTN                     GlobalVariableBase
   );
 
 EFI_STATUS