diff mbox series

[v3,1/2] semihosting/arm-compat: replace heuristic for softmmu SYS_HEAPINFO

Message ID 20210615165800.23265-2-alex.bennee@linaro.org
State New
Headers show
Series semihosting/next (SYS_HEAPINFO) | expand

Commit Message

Alex Bennée June 15, 2021, 4:57 p.m. UTC
The previous numbers were a guess at best and rather arbitrary without
taking into account anything that might be loaded. Instead of using
guesses based on the state of registers implement a new function that
scans MemoryRegions for the RAM of the current address space and then
looks for the lowest address above any ROM blobs (which include
-kernel loaded code).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Cc: Andrew Strauss <astrauss11@gmail.com>
Message-Id: <20210601090715.22330-1-alex.bennee@linaro.org>

---
v2
  - report some known information (limits)
  - reword the commit message
v3
  - rework to use the ROM blob scanning suggested by Peter
  - drop arch specific wrappers
  - dropped rb/tb tags as it's a rework
---
 include/hw/loader.h           |  10 +++
 hw/core/loader.c              |  19 +++++
 semihosting/arm-compat-semi.c | 131 ++++++++++++++++++----------------
 3 files changed, 99 insertions(+), 61 deletions(-)

-- 
2.20.1

Comments

Peter Maydell June 17, 2021, 7:22 p.m. UTC | #1
On Tue, 15 Jun 2021 at 18:01, Alex Bennée <alex.bennee@linaro.org> wrote:
>

> The previous numbers were a guess at best and rather arbitrary without

> taking into account anything that might be loaded. Instead of using

> guesses based on the state of registers implement a new function that

> scans MemoryRegions for the RAM of the current address space and then

> looks for the lowest address above any ROM blobs (which include

> -kernel loaded code).

>

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Cc: Andrew Strauss <astrauss11@gmail.com>

> Message-Id: <20210601090715.22330-1-alex.bennee@linaro.org>

>

> ---

> v2

>   - report some known information (limits)

>   - reword the commit message

> v3

>   - rework to use the ROM blob scanning suggested by Peter

>   - drop arch specific wrappers

>   - dropped rb/tb tags as it's a rework

> ---

>  include/hw/loader.h           |  10 +++

>  hw/core/loader.c              |  19 +++++

>  semihosting/arm-compat-semi.c | 131 ++++++++++++++++++----------------

>  3 files changed, 99 insertions(+), 61 deletions(-)

>

> diff --git a/include/hw/loader.h b/include/hw/loader.h

> index cbfc184873..037828e94d 100644

> --- a/include/hw/loader.h

> +++ b/include/hw/loader.h

> @@ -349,4 +349,14 @@ int rom_add_option(const char *file, int32_t bootindex);

>   * overflow on real hardware too. */

>  #define UBOOT_MAX_GUNZIP_BYTES (64 << 20)

>

> +/**

> + * rom_find_highest_addr: return highest address of ROM in region

> + *

> + * This function is used to find the highest ROM address (or loaded

> + * blob) so we can advise where true heap memory may be.

> + *

> + * Returns: highest found address in region

> + */

> +hwaddr rom_find_highest_addr(hwaddr base, size_t size);

> +

>  #endif

> diff --git a/hw/core/loader.c b/hw/core/loader.c

> index 5b34869a54..05003556ee 100644

> --- a/hw/core/loader.c

> +++ b/hw/core/loader.c

> @@ -1310,6 +1310,25 @@ static Rom *find_rom(hwaddr addr, size_t size)

>      return NULL;

>  }

>

> +hwaddr rom_find_highest_addr(hwaddr base, size_t size)

> +{

> +    Rom *rom;

> +    hwaddr lowest = base;

> +

> +    QTAILQ_FOREACH(rom, &roms, next) {


You should ignore roms with rom->mr non-NULL (which are rom blobs
that have been set up to be loaded into a specific MR), and ones with
fw_file non NULL (which are rom blobs that have been set up to be
loaded into the fw_cfg device).

> +        if (rom->addr < base) {

> +            continue;

> +        }

> +        if (rom->addr + rom->romsize > base + size) {

> +            continue;

> +        }


This will incorrectly ignore ROM blobs that start below
'base' but end partway through it, as well as blobs that
start within the region but end after it.

> +        if (rom->addr + rom->romsize > lowest) {

> +            lowest = rom->addr + rom->romsize;

> +        }

> +    }


There's a cute algorithm (suggested by a friend) which you could
use to find the largest unoccupied chunk of (base, size) rather than
merely looking for the unoccupied part at the top of it:

(1) iterate through the rom list, constructing a list of tuples
(hwaddr addr, int count). For every relevant rom, add two entries
to the tuple list: (rom->addr, 1) and (rom->addr + rom->romsize, -1)
(2) Sort the tuple list by addr; break ties by sorting (x,1) before (x,-1)
(3) Set gapstart to 0.
(4) iterate through the sorted tuple list, keeping a running count
(increment the count for each (x, 1) tuple, decrement for (x, -1) tuples).
If the count goes from 0 to 1, you've just left an empty section of
the range, and there was a gap from gapstart to x-1 (so if that's the
largest gap you've seen so far, remember it). If the count goes from
1 to 0, that's the beginning of a new gap, so set gapstart to x.
(5) Return whatever is the largest gap we saw.

Exercises for the reader:
 (a) locate any off-by-one errors in the above sketch
 (b) account for the fact that gaps outside our (base,size)
     are of no use to us and should be ignored, and gaps that
     overlap with our range need to be clipped
 (c) decide whether you think this is worth doing :-)

> +    return lowest;

> +}

> +

>  /*

>   * Copies memory from registered ROMs to dest. Any memory that is contained in

>   * a ROM between addr and addr + size is copied. Note that this can involve

> diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c

> index 1c29146dcf..a276161181 100644

> --- a/semihosting/arm-compat-semi.c

> +++ b/semihosting/arm-compat-semi.c

> @@ -44,6 +44,7 @@

>  #else

>  #include "exec/gdbstub.h"

>  #include "qemu/cutils.h"

> +#include "hw/loader.h"

>  #ifdef TARGET_ARM

>  #include "hw/arm/boot.h"

>  #endif

> @@ -144,33 +145,71 @@ typedef struct GuestFD {

>  static GArray *guestfd_array;

>

>  #ifndef CONFIG_USER_ONLY

> -#include "exec/address-spaces.h"

> -/*

> - * Find the base of a RAM region containing the specified address

> +

> +/**

> + * common_semi_find_bases: find information about ram and heap base

> + *

> + * This function attempts to provide meaningful numbers for RAM and

> + * HEAP base addresses. The rambase is simply the lowest addressable

> + * RAM position. For the heapbase we scan though the address space and

> + * return the first available address above any ROM regions created by

> + * the loaders.

> + *

> + * Returns: a structure with the numbers we need.

>   */

> -static inline hwaddr

> -common_semi_find_region_base(hwaddr addr)

> +

> +typedef struct LayoutInfo {

> +    target_ulong rambase;

> +    size_t ramsize;

> +    target_ulong heapbase;

> +    target_ulong heaplimit;

> +    target_ulong stackbase;

> +    target_ulong stacklimit;


You should work in 'hwaddr's at least while you're working with
addresses from the memory subsystem; convert to target_ulong only
at the end (and it would probably be a good idea to catch the
improbable case of "turns out the RAM we found for the heap is
beyond the range of a target_ulong"...)

> +} LayoutInfo;

> +

> +static bool find_ram_cb(Int128 start, Int128 len, const MemoryRegion *mr,

> +                        hwaddr offset_in_region, void *opaque)

>  {

> -    MemoryRegion *subregion;

> +    LayoutInfo *info = (LayoutInfo *) opaque;

> +

> +    if (!mr->ram || mr->readonly) {

> +        return false;

> +    }

> +

> +    info->rambase = mr->addr;

> +    info->ramsize = int128_get64(len);

> +

> +    return true;


This will pick the first MR it finds that happens to be RAM (which
could be some tiny thing). You don't want that, you want specifically
whatever the board decided was the system RAM, which is the MemoryRegion
MachineState::ram.

mr->addr is also not the address of the MR in the flatview, it's
the address of the MR in its container, which might not start at the
base of the address space.

In this case what we want is the address within the flatrange
of the first visible bit of that MR, which is just 'start'.

> +}

> +

> +static LayoutInfo common_semi_find_bases(CPUState *cs)

> +{

> +    FlatView *fv;

> +    LayoutInfo info = { 0, 0, 0, 0, 0, 0 };

> +

> +    RCU_READ_LOCK_GUARD();

> +

> +    fv = address_space_to_flatview(cs->as);

> +    flatview_for_each_range(fv, find_ram_cb, &info);

>

>      /*

> -     * Find the chunk of R/W memory containing the address.  This is

> -     * used for the SYS_HEAPINFO semihosting call, which should

> -     * probably be using information from the loaded application.

> +     * If we have found the RAM lets iterate through the ROM blobs to

> +     * workout the best place for the remainder of RAM and split it

> +     * equally between stack and heap.

>       */

> -    QTAILQ_FOREACH(subregion, &get_system_memory()->subregions,

> -                   subregions_link) {

> -        if (subregion->ram && !subregion->readonly) {

> -            Int128 top128 = int128_add(int128_make64(subregion->addr),

> -                                       subregion->size);

> -            Int128 addr128 = int128_make64(addr);

> -            if (subregion->addr <= addr && int128_lt(addr128, top128)) {

> -                return subregion->addr;

> -            }

> -        }

> +    if (info.rambase && info.ramsize) {

> +        hwaddr limit = info.rambase + info.ramsize;

> +        size_t space;

> +        info.heapbase = rom_find_highest_addr(info.rambase, info.ramsize);

> +        space = QEMU_ALIGN_DOWN((limit - info.heapbase) / 2, TARGET_PAGE_SIZE);

> +        info.heaplimit = info.heapbase + space;

> +        info.stackbase = info.rambase + info.ramsize;

> +        info.stacklimit = info.stackbase - space;


I don't think we need to divide the memory into separate stack
and heap like this -- guests can probably handle the heaplimit
being the same as the stackbase and the stacklimit being the
same as the heapbase, which is what we've been giving them so far.

>      }

> -    return 0;

> +

> +    return info;


This will recalculate it for every call, but I don't suppose the
guest is going to call it more than once so that's fine.

>  }

> +

>  #endif

>

>  #ifdef TARGET_ARM

> @@ -204,28 +243,6 @@ common_semi_sys_exit_extended(CPUState *cs, int nr)

>      return (nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cs->env_ptr));

>  }

>

> -#ifndef CONFIG_USER_ONLY

> -#include "hw/arm/boot.h"

> -static inline target_ulong

> -common_semi_rambase(CPUState *cs)

> -{

> -    CPUArchState *env = cs->env_ptr;

> -    const struct arm_boot_info *info = env->boot_info;

> -    target_ulong sp;

> -

> -    if (info) {

> -        return info->loader_start;

> -    }

> -

> -    if (is_a64(env)) {

> -        sp = env->xregs[31];

> -    } else {

> -        sp = env->regs[13];

> -    }

> -    return common_semi_find_region_base(sp);

> -}

> -#endif

> -

>  #endif /* TARGET_ARM */

>

>  #ifdef TARGET_RISCV

> @@ -251,17 +268,6 @@ common_semi_sys_exit_extended(CPUState *cs, int nr)

>      return (nr == TARGET_SYS_EXIT_EXTENDED || sizeof(target_ulong) == 8);

>  }

>

> -#ifndef CONFIG_USER_ONLY

> -

> -static inline target_ulong

> -common_semi_rambase(CPUState *cs)

> -{

> -    RISCVCPU *cpu = RISCV_CPU(cs);

> -    CPURISCVState *env = &cpu->env;

> -    return common_semi_find_region_base(env->gpr[xSP]);

> -}

> -#endif

> -

>  #endif

>

>  /*

> @@ -1165,12 +1171,12 @@ target_ulong do_common_semihosting(CPUState *cs)

>      case TARGET_SYS_HEAPINFO:

>          {

>              target_ulong retvals[4];

> -            target_ulong limit;

>              int i;

>  #ifdef CONFIG_USER_ONLY

>              TaskState *ts = cs->opaque;

> +            target_ulong limit;

>  #else

> -            target_ulong rambase = common_semi_rambase(cs);

> +            LayoutInfo info = common_semi_find_bases(cs);

>  #endif

>

>              GET_ARG(0);

> @@ -1201,12 +1207,15 @@ target_ulong do_common_semihosting(CPUState *cs)

>              retvals[2] = ts->stack_base;

>              retvals[3] = 0; /* Stack limit.  */

>  #else

> -            limit = current_machine->ram_size;

> -            /* TODO: Make this use the limit of the loaded application.  */

> -            retvals[0] = rambase + limit / 2;

> -            retvals[1] = rambase + limit;

> -            retvals[2] = rambase + limit; /* Stack base */

> -            retvals[3] = rambase; /* Stack limit.  */

> +            /*

> +             * Reporting 0 indicates we couldn't calculate the real

> +             * values which should force most software to fall back to

> +             * using information it has.

> +             */


AIUI this is true for the specific case of "zero heapbase" but
probably not for zero anything else. (That bit of Arm compiler
docs that had a note not in the semihosting spec is apparently
accidentally over-optimistic about what the Arm compiler libc
implementation can handle.) Still, if we have no idea I guess
returning all-zeroes is the best we can do.

> +            retvals[0] = info.heapbase; /* Heap Base */

> +            retvals[1] = info.heaplimit; /* Heap Limit */

> +            retvals[2] = info.stackbase; /* Stack base */

> +            retvals[3] = info.stacklimit; /* Stack limit.  */

>  #endif


-- PMM
Peter Maydell June 18, 2021, 10:09 a.m. UTC | #2
On Thu, 17 Jun 2021 at 20:22, Peter Maydell <peter.maydell@linaro.org> wrote:
> This will pick the first MR it finds that happens to be RAM (which

> could be some tiny thing). You don't want that, you want specifically

> whatever the board decided was the system RAM, which is the MemoryRegion

> MachineState::ram.


As a concrete example, on the mps3-an547 machine this code finds
the 512KB ITCM at 0x0 rather than the much more useful 2GB DRAM
at 0x6000_0000...

-- PMM
Peter Maydell June 18, 2021, 3:01 p.m. UTC | #3
On Fri, 18 Jun 2021 at 11:09, Peter Maydell <peter.maydell@linaro.org> wrote:
>

> On Thu, 17 Jun 2021 at 20:22, Peter Maydell <peter.maydell@linaro.org> wrote:

> > This will pick the first MR it finds that happens to be RAM (which

> > could be some tiny thing). You don't want that, you want specifically

> > whatever the board decided was the system RAM, which is the MemoryRegion

> > MachineState::ram.

>

> As a concrete example, on the mps3-an547 machine this code finds

> the 512KB ITCM at 0x0 rather than the much more useful 2GB DRAM

> at 0x6000_0000...


Turns out that "look for MachineState::ram" won't find you the
2GB DRAM, though, because it's lurking behind a Memory Protection
Controller so it doesn't appear directly in the flatview.
Not sure what to do about that -- maybe we should forget about
MachineState::ram and just go for "largest RAM we can see" ?
Trying to get this code to be able to 'look through' IOMMUs sounds
like it would be massively painful.

-- PMM
diff mbox series

Patch

diff --git a/include/hw/loader.h b/include/hw/loader.h
index cbfc184873..037828e94d 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -349,4 +349,14 @@  int rom_add_option(const char *file, int32_t bootindex);
  * overflow on real hardware too. */
 #define UBOOT_MAX_GUNZIP_BYTES (64 << 20)
 
+/**
+ * rom_find_highest_addr: return highest address of ROM in region
+ *
+ * This function is used to find the highest ROM address (or loaded
+ * blob) so we can advise where true heap memory may be.
+ *
+ * Returns: highest found address in region
+ */
+hwaddr rom_find_highest_addr(hwaddr base, size_t size);
+
 #endif
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 5b34869a54..05003556ee 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1310,6 +1310,25 @@  static Rom *find_rom(hwaddr addr, size_t size)
     return NULL;
 }
 
+hwaddr rom_find_highest_addr(hwaddr base, size_t size)
+{
+    Rom *rom;
+    hwaddr lowest = base;
+
+    QTAILQ_FOREACH(rom, &roms, next) {
+        if (rom->addr < base) {
+            continue;
+        }
+        if (rom->addr + rom->romsize > base + size) {
+            continue;
+        }
+        if (rom->addr + rom->romsize > lowest) {
+            lowest = rom->addr + rom->romsize;
+        }
+    }
+    return lowest;
+}
+
 /*
  * Copies memory from registered ROMs to dest. Any memory that is contained in
  * a ROM between addr and addr + size is copied. Note that this can involve
diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c
index 1c29146dcf..a276161181 100644
--- a/semihosting/arm-compat-semi.c
+++ b/semihosting/arm-compat-semi.c
@@ -44,6 +44,7 @@ 
 #else
 #include "exec/gdbstub.h"
 #include "qemu/cutils.h"
+#include "hw/loader.h"
 #ifdef TARGET_ARM
 #include "hw/arm/boot.h"
 #endif
@@ -144,33 +145,71 @@  typedef struct GuestFD {
 static GArray *guestfd_array;
 
 #ifndef CONFIG_USER_ONLY
-#include "exec/address-spaces.h"
-/*
- * Find the base of a RAM region containing the specified address
+
+/**
+ * common_semi_find_bases: find information about ram and heap base
+ *
+ * This function attempts to provide meaningful numbers for RAM and
+ * HEAP base addresses. The rambase is simply the lowest addressable
+ * RAM position. For the heapbase we scan though the address space and
+ * return the first available address above any ROM regions created by
+ * the loaders.
+ *
+ * Returns: a structure with the numbers we need.
  */
-static inline hwaddr
-common_semi_find_region_base(hwaddr addr)
+
+typedef struct LayoutInfo {
+    target_ulong rambase;
+    size_t ramsize;
+    target_ulong heapbase;
+    target_ulong heaplimit;
+    target_ulong stackbase;
+    target_ulong stacklimit;
+} LayoutInfo;
+
+static bool find_ram_cb(Int128 start, Int128 len, const MemoryRegion *mr,
+                        hwaddr offset_in_region, void *opaque)
 {
-    MemoryRegion *subregion;
+    LayoutInfo *info = (LayoutInfo *) opaque;
+
+    if (!mr->ram || mr->readonly) {
+        return false;
+    }
+
+    info->rambase = mr->addr;
+    info->ramsize = int128_get64(len);
+
+    return true;
+}
+
+static LayoutInfo common_semi_find_bases(CPUState *cs)
+{
+    FlatView *fv;
+    LayoutInfo info = { 0, 0, 0, 0, 0, 0 };
+
+    RCU_READ_LOCK_GUARD();
+
+    fv = address_space_to_flatview(cs->as);
+    flatview_for_each_range(fv, find_ram_cb, &info);
 
     /*
-     * Find the chunk of R/W memory containing the address.  This is
-     * used for the SYS_HEAPINFO semihosting call, which should
-     * probably be using information from the loaded application.
+     * If we have found the RAM lets iterate through the ROM blobs to
+     * workout the best place for the remainder of RAM and split it
+     * equally between stack and heap.
      */
-    QTAILQ_FOREACH(subregion, &get_system_memory()->subregions,
-                   subregions_link) {
-        if (subregion->ram && !subregion->readonly) {
-            Int128 top128 = int128_add(int128_make64(subregion->addr),
-                                       subregion->size);
-            Int128 addr128 = int128_make64(addr);
-            if (subregion->addr <= addr && int128_lt(addr128, top128)) {
-                return subregion->addr;
-            }
-        }
+    if (info.rambase && info.ramsize) {
+        hwaddr limit = info.rambase + info.ramsize;
+        size_t space;
+        info.heapbase = rom_find_highest_addr(info.rambase, info.ramsize);
+        space = QEMU_ALIGN_DOWN((limit - info.heapbase) / 2, TARGET_PAGE_SIZE);
+        info.heaplimit = info.heapbase + space;
+        info.stackbase = info.rambase + info.ramsize;
+        info.stacklimit = info.stackbase - space;
     }
-    return 0;
+
+    return info;
 }
+
 #endif
 
 #ifdef TARGET_ARM
@@ -204,28 +243,6 @@  common_semi_sys_exit_extended(CPUState *cs, int nr)
     return (nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cs->env_ptr));
 }
 
-#ifndef CONFIG_USER_ONLY
-#include "hw/arm/boot.h"
-static inline target_ulong
-common_semi_rambase(CPUState *cs)
-{
-    CPUArchState *env = cs->env_ptr;
-    const struct arm_boot_info *info = env->boot_info;
-    target_ulong sp;
-
-    if (info) {
-        return info->loader_start;
-    }
-
-    if (is_a64(env)) {
-        sp = env->xregs[31];
-    } else {
-        sp = env->regs[13];
-    }
-    return common_semi_find_region_base(sp);
-}
-#endif
-
 #endif /* TARGET_ARM */
 
 #ifdef TARGET_RISCV
@@ -251,17 +268,6 @@  common_semi_sys_exit_extended(CPUState *cs, int nr)
     return (nr == TARGET_SYS_EXIT_EXTENDED || sizeof(target_ulong) == 8);
 }
 
-#ifndef CONFIG_USER_ONLY
-
-static inline target_ulong
-common_semi_rambase(CPUState *cs)
-{
-    RISCVCPU *cpu = RISCV_CPU(cs);
-    CPURISCVState *env = &cpu->env;
-    return common_semi_find_region_base(env->gpr[xSP]);
-}
-#endif
-
 #endif
 
 /*
@@ -1165,12 +1171,12 @@  target_ulong do_common_semihosting(CPUState *cs)
     case TARGET_SYS_HEAPINFO:
         {
             target_ulong retvals[4];
-            target_ulong limit;
             int i;
 #ifdef CONFIG_USER_ONLY
             TaskState *ts = cs->opaque;
+            target_ulong limit;
 #else
-            target_ulong rambase = common_semi_rambase(cs);
+            LayoutInfo info = common_semi_find_bases(cs);
 #endif
 
             GET_ARG(0);
@@ -1201,12 +1207,15 @@  target_ulong do_common_semihosting(CPUState *cs)
             retvals[2] = ts->stack_base;
             retvals[3] = 0; /* Stack limit.  */
 #else
-            limit = current_machine->ram_size;
-            /* TODO: Make this use the limit of the loaded application.  */
-            retvals[0] = rambase + limit / 2;
-            retvals[1] = rambase + limit;
-            retvals[2] = rambase + limit; /* Stack base */
-            retvals[3] = rambase; /* Stack limit.  */
+            /*
+             * Reporting 0 indicates we couldn't calculate the real
+             * values which should force most software to fall back to
+             * using information it has.
+             */
+            retvals[0] = info.heapbase; /* Heap Base */
+            retvals[1] = info.heaplimit; /* Heap Limit */
+            retvals[2] = info.stackbase; /* Stack base */
+            retvals[3] = info.stacklimit; /* Stack limit.  */
 #endif
 
             for (i = 0; i < ARRAY_SIZE(retvals); i++) {