[20/24] accel/tcg: Use interval tree for TARGET_PAGE_DATA_SIZE

Message ID: 20221006031113.1139454-21-richard.henderson@linaro.org
State: New
Series: accel/tcg: Rewrite user-only vma tracking

Commit Message

Richard Henderson Oct. 6, 2022, 3:11 a.m. UTC
Continue weaning user-only away from PageDesc.

Use an interval tree to record target data.
Chunk the data, to minimize allocation overhead.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/internal.h  |   1 -
 accel/tcg/user-exec.c | 110 ++++++++++++++++++++++++++++++++----------
 2 files changed, 85 insertions(+), 26 deletions(-)
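
For orientation before the review comments: a hedged usage sketch (not part of the patch) of the per-page target data API that this patch reimplements on top of an interval tree. The only current user is Arm MTE, which keeps per-page allocation-tag storage. The helper names, header locations, and the one-byte-per-32-bytes layout below are illustrative assumptions; only page_get_target_data(), page_reset_target_data(), and TARGET_PAGE_DATA_SIZE come from the code itself.

/*
 * Illustrative caller's-eye view only; assumes a QEMU user-only build.
 */
#include "qemu/osdep.h"
#include "exec/cpu-all.h"   /* page_get_target_data() et al. (assumed location) */

static void example_store_tag(target_ulong addr, uint8_t tag)
{
    /*
     * Returns TARGET_PAGE_DATA_SIZE bytes of zero-initialized storage for
     * addr's page, allocating it on first use.
     */
    uint8_t *mem = page_get_target_data(addr);

    /* Hypothetical layout: one metadata byte per 32 bytes of guest memory. */
    mem[(addr & ~TARGET_PAGE_MASK) / 32] = tag;
}

static void example_unmap_hook(target_ulong start, target_ulong end)
{
    /* On munmap or remap, any stored metadata for the range must be dropped. */
    page_reset_target_data(start, end);
}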

Comments

Alex Bennée Oct. 25, 2022, 7:30 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Continue weaning user-only away from PageDesc.
>
> Use an interval tree to record target data.
> Chunk the data, to minimize allocation overhead.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/internal.h  |   1 -
>  accel/tcg/user-exec.c | 110 ++++++++++++++++++++++++++++++++----------
>  2 files changed, 85 insertions(+), 26 deletions(-)
>
> diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
> index 1bd5a02911..8731dc52e2 100644
> --- a/accel/tcg/internal.h
> +++ b/accel/tcg/internal.h
> @@ -26,7 +26,6 @@
>  typedef struct PageDesc {
>  #ifdef CONFIG_USER_ONLY
>      unsigned long flags;
> -    void *target_data;
>  #else
>      QemuSpin lock;
>      /* list of TBs intersecting this ram page */
> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
> index fb7d6ee9e9..bce3d5f335 100644
> --- a/accel/tcg/user-exec.c
> +++ b/accel/tcg/user-exec.c
> @@ -210,47 +210,107 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
>      return addr;
>  }
>  
> +#ifdef TARGET_PAGE_DATA_SIZE
> +/*
> + * Allocate chunks of target data together.  For the only current user,
> + * if we allocate one hunk per page, we have overhead of 40/128 or 40%.
> + * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
> + */
> +#define TPD_PAGES  64
> +#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
> +
> +typedef struct TargetPageDataNode {
> +    IntervalTreeNode itree;
> +    char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned));
> +} TargetPageDataNode;
> +
> +static IntervalTreeRoot targetdata_root;
> +
>  void page_reset_target_data(target_ulong start, target_ulong end)
>  {
> -#ifdef TARGET_PAGE_DATA_SIZE
> -    target_ulong addr, len;
> +    IntervalTreeNode *n, *next;
> +    target_ulong last;
>  
> -    /*
> -     * This function should never be called with addresses outside the
> -     * guest address space.  If this assert fires, it probably indicates
> -     * a missing call to h2g_valid.
> -     */
> -    assert(end - 1 <= GUEST_ADDR_MAX);
> -    assert(start < end);
>      assert_memory_lock();
>  
>      start = start & TARGET_PAGE_MASK;
> -    end = TARGET_PAGE_ALIGN(end);
> +    last = TARGET_PAGE_ALIGN(end) - 1;
>  
> -    for (addr = start, len = end - start;
> -         len != 0;
> -         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
> -        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
> +    for (n = interval_tree_iter_first(&targetdata_root, start, last),
> +         next = n ? interval_tree_iter_next(n, start, last) : NULL;
> +         n != NULL;
> +         n = next,
> +         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
> +        target_ulong n_start, n_last, p_ofs, p_len;
> +        TargetPageDataNode *t;
>  
> -        g_free(p->target_data);
> -        p->target_data = NULL;
> +        if (n->start >= start && n->last <= last) {
> +            interval_tree_remove(n, &targetdata_root);
> +            g_free(n);
> +            continue;
> +        }
> +
> +        if (n->start < start) {
> +            n_start = start;
> +            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
> +        } else {
> +            n_start = n->start;
> +            p_ofs = 0;
> +        }
> +        n_last = MIN(last, n->last);
> +        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
> +
> +        t = container_of(n, TargetPageDataNode, itree);
> +        memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE);
>      }
> -#endif
>  }
>  
> -#ifdef TARGET_PAGE_DATA_SIZE
>  void *page_get_target_data(target_ulong address)
>  {
> -    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
> -    void *ret = p->target_data;
> +    IntervalTreeNode *n;
> +    TargetPageDataNode *t;
> +    target_ulong page, region;
> +    bool locked;
>  
> -    if (!ret) {
> -        ret = g_malloc0(TARGET_PAGE_DATA_SIZE);
> -        p->target_data = ret;
> +    page = address & TARGET_PAGE_MASK;
> +    region = address & TBD_MASK;
> +
> +    n = interval_tree_iter_first(&targetdata_root, page, page);
> +    if (n) {
> +        goto found;
>      }
> -    return ret;
> +
> +    /*
> +     * See util/interval-tree.c re lockless lookups: no false positives but
> +     * there are false negatives.  If we find nothing, retry with the mmap
> +     * lock acquired.  We also need the lock for the allocation + insert.
> +     */
> +    locked = have_mmap_lock();

Are we expecting to already hold the lock here?

> +    if (!locked) {
> +        mmap_lock();
> +        n = interval_tree_iter_first(&targetdata_root, page, page);

So we only search if we haven't got a lock already.

> +        if (n) {
> +            mmap_unlock();
> +            goto found;
> +        }
> +    }
> +
> +    t = g_new0(TargetPageDataNode, 1);
> +    n = &t->itree;
> +    n->start = region;
> +    n->last = region | ~TBD_MASK;
> +    interval_tree_insert(n, &targetdata_root);
> +    if (!locked) {
> +        mmap_unlock();
> +    }

To be honest the mmap_lock is safe to use recursively, and wrapping the
locked portion in a LOCK_GUARD would make me happier that we didn't cock
up the unwinding.

> +
> + found:
> +    t = container_of(n, TargetPageDataNode, itree);
> +    return t->data[(page - region) >> TARGET_PAGE_BITS];
>  }
> -#endif
> +#else
> +void page_reset_target_data(target_ulong start, target_ulong end) { }
> +#endif /* TARGET_PAGE_DATA_SIZE */
>  
>  /* The softmmu versions of these helpers are in cputlb.c.  */
Richard Henderson Oct. 25, 2022, 9:29 p.m. UTC | #2
On 10/26/22 05:30, Alex Bennée wrote:
>>   void *page_get_target_data(target_ulong address)
>>   {
>> -    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
>> -    void *ret = p->target_data;
>> +    IntervalTreeNode *n;
>> +    TargetPageDataNode *t;
>> +    target_ulong page, region;
>> +    bool locked;
>>   
>> -    if (!ret) {
>> -        ret = g_malloc0(TARGET_PAGE_DATA_SIZE);
>> -        p->target_data = ret;
>> +    page = address & TARGET_PAGE_MASK;
>> +    region = address & TBD_MASK;
>> +
>> +    n = interval_tree_iter_first(&targetdata_root, page, page);
>> +    if (n) {
>> +        goto found;
>>       }
>> -    return ret;
>> +
>> +    /*
>> +     * See util/interval-tree.c re lockless lookups: no false positives but
>> +     * there are false negatives.  If we find nothing, retry with the mmap
>> +     * lock acquired.  We also need the lock for the allocation + insert.
>> +     */
>> +    locked = have_mmap_lock();
> 
> Are we expecting to already hold the lock here?

No.  This is called in the middle of a normal load/store instruction, in response to MTE 
being enabled.  We really want to avoid taking a lock if we can.

> 
>> +    if (!locked) {
>> +        mmap_lock();
>> +        n = interval_tree_iter_first(&targetdata_root, page, page);
> 
> So we only search if we haven't got a lock already.

We only search *again* if we haven't got a lock already.  If we had the lock, then the 
first search above produced golden results.  And we must have the lock, acquired just now...

> 
>> +        if (n) {
>> +            mmap_unlock();
>> +            goto found;
>> +        }
>> +    }
>> +
>> +    t = g_new0(TargetPageDataNode, 1);
>> +    n = &t->itree;
>> +    n->start = region;
>> +    n->last = region | ~TBD_MASK;
>> +    interval_tree_insert(n, &targetdata_root);

... for the modification to the data structure.

>> +    if (!locked) {
>> +        mmap_unlock();
>> +    }
> 
> To be honest the mmap_lock is safe to use recursively, and wrapping the
> locked portion in a LOCK_GUARD would make me happier that we didn't cock
> up the unwinding.

Fair.  I was trying to avoid a second interval tree walk if we *did* happen to have the 
lock, but as I said myself above, that's extremely unlikely given the only user.


r~
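
Following up on the LOCK_GUARD discussion above: a hedged sketch (not the committed code) of page_get_target_data() with the slow path wrapped in a scoped guard. WITH_MMAP_LOCK_GUARD() is an assumed helper here; because the mmap lock is recursive, an unconditional mmap_lock()/mmap_unlock() pair with the same structure would behave identically and drops the have_mmap_lock() bookkeeping.

/*
 * Sketch only, under the assumption that a recursive-safe scoped guard
 * (here called WITH_MMAP_LOCK_GUARD) is available.
 */
void *page_get_target_data(target_ulong address)
{
    target_ulong page = address & TARGET_PAGE_MASK;
    target_ulong region = address & TBD_MASK;
    TargetPageDataNode *t;
    IntervalTreeNode *n;

    /* Fast path: lockless lookup; false negatives are possible. */
    n = interval_tree_iter_first(&targetdata_root, page, page);
    if (!n) {
        /*
         * Slow path: retry under the (recursive) mmap lock and insert a
         * new 64-page chunk if the page really has no node yet.  The
         * guard keeps lock and unlock balanced on every exit path.
         */
        WITH_MMAP_LOCK_GUARD() {
            n = interval_tree_iter_first(&targetdata_root, page, page);
            if (!n) {
                t = g_new0(TargetPageDataNode, 1);
                n = &t->itree;
                n->start = region;
                n->last = region | ~TBD_MASK;
                interval_tree_insert(n, &targetdata_root);
            }
        }
    }

    t = container_of(n, TargetPageDataNode, itree);
    return t->data[(page - region) >> TARGET_PAGE_BITS];
}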

Patch

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 1bd5a02911..8731dc52e2 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -26,7 +26,6 @@ 
 typedef struct PageDesc {
 #ifdef CONFIG_USER_ONLY
     unsigned long flags;
-    void *target_data;
 #else
     QemuSpin lock;
     /* list of TBs intersecting this ram page */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index fb7d6ee9e9..bce3d5f335 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -210,47 +210,107 @@  tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
     return addr;
 }
 
+#ifdef TARGET_PAGE_DATA_SIZE
+/*
+ * Allocate chunks of target data together.  For the only current user,
+ * if we allocate one hunk per page, we have overhead of 40/128 or 40%.
+ * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
+ */
+#define TPD_PAGES  64
+#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
+
+typedef struct TargetPageDataNode {
+    IntervalTreeNode itree;
+    char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned));
+} TargetPageDataNode;
+
+static IntervalTreeRoot targetdata_root;
+
 void page_reset_target_data(target_ulong start, target_ulong end)
 {
-#ifdef TARGET_PAGE_DATA_SIZE
-    target_ulong addr, len;
+    IntervalTreeNode *n, *next;
+    target_ulong last;
 
-    /*
-     * This function should never be called with addresses outside the
-     * guest address space.  If this assert fires, it probably indicates
-     * a missing call to h2g_valid.
-     */
-    assert(end - 1 <= GUEST_ADDR_MAX);
-    assert(start < end);
     assert_memory_lock();
 
     start = start & TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
+    last = TARGET_PAGE_ALIGN(end) - 1;
 
-    for (addr = start, len = end - start;
-         len != 0;
-         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+    for (n = interval_tree_iter_first(&targetdata_root, start, last),
+         next = n ? interval_tree_iter_next(n, start, last) : NULL;
+         n != NULL;
+         n = next,
+         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
+        target_ulong n_start, n_last, p_ofs, p_len;
+        TargetPageDataNode *t;
 
-        g_free(p->target_data);
-        p->target_data = NULL;
+        if (n->start >= start && n->last <= last) {
+            interval_tree_remove(n, &targetdata_root);
+            g_free(n);
+            continue;
+        }
+
+        if (n->start < start) {
+            n_start = start;
+            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
+        } else {
+            n_start = n->start;
+            p_ofs = 0;
+        }
+        n_last = MIN(last, n->last);
+        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
+
+        t = container_of(n, TargetPageDataNode, itree);
+        memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE);
     }
-#endif
 }
 
-#ifdef TARGET_PAGE_DATA_SIZE
 void *page_get_target_data(target_ulong address)
 {
-    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
-    void *ret = p->target_data;
+    IntervalTreeNode *n;
+    TargetPageDataNode *t;
+    target_ulong page, region;
+    bool locked;
 
-    if (!ret) {
-        ret = g_malloc0(TARGET_PAGE_DATA_SIZE);
-        p->target_data = ret;
+    page = address & TARGET_PAGE_MASK;
+    region = address & TBD_MASK;
+
+    n = interval_tree_iter_first(&targetdata_root, page, page);
+    if (n) {
+        goto found;
     }
-    return ret;
+
+    /*
+     * See util/interval-tree.c re lockless lookups: no false positives but
+     * there are false negatives.  If we find nothing, retry with the mmap
+     * lock acquired.  We also need the lock for the allocation + insert.
+     */
+    locked = have_mmap_lock();
+    if (!locked) {
+        mmap_lock();
+        n = interval_tree_iter_first(&targetdata_root, page, page);
+        if (n) {
+            mmap_unlock();
+            goto found;
+        }
+    }
+
+    t = g_new0(TargetPageDataNode, 1);
+    n = &t->itree;
+    n->start = region;
+    n->last = region | ~TBD_MASK;
+    interval_tree_insert(n, &targetdata_root);
+    if (!locked) {
+        mmap_unlock();
+    }
+
+ found:
+    t = container_of(n, TargetPageDataNode, itree);
+    return t->data[(page - region) >> TARGET_PAGE_BITS];
 }
-#endif
+#else
+void page_reset_target_data(target_ulong start, target_ulong end) { }
+#endif /* TARGET_PAGE_DATA_SIZE */
 
 /* The softmmu versions of these helpers are in cputlb.c.  */