| Message ID | 20221006031113.1139454-21-richard.henderson@linaro.org |
|---|---|
| State | New |
| Series | accel/tcg: Rewrite user-only vma tracking |
Richard Henderson <richard.henderson@linaro.org> writes:

> Continue weaning user-only away from PageDesc.
>
> Use an interval tree to record target data.
> Chunk the data, to minimize allocation overhead.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/internal.h  |   1 -
>  accel/tcg/user-exec.c | 110 ++++++++++++++++++++++++++++++++----------
>  2 files changed, 85 insertions(+), 26 deletions(-)

[...]

> +    /*
> +     * See util/interval-tree.c re lockless lookups: no false positives but
> +     * there are false negatives.  If we find nothing, retry with the mmap
> +     * lock acquired.  We also need the lock for the allocation + insert.
> +     */
> +    locked = have_mmap_lock();

Are we expecting to already hold the lock here?

> +    if (!locked) {
> +        mmap_lock();
> +        n = interval_tree_iter_first(&targetdata_root, page, page);

So we only search if we haven't got a lock already.

> +        if (n) {
> +            mmap_unlock();
> +            goto found;
> +        }
> +    }
> +
> +    t = g_new0(TargetPageDataNode, 1);
> +    n = &t->itree;
> +    n->start = region;
> +    n->last = region | ~TBD_MASK;
> +    interval_tree_insert(n, &targetdata_root);
> +    if (!locked) {
> +        mmap_unlock();
> +    }

To be honest the mmap_lock is safe to use recursively, and wrapping the
locked portion in a LOCK_GUARD would make me happier that we didn't cock
up unwinding.

[...]
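
The LOCK_GUARD Alex refers to is a scope-based locking helper: the lock is released automatically when control leaves the block, so an early return or goto cannot skip the unlock. Purely as an illustration of that mechanism, a minimal sketch follows; the names, the mutex, and the macro are hypothetical, with a pthread mutex standing in for QEMU's mmap lock, and the guard is built on the GCC/Clang cleanup attribute.

```c
/*
 * Hypothetical sketch of a scope-based lock guard, in the spirit of the
 * LOCK_GUARD suggestion above.  All names here are illustrative; a pthread
 * mutex stands in for the mmap lock.  GCC/Clang only (cleanup attribute).
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_counter;

/* Runs automatically when the guard variable goes out of scope. */
static void demo_guard_unlock(pthread_mutex_t **lockp)
{
    pthread_mutex_unlock(*lockp);
}

#define DEMO_GLUE_(a, b) a##b
#define DEMO_GLUE(a, b)  DEMO_GLUE_(a, b)

/* Take the lock now; release it on every path out of the enclosing scope. */
#define DEMO_LOCK_GUARD(lock)                                           \
    __attribute__((cleanup(demo_guard_unlock), unused))                 \
    pthread_mutex_t *DEMO_GLUE(demo_guard_, __LINE__) =                 \
        (pthread_mutex_lock(lock), (lock))

static int bump_counter(void)
{
    DEMO_LOCK_GUARD(&demo_lock);
    if (++shared_counter == 42) {
        return -1;          /* early return: the guard still unlocks */
    }
    return shared_counter;  /* normal return: also unlocks */
}

int main(void)
{
    printf("counter = %d\n", bump_counter());
    return 0;
}
```

QEMU's own lock guards are built on the same cleanup-attribute mechanism; the point of the suggestion is simply that no exit path can forget the unlock.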
On 10/26/22 05:30, Alex Bennée wrote:
>>   void *page_get_target_data(target_ulong address)
>>   {
>> -    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
>> -    void *ret = p->target_data;
>> +    IntervalTreeNode *n;
>> +    TargetPageDataNode *t;
>> +    target_ulong page, region;
>> +    bool locked;
>>
>> -    if (!ret) {
>> -        ret = g_malloc0(TARGET_PAGE_DATA_SIZE);
>> -        p->target_data = ret;
>> +    page = address & TARGET_PAGE_MASK;
>> +    region = address & TBD_MASK;
>> +
>> +    n = interval_tree_iter_first(&targetdata_root, page, page);
>> +    if (n) {
>> +        goto found;
>>       }
>> -    return ret;
>> +
>> +    /*
>> +     * See util/interval-tree.c re lockless lookups: no false positives but
>> +     * there are false negatives.  If we find nothing, retry with the mmap
>> +     * lock acquired.  We also need the lock for the allocation + insert.
>> +     */
>> +    locked = have_mmap_lock();
>
> Are we expecting to already hold the lock here?

No.  This is called in the middle of a normal load/store instruction,
in response to MTE being enabled.  We really want to avoid taking a
lock if we can.

>
>> +    if (!locked) {
>> +        mmap_lock();
>> +        n = interval_tree_iter_first(&targetdata_root, page, page);
>
> So we only search if we haven't got a lock already.

We only search *again* if we haven't got a lock already.  If we had the
lock, then the first search above produced golden results.

And we must have the lock, acquired just now...

>
>> +        if (n) {
>> +            mmap_unlock();
>> +            goto found;
>> +        }
>> +    }
>> +
>> +    t = g_new0(TargetPageDataNode, 1);
>> +    n = &t->itree;
>> +    n->start = region;
>> +    n->last = region | ~TBD_MASK;
>> +    interval_tree_insert(n, &targetdata_root);

... for the modification to the data structure.

>> +    if (!locked) {
>> +        mmap_unlock();
>> +    }
>
> To be honest the mmap_lock is safe to use recursively, and wrapping the
> locked portion in a LOCK_GUARD would make me happier that we didn't cock
> up unwinding.

Fair.  I was trying to avoid a second interval tree walk if we *did*
happen to have the lock, but as I said myself above, that's extremely
unlikely given the only user.


r~
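
The flow Richard describes is essentially double-checked locking over a structure that tolerates lock-free readers: try the lookup with no lock, and only on a miss take the lock, look again, and insert. A standalone sketch of that shape follows; it assumes a toy table (an insert-only linked list with an atomic head) and a pthread mutex instead of QEMU's interval tree and mmap lock, so all names and types here are illustrative only.

```c
/*
 * Sketch of "lookup without the lock, retry and insert with the lock".
 * The table, lock, and node type are stand-ins, not QEMU's data structures.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct Node {
    unsigned long key;
    struct Node *next;   /* never changes once the node is published */
} Node;

static _Atomic(Node *) table_head;                 /* read locklessly */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Lock-free read: may miss a node being inserted concurrently (a false
 * negative), but never returns a node that is not in the table. */
static Node *lookup(unsigned long key)
{
    for (Node *n = atomic_load_explicit(&table_head, memory_order_acquire);
         n != NULL; n = n->next) {
        if (n->key == key) {
            return n;
        }
    }
    return NULL;
}

static Node *get_or_insert(unsigned long key)
{
    /* 1. Optimistic lookup without the lock: the common, fast path. */
    Node *n = lookup(key);
    if (n) {
        return n;
    }

    /* 2. Miss: take the lock, search again, insert only if still absent. */
    pthread_mutex_lock(&table_lock);
    n = lookup(key);
    if (!n) {
        n = calloc(1, sizeof(*n));
        n->key = key;
        n->next = atomic_load_explicit(&table_head, memory_order_relaxed);
        /* Publish only after the node is fully initialized. */
        atomic_store_explicit(&table_head, n, memory_order_release);
    }
    pthread_mutex_unlock(&table_lock);
    return n;
}

int main(void)
{
    Node *a = get_or_insert(0x1000);
    Node *b = get_or_insert(0x1000);
    printf("same node: %s\n", a == b ? "yes" : "no");
    return 0;
}
```

The second, locked lookup is what turns a possible false negative from the lock-free pass into a definitive answer before anything is allocated or inserted.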
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 1bd5a02911..8731dc52e2 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -26,7 +26,6 @@
 typedef struct PageDesc {
 #ifdef CONFIG_USER_ONLY
     unsigned long flags;
-    void *target_data;
 #else
     QemuSpin lock;
     /* list of TBs intersecting this ram page */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index fb7d6ee9e9..bce3d5f335 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -210,47 +210,107 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
     return addr;
 }
 
+#ifdef TARGET_PAGE_DATA_SIZE
+/*
+ * Allocate chunks of target data together.  For the only current user,
+ * if we allocate one hunk per page, we have overhead of 40/128 or 40%.
+ * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
+ */
+#define TPD_PAGES  64
+#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
+
+typedef struct TargetPageDataNode {
+    IntervalTreeNode itree;
+    char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned));
+} TargetPageDataNode;
+
+static IntervalTreeRoot targetdata_root;
+
 void page_reset_target_data(target_ulong start, target_ulong end)
 {
-#ifdef TARGET_PAGE_DATA_SIZE
-    target_ulong addr, len;
+    IntervalTreeNode *n, *next;
+    target_ulong last;
 
-    /*
-     * This function should never be called with addresses outside the
-     * guest address space.  If this assert fires, it probably indicates
-     * a missing call to h2g_valid.
-     */
-    assert(end - 1 <= GUEST_ADDR_MAX);
-    assert(start < end);
     assert_memory_lock();
 
     start = start & TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
+    last = TARGET_PAGE_ALIGN(end) - 1;
 
-    for (addr = start, len = end - start;
-         len != 0;
-         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+    for (n = interval_tree_iter_first(&targetdata_root, start, last),
+         next = n ? interval_tree_iter_next(n, start, last) : NULL;
+         n != NULL;
+         n = next,
+         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
+        target_ulong n_start, n_last, p_ofs, p_len;
+        TargetPageDataNode *t;
 
-        g_free(p->target_data);
-        p->target_data = NULL;
+        if (n->start >= start && n->last <= last) {
+            interval_tree_remove(n, &targetdata_root);
+            g_free(n);
+            continue;
+        }
+
+        if (n->start < start) {
+            n_start = start;
+            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
+        } else {
+            n_start = n->start;
+            p_ofs = 0;
+        }
+        n_last = MIN(last, n->last);
+        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
+
+        t = container_of(n, TargetPageDataNode, itree);
+        memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE);
     }
-#endif
 }
 
-#ifdef TARGET_PAGE_DATA_SIZE
 void *page_get_target_data(target_ulong address)
 {
-    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
-    void *ret = p->target_data;
+    IntervalTreeNode *n;
+    TargetPageDataNode *t;
+    target_ulong page, region;
+    bool locked;
 
-    if (!ret) {
-        ret = g_malloc0(TARGET_PAGE_DATA_SIZE);
-        p->target_data = ret;
+    page = address & TARGET_PAGE_MASK;
+    region = address & TBD_MASK;
+
+    n = interval_tree_iter_first(&targetdata_root, page, page);
+    if (n) {
+        goto found;
     }
-    return ret;
+
+    /*
+     * See util/interval-tree.c re lockless lookups: no false positives but
+     * there are false negatives.  If we find nothing, retry with the mmap
+     * lock acquired.  We also need the lock for the allocation + insert.
+     */
+    locked = have_mmap_lock();
+    if (!locked) {
+        mmap_lock();
+        n = interval_tree_iter_first(&targetdata_root, page, page);
+        if (n) {
+            mmap_unlock();
+            goto found;
+        }
+    }
+
+    t = g_new0(TargetPageDataNode, 1);
+    n = &t->itree;
+    n->start = region;
+    n->last = region | ~TBD_MASK;
+    interval_tree_insert(n, &targetdata_root);
+    if (!locked) {
+        mmap_unlock();
+    }
+
+ found:
+    t = container_of(n, TargetPageDataNode, itree);
+    return t->data[(page - region) >> TARGET_PAGE_BITS];
 }
-#endif
+#else
+void page_reset_target_data(target_ulong start, target_ulong end) { }
+#endif /* TARGET_PAGE_DATA_SIZE */
 
 /* The softmmu versions of these helpers are in cputlb.c. */
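
A note on the mask arithmetic in the patch: because the page mask is the sign-extended value -TARGET_PAGE_SIZE, multiplying it by the power-of-two TPD_PAGES yields the mask of a 64-page region, and (page - region) >> TARGET_PAGE_BITS indexes the data[][] array within a chunk. Below is a standalone sketch of the same arithmetic with example constants (4 KiB pages and 64 pages per chunk, chosen purely for illustration; the real values depend on the target configuration).

```c
/*
 * Standalone illustration of the region/index arithmetic used by
 * TargetPageDataNode.  DEMO_PAGE_BITS and DEMO_PAGES_PER_CHUNK are example
 * values, not taken from any particular target.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_BITS        12
#define DEMO_PAGE_SZ          ((int64_t)1 << DEMO_PAGE_BITS)
#define DEMO_PAGE_MASK        (-DEMO_PAGE_SZ)         /* sign-extended page mask */
#define DEMO_PAGES_PER_CHUNK  64
/* -PAGE_SZ * 64 == -(PAGE_SZ * 64), i.e. the mask of a 64-page region. */
#define DEMO_CHUNK_MASK       (DEMO_PAGE_MASK * DEMO_PAGES_PER_CHUNK)

int main(void)
{
    uint64_t address = 0x12345678;

    uint64_t page   = address & DEMO_PAGE_MASK;           /* start of the page  */
    uint64_t region = address & DEMO_CHUNK_MASK;          /* start of the chunk */
    uint64_t last   = region | ~DEMO_CHUNK_MASK;          /* last byte of chunk */
    uint64_t index  = (page - region) >> DEMO_PAGE_BITS;  /* slot in data[][]   */

    printf("page   = 0x%" PRIx64 "\n", page);
    printf("region = 0x%" PRIx64 " .. 0x%" PRIx64 "\n", region, last);
    printf("index  = %" PRIu64 " of %d\n", index, DEMO_PAGES_PER_CHUNK);
    return 0;
}
```

With one TargetPageDataNode covering 64 pages, the allocator bookkeeping is paid once per region rather than once per page, which is where the "< 1%" overhead figure in the patch comment comes from.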
Continue weaning user-only away from PageDesc.

Use an interval tree to record target data.
Chunk the data, to minimize allocation overhead.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/internal.h  |   1 -
 accel/tcg/user-exec.c | 110 ++++++++++++++++++++++++++++++++----------
 2 files changed, 85 insertions(+), 26 deletions(-)