diff mbox series

[04/23] accel/tcg: Split out tlbfast_flush_range_locked

Message ID 20241009150855.804605-5-richard.henderson@linaro.org
State New
Headers show
Series accel/tcg: Convert victim tlb to IntervalTree | expand

Commit Message

Richard Henderson Oct. 9, 2024, 3:08 p.m. UTC
While this may at present be overly complicated for use
by single page flushes, do so with the expectation that
this will eventually allow simplification of large pages.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 61 +++++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

Comments

Pierrick Bouvier Oct. 9, 2024, 11:05 p.m. UTC | #1
On 10/9/24 08:08, Richard Henderson wrote:
> While this may at present be overly complicated for use
> by single page flushes, do so with the expectation that
> this will eventually allow simplification of large pages.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   accel/tcg/cputlb.c | 61 +++++++++++++++++++++++++---------------------
>   1 file changed, 33 insertions(+), 28 deletions(-)
> 
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index e37af24525..6773874f2d 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -520,10 +520,37 @@ static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
>       tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
>   }
>   
> +static void tlbfast_flush_range_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
> +                                       vaddr addr, vaddr len, vaddr mask)
> +{
> +    /*
> +     * If @mask is smaller than the tlb size, there may be multiple entries
> +     * within the TLB; for now, just flush the entire TLB.
> +     * Otherwise all addresses that match under @mask hit the same TLB entry.
> +     *
> +     * If @len is larger than the tlb size, then it will take longer to
> +     * test all of the entries in the TLB than it will to flush it all.
> +     */
> +    if (mask < fast->mask || len > fast->mask) {
> +        tlbfast_flush_locked(desc, fast);
> +        return;
> +    }
> +
> +    for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
> +        vaddr page = addr + i;
> +        CPUTLBEntry *entry = tlbfast_entry(fast, page);
> +
> +        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
> +            desc->n_used_entries--;
> +        }
> +    }
> +}
> +
>   static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
>   {
> -    vaddr lp_addr = cpu->neg.tlb.d[midx].large_page_addr;
> -    vaddr lp_mask = cpu->neg.tlb.d[midx].large_page_mask;
> +    CPUTLBDesc *desc = &cpu->neg.tlb.d[midx];
> +    vaddr lp_addr = desc->large_page_addr;
> +    vaddr lp_mask = desc->large_page_mask;
>   
>       /* Check if we need to flush due to large pages.  */
>       if ((page & lp_mask) == lp_addr) {
> @@ -532,9 +559,8 @@ static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
>                     midx, lp_addr, lp_mask);
>           tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
>       } else {
> -        if (tlb_flush_entry_locked(tlb_entry(cpu, midx, page), page)) {
> -            tlb_n_used_entries_dec(cpu, midx);
> -        }
> +        tlbfast_flush_range_locked(desc, &cpu->neg.tlb.f[midx],
> +                                   page, TARGET_PAGE_SIZE, -1);
>           tlb_flush_vtlb_page_locked(cpu, midx, page);
>       }
>   }
> @@ -689,24 +715,6 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
>       CPUTLBDescFast *f = &cpu->neg.tlb.f[midx];
>       vaddr mask = MAKE_64BIT_MASK(0, bits);
>   
> -    /*
> -     * If @bits is smaller than the tlb size, there may be multiple entries
> -     * within the TLB; otherwise all addresses that match under @mask hit
> -     * the same TLB entry.
> -     * TODO: Perhaps allow bits to be a few bits less than the size.
> -     * For now, just flush the entire TLB.
> -     *
> -     * If @len is larger than the tlb size, then it will take longer to
> -     * test all of the entries in the TLB than it will to flush it all.
> -     */
> -    if (mask < f->mask || len > f->mask) {
> -        tlb_debug("forcing full flush midx %d ("
> -                  "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
> -                  midx, addr, mask, len);
> -        tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
> -        return;
> -    }
> -
>       /*
>        * Check if we need to flush due to large pages.
>        * Because large_page_mask contains all 1's from the msb,
> @@ -720,13 +728,10 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
>           return;
>       }
>   
> +    tlbfast_flush_range_locked(d, f, addr, len, mask);
> +
>       for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
>           vaddr page = addr + i;
> -        CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
> -
> -        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
> -            tlb_n_used_entries_dec(cpu, midx);
> -        }
>           tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
>       }
>   }

Why don't we have the same kind of change for 
tlb_flush_vtlb_page_mask_locked?

We know have two loops (for entry mask, and for page mask).
Richard Henderson Oct. 10, 2024, 1:20 a.m. UTC | #2
On 10/9/24 16:05, Pierrick Bouvier wrote:
>> @@ -720,13 +728,10 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
>>           return;
>>       }
>> +    tlbfast_flush_range_locked(d, f, addr, len, mask);
>> +
>>       for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
>>           vaddr page = addr + i;
>> -        CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
>> -
>> -        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
>> -            tlb_n_used_entries_dec(cpu, midx);
>> -        }
>>           tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
>>       }
>>   }
> 
> Why don't we have the same kind of change for tlb_flush_vtlb_page_mask_locked?
> 
> We know have two loops (for entry mask, and for page mask).

It goes away in patch 15.

r~
Pierrick Bouvier Oct. 11, 2024, 5:09 p.m. UTC | #3
On 10/9/24 18:20, Richard Henderson wrote:
> On 10/9/24 16:05, Pierrick Bouvier wrote:
>>> @@ -720,13 +728,10 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
>>>            return;
>>>        }
>>> +    tlbfast_flush_range_locked(d, f, addr, len, mask);
>>> +
>>>        for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
>>>            vaddr page = addr + i;
>>> -        CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
>>> -
>>> -        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
>>> -            tlb_n_used_entries_dec(cpu, midx);
>>> -        }
>>>            tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
>>>        }
>>>    }
>>
>> Why don't we have the same kind of change for tlb_flush_vtlb_page_mask_locked?
>>
>> We know have two loops (for entry mask, and for page mask).
> 
> It goes away in patch 15.
> 
> r~

Right, looks good.
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
diff mbox series

Patch

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e37af24525..6773874f2d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -520,10 +520,37 @@  static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
     tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
 }
 
+static void tlbfast_flush_range_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
+                                       vaddr addr, vaddr len, vaddr mask)
+{
+    /*
+     * If @mask is smaller than the tlb size, there may be multiple entries
+     * within the TLB; for now, just flush the entire TLB.
+     * Otherwise all addresses that match under @mask hit the same TLB entry.
+     *
+     * If @len is larger than the tlb size, then it will take longer to
+     * test all of the entries in the TLB than it will to flush it all.
+     */
+    if (mask < fast->mask || len > fast->mask) {
+        tlbfast_flush_locked(desc, fast);
+        return;
+    }
+
+    for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
+        vaddr page = addr + i;
+        CPUTLBEntry *entry = tlbfast_entry(fast, page);
+
+        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
+            desc->n_used_entries--;
+        }
+    }
+}
+
 static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
 {
-    vaddr lp_addr = cpu->neg.tlb.d[midx].large_page_addr;
-    vaddr lp_mask = cpu->neg.tlb.d[midx].large_page_mask;
+    CPUTLBDesc *desc = &cpu->neg.tlb.d[midx];
+    vaddr lp_addr = desc->large_page_addr;
+    vaddr lp_mask = desc->large_page_mask;
 
     /* Check if we need to flush due to large pages.  */
     if ((page & lp_mask) == lp_addr) {
@@ -532,9 +559,8 @@  static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
                   midx, lp_addr, lp_mask);
         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
     } else {
-        if (tlb_flush_entry_locked(tlb_entry(cpu, midx, page), page)) {
-            tlb_n_used_entries_dec(cpu, midx);
-        }
+        tlbfast_flush_range_locked(desc, &cpu->neg.tlb.f[midx],
+                                   page, TARGET_PAGE_SIZE, -1);
         tlb_flush_vtlb_page_locked(cpu, midx, page);
     }
 }
@@ -689,24 +715,6 @@  static void tlb_flush_range_locked(CPUState *cpu, int midx,
     CPUTLBDescFast *f = &cpu->neg.tlb.f[midx];
     vaddr mask = MAKE_64BIT_MASK(0, bits);
 
-    /*
-     * If @bits is smaller than the tlb size, there may be multiple entries
-     * within the TLB; otherwise all addresses that match under @mask hit
-     * the same TLB entry.
-     * TODO: Perhaps allow bits to be a few bits less than the size.
-     * For now, just flush the entire TLB.
-     *
-     * If @len is larger than the tlb size, then it will take longer to
-     * test all of the entries in the TLB than it will to flush it all.
-     */
-    if (mask < f->mask || len > f->mask) {
-        tlb_debug("forcing full flush midx %d ("
-                  "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
-                  midx, addr, mask, len);
-        tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
-        return;
-    }
-
     /*
      * Check if we need to flush due to large pages.
      * Because large_page_mask contains all 1's from the msb,
@@ -720,13 +728,10 @@  static void tlb_flush_range_locked(CPUState *cpu, int midx,
         return;
     }
 
+    tlbfast_flush_range_locked(d, f, addr, len, mask);
+
     for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
         vaddr page = addr + i;
-        CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
-
-        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
-            tlb_n_used_entries_dec(cpu, midx);
-        }
         tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
     }
 }