diff mbox series

[for-4.2,02/24] cputlb: Add tlb_flush_asid_by_mmuidx and friends

Message ID 20190719210326.15466-3-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Implement ARMv8.1-VHE | expand

Commit Message

Richard Henderson July 19, 2019, 9:03 p.m. UTC
Since we have remembered ASIDs, we can further minimize flushing
by comparing against the one we want to flush.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 include/exec/exec-all.h | 16 +++++++++++++
 include/qom/cpu.h       |  1 +
 accel/tcg/cputlb.c      | 51 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

-- 
2.17.1

Comments

Alex Bennée July 22, 2019, 10:04 a.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Since we have remembered ASIDs, we can further minimize flushing

> by comparing against the one we want to flush.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  include/exec/exec-all.h | 16 +++++++++++++

>  include/qom/cpu.h       |  1 +

>  accel/tcg/cputlb.c      | 51 +++++++++++++++++++++++++++++++++++++++++

>  3 files changed, 68 insertions(+)

>

> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> index 9c77aa5bf9..0d890e1e60 100644

> --- a/include/exec/exec-all.h

> +++ b/include/exec/exec-all.h

> @@ -240,6 +240,22 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);

>   */

>  void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid,

>                               uint16_t idxmap, uint16_t dep_idxmap);

> +/**

> + * tlb_flush_asid_by_mmuidx:

> + * @cpu: Originating CPU of the flush

> + * @asid: Address Space Identifier

> + * @idxmap: bitmap of MMU indexes to flush if asid matches

> + *

> + * For each mmu index, if @asid matches the value previously saved via

> + * tlb_set_asid_for_mmuidx, flush the index.

> + */

> +void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap);

> +/* Similarly, broadcasting to all cpus. */

> +void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *cpu, uint32_t asid,

> +                                       uint16_t idxmap);

> +/* Similarly, waiting for the broadcast to complete.  */

> +void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *cpu, uint32_t asid,

> +                                              uint16_t idxmap);

>  /**

>   * tlb_set_page_with_attrs:

>   * @cpu: CPU to add this TLB entry for

> diff --git a/include/qom/cpu.h b/include/qom/cpu.h

> index 5ee0046b62..4ae6ea3e1d 100644

> --- a/include/qom/cpu.h

> +++ b/include/qom/cpu.h

> @@ -283,6 +283,7 @@ struct hax_vcpu_state;

>  typedef union {

>      int           host_int;

>      unsigned long host_ulong;

> +    uint64_t      host_uint64;


Missing an access helper. Also, the `host_` prefix in host_uint64 doesn't
make sense — a uint64_t is 64 bits regardless of host, so plain `uint64`
would do:

  #define RUN_ON_CPU_UINT64(i)    ((run_on_cpu_data){.uint64 = (i)})

>      void         *host_ptr;

>      vaddr         target_ptr;

>  } run_on_cpu_data;

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index c68f57755b..3ef68a11bf 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -540,6 +540,57 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)

>      tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);

>  }

>

> +static void tlb_flush_asid_by_mmuidx_async_work(CPUState *cpu,

> +                                                run_on_cpu_data data)

> +{

> +    CPUTLB *tlb = cpu_tlb(cpu);

> +    uint32_t asid = data.host_uint64;

> +    uint16_t idxmap = data.host_uint64 >> 32;

> +    uint16_t to_flush = 0, work;

> +

> +    assert_cpu_is_self(cpu);

> +

> +    for (work = idxmap; work != 0; work &= work - 1) {

> +        int mmu_idx = ctz32(work);

> +        if (tlb->d[mmu_idx].asid == asid) {

> +            to_flush |= 1 << mmu_idx;

> +        }

> +    }

> +

> +    if (to_flush) {

> +        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(to_flush));

> +    }

> +}

> +

> +void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap)

> +{

> +    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };


Then this would be:
     uint64_t asid_idx_map = deposit64(asid, 32, 32, idxmap);
     ...
     async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work,
                      RUN_ON_CPU_UINT64(asid_idx_map));

Not a massive win but consistent with the other *_run_on calls and
easier to grep.

> +

> +    if (cpu->created && !qemu_cpu_is_self(cpu)) {

> +        async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work, data);

> +    } else {

> +        tlb_flush_asid_by_mmuidx_async_work(cpu, data);

> +    }

> +}

> +

> +void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *src_cpu,

> +                                       uint32_t asid, uint16_t idxmap)

> +{

> +    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };

> +

> +    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);

> +    tlb_flush_asid_by_mmuidx_async_work(src_cpu, data);

> +}

> +

> +void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *src_cpu,

> +                                              uint32_t asid, uint16_t idxmap)

> +{

> +    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };

> +

> +    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);

> +    async_safe_run_on_cpu(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);

> +}

> +

>  void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap,

>                               uint16_t depmap)

>  {


Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


--
Alex Bennée
diff mbox series

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 9c77aa5bf9..0d890e1e60 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -240,6 +240,22 @@  void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
  */
 void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid,
                              uint16_t idxmap, uint16_t dep_idxmap);
+/**
+ * tlb_flush_asid_by_mmuidx:
+ * @cpu: Originating CPU of the flush
+ * @asid: Address Space Identifier
+ * @idxmap: bitmap of MMU indexes to flush if asid matches
+ *
+ * For each mmu index, if @asid matches the value previously saved via
+ * tlb_set_asid_for_mmuidx, flush the index.
+ */
+void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap);
+/* Similarly, broadcasting to all cpus. */
+void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *cpu, uint32_t asid,
+                                       uint16_t idxmap);
+/* Similarly, waiting for the broadcast to complete.  */
+void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *cpu, uint32_t asid,
+                                              uint16_t idxmap);
 /**
  * tlb_set_page_with_attrs:
  * @cpu: CPU to add this TLB entry for
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 5ee0046b62..4ae6ea3e1d 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -283,6 +283,7 @@  struct hax_vcpu_state;
 typedef union {
     int           host_int;
     unsigned long host_ulong;
+    uint64_t      host_uint64;
     void         *host_ptr;
     vaddr         target_ptr;
 } run_on_cpu_data;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index c68f57755b..3ef68a11bf 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -540,6 +540,57 @@  void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 }
 
+static void tlb_flush_asid_by_mmuidx_async_work(CPUState *cpu,
+                                                run_on_cpu_data data)
+{
+    CPUTLB *tlb = cpu_tlb(cpu);
+    uint32_t asid = data.host_uint64;
+    uint16_t idxmap = data.host_uint64 >> 32;
+    uint16_t to_flush = 0, work;
+
+    assert_cpu_is_self(cpu);
+
+    for (work = idxmap; work != 0; work &= work - 1) {
+        int mmu_idx = ctz32(work);
+        if (tlb->d[mmu_idx].asid == asid) {
+            to_flush |= 1 << mmu_idx;
+        }
+    }
+
+    if (to_flush) {
+        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(to_flush));
+    }
+}
+
+void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap)
+{
+    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
+
+    if (cpu->created && !qemu_cpu_is_self(cpu)) {
+        async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+    } else {
+        tlb_flush_asid_by_mmuidx_async_work(cpu, data);
+    }
+}
+
+void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *src_cpu,
+                                       uint32_t asid, uint16_t idxmap)
+{
+    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
+
+    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+    tlb_flush_asid_by_mmuidx_async_work(src_cpu, data);
+}
+
+void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
+                                              uint32_t asid, uint16_t idxmap)
+{
+    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
+
+    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+    async_safe_run_on_cpu(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+}
+
 void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap,
                              uint16_t depmap)
 {