[v8,19/25] cputlb: introduce tlb_flush_*_all_cpus[_synced]

Message ID	20170127103505.18606-20-alex.bennee@linaro.org
State	Superseded
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 2001:4830:134:3::11 as permitted sender) client-ip=2001:4830:134:3::11; From: =?utf-8?q?Alex_Benn=C3=A9e?= <alex.bennee@linaro.org> To: Date: Fri, 27 Jan 2017 10:34:59 +0000 Message-Id: <20170127103505.18606-20-alex.bennee@linaro.org> In-Reply-To: <20170127103505.18606-1-alex.bennee@linaro.org> References: <20170127103505.18606-1-alex.bennee@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [Qemu-devel] [PATCH v8 19/25] cputlb: introduce tlb_flush_*_all_cpus[_synced] Precedence: list Cc: Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <rth@twiddle.net>, =?utf-8?q?Alex_Benn=C3=A9e?= <alex.bennee@linaro.org>, "open list:Overall" <qemu-devel@nongnu.org>, Peter Crosthwaite <crosthwaite.peter@gmail.com> Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	[v8,01/25] docs: new design document multi-thread-tcg.txt \| expand [v8,01/25] docs: new design document multi-thread-tcg.txt [v8,02/25] mttcg: translate-all: Enable locking debug in a debug build [v8,03/25] mttcg: Add missing tb_lock/unlock() in cpu_exec_step() [v8,04/25] tcg: move TCG_MO/BAR types into own file [v8,05/25] tcg: add options for enabling MTTCG [v8,06/25] tcg: add kick timer for single-threaded vCPU emulation [v8,07/25] tcg: rename tcg_current_cpu to tcg_current_rr_cpu [v8,08/25] tcg: drop global lock during TCG code execution [v8,09/25] tcg: remove global exit_request [v8,10/25] tcg: enable tb_lock() for SoftMMU [v8,11/25] tcg: enable thread-per-vCPU [v8,12/25] tcg: handle EXCP_ATOMIC exception for system emulation [v8,13/25] cputlb: add assert_cpu_is_self checks [v8,14/25] cputlb: tweak qemu_ram_addr_from_host_nofail reporting [v8,15/25] cputlb: introduce tlb_flush_* async work. [v8,16/25] cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap [v8,17/25] cputlb: add tlb_flush_by_mmuidx async routines [v8,18/25] cputlb: atomically update tlb fields used by tlb_reset_dirty [v8,19/25] cputlb: introduce tlb_flush_*_all_cpus[_synced] [v8,20/25] target-arm/powerctl: defer cpu reset work to CPU context [v8,21/25] target-arm: don't generate WFE/YIELD calls for MTTCG [v8,22/25] target-arm/cpu.h: make ARM_CP defined consistent [v8,23/25] target-arm: introduce ARM_CP_EXIT_PC [v8,24/25] target-arm: ensure all cross vCPUs TLB flushes complete [v8,25/25] tcg: enable MTTCG by default for ARM on x86 hosts

diff --git a/cputlb.c b/cputlb.c index 65003350e3..7f9a54f253 100644 --- a/cputlb.c +++ b/cputlb.c @@ -73,6 +73,25 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) +/* flush_all_helper: run fn across all cpus + * + * If the wait flag is set then the src cpu's helper will be queued as + * "safe" work and the loop exited creating a synchronisation point + * where all queued work will be finished before execution starts + * again. + */ +static void flush_all_helper(CPUState *src, run_on_cpu_func fn, + run_on_cpu_data d) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (cpu != src) { + async_run_on_cpu(cpu, fn, d); + } + } +} + /* statistics */ int tlb_flush_count; @@ -128,6 +147,19 @@ void tlb_flush(CPUState *cpu) } } +void tlb_flush_all_cpus(CPUState *src_cpu) +{ + flush_all_helper(src_cpu, tlb_flush_global_async_work, RUN_ON_CPU_NULL); + tlb_flush_global_async_work(src_cpu, RUN_ON_CPU_NULL); +} + +void QEMU_NORETURN tlb_flush_all_cpus_synced(CPUState *src_cpu) +{ + flush_all_helper(src_cpu, tlb_flush_global_async_work, RUN_ON_CPU_NULL); + tlb_flush_global_async_work(src_cpu, RUN_ON_CPU_NULL); + cpu_loop_exit(src_cpu); +} + static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; @@ -178,6 +210,30 @@ void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) } } +void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; + + tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); + fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); +} + +void QEMU_NORETURN tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; + + tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); + async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); + cpu_loop_exit(src_cpu); +} + + + static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) { if (addr == (tlb_entry->addr_read & @@ -317,14 +373,56 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) } } -void tlb_flush_page_all(target_ulong addr) +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, + uint16_t idxmap) { - CPUState *cpu; + const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work; + target_ulong addr_and_mmu_idx; - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, tlb_flush_page_async_work, - RUN_ON_CPU_TARGET_PTR(addr)); - } + tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); + + /* This should already be page aligned */ + addr_and_mmu_idx = addr & TARGET_PAGE_MASK; + addr_and_mmu_idx |= idxmap; + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); +} + +void QEMU_NORETURN tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work; + target_ulong addr_and_mmu_idx; + + tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); + + /* This should already be page aligned */ + addr_and_mmu_idx = addr & TARGET_PAGE_MASK; + addr_and_mmu_idx |= idxmap; + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + cpu_loop_exit(src_cpu); +} + +void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +{ + const run_on_cpu_func fn = tlb_flush_page_async_work; + + flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr)); + fn(src, RUN_ON_CPU_TARGET_PTR(addr)); +} + +void QEMU_NORETURN tlb_flush_page_all_cpus_synced(CPUState *src, + target_ulong addr) +{ + const run_on_cpu_func fn = tlb_flush_page_async_work; + + flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr)); + async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr)); + cpu_loop_exit(src); } /* update the TLBs so that writes to code in the virtual page 'addr' diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index a6c17ed74a..cffed39fe9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -93,6 +93,27 @@ void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx); */ void tlb_flush_page(CPUState *cpu, target_ulong addr); /** + * tlb_flush_page_all_cpus: + * @cpu: src CPU of the flush + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all + * MMU indexes. + */ +void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr); +/** + * tlb_flush_page_all_cpus_synced: + * @cpu: src CPU of the flush + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all + * MMU indexes like tlb_flush_page_all_cpus except this function does + * not return and will restart the run-loop once all CPUs have + * executed the flush. + */ +void QEMU_NORETURN tlb_flush_page_all_cpus_synced(CPUState *src, + target_ulong addr); +/** * tlb_flush: * @cpu: CPU whose TLB should be flushed * @@ -103,6 +124,19 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr); */ void tlb_flush(CPUState *cpu); /** + * tlb_flush_all_cpus: + * @cpu: src CPU of the flush + */ +void tlb_flush_all_cpus(CPUState *src_cpu); +/** + * tlb_flush_all_cpus_synced: + * @cpu: src CPU of the flush + * + * Like tlb_flush_all_cpus except this function does not return and + * will restart the run-loop once all CPUs have executed the flush. + */ +void QEMU_NORETURN tlb_flush_all_cpus_synced(CPUState *src_cpu); +/** * tlb_flush_page_by_mmuidx: * @cpu: CPU whose TLB should be flushed * @addr: virtual address of page to be flushed @@ -114,8 +148,34 @@ void tlb_flush(CPUState *cpu); void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap); /** + * tlb_flush_page_by_mmuidx_all_cpus: + * @cpu: Originating CPU of the flush + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of all CPUs, for the specified + * MMU indexes. + */ +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + uint16_t idxmap); +/** + * tlb_flush_page_by_mmuidx_all_cpus_synced: + * @cpu: Originating CPU of the flush + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of all CPUs, for the specified MMU + * indexes like tlb_flush_page_by_mmuidx_all_cpus except this function + * does not return and will restart the run-loop once all CPUs have + * executed the flush. + */ +void QEMU_NORETURN tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + uint16_t idxmap); +/** * tlb_flush_by_mmuidx: * @cpu: CPU whose TLB should be flushed + * @wait: If true ensure synchronisation by exiting the cpu_loop * @idxmap: bitmap of MMU indexes to flush * * Flush all entries from the TLB of the specified CPU, for the specified @@ -123,6 +183,27 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, */ void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap); /** + * tlb_flush_by_mmuidx_all_cpus: + * @cpu: Originating CPU of the flush + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from all TLBs of all CPUs, for the specified + * MMU indexes. + */ +void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap); +/** + * tlb_flush_by_mmuidx_all_cpus_synced: + * @cpu: Originating CPU of the flush + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from all TLBs of all CPUs, for the specified + * MMU indexes like tlb_flush_by_mmuidx_all_cpus except this function + * does not return and will restart the run-loop once all CPUs have + * executed the flush. + */ +void QEMU_NORETURN tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, + uint16_t idxmap); +/** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for * @vaddr: virtual address of page to add entry for @@ -159,16 +240,26 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr); void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, uintptr_t retaddr); -void tlb_flush_page_all(target_ulong addr); #else static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) { } - +static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +{ +} +static inline void tlb_flush_page_all_cpus_synced(CPUState *src, + target_ulong addr) +{ +} static inline void tlb_flush(CPUState *cpu) { } - +static inline void tlb_flush_all_cpus(CPUState *src_cpu) +{ +} +static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu) +{ +} static inline void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) { @@ -177,6 +268,23 @@ static inline void tlb_flush_page_by_mmuidx(CPUState *cpu, static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) { } +static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) +{ +} +static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) +{ +} +static inline void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap) +{ +} +static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, + uint16_t idxmap) +{ +} #endif #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */

[v8,19/25] cputlb: introduce tlb_flush_*_all_cpus[_synced]

Commit Message

Patch