diff mbox

[v5,16/33] tcg: drop global lock during TCG code execution

Message ID 20161027151030.20863-17-alex.bennee@linaro.org
State Superseded
Headers show

Commit Message

Alex Bennée Oct. 27, 2016, 3:10 p.m. UTC
From: Jan Kiszka <jan.kiszka@siemens.com>


This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.

We have to revert a few optimization for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.

Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here.
These numbers demonstrate where we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independent on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully
loading a host CPU.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com>
[FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex]
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>

[EGC: fixed iothread lock for cpu-exec IRQ handling]
Signed-off-by: Emilio G. Cota <cota@braap.org>

[AJB: -smp single-threaded fix, rm old info from commit msg, review updates]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>


---
v5 (ajb, base patches):
 - added an assert to BQL unlock/lock functions instead of hanging
 - ensure all cpu->interrupt_requests *modifications* protected by BQL
 - add a re-read on cpu->interrupt_request for correctness
 - BQL fixes for:
   - assert BQL held for PPC hypercalls (emulate_spar_hypercall)
   - SCLP service calls on s390x
 - merge conflict with kick timer patch
v4 (ajb, base patches):
 - protect cpu->interrupt updates with BQL
 - fix wording io_mem_notdirty calls
 - s/we/with/
v3 (ajb, base-patches):
  - stale iothread_unlocks removed (cpu_exit/resume_from_signal deals
  with it in the longjmp).
  - fix re-base conflicts
v2 (ajb):
  - merge with tcg: grab iothread lock in cpu-exec interrupt handling
  - use existing fns for tracking lock state
  - lock iothread for mem_region
    - add assert on mem region modification
    - ensure smm_helper holds iothread
  - Add JK s-o-b
  - Fix-up FK s-o-b annotation
v1 (ajb, base-patches):
  - SMP failure now fixed by previous commit

Changes from Fred Konrad (mttcg-v7 via paolo):
  * Rebase on the current HEAD.
  * Fixes a deadlock in qemu_devices_reset().
  * Remove the mutex in address_space_*
---
 cpu-exec.c                 | 20 ++++++++++++++++++--
 cpus.c                     | 28 +++++-----------------------
 cputlb.c                   | 21 ++++++++++++++++++++-
 exec.c                     | 12 +++++++++---
 hw/core/irq.c              |  1 +
 hw/i386/kvmvapic.c         |  4 ++--
 hw/ppc/spapr.c             |  3 +++
 include/qom/cpu.h          |  1 +
 memory.c                   |  2 ++
 qom/cpu.c                  | 10 ++++++++++
 target-i386/smm_helper.c   |  7 +++++++
 target-s390x/misc_helper.c |  5 ++++-
 translate-all.c            | 11 +++++++++--
 translate-common.c         | 21 +++++++++++----------
 14 files changed, 102 insertions(+), 44 deletions(-)

-- 
2.10.1
diff mbox

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 6ea5507..3ca44bd 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -29,6 +29,7 @@ 
 #include "qemu/rcu.h"
 #include "exec/tb-hash.h"
 #include "exec/log.h"
+#include "qemu/main-loop.h"
 #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
 #include "hw/i386/apic.h"
 #endif
@@ -388,8 +389,10 @@  static inline bool cpu_handle_halt(CPUState *cpu)
         if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
             && replay_interrupt()) {
             X86CPU *x86_cpu = X86_CPU(cpu);
+            qemu_mutex_lock_iothread();
             apic_poll_irq(x86_cpu->apic_state);
             cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
+            qemu_mutex_unlock_iothread();
         }
 #endif
         if (!cpu_has_work(cpu)) {
@@ -443,7 +446,9 @@  static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
 #else
             if (replay_exception()) {
                 CPUClass *cc = CPU_GET_CLASS(cpu);
+                qemu_mutex_lock_iothread();
                 cc->do_interrupt(cpu);
+                qemu_mutex_unlock_iothread();
                 cpu->exception_index = -1;
             } else if (!replay_has_interrupt()) {
                 /* give a chance to iothread in replay mode */
@@ -469,9 +474,11 @@  static inline void cpu_handle_interrupt(CPUState *cpu,
                                         TranslationBlock **last_tb)
 {
     CPUClass *cc = CPU_GET_CLASS(cpu);
-    int interrupt_request = cpu->interrupt_request;
 
-    if (unlikely(interrupt_request)) {
+    if (unlikely(atomic_read(&cpu->interrupt_request))) {
+        int interrupt_request;
+        qemu_mutex_lock_iothread();
+        interrupt_request = cpu->interrupt_request;
         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
             /* Mask out external interrupts for this step. */
             interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -526,7 +533,12 @@  static inline void cpu_handle_interrupt(CPUState *cpu,
                the program flow was changed */
             *last_tb = NULL;
         }
+
+        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
+        qemu_mutex_unlock_iothread();
     }
+
+
     if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
         atomic_set(&cpu->exit_request, 0);
         cpu->exception_index = EXCP_INTERRUPT;
@@ -656,8 +668,12 @@  int cpu_exec(CPUState *cpu)
             g_assert(cpu == current_cpu);
             g_assert(cc == CPU_GET_CLASS(cpu));
 #endif /* buggy compiler */
+
             cpu->can_do_io = 1;
             tb_lock_reset();
+            if (qemu_mutex_iothread_locked()) {
+                qemu_mutex_unlock_iothread();
+            }
         }
     } /* for(;;) */
 
diff --git a/cpus.c b/cpus.c
index c6ff79f..aca2a2e 100644
--- a/cpus.c
+++ b/cpus.c
@@ -993,8 +993,6 @@  static void qemu_kvm_init_cpu_signals(CPUState *cpu)
 #endif /* _WIN32 */
 
 static QemuMutex qemu_global_mutex;
-static QemuCond qemu_io_proceeded_cond;
-static unsigned iothread_requesting_mutex;
 
 static QemuThread io_thread;
 
@@ -1008,7 +1006,6 @@  void qemu_init_cpu_loop(void)
     qemu_init_sigbus();
     qemu_cond_init(&qemu_cpu_cond);
     qemu_cond_init(&qemu_pause_cond);
-    qemu_cond_init(&qemu_io_proceeded_cond);
     qemu_mutex_init(&qemu_global_mutex);
 
     qemu_thread_get_self(&io_thread);
@@ -1051,10 +1048,6 @@  static void qemu_tcg_wait_io_event(CPUState *cpu)
 
     start_tcg_kick_timer();
 
-    while (iothread_requesting_mutex) {
-        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
-    }
-
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
     }
@@ -1203,9 +1196,11 @@  static int tcg_cpu_exec(CPUState *cpu)
         cpu->icount_decr.u16.low = decr;
         cpu->icount_extra = count;
     }
+    qemu_mutex_unlock_iothread();
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
+    qemu_mutex_lock_iothread();
 #ifdef CONFIG_PROFILER
     tcg_time += profile_getclock() - ti;
 #endif
@@ -1385,27 +1380,14 @@  bool qemu_mutex_iothread_locked(void)
 
 void qemu_mutex_lock_iothread(void)
 {
-    atomic_inc(&iothread_requesting_mutex);
-    /* In the simple case there is no need to bump the VCPU thread out of
-     * TCG code execution.
-     */
-    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
-        !first_cpu || !first_cpu->created) {
-        qemu_mutex_lock(&qemu_global_mutex);
-        atomic_dec(&iothread_requesting_mutex);
-    } else {
-        if (qemu_mutex_trylock(&qemu_global_mutex)) {
-            qemu_cpu_kick_rr_cpu();
-            qemu_mutex_lock(&qemu_global_mutex);
-        }
-        atomic_dec(&iothread_requesting_mutex);
-        qemu_cond_broadcast(&qemu_io_proceeded_cond);
-    }
+    g_assert(!qemu_mutex_iothread_locked());
+    qemu_mutex_lock(&qemu_global_mutex);
     iothread_locked = true;
 }
 
 void qemu_mutex_unlock_iothread(void)
 {
+    g_assert(qemu_mutex_iothread_locked());
     iothread_locked = false;
     qemu_mutex_unlock(&qemu_global_mutex);
 }
diff --git a/cputlb.c b/cputlb.c
index cc4da4d..986efaa 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -18,6 +18,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/memory.h"
@@ -505,6 +506,7 @@  static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
     hwaddr physaddr = iotlbentry->addr;
     MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
     uint64_t val;
+    bool locked = false;
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
@@ -513,7 +515,16 @@  static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
     }
 
     cpu->mem_io_vaddr = addr;
+
+    if (mr->global_locking) {
+        qemu_mutex_lock_iothread();
+        locked = true;
+    }
     memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
+    if (locked) {
+        qemu_mutex_unlock_iothread();
+    }
+
     return val;
 }
 
@@ -524,15 +535,23 @@  static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
     CPUState *cpu = ENV_GET_CPU(env);
     hwaddr physaddr = iotlbentry->addr;
     MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+    bool locked = false;
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
-
     cpu->mem_io_vaddr = addr;
     cpu->mem_io_pc = retaddr;
+
+    if (mr->global_locking) {
+        qemu_mutex_lock_iothread();
+        locked = true;
+    }
     memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
+    if (locked) {
+        qemu_mutex_unlock_iothread();
+    }
 }
 
 /* Return true if ADDR is present in the victim tlb, and has been copied
diff --git a/exec.c b/exec.c
index 30ae278..8db5545 100644
--- a/exec.c
+++ b/exec.c
@@ -2024,9 +2024,9 @@  static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
                 }
                 cpu->watchpoint_hit = wp;
 
-                /* The tb_lock will be reset when cpu_loop_exit or
-                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
-                 * main loop.
+                /* Both tb_lock and iothread_mutex will be reset when
+                 * cpu_loop_exit or cpu_loop_exit_noexc longjmp
+                 * back into the cpu_exec main loop.
                  */
                 tb_lock();
                 tb_check_watchpoint(cpu);
@@ -2262,8 +2262,14 @@  static void io_mem_init(void)
     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                           NULL, UINT64_MAX);
+
+    /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
+     * which can be called without the iothread mutex.
+     */
     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                           NULL, UINT64_MAX);
+    memory_region_clear_global_locking(&io_mem_notdirty);
+
     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                           NULL, UINT64_MAX);
 }
diff --git a/hw/core/irq.c b/hw/core/irq.c
index 49ff2e6..b98d1d6 100644
--- a/hw/core/irq.c
+++ b/hw/core/irq.c
@@ -22,6 +22,7 @@ 
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "qemu-common.h"
 #include "hw/irq.h"
 #include "qom/object.h"
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 4448253..3be6b96 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -450,8 +450,8 @@  static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
     resume_all_vcpus();
 
     if (!kvm_enabled()) {
-        /* tb_lock will be reset when cpu_loop_exit_noexc longjmps
-         * back into the cpu_exec loop. */
+        /* Both tb_lock and iothread_mutex will be reset when
+         *  longjmps back into the cpu_exec loop. */
         tb_lock();
         tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
         cpu_loop_exit_noexc(cs);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ddb7438..63df95b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1023,6 +1023,9 @@  static void emulate_spapr_hypercall(PowerPCCPU *cpu)
 {
     CPUPPCState *env = &cpu->env;
 
+    /* The TCG path should also be holding the BQL at this point */
+    g_assert(qemu_mutex_iothread_locked());
+
     if (msr_pr) {
         hcall_dprintf("Hypercall made with MSR[PR]=1\n");
         env->gpr[3] = H_PRIVILEGE;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index d268e20..0c44b3c 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -305,6 +305,7 @@  struct CPUState {
     bool unplug;
     bool crash_occurred;
     bool exit_request;
+    /* updates protected by BQL */
     uint32_t interrupt_request;
     int singlestep_enabled;
     int64_t icount_extra;
diff --git a/memory.c b/memory.c
index 58f9269..7a67c05 100644
--- a/memory.c
+++ b/memory.c
@@ -925,6 +925,8 @@  void memory_region_transaction_commit(void)
     AddressSpace *as;
 
     assert(memory_region_transaction_depth);
+    assert(qemu_mutex_iothread_locked());
+
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth) {
         if (memory_region_update_pending) {
diff --git a/qom/cpu.c b/qom/cpu.c
index c40f774..4817cd4 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -113,9 +113,19 @@  static void cpu_common_get_memory_mapping(CPUState *cpu,
     error_setg(errp, "Obtaining memory mappings is unsupported on this CPU.");
 }
 
+/* Resetting the IRQ comes from across the code base so we take the
+ * BQL here if we need to.  cpu_interrupt assumes it is held.*/
 void cpu_reset_interrupt(CPUState *cpu, int mask)
 {
+    bool need_lock = !qemu_mutex_iothread_locked();
+
+    if (need_lock) {
+        qemu_mutex_lock_iothread();
+    }
     cpu->interrupt_request &= ~mask;
+    if (need_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 }
 
 void cpu_exit(CPUState *cpu)
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 4dd6a2c..f051a77 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -18,6 +18,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/log.h"
@@ -42,11 +43,14 @@  void helper_rsm(CPUX86State *env)
 #define SMM_REVISION_ID 0x00020000
 #endif
 
+/* Called with iothread lock taken */
 void cpu_smm_update(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
     bool smm_enabled = (env->hflags & HF_SMM_MASK);
 
+    g_assert(qemu_mutex_iothread_locked());
+
     if (cpu->smram) {
         memory_region_set_enabled(cpu->smram, smm_enabled);
     }
@@ -333,7 +337,10 @@  void helper_rsm(CPUX86State *env)
     }
     env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
     env->hflags &= ~HF_SMM_MASK;
+
+    qemu_mutex_lock_iothread();
     cpu_smm_update(cpu);
+    qemu_mutex_unlock_iothread();
 
     qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
     log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP);
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 4df2ec6..5851e4d 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -25,6 +25,7 @@ 
 #include "exec/helper-proto.h"
 #include "sysemu/kvm.h"
 #include "qemu/timer.h"
+#include "qemu/main-loop.h"
 #include "exec/address-spaces.h"
 #ifdef CONFIG_KVM
 #include <linux/kvm.h>
@@ -109,11 +110,13 @@  void program_interrupt(CPUS390XState *env, uint32_t code, int ilen)
 /* SCLP service call */
 uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2)
 {
+    qemu_mutex_lock_iothread();
     int r = sclp_service_call(env, r1, r2);
     if (r < 0) {
         program_interrupt(env, -r, 4);
-        return 0;
+        r = 0;
     }
+    qemu_mutex_unlock_iothread();
     return r;
 }
 
diff --git a/translate-all.c b/translate-all.c
index 1237f3c..23a2170 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -55,6 +55,7 @@ 
 #include "translate-all.h"
 #include "qemu/bitmap.h"
 #include "qemu/timer.h"
+#include "qemu/main-loop.h"
 #include "exec/log.h"
 
 /* #define DEBUG_TB_INVALIDATE */
@@ -1503,7 +1504,9 @@  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
 }
 
 #ifdef CONFIG_SOFTMMU
-/* len must be <= 8 and start must be a multiple of len */
+/* len must be <= 8 and start must be a multiple of len.
+ * Called via softmmu_template.h, with iothread mutex not held.
+ */
 void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
 {
     PageDesc *p;
@@ -1698,7 +1701,10 @@  void tb_check_watchpoint(CPUState *cpu)
 
 #ifndef CONFIG_USER_ONLY
 /* in deterministic execution mode, instructions doing device I/Os
-   must be at the end of the TB */
+ * must be at the end of the TB.
+ *
+ * Called by softmmu_template.h, with iothread mutex not held.
+ */
 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
 {
 #if defined(TARGET_MIPS) || defined(TARGET_SH4)
@@ -1910,6 +1916,7 @@  void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
 
 void cpu_interrupt(CPUState *cpu, int mask)
 {
+    g_assert(qemu_mutex_iothread_locked());
     cpu->interrupt_request |= mask;
     cpu->tcg_exit_req = 1;
 }
diff --git a/translate-common.c b/translate-common.c
index 5e989cd..d504dd0 100644
--- a/translate-common.c
+++ b/translate-common.c
@@ -21,6 +21,7 @@ 
 #include "qemu-common.h"
 #include "qom/cpu.h"
 #include "sysemu/cpus.h"
+#include "qemu/main-loop.h"
 
 uintptr_t qemu_real_host_page_size;
 intptr_t qemu_real_host_page_mask;
@@ -30,6 +31,7 @@  intptr_t qemu_real_host_page_mask;
 static void tcg_handle_interrupt(CPUState *cpu, int mask)
 {
     int old_mask;
+    g_assert(qemu_mutex_iothread_locked());
 
     old_mask = cpu->interrupt_request;
     cpu->interrupt_request |= mask;
@@ -40,17 +42,16 @@  static void tcg_handle_interrupt(CPUState *cpu, int mask)
      */
     if (!qemu_cpu_is_self(cpu)) {
         qemu_cpu_kick(cpu);
-        return;
-    }
-
-    if (use_icount) {
-        cpu->icount_decr.u16.high = 0xffff;
-        if (!cpu->can_do_io
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(cpu, "Raised interrupt while not in I/O function");
-        }
     } else {
-        cpu->tcg_exit_req = 1;
+        if (use_icount) {
+            cpu->icount_decr.u16.high = 0xffff;
+            if (!cpu->can_do_io
+                && (mask & ~old_mask) != 0) {
+                cpu_abort(cpu, "Raised interrupt while not in I/O function");
+            }
+        } else {
+            cpu->tcg_exit_req = 1;
+        }
     }
 }