@@ -1313,6 +1313,8 @@ static void prepare_icount_for_run(CPUState *cpu)
insns_left = MIN(0xffff, cpu->icount_budget);
cpu->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
+
+ replay_mutex_lock();
}
}
@@ -1328,6 +1330,8 @@ static void process_icount_data(CPUState *cpu)
cpu->icount_budget = 0;
replay_account_executed_instructions();
+
+ replay_mutex_unlock();
}
}
@@ -1414,6 +1418,7 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
cpu->exit_request = 1;
while (1) {
+ replay_mutex_lock();
qemu_mutex_lock_iothread();
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
@@ -1426,6 +1431,8 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
qemu_mutex_unlock_iothread();
+ replay_mutex_unlock();
+
if (!cpu) {
cpu = first_cpu;
}
@@ -1732,12 +1739,21 @@ void pause_all_vcpus(void)
}
}
+ /* We need to drop the replay_lock so any vCPU threads woken up
+ * can finish their replay tasks
+ */
+ replay_mutex_unlock();
+
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
}
+
+ qemu_mutex_unlock_iothread();
+ replay_mutex_lock();
+ qemu_mutex_lock_iothread();
}
void cpu_resume(CPUState *cpu)
@@ -49,6 +49,28 @@ Modifications of qemu include:
* recording/replaying user input (mouse and keyboard)
* adding internal checkpoints for cpu and io synchronization
+Locking and thread synchronisation
+----------------------------------
+
+Previously the synchronisation of the main thread and the vCPU thread
+was ensured by the holding of the BQL. However the trend has been to
+reduce the time the BQL is held across the system, including under TCG
+system emulation. As it is important that batches of events are kept
+in sequence (e.g. expiring timers and checkpoints in the main thread
+while instruction checkpoints are written by the vCPU thread) we need
+another lock to keep things in lock-step. This role is now handled by
+the replay mutex (replay_mutex_lock()). It used to be held only for each
+event being written but now it is held for a whole execution period. This
+results in a deterministic ping-pong between the main and vCPU threads.
+
+As the BQL is now a finer grained lock than the replay mutex it is almost
+certainly a bug, and a source of deadlocks, to take the replay mutex
+while the BQL is held. This is enforced by an assert in
+replay_mutex_lock(). While the unlocks are usually in the reverse order,
+this is not necessary; you can drop the replay mutex while holding the
+BQL, without doing a more complicated
+unlock_iothread/replay_unlock/lock_iothread sequence.
+
Non-deterministic events
------------------------
@@ -63,6 +63,8 @@ bool replay_mutex_locked(void);
/* Replay process control functions */
+/*! Initializes and takes the replay lock (even if we don't use it) */
+void replay_init_locks(void);
/*! Enables recording or saving event log with specified parameters */
void replay_configure(struct QemuOpts *opts);
/*! Initializes timers used for snapshotting and enables events recording */
@@ -96,25 +96,24 @@ void *replay_event_char_read_load(void)
void replay_char_write_event_save(int res, int offset)
{
+ g_assert(replay_mutex_locked());
+
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_CHAR_WRITE);
replay_put_dword(res);
replay_put_dword(offset);
- replay_mutex_unlock();
}
void replay_char_write_event_load(int *res, int *offset)
{
+ g_assert(replay_mutex_locked());
+
replay_account_executed_instructions();
- replay_mutex_lock();
if (replay_next_event_is(EVENT_CHAR_WRITE)) {
*res = replay_get_dword();
*offset = replay_get_dword();
replay_finish_event();
- replay_mutex_unlock();
} else {
- replay_mutex_unlock();
error_report("Missing character write event in the replay log");
exit(1);
}
@@ -122,23 +121,21 @@ void replay_char_write_event_load(int *res, int *offset)
int replay_char_read_all_load(uint8_t *buf)
{
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
if (replay_next_event_is(EVENT_CHAR_READ_ALL)) {
size_t size;
int res;
replay_get_array(buf, &size);
replay_finish_event();
- replay_mutex_unlock();
res = (int)size;
assert(res >= 0);
return res;
} else if (replay_next_event_is(EVENT_CHAR_READ_ALL_ERROR)) {
int res = replay_get_dword();
replay_finish_event();
- replay_mutex_unlock();
return res;
} else {
- replay_mutex_unlock();
error_report("Missing character read all event in the replay log");
exit(1);
}
@@ -146,19 +143,17 @@ int replay_char_read_all_load(uint8_t *buf)
void replay_char_read_all_save_error(int res)
{
+ g_assert(replay_mutex_locked());
assert(res < 0);
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_CHAR_READ_ALL_ERROR);
replay_put_dword(res);
- replay_mutex_unlock();
}
void replay_char_read_all_save_buf(uint8_t *buf, int offset)
{
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_CHAR_READ_ALL);
replay_put_array(buf, offset);
- replay_mutex_unlock();
}
@@ -79,16 +79,14 @@ bool replay_has_events(void)
void replay_flush_events(void)
{
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
while (!QTAILQ_EMPTY(&events_list)) {
Event *event = QTAILQ_FIRST(&events_list);
- replay_mutex_unlock();
replay_run_event(event);
- replay_mutex_lock();
QTAILQ_REMOVE(&events_list, event, events);
g_free(event);
}
- replay_mutex_unlock();
}
void replay_disable_events(void)
@@ -102,14 +100,14 @@ void replay_disable_events(void)
void replay_clear_events(void)
{
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
while (!QTAILQ_EMPTY(&events_list)) {
Event *event = QTAILQ_FIRST(&events_list);
QTAILQ_REMOVE(&events_list, event, events);
g_free(event);
}
- replay_mutex_unlock();
}
/*! Adds specified async event to the queue */
@@ -136,9 +134,8 @@ void replay_add_event(ReplayAsyncEventKind event_kind,
event->opaque2 = opaque2;
event->id = id;
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
QTAILQ_INSERT_TAIL(&events_list, event, events);
- replay_mutex_unlock();
}
void replay_bh_schedule_event(QEMUBH *bh)
@@ -210,10 +207,7 @@ void replay_save_events(int checkpoint)
while (!QTAILQ_EMPTY(&events_list)) {
Event *event = QTAILQ_FIRST(&events_list);
replay_save_event(event, checkpoint);
-
- replay_mutex_unlock();
replay_run_event(event);
- replay_mutex_lock();
QTAILQ_REMOVE(&events_list, event, events);
g_free(event);
}
@@ -299,9 +293,7 @@ void replay_read_events(int checkpoint)
}
replay_finish_event();
read_event_kind = -1;
- replay_mutex_unlock();
replay_run_event(event);
- replay_mutex_lock();
g_free(event);
}
@@ -174,30 +174,43 @@ static __thread bool replay_locked;
void replay_mutex_init(void)
{
qemu_mutex_init(&lock);
+ /* Hold the mutex while we start-up */
+ qemu_mutex_lock(&lock);
+ replay_locked = true;
}
-void replay_mutex_destroy(void)
+bool replay_mutex_locked(void)
{
- qemu_mutex_destroy(&lock);
+ return replay_locked;
}
-bool replay_mutex_locked(void)
+void replay_mutex_destroy(void)
{
- return replay_locked;
+ if (replay_mutex_locked()) {
+ qemu_mutex_unlock(&lock);
+ }
+ qemu_mutex_destroy(&lock);
}
+/* Ordering constraint: the replay_lock must be taken before the BQL */
void replay_mutex_lock(void)
{
- g_assert(!replay_mutex_locked());
- qemu_mutex_lock(&lock);
- replay_locked = true;
+ if (replay_mode != REPLAY_MODE_NONE) {
+ g_assert(!qemu_mutex_iothread_locked());
+ g_assert(!replay_mutex_locked());
+ qemu_mutex_lock(&lock);
+ replay_locked = true;
+ }
}
+/* The replay_lock may be released while the BQL is still held */
void replay_mutex_unlock(void)
{
- g_assert(replay_mutex_locked());
- replay_locked = false;
- qemu_mutex_unlock(&lock);
+ if (replay_mode != REPLAY_MODE_NONE) {
+ g_assert(replay_mutex_locked());
+ replay_locked = false;
+ qemu_mutex_unlock(&lock);
+ }
}
/*! Saves cached instructions. */
@@ -17,13 +17,13 @@
int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
{
- replay_save_instructions();
if (replay_file) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
+ replay_save_instructions();
replay_put_event(EVENT_CLOCK + kind);
replay_put_qword(clock);
- replay_mutex_unlock();
}
return clock;
@@ -46,16 +46,16 @@ void replay_read_next_clock(ReplayClockKind kind)
/*! Reads next clock event from the input. */
int64_t replay_read_clock(ReplayClockKind kind)
{
+ g_assert(replay_file && replay_mutex_locked());
+
replay_account_executed_instructions();
if (replay_file) {
int64_t ret;
- replay_mutex_lock();
if (replay_next_event_is(EVENT_CLOCK + kind)) {
replay_read_next_clock(kind);
}
ret = replay_state.cached_clock[kind];
- replay_mutex_unlock();
return ret;
}
@@ -81,7 +81,7 @@ int replay_get_instructions(void)
void replay_account_executed_instructions(void)
{
if (replay_mode == REPLAY_MODE_PLAY) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
if (replay_state.instructions_count > 0) {
int count = (int)(replay_get_current_step()
- replay_state.current_step);
@@ -100,24 +100,22 @@ void replay_account_executed_instructions(void)
qemu_notify_event();
}
}
- replay_mutex_unlock();
}
}
bool replay_exception(void)
{
+
if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_EXCEPTION);
- replay_mutex_unlock();
return true;
} else if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
bool res = replay_has_exception();
if (res) {
- replay_mutex_lock();
replay_finish_event();
- replay_mutex_unlock();
}
return res;
}
@@ -129,10 +127,9 @@ bool replay_has_exception(void)
{
bool res = false;
if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
replay_account_executed_instructions();
- replay_mutex_lock();
res = replay_next_event_is(EVENT_EXCEPTION);
- replay_mutex_unlock();
}
return res;
@@ -141,17 +138,15 @@ bool replay_has_exception(void)
bool replay_interrupt(void)
{
if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_INTERRUPT);
- replay_mutex_unlock();
return true;
} else if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
bool res = replay_has_interrupt();
if (res) {
- replay_mutex_lock();
replay_finish_event();
- replay_mutex_unlock();
}
return res;
}
@@ -163,10 +158,9 @@ bool replay_has_interrupt(void)
{
bool res = false;
if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
replay_account_executed_instructions();
- replay_mutex_lock();
res = replay_next_event_is(EVENT_INTERRUPT);
- replay_mutex_unlock();
}
return res;
}
@@ -174,9 +168,8 @@ bool replay_has_interrupt(void)
void replay_shutdown_request(ShutdownCause cause)
{
if (replay_mode == REPLAY_MODE_RECORD) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
replay_put_event(EVENT_SHUTDOWN + cause);
- replay_mutex_unlock();
}
}
@@ -190,9 +183,9 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
return true;
}
- replay_mutex_lock();
if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) {
replay_finish_event();
} else if (replay_state.data_kind != EVENT_ASYNC) {
@@ -205,15 +198,20 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
checkpoint were processed */
res = replay_state.data_kind != EVENT_ASYNC;
} else if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_put_event(EVENT_CHECKPOINT + checkpoint);
replay_save_events(checkpoint);
res = true;
}
out:
- replay_mutex_unlock();
return res;
}
+void replay_init_locks(void)
+{
+ replay_mutex_init();
+}
+
static void replay_enable(const char *fname, int mode)
{
const char *fmode = NULL;
@@ -233,8 +231,6 @@ static void replay_enable(const char *fname, int mode)
atexit(replay_finish);
- replay_mutex_init();
-
replay_file = fopen(fname, fmode);
if (replay_file == NULL) {
fprintf(stderr, "Replay: open %s: %s\n", fname, strerror(errno));
@@ -274,6 +270,8 @@ void replay_configure(QemuOpts *opts)
Location loc;
if (!opts) {
+ /* we no longer need this lock */
+ replay_mutex_destroy();
return;
}
@@ -29,6 +29,7 @@
#include "qemu/sockets.h" // struct in_addr needed for libslirp.h
#include "sysemu/qtest.h"
#include "sysemu/cpus.h"
+#include "sysemu/replay.h"
#include "slirp/libslirp.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
@@ -245,18 +246,21 @@ static int os_host_main_loop_wait(int64_t timeout)
timeout = SCALE_MS;
}
+
if (timeout) {
spin_counter = 0;
- qemu_mutex_unlock_iothread();
} else {
spin_counter++;
}
+ qemu_mutex_unlock_iothread();
+
+ replay_mutex_unlock();
ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
- if (timeout) {
- qemu_mutex_lock_iothread();
- }
+ replay_mutex_lock();
+
+ qemu_mutex_lock_iothread();
glib_pollfds_poll();
@@ -463,8 +467,13 @@ static int os_host_main_loop_wait(int64_t timeout)
poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
qemu_mutex_unlock_iothread();
+
+ replay_mutex_unlock();
+
g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
+ replay_mutex_lock();
+
qemu_mutex_lock_iothread();
if (g_poll_ret > 0) {
for (i = 0; i < w->num; i++) {
@@ -3091,6 +3091,8 @@ int main(int argc, char **argv, char **envp)
qemu_init_cpu_list();
qemu_init_cpu_loop();
+
+ replay_init_locks();
qemu_mutex_lock_iothread();
atexit(qemu_run_exit_notifiers);