diff mbox series

[v9,11/13] accel/tcg: adding integration with linux perf

Message ID 20191007152839.30804-12-alex.bennee@linaro.org
State New
Headers show
Series TCG code quality tracking and perf integration | expand

Commit Message

Alex Bennée Oct. 7, 2019, 3:28 p.m. UTC
From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>


This commit adds support for Linux perf so that QEMU's JITted code can
be analyzed, and so that the PC of each TB can be seen in the profile.

When using "-perf" qemu creates a jitdump file in the current working
directory. You then integrate the file using perf inject.

Example of use:
 perf record -k 1 qemu-x86_64 -perf ./a.out
 perf inject -j -i perf.data -o perf.data.jitted
 perf report -i perf.data.jitted

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>

Message-Id: <20190830121903.17585-2-vandersonmr2@gmail.com>
[AJB: rebase and various fixes]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>


---
AJB:
   - use get_clock instead of repeating ourselves
   - add lock for writing to file
   - use canonical kernel.org links
   - g_autoptr/g_autofree strings
---
 accel/tcg/Makefile.objs      |   2 +-
 accel/tcg/perf/Makefile.objs |   1 +
 accel/tcg/perf/jitdump.c     | 194 +++++++++++++++++++++++++++++++++++
 accel/tcg/perf/jitdump.h     |  36 +++++++
 accel/tcg/translate-all.c    |  14 +++
 docs/devel/tcg.rst           |  15 +++
 include/qemu-common.h        |   3 +
 linux-user/main.c            |   7 ++
 os-posix.c                   |   5 +
 qemu-options.hx              |  11 ++
 10 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 accel/tcg/perf/Makefile.objs
 create mode 100644 accel/tcg/perf/jitdump.c
 create mode 100644 accel/tcg/perf/jitdump.h

-- 
2.20.1

Comments

Richard Henderson Oct. 8, 2019, 7:33 p.m. UTC | #1
On 10/7/19 11:28 AM, Alex Bennée wrote:
> +static uint32_t get_e_machine(void)

> +{

> +    uint32_t e_machine = EM_NONE;

> +    Elf64_Ehdr elf_header;


Not ideal, as this appears to not work on 32-bit hosts, but the two structures
do match up within the first 24 bytes, in which this is located.

That said, this value is present within tcg/host/tcg-target.inc.c as
ELF_HOST_MACHINE.  So we really don't have to play /proc/self/exec games.

> +void start_jitdump_file(void)

> +{

> +    g_autofree gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());

> +    dumpfile = fopen(dumpfile_name, "w+");

> +

> +    /* 'Perf record' saves mmaped files during the execution of a program and

> +     * 'perf inject' iterate over them to reconstruct all used/executed binary.

> +     * So, we create a mmap with the path of our jitdump that is processed

> +     * and used by 'perf inject' to reconstruct jitted binaries.

> +     */

> +    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),

> +                          PROT_READ | PROT_EXEC,

> +                          MAP_PRIVATE,

> +                          fileno(dumpfile), 0);


(1) sysconf(_SC_PAGESIZE) is qemu_real_host_page_size.
(2) This is a page-sized mapping of a new, zero-sized file?
    I assume this mapping event gets logged, and that is its
    only purpose?
(3) I really need to read the kernel docs...

> +void append_load_in_jitdump_file(TranslationBlock *tb)

> +{

> +    gchar *func_name = g_strdup_printf("TB virt:0x"TARGET_FMT_lx, tb->pc);

> +

> +    /* Serialise the writing of the dump file */

> +    qemu_mutex_lock(&dumpfile_lock);

> +

> +    struct jr_code_load load_event;

> +    load_event.p.id = JIT_CODE_LOAD;

> +    load_event.p.total_size =

> +        sizeof(struct jr_code_load) + func_name->len + 1 + tb->tc.size;


How does a "gchar *func_name" have ->len?  Did this used to be GString, but a
last-minute change means it no longer compiles?

> +    fflush(dumpfile);


Why fflushing all of the time?  Surely the file contents doesn't matter until
after the final close.

> +    qemu_mutex_unlock(&dumpfile_lock);


Why a separate qemu locking instead of using stdio's own locking (flockfile).

> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c

> index 871d91d559..3fafb656e7 100644

> --- a/accel/tcg/translate-all.c

> +++ b/accel/tcg/translate-all.c

> @@ -58,6 +58,10 @@

>  #include "sysemu/cpus.h"

>  #include "sysemu/tcg.h"

>  

> +#ifdef __linux__

> +#include "perf/jitdump.h"

> +#endif


Why the ifdefs?  We're not dependent on other headers are we?
Not that there's a "perf" on other hosts, but AFAICT it should
at least compile...


r~
diff mbox series

Patch

diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index 49ffe81b5d..6a1ad59199 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -3,6 +3,6 @@  obj-$(CONFIG_SOFTMMU) += cputlb.o
 obj-y += tcg-runtime.o tcg-runtime-gvec.o
 obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
 obj-y += translator.o tb-stats.o
-
+obj-y += perf/
 obj-$(CONFIG_USER_ONLY) += user-exec.o
 obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
diff --git a/accel/tcg/perf/Makefile.objs b/accel/tcg/perf/Makefile.objs
new file mode 100644
index 0000000000..ca9abb4f48
--- /dev/null
+++ b/accel/tcg/perf/Makefile.objs
@@ -0,0 +1 @@ 
+obj-$(CONFIG_LINUX) += jitdump.o
diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
new file mode 100644
index 0000000000..e1d6f2214e
--- /dev/null
+++ b/accel/tcg/perf/jitdump.c
@@ -0,0 +1,194 @@ 
+/*
+ * This code implements an interface to create and fill jitdump files. These files
+ * store information used by Linux Perf to enhance the presentation of jitted
+ * code and to allow the disassembly of jitted code.
+ *
+ * The jitdump file specification can be found in the Linux Kernel Source tree:
+ *    tools/perf/Documentation/jitdump-specification.txt
+ *
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include <sys/syscall.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <elf.h>
+
+#include "disas/disas.h"
+#include "jitdump.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "exec/tb-stats.h"
+
+/* File header written once, at the start of the dump, by start_jitdump_file() */
+struct jitheader {
+    uint32_t magic;     /* 0x4A695444, i.e. the ASCII bytes 'J' 'i' 'T' 'D' */
+    uint32_t version;   /* header version (written as 1) */
+    uint32_t total_size;/* total size of header */
+    uint32_t elf_mach;  /* elf mach target (e_machine of /proc/self/exe) */
+    uint32_t pad1;      /* reserved */
+    uint32_t pid;       /* JIT process id */
+    uint64_t timestamp; /* timestamp (get_clock() when the file is started) */
+    uint64_t flags;     /* flags (written as 0) */
+};
+
+/*
+ * Record types stored in jr_prefix.id. Only JIT_CODE_LOAD records are
+ * written by the code in this file.
+ */
+enum jit_record_type {
+    JIT_CODE_LOAD       = 0,
+    JIT_CODE_MOVE       = 1,
+    JIT_CODE_DEBUG_INFO = 2,
+    JIT_CODE_CLOSE      = 3,
+
+    JIT_CODE_MAX,       /* number of record types defined above */
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+    uint32_t id;         /* record type, one of enum jit_record_type */
+    uint32_t total_size; /* size of the whole record, trailing data included */
+    uint64_t timestamp;  /* get_clock() value taken when the record is built */
+};
+
+/*
+ * Fixed-size part of a JIT_CODE_LOAD record. In the file it is followed by
+ * the NUL-terminated symbol name and then by code_size bytes of host code.
+ */
+struct jr_code_load {
+    struct jr_prefix p;
+
+    uint32_t pid;        /* getpid() of the QEMU process */
+    uint32_t tid;        /* gettid() of the thread writing the record */
+    uint64_t vma;        /* set to tb->pc */
+    uint64_t code_addr;  /* host address of the translated code (tb->tc.ptr) */
+    uint64_t code_size;  /* size of the translated code in bytes (tb->tc.size) */
+    uint64_t code_index; /* set to tb->pc */
+};
+
+/*
+ * Record made of the prefix only.
+ * NOTE(review): nothing in the code visible here writes this record.
+ */
+struct jr_code_close {
+    struct jr_prefix p;
+};
+
+/*
+ * Layout of a code-move record.
+ * NOTE(review): nothing in the code visible here writes this record; the
+ * field meanings are presumably those of the jitdump specification
+ * referenced in the header of this file - confirm before using it.
+ */
+struct jr_code_move {
+    struct jr_prefix p;
+
+    uint32_t pid;
+    uint32_t tid;
+    uint64_t vma;
+    uint64_t old_code_addr;
+    uint64_t new_code_addr;
+    uint64_t code_size;
+    uint64_t code_index;
+};
+
+/* Stream for the ./jit-<pid>.dump file opened by start_jitdump_file() */
+FILE *dumpfile;
+/* Serialises the writes done by append_load_in_jitdump_file() */
+QemuMutex dumpfile_lock;
+/* mmap() of the dump file made by start_jitdump_file(); MAP_FAILED on error */
+void *perf_marker;
+
+/*
+ * Return the e_machine field of the ELF header of the running binary
+ * (/proc/self/exe), or EM_NONE when the file cannot be opened or a full
+ * header cannot be read from it.
+ */
+static uint32_t get_e_machine(void)
+{
+    Elf64_Ehdr ehdr;
+    uint32_t machine = EM_NONE;
+    FILE *self = fopen("/proc/self/exe", "r");
+
+    if (self == NULL) {
+        return machine;
+    }
+
+    /* Only trust the header when it was read completely. */
+    if (fread(&ehdr, sizeof(Elf64_Ehdr), 1, self) == 1) {
+        machine = ehdr.e_machine;
+    }
+
+    fclose(self);
+    return machine;
+}
+
+/*
+ * Create ./jit-<pid>.dump in the current directory and write the jitdump
+ * file header to it.
+ *
+ * If the file cannot be opened or the perf marker cannot be mapped, a
+ * message is printed to stderr and dumpfile is left NULL;
+ * append_load_in_jitdump_file() and close_jitdump_file() treat a NULL
+ * dumpfile as "nothing to do".
+ */
+void start_jitdump_file(void)
+{
+    g_autofree gchar *dumpfile_name =
+        g_strdup_printf("./jit-%d.dump", getpid());
+    /* Members not named here (pad1, timestamp, flags) start out as zero. */
+    struct jitheader header = {
+        .magic = 0x4A695444,
+        .version = 1,
+        .total_size = sizeof(struct jitheader),
+        .elf_mach = get_e_machine(),
+        .pid = getpid(),
+    };
+
+    /* Initialise the lock up front so it is valid on every exit path. */
+    qemu_mutex_init(&dumpfile_lock);
+    perf_marker = MAP_FAILED;
+
+    dumpfile = fopen(dumpfile_name, "w+");
+    if (dumpfile == NULL) {
+        fprintf(stderr, "Failed to create jitdump file %s\n", dumpfile_name);
+        return;
+    }
+
+    /*
+     * 'perf record' saves mmaped files during the execution of a program
+     * and 'perf inject' iterates over them to reconstruct all used/executed
+     * binaries. So, we create a mmap with the path of our jitdump that is
+     * processed and used by 'perf inject' to reconstruct jitted binaries.
+     */
+    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),
+                       PROT_READ | PROT_EXEC,
+                       MAP_PRIVATE,
+                       fileno(dumpfile), 0);
+
+    if (perf_marker == MAP_FAILED) {
+        fprintf(stderr, "Failed to create mmap marker file for perf %d\n",
+                fileno(dumpfile));
+        fclose(dumpfile);
+        dumpfile = NULL; /* keep later append/close calls off the dead stream */
+        return;
+    }
+
+    header.timestamp = get_clock();
+    fwrite(&header, header.total_size, 1, dumpfile);
+
+    fflush(dumpfile);
+}
+
+/*
+ * Append a JIT_CODE_LOAD record for @tb: the fixed-size jr_code_load
+ * structure, the NUL-terminated name "TB virt:0x<pc>" and the host code
+ * bytes of the block. Does nothing when no dump file is open.
+ */
+void append_load_in_jitdump_file(TranslationBlock *tb)
+{
+    g_autofree gchar *func_name = NULL;
+    size_t name_size;
+    struct jr_code_load load_event = {
+        .p.id = JIT_CODE_LOAD,
+        .pid = getpid(),
+        .tid = syscall(SYS_gettid),
+        .vma = tb->pc,
+        /* go through uintptr_t so the cast is also valid on 32-bit hosts */
+        .code_addr = (uintptr_t)tb->tc.ptr,
+        .code_size = tb->tc.size,
+        .code_index = tb->pc,
+    };
+
+    if (dumpfile == NULL) {
+        /* start_jitdump_file() failed or the file was already closed */
+        return;
+    }
+
+    func_name = g_strdup_printf("TB virt:0x"TARGET_FMT_lx, tb->pc);
+    name_size = strlen(func_name) + 1; /* the trailing NUL is written too */
+    load_event.p.total_size =
+        sizeof(struct jr_code_load) + name_size + tb->tc.size;
+
+    /* Serialise the writing of the dump file */
+    qemu_mutex_lock(&dumpfile_lock);
+
+    /* Taken under the lock so records appear in timestamp order. */
+    load_event.p.timestamp = get_clock();
+
+    fwrite(&load_event, sizeof(struct jr_code_load), 1, dumpfile);
+    fwrite(func_name, name_size, 1, dumpfile);
+    fwrite(tb->tc.ptr, tb->tc.size, 1, dumpfile);
+
+    fflush(dumpfile);
+
+    qemu_mutex_unlock(&dumpfile_lock);
+}
+
+/*
+ * Close the dump file and remove the perf marker mapping. Both are reset
+ * afterwards, so the call is harmless when start_jitdump_file() failed or
+ * when it is repeated.
+ */
+void close_jitdump_file(void)
+{
+    if (dumpfile != NULL) {
+        fclose(dumpfile);
+        dumpfile = NULL;
+    }
+    if (perf_marker != NULL && perf_marker != MAP_FAILED) {
+        munmap(perf_marker, sysconf(_SC_PAGESIZE));
+    }
+    perf_marker = MAP_FAILED;
+}
+
+/* Set by enable_jitdump() and reported by jitdump_enabled(). */
+bool is_jitdump_enabled;
+
+/* Request jitdump generation; called by the "perf" option handlers. */
+void enable_jitdump(void)
+{
+    is_jitdump_enabled = true;
+}
+
+/* Return whether jitdump generation has been requested. */
+bool jitdump_enabled(void)
+{
+    return is_jitdump_enabled;
+}
diff --git a/accel/tcg/perf/jitdump.h b/accel/tcg/perf/jitdump.h
new file mode 100644
index 0000000000..5d6df3ec91
--- /dev/null
+++ b/accel/tcg/perf/jitdump.h
@@ -0,0 +1,36 @@ 
+/*
+ * QEMU Linux Perf Support
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario (vandersonmr2@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include "exec/exec-all.h"
+
+/* Open the jitdump file for this process and write its header. */
+void start_jitdump_file(void);
+
+/* Append a code-load record describing the translated block @tb. */
+void append_load_in_jitdump_file(TranslationBlock *tb);
+/* NOTE(review): declared here, but no definition is visible in this patch. */
+void append_move_in_jitdump_file(TranslationBlock *tb);
+
+/* Close the jitdump file and unmap the perf marker. */
+void close_jitdump_file(void);
+
+#endif
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 871d91d559..3fafb656e7 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -58,6 +58,10 @@ 
 #include "sysemu/cpus.h"
 #include "sysemu/tcg.h"
 
+#ifdef __linux__
+#include "perf/jitdump.h"
+#endif
+
 /* #define DEBUG_TB_INVALIDATE */
 /* #define DEBUG_TB_FLUSH */
 /* make various TB consistency checks */
@@ -1167,6 +1171,11 @@  void tcg_exec_init(unsigned long tb_size)
     cpu_gen_init();
     page_init();
     tb_htable_init();
+#ifdef __linux__
+    if (jitdump_enabled()) {
+        start_jitdump_file();
+    }
+#endif
     code_gen_alloc(tb_size);
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
@@ -1978,6 +1987,11 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
         return existing_tb;
     }
     tcg_tb_insert(tb);
+#ifdef __linux__
+    if (jitdump_enabled()) {
+        append_load_in_jitdump_file(tb);
+    }
+#endif
     return tb;
 }
 
diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
index 4956a30a4e..ab5639cf67 100644
--- a/docs/devel/tcg.rst
+++ b/docs/devel/tcg.rst
@@ -109,3 +109,18 @@  memory areas instead calls out to C code for device emulation.
 Finally, the MMU helps tracking dirty pages and pages pointed to by
 translation blocks.
 
+Profiling JITted code
+---------------------
+
+The Linux `perf` tool will treat all JITted code as a single block
+because, unlike for the main code, it can't use debug information to
+link individual program counter samples with larger functions. To
+overcome this limitation you can use the `--perf` option to generate a
+jitdump file. This file needs to be integrated with the `perf.data`
+file before the final report can be viewed.
+
+.. code::
+
+  perf record -k 1 $QEMU --perf $REMAINING_ARGS
+  perf inject -i perf.data -j -o perf.data.jitted
+  perf report -i perf.data.jitted
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 8d84db90b0..a16e0e7eb6 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -129,4 +129,7 @@  void page_size_init(void);
  * returned. */
 bool dump_in_progress(void);
 
+void enable_jitdump(void);
+bool jitdump_enabled(void);
+
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 560d053f72..18f771c0be 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -377,6 +377,11 @@  static void handle_arg_strace(const char *arg)
     do_strace = 1;
 }
 
+/* Handler for the "perf" entry of arg_table: enable jitdump; @arg is unused. */
+static void handle_arg_perf(const char *arg)
+{
+    enable_jitdump();
+}
+
 static void handle_arg_version(const char *arg)
 {
     printf("qemu-" TARGET_NAME " version " QEMU_FULL_VERSION
@@ -449,6 +454,8 @@  static const struct qemu_argument arg_table[] = {
      "",           "Seed for pseudo-random number generator"},
     {"trace",      "QEMU_TRACE",       true,  handle_arg_trace,
      "",           "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
+    {"perf",      "QEMU_PERF",         false, handle_arg_perf,
+     "",           "dump jitdump files to help linux perf JIT code visualization"},
     {"version",    "QEMU_VERSION",     false, handle_arg_version,
      "",           "display version information and exit"},
 #if defined(TARGET_XTENSA)
diff --git a/os-posix.c b/os-posix.c
index 86cffd2c7d..36ea3a08ca 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -191,6 +191,11 @@  int os_parse_cmd_args(int index, const char *optarg)
     case QEMU_OPTION_enablefips:
         fips_set_state(true);
         break;
+#if defined(CONFIG_TCG) && defined (CLOCK_MONOTONIC)
+    case QEMU_OPTION_perf:
+        enable_jitdump();
+        break;
+#endif
 #endif
     default:
         return -1;
diff --git a/qemu-options.hx b/qemu-options.hx
index 2a04ca6ac5..2924032c3b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4160,6 +4160,17 @@  STEXI
 Enable FIPS 140-2 compliance mode.
 ETEXI
 
+#ifdef __linux__
+DEF("perf", 0, QEMU_OPTION_perf,
+    "-perf  dump jitdump files to help linux perf JIT code visualization\n",
+    QEMU_ARCH_ALL)
+#endif
+STEXI
+@item -perf
+@findex -perf
+Dumps jitdump files to help linux perf JIT code visualization
+ETEXI
+
 HXCOMM Deprecated by -accel tcg
 DEF("no-kvm", 0, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)