diff mbox series

[v2,3/3] target-i386: post memory failure event to uplayer

Message ID 20200922095630.394893-4-pizhenwei@bytedance.com
State New
Headers show
Series add MEMORY_FAILURE event | expand

Commit Message

zhenwei pi Sept. 22, 2020, 9:56 a.m. UTC
Post memory failure event to uplayer to handle hardware memory
corrupted event. Rather than simple QEMU log, QEMU could report more
effective message to uplayer. For example, guest crashes by MCE,
selecting another host server is a better choice.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 target/i386/helper.c | 15 +++++++++++++++
 target/i386/kvm.c    |  7 ++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

Comments

Philippe Mathieu-Daudé Sept. 22, 2020, 10:30 a.m. UTC | #1
On 9/22/20 11:56 AM, zhenwei pi wrote:
> Post memory failure event to uplayer to handle hardware memory
> corrupted event. Rather than simple QEMU log, QEMU could report more
> effective message to uplayer. For example, guest crashes by MCE,
> selecting another host server is a better choice.
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  target/i386/helper.c | 15 +++++++++++++++
>  target/i386/kvm.c    |  7 ++++++-
>  2 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/target/i386/helper.c b/target/i386/helper.c
> index 0c7fd32491..47823c29e4 100644
> --- a/target/i386/helper.c
> +++ b/target/i386/helper.c
> @@ -18,6 +18,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qapi/qapi-events-run-state.h"
>  #include "cpu.h"
>  #include "exec/exec-all.h"
>  #include "qemu/qemu-print.h"
> @@ -858,6 +859,7 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>      CPUX86State *cenv = &cpu->env;
>      uint64_t *banks = cenv->mce_banks + 4 * params->bank;
>      char msg[64];
> +    MemoryFailureFlags mf_flags = {0};
>      bool need_reset = false;
>  
>      cpu_synchronize_state(cs);
> @@ -869,6 +871,12 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>      if (!(params->flags & MCE_INJECT_UNCOND_AO)
>          && !(params->status & MCI_STATUS_AR)
>          && (cenv->mcg_status & MCG_STATUS_MCIP)) {
> +        mf_flags.has_action_required = true;
> +        mf_flags.action_required = false;
> +        mf_flags.has_recursive = true;
> +        mf_flags.recursive = true;
> +        qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_GUEST_MCE_INJECT,
> +                                       true, &mf_flags);

Can you extract a function such:

static void emit_guest_mce_failure(bool action_required, bool recursive)
{
  ...
}

To use as:

           emit_guest_mce_failure(true, true);

>          return;
>      }
>  
> @@ -909,6 +917,8 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>          }
>  
>          if (need_reset) {
> +            qapi_event_send_memory_failure(
> +                 MEMORY_FAILURE_ACTION_GUEST_MCE_FATAL, false, NULL);
>              monitor_printf(params->mon, "%s", msg);
>              qemu_log_mask(CPU_LOG_RESET, "%s\n", msg);
>              qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
> @@ -934,6 +944,11 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>      } else {
>          banks[1] |= MCI_STATUS_OVER;
>      }
> +
> +    mf_flags.has_action_required = true;
> +    mf_flags.action_required = !!(params->status & MCI_STATUS_AR);
> +    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_GUEST_MCE_INJECT,
> +                                   true, &mf_flags);

And here:

       emit_guest_mce_failure(params->status & MCI_STATUS_AR, false);

>  }
>  
>  void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index 9efb07e7c8..989889c291 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -14,6 +14,7 @@
>  
>  #include "qemu/osdep.h"
>  #include "qapi/error.h"
> +#include "qapi/qapi-events-run-state.h"
>  #include <sys/ioctl.h>
>  #include <sys/utsname.h>
>  
> @@ -577,6 +578,8 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
>  
>  static void hardware_memory_error(void *host_addr)
>  {
> +    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_HYPERVISOR_FATAL,
> +                                   false, NULL);
>      error_report("QEMU got Hardware memory error at addr %p", host_addr);
>      exit(1);
>  }
> @@ -631,7 +634,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>          hardware_memory_error(addr);
>      }
>  
> -    /* Hope we are lucky for AO MCE */
> +    /* Hope we are lucky for AO MCE, just notify a event */
> +    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_HYPERVISOR_IGNORE,
> +                                   false, NULL);
>  }
>  
>  static void kvm_reset_exception(CPUX86State *env)
>
zhenwei pi Sept. 23, 2020, 3:12 a.m. UTC | #2
On 9/22/20 6:30 PM, Philippe Mathieu-Daudé wrote:
> On 9/22/20 11:56 AM, zhenwei pi wrote:
>> Post memory failure event to uplayer to handle hardware memory
>> corrupted event. Rather than simple QEMU log, QEMU could report more
>> effective message to uplayer. For example, guest crashes by MCE,
>> selecting another host server is a better choice.
>>
>> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
>> ---
>>   target/i386/helper.c | 15 +++++++++++++++
>>   target/i386/kvm.c    |  7 ++++++-
>>   2 files changed, 21 insertions(+), 1 deletion(-)
>>
>> diff --git a/target/i386/helper.c b/target/i386/helper.c
>> index 0c7fd32491..47823c29e4 100644
>> --- a/target/i386/helper.c
>> +++ b/target/i386/helper.c
>> @@ -18,6 +18,7 @@
>>    */
>>   
>>   #include "qemu/osdep.h"
>> +#include "qapi/qapi-events-run-state.h"
>>   #include "cpu.h"
>>   #include "exec/exec-all.h"
>>   #include "qemu/qemu-print.h"
>> @@ -858,6 +859,7 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>>       CPUX86State *cenv = &cpu->env;
>>       uint64_t *banks = cenv->mce_banks + 4 * params->bank;
>>       char msg[64];
>> +    MemoryFailureFlags mf_flags = {0};
>>       bool need_reset = false;
>>   
>>       cpu_synchronize_state(cs);
>> @@ -869,6 +871,12 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>>       if (!(params->flags & MCE_INJECT_UNCOND_AO)
>>           && !(params->status & MCI_STATUS_AR)
>>           && (cenv->mcg_status & MCG_STATUS_MCIP)) {
>> +        mf_flags.has_action_required = true;
>> +        mf_flags.action_required = false;
>> +        mf_flags.has_recursive = true;
>> +        mf_flags.recursive = true;
>> +        qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_GUEST_MCE_INJECT,
>> +                                       true, &mf_flags);
> 
> Can you extract a function such:
> 
> static void emit_guest_mce_failure(bool action_required, bool recursive)
> {
>    ...
> }
> 
> To use as:
> 
>             emit_guest_mce_failure(true, true);
> 
>>           return;
>>       }
>>

There are 2 field in struct MemoryFailureFlags currently, maybe more 
fields need to be added in the future, and emit_guest_mce_failure need 
more arguments too. So is it worth wrapping a function now?


>> @@ -909,6 +917,8 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>>           }
>>   
>>           if (need_reset) {
>> +            qapi_event_send_memory_failure(
>> +                 MEMORY_FAILURE_ACTION_GUEST_MCE_FATAL, false, NULL);
>>               monitor_printf(params->mon, "%s", msg);
>>               qemu_log_mask(CPU_LOG_RESET, "%s\n", msg);
>>               qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
>> @@ -934,6 +944,11 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
>>       } else {
>>           banks[1] |= MCI_STATUS_OVER;
>>       }
>> +
>> +    mf_flags.has_action_required = true;
>> +    mf_flags.action_required = !!(params->status & MCI_STATUS_AR);
>> +    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_GUEST_MCE_INJECT,
>> +                                   true, &mf_flags);
> 
> And here:
> 
>         emit_guest_mce_failure(params->status & MCI_STATUS_AR, false);
> 
>>   }
>>   
>>   void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
>> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
>> index 9efb07e7c8..989889c291 100644
>> --- a/target/i386/kvm.c
>> +++ b/target/i386/kvm.c
>> @@ -14,6 +14,7 @@
>>   
>>   #include "qemu/osdep.h"
>>   #include "qapi/error.h"
>> +#include "qapi/qapi-events-run-state.h"
>>   #include <sys/ioctl.h>
>>   #include <sys/utsname.h>
>>   
>> @@ -577,6 +578,8 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
>>   
>>   static void hardware_memory_error(void *host_addr)
>>   {
>> +    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_HYPERVISOR_FATAL,
>> +                                   false, NULL);
>>       error_report("QEMU got Hardware memory error at addr %p", host_addr);
>>       exit(1);
>>   }
>> @@ -631,7 +634,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>>           hardware_memory_error(addr);
>>       }
>>   
>> -    /* Hope we are lucky for AO MCE */
>> +    /* Hope we are lucky for AO MCE, just notify a event */
>> +    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_HYPERVISOR_IGNORE,
>> +                                   false, NULL);
>>   }
>>   
>>   static void kvm_reset_exception(CPUX86State *env)
>>
>
diff mbox series

Patch

diff --git a/target/i386/helper.c b/target/i386/helper.c
index 0c7fd32491..47823c29e4 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -18,6 +18,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qapi/qapi-events-run-state.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "qemu/qemu-print.h"
@@ -858,6 +859,7 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
     CPUX86State *cenv = &cpu->env;
     uint64_t *banks = cenv->mce_banks + 4 * params->bank;
     char msg[64];
+    MemoryFailureFlags mf_flags = {0};
     bool need_reset = false;
 
     cpu_synchronize_state(cs);
@@ -869,6 +871,12 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
     if (!(params->flags & MCE_INJECT_UNCOND_AO)
         && !(params->status & MCI_STATUS_AR)
         && (cenv->mcg_status & MCG_STATUS_MCIP)) {
+        mf_flags.has_action_required = true;
+        mf_flags.action_required = false;
+        mf_flags.has_recursive = true;
+        mf_flags.recursive = true;
+        qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_GUEST_MCE_INJECT,
+                                       true, &mf_flags);
         return;
     }
 
@@ -909,6 +917,8 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
         }
 
         if (need_reset) {
+            qapi_event_send_memory_failure(
+                 MEMORY_FAILURE_ACTION_GUEST_MCE_FATAL, false, NULL);
             monitor_printf(params->mon, "%s", msg);
             qemu_log_mask(CPU_LOG_RESET, "%s\n", msg);
             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
@@ -934,6 +944,11 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
     } else {
         banks[1] |= MCI_STATUS_OVER;
     }
+
+    mf_flags.has_action_required = true;
+    mf_flags.action_required = !!(params->status & MCI_STATUS_AR);
+    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_GUEST_MCE_INJECT,
+                                   true, &mf_flags);
 }
 
 void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 9efb07e7c8..989889c291 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -14,6 +14,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "qapi/qapi-events-run-state.h"
 #include <sys/ioctl.h>
 #include <sys/utsname.h>
 
@@ -577,6 +578,8 @@  static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
 
 static void hardware_memory_error(void *host_addr)
 {
+    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_HYPERVISOR_FATAL,
+                                   false, NULL);
     error_report("QEMU got Hardware memory error at addr %p", host_addr);
     exit(1);
 }
@@ -631,7 +634,9 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         hardware_memory_error(addr);
     }
 
-    /* Hope we are lucky for AO MCE */
+    /* Hope we are lucky for AO MCE, just notify a event */
+    qapi_event_send_memory_failure(MEMORY_FAILURE_ACTION_HYPERVISOR_IGNORE,
+                                   false, NULL);
 }
 
 static void kvm_reset_exception(CPUX86State *env)