diff mbox series

[02/13] target/ppc: Use atomic load for LQ and LQARX

Message ID 20180626161921.27941-3-richard.henderson@linaro.org
State New
Headers show
Series target/ppc improve atomic operations | expand

Commit Message

Richard Henderson June 26, 2018, 4:19 p.m. UTC
Section 1.4 of the Power ISA v3.0B states that both of these
instructions are single-copy atomic.  As we cannot (yet) issue
128-bit loads within TCG, use the generic helpers provided.

Since TCG cannot (yet) return a 128-bit value, add a slot within
CPUPPCState for returning the high half of a 128-bit return value.
This solution is preferred to the helper assigning to architectural
registers directly, as it avoids clobbering all TCG live values.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/ppc/cpu.h        |  3 ++
 target/ppc/helper.h     |  5 +++
 target/ppc/mem_helper.c | 20 ++++++++-
 target/ppc/translate.c  | 93 ++++++++++++++++++++++++++++++-----------
 4 files changed, 95 insertions(+), 26 deletions(-)

-- 
2.17.1

Comments

David Gibson June 28, 2018, 3:49 a.m. UTC | #1
On Tue, Jun 26, 2018 at 09:19:10AM -0700, Richard Henderson wrote:
> Section 1.4 of the Power ISA v3.0B states that both of these

> instructions are single-copy atomic.  As we cannot (yet) issue

> 128-bit loads within TCG, use the generic helpers provided.

> 

> Since TCG cannot (yet) return a 128-bit value, add a slot within

> CPUPPCState for returning the high half of a 128-bit return value.

> This solution is preferred to the helper assigning to architectural

> registers directly, as it avoids clobbering all TCG live values.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/ppc/cpu.h        |  3 ++

>  target/ppc/helper.h     |  5 +++

>  target/ppc/mem_helper.c | 20 ++++++++-

>  target/ppc/translate.c  | 93 ++++++++++++++++++++++++++++++-----------

>  4 files changed, 95 insertions(+), 26 deletions(-)

> 

> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h

> index c7f3fb6b73..973cf44cda 100644

> --- a/target/ppc/cpu.h

> +++ b/target/ppc/cpu.h

> @@ -1015,6 +1015,9 @@ struct CPUPPCState {

>      /* Next instruction pointer */

>      target_ulong nip;

>  

> +    /* High part of 128-bit helper return.  */

> +    uint64_t retxh;

> +


Adding a temporary here is kind of gross.  I guess the helper
interface doesn't allow for 128-bit returns, but couldn't you pass a
register number into the helper and have it update the right GPR
without going through a temp?

>      int access_type; /* when a memory exception occurs, the access

>                          type is stored here */

>  

> diff --git a/target/ppc/helper.h b/target/ppc/helper.h

> index d751f0e219..3f451a5d7e 100644

> --- a/target/ppc/helper.h

> +++ b/target/ppc/helper.h

> @@ -799,3 +799,8 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)

>  

>  DEF_HELPER_1(tbegin, void, env)

>  DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)

> +

> +#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)

> +DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)

> +DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)

> +#endif

> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c

> index a34e604db3..44a8f3445a 100644

> --- a/target/ppc/mem_helper.c

> +++ b/target/ppc/mem_helper.c

> @@ -21,9 +21,9 @@

>  #include "exec/exec-all.h"

>  #include "qemu/host-utils.h"

>  #include "exec/helper-proto.h"

> -

>  #include "helper_regs.h"

>  #include "exec/cpu_ldst.h"

> +#include "tcg.h"

>  #include "internal.h"

>  

>  //#define DEBUG_OP

> @@ -215,6 +215,24 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,

>      return i;

>  }

>  

> +#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)

> +uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,

> +                               uint32_t opidx)

> +{

> +    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());

> +    env->retxh = int128_gethi(ret);

> +    return int128_getlo(ret);

> +}

> +

> +uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,

> +                               uint32_t opidx)

> +{

> +    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());

> +    env->retxh = int128_gethi(ret);

> +    return int128_getlo(ret);

> +}

> +#endif

> +

>  /*****************************************************************************/

>  /* Altivec extension helpers */

>  #if defined(HOST_WORDS_BIGENDIAN)

> diff --git a/target/ppc/translate.c b/target/ppc/translate.c

> index 3a215a1dc6..0923cc24e3 100644

> --- a/target/ppc/translate.c

> +++ b/target/ppc/translate.c

> @@ -2607,7 +2607,7 @@ static void gen_ld(DisasContext *ctx)

>  static void gen_lq(DisasContext *ctx)

>  {

>      int ra, rd;

> -    TCGv EA;

> +    TCGv EA, hi, lo;

>  

>      /* lq is a legal user mode instruction starting in ISA 2.07 */

>      bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;

> @@ -2633,16 +2633,35 @@ static void gen_lq(DisasContext *ctx)

>      EA = tcg_temp_new();

>      gen_addr_imm_index(ctx, EA, 0x0F);

>  

> -    /* We only need to swap high and low halves. gen_qemu_ld64_i64 does

> -       necessary 64-bit byteswap already. */

> -    if (unlikely(ctx->le_mode)) {

> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);

> +    /* Note that the low part is always in RD+1, even in LE mode.  */

> +    lo = cpu_gpr[rd + 1];

> +    hi = cpu_gpr[rd];

> +

> +    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {

> +#ifdef CONFIG_ATOMIC128

> +        TCGv_i32 oi = tcg_temp_new_i32();

> +        if (ctx->le_mode) {

> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));

> +            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);

> +        } else {

> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));

> +            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);

> +        }

> +        tcg_temp_free_i32(oi);

> +        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));

> +#else

> +        /* Restart with exclusive lock.  */

> +        gen_helper_exit_atomic(cpu_env);

> +        ctx->base.is_jmp = DISAS_NORETURN;

> +#endif

> +    } else if (ctx->le_mode) {

> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);

>          gen_addr_add(ctx, EA, EA, 8);

> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);

> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);

>      } else {

> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);

> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ);

>          gen_addr_add(ctx, EA, EA, 8);

> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);

> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);

>      }

>      tcg_temp_free(EA);

>  }

> @@ -3236,9 +3255,8 @@ STCX(stdcx_, DEF_MEMOP(MO_Q))

>  /* lqarx */

>  static void gen_lqarx(DisasContext *ctx)

>  {

> -    TCGv EA;

>      int rd = rD(ctx->opcode);

> -    TCGv gpr1, gpr2;

> +    TCGv EA, hi, lo;

>  

>      if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||

>                   (rd == rB(ctx->opcode)))) {

> @@ -3247,24 +3265,49 @@ static void gen_lqarx(DisasContext *ctx)

>      }

>  

>      gen_set_access_type(ctx, ACCESS_RES);

> -    EA = tcg_temp_local_new();

> +    EA = tcg_temp_new();

>      gen_addr_reg_index(ctx, EA);

> -    gen_check_align(ctx, EA, 15);

> -    if (unlikely(ctx->le_mode)) {

> -        gpr1 = cpu_gpr[rd+1];

> -        gpr2 = cpu_gpr[rd];

> -    } else {

> -        gpr1 = cpu_gpr[rd];

> -        gpr2 = cpu_gpr[rd+1];

> -    }

> -    tcg_gen_qemu_ld_i64(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));

> -    tcg_gen_mov_tl(cpu_reserve, EA);

> -    gen_addr_add(ctx, EA, EA, 8);

> -    tcg_gen_qemu_ld_i64(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));

>  

> -    tcg_gen_st_tl(gpr1, cpu_env, offsetof(CPUPPCState, reserve_val));

> -    tcg_gen_st_tl(gpr2, cpu_env, offsetof(CPUPPCState, reserve_val2));

> +    /* Note that the low part is always in RD+1, even in LE mode.  */

> +    lo = cpu_gpr[rd + 1];

> +    hi = cpu_gpr[rd];

> +

> +    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {

> +#ifdef CONFIG_ATOMIC128

> +        TCGv_i32 oi = tcg_temp_new_i32();

> +        if (ctx->le_mode) {

> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,

> +                                                ctx->mem_idx));

> +            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);

> +        } else {

> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,

> +                                                ctx->mem_idx));

> +            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);

> +        }

> +        tcg_temp_free_i32(oi);

> +        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));

> +#else

> +        /* Restart with exclusive lock.  */

> +        gen_helper_exit_atomic(cpu_env);

> +        ctx->base.is_jmp = DISAS_NORETURN;

> +        tcg_temp_free(EA);

> +        return;

> +#endif

> +    } else if (ctx->le_mode) {

> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);

> +        tcg_gen_mov_tl(cpu_reserve, EA);

> +        gen_addr_add(ctx, EA, EA, 8);

> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);

> +    } else {

> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ | MO_ALIGN_16);

> +        tcg_gen_mov_tl(cpu_reserve, EA);

> +        gen_addr_add(ctx, EA, EA, 8);

> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);

> +    }

>      tcg_temp_free(EA);

> +

> +    tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));

> +    tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));

>  }

>  

>  /* stqcx. */


-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Richard Henderson June 28, 2018, 3:22 p.m. UTC | #2
On 06/27/2018 08:49 PM, David Gibson wrote:
>> +    /* High part of 128-bit helper return.  */
>> +    uint64_t retxh;
>> +
>
> Adding a temporary here is kind of gross.  I guess the helper
> interface doesn't allow for 128-bit returns, but couldn't you pass a
> register number into the helper and have it update the right GPR
> without going through a temp?


I could pass a pointer, but that would cause ...

>> +#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
>> +DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
>> +DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)


... the helper definitions to lose TCG_CALL_NO_WG, because they *would* write
to a global register.  Which would cause TCG to discard all of the global guest
registers cached within host registers.

I've used this secondary memory return before, in target/s390x,
and to me it seems cleaner than pointers.


r~
David Gibson June 29, 2018, 3:33 a.m. UTC | #3
On Thu, Jun 28, 2018 at 08:22:38AM -0700, Richard Henderson wrote:
> On 06/27/2018 08:49 PM, David Gibson wrote:
> >> +    /* High part of 128-bit helper return.  */
> >> +    uint64_t retxh;
> >> +
> >
> > Adding a temporary here is kind of gross.  I guess the helper
> > interface doesn't allow for 128-bit returns, but couldn't you pass a
> > register number into the helper and have it update the right GPR
> > without going through a temp?
>
> I could pass a pointer, but that would cause ...
>
> >> +#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
> >> +DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
> >> +DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
>
> ... the helper definitions to lose TCG_CALL_NO_WG, because they *would* write
> to a global register.  Which would cause TCG to discard all of the global guest
> registers cached within host registers.
>
> I've used this secondary memory return before, in target/s390,
> and to me it seems cleaner than pointers.


Ok, sounds reasonable, applied to ppc-for-3.0.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
diff mbox series

Patch

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c7f3fb6b73..973cf44cda 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1015,6 +1015,9 @@  struct CPUPPCState {
     /* Next instruction pointer */
     target_ulong nip;
 
+    /* High part of 128-bit helper return.  */
+    uint64_t retxh;
+
     int access_type; /* when a memory exception occurs, the access
                         type is stored here */
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d751f0e219..3f451a5d7e 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -799,3 +799,8 @@  DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
 
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
+
+#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
+DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
+#endif
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index a34e604db3..44a8f3445a 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -21,9 +21,9 @@ 
 #include "exec/exec-all.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
-
 #include "helper_regs.h"
 #include "exec/cpu_ldst.h"
+#include "tcg.h"
 #include "internal.h"
 
 //#define DEBUG_OP
@@ -215,6 +215,24 @@  target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
     return i;
 }
 
+#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
+                               uint32_t opidx)
+{
+    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
+    env->retxh = int128_gethi(ret);
+    return int128_getlo(ret);
+}
+
+uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
+                               uint32_t opidx)
+{
+    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
+    env->retxh = int128_gethi(ret);
+    return int128_getlo(ret);
+}
+#endif
+
 /*****************************************************************************/
 /* Altivec extension helpers */
 #if defined(HOST_WORDS_BIGENDIAN)
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 3a215a1dc6..0923cc24e3 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -2607,7 +2607,7 @@  static void gen_ld(DisasContext *ctx)
 static void gen_lq(DisasContext *ctx)
 {
     int ra, rd;
-    TCGv EA;
+    TCGv EA, hi, lo;
 
     /* lq is a legal user mode instruction starting in ISA 2.07 */
     bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
@@ -2633,16 +2633,35 @@  static void gen_lq(DisasContext *ctx)
     EA = tcg_temp_new();
     gen_addr_imm_index(ctx, EA, 0x0F);
 
-    /* We only need to swap high and low halves. gen_qemu_ld64_i64 does
-       necessary 64-bit byteswap already. */
-    if (unlikely(ctx->le_mode)) {
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);
+    /* Note that the low part is always in RD+1, even in LE mode.  */
+    lo = cpu_gpr[rd + 1];
+    hi = cpu_gpr[rd];
+
+    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+#ifdef CONFIG_ATOMIC128
+        TCGv_i32 oi = tcg_temp_new_i32();
+        if (ctx->le_mode) {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        } else {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+        }
+        tcg_temp_free_i32(oi);
+        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
+#else
+        /* Restart with exclusive lock.  */
+        gen_helper_exit_atomic(cpu_env);
+        ctx->base.is_jmp = DISAS_NORETURN;
+#endif
+    } else if (ctx->le_mode) {
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
         gen_addr_add(ctx, EA, EA, 8);
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);
     } else {
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ);
         gen_addr_add(ctx, EA, EA, 8);
-        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);
     }
     tcg_temp_free(EA);
 }
@@ -3236,9 +3255,8 @@  STCX(stdcx_, DEF_MEMOP(MO_Q))
 /* lqarx */
 static void gen_lqarx(DisasContext *ctx)
 {
-    TCGv EA;
     int rd = rD(ctx->opcode);
-    TCGv gpr1, gpr2;
+    TCGv EA, hi, lo;
 
     if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
                  (rd == rB(ctx->opcode)))) {
@@ -3247,24 +3265,49 @@  static void gen_lqarx(DisasContext *ctx)
     }
 
     gen_set_access_type(ctx, ACCESS_RES);
-    EA = tcg_temp_local_new();
+    EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
-    gen_check_align(ctx, EA, 15);
-    if (unlikely(ctx->le_mode)) {
-        gpr1 = cpu_gpr[rd+1];
-        gpr2 = cpu_gpr[rd];
-    } else {
-        gpr1 = cpu_gpr[rd];
-        gpr2 = cpu_gpr[rd+1];
-    }
-    tcg_gen_qemu_ld_i64(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
-    tcg_gen_mov_tl(cpu_reserve, EA);
-    gen_addr_add(ctx, EA, EA, 8);
-    tcg_gen_qemu_ld_i64(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
 
-    tcg_gen_st_tl(gpr1, cpu_env, offsetof(CPUPPCState, reserve_val));
-    tcg_gen_st_tl(gpr2, cpu_env, offsetof(CPUPPCState, reserve_val2));
+    /* Note that the low part is always in RD+1, even in LE mode.  */
+    lo = cpu_gpr[rd + 1];
+    hi = cpu_gpr[rd];
+
+    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+#ifdef CONFIG_ATOMIC128
+        TCGv_i32 oi = tcg_temp_new_i32();
+        if (ctx->le_mode) {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
+                                                ctx->mem_idx));
+            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        } else {
+            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
+                                                ctx->mem_idx));
+            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+        }
+        tcg_temp_free_i32(oi);
+        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
+#else
+        /* Restart with exclusive lock.  */
+        gen_helper_exit_atomic(cpu_env);
+        ctx->base.is_jmp = DISAS_NORETURN;
+        tcg_temp_free(EA);
+        return;
+#endif
+    } else if (ctx->le_mode) {
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
+        tcg_gen_mov_tl(cpu_reserve, EA);
+        gen_addr_add(ctx, EA, EA, 8);
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);
+    } else {
+        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ | MO_ALIGN_16);
+        tcg_gen_mov_tl(cpu_reserve, EA);
+        gen_addr_add(ctx, EA, EA, 8);
+        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);
+    }
     tcg_temp_free(EA);
+
+    tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));
+    tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));
 }
 
 /* stqcx. */