[4/9] tcg: Introduce atomic helpers for integer min/max

Message ID 20180427002651.28356-5-richard.henderson@linaro.org
State Superseded
Series target/arm: Implement v8.1-Atomics

Commit Message

Richard Henderson April 27, 2018, 12:26 a.m. UTC
Given that this atomic operation will be used by both risc-v
and aarch64, let's not duplicate code across the two targets.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++++++++++
 accel/tcg/tcg-runtime.h     |  8 +++++
 tcg/tcg-op.h                | 34 ++++++++++++++++++++++
 tcg/tcg.h                   |  8 +++++
 tcg/tcg-op.c                |  8 +++++
 5 files changed, 129 insertions(+)

-- 
2.14.3

Comments

Peter Maydell May 3, 2018, 1:26 p.m. UTC | #1
On 27 April 2018 at 01:26, Richard Henderson
<richard.henderson@linaro.org> wrote:
> Given that this atomic operation will be used by both risc-v
> and aarch64, let's not duplicate code across the two targets.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++++++++++
>  accel/tcg/tcg-runtime.h     |  8 +++++
>  tcg/tcg-op.h                | 34 ++++++++++++++++++++++
>  tcg/tcg.h                   |  8 +++++
>  tcg/tcg-op.c                |  8 +++++
>  5 files changed, 129 insertions(+)

> @@ -233,6 +270,39 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
>          ldo = ldn;
>      }
>  }
> +
> +/* These helpers are, as a whole, full barriers.  Within the helper,
> + * the leading barrier is explicit and the trailing barrier is within
> + * cmpxchg primitive.
> + */
> +#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
> +                        ABI_TYPE xval EXTRA_ARGS)                   \
> +{                                                                   \
> +    ATOMIC_MMU_DECLS;                                               \
> +    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
> +    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
> +    smp_mb();                                                       \
> +    ldn = atomic_read__nocheck(haddr);                              \

I see you're using the __nocheck function here. How does this
work for the 32-bit host case where you don't necessarily have
a 64-bit atomic primitive?

> +    do {                                                            \
> +        ldo = ldn; old = BSWAP(ldo); new = FN(old, val);            \
> +        ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));      \
> +    } while (ldo != ldn);                                           \
> +    ATOMIC_MMU_CLEANUP;                                             \
> +    return RET;                                                     \
> +}

I was going to suggest that you could also now use this to
implement the currently-hand-coded fetch_add and add_fetch
for the reverse-host-endian case, but those don't have a leading
smp_mb() and this does. Do you know why those are different?
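
For context, the existing hand-coded add_fetch in the reverse-host-endian
section has roughly this shape (a reconstruction from the context lines
quoted above, not the verbatim source; note the absence of a leading
smp_mb() before the first read):

    ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
                                    ABI_TYPE val EXTRA_ARGS)
    {
        ATOMIC_MMU_DECLS;
        DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
        DATA_TYPE ldo, ldn, ret, sto;

        ldo = atomic_read__nocheck(haddr);   /* no smp_mb() before this */
        while (1) {
            ret = BSWAP(ldo) + val;          /* compute in guest endianness */
            sto = BSWAP(ret);
            ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
            if (ldn == ldo) {
                ATOMIC_MMU_CLEANUP;
                return ret;
            }
            ldo = ldn;                       /* lost the race; retry */
        }
    }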

thanks
-- PMM
Richard Henderson May 3, 2018, 5:13 p.m. UTC | #2
On 05/03/2018 06:26 AM, Peter Maydell wrote:
> On 27 April 2018 at 01:26, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>> Given that this atomic operation will be used by both risc-v
>> and aarch64, let's not duplicate code across the two targets.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>  accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++++++++++
>>  accel/tcg/tcg-runtime.h     |  8 +++++
>>  tcg/tcg-op.h                | 34 ++++++++++++++++++++++
>>  tcg/tcg.h                   |  8 +++++
>>  tcg/tcg-op.c                |  8 +++++
>>  5 files changed, 129 insertions(+)
>
>> @@ -233,6 +270,39 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
>>          ldo = ldn;
>>      }
>>  }
>> +
>> +/* These helpers are, as a whole, full barriers.  Within the helper,
>> + * the leading barrier is explicit and the trailing barrier is within
>> + * cmpxchg primitive.
>> + */
>> +#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
>> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
>> +                        ABI_TYPE xval EXTRA_ARGS)                   \
>> +{                                                                   \
>> +    ATOMIC_MMU_DECLS;                                               \
>> +    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
>> +    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
>> +    smp_mb();                                                       \
>> +    ldn = atomic_read__nocheck(haddr);                              \
>
> I see you're using the __nocheck function here. How does this
> work for the 32-bit host case where you don't necessarily have
> a 64-bit atomic primitive?

It won't be compiled for the 32-bit host.  Translation will not attempt to use
this helper and will instead call exit_atomic.
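
For reference, a condensed sketch of that dispatch (modelled on the
existing CONFIG_ATOMIC64 handling in tcg/tcg-op.c; the enclosing
function and the softmmu argument plumbing are elided, so treat this
as a sketch rather than the exact code):

    if ((memop & MO_SIZE) == MO_64) {
    #ifdef CONFIG_ATOMIC64
        /* The host has 64-bit atomics: call the helper generated
           by GEN_ATOMIC_HELPER_FN.  */
        gen = table[memop & (MO_SIZE | MO_BSWAP)];
        gen(ret, cpu_env, addr, val);
    #else
        /* No 64-bit atomic primitive: leave the TB and rerun the
           instruction under the exclusive lock.  */
        gen_helper_exit_atomic(cpu_env);
        /* Produce a defined result so the opcode stream stays well
           formed for the (dead) code that follows.  */
        tcg_gen_movi_i64(ret, 0);
    #endif
    }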

>
>> +    do {                                                            \
>> +        ldo = ldn; old = BSWAP(ldo); new = FN(old, val);            \
>> +        ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));      \
>> +    } while (ldo != ldn);                                           \
>> +    ATOMIC_MMU_CLEANUP;                                             \
>> +    return RET;                                                     \
>> +}
>
> I was going to suggest that you could also now use this to
> implement the currently-hand-coded fetch_add and add_fetch
> for the reverse-host-endian case, but those don't have a leading
> smp_mb() and this does. Do you know why those are different?

That would seem to be a bug...


r~
Peter Maydell May 3, 2018, 5:26 p.m. UTC | #3
On 3 May 2018 at 18:13, Richard Henderson <richard.henderson@linaro.org> wrote:
> On 05/03/2018 06:26 AM, Peter Maydell wrote:
>> On 27 April 2018 at 01:26, Richard Henderson
>> <richard.henderson@linaro.org> wrote:
>>> Given that this atomic operation will be used by both risc-v
>>> and aarch64, let's not duplicate code across the two targets.
>>>
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>  accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++++++++++
>>>  accel/tcg/tcg-runtime.h     |  8 +++++
>>>  tcg/tcg-op.h                | 34 ++++++++++++++++++++++
>>>  tcg/tcg.h                   |  8 +++++
>>>  tcg/tcg-op.c                |  8 +++++
>>>  5 files changed, 129 insertions(+)
>>
>>> @@ -233,6 +270,39 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
>>>          ldo = ldn;
>>>      }
>>>  }
>>> +
>>> +/* These helpers are, as a whole, full barriers.  Within the helper,
>>> + * the leading barrier is explicit and the trailing barrier is within
>>> + * cmpxchg primitive.
>>> + */
>>> +#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
>>> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
>>> +                        ABI_TYPE xval EXTRA_ARGS)                   \
>>> +{                                                                   \
>>> +    ATOMIC_MMU_DECLS;                                               \
>>> +    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
>>> +    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
>>> +    smp_mb();                                                       \
>>> +    ldn = atomic_read__nocheck(haddr);                              \
>>
>> I see you're using the __nocheck function here. How does this
>> work for the 32-bit host case where you don't necessarily have
>> a 64-bit atomic primitive?
>
> It won't be compiled for the 32-bit host.  Translation will not attempt to use
> this helper and will instead call exit_atomic.

OK. Can you point me at the code that handles min/max atomics in that case?

thanks
-- PMM
Richard Henderson May 3, 2018, 5:39 p.m. UTC | #4
On 05/03/2018 10:26 AM, Peter Maydell wrote:
>> It won't be compiled for the 32-bit host.  Translation will not attempt to use
>> this helper and will instead call exit_atomic.
>
> OK. Can you point me at the code that handles min/max atomics in that case?

exit_atomic raises EXCP_ATOMIC, which leads to cpu_exec_step_atomic, which grabs
the exclusive lock and then executes the operation in a serial context.  This
is expanded inline via do_nonatomic_op_i64.
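
For reference, the serial-context expansion has roughly this shape
(modelled on do_nonatomic_op_i64 in tcg/tcg-op.c; treat this as a
sketch, not the verbatim source):

    /* A plain load/op/store is safe here: cpu_exec_step_atomic holds
       the exclusive lock, so no other vCPU runs concurrently.  */
    tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
    gen(t2, t1, val);                       /* e.g. tcg_gen_smin_i64 */
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);
    tcg_gen_ext_i64(ret, new_val ? t2 : t1, memop);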


r~
Peter Maydell May 3, 2018, 6:19 p.m. UTC | #5
On 3 May 2018 at 18:39, Richard Henderson <richard.henderson@linaro.org> wrote:
> On 05/03/2018 10:26 AM, Peter Maydell wrote:
>>> It won't be compiled for the 32-bit host.  Translation will not attempt to use
>>> this helper and will instead call exit_atomic.
>>
>> OK. Can you point me at the code that handles min/max atomics in that case?
>
> exit_atomic raises EXCP_ATOMIC, which leads to cpu_exec_step_atomic, which grabs
> the exclusive lock and then executes the operation in a serial context.  This
> is expanded inline via do_nonatomic_op_i64.

Ah, gotcha -- hidden behind a lot of macros.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>


-- PMM

Patch

diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index e022df4571..2489dd3ec1 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -25,18 +25,22 @@ 
 #elif DATA_SIZE == 8
 # define SUFFIX     q
 # define DATA_TYPE  uint64_t
+# define SDATA_TYPE int64_t
 # define BSWAP      bswap64
 #elif DATA_SIZE == 4
 # define SUFFIX     l
 # define DATA_TYPE  uint32_t
+# define SDATA_TYPE int32_t
 # define BSWAP      bswap32
 #elif DATA_SIZE == 2
 # define SUFFIX     w
 # define DATA_TYPE  uint16_t
+# define SDATA_TYPE int16_t
 # define BSWAP      bswap16
 #elif DATA_SIZE == 1
 # define SUFFIX     b
 # define DATA_TYPE  uint8_t
+# define SDATA_TYPE int8_t
 # define BSWAP
 #else
 # error unsupported data size
@@ -118,6 +122,39 @@  GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)
 
 #undef GEN_ATOMIC_HELPER
+
+/* These helpers are, as a whole, full barriers.  Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                        ABI_TYPE xval EXTRA_ARGS)                   \
+{                                                                   \
+    ATOMIC_MMU_DECLS;                                               \
+    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
+    XDATA_TYPE cmp, old, new, val = xval;                           \
+    smp_mb();                                                       \
+    cmp = atomic_read__nocheck(haddr);                              \
+    do {                                                            \
+        old = cmp; new = FN(old, val);                              \
+        cmp = atomic_cmpxchg__nocheck(haddr, old, new);             \
+    } while (cmp != old);                                           \
+    ATOMIC_MMU_CLEANUP;                                             \
+    return RET;                                                     \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN,  DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX,  DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN,  DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
 #endif /* DATA SIZE >= 16 */
 
 #undef END
@@ -233,6 +270,39 @@  ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
         ldo = ldn;
     }
 }
+
+/* These helpers are, as a whole, full barriers.  Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                        ABI_TYPE xval EXTRA_ARGS)                   \
+{                                                                   \
+    ATOMIC_MMU_DECLS;                                               \
+    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
+    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
+    smp_mb();                                                       \
+    ldn = atomic_read__nocheck(haddr);                              \
+    do {                                                            \
+        ldo = ldn; old = BSWAP(ldo); new = FN(old, val);            \
+        ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));      \
+    } while (ldo != ldn);                                           \
+    ATOMIC_MMU_CLEANUP;                                             \
+    return RET;                                                     \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN,  DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX,  DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN,  DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
 #endif /* DATA_SIZE >= 16 */
 
 #undef END
@@ -241,5 +311,6 @@  ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
 #undef BSWAP
 #undef ABI_TYPE
 #undef DATA_TYPE
+#undef SDATA_TYPE
 #undef SUFFIX
 #undef DATA_SIZE
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 2536959a18..1bd39d136d 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -125,11 +125,19 @@  GEN_ATOMIC_HELPERS(fetch_add)
 GEN_ATOMIC_HELPERS(fetch_and)
 GEN_ATOMIC_HELPERS(fetch_or)
 GEN_ATOMIC_HELPERS(fetch_xor)
+GEN_ATOMIC_HELPERS(fetch_smin)
+GEN_ATOMIC_HELPERS(fetch_umin)
+GEN_ATOMIC_HELPERS(fetch_smax)
+GEN_ATOMIC_HELPERS(fetch_umax)
 
 GEN_ATOMIC_HELPERS(add_fetch)
 GEN_ATOMIC_HELPERS(and_fetch)
 GEN_ATOMIC_HELPERS(or_fetch)
 GEN_ATOMIC_HELPERS(xor_fetch)
+GEN_ATOMIC_HELPERS(smin_fetch)
+GEN_ATOMIC_HELPERS(umin_fetch)
+GEN_ATOMIC_HELPERS(smax_fetch)
+GEN_ATOMIC_HELPERS(umax_fetch)
 
 GEN_ATOMIC_HELPERS(xchg)
 
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 540337e605..9326b52312 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -898,6 +898,7 @@  void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
 
 void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+
 void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
@@ -906,6 +907,15 @@  void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+
 void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
@@ -914,6 +924,14 @@  void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 
 void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
 void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
@@ -1043,10 +1061,18 @@  void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
 #define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
 #define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
 #define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64
 #define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
 #define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
 #define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
 #define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64
 #define tcg_gen_dup_tl_vec  tcg_gen_dup_i64_vec
 #else
 #define tcg_gen_movi_tl tcg_gen_movi_i32
@@ -1145,10 +1171,18 @@  void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
 #define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
 #define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
 #define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32
 #define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
 #define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
 #define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
 #define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32
 #define tcg_gen_dup_tl_vec  tcg_gen_dup_i32_vec
 #endif
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 30896ca304..55e2747966 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1389,12 +1389,20 @@  GEN_ATOMIC_HELPER_ALL(fetch_sub)
 GEN_ATOMIC_HELPER_ALL(fetch_and)
 GEN_ATOMIC_HELPER_ALL(fetch_or)
 GEN_ATOMIC_HELPER_ALL(fetch_xor)
+GEN_ATOMIC_HELPER_ALL(fetch_smin)
+GEN_ATOMIC_HELPER_ALL(fetch_umin)
+GEN_ATOMIC_HELPER_ALL(fetch_smax)
+GEN_ATOMIC_HELPER_ALL(fetch_umax)
 
 GEN_ATOMIC_HELPER_ALL(add_fetch)
 GEN_ATOMIC_HELPER_ALL(sub_fetch)
 GEN_ATOMIC_HELPER_ALL(and_fetch)
 GEN_ATOMIC_HELPER_ALL(or_fetch)
 GEN_ATOMIC_HELPER_ALL(xor_fetch)
+GEN_ATOMIC_HELPER_ALL(smin_fetch)
+GEN_ATOMIC_HELPER_ALL(umin_fetch)
+GEN_ATOMIC_HELPER_ALL(smax_fetch)
+GEN_ATOMIC_HELPER_ALL(umax_fetch)
 
 GEN_ATOMIC_HELPER_ALL(xchg)
 
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 5b82c3be8d..6a914654f5 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3051,11 +3051,19 @@  GEN_ATOMIC_HELPER(fetch_add, add, 0)
 GEN_ATOMIC_HELPER(fetch_and, and, 0)
 GEN_ATOMIC_HELPER(fetch_or, or, 0)
 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
 
 GEN_ATOMIC_HELPER(add_fetch, add, 1)
 GEN_ATOMIC_HELPER(and_fetch, and, 1)
 GEN_ATOMIC_HELPER(or_fetch, or, 1)
 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
 
 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
 {