diff mbox series

[03/72] qemu/host-utils: Add wrappers for carry builtins

Message ID 20210508014802.892561-4-richard.henderson@linaro.org
State Superseded
Headers show
Series Convert floatx80 and float128 to FloatParts | expand

Commit Message

Richard Henderson May 8, 2021, 1:46 a.m. UTC
These builtins came in clang 3.8, but are not present in gcc through
version 11.  Even in clang the optimization is not ideal except for
x86_64, but no worse than the hand-coding that we currently do.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 include/qemu/host-utils.h | 50 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

-- 
2.25.1

Comments

Alex Bennée May 10, 2021, 12:57 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> These builtins came in clang 3.8, but are not present in gcc through

> version 11.  Even in clang the optimization is not ideal except for

> x86_64, but no worse than the hand-coding that we currently do.


Given this statement....

<snip>
> +/**

> + * uadd64_carry - addition with carry-in and carry-out

> + * @x, @y: addends

> + * @pcarry: in-out carry value

> + *

> + * Computes @x + @y + *@pcarry, placing the carry-out back

> + * into *@pcarry and returning the 64-bit sum.

> + */

> +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)

> +{

> +#if __has_builtin(__builtin_addcll)

> +    unsigned long long c = *pcarry;

> +    x = __builtin_addcll(x, y, c, &c);


what happens when unsigned long long isn't the same as uint64_t? Doesn't
C99 only specify a minimum?

> +    *pcarry = c & 1;


Why do we need to clamp it here? Shouldn't the compiler automatically do
that due to the bool?

> +    return x;

> +#else

> +    bool c = *pcarry;

> +    /* This is clang's internal expansion of __builtin_addc. */

> +    c = uadd64_overflow(x, c, &x);

> +    c |= uadd64_overflow(x, y, &x);

> +    *pcarry = c;

> +    return x;

> +#endif


Either way if you aren't super happy with the compilers builtin and you
get equivalent code with the unambigious hand coded version then what is
the point of having a builtin leg?

> +}

> +

> +/**

> + * usub64_borrow - subtraction with borrow-in and borrow-out

> + * @x, @y: addends

> + * @pborrow: in-out borrow value

> + *

> + * Computes @x - @y - *@pborrow, placing the borrow-out back

> + * into *@pborrow and returning the 64-bit sum.

> + */

> +static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)

> +{

> +#if __has_builtin(__builtin_subcll)

> +    unsigned long long b = *pborrow;

> +    x = __builtin_subcll(x, y, b, &b);

> +    *pborrow = b & 1;

> +    return x;

> +#else

> +    bool b = *pborrow;

> +    b = usub64_overflow(x, b, &x);

> +    b |= usub64_overflow(x, y, &x);

> +    *pborrow = b;

> +    return x;

> +#endif

> +}

> +

>  /* Host type specific sizes of these routines.  */

>  

>  #if ULONG_MAX == UINT32_MAX



-- 
Alex Bennée
Richard Henderson May 11, 2021, 8:10 p.m. UTC | #2
On 5/10/21 7:57 AM, Alex Bennée wrote:
> 

> Richard Henderson <richard.henderson@linaro.org> writes:

> 

>> These builtins came in clang 3.8, but are not present in gcc through

>> version 11.  Even in clang the optimization is not ideal except for

>> x86_64, but no worse than the hand-coding that we currently do.

> 

> Given this statement....


I think you mis-read the "except for x86_64" part?

Anyway, these are simply bugs to be filed against clang, so that hopefully 
clang-12 will do a good job with the builtin.  And as I said, while the 
generated code is not ideal, it's no worse.

>> +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)

>> +{

>> +#if __has_builtin(__builtin_addcll)

>> +    unsigned long long c = *pcarry;

>> +    x = __builtin_addcll(x, y, c, &c);

> 

> what happens when unsigned long long isn't the same as uint64_t? Doesn't

> C99 only specify a minimum?


If you only look at C99, sure.  But looking at the set of supported hosts, 
unsigned long long is always a 64-bit type.

>> +    *pcarry = c & 1;

> 

> Why do we need to clamp it here? Shouldn't the compiler automatically do

> that due to the bool?


This produces a single AND insn, instead of CMP + SETcc.


r~
Alex Bennée May 12, 2021, 11:17 a.m. UTC | #3
Richard Henderson <richard.henderson@linaro.org> writes:

> On 5/10/21 7:57 AM, Alex Bennée wrote:

>> Richard Henderson <richard.henderson@linaro.org> writes:

>> 

>>> These builtins came in clang 3.8, but are not present in gcc through

>>> version 11.  Even in clang the optimization is not ideal except for

>>> x86_64, but no worse than the hand-coding that we currently do.

>> Given this statement....

>

> I think you mis-read the "except for x86_64" part?

>

> Anyway, these are simply bugs to be filed against clang, so that

> hopefully clang-12 will do a good job with the builtin.  And as I

> said, while the generated code is not ideal, it's no worse.

>

>>> +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)

>>> +{

>>> +#if __has_builtin(__builtin_addcll)

>>> +    unsigned long long c = *pcarry;

>>> +    x = __builtin_addcll(x, y, c, &c);

>> what happens when unsigned long long isn't the same as uint64_t?

>> Doesn't

>> C99 only specify a minimum?

>

> If you only look at C99, sure.  But looking at the set of supported

> hosts, unsigned long long is always a 64-bit type.


I guess I'm worrying about a theoretical future - but we don't worry
about it for other ll builtins so no biggy.

>

>>> +    *pcarry = c & 1;

>> Why do we need to clamp it here? Shouldn't the compiler

>> automatically do

>> that due to the bool?

>

> This produces a single AND insn, instead of CMP + SETcc.


Might be worth mentioning that in the commit message. 

Anyway:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


-- 
Alex Bennée
diff mbox series

Patch

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index fd76f0cbd3..2ea8b3000b 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@ 
 #ifndef HOST_UTILS_H
 #define HOST_UTILS_H
 
+#include "qemu/compiler.h"
 #include "qemu/bswap.h"
 
 #ifdef CONFIG_INT128
@@ -581,6 +582,55 @@  static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 #endif
 }
 
+/**
+ * uadd64_carry - addition with carry-in and carry-out
+ * @x, @y: addends
+ * @pcarry: in-out carry value
+ *
+ * Computes @x + @y + *@pcarry, placing the carry-out back
+ * into *@pcarry and returning the 64-bit sum.
+ */
+static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
+{
+#if __has_builtin(__builtin_addcll)
+    unsigned long long c = *pcarry;
+    x = __builtin_addcll(x, y, c, &c);
+    *pcarry = c & 1;
+    return x;
+#else
+    bool c = *pcarry;
+    /* This is clang's internal expansion of __builtin_addc. */
+    c = uadd64_overflow(x, c, &x);
+    c |= uadd64_overflow(x, y, &x);
+    *pcarry = c;
+    return x;
+#endif
+}
+
+/**
+ * usub64_borrow - subtraction with borrow-in and borrow-out
+ * @x, @y: addends
+ * @pborrow: in-out borrow value
+ *
+ * Computes @x - @y - *@pborrow, placing the borrow-out back
+ * into *@pborrow and returning the 64-bit sum.
+ */
+static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
+{
+#if __has_builtin(__builtin_subcll)
+    unsigned long long b = *pborrow;
+    x = __builtin_subcll(x, y, b, &b);
+    *pborrow = b & 1;
+    return x;
+#else
+    bool b = *pborrow;
+    b = usub64_overflow(x, b, &x);
+    b |= usub64_overflow(x, y, &x);
+    *pborrow = b;
+    return x;
+#endif
+}
+
 /* Host type specific sizes of these routines.  */
 
 #if ULONG_MAX == UINT32_MAX