
[v2,38/38] host/include/ppc: Implement aes-round.h

Message ID 20230609022401.684157-39-richard.henderson@linaro.org
State New
Series crypto: Provide aes-round.h and host accel

Commit Message

Richard Henderson June 9, 2023, 2:24 a.m. UTC
Detect CRYPTO in cpuinfo; implement the accel hooks.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
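A minimal sketch, for context only, of how the common AES code is expected
to reach these hooks: HAVE_AES_ACCEL folds to true at compile time when the
compiler predefines __CRYPTO__, and otherwise tests cpuinfo at run time.
The _gen fallback name below is illustrative, not part of this patch.

    static inline void aesenc_SB_SR_MC_AK(AESState *r, const AESState *st,
                                          const AESState *rk, bool be)
    {
        if (HAVE_AES_ACCEL) {
            /* Power ISA 2.07 vcipher path from this header. */
            aesenc_SB_SR_MC_AK_accel(r, st, rk, be);
        } else {
            /* Portable table-based fallback (name assumed). */
            aesenc_SB_SR_MC_AK_gen(r, st, rk, be);
        }
    }
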
 host/include/ppc/host/aes-round.h   | 181 ++++++++++++++++++++++++++++
 host/include/ppc/host/cpuinfo.h     |   1 +
 host/include/ppc64/host/aes-round.h |   1 +
 util/cpuinfo-ppc.c                  |   8 ++
 4 files changed, 191 insertions(+)
 create mode 100644 host/include/ppc/host/aes-round.h
 create mode 100644 host/include/ppc64/host/aes-round.h
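
One detail worth spelling out for review: the MixColumns-only and
InvMixColumns-only hooks are built by composing the ISA round instructions
with an all-zero round key z, relying on the identities

    vncipherlast(x, z) = InvShiftRows(InvSubBytes(x))
    vcipher(x, z)      = MixColumns(ShiftRows(SubBytes(x)))

so that

    vcipher(vncipherlast(x, z), z) = MixColumns(x)

which is what aesenc_MC_accel computes; aesdec_IMC_accel uses the mirrored
composition vncipher(vcipherlast(x, z), z) = InvMixColumns(x).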

Comments

Daniel Henrique Barboza June 12, 2023, 1:30 p.m. UTC | #1
On 6/8/23 23:24, Richard Henderson wrote:
> Detect CRYPTO in cpuinfo; implement the accel hooks.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>


Patch

diff --git a/host/include/ppc/host/aes-round.h b/host/include/ppc/host/aes-round.h
new file mode 100644
index 0000000000..9b5a15d1e5
--- /dev/null
+++ b/host/include/ppc/host/aes-round.h
@@ -0,0 +1,181 @@ 
+/*
+ * Power v2.07 specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PPC_HOST_AES_ROUND_H
+#define PPC_HOST_AES_ROUND_H
+
+#ifndef __ALTIVEC__
+/* Without ALTIVEC, we can't even write inline assembly. */
+#include "host/include/generic/host/aes-round.h"
+#else
+#include "host/cpuinfo.h"
+
+#ifdef __CRYPTO__
+# define HAVE_AES_ACCEL  true
+#else
+# define HAVE_AES_ACCEL  likely(cpuinfo & CPUINFO_CRYPTO)
+#endif
+#define ATTR_AES_ACCEL
+
+/*
+ * While there is <altivec.h>, both gcc and clang "aid" with the
+ * endianness issues in different ways. Just use inline asm instead.
+ */
+
+/* Bytes in memory are host-endian; bytes in register are @be. */
+static inline AESStateVec aes_accel_ld(const AESState *p, bool be)
+{
+    AESStateVec r;
+
+    if (be) {
+        asm("lvx %0, 0, %1" : "=v"(r) : "r"(p), "m"(*p));
+    } else if (HOST_BIG_ENDIAN) {
+        AESStateVec rev = {
+            15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0,
+        };
+        asm("lvx %0, 0, %1\n\t"
+            "vperm %0, %0, %0, %2"
+            : "=v"(r) : "r"(p), "v"(rev), "m"(*p));
+    } else {
+#ifdef __POWER9_VECTOR__
+        asm("lxvb16x %x0, 0, %1" : "=v"(r) : "r"(p), "m"(*p));
+#else
+        asm("lxvd2x %x0, 0, %1\n\t"
+            "xxpermdi %x0, %x0, %x0, 2"
+            : "=v"(r) : "r"(p), "m"(*p));
+#endif
+    }
+    return r;
+}
+
+static void aes_accel_st(AESState *p, AESStateVec r, bool be)
+{
+    if (be) {
+        asm("stvx %1, 0, %2" : "=m"(*p) : "v"(r), "r"(p));
+    } else if (HOST_BIG_ENDIAN) {
+        AESStateVec rev = {
+            15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0,
+        };
+        asm("vperm %1, %1, %1, %2\n\t"
+            "stvx %1, 0, %3"
+            : "=m"(*p), "+v"(r) : "v"(rev), "r"(p));
+    } else {
+#ifdef __POWER9_VECTOR__
+        asm("stxvb16x %x1, 0, %2" : "=m"(*p) : "v"(r), "r"(p));
+#else
+        asm("xxpermdi %x1, %x1, %x1, 2\n\t"
+            "stxvd2x %x1, 0, %2"
+            : "=m"(*p), "+v"(r) : "r"(p));
+#endif
+    }
+}
+
+static inline AESStateVec aes_accel_vcipher(AESStateVec d, AESStateVec k)
+{
+    asm("vcipher %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vncipher(AESStateVec d, AESStateVec k)
+{
+    asm("vncipher %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vcipherlast(AESStateVec d, AESStateVec k)
+{
+    asm("vcipherlast %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vncipherlast(AESStateVec d, AESStateVec k)
+{
+    asm("vncipherlast %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline void
+aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
+{
+    AESStateVec t, z = { };
+
+    t = aes_accel_ld(st, be);
+    t = aes_accel_vncipherlast(t, z);
+    t = aes_accel_vcipher(t, z);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
+                      const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vcipherlast(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
+                         const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vcipher(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
+{
+    AESStateVec t, z = { };
+
+    t = aes_accel_ld(st, be);
+    t = aes_accel_vcipherlast(t, z);
+    t = aes_accel_vncipher(t, z);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
+                        const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipherlast(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipher(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    AESStateVec t, k, z = { };
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipher(t, z);
+    aes_accel_st(ret, t ^ k, be);
+}
+#endif /* __ALTIVEC__ */
+#endif /* PPC_HOST_AES_ROUND_H */
diff --git a/host/include/ppc/host/cpuinfo.h b/host/include/ppc/host/cpuinfo.h
index 7ec252ef52..6cc727dba7 100644
--- a/host/include/ppc/host/cpuinfo.h
+++ b/host/include/ppc/host/cpuinfo.h
@@ -16,6 +16,7 @@ 
 #define CPUINFO_ISEL            (1u << 5)
 #define CPUINFO_ALTIVEC         (1u << 6)
 #define CPUINFO_VSX             (1u << 7)
+#define CPUINFO_CRYPTO          (1u << 8)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/host/include/ppc64/host/aes-round.h b/host/include/ppc64/host/aes-round.h
new file mode 100644
index 0000000000..4a78d94de8
--- /dev/null
+++ b/host/include/ppc64/host/aes-round.h
@@ -0,0 +1 @@ 
+#include "host/include/ppc/host/aes-round.h"
diff --git a/util/cpuinfo-ppc.c b/util/cpuinfo-ppc.c
index ee761de33a..053b383720 100644
--- a/util/cpuinfo-ppc.c
+++ b/util/cpuinfo-ppc.c
@@ -49,6 +49,14 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         /* We only care about the portion of VSX that overlaps Altivec. */
         if (hwcap & PPC_FEATURE_HAS_VSX) {
             info |= CPUINFO_VSX;
+            /*
+             * We use VSX especially for little-endian, but we should
+             * always have both anyway, since VSX came with Power7
+             * and crypto came with Power8.
+             */
+            if (hwcap2 & PPC_FEATURE2_HAS_VEC_CRYPTO) {
+                info |= CPUINFO_CRYPTO;
+            }
         }
     }
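
As a standalone illustration of the detection half of this patch, the same
capability bit can be read directly from the auxiliary vector on a
glibc-based Linux host. The fallback value given below for
PPC_FEATURE2_VEC_CRYPTO is taken from the kernel's asm/cputable.h; verify it
against your headers before relying on it.

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef PPC_FEATURE2_VEC_CRYPTO
    #define PPC_FEATURE2_VEC_CRYPTO 0x02000000  /* ISA 2.07 vector crypto */
    #endif

    int main(void)
    {
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        /* Mirrors the check added to util/cpuinfo-ppc.c above. */
        printf("vcipher/vncipher available: %s\n",
               (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) ? "yes" : "no");
        return 0;
    }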