
target-arm: add support for v8 VMULL.P64 instruction

Message ID 1395912558-1041-1-git-send-email-ard.biesheuvel@linaro.org
State New

Commit Message

Ard Biesheuvel March 27, 2014, 9:29 a.m. UTC
This adds support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
instruction, which is an optional feature that is part of the v8 Crypto
Extensions.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---

This is an incremental patch on top of the SHA-1/SHA-256 patch I sent earlier
this week.
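
For reference, the operation is a carry-less (polynomial) multiply: each set
bit of one operand selects a shifted copy of the other operand, and the copies
are combined with XOR rather than addition, giving a 128-bit product. A minimal
standalone C sketch of that semantics follows; it is an illustration only, not
part of the patch, and the function name and test values are made up.

#include <stdint.h>
#include <stdio.h>

/* Carry-less 64x64 -> 128 bit multiply, result split into low/high halves. */
static void clmul_64x64(uint64_t n, uint64_t m, uint64_t *lo, uint64_t *hi)
{
    uint64_t d0 = (n & 1) ? m : 0;      /* bit 0 of n contributes m unshifted */
    uint64_t d1 = 0;
    int shift;

    for (shift = 1; (n >>= 1); shift++) {
        if (n & 1) {
            d0 ^= m << shift;           /* part landing in the low half      */
            d1 ^= m >> (64 - shift);    /* part spilling into the high half  */
        }
    }
    *lo = d0;
    *hi = d1;
}

int main(void)
{
    uint64_t lo, hi;

    /* (x + 1) * (x^2 + 1) over GF(2) = x^3 + x^2 + x + 1, i.e. 0x3 * 0x5 = 0xf */
    clmul_64x64(0x3, 0x5, &lo, &hi);
    printf("lo=%#llx hi=%#llx\n", (unsigned long long)lo, (unsigned long long)hi);
    return 0;
}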

 target-arm/cpu.c           |  1 +
 target-arm/cpu.h           |  1 +
 target-arm/crypto_helper.c | 19 +++++++++++++++++++
 target-arm/helper.h        |  2 ++
 target-arm/translate.c     | 18 +++++++++++++++++-
 5 files changed, 40 insertions(+), 1 deletion(-)

Comments

Peter Maydell March 27, 2014, 10:55 a.m. UTC | #1
On 27 March 2014 09:29, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> This adds support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
> instruction, which is an optional feature that is part of the v8 Crypto
> Extensions.

> +void HELPER(crypto_pmull)(CPUARMState *env, uint32_t rd, uint32_t rn,
> +                          uint32_t rm)
> +{
> +    uint64_t n = float64_val(env->vfp.regs[rn]);
> +    uint64_t m = float64_val(env->vfp.regs[rm]);
> +    uint64_t d0 = (n & 1) ? m : 0;
> +    uint64_t d1 = 0;
> +    int shift;
> +
> +    for (shift = 1; (n >>= 1); shift++) {
> +        if (n & 1) {
> +            d0 ^= m << shift;
> +            d1 ^= m >> (64 - shift);
> +        }
> +    }
> +    env->vfp.regs[rd] = make_float64(d0);
> +    env->vfp.regs[rd + 1] = make_float64(d1);
> +}

Surely we can reuse the helper we already have for implementing
the A64 version of this instruction?

thanks
-- PMM
Ard Biesheuvel March 27, 2014, 11:30 a.m. UTC | #2
On 27 March 2014 11:55, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 27 March 2014 09:29, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>> This adds support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
>> instruction, which is an optional feature that is part of the v8 Crypto
>> Extensions.
>
>> +void HELPER(crypto_pmull)(CPUARMState *env, uint32_t rd, uint32_t rn,
>> +                          uint32_t rm)
>> +{
>> +    uint64_t n = float64_val(env->vfp.regs[rn]);
>> +    uint64_t m = float64_val(env->vfp.regs[rm]);
>> +    uint64_t d0 = (n & 1) ? m : 0;
>> +    uint64_t d1 = 0;
>> +    int shift;
>> +
>> +    for (shift = 1; (n >>= 1); shift++) {
>> +        if (n & 1) {
>> +            d0 ^= m << shift;
>> +            d1 ^= m >> (64 - shift);
>> +        }
>> +    }
>> +    env->vfp.regs[rd] = make_float64(d0);
>> +    env->vfp.regs[rd + 1] = make_float64(d1);
>> +}
>
> Surely we can reuse the helper we already have for implementing
> the A64 version of this instruction?
>

Absolutely! I just wasn't aware there was one :-)

Regards,
Ard.
Peter Maydell May 29, 2014, 5:42 p.m. UTC | #3
On 27 March 2014 09:29, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> This adds support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
> instruction, which is an optional feature that is part of the v8 Crypto
> Extensions.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---

I've reimplemented this to use the existing A64 helpers,
and will post a patch shortly.

thanks
-- PMM
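
The reuse works because the 128-bit polynomial product splits into two
independently computable 64-bit halves, so per-half helpers can back both the
A32 and the A64 decoders. A rough sketch of that split follows; the names here
are illustrative and are not QEMU's actual A64 helper signatures.

#include <stdint.h>

uint64_t pmull_lo64(uint64_t n, uint64_t m)
{
    uint64_t result = 0;
    int shift;

    for (shift = 0; shift < 64; shift++) {
        if (n & (1ULL << shift)) {
            result ^= m << shift;            /* bits landing in the low half */
        }
    }
    return result;
}

uint64_t pmull_hi64(uint64_t n, uint64_t m)
{
    uint64_t result = 0;
    int shift;

    for (shift = 1; shift < 64; shift++) {   /* bit 0 never reaches the high half */
        if (n & (1ULL << shift)) {
            result ^= m >> (64 - shift);     /* bits spilling into the high half */
        }
    }
    return result;
}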

Patch

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 58c4584ac3bc..60244c7ffc82 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -293,6 +293,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_V8_AES);
         set_feature(env, ARM_FEATURE_V8_SHA1);
         set_feature(env, ARM_FEATURE_V8_SHA256);
+        set_feature(env, ARM_FEATURE_V8_PMULL);
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         set_feature(env, ARM_FEATURE_VAPA);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index f5039d8b0177..d8add6d565a6 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -632,6 +632,7 @@  enum arm_features {
     ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
     ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
     ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
index 211be36ebda8..b56a767b527e 100644
--- a/target-arm/crypto_helper.c
+++ b/target-arm/crypto_helper.c
@@ -522,3 +522,22 @@  void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
     env->vfp.regs[rd] = make_float64(d.l[0]);
     env->vfp.regs[rd + 1] = make_float64(d.l[1]);
 }
+
+void HELPER(crypto_pmull)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                          uint32_t rm)
+{
+    uint64_t n = float64_val(env->vfp.regs[rn]);
+    uint64_t m = float64_val(env->vfp.regs[rm]);
+    uint64_t d0 = (n & 1) ? m : 0;
+    uint64_t d1 = 0;
+    int shift;
+
+    for (shift = 1; (n >>= 1); shift++) {
+        if (n & 1) {
+            d0 ^= m << shift;
+            d1 ^= m >> (64 - shift);
+        }
+    }
+    env->vfp.regs[rd] = make_float64(d0);
+    env->vfp.regs[rd + 1] = make_float64(d1);
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 9024aef75157..8333f7dd0be2 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -521,6 +521,8 @@  DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
 DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
 DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
 
+DEF_HELPER_4(crypto_pmull, void, env, i32, i32, i32)
+
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 
diff --git a/target-arm/translate.c b/target-arm/translate.c
index e79241402da8..576cdc24b530 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5917,7 +5917,7 @@  static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     {0, 0, 0, 6}, /* VQDMLSL */
                     {0, 0, 0, 0}, /* Integer VMULL */
                     {0, 0, 0, 2}, /* VQDMULL */
-                    {0, 0, 0, 5}, /* Polynomial VMULL */
+                    {0, 0, 0, 4}, /* Polynomial VMULL */
                     {0, 0, 0, 3}, /* Reserved: always UNDEF */
                 };
 
@@ -5937,6 +5937,22 @@  static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     return 1;
                 }
 
+                /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+                   outside the loop below as it only performs a single pass. */
+                if (op == 14 && size == 2) {
+                    if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+                        return 1;
+                    }
+                    tmp = tcg_const_i32(rd);
+                    tmp2 = tcg_const_i32(rn);
+                    tmp3 = tcg_const_i32(rm);
+                    gen_helper_crypto_pmull(cpu_env, tmp, tmp2, tmp3);
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    return 0;
+                }
+
                 /* Avoid overlapping operands.  Wide source operands are
                    always aligned so will never overlap with wide
                    destinations in problematic ways.  */