diff mbox series

[14/20] tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double

Message ID 20211218194250.247633-15-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg: vector improvements | expand

Commit Message

Richard Henderson Dec. 18, 2021, 7:42 p.m. UTC
While there are no specific 16-bit rotate instructions, there
are double-word shifts, which can perform the same operation.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

Comments

Alex Bennée Feb. 3, 2022, 10:32 a.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> While there are no specific 16-bit rotate instructions, there
> are double-word shifts, which can perform the same operation.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Even hackbox can't utilise these - maybe it's time to request an upgrade
for my dev box ;-)

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
diff mbox series

Patch

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 30b9afc1d3..54fb8321a9 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3350,6 +3350,8 @@  int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_rotlv_vec:
     case INDEX_op_rotrv_vec:
         switch (vece) {
+        case MO_16:
+            return have_avx512vbmi2 ? -1 : 0;
         case MO_32:
         case MO_64:
             return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
@@ -3494,6 +3496,12 @@  static void expand_vec_rotli(TCGType type, unsigned vece,
         return;
     }
 
+    if (have_avx512vbmi2) {
+        vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
+                  tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
+        return;
+    }
+
     t = tcg_temp_new_vec(type);
     tcg_gen_shli_vec(vece, t, v1, imm);
     tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
@@ -3524,8 +3532,16 @@  static void expand_vec_rotls(TCGType type, unsigned vece,
 static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
                             TCGv_vec v1, TCGv_vec sh, bool right)
 {
-    TCGv_vec t = tcg_temp_new_vec(type);
+    TCGv_vec t;
 
+    if (have_avx512vbmi2) {
+        vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
+                  type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
+                  tcgv_vec_arg(v1), tcgv_vec_arg(sh));
+        return;
+    }
+
+    t = tcg_temp_new_vec(type);
     tcg_gen_dupi_vec(vece, t, 8 << vece);
     tcg_gen_sub_vec(vece, t, t, sh);
     if (right) {