diff mbox series

[PULL,12/30] tcg/i386: Implement avx512 immediate rotate

Message ID 20220303205944.469445-13-richard.henderson@linaro.org
State New
Headers show
Series [PULL,01/30] tcg/optimize: only read val after const check | expand

Commit Message

Richard Henderson March 3, 2022, 8:59 p.m. UTC
AVX512VL has VPROLD and VPROLQ, layered onto the same
opcode as PSHIFTD, but requiring EVEX encoding, and W1 for VPROLQ.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h     |  2 +-
 tcg/i386/tcg-target.c.inc | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 79af353860..23a8b2a8c8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -195,7 +195,7 @@  extern bool have_movbe;
 #define TCG_TARGET_HAS_not_vec          0
 #define TCG_TARGET_HAS_neg_vec          0
 #define TCG_TARGET_HAS_abs_vec          1
-#define TCG_TARGET_HAS_roti_vec         0
+#define TCG_TARGET_HAS_roti_vec         have_avx512vl
 #define TCG_TARGET_HAS_rots_vec         0
 #define TCG_TARGET_HAS_rotv_vec         0
 #define TCG_TARGET_HAS_shi_vec          1
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index de01fbf40c..3a9f6a3360 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -362,7 +362,7 @@  static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define OPC_PSHUFLW     (0x70 | P_EXT | P_SIMDF2)
 #define OPC_PSHUFHW     (0x70 | P_EXT | P_SIMDF3)
 #define OPC_PSHIFTW_Ib  (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
-#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
 #define OPC_PSHIFTQ_Ib  (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
 #define OPC_PSLLW       (0xf1 | P_EXT | P_DATA16)
 #define OPC_PSLLD       (0xf2 | P_EXT | P_DATA16)
@@ -3000,6 +3000,14 @@  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
             insn = shift_imm_insn[vece];
         }
         sub = 4;
+        goto gen_shift;
+    case INDEX_op_rotli_vec:
+        insn = OPC_PSHIFTD_Ib | P_EVEX;  /* VPROL[DQ] */
+        if (vece == MO_64) {
+            insn |= P_VEXW;
+        }
+        sub = 1;
+        goto gen_shift;
     gen_shift:
         tcg_debug_assert(vece != MO_8);
         if (type == TCG_TYPE_V256) {
@@ -3289,6 +3297,7 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_shli_vec:
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
+    case INDEX_op_rotli_vec:
     case INDEX_op_x86_psrldq_vec:
         return C_O1_I1(x, x);
 
@@ -3310,11 +3319,13 @@  int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_xor_vec:
     case INDEX_op_andc_vec:
         return 1;
-    case INDEX_op_rotli_vec:
     case INDEX_op_cmp_vec:
     case INDEX_op_cmpsel_vec:
         return -1;
 
+    case INDEX_op_rotli_vec:
+        return have_avx512vl && vece >= MO_32 ? 1 : -1;
+
     case INDEX_op_shli_vec:
     case INDEX_op_shri_vec:
         /* We must expand the operation for MO_8.  */