diff mbox series

[v2,2/7] tcg/ppc: Use PADDI in tcg_out_movi

Message ID 20230808030250.50602-3-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg/ppc: Support power10 prefixed instructions | expand

Commit Message

Richard Henderson Aug. 8, 2023, 3:02 a.m. UTC
PADDI can load 34-bit immediates and 34-bit pc-relative addresses.

Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

Comments

Nicholas Piggin Aug. 9, 2023, 9:03 a.m. UTC | #1
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> PADDI can load 34-bit immediates and 34-bit pc-relative addresses.
>

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/ppc/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 642d0fd128..2141c0bc78 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -707,6 +707,38 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
>      return true;
>  }
>  
> +/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
> +static bool tcg_out_need_prefix_align(TCGContext *s)
> +{
> +    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
> +}
> +
> +static void tcg_out_prefix_align(TCGContext *s)
> +{
> +    if (tcg_out_need_prefix_align(s)) {
> +        tcg_out32(s, NOP);
> +    }
> +}
> +
> +static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
> +{
> +    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
> +}
> +
> +/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
> +static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
> +                          unsigned ra, tcg_target_long imm, bool r)
> +{
> +    tcg_insn_unit p, i;
> +
> +    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
> +    i = opc | TAI(rt, ra, imm);
> +
> +    tcg_out_prefix_align(s);
> +    tcg_out32(s, p);
> +    tcg_out32(s, i);
> +}
> +
>  static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
>                               TCGReg base, tcg_target_long offset);
>  
> @@ -992,6 +1024,25 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
>          return;
>      }
>  
> +    /*
> +     * Load values up to 34 bits, and pc-relative addresses,
> +     * with one prefixed insn.
> +     */
> +    if (have_isa_3_10) {
> +        if (arg == sextract64(arg, 0, 34)) {
> +            /* pli ret,value = paddi ret,0,value,0 */
> +            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
> +            return;
> +        }
> +
> +        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
> +        if (tmp == sextract64(tmp, 0, 34)) {
> +            /* pla ret,value = paddi ret,0,value,1 */
> +            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
> +            return;
> +        }
> +    }
> +
>      /* Load 32-bit immediates with two insns.  Note that we've already
>         eliminated bare ADDIS, so we know both insns are required.  */
>      if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
diff mbox series

Patch

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 642d0fd128..2141c0bc78 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -707,6 +707,38 @@  static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     return true;
 }
 
+/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
+static bool tcg_out_need_prefix_align(TCGContext *s)
+{
+    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
+}
+
+static void tcg_out_prefix_align(TCGContext *s)
+{
+    if (tcg_out_need_prefix_align(s)) {
+        tcg_out32(s, NOP);
+    }
+}
+
+static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
+{
+    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
+}
+
+/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
+static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
+                          unsigned ra, tcg_target_long imm, bool r)
+{
+    tcg_insn_unit p, i;
+
+    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
+    i = opc | TAI(rt, ra, imm);
+
+    tcg_out_prefix_align(s);
+    tcg_out32(s, p);
+    tcg_out32(s, i);
+}
+
 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset);
 
@@ -992,6 +1024,25 @@  static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
         return;
     }
 
+    /*
+     * Load values up to 34 bits, and pc-relative addresses,
+     * with one prefixed insn.
+     */
+    if (have_isa_3_10) {
+        if (arg == sextract64(arg, 0, 34)) {
+            /* pli ret,value = paddi ret,0,value,0 */
+            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
+            return;
+        }
+
+        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
+        if (tmp == sextract64(tmp, 0, 34)) {
+            /* pla ret,value = paddi ret,0,value,1 */
+            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
+            return;
+        }
+    }
+
     /* Load 32-bit immediates with two insns.  Note that we've already
        eliminated bare ADDIS, so we know both insns are required.  */
     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {