tcg/arm: Expand epilogue inline

Message ID 20191015012936.16275-1-richard.henderson@linaro.org
State New
Headers show
Series
  • tcg/arm: Expand epilogue inline
Related show

Commit Message

Richard Henderson Oct. 15, 2019, 1:29 a.m.
It is, after all, just two instructions.

Profiling on a Cortex-A15, using -d nochain to increase the number
of exit_tb operations that are executed, shows a minor improvement of 0.5%.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/arm/tcg-target.inc.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

-- 
2.17.1

Comments

Philippe Mathieu-Daudé Oct. 15, 2019, 10:06 a.m. | #1
Hi Richard,

On 10/15/19 3:29 AM, Richard Henderson wrote:
> It is, after all, just two instructions.

> 

> Profiling on a cortex-a15, using -d nochain to increase the number

> of exit_tb that are executed, shows a minor improvement of 0.5%.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>   tcg/arm/tcg-target.inc.c | 32 +++++++++++++-------------------

>   1 file changed, 13 insertions(+), 19 deletions(-)

> 

> diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c

> index 94d80d79d1..2a9ebfe25a 100644

> --- a/tcg/arm/tcg-target.inc.c

> +++ b/tcg/arm/tcg-target.inc.c

> @@ -1745,24 +1745,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)

>   #endif

>   }

>   

> -static tcg_insn_unit *tb_ret_addr;

> +static void tcg_out_epilogue(TCGContext *s);

>   

> -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,

> -                const TCGArg *args, const int *const_args)

> +static void tcg_out_op(TCGContext *s, TCGOpcode opc,

> +                       const TCGArg *args, const int *const_args)

>   {

>       TCGArg a0, a1, a2, a3, a4, a5;

>       int c;

>   

>       switch (opc) {

>       case INDEX_op_exit_tb:

> -        /* Reuse the zeroing that exists for goto_ptr.  */

> -        a0 = args[0];

> -        if (a0 == 0) {

> -            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);

> -        } else {

> -            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);

> -            tcg_out_goto(s, COND_AL, tb_ret_addr);

> -        }

> +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);

> +        tcg_out_epilogue(s);

>           break;

>       case INDEX_op_goto_tb:

>           {

> @@ -2284,19 +2278,17 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)

>         + TCG_TARGET_STACK_ALIGN - 1) \

>        & -TCG_TARGET_STACK_ALIGN)

>   

> +#define STACK_ADDEND  (FRAME_SIZE - PUSH_SIZE)

> +

>   static void tcg_target_qemu_prologue(TCGContext *s)

>   {

> -    int stack_addend;

> -

>       /* Calling convention requires us to save r4-r11 and lr.  */

>       /* stmdb sp!, { r4 - r11, lr } */

>       tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

>   

>       /* Reserve callee argument and tcg temp space.  */

> -    stack_addend = FRAME_SIZE - PUSH_SIZE;

> -

>       tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,

> -                   TCG_REG_CALL_STACK, stack_addend, 1);

> +                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);

>       tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,

>                     CPU_TEMP_BUF_NLONGS * sizeof(long));

>   

> @@ -2310,11 +2302,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)

>        */

>       s->code_gen_epilogue = s->code_ptr;

>       tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);

> +    tcg_out_epilogue(s);

> +}

>   

> -    /* TB epilogue */

> -    tb_ret_addr = s->code_ptr;

> +static void tcg_out_epilogue(TCGContext *s)


Would you mind splitting this patch in two?
First introduce and use tcg_out_epilogue(), then optimize the exit_tb case in tcg_out_op().

> +{

>       tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,

> -                   TCG_REG_CALL_STACK, stack_addend, 1);

> +                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);

>   

>       /* ldmia sp!, { r4 - r11, pc } */

>       tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);

>

Patch

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 94d80d79d1..2a9ebfe25a 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1745,24 +1745,18 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #endif
 }
 
-static tcg_insn_unit *tb_ret_addr;
+static void tcg_out_epilogue(TCGContext *s);
 
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
-                const TCGArg *args, const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                       const TCGArg *args, const int *const_args)
 {
     TCGArg a0, a1, a2, a3, a4, a5;
     int c;
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        /* Reuse the zeroing that exists for goto_ptr.  */
-        a0 = args[0];
-        if (a0 == 0) {
-            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
-        } else {
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
-            tcg_out_goto(s, COND_AL, tb_ret_addr);
-        }
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+        tcg_out_epilogue(s);
         break;
     case INDEX_op_goto_tb:
         {
@@ -2284,19 +2278,17 @@  static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
       + TCG_TARGET_STACK_ALIGN - 1) \
      & -TCG_TARGET_STACK_ALIGN)
 
+#define STACK_ADDEND  (FRAME_SIZE - PUSH_SIZE)
+
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    int stack_addend;
-
     /* Calling convention requires us to save r4-r11 and lr.  */
     /* stmdb sp!, { r4 - r11, lr } */
     tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
 
     /* Reserve callee argument and tcg temp space.  */
-    stack_addend = FRAME_SIZE - PUSH_SIZE;
-
     tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
-                   TCG_REG_CALL_STACK, stack_addend, 1);
+                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
@@ -2310,11 +2302,13 @@  static void tcg_target_qemu_prologue(TCGContext *s)
      */
     s->code_gen_epilogue = s->code_ptr;
     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+    tcg_out_epilogue(s);
+}
 
-    /* TB epilogue */
-    tb_ret_addr = s->code_ptr;
+static void tcg_out_epilogue(TCGContext *s)
+{
     tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
-                   TCG_REG_CALL_STACK, stack_addend, 1);
+                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
 
     /* ldmia sp!, { r4 - r11, pc } */
     tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);