diff mbox series

[64/77] target/microblaze: Convert mbar to decodetree

Message ID 20200825205950.730499-65-richard.henderson@linaro.org
State New
Headers show
Series target/microblaze improvements | expand

Commit Message

Richard Henderson Aug. 25, 2020, 8:59 p.m. UTC
Split this out of the normal branch instructions, as it requires
special handling.  End the TB only for an instruction barrier.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/microblaze/insns.decode |  2 +
 target/microblaze/translate.c  | 81 ++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 38 deletions(-)

-- 
2.25.1

Comments

Edgar E. Iglesias Aug. 27, 2020, 9:24 a.m. UTC | #1
On Tue, Aug 25, 2020 at 01:59:37PM -0700, Richard Henderson wrote:
> Split this out of the normal branch instructions, as it requires

> special handling.  End the TB only for an instruction barrier.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/microblaze/insns.decode |  2 +

>  target/microblaze/translate.c  | 81 ++++++++++++++++++----------------

>  2 files changed, 45 insertions(+), 38 deletions(-)

> 

> diff --git a/target/microblaze/insns.decode b/target/microblaze/insns.decode

> index 53da2b75aa..77b073be9e 100644

> --- a/target/microblaze/insns.decode

> +++ b/target/microblaze/insns.decode

> @@ -124,6 +124,8 @@ lwea            110010 ..... ..... ..... 0001 000 0000  @typea

>  lwx             110010 ..... ..... ..... 1000 000 0000  @typea

>  lwi             111010 ..... ..... ................     @typeb

>  

> +mbar            101110 imm:5 00010 0000 0000 0000 0100

> +

>  mul             010000 ..... ..... ..... 000 0000 0000  @typea

>  mulh            010000 ..... ..... ..... 000 0000 0001  @typea

>  mulhu           010000 ..... ..... ..... 000 0000 0011  @typea

> diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c

> index fc1c661368..a391e80fb9 100644

> --- a/target/microblaze/translate.c

> +++ b/target/microblaze/translate.c

> @@ -1166,6 +1166,48 @@ static bool trans_brki(DisasContext *dc, arg_typeb_br *arg)

>      return true;

>  }

>  

> +static bool trans_mbar(DisasContext *dc, arg_mbar *arg)

> +{

> +    int mbar_imm = arg->imm;

> +

> +    /*

> +     * Instruction access memory barrier.

> +     * End the TB so that we recognize self-modified code immediately.

> +     */

> +    if (mbar_imm & 1) {

> +        dc->cpustate_changed = 1;

> +    }


This should be (mbar_imm & 1) == 0
But even with that fixed it fails some of our tests because interrupts
that should become visible within a couple of cycles after a
memory data barrier can now be delayed longer.

I think we should always break the TB.

> +    /* Data access memory barrier.  */

> +    if (mbar_imm & 2) {

> +        tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);

> +    }


This should be (mbar_imm & 2) == 0


> +

> +    /* Sleep. */

> +    if (mbar_imm & 16) {

> +        TCGv_i32 tmp_1;

> +

> +        if (trap_userspace(dc, true)) {

> +            /* Sleep is a privileged instruction.  */

> +            return true;

> +        }

> +

> +        t_sync_flags(dc);

> +

> +        tmp_1 = tcg_const_i32(1);

> +        tcg_gen_st_i32(tmp_1, cpu_env,

> +                       -offsetof(MicroBlazeCPU, env)

> +                       +offsetof(CPUState, halted));

> +        tcg_temp_free_i32(tmp_1);

> +

> +        tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4);

> +

> +        gen_raise_exception(dc, EXCP_HLT);

> +    }

> +    return true;

> +}

> +

> +

>  static void msr_read(DisasContext *dc, TCGv_i32 d)

>  {

>      TCGv_i32 t;

> @@ -1447,50 +1489,13 @@ static void dec_bcc(DisasContext *dc)

>  

>  static void dec_br(DisasContext *dc)

>  {

> -    unsigned int dslot, link, abs, mbar;

> +    unsigned int dslot, link, abs;

>      uint32_t add_pc;

>  

>      dslot = dc->ir & (1 << 20);

>      abs = dc->ir & (1 << 19);

>      link = dc->ir & (1 << 18);

>  

> -    /* Memory barrier.  */

> -    mbar = (dc->ir >> 16) & 31;

> -    if (mbar == 2 && dc->imm == 4) {

> -        uint16_t mbar_imm = dc->rd;

> -

> -        /* Data access memory barrier.  */

> -        if ((mbar_imm & 2) == 0) {

> -            tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);

> -        }

> -

> -        /* mbar IMM & 16 decodes to sleep.  */

> -        if (mbar_imm & 16) {

> -            TCGv_i32 tmp_1;

> -

> -            if (trap_userspace(dc, true)) {

> -                /* Sleep is a privileged instruction.  */

> -                return;

> -            }

> -

> -            t_sync_flags(dc);

> -

> -            tmp_1 = tcg_const_i32(1);

> -            tcg_gen_st_i32(tmp_1, cpu_env,

> -                           -offsetof(MicroBlazeCPU, env)

> -                           +offsetof(CPUState, halted));

> -            tcg_temp_free_i32(tmp_1);

> -

> -            tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4);

> -

> -            gen_raise_exception(dc, EXCP_HLT);

> -            return;

> -        }

> -        /* Break the TB.  */

> -        dc->cpustate_changed = 1;

> -        return;

> -    }

> -

>      if (dslot && dec_setup_dslot(dc)) {

>          return;

>      }

> -- 

> 2.25.1

>
Richard Henderson Aug. 27, 2020, 9:58 a.m. UTC | #2
On 8/27/20 2:24 AM, Edgar E. Iglesias wrote:
>> +    /*

>> +     * Instruction access memory barrier.

>> +     * End the TB so that we recognize self-modified code immediately.

>> +     */

>> +    if (mbar_imm & 1) {

>> +        dc->cpustate_changed = 1;

>> +    }

> 

> This should be (mbar_imm & 1) == 0

> But even with that fixed it fails some of our tests because interrupts

> that should become visible within a couple of cycles after a

> memory data barrier can now be delayed longer.

> 

> I think we should always break the TB.


Ok.  I assume this follows a write to something like an interrupt controller
register?

> 

>> +    /* Data access memory barrier.  */

>> +    if (mbar_imm & 2) {

>> +        tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);

>> +    }

> 

> This should be (mbar_imm & 2) == 0


Oops.  ;-)

Applying the following incremental patch.


r~


diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index a391e80fb9..1e2bb529ac 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -1170,16 +1170,8 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg)
 {
     int mbar_imm = arg->imm;

-    /*
-     * Instruction access memory barrier.
-     * End the TB so that we recognize self-modified code immediately.
-     */
-    if (mbar_imm & 1) {
-        dc->cpustate_changed = 1;
-    }
-
     /* Data access memory barrier.  */
-    if (mbar_imm & 2) {
+    if ((mbar_imm & 2) == 0) {
         tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
     }

@@ -1204,6 +1196,19 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg)

         gen_raise_exception(dc, EXCP_HLT);
     }
+
+    /*
+     * If !(mbar_imm & 1), this is an instruction access memory barrier
+     * and we need to end the TB so that we recognize self-modified
+     * code immediately.
+     *
+     * However, there are some data mbars that need the TB break
+     * (and return to main loop) to recognize interrupts right away.
+     * E.g. recognizing a change to an interrupt controller register.
+     *
+     * Therefore, choose to end the TB always.
+     */
+    dc->cpustate_changed = 1;
     return true;
 }
Edgar E. Iglesias Aug. 27, 2020, 10:08 a.m. UTC | #3
On Thu, Aug 27, 2020 at 02:58:06AM -0700, Richard Henderson wrote:
> On 8/27/20 2:24 AM, Edgar E. Iglesias wrote:

> >> +    /*

> >> +     * Instruction access memory barrier.

> >> +     * End the TB so that we recognize self-modified code immediately.

> >> +     */

> >> +    if (mbar_imm & 1) {

> >> +        dc->cpustate_changed = 1;

> >> +    }

> > 

> > This should be (mbar_imm & 1) == 0

> > But even with that fixed it fails some of our tests because interrupts

> > that should become visible within a couple of cycles after a

> > memory data barrier can now be delayed longer.

> > 

> > I think we should always break the TB.

> 

> Ok.  I assume this follows a write to something like an interrupt controller

> register?


yes, kind of. It's a memory store to a device that raises an interrupt as a
result of that store. The interrupt propagates to the CPU and on real HW if
the pipeline depth of the core is small, it gets taken within a couple of
cycles after the barrier completes. In QEMU, that delay can become very long
if we don't break the TB.

Architecturally, it would be wrong to make such assumptions about the pipeline
but there's code out there already..



> 

> > 

> >> +    /* Data access memory barrier.  */

> >> +    if (mbar_imm & 2) {

> >> +        tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);

> >> +    }

> > 

> > This should be (mbar_imm & 2) == 0

> 

> Oops.  ;-)

> 

> Applying the following incremental patch.


Thanks! With your incremental patch:

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>




> 

> 

> r~

> 

> 

> diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c

> index a391e80fb9..1e2bb529ac 100644

> --- a/target/microblaze/translate.c

> +++ b/target/microblaze/translate.c

> @@ -1170,16 +1170,8 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg)

>  {

>      int mbar_imm = arg->imm;

> 

> -    /*

> -     * Instruction access memory barrier.

> -     * End the TB so that we recognize self-modified code immediately.

> -     */

> -    if (mbar_imm & 1) {

> -        dc->cpustate_changed = 1;

> -    }

> -

>      /* Data access memory barrier.  */

> -    if (mbar_imm & 2) {

> +    if ((mbar_imm & 2) == 0) {

>          tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);

>      }

> 

> @@ -1204,6 +1196,19 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg)

> 

>          gen_raise_exception(dc, EXCP_HLT);

>      }

> +

> +    /*

> +     * If !(mbar_imm & 1), this is an instruction access memory barrier

> +     * and we need to end the TB so that we recognize self-modified

> +     * code immediately.

> +     *

> +     * However, there are some data mbars that need the TB break

> +     * (and return to main loop) to recognize interrupts right away.

> +     * E.g. recognizing a change to an interrupt controller register.

> +     *

> +     * Therefore, choose to end the TB always.

> +     */

> +    dc->cpustate_changed = 1;

>      return true;

>  }

>
Richard Henderson Aug. 27, 2020, 11:11 a.m. UTC | #4
On 8/27/20 3:08 AM, Edgar E. Iglesias wrote:
>> Ok.  I assume this follows a write to something like an interrupt controller

>> register?

> 

> yes, kind of. It's a memory store to a device that raises an interrupt as a

> result of that store. The interrupt propagates to the CPU and on real HW if

> the pipeline depth of the core is small, it gets taken within a couple of

> cycles after the barrier completes. In QEMU, that delay can become very long

> if we don't break the TB.


Ok, yeah, same idea.  The data store alters the set of interrupts pending.

> Architecturally, it would be wrong to make such assumptions about the pipeline

> but there's code out there already..


Yes indeed.


r~
diff mbox series

Patch

diff --git a/target/microblaze/insns.decode b/target/microblaze/insns.decode
index 53da2b75aa..77b073be9e 100644
--- a/target/microblaze/insns.decode
+++ b/target/microblaze/insns.decode
@@ -124,6 +124,8 @@  lwea            110010 ..... ..... ..... 0001 000 0000  @typea
 lwx             110010 ..... ..... ..... 1000 000 0000  @typea
 lwi             111010 ..... ..... ................     @typeb
 
+mbar            101110 imm:5 00010 0000 0000 0000 0100
+
 mul             010000 ..... ..... ..... 000 0000 0000  @typea
 mulh            010000 ..... ..... ..... 000 0000 0001  @typea
 mulhu           010000 ..... ..... ..... 000 0000 0011  @typea
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index fc1c661368..a391e80fb9 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -1166,6 +1166,48 @@  static bool trans_brki(DisasContext *dc, arg_typeb_br *arg)
     return true;
 }
 
+static bool trans_mbar(DisasContext *dc, arg_mbar *arg)
+{
+    int mbar_imm = arg->imm;
+
+    /*
+     * Instruction access memory barrier.
+     * End the TB so that we recognize self-modified code immediately.
+     */
+    if (mbar_imm & 1) {
+        dc->cpustate_changed = 1;
+    }
+
+    /* Data access memory barrier.  */
+    if (mbar_imm & 2) {
+        tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+    }
+
+    /* Sleep. */
+    if (mbar_imm & 16) {
+        TCGv_i32 tmp_1;
+
+        if (trap_userspace(dc, true)) {
+            /* Sleep is a privileged instruction.  */
+            return true;
+        }
+
+        t_sync_flags(dc);
+
+        tmp_1 = tcg_const_i32(1);
+        tcg_gen_st_i32(tmp_1, cpu_env,
+                       -offsetof(MicroBlazeCPU, env)
+                       +offsetof(CPUState, halted));
+        tcg_temp_free_i32(tmp_1);
+
+        tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4);
+
+        gen_raise_exception(dc, EXCP_HLT);
+    }
+    return true;
+}
+
+
 static void msr_read(DisasContext *dc, TCGv_i32 d)
 {
     TCGv_i32 t;
@@ -1447,50 +1489,13 @@  static void dec_bcc(DisasContext *dc)
 
 static void dec_br(DisasContext *dc)
 {
-    unsigned int dslot, link, abs, mbar;
+    unsigned int dslot, link, abs;
     uint32_t add_pc;
 
     dslot = dc->ir & (1 << 20);
     abs = dc->ir & (1 << 19);
     link = dc->ir & (1 << 18);
 
-    /* Memory barrier.  */
-    mbar = (dc->ir >> 16) & 31;
-    if (mbar == 2 && dc->imm == 4) {
-        uint16_t mbar_imm = dc->rd;
-
-        /* Data access memory barrier.  */
-        if ((mbar_imm & 2) == 0) {
-            tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
-        }
-
-        /* mbar IMM & 16 decodes to sleep.  */
-        if (mbar_imm & 16) {
-            TCGv_i32 tmp_1;
-
-            if (trap_userspace(dc, true)) {
-                /* Sleep is a privileged instruction.  */
-                return;
-            }
-
-            t_sync_flags(dc);
-
-            tmp_1 = tcg_const_i32(1);
-            tcg_gen_st_i32(tmp_1, cpu_env,
-                           -offsetof(MicroBlazeCPU, env)
-                           +offsetof(CPUState, halted));
-            tcg_temp_free_i32(tmp_1);
-
-            tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4);
-
-            gen_raise_exception(dc, EXCP_HLT);
-            return;
-        }
-        /* Break the TB.  */
-        dc->cpustate_changed = 1;
-        return;
-    }
-
     if (dslot && dec_setup_dslot(dc)) {
         return;
     }