diff mbox

[2/3] target-arm: Code changes to implement overwrite of tag field on PC load

Message ID 1474047287-145701-3-git-send-email-thomas.hanson@linaro.org
State New
Headers show

Commit Message

Tom Hanson Sept. 16, 2016, 5:34 p.m. UTC
gen_intermediate_code_a64() transfers TBI values from TB->flags to
DisasContext structure.

disas_uncond_b_reg() calls new function gen_a64_set_pc_reg() to handle BR,
BLR and RET instructions.

gen_a64_set_pc_reg() implements all of the required checks and overwiting
logic to clean up the tag field of an address before loading the PC.
Currently only called in one place, but will be used in the future to
handle arithmetic overflow cases with 56-bit addresses.  (See following
patch.)

Signed-off-by: Thomas Hanson <thomas.hanson@linaro.org>

---
 target-arm/translate-a64.c | 67 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 5 deletions(-)

-- 
1.9.1

Comments

Peter Maydell Sept. 30, 2016, 1:24 a.m. UTC | #1
On 16 September 2016 at 10:34, Thomas Hanson <thomas.hanson@linaro.org> wrote:
> gen_intermediate_code_a64() transfers TBI values from TB->flags to

> DisasContext structure.

>

> disas_uncond_b_reg() calls new function gen_a64_set_pc_reg() to handle BR,

> BLR and RET instructions.

>

> gen_a64_set_pc_reg() implements all of the required checks and overwiting

> logic to clean up the tag field of an address before loading the PC.

> Currently only called in one place, but will be used in the future to

> handle arithmetic overflow cases with 56-bit addresses.  (See following

> patch.)


I've cc'd RTH in case he wants to comment on the TCG op sequences
we're generating here.

Same commit message style remarks as patch 1.

> Signed-off-by: Thomas Hanson <thomas.hanson@linaro.org>

> ---

>  target-arm/translate-a64.c | 67 ++++++++++++++++++++++++++++++++++++++++++----

>  1 file changed, 62 insertions(+), 5 deletions(-)

>

> diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c

> index f5e29d2..4d6f951 100644

> --- a/target-arm/translate-a64.c

> +++ b/target-arm/translate-a64.c

> @@ -41,6 +41,7 @@ static TCGv_i64 cpu_pc;

>

>  /* Load/store exclusive handling */

>  static TCGv_i64 cpu_exclusive_high;

> +static TCGv_i64 cpu_reg(DisasContext *s, int reg);

>

>  static const char *regnames[] = {

>      "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",

> @@ -176,6 +177,58 @@ void gen_a64_set_pc_im(uint64_t val)

>      tcg_gen_movi_i64(cpu_pc, val);

>  }

>

> +void gen_a64_set_pc_reg(DisasContext *s, unsigned int rn)


I think it would be better to take a TCGv_i64 here rather than
unsigned int rn (ie have the caller do the cpu_reg(s, rn)).
(You probably don't need that prototype of cpu_reg() above if
you do this, though that's not why it's better.)

> +{


This could use a brief comment explaining the semantics we are
implementing here, something like:

    /* Load the PC from a register.
     *
     * If address tagging is enabled via the TCR TBI bits, then loading
     * an address into the PC will clear out any tag in the it:
     *  + for EL2 and EL3 there is only one TBI bit, and if it is set
     *    then the address is zero-extended, clearing bits [63:56]
     *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
     *    and TBI1 controls addressses with bit 55 == 1.
     *    If the appropriate TBI bit is set for the address then
     *    the address is sign-extended from bit 55 into bits [63:56]
     *
     * We can avoid doing this for relative-branches, because the
     * PC + offset can never overflow into the tag bits (assuming
     * that virtual addresses are less than 56 bits wide, as they
     * are currently), but we must handle it for branch-to-register.
     */

> +    if (s->current_el <= 1) {

> +        /* Test if NEITHER or BOTH TBI values are set.  If so, no need to

> +         * examine bit 55 of address, can just generate code.

> +         * If mixed, then test via generated code

> +         */

> +        if (s->tbi0 && s->tbi1) {

> +            TCGv_i64 tmp_reg = tcg_temp_new_i64();

> +            /* Both bits set, just fix it */


Rather than "just fix it", we should say "always sign extend from
bit 55 into [63:56]".

> +            tcg_gen_shli_i64(tmp_reg, cpu_reg(s, rn), 8);

> +            tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);

> +            tcg_temp_free_i64(tmp_reg);

> +        } else if (!s->tbi0 && !s->tbi1) {

> +            /* Neither bit set, just load it as-is */

> +            tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));

> +        } else {

> +            TCGv_i64 tcg_tmpval = tcg_temp_new_i64();

> +            TCGv_i64 tcg_bit55  = tcg_temp_new_i64();

> +            TCGv_i64 tcg_zero   = tcg_const_i64(0);

> +

> +            tcg_gen_andi_i64(tcg_bit55, cpu_reg(s, rn), (1ull << 55));

> +

> +            if (s->tbi0) {

> +                /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */

> +                tcg_gen_andi_i64(tcg_tmpval, cpu_reg(s, rn),

> +                                 0x00FFFFFFFFFFFFFFull);

> +                tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,

> +                                    tcg_tmpval, cpu_reg(s, rn));

> +            } else {

> +                /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */

> +                tcg_gen_ori_i64(tcg_tmpval, cpu_reg(s, rn),

> +                                0xFF00000000000000ull);

> +                tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,

> +                                    tcg_tmpval, cpu_reg(s, rn));

> +            }

> +            tcg_temp_free_i64(tcg_zero);

> +            tcg_temp_free_i64(tcg_bit55);

> +            tcg_temp_free_i64(tcg_tmpval);

> +        }

> +    } else {  /* EL > 1 */

> +        if (s->tbi0) {

> +            /* Force tag byte to all zero */

> +            tcg_gen_andi_i64(cpu_pc, cpu_reg(s, rn), 0x00FFFFFFFFFFFFFFull);

> +        } else {

> +            /* Load unmodified address */

> +            tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));

> +        }

> +     }

> +


Stray blank line.

> +}

> +

>  typedef struct DisasCompare64 {

>      TCGCond cond;

>      TCGv_i64 value;

> @@ -1691,12 +1744,14 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)

>

>      switch (opc) {

>      case 0: /* BR */

> -    case 2: /* RET */

> -        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));

> -        break;

>      case 1: /* BLR */

> -        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));

> -        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);

> +    case 2: /* RET */

> +        /* Check for tagged addresses and generate appropriate code */


This comment isn't really necessary I think.

> +        gen_a64_set_pc_reg(s, rn);

> +        /* BLR also needs to load return address */

> +        if (opc == 1) {

> +            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);

> +        }

>          break;

>      case 4: /* ERET */

>          if (s->current_el == 0) {

> @@ -11150,6 +11205,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)

>      dc->condexec_mask = 0;

>      dc->condexec_cond = 0;

>      dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);

> +    dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);

> +    dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);


I think these two lines would be better in the previous commit.

>      dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);

>  #if !defined(CONFIG_USER_ONLY)

>      dc->user = (dc->current_el == 0);

> --

> 1.9.1

>


thanks
-- PMM
Tom Hanson Oct. 5, 2016, 9:53 p.m. UTC | #2
On 09/29/2016 07:24 PM, Peter Maydell wrote:
> On 16 September 2016 at 10:34, Thomas Hanson <thomas.hanson@linaro.org> wrote:

...
>> diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c

>> index f5e29d2..4d6f951 100644

...
>> @@ -176,6 +177,58 @@ void gen_a64_set_pc_im(uint64_t val)

>>      tcg_gen_movi_i64(cpu_pc, val);

>>  }

>>

>> +void gen_a64_set_pc_reg(DisasContext *s, unsigned int rn)

> 

> I think it would be better to take a TCGv_i64 here rather than

> unsigned int rn (ie have the caller do the cpu_reg(s, rn)).

> (You probably don't need that prototype of cpu_reg() above if

> you do this, though that's not why it's better.)

> 

Why would this be better? 

To me, the caller has a register number and wants that register used to load 
the PC.  So, it passes in the register number.  

The fact that gen_a64_set_pc_reg() needs to convert that into a TCGv_i64 is
an implementation detail that should be encapsulated/hidden from the caller.

If the desire is to eliminate the multiple cpu_reg() calls inside of 
gen_a64_set_pc_reg() then that mapping could be done at the top of the 
function before the outer if().
Peter Maydell Oct. 5, 2016, 10:01 p.m. UTC | #3
On 5 October 2016 at 14:53, Tom Hanson <thomas.hanson@linaro.org> wrote:
> On 09/29/2016 07:24 PM, Peter Maydell wrote:

>> On 16 September 2016 at 10:34, Thomas Hanson <thomas.hanson@linaro.org> wrote:

>>> +void gen_a64_set_pc_reg(DisasContext *s, unsigned int rn)

>>

>> I think it would be better to take a TCGv_i64 here rather than

>> unsigned int rn (ie have the caller do the cpu_reg(s, rn)).

>> (You probably don't need that prototype of cpu_reg() above if

>> you do this, though that's not why it's better.)

>>

> Why would this be better?

>

> To me, the caller has a register number and wants that register used to load

> the PC.  So, it passes in the register number.

>

> The fact that gen_a64_set_pc_reg() needs to convert that into a TCGv_i64 is

> an implementation detail that should be encapsulated/hidden from the caller.

>

> If the desire is to eliminate the multiple cpu_reg() calls inside of

> gen_a64_set_pc_reg() then that mapping could be done at the top of the

> function before the outer if().


It matches the style of the rest of the code which generally
prefers to convert register numbers into TCGv earlier rather
than later (at the level which is doing decode of instruction
bits, rather than inside utility functions), and gives you a
more flexible utility function, which can do a "write value to PC"
for any value, not just something that happens to be in a CPU
register. And as you say it avoids calling cpu_reg() multiple times
as a side benefit.

thanks
-- PMM
Tom Hanson Oct. 11, 2016, 3:51 p.m. UTC | #4
On 5 October 2016 at 16:01, Peter Maydell <peter.maydell@linaro.org> wrote:

> On 5 October 2016 at 14:53, Tom Hanson <thomas.hanson@linaro.org> wrote:

> > On 09/29/2016 07:24 PM, Peter Maydell wrote:

> >> On 16 September 2016 at 10:34, Thomas Hanson <thomas.hanson@linaro.org>

> wrote:

> >>> +void gen_a64_set_pc_reg(DisasContext *s, unsigned int rn)

> >>

> >> I think it would be better to take a TCGv_i64 here rather than

> >> unsigned int rn (ie have the caller do the cpu_reg(s, rn)).

> >> (You probably don't need that prototype of cpu_reg() above if

> >> you do this, though that's not why it's better.)

> >>

> > Why would this be better?

> >

> > To me, the caller has a register number and wants that register used to

> load

> > the PC.  So, it passes in the register number.

> >

> > The fact that gen_a64_set_pc_reg() needs to convert that into a TCGv_i64

> is

> > an implementation detail that should be encapsulated/hidden from the

> caller.

> >

> > If the desire is to eliminate the multiple cpu_reg() calls inside of

> > gen_a64_set_pc_reg() then that mapping could be done at the top of the

> > function before the outer if().

>

> It matches the style of the rest of the code which generally

> prefers to convert register numbers into TCGv earlier rather

> than later (at the level which is doing decode of instruction

> bits, rather than inside utility functions), and gives you a

> more flexible utility function, which can do a "write value to PC"

> for any value, not just something that happens to be in a CPU

> register. And as you say it avoids calling cpu_reg() multiple times

> as a side benefit.

>

> thanks

> -- PMM

>


Sorry,  didn't see this before I submitted v2.  Even now I can't get
Thunderbird to display it.  Sigh.  (Apologies if this comes through with
bars on the side, I have to use the web mail interface in order to be able
to respond.)

This approach seems counter to both structured and OO design principles
which would push common code (like type conversion) down into the lower
level function in order to increase re-use and minimize code duplication.
Those principles suggest that if we need a gen_a64_set_pc_value() function
that can load the PC from something other than a register or an immediate,
then it should be a lower level function than, and be called by,
gen_a64_set_pc_reg().  This also has the benefit of reducing clutter in the
caller, making it more readable and more maintainable.

As a separate issue, we now have functions to load the PC from an immediate
value and from a register.  Where else could we legitimately load the PC
from?
Peter Maydell Oct. 11, 2016, 4:12 p.m. UTC | #5
On 11 October 2016 at 16:51, Thomas Hanson <thomas.hanson@linaro.org> wrote:
> On 5 October 2016 at 16:01, Peter Maydell <peter.maydell@linaro.org> wrote:

>> It matches the style of the rest of the code which generally

>> prefers to convert register numbers into TCGv earlier rather

>> than later (at the level which is doing decode of instruction

>> bits, rather than inside utility functions), and gives you a

>> more flexible utility function, which can do a "write value to PC"

>> for any value, not just something that happens to be in a CPU

>> register. And as you say it avoids calling cpu_reg() multiple times

>> as a side benefit.


> This approach seems counter to both structured and OO design principles

> which would push common code (like type conversion) down into the lower

> level function in order to increase re-use and minimize code duplication.

> Those principles suggest that if we need a gen_a64_set_pc_value() function

> that can load the PC from something other than a register or an immediate,

> then it should be a lower level function than, and be called by,

> gen_a64_set_pc_reg().  This also has the benefit of reducing clutter in the

> caller, making it more readable and more maintainable.


The 'lower level' stuff here has a general pattern of taking either
(1) a TCGv or (2) an integer immediate. We should follow that pattern.

> As a separate issue, we now have functions to load the PC from an immediate

> value and from a register.  Where else could we legitimately load the PC

> from?


Anything where we found ourselves wanting to do some preliminary
manipulation of the value before writing it to the PC.

thanks
-- PMM
Tom Hanson Oct. 12, 2016, 7:52 p.m. UTC | #6
On 10/11/2016 10:12 AM, Peter Maydell wrote:
> On 11 October 2016 at 16:51, Thomas Hanson <thomas.hanson@linaro.org> wrote:

>> On 5 October 2016 at 16:01, Peter Maydell <peter.maydell@linaro.org> wrote:

>>> It matches the style of the rest of the code which generally

>>> prefers to convert register numbers into TCGv earlier rather

>>> than later (at the level which is doing decode of instruction

>>> bits, rather than inside utility functions), and gives you a

>>> more flexible utility function, which can do a "write value to PC"

>>> for any value, not just something that happens to be in a CPU

>>> register. And as you say it avoids calling cpu_reg() multiple times

>>> as a side benefit.

> 

>> This approach seems counter to both structured and OO design principles

>> which would push common code (like type conversion) down into the lower

>> level function in order to increase re-use and minimize code duplication.

>> Those principles suggest that if we need a gen_a64_set_pc_value() function

>> that can load the PC from something other than a register or an immediate,

>> then it should be a lower level function than, and be called by,

>> gen_a64_set_pc_reg().  This also has the benefit of reducing clutter in the

>> caller, making it more readable and more maintainable.

> 

> The 'lower level' stuff here has a general pattern of taking either

> (1) a TCGv or (2) an integer immediate. We should follow that pattern.

> 

>> As a separate issue, we now have functions to load the PC from an immediate

>> value and from a register.  Where else could we legitimately load the PC

>> from?

> 

> Anything where we found ourselves wanting to do some preliminary

> manipulation of the value before writing it to the PC.

> 

> thanks

> -- PMM

> 


I split gen_a64_set_pc_reg() into 2 funtions, upper that takes a register and lower
that takes a variable.  Patch v3 submitted.
diff mbox

Patch

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index f5e29d2..4d6f951 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -41,6 +41,7 @@  static TCGv_i64 cpu_pc;
 
 /* Load/store exclusive handling */
 static TCGv_i64 cpu_exclusive_high;
+static TCGv_i64 cpu_reg(DisasContext *s, int reg);
 
 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -176,6 +177,58 @@  void gen_a64_set_pc_im(uint64_t val)
     tcg_gen_movi_i64(cpu_pc, val);
 }
 
+void gen_a64_set_pc_reg(DisasContext *s, unsigned int rn)
+{
+    if (s->current_el <= 1) {
+        /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
+         * examine bit 55 of address, can just generate code.
+         * If mixed, then test via generated code
+         */
+        if (s->tbi0 && s->tbi1) {
+            TCGv_i64 tmp_reg = tcg_temp_new_i64();
+            /* Both bits set, just fix it */
+            tcg_gen_shli_i64(tmp_reg, cpu_reg(s, rn), 8);
+            tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
+            tcg_temp_free_i64(tmp_reg);
+        } else if (!s->tbi0 && !s->tbi1) {
+            /* Neither bit set, just load it as-is */
+            tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
+        } else {
+            TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
+            TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
+            TCGv_i64 tcg_zero   = tcg_const_i64(0);
+
+            tcg_gen_andi_i64(tcg_bit55, cpu_reg(s, rn), (1ull << 55));
+
+            if (s->tbi0) {
+                /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
+                tcg_gen_andi_i64(tcg_tmpval, cpu_reg(s, rn),
+                                 0x00FFFFFFFFFFFFFFull);
+                tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
+                                    tcg_tmpval, cpu_reg(s, rn));
+            } else {
+                /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
+                tcg_gen_ori_i64(tcg_tmpval, cpu_reg(s, rn),
+                                0xFF00000000000000ull);
+                tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
+                                    tcg_tmpval, cpu_reg(s, rn));
+            }
+            tcg_temp_free_i64(tcg_zero);
+            tcg_temp_free_i64(tcg_bit55);
+            tcg_temp_free_i64(tcg_tmpval);
+        }
+    } else {  /* EL > 1 */
+        if (s->tbi0) {
+            /* Force tag byte to all zero */
+            tcg_gen_andi_i64(cpu_pc, cpu_reg(s, rn), 0x00FFFFFFFFFFFFFFull);
+        } else {
+            /* Load unmodified address */
+            tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
+        }
+     }
+
+}
+
 typedef struct DisasCompare64 {
     TCGCond cond;
     TCGv_i64 value;
@@ -1691,12 +1744,14 @@  static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
 
     switch (opc) {
     case 0: /* BR */
-    case 2: /* RET */
-        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
-        break;
     case 1: /* BLR */
-        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
-        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
+    case 2: /* RET */
+        /* Check for tagged addresses and generate appropriate code */
+        gen_a64_set_pc_reg(s, rn);
+        /* BLR also needs to load return address */
+        if (opc == 1) {
+            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
+        }
         break;
     case 4: /* ERET */
         if (s->current_el == 0) {
@@ -11150,6 +11205,8 @@  void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
     dc->condexec_mask = 0;
     dc->condexec_cond = 0;
     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
+    dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);
+    dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);
     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (dc->current_el == 0);