[v2,12/29] tcg/aarch64: Implement tcg_out_dupm_vec

Message ID	20190501050536.15580-13-richard.henderson@linaro.org
State	Superseded
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Date: Tue, 30 Apr 2019 22:05:19 -0700 Message-Id: <20190501050536.15580-13-richard.henderson@linaro.org> In-Reply-To: <20190501050536.15580-1-richard.henderson@linaro.org> References: <20190501050536.15580-1-richard.henderson@linaro.org> Subject: [Qemu-devel] [PATCH v2 12/29] tcg/aarch64: Implement tcg_out_dupm_vec Precedence: list Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	tcg vector improvements \| expand [v2,00/29] tcg vector improvements [v2,01/29] tcg: Implement tcg_gen_gvec_3i() [v2,02/29] tcg: Do not recreate INDEX_op_neg_vec unless supported [v2,03/29] tcg: Allow add_vec, sub_vec, neg_vec, not_vec to be expanded [v2,04/29] tcg: Specify optional vector requirements with a list [v2,05/29] tcg: Assert fixed_reg is read-only [v2,06/29] tcg: Return bool success from tcg_out_mov [v2,07/29] tcg: Support cross-class moves without instruction support [v2,08/29] tcg: Promote tcg_out_{dup, dupi}_vec to backend interface [v2,09/29] tcg: Manually expand INDEX_op_dup_vec [v2,10/29] tcg: Add tcg_out_dupm_vec to the backend interface [v2,11/29] tcg/i386: Implement tcg_out_dupm_vec [v2,12/29] tcg/aarch64: Implement tcg_out_dupm_vec [v2,13/29] tcg: Add INDEX_op_dup_mem_vec [v2,14/29] tcg: Add gvec expanders for variable shift [v2,15/29] tcg/i386: Support vector variable shift opcodes [v2,16/29] tcg/aarch64: Support vector variable shift opcodes [v2,17/29] tcg: Add gvec expanders for vector shift by scalar [v2,18/29] tcg/i386: Support vector scalar shift opcodes [v2,19/29] tcg: Add support for integer absolute value [v2,20/29] tcg: Add support for vector absolute value [v2,21/29] tcg/i386: Support vector absolute value [v2,22/29] tcg/aarch64: Support vector absolute value [v2,23/29] target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs [v2,24/29] target/cris: Use tcg_gen_abs_tl [v2,25/29] target/ppc: Use tcg_gen_abs_i32 [v2,26/29] target/ppc: Use tcg_gen_abs_tl [v2,27/29] target/s390x: Use tcg_gen_abs_i64 [v2,28/29] target/tricore: Use tcg_gen_abs_tl [v2,29/29] target/xtensa: Use tcg_gen_abs_i32

Message ID

20190501050536.15580-13-richard.henderson@linaro.org

State

Superseded

Headers

Received-SPF: pass (google.com: domain of
	qemu-devel-bounces+patch=linaro.org@nongnu.org designates
	209.51.188.17 as permitted sender) client-ip=209.51.188.17; 
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Tue, 30 Apr 2019 22:05:19 -0700
Message-Id: <20190501050536.15580-13-richard.henderson@linaro.org>
In-Reply-To: <20190501050536.15580-1-richard.henderson@linaro.org>
References: <20190501050536.15580-1-richard.henderson@linaro.org>
Subject: [Qemu-devel] [PATCH v2 12/29] tcg/aarch64: Implement
	tcg_out_dupm_vec
Precedence: list
Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>

Series

tcg vector improvements | expand

Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/aarch64/tcg-target.inc.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) -- 2.17.1

Comments

Alex Bennée May 2, 2019, 1:26 p.m. UTC | #1

Richard Henderson <richard.henderson@linaro.org> writes:

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  tcg/aarch64/tcg-target.inc.c | 38 ++++++++++++++++++++++++++++++++++--

>  1 file changed, 36 insertions(+), 2 deletions(-)

>

> diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c

> index 4a3cfa778a..411fb463ac 100644

> --- a/tcg/aarch64/tcg-target.inc.c

> +++ b/tcg/aarch64/tcg-target.inc.c

> @@ -381,6 +381,9 @@ typedef enum {

>      I3207_BLR       = 0xd63f0000,

>      I3207_RET       = 0xd65f0000,

>

> +    /* AdvSIMD load/store single structure.  */

> +    I3303_LD1R      = 0x0d40c000,

> +


Can you remind me where these magic numbers come from? Are they the (moving)
section numbers of the ARM ARM?

I was hoping the XML had a bit more guidance on the encoding names but
we get:

    <iclass name="No offset" oneof="2" id="LD1R_asisdlso_R1" no_encodings="1" isa="A64">
and
    <iclass name="Post-index" oneof="2" id="as_post_index" no_encodings="2" isa="A64">

Although the instruction does have:

   <instructionsection id="LD1R_advsimd" title="LD1R -- A64" type="instruction">


>      /* Load literal for loading the address at pc-relative offset */

>      I3305_LDR       = 0x58000000,

>      I3305_LDR_v64   = 0x5c000000,

> @@ -414,6 +417,8 @@ typedef enum {

>      I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,

>      I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

>

> +

> +

>      I3312_TO_I3310  = 0x00200800,

>      I3312_TO_I3313  = 0x01000000,

>

> @@ -566,7 +571,14 @@ static inline uint32_t tcg_in32(TCGContext *s)

>  #define tcg_out_insn(S, FMT, OP, ...) \

>      glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ##

> __VA_ARGS__)


The above is basically a whinge: what do we really get out of this
"type checking"?

>

> -static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)

> +static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,

> +                              TCGReg rt, TCGReg rn, unsigned size)

> +{

> +    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));

> +}

> +

> +static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,

> +                              int imm19, TCGReg rt)

>  {

>      tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);

>  }

> @@ -825,7 +837,29 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,

>  static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,

>                               TCGReg r, TCGReg base, intptr_t offset)

>  {

> -    return false;

> +    if (offset != 0) {

> +        AArch64Insn add_insn = I3401_ADDI;

> +        TCGReg temp = TCG_REG_TMP;

> +

> +        if (offset < 0) {

> +            add_insn = I3401_SUBI;

> +            offset = -offset;

> +        }

> +        if (offset <= 0xfff) {

> +            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset);

> +        } else if (offset <= 0xffffff) {

> +            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);

> +            if (offset & 0xfff) {

> +                tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);

> +            }

> +        } else {

> +            tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);

> +            tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);

> +        }

> +        base = temp;

> +    }

> +    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);

> +    return true;

>  }

>

>  static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,



--
Alex Bennée

Richard Henderson May 2, 2019, 3:35 p.m. UTC | #2

On 5/2/19 6:26 AM, Alex Bennée wrote:
>> +    /* AdvSIMD load/store single structure.  */

>> +    I3303_LD1R      = 0x0d40c000,

>> +

> 

> I can't recall where these magic numbers come from again? The (moving)

> section numbers of the ARM ARM?

They come from the A_a version of the ARM ARM.

The current D_a version has even removed the section numbers altogether, instead
of slowly modifying them as they did through the B and C revisions.

> I was hoping the XML had a bit more guidance on the encoding names but

> we get:

Yeah, ARM doesn't name these at all.

I have wondered if they are averse to naming encodings, because if they had to
name them all they would feel constrained not to invent so many strange
encodings.  ;-)

> The above is basically a whinge: what do we really get out of this

> "type checking"?

Well, ignore the "type checking" for a moment.

How would you distinguish all of the different encoding functions?
Or would you just open-code every single instruction like we do in
tcg/arm/ and tcg/ppc/?

Let me know if you come up with a scheme that works better than this.

r~

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 4a3cfa778a..411fb463ac 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -381,6 +381,9 @@  typedef enum {
     I3207_BLR       = 0xd63f0000,
     I3207_RET       = 0xd65f0000,
 
+    /* AdvSIMD load/store single structure.  */
+    I3303_LD1R      = 0x0d40c000,
+
     /* Load literal for loading the address at pc-relative offset */
     I3305_LDR       = 0x58000000,
     I3305_LDR_v64   = 0x5c000000,
@@ -414,6 +417,8 @@  typedef enum {
     I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
     I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
 
+
+
     I3312_TO_I3310  = 0x00200800,
     I3312_TO_I3313  = 0x01000000,
 
@@ -566,7 +571,14 @@  static inline uint32_t tcg_in32(TCGContext *s)
 #define tcg_out_insn(S, FMT, OP, ...) \
     glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
 
-static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
+static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
+                              TCGReg rt, TCGReg rn, unsigned size)
+{
+    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
+}
+
+static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
+                              int imm19, TCGReg rt)
 {
     tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
 }
@@ -825,7 +837,29 @@  static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                              TCGReg r, TCGReg base, intptr_t offset)
 {
-    return false;
+    if (offset != 0) {
+        AArch64Insn add_insn = I3401_ADDI;
+        TCGReg temp = TCG_REG_TMP;
+
+        if (offset < 0) {
+            add_insn = I3401_SUBI;
+            offset = -offset;
+        }
+        if (offset <= 0xfff) {
+            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset);
+        } else if (offset <= 0xffffff) {
+            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
+            if (offset & 0xfff) {
+                tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
+            }
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
+            tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
+        }
+        base = temp;
+    }
+    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
+    return true;
 }
 
 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,

[v2,12/29] tcg/aarch64: Implement tcg_out_dupm_vec

Commit Message

Comments

Patch