Message ID | 20240302051601.53649-5-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/sparc: Implement VIS4 | expand |
On 02/03/2024 05:15, Richard Henderson wrote: > These instructions have f32 inputs, which changes the decode > of the register numbers. While we're fixing things, use a > common helper for both insns, extracting the 16-bit scalar > in tcg beforehand. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/sparc/helper.h | 3 +-- > target/sparc/translate.c | 38 ++++++++++++++++++++++++++++++---- > target/sparc/vis_helper.c | 43 +++++++++------------------------------ > 3 files changed, 45 insertions(+), 39 deletions(-) > > diff --git a/target/sparc/helper.h b/target/sparc/helper.h > index adc1b87319..9e0b8b463e 100644 > --- a/target/sparc/helper.h > +++ b/target/sparc/helper.h > @@ -93,8 +93,7 @@ DEF_HELPER_FLAGS_2(fqtox, TCG_CALL_NO_WG, s64, env, i128) > > DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i64, i64) > DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i32, i64) > -DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_NO_RWG_SE, i64, i64, i64) > -DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_NO_RWG_SE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(fmul8x16a, TCG_CALL_NO_RWG_SE, i64, i32, s32) > DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64) > DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64) > DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64) > diff --git a/target/sparc/translate.c b/target/sparc/translate.c > index 5144fe4ed9..598cfcf0ac 100644 > --- a/target/sparc/translate.c > +++ b/target/sparc/translate.c > @@ -45,6 +45,7 @@ > # define gen_helper_clear_softint(E, S) qemu_build_not_reached() > # define gen_helper_done(E) qemu_build_not_reached() > # define gen_helper_flushw(E) qemu_build_not_reached() > +# define gen_helper_fmul8x16a(D, S1, S2) qemu_build_not_reached() > # define gen_helper_rdccr(D, E) qemu_build_not_reached() > # define gen_helper_rdcwp(D, E) qemu_build_not_reached() > # define gen_helper_restored(E) qemu_build_not_reached() > @@ -72,8 +73,6 @@ > # define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; }) > # define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; }) > # define gen_helper_fmul8ulx16 ({ qemu_build_not_reached(); NULL; }) > -# define gen_helper_fmul8x16al ({ qemu_build_not_reached(); NULL; }) > -# define gen_helper_fmul8x16au ({ qemu_build_not_reached(); NULL; }) > # define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; }) > # define gen_helper_fmuld8sux16 ({ qemu_build_not_reached(); NULL; }) > # define gen_helper_fmuld8ulx16 ({ qemu_build_not_reached(); NULL; }) > @@ -719,6 +718,18 @@ static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) > #endif > } > > +static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) > +{ > + tcg_gen_ext16s_i32(src2, src2); > + gen_helper_fmul8x16a(dst, src1, src2); > +} > + > +static void gen_op_fmul8x16au(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) > +{ > + tcg_gen_sari_i32(src2, src2, 16); > + gen_helper_fmul8x16a(dst, src1, src2); > +} > + > static void finishing_insn(DisasContext *dc) > { > /* > @@ -4539,6 +4550,27 @@ TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs) > TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls) > TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs) > > +static bool do_dff(DisasContext *dc, arg_r_r_r *a, > + void (*func)(TCGv_i64, TCGv_i32, TCGv_i32)) > +{ > + TCGv_i64 dst; > + TCGv_i32 src1, src2; > + > + if (gen_trap_ifnofpu(dc)) { > + return true; > + } > + > + dst = gen_dest_fpr_D(dc, a->rd); > + src1 = gen_load_fpr_F(dc, a->rs1); > + src2 = gen_load_fpr_F(dc, a->rs2); > + func(dst, src1, src2); > + gen_store_fpr_D(dc, a->rd, dst); > + return advance_pc(dc); > +} > + > +TRANS(FMUL8x16AU, VIS1, do_dff, a, gen_op_fmul8x16au) > +TRANS(FMUL8x16AL, VIS1, do_dff, a, gen_op_fmul8x16al) > + > static bool do_dfd(DisasContext *dc, arg_r_r_r *a, > void (*func)(TCGv_i64, TCGv_i32, TCGv_i64)) > { > @@ -4576,8 +4608,6 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, > return advance_pc(dc); > } > > -TRANS(FMUL8x16AU, VIS1, do_ddd, a, gen_helper_fmul8x16au) > -TRANS(FMUL8x16AL, VIS1, do_ddd, a, gen_helper_fmul8x16al) > TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16) > TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16) > TRANS(FMULD8SUx16, VIS1, do_ddd, a, gen_helper_fmuld8sux16) > diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c > index 7728ffe9c6..5c7f5536bc 100644 > --- a/target/sparc/vis_helper.c > +++ b/target/sparc/vis_helper.c > @@ -119,43 +119,20 @@ uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) > return d.ll; > } > > -uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) > +uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) > { > - VIS64 s, d; > + VIS32 s; > + VIS64 d; > uint32_t tmp; > > - s.ll = src1; > - d.ll = src2; > + s.l = src1; > + d.ll = 0; > > -#define PMUL(r) \ > - tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ > - if ((tmp & 0xff) > 0x7f) { \ > - tmp += 0x100; \ > - } \ > - d.VIS_W64(r) = tmp >> 8; > - > - PMUL(0); > - PMUL(1); > - PMUL(2); > - PMUL(3); > -#undef PMUL > - > - return d.ll; > -} > - > -uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) > -{ > - VIS64 s, d; > - uint32_t tmp; > - > - s.ll = src1; > - d.ll = src2; > - > -#define PMUL(r) \ > - tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ > - if ((tmp & 0xff) > 0x7f) { \ > - tmp += 0x100; \ > - } \ > +#define PMUL(r) \ > + tmp = src2 * (int32_t)s.VIS_B64(r); \ > + if ((tmp & 0xff) > 0x7f) { \ > + tmp += 0x100; \ > + } \ > d.VIS_W64(r) = tmp >> 8; > > PMUL(0); Hi Richard, This patch is showing a couple of issues after a run through the GitLab pipeline: 1) From checkpatch (https://gitlab.com/mcayland/qemu/-/jobs/6743594359#L44): ERROR: Macros with multiple statements should be enclosed in a do - while loop total: 2 errors, 0 warnings, 130 lines checked 2) From the s390x runners (https://gitlab.com/mcayland/qemu/-/jobs/6743594301#L4792): ../target/sparc/vis_helper.c: In function ‘helper_fmul8x16a’: ../target/sparc/vis_helper.c:46:21: error: array subscript 7 is above array bounds of ‘uint8_t[4]’ {aka ‘unsigned char[4]’} [-Werror=array-bounds] 46 | #define VIS_B64(n) b[7 - (n)] | ^ ../target/sparc/vis_helper.c:133:29: note: in expansion of macro ‘VIS_B64’ 133 | tmp = src2 * (int32_t)s.VIS_B64(r); \ | ^~~~~~~ ../target/sparc/vis_helper.c:139:5: note: in expansion of macro ‘PMUL’ 139 | PMUL(0); | ^~~~ ../target/sparc/vis_helper.c:71:13: note: while referencing ‘b’ 71 | uint8_t b[4]; | ^ ../target/sparc/vis_helper.c:46:21: error: array subscript 6 is above array bounds of ‘uint8_t[4]’ {aka ‘unsigned char[4]’} [-Werror=array-bounds] 46 | #define VIS_B64(n) b[7 - (n)] | ^ ../target/sparc/vis_helper.c:133:29: note: in expansion of macro ‘VIS_B64’ 133 | tmp = src2 * (int32_t)s.VIS_B64(r); \ | ^~~~~~~ ../target/sparc/vis_helper.c:140:5: note: in expansion of macro ‘PMUL’ 140 | PMUL(1); | ^~~~ ../target/sparc/vis_helper.c:71:13: note: while referencing ‘b’ 71 | uint8_t b[4]; | ^ ../target/sparc/vis_helper.c:46:21: error: array subscript 5 is above array bounds of ‘uint8_t[4]’ {aka ‘unsigned char[4]’} [-Werror=array-bounds] 46 | #define VIS_B64(n) b[7 - (n)] | ^ ../target/sparc/vis_helper.c:133:29: note: in expansion of macro ‘VIS_B64’ 133 | tmp = src2 * (int32_t)s.VIS_B64(r); \ | ^~~~~~~ ../target/sparc/vis_helper.c:141:5: note: in expansion of macro ‘PMUL’ 141 | PMUL(2); | ^~~~ ../target/sparc/vis_helper.c:71:13: note: while referencing ‘b’ 71 | uint8_t b[4]; | ^ ../target/sparc/vis_helper.c:46:21: error: array subscript 4 is above array bounds of ‘uint8_t[4]’ {aka ‘unsigned char[4]’} [-Werror=array-bounds] 46 | #define VIS_B64(n) b[7 - (n)] | ^ ../target/sparc/vis_helper.c:133:29: note: in expansion of macro ‘VIS_B64’ 133 | tmp = src2 * (int32_t)s.VIS_B64(r); \ | ^~~~~~~ ../target/sparc/vis_helper.c:142:5: note: in expansion of macro ‘PMUL’ 142 | PMUL(3); | ^~~~ ../target/sparc/vis_helper.c:71:13: note: while referencing ‘b’ 71 | uint8_t b[4]; | ^ cc1: all warnings being treated as errors [4028/5573] Compiling C object libqemu-sparc64-softmmu.fa.p/trace_control-target.c.o [4029/5573] Compiling C object libqemu-sparc64-softmmu.fa.p/target_sparc_translate.c.o ninja: build stopped: subcommand failed. make: *** [Makefile:167: run-ninja] Error 1 ATB, Mark.
diff --git a/target/sparc/helper.h b/target/sparc/helper.h index adc1b87319..9e0b8b463e 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -93,8 +93,7 @@ DEF_HELPER_FLAGS_2(fqtox, TCG_CALL_NO_WG, s64, env, i128) DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i32, i64) -DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fmul8x16a, TCG_CALL_NO_RWG_SE, i64, i32, s32) DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 5144fe4ed9..598cfcf0ac 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -45,6 +45,7 @@ # define gen_helper_clear_softint(E, S) qemu_build_not_reached() # define gen_helper_done(E) qemu_build_not_reached() # define gen_helper_flushw(E) qemu_build_not_reached() +# define gen_helper_fmul8x16a(D, S1, S2) qemu_build_not_reached() # define gen_helper_rdccr(D, E) qemu_build_not_reached() # define gen_helper_rdcwp(D, E) qemu_build_not_reached() # define gen_helper_restored(E) qemu_build_not_reached() @@ -72,8 +73,6 @@ # define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8ulx16 ({ qemu_build_not_reached(); NULL; }) -# define gen_helper_fmul8x16al ({ qemu_build_not_reached(); NULL; }) -# define gen_helper_fmul8x16au ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmuld8sux16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmuld8ulx16 ({ qemu_build_not_reached(); NULL; }) @@ -719,6 +718,18 @@ static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) #endif } +static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) +{ + tcg_gen_ext16s_i32(src2, src2); + gen_helper_fmul8x16a(dst, src1, src2); +} + +static void gen_op_fmul8x16au(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) +{ + tcg_gen_sari_i32(src2, src2, 16); + gen_helper_fmul8x16a(dst, src1, src2); +} + static void finishing_insn(DisasContext *dc) { /* @@ -4539,6 +4550,27 @@ TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs) TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls) TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs) +static bool do_dff(DisasContext *dc, arg_r_r_r *a, + void (*func)(TCGv_i64, TCGv_i32, TCGv_i32)) +{ + TCGv_i64 dst; + TCGv_i32 src1, src2; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = gen_dest_fpr_D(dc, a->rd); + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + func(dst, src1, src2); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(FMUL8x16AU, VIS1, do_dff, a, gen_op_fmul8x16au) +TRANS(FMUL8x16AL, VIS1, do_dff, a, gen_op_fmul8x16al) + static bool do_dfd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i32, TCGv_i64)) { @@ -4576,8 +4608,6 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, return advance_pc(dc); } -TRANS(FMUL8x16AU, VIS1, do_ddd, a, gen_helper_fmul8x16au) -TRANS(FMUL8x16AL, VIS1, do_ddd, a, gen_helper_fmul8x16al) TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16) TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16) TRANS(FMULD8SUx16, VIS1, do_ddd, a, gen_helper_fmuld8sux16) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 7728ffe9c6..5c7f5536bc 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -119,43 +119,20 @@ uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) return d.ll; } -uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) +uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) { - VIS64 s, d; + VIS32 s; + VIS64 d; uint32_t tmp; - s.ll = src1; - d.ll = src2; + s.l = src1; + d.ll = 0; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL - - return d.ll; -} - -uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) -{ - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ +#define PMUL(r) \ + tmp = src2 * (int32_t)s.VIS_B64(r); \ + if ((tmp & 0xff) > 0x7f) { \ + tmp += 0x100; \ + } \ d.VIS_W64(r) = tmp >> 8; PMUL(0);
These instructions have f32 inputs, which changes the decode of the register numbers. While we're fixing things, use a common helper for both insns, extracting the 16-bit scalar in tcg beforehand. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/sparc/helper.h | 3 +-- target/sparc/translate.c | 38 ++++++++++++++++++++++++++++++---- target/sparc/vis_helper.c | 43 +++++++++------------------------------ 3 files changed, 45 insertions(+), 39 deletions(-)