Message ID | 20240502165528.244004-8-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/sparc: vis fixes | expand |
On 2/5/24 18:55, Richard Henderson wrote: > The unit operation for fmul8x16 and friends is described in the > manual as "MS16b". Split that out for clarity. Improve rounding > with an unconditional addition of 0.5 as a fixed-point integer. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/sparc/vis_helper.c | 78 ++++++++++++--------------------------- > 1 file changed, 24 insertions(+), 54 deletions(-) > @@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) > uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) > { > VIS64 s, d; > - uint32_t tmp; > > s.ll = src1; > d.ll = src2; > > -#define PMUL(r) \ > - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ > - if ((tmp & 0xff) > 0x7f) { \ > - tmp += 0x100; \ > - } \ > - d.VIS_W64(r) = tmp >> 8; > - > - PMUL(0); > - PMUL(1); > - PMUL(2); > - PMUL(3); > -#undef PMUL > + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); s.VIS_SB64(1) = upper bit, OK. > + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); > + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); > + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); > > return d.ll; > } > @@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) > uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) > { > VIS64 s, d; > - uint32_t tmp; > > s.ll = src1; > d.ll = src2; > > -#define PMUL(r) \ > - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ > - if ((tmp & 0xff) > 0x7f) { \ > - tmp += 0x100; \ > - } \ > - d.VIS_W64(r) = tmp >> 8; > - > - PMUL(0); > - PMUL(1); > - PMUL(2); > - PMUL(3); > -#undef PMUL > + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); s.VIS_B64(0) for lower bit, OK. > + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); > + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); > + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); > > return d.ll; > } Maybe add a comment for high/low bits in fmul8sux16/fmul8ulx16, as it was not obvious at first. Otherwise, Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
On 3/5/24 21:11, Philippe Mathieu-Daudé wrote: > On 2/5/24 18:55, Richard Henderson wrote: >> The unit operation for fmul8x16 and friends is described in the >> manual as "MS16b". Split that out for clarity. Improve rounding >> with an unconditional addition of 0.5 as a fixed-point integer. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/sparc/vis_helper.c | 78 ++++++++++++--------------------------- >> 1 file changed, 24 insertions(+), 54 deletions(-) > > >> @@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t >> src2) >> uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) >> { >> VIS64 s, d; >> - uint32_t tmp; >> s.ll = src1; >> d.ll = src2; >> -#define >> PMUL(r) \ >> - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> >> 8); \ >> - if ((tmp & 0xff) > 0x7f) >> { \ >> - tmp += >> 0x100; \ >> - >> } \ >> - d.VIS_W64(r) = tmp >> 8; >> - >> - PMUL(0); >> - PMUL(1); >> - PMUL(2); >> - PMUL(3); >> -#undef PMUL >> + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); > > s.VIS_SB64(1) = upper bit, OK. I meant "bits" (plural)! > >> + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); >> + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); >> + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); >> return d.ll; >> } >> @@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, >> uint64_t src2) >> uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) >> { >> VIS64 s, d; >> - uint32_t tmp; >> s.ll = src1; >> d.ll = src2; >> -#define >> PMUL(r) \ >> - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * >> 2)); \ >> - if ((tmp & 0xff) > 0x7f) >> { \ >> - tmp += >> 0x100; \ >> - >> } \ >> - d.VIS_W64(r) = tmp >> 8; >> - >> - PMUL(0); >> - PMUL(1); >> - PMUL(2); >> - PMUL(3); >> -#undef PMUL >> + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); > > s.VIS_B64(0) for lower bit, OK. Ditto. > >> + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); >> + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); >> + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); >> return d.ll; >> } > > Maybe add a comment for high/low bits in fmul8sux16/fmul8ulx16, > as it was not obvious at first. Otherwise, > > Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> >
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 14c665cad6..e15c6bb34e 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -44,6 +44,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #if HOST_BIG_ENDIAN #define VIS_B64(n) b[7 - (n)] +#define VIS_SB64(n) sb[7 - (n)] #define VIS_W64(n) w[3 - (n)] #define VIS_SW64(n) sw[3 - (n)] #define VIS_L64(n) l[1 - (n)] @@ -51,6 +52,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #define VIS_W32(n) w[1 - (n)] #else #define VIS_B64(n) b[n] +#define VIS_SB64(n) sb[n] #define VIS_W64(n) w[n] #define VIS_SW64(n) sw[n] #define VIS_L64(n) l[n] @@ -60,6 +62,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) typedef union { uint8_t b[8]; + int8_t sb[8]; uint16_t w[4]; int16_t sw[4]; uint32_t l[2]; @@ -95,27 +98,23 @@ uint64_t helper_fpmerge(uint32_t src1, uint32_t src2) return d.ll; } +static inline int do_ms16b(int x, int y) +{ + return ((x * y) + 0x80) >> 8; +} + uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) { VIS64 d; VIS32 s; - uint32_t tmp; s.l = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B32(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3)); return d.ll; } @@ -124,25 +123,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) { VIS32 s; VIS64 d; - uint32_t tmp; s.l = src1; d.ll = 0; -#define PMUL(r) \ - do { \ - tmp = src2 * (int32_t)s.VIS_B32(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; \ - } while (0) - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2); + d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2); + d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2); + d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2); return d.ll; } @@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) { VIS64 s, d; - uint32_t tmp; s.ll = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); return d.ll; } @@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) { VIS64 s, d; - uint32_t tmp; s.ll = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); return d.ll; }
The unit operation for fmul8x16 and friends is described in the manual as "MS16b". Split that out for clarity. Improve rounding with an unconditional addition of 0.5 as a fixed-point integer. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/sparc/vis_helper.c | 78 ++++++++++++--------------------------- 1 file changed, 24 insertions(+), 54 deletions(-)