diff mbox series

[01/28] tcg: Add flags argument to bswap opcodes

Message ID 20210614083800.1166166-2-richard.henderson@linaro.org
State New
Headers show
Series tcg: bswap improvements | expand

Commit Message

Richard Henderson June 14, 2021, 8:37 a.m. UTC
This will eventually simplify front-end usage, and will allow
backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of
optimization.

The argument is added during expansion, not currently exposed
to the front end translators.  Non-zero values are not yet
supported by any backends.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 include/tcg/tcg-opc.h | 10 +++++-----
 include/tcg/tcg.h     | 12 ++++++++++++
 tcg/tcg-op.c          | 13 ++++++++-----
 tcg/README            | 18 ++++++++++--------
 4 files changed, 35 insertions(+), 18 deletions(-)

-- 
2.25.1

Comments

Philippe Mathieu-Daudé June 14, 2021, 9:19 a.m. UTC | #1
On 6/14/21 10:37 AM, Richard Henderson wrote:
> This will eventually simplify front-end usage, and will allow

> backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

> optimization.

> 

> The argument is added during expansion, not currently exposed

> to the front end translators.  Non-zero values are not yet

> supported by any backends.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  include/tcg/tcg-opc.h | 10 +++++-----

>  include/tcg/tcg.h     | 12 ++++++++++++

>  tcg/tcg-op.c          | 13 ++++++++-----

>  tcg/README            | 18 ++++++++++--------

>  4 files changed, 35 insertions(+), 18 deletions(-)


> +/*

> + * Flags for the bswap opcodes.

> + * If IZ, the input is zero-extended, otherwise unknown.

> + * If OZ or OS, the output is zero- or sign-extended respectively,

> + * otherwise the high bits are undefined.

> + */

> +enum {

> +    TCG_BSWAP_IZ = 1,

> +    TCG_BSWAP_OZ = 2,

> +    TCG_BSWAP_OS = 4,


Matter of taste, I find "1 << x" bullet proof for flags.

Otherwise:
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Alex Bennée June 14, 2021, 11:49 a.m. UTC | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> This will eventually simplify front-end usage, and will allow

> backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

> optimization.

>

> The argument is added during expansion, not currently exposed

> to the front end translators.  Non-zero values are not yet

> supported by any backends.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  include/tcg/tcg-opc.h | 10 +++++-----

>  include/tcg/tcg.h     | 12 ++++++++++++

>  tcg/tcg-op.c          | 13 ++++++++-----

>  tcg/README            | 18 ++++++++++--------

>  4 files changed, 35 insertions(+), 18 deletions(-)

>

> diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h

> index bbb0884af8..fddcc42cbd 100644

> --- a/include/tcg/tcg-opc.h

> +++ b/include/tcg/tcg-opc.h

> @@ -96,8 +96,8 @@ DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))

>  DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))

>  DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))

>  DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))

> -DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32))

> -DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32))

> +DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))

> +DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))

>  DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))

>  DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32))

>  DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))

> @@ -165,9 +165,9 @@ DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))

>  DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))

>  DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))

>  DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))

> -DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))

> -DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))

> -DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))

> +DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))

> +DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))

> +DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))

>  DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))

>  DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64))

>  DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))

> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h

> index 064dab383b..7a060e532d 100644

> --- a/include/tcg/tcg.h

> +++ b/include/tcg/tcg.h

> @@ -430,6 +430,18 @@ typedef enum {

>      TCG_COND_GTU    = 8 | 4 | 0 | 1,

>  } TCGCond;

>  

> +/*

> + * Flags for the bswap opcodes.

> + * If IZ, the input is zero-extended, otherwise unknown.

> + * If OZ or OS, the output is zero- or sign-extended respectively,

> + * otherwise the high bits are undefined.

> + */

> +enum {

> +    TCG_BSWAP_IZ = 1,

> +    TCG_BSWAP_OZ = 2,

> +    TCG_BSWAP_OS = 4,

> +};

> +


So is a TCG_BSWAP_IZ only really for cases where we have loaded up a
narrower width value into the "natural" TCG sized register? We seem to
assume this is always the case even though the TCG bswap op doesn't have
visibility of how the arg value was loaded.


>  /* Invert the sense of the comparison.  */

>  static inline TCGCond tcg_invert_cond(TCGCond c)

>  {

> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c

> index dcc2ed0bbc..dc65577e2f 100644

> --- a/tcg/tcg-op.c

> +++ b/tcg/tcg-op.c

> @@ -1005,7 +1005,8 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)

>  void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)

>  {

>      if (TCG_TARGET_HAS_bswap16_i32) {

> -        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);

> +        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,

> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);

>      } else {

>          TCGv_i32 t0 = tcg_temp_new_i32();

>  

> @@ -1020,7 +1021,7 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)

>  void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)

>  {

>      if (TCG_TARGET_HAS_bswap32_i32) {

> -        tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);

> +        tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);

>      } else {

>          TCGv_i32 t0 = tcg_temp_new_i32();

>          TCGv_i32 t1 = tcg_temp_new_i32();

> @@ -1661,7 +1662,8 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)

>          tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));

>          tcg_gen_movi_i32(TCGV_HIGH(ret), 0);

>      } else if (TCG_TARGET_HAS_bswap16_i64) {

> -        tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);

> +        tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg,

> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);

>      } else {

>          TCGv_i64 t0 = tcg_temp_new_i64();

>  

> @@ -1680,7 +1682,8 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)

>          tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));

>          tcg_gen_movi_i32(TCGV_HIGH(ret), 0);

>      } else if (TCG_TARGET_HAS_bswap32_i64) {

> -        tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);

> +        tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg,

> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);

>      } else {

>          TCGv_i64 t0 = tcg_temp_new_i64();

>          TCGv_i64 t1 = tcg_temp_new_i64();

> @@ -1717,7 +1720,7 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)

>          tcg_temp_free_i32(t0);

>          tcg_temp_free_i32(t1);

>      } else if (TCG_TARGET_HAS_bswap64_i64) {

> -        tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);

> +        tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);

>      } else {

>          TCGv_i64 t0 = tcg_temp_new_i64();

>          TCGv_i64 t1 = tcg_temp_new_i64();

> diff --git a/tcg/README b/tcg/README

> index 8510d823e3..19fbf6ca52 100644

> --- a/tcg/README

> +++ b/tcg/README

> @@ -295,19 +295,21 @@ ext32u_i64 t0, t1

>  

>  8, 16 or 32 bit sign/zero extension (both operands must have the same type)

>  

> -* bswap16_i32/i64 t0, t1

> +* bswap16_i32/i64 t0, t1, flags

>  

> -16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order

> -bytes are set to zero.

> +16 bit byte swap on a 32/64 bit value.  The flags values control how

> +the input and output sign- or zero-extension is treated.

>  

> -* bswap32_i32/i64 t0, t1

> +* bswap32_i32/i64 t0, t1, flags

>  

> -32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that

> -the four high order bytes are set to zero.

> +32 bit byte swap on a 32/64 bit value.  For 32-bit value, the flags

> +are ignored; for a 64-bit value the flags values control how the

> +input and output sign- or zero-extension is treated.

>  

> -* bswap64_i64 t0, t1

> +* bswap64_i64 t0, t1, flags

>  

> -64 bit byte swap

> +64 bit byte swap.  The flags are ignored -- the argument is present

> +for consistency with the smaller bswaps.

>  

>  * discard_i32/i64 t0


-- 
Alex Bennée
Richard Henderson June 14, 2021, 2:43 p.m. UTC | #3
On 6/14/21 4:49 AM, Alex Bennée wrote:
>> +/*

>> + * Flags for the bswap opcodes.

>> + * If IZ, the input is zero-extended, otherwise unknown.

>> + * If OZ or OS, the output is zero- or sign-extended respectively,

>> + * otherwise the high bits are undefined.

>> + */

>> +enum {

>> +    TCG_BSWAP_IZ = 1,

>> +    TCG_BSWAP_OZ = 2,

>> +    TCG_BSWAP_OS = 4,

>> +};

>> +

> 

> So is a TCG_BSWAP_IZ only really for cases where we have loaded up a

> narrower width value into the "natural" TCG sized register? We seem to

> assume this is always the case even though the TCG bswap op doesn't have

> visibility of how the arg value was loaded.


All of these flags are for narrower width bswap.  When you get to patch 17, you'll see 
that tcg_gen_bswap32_i32 and bswap64_i64 do not present this argument to the target/ front 
ends at all.

IZ is for when we know that we've used a zero-extending load feeding into the operation. 
I've also added code to the optimizer to *set* this bit when it can prove that the value 
feeding the bswap is zero-extended.


r~
Peter Maydell June 21, 2021, 1:41 p.m. UTC | #4
On Mon, 14 Jun 2021 at 09:43, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> This will eventually simplify front-end usage, and will allow

> backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

> optimization.

>

> The argument is added during expansion, not currently exposed

> to the front end translators.  Non-zero values are not yet

> supported by any backends.


Here we say non-zero values are not yet supported by the backend...

> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c

> index dcc2ed0bbc..dc65577e2f 100644

> --- a/tcg/tcg-op.c

> +++ b/tcg/tcg-op.c

> @@ -1005,7 +1005,8 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)

>  void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)

>  {

>      if (TCG_TARGET_HAS_bswap16_i32) {

> -        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);

> +        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,

> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);

>      } else {

>          TCGv_i32 t0 = tcg_temp_new_i32();


...but here we pass a non-zero value...

> @@ -1661,7 +1662,8 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)

>          tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));

>          tcg_gen_movi_i32(TCGV_HIGH(ret), 0);

>      } else if (TCG_TARGET_HAS_bswap16_i64) {

> -        tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);

> +        tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg,

> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);

>      } else {

>          TCGv_i64 t0 = tcg_temp_new_i64();

>


...and again here...

> @@ -1680,7 +1682,8 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)

>          tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));

>          tcg_gen_movi_i32(TCGV_HIGH(ret), 0);

>      } else if (TCG_TARGET_HAS_bswap32_i64) {

> -        tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);

> +        tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg,

> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);

>      } else {

>          TCGv_i64 t0 = tcg_temp_new_i64();

>          TCGv_i64 t1 = tcg_temp_new_i64();


...and here.

thanks
-- PMM
Peter Maydell June 21, 2021, 1:51 p.m. UTC | #5
On Mon, 21 Jun 2021 at 14:41, Peter Maydell <peter.maydell@linaro.org> wrote:
>

> On Mon, 14 Jun 2021 at 09:43, Richard Henderson

> <richard.henderson@linaro.org> wrote:

> >

> > This will eventually simplify front-end usage, and will allow

> > backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

> > optimization.

> >

> > The argument is added during expansion, not currently exposed

> > to the front end translators.  Non-zero values are not yet

> > supported by any backends.

>

> Here we say non-zero values are not yet supported by the backend...


Looking at the tcg/README docs, I think what you mean is that
at the moment all the backends assume/require that the caller passes
one of TCG_BSWAP_IZ or (TCG_BSWAP_IZ | TCG_BSWAP_OZ), since the
pre-flags implementation requires the top bytes to be zero and leaves
them that way. But then the parts of your patch that pass in a zero
flags word would be wrong...

-- PMM
Richard Henderson June 21, 2021, 1:59 p.m. UTC | #6
On 6/21/21 6:41 AM, Peter Maydell wrote:
> On Mon, 14 Jun 2021 at 09:43, Richard Henderson

> <richard.henderson@linaro.org> wrote:

>>

>> This will eventually simplify front-end usage, and will allow

>> backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

>> optimization.

>>

>> The argument is added during expansion, not currently exposed

>> to the front end translators.  Non-zero values are not yet

>> supported by any backends.

> 

> Here we say non-zero values are not yet supported by the backend...


I meant to say "non-zeroed" wrt the inputs and outputs,
i.e. the previous semantics...

>> +        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,

>> +                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);


which were these.


r~
Richard Henderson June 21, 2021, 2:02 p.m. UTC | #7
On 6/21/21 6:51 AM, Peter Maydell wrote:
> On Mon, 21 Jun 2021 at 14:41, Peter Maydell <peter.maydell@linaro.org> wrote:

>>

>> On Mon, 14 Jun 2021 at 09:43, Richard Henderson

>> <richard.henderson@linaro.org> wrote:

>>>

>>> This will eventually simplify front-end usage, and will allow

>>> backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

>>> optimization.

>>>

>>> The argument is added during expansion, not currently exposed

>>> to the front end translators.  Non-zero values are not yet

>>> supported by any backends.

>>

>> Here we say non-zero values are not yet supported by the backend...

> 

> Looking at the tcg/README docs, I think what you mean is that

> at the moment all the backends assume/require that the caller passes

> one of TCG_BSWAP_IZ or (TCG_BSWAP_IZ | TCG_BSWAP_OZ), since the

> pre-flags implementation requires the top bytes to be zero and leaves

> them that way.


Correct.

> But then the parts of your patch that pass in a zero

> flags word would be wrong...


The parts that pass in a zero flags word are covered by, from the README:

> The flags are ignored -- the argument is present

> for consistency with the smaller bswaps.



r~
Peter Maydell June 21, 2021, 2:15 p.m. UTC | #8
On Mon, 21 Jun 2021 at 15:02, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 6/21/21 6:51 AM, Peter Maydell wrote:

> > On Mon, 21 Jun 2021 at 14:41, Peter Maydell <peter.maydell@linaro.org> wrote:

> >>

> >> On Mon, 14 Jun 2021 at 09:43, Richard Henderson

> >> <richard.henderson@linaro.org> wrote:

> >>>

> >>> This will eventually simplify front-end usage, and will allow

> >>> backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of

> >>> optimization.

> >>>

> >>> The argument is added during expansion, not currently exposed

> >>> to the front end translators.  Non-zero values are not yet

> >>> supported by any backends.

> >>

> >> Here we say non-zero values are not yet supported by the backend...

> >

> > Looking at the tcg/README docs, I think what you mean is that

> > at the moment all the backends assume/require that the caller passes

> > one of TCG_BSWAP_IZ or (TCG_BSWAP_IZ | TCG_BSWAP_OZ), since the

> > pre-flags implementation requires the top bytes to be zero and leaves

> > them that way.

>

> Correct.

>

> > But then the parts of your patch that pass in a zero

> > flags word would be wrong...

>

> The parts that pass in a zero flags word are covered by, from the README:

>

> > The flags are ignored -- the argument is present

> > for consistency with the smaller bswaps.


Ah, I see. If you fix up the commit message, maybe something like

# The argument is added during expansion, not currently exposed
# to the front end translators. The backends currently only support
# a flags value of either TCG_BSWAP_IZ, or (TCG_BSWAP_IZ | TCG_BSWAP_OZ),
# since they all require zero top bytes and leave them that way.
# At the existing callsites we pass in (TCG_BSWAP_IZ | TCG_BSWAP_OZ),
# except for the flags-ignored cases of a 32-bit swap of a 32-bit
# value and or a 64-bit swap of a 64-bit value, where we pass 0.

then

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>


It would be nice to document the actual flag values/names in
the user-facing documentation, too.

thanks
-- PMM
diff mbox series

Patch

diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index bbb0884af8..fddcc42cbd 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -96,8 +96,8 @@  DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
 DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
 DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
 DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
-DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32))
-DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32))
+DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))
+DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))
 DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
 DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32))
 DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
@@ -165,9 +165,9 @@  DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
 DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
 DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
 DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
-DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
-DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
-DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
+DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
+DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
+DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
 DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
 DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64))
 DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 064dab383b..7a060e532d 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -430,6 +430,18 @@  typedef enum {
     TCG_COND_GTU    = 8 | 4 | 0 | 1,
 } TCGCond;
 
+/*
+ * Flags for the bswap opcodes.
+ * If IZ, the input is zero-extended, otherwise unknown.
+ * If OZ or OS, the output is zero- or sign-extended respectively,
+ * otherwise the high bits are undefined.
+ */
+enum {
+    TCG_BSWAP_IZ = 1,
+    TCG_BSWAP_OZ = 2,
+    TCG_BSWAP_OS = 4,
+};
+
 /* Invert the sense of the comparison.  */
 static inline TCGCond tcg_invert_cond(TCGCond c)
 {
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index dcc2ed0bbc..dc65577e2f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1005,7 +1005,8 @@  void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
     if (TCG_TARGET_HAS_bswap16_i32) {
-        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
+        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,
+                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     } else {
         TCGv_i32 t0 = tcg_temp_new_i32();
 
@@ -1020,7 +1021,7 @@  void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
     if (TCG_TARGET_HAS_bswap32_i32) {
-        tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
+        tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
     } else {
         TCGv_i32 t0 = tcg_temp_new_i32();
         TCGv_i32 t1 = tcg_temp_new_i32();
@@ -1661,7 +1662,8 @@  void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
     } else if (TCG_TARGET_HAS_bswap16_i64) {
-        tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
+        tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg,
+                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
 
@@ -1680,7 +1682,8 @@  void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
     } else if (TCG_TARGET_HAS_bswap32_i64) {
-        tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
+        tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg,
+                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1717,7 +1720,7 @@  void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
         tcg_temp_free_i32(t0);
         tcg_temp_free_i32(t1);
     } else if (TCG_TARGET_HAS_bswap64_i64) {
-        tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
+        tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
diff --git a/tcg/README b/tcg/README
index 8510d823e3..19fbf6ca52 100644
--- a/tcg/README
+++ b/tcg/README
@@ -295,19 +295,21 @@  ext32u_i64 t0, t1
 
 8, 16 or 32 bit sign/zero extension (both operands must have the same type)
 
-* bswap16_i32/i64 t0, t1
+* bswap16_i32/i64 t0, t1, flags
 
-16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order
-bytes are set to zero.
+16 bit byte swap on a 32/64 bit value.  The flags values control how
+the input and output sign- or zero-extension is treated.
 
-* bswap32_i32/i64 t0, t1
+* bswap32_i32/i64 t0, t1, flags
 
-32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that
-the four high order bytes are set to zero.
+32 bit byte swap on a 32/64 bit value.  For 32-bit value, the flags
+are ignored; for a 64-bit value the flags values control how the
+input and output sign- or zero-extension is treated.
 
-* bswap64_i64 t0, t1
+* bswap64_i64 t0, t1, flags
 
-64 bit byte swap
+64 bit byte swap.  The flags are ignored -- the argument is present
+for consistency with the smaller bswaps.
 
 * discard_i32/i64 t0