diff mbox series

[PULL,11/24] tcg/optimize: Use tcg_constant_internal with constant folding

Message ID 20210114021654.647242-12-richard.henderson@linaro.org
State Accepted
Commit 8fe35e0444be88de4e3ab80a2a0e210a1f6d663d
Headers show
Series tcg patch queue | expand

Commit Message

Richard Henderson Jan. 14, 2021, 2:16 a.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/optimize.c | 108 ++++++++++++++++++++++---------------------------
 1 file changed, 49 insertions(+), 59 deletions(-)

-- 
2.25.1

Comments

Alistair Francis Jan. 15, 2021, 11:03 p.m. UTC | #1
On Wed, Jan 13, 2021 at 6:32 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>


This patch results in a QEMU seg fault when starting userspace on RISC-V 32-bit.

This is the full backtrace:

```
#0  0x0000555555a67c4d in ts_are_copies (ts2=0x7fffa8008008,
ts1=0x7fffa8001e40) at ../tcg/optimize.c:163
#1  tcg_opt_gen_mov (s=s@entry=0x7fffa8000b60,
op=op@entry=0x7fffa81ac778, dst=140736011968064, src=140736011993096)
at ../tcg/optimize.c:191
#2  0x0000555555a67dcb in tcg_opt_gen_movi (s=s@entry=0x7fffa8000b60,
temps_used=temps_used@entry=0x7ffff1cb92c0,
op=op@entry=0x7fffa81ac778, dst=<optimized out>, val=<optimized out>)
at ../tcg/optimize.c:249
#3  0x0000555555a6914d in tcg_optimize (s=s@entry=0x7fffa8000b60) at
../tcg/optimize.c:1242
#4  0x0000555555abb248 in tcg_gen_code (s=0x7fffa8000b60,
tb=tb@entry=0x7fffae84edc0 <code_gen_buffer+42266003>) at
../tcg/tcg.c:4406
#5  0x0000555555a7f4d5 in tb_gen_code (cpu=cpu@entry=0x7ffff7fac930,
pc=160234, cs_base=0, flags=16640, cflags=-16252928) at
../accel/tcg/translate-all.c:1952
#6  0x0000555555ae4fe4 in tb_find (cf_mask=<optimized out>, tb_exit=0,
last_tb=0x0, cpu=0x7ffff7fac930) at ../accel/tcg/cpu-exec.c:454
#7  cpu_exec (cpu=cpu@entry=0x7ffff7fac930) at ../accel/tcg/cpu-exec.c:810
#8  0x0000555555aa6513 in tcg_cpus_exec (cpu=cpu@entry=0x7ffff7fac930)
at ../accel/tcg/tcg-cpus.c:57
#9  0x0000555555a8c7a3 in mttcg_cpu_thread_fn
(arg=arg@entry=0x7ffff7fac930) at ../accel/tcg/tcg-cpus-mttcg.c:69
#10 0x0000555555c94209 in qemu_thread_start (args=0x7ffff1cb96d0) at
../util/qemu-thread-posix.c:521
#11 0x00007ffff673a3e9 in start_thread () at /usr/lib/libpthread.so.0
#12 0x00007ffff6395293 in clone () at /usr/lib/libc.so.6
```

I run QEMU with these arguments:

./build/riscv32-softmmu/qemu-system-riscv32 \
    -machine virt -serial mon:stdio -serial null -nographic \
    -append "root=/dev/vda rw highres=off  console=ttyS0 ip=dhcp earlycon=sbi" \
    -device virtio-net-device,netdev=net0,mac=52:54:00:12:34:02
-netdev user,id=net0 \
    -object rng-random,filename=/dev/urandom,id=rng0 -device
virtio-rng-device,rng=rng0 \
    -smp 4 -d guest_errors -m 256M \
    -kernel ./Image \
    -drive id=disk0,file=./core-image-minimal-qemuriscv32.ext4,if=none,format=raw
\
    -device virtio-blk-device,drive=disk0 \
    -bios default

I am uploading the images to:
https://nextcloud.alistair23.me/index.php/s/MQFyGGNLPZjLZPH

Although apparently it will take a few hours to upload the 2GB rootFS.

Alistair


> ---

>  tcg/optimize.c | 108 ++++++++++++++++++++++---------------------------

>  1 file changed, 49 insertions(+), 59 deletions(-)

>

> diff --git a/tcg/optimize.c b/tcg/optimize.c

> index 49bf1386c7..bda727d5ed 100644

> --- a/tcg/optimize.c

> +++ b/tcg/optimize.c

> @@ -178,37 +178,6 @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)

>      return ts_are_copies(arg_temp(arg1), arg_temp(arg2));

>  }

>

> -static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)

> -{

> -    const TCGOpDef *def;

> -    TCGOpcode new_op;

> -    uint64_t mask;

> -    TempOptInfo *di = arg_info(dst);

> -

> -    def = &tcg_op_defs[op->opc];

> -    if (def->flags & TCG_OPF_VECTOR) {

> -        new_op = INDEX_op_dupi_vec;

> -    } else if (def->flags & TCG_OPF_64BIT) {

> -        new_op = INDEX_op_movi_i64;

> -    } else {

> -        new_op = INDEX_op_movi_i32;

> -    }

> -    op->opc = new_op;

> -    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */

> -    op->args[0] = dst;

> -    op->args[1] = val;

> -

> -    reset_temp(dst);

> -    di->is_const = true;

> -    di->val = val;

> -    mask = val;

> -    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {

> -        /* High bits of the destination are now garbage.  */

> -        mask |= ~0xffffffffull;

> -    }

> -    di->mask = mask;

> -}

> -

>  static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)

>  {

>      TCGTemp *dst_ts = arg_temp(dst);

> @@ -259,6 +228,27 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)

>      }

>  }

>

> +static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,

> +                             TCGOp *op, TCGArg dst, uint64_t val)

> +{

> +    const TCGOpDef *def = &tcg_op_defs[op->opc];

> +    TCGType type;

> +    TCGTemp *tv;

> +

> +    if (def->flags & TCG_OPF_VECTOR) {

> +        type = TCGOP_VECL(op) + TCG_TYPE_V64;

> +    } else if (def->flags & TCG_OPF_64BIT) {

> +        type = TCG_TYPE_I64;

> +    } else {

> +        type = TCG_TYPE_I32;

> +    }

> +

> +    /* Convert movi to mov with constant temp. */

> +    tv = tcg_constant_internal(type, val);

> +    init_ts_info(temps_used, tv);

> +    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));

> +}

> +

>  static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)

>  {

>      uint64_t l64, h64;

> @@ -622,7 +612,7 @@ void tcg_optimize(TCGContext *s)

>      nb_temps = s->nb_temps;

>      nb_globals = s->nb_globals;

>

> -    bitmap_zero(temps_used.l, nb_temps);

> +    memset(&temps_used, 0, sizeof(temps_used));

>      for (i = 0; i < nb_temps; ++i) {

>          s->temps[i].state_ptr = NULL;

>      }

> @@ -727,7 +717,7 @@ void tcg_optimize(TCGContext *s)

>          CASE_OP_32_64(rotr):

>              if (arg_is_const(op->args[1])

>                  && arg_info(op->args[1])->val == 0) {

> -                tcg_opt_gen_movi(s, op, op->args[0], 0);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);

>                  continue;

>              }

>              break;

> @@ -1054,7 +1044,7 @@ void tcg_optimize(TCGContext *s)

>

>          if (partmask == 0) {

>              tcg_debug_assert(nb_oargs == 1);

> -            tcg_opt_gen_movi(s, op, op->args[0], 0);

> +            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);

>              continue;

>          }

>          if (affected == 0) {

> @@ -1071,7 +1061,7 @@ void tcg_optimize(TCGContext *s)

>          CASE_OP_32_64(mulsh):

>              if (arg_is_const(op->args[2])

>                  && arg_info(op->args[2])->val == 0) {

> -                tcg_opt_gen_movi(s, op, op->args[0], 0);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);

>                  continue;

>              }

>              break;

> @@ -1098,7 +1088,7 @@ void tcg_optimize(TCGContext *s)

>          CASE_OP_32_64_VEC(sub):

>          CASE_OP_32_64_VEC(xor):

>              if (args_are_copies(op->args[1], op->args[2])) {

> -                tcg_opt_gen_movi(s, op, op->args[0], 0);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);

>                  continue;

>              }

>              break;

> @@ -1115,14 +1105,14 @@ void tcg_optimize(TCGContext *s)

>              break;

>          CASE_OP_32_64(movi):

>          case INDEX_op_dupi_vec:

> -            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);

> +            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);

>              break;

>

>          case INDEX_op_dup_vec:

>              if (arg_is_const(op->args[1])) {

>                  tmp = arg_info(op->args[1])->val;

>                  tmp = dup_const(TCGOP_VECE(op), tmp);

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1132,7 +1122,7 @@ void tcg_optimize(TCGContext *s)

>              if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {

>                  tmp = arg_info(op->args[1])->val;

>                  if (tmp == arg_info(op->args[2])->val) {

> -                    tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                      break;

>                  }

>              } else if (args_are_copies(op->args[1], op->args[2])) {

> @@ -1160,7 +1150,7 @@ void tcg_optimize(TCGContext *s)

>          case INDEX_op_extrh_i64_i32:

>              if (arg_is_const(op->args[1])) {

>                  tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1190,7 +1180,7 @@ void tcg_optimize(TCGContext *s)

>              if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {

>                  tmp = do_constant_folding(opc, arg_info(op->args[1])->val,

>                                            arg_info(op->args[2])->val);

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1201,7 +1191,7 @@ void tcg_optimize(TCGContext *s)

>                  TCGArg v = arg_info(op->args[1])->val;

>                  if (v != 0) {

>                      tmp = do_constant_folding(opc, v, 0);

> -                    tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  } else {

>                      tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);

>                  }

> @@ -1214,7 +1204,7 @@ void tcg_optimize(TCGContext *s)

>                  tmp = deposit64(arg_info(op->args[1])->val,

>                                  op->args[3], op->args[4],

>                                  arg_info(op->args[2])->val);

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1223,7 +1213,7 @@ void tcg_optimize(TCGContext *s)

>              if (arg_is_const(op->args[1])) {

>                  tmp = extract64(arg_info(op->args[1])->val,

>                                  op->args[2], op->args[3]);

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1232,7 +1222,7 @@ void tcg_optimize(TCGContext *s)

>              if (arg_is_const(op->args[1])) {

>                  tmp = sextract64(arg_info(op->args[1])->val,

>                                   op->args[2], op->args[3]);

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1249,7 +1239,7 @@ void tcg_optimize(TCGContext *s)

>                      tmp = (int32_t)(((uint32_t)v1 >> shr) |

>                                      ((uint32_t)v2 << (32 - shr)));

>                  }

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1258,7 +1248,7 @@ void tcg_optimize(TCGContext *s)

>              tmp = do_constant_folding_cond(opc, op->args[1],

>                                             op->args[2], op->args[3]);

>              if (tmp != 2) {

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>                  break;

>              }

>              goto do_default;

> @@ -1268,7 +1258,7 @@ void tcg_optimize(TCGContext *s)

>                                             op->args[1], op->args[2]);

>              if (tmp != 2) {

>                  if (tmp) {

> -                    bitmap_zero(temps_used.l, nb_temps);

> +                    memset(&temps_used, 0, sizeof(temps_used));

>                      op->opc = INDEX_op_br;

>                      op->args[0] = op->args[3];

>                  } else {

> @@ -1314,7 +1304,7 @@ void tcg_optimize(TCGContext *s)

>                  uint64_t a = ((uint64_t)ah << 32) | al;

>                  uint64_t b = ((uint64_t)bh << 32) | bl;

>                  TCGArg rl, rh;

> -                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

> +                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

>

>                  if (opc == INDEX_op_add2_i32) {

>                      a += b;

> @@ -1324,8 +1314,8 @@ void tcg_optimize(TCGContext *s)

>

>                  rl = op->args[0];

>                  rh = op->args[1];

> -                tcg_opt_gen_movi(s, op, rl, (int32_t)a);

> -                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));

> +                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);

> +                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));

>                  break;

>              }

>              goto do_default;

> @@ -1336,12 +1326,12 @@ void tcg_optimize(TCGContext *s)

>                  uint32_t b = arg_info(op->args[3])->val;

>                  uint64_t r = (uint64_t)a * b;

>                  TCGArg rl, rh;

> -                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

> +                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

>

>                  rl = op->args[0];

>                  rh = op->args[1];

> -                tcg_opt_gen_movi(s, op, rl, (int32_t)r);

> -                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));

> +                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);

> +                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));

>                  break;

>              }

>              goto do_default;

> @@ -1352,7 +1342,7 @@ void tcg_optimize(TCGContext *s)

>              if (tmp != 2) {

>                  if (tmp) {

>              do_brcond_true:

> -                    bitmap_zero(temps_used.l, nb_temps);

> +                    memset(&temps_used, 0, sizeof(temps_used));

>                      op->opc = INDEX_op_br;

>                      op->args[0] = op->args[5];

>                  } else {

> @@ -1368,7 +1358,7 @@ void tcg_optimize(TCGContext *s)

>                  /* Simplify LT/GE comparisons vs zero to a single compare

>                     vs the high word of the input.  */

>              do_brcond_high:

> -                bitmap_zero(temps_used.l, nb_temps);

> +                memset(&temps_used, 0, sizeof(temps_used));

>                  op->opc = INDEX_op_brcond_i32;

>                  op->args[0] = op->args[1];

>                  op->args[1] = op->args[3];

> @@ -1394,7 +1384,7 @@ void tcg_optimize(TCGContext *s)

>                      goto do_default;

>                  }

>              do_brcond_low:

> -                bitmap_zero(temps_used.l, nb_temps);

> +                memset(&temps_used, 0, sizeof(temps_used));

>                  op->opc = INDEX_op_brcond_i32;

>                  op->args[1] = op->args[2];

>                  op->args[2] = op->args[4];

> @@ -1429,7 +1419,7 @@ void tcg_optimize(TCGContext *s)

>                                              op->args[5]);

>              if (tmp != 2) {

>              do_setcond_const:

> -                tcg_opt_gen_movi(s, op, op->args[0], tmp);

> +                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);

>              } else if ((op->args[5] == TCG_COND_LT

>                          || op->args[5] == TCG_COND_GE)

>                         && arg_is_const(op->args[3])

> @@ -1514,7 +1504,7 @@ void tcg_optimize(TCGContext *s)

>                 block, otherwise we only trash the output args.  "mask" is

>                 the non-zero bits mask for the first output arg.  */

>              if (def->flags & TCG_OPF_BB_END) {

> -                bitmap_zero(temps_used.l, nb_temps);

> +                memset(&temps_used, 0, sizeof(temps_used));

>              } else {

>          do_reset_output:

>                  for (i = 0; i < nb_oargs; i++) {

> --

> 2.25.1

>

>
Richard Henderson Jan. 16, 2021, 5:24 p.m. UTC | #2
On 1/15/21 1:03 PM, Alistair Francis wrote:
> I run QEMU with these arguments:

> 

> ./build/riscv32-softmmu/qemu-system-riscv32 \

>     -machine virt -serial mon:stdio -serial null -nographic \

>     -append "root=/dev/vda rw highres=off  console=ttyS0 ip=dhcp earlycon=sbi" \

>     -device virtio-net-device,netdev=net0,mac=52:54:00:12:34:02

> -netdev user,id=net0 \

>     -object rng-random,filename=/dev/urandom,id=rng0 -device

> virtio-rng-device,rng=rng0 \

>     -smp 4 -d guest_errors -m 256M \

>     -kernel ./Image \

>     -drive id=disk0,file=./core-image-minimal-qemuriscv32.ext4,if=none,format=raw

> \

>     -device virtio-blk-device,drive=disk0 \

>     -bios default

> 

> I am uploading the images to:

> https://nextcloud.alistair23.me/index.php/s/MQFyGGNLPZjLZPH


I don't replicate the assertion failure, I get to

/sbin/init: error while loading shared libraries: libkmod.so.2: cannot open
shared object file: Error 74
[    0.819845] Kernel panic - not syncing: Attempted to kill init!
exitcode=0x00007f00
[    0.820430] CPU: 1 PID: 1 Comm: init Not tainted 5.11.0-rc3 #1


r~
Laurent Vivier Jan. 18, 2021, 8:17 p.m. UTC | #3
On 16/01/2021 18:24, Richard Henderson wrote:
> On 1/15/21 1:03 PM, Alistair Francis wrote:

>> I run QEMU with these arguments:

>>

>> ./build/riscv32-softmmu/qemu-system-riscv32 \

>>     -machine virt -serial mon:stdio -serial null -nographic \

>>     -append "root=/dev/vda rw highres=off  console=ttyS0 ip=dhcp earlycon=sbi" \

>>     -device virtio-net-device,netdev=net0,mac=52:54:00:12:34:02

>> -netdev user,id=net0 \

>>     -object rng-random,filename=/dev/urandom,id=rng0 -device

>> virtio-rng-device,rng=rng0 \

>>     -smp 4 -d guest_errors -m 256M \

>>     -kernel ./Image \

>>     -drive id=disk0,file=./core-image-minimal-qemuriscv32.ext4,if=none,format=raw

>> \

>>     -device virtio-blk-device,drive=disk0 \

>>     -bios default

>>

>> I am uploading the images to:

>> https://nextcloud.alistair23.me/index.php/s/MQFyGGNLPZjLZPH

> 

> I don't replicate the assertion failure, I get to

> 

> /sbin/init: error while loading shared libraries: libkmod.so.2: cannot open

> shared object file: Error 74

> [    0.819845] Kernel panic - not syncing: Attempted to kill init!

> exitcode=0x00007f00

> [    0.820430] CPU: 1 PID: 1 Comm: init Not tainted 5.11.0-rc3 #1


This commit breaks the build of my hello world test program with mips64el/stretch guest
(and I guess some others too).

cat > $CHROOT/tmp/hello.c <<EOF
#include <stdio.h>
int main(void)
{
    printf("Hello World!\n");
    return 0;
}
EOF

unshare --time --ipc --uts --pid --fork --kill-child --mount --mount-proc --root \
        $CHROOT gcc /tmp/hello.c -o /tmp/hello
/tmp/hello.c:1:0: internal compiler error: Segmentation fault
 #include <stdio.h>

executable file is not ELF
Please submit a full bug report,
with preprocessed source if appropriate.
See <file:///usr/share/doc/gcc-6/README.Bugs> for instructions.

# gcc --version
gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Any idea?

Thanks,
Laurent
Richard Henderson Jan. 18, 2021, 9:03 p.m. UTC | #4
On 1/18/21 10:17 AM, Laurent Vivier wrote:
> This commit breaks the build of my hello world test program with mips64el/stretch guest

> (and I guess some others too).

> 

> cat > $CHROOT/tmp/hello.c <<EOF

> #include <stdio.h>

> int main(void)

> {

>     printf("Hello World!\n");

>     return 0;

> }

> EOF

> 

> unshare --time --ipc --uts --pid --fork --kill-child --mount --mount-proc --root \

>         $CHROOT gcc /tmp/hello.c -o /tmp/hello

> /tmp/hello.c:1:0: internal compiler error: Segmentation fault

>  #include <stdio.h>

> 

> executable file is not ELF

> Please submit a full bug report,

> with preprocessed source if appropriate.

> See <file:///usr/share/doc/gcc-6/README.Bugs> for instructions.

> 

> # gcc --version

> gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516

> Copyright (C) 2016 Free Software Foundation, Inc.

> This is free software; see the source for copying conditions.  There is NO

> warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

> 

> Any idea?


Working on it:

https://bugs.launchpad.net/bugs/1912065

There's a temp hack in there that may work for you.  With no change, you'll see
an assert instead of a segv if you --enable-debug-tcg.


r~
Richard W.M. Jones Feb. 1, 2021, 8:45 p.m. UTC | #5
This commit breaks running certain s390x binaries, at least
the "mount" command (or a library it uses) breaks.

More details in this BZ:

https://bugzilla.redhat.com/show_bug.cgi?id=1922248

Could we revert this change since it seems to have caused other
problems as well?

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
Fedora Windows cross-compiler. Compile Windows programs, test, and
build Windows installers. Over 100 libraries supported.
http://fedoraproject.org/wiki/MinGW
Richard W.M. Jones Feb. 1, 2021, 8:53 p.m. UTC | #6
On Mon, Feb 01, 2021 at 08:45:47PM +0000, Richard W.M. Jones wrote:
> This commit breaks running certain s390x binaries, at least

> the "mount" command (or a library it uses) breaks.

> 

> More details in this BZ:

> 

> https://bugzilla.redhat.com/show_bug.cgi?id=1922248

> 

> Could we revert this change since it seems to have caused other

> problems as well?


BTW I think the problem I am seeing is a bit different from the other
ones that were reported.  This commit causes the guest binary to
malfunction.  qemu itself does not emit any warning, nor crash.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines.  Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v
Richard Henderson Feb. 4, 2021, 2:22 a.m. UTC | #7
On 2/1/21 10:45 AM, Richard W.M. Jones wrote:
> This commit breaks running certain s390x binaries, at least

> the "mount" command (or a library it uses) breaks.

> 

> More details in this BZ:

> 

> https://bugzilla.redhat.com/show_bug.cgi?id=1922248

> 

> Could we revert this change since it seems to have caused other

> problems as well?


Well, the other problems have been fixed (which were in fact latent, and could
have been produced by other means).  I would not like to sideline this patch
set indefinitely.

Could you give me some help extracting the relevant binaries?  "Begin with an
s390x host" is a non-starter.

FWIW, with qemu-system-s390x, booting debian, building qemu-s390x, and running
"/bin/mount -t proc proc /mnt" under double-emulation does not show the bug.

I suspect that's because debian targets a relatively old s390x cpu, and that
yours is using the relatively new vector instructions.  But I don't know.

What I do know is that current qemu doesn't seem to boot current fedora:

 $ ../bld/qemu-system-s390x -nographic -m 4G -cpu max -drive
file=Fedora-Server-netinst-s390x-33-1.2.iso,format=raw,if=virtio
 qemu-system-s390x: warning: 'msa5-base' requires 'kimd-sha-512'.
 qemu-system-s390x: warning: 'msa5-base' requires 'klmd-sha-512'.
 LOADPARM=[        ]
 Using virtio-blk.
 ISO boot image size verified

 KASLR disabled: CPU has no PRNG
 Linux version 5.8.15-301.fc33.s390x
(mockbuild@buildvm-s390x-07.s390.fedoraproject.org) 1 SMP Thu Oct 15 15:55:57
UTC 2020Kernel fault: interruption code 0005 ilc:2
 PSW : 0000200180000000 00000000000124c4
       R:0 T:0 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:0 CC:2 PM:0 RI:0 EA:3
 GPRS: 0000000000000000 0000000b806a2da6 7aa19c5cbb980703 95f62d65812b83ab
       d5e42882af203615 0000000b806a2da0 0000000000000000 0000000000000000
       00000000000230e8 0000000001438500 0000000001720320 0000000000000000
       0000000001442718 0000000000010070 0000000000012482 000000000000bf20

Which makes me thing that fedora 33 is now targeting a cpu that is too new and
not actually supported by tcg.


r~
David Hildenbrand Feb. 4, 2021, 6:41 a.m. UTC | #8
> Am 04.02.2021 um 03:22 schrieb Richard Henderson <richard.henderson@linaro.org>:

> 

> On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

>> This commit breaks running certain s390x binaries, at least

>> the "mount" command (or a library it uses) breaks.

>> 

>> More details in this BZ:

>> 

>> https://bugzilla.redhat.com/show_bug.cgi?id=1922248

>> 

>> Could we revert this change since it seems to have caused other

>> problems as well?

> 

> Well, the other problems have been fixed (which were in fact latent, and could

> have been produced by other means).  I would not like to sideline this patch

> set indefinitely.

> 

> Could you give me some help extracting the relevant binaries?  "Begin with an

> s390x host" is a non-starter.

> 


Hi,

I‘m planning on reproducing it today or tomorrow. Especially, finding a reproducer and trying reproducing on x86-64 host.

> FWIW, with qemu-system-s390x, booting debian, building qemu-s390x, and running

> "/bin/mount -t proc proc /mnt" under double-emulation does not show the bug.

> 

> I suspect that's because debian targets a relatively old s390x cpu, and that

> yours is using the relatively new vector instructions.  But I don't know.

> 

> What I do know is that current qemu doesn't seem to boot current fedora:

> 

> $ ../bld/qemu-system-s390x -nographic -m 4G -cpu max -drive

> file=Fedora-Server-netinst-s390x-33-1.2.iso,format=raw,if=virtio

> qemu-system-s390x: warning: 'msa5-base' requires 'kimd-sha-512'.

> qemu-system-s390x: warning: 'msa5-base' requires 'klmd-sha-512'.

> LOADPARM=[        ]

> Using virtio-blk.

> ISO boot image size verified

> 

> KASLR disabled: CPU has no PRNG

> Linux version 5.8.15-301.fc33.s390x

> (mockbuild@buildvm-s390x-07.s390.fedoraproject.org) 1 SMP Thu Oct 15 15:55:57

> UTC 2020Kernel fault: interruption code 0005 ilc:2

> PSW : 0000200180000000 00000000000124c4

>       R:0 T:0 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:0 CC:2 PM:0 RI:0 EA:3

> GPRS: 0000000000000000 0000000b806a2da6 7aa19c5cbb980703 95f62d65812b83ab

>       d5e42882af203615 0000000b806a2da0 0000000000000000 0000000000000000

>       00000000000230e8 0000000001438500 0000000001720320 0000000000000000

>       0000000001442718 0000000000010070 0000000000012482 000000000000bf20

> 

> Which makes me thing that fedora 33 is now targeting a cpu that is too new and

> not actually supported by tcg.

> 


Try rawhide instead, that worked when testing the clang build fixes. Alternatively, boot F33 via kernel and initrd.

The Fedora 33 iso is broken and cannot boot under KVM as well (the combined kernel+initrd file is messed up).

Cheers!

> 

> r~

>
David Hildenbrand Feb. 4, 2021, 7:55 a.m. UTC | #9
On 04.02.21 07:41, David Hildenbrand wrote:
> 

>> Am 04.02.2021 um 03:22 schrieb Richard Henderson <richard.henderson@linaro.org>:

>>

>> On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

>>> This commit breaks running certain s390x binaries, at least

>>> the "mount" command (or a library it uses) breaks.

>>>

>>> More details in this BZ:

>>>

>>> https://bugzilla.redhat.com/show_bug.cgi?id=1922248

>>>

>>> Could we revert this change since it seems to have caused other

>>> problems as well?

>>

>> Well, the other problems have been fixed (which were in fact latent, and could

>> have been produced by other means).  I would not like to sideline this patch

>> set indefinitely.

>>

>> Could you give me some help extracting the relevant binaries?  "Begin with an

>> s390x host" is a non-starter.

>>

> 

> Hi,

> 

> I‘m planning on reproducing it today or tomorrow. Especially, finding a reproducer and trying reproducing on x86-64 host.


FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X 
just fine from qcow2 (so "mount" seems to work in that environment as 
expected). Maybe it's really s390x-host specific? I'll give it a try.

-- 
Thanks,

David / dhildenb
David Hildenbrand Feb. 4, 2021, 8:38 a.m. UTC | #10
On 04.02.21 08:55, David Hildenbrand wrote:
> On 04.02.21 07:41, David Hildenbrand wrote:

>>

>>> Am 04.02.2021 um 03:22 schrieb Richard Henderson <richard.henderson@linaro.org>:

>>>

>>> On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

>>>> This commit breaks running certain s390x binaries, at least

>>>> the "mount" command (or a library it uses) breaks.

>>>>

>>>> More details in this BZ:

>>>>

>>>> https://bugzilla.redhat.com/show_bug.cgi?id=1922248

>>>>

>>>> Could we revert this change since it seems to have caused other

>>>> problems as well?

>>>

>>> Well, the other problems have been fixed (which were in fact latent, and could

>>> have been produced by other means).  I would not like to sideline this patch

>>> set indefinitely.

>>>

>>> Could you give me some help extracting the relevant binaries?  "Begin with an

>>> s390x host" is a non-starter.

>>>

>>

>> Hi,

>>

>> I‘m planning on reproducing it today or tomorrow. Especially, finding a reproducer and trying reproducing on x86-64 host.

> 

> FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X

> just fine from qcow2 (so "mount" seems to work in that environment as

> expected). Maybe it's really s390x-host specific? I'll give it a try.

> 


F33 qcow2 [1] fails booting on an s390x/TCG host.

I tried "-cpu qemu" and "-qemu qemu=vx=off". The same image boots on 
x86-64/TCG host just fine.


With

commit 8f17a975e60b773d7c366a81c0d9bbe304f30859
Author: Richard Henderson <richard.henderson@linaro.org>
Date:   Mon Mar 30 19:52:02 2020 -0700

     tcg/optimize: Adjust TempOptInfo allocation

The image boots just fine on s390x/TCG as well.


[1] 
https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

-- 
Thanks,

David / dhildenb
Richard W.M. Jones Feb. 4, 2021, 9:03 a.m. UTC | #11
On Thu, Feb 04, 2021 at 09:38:45AM +0100, David Hildenbrand wrote:
> On 04.02.21 08:55, David Hildenbrand wrote:

> >On 04.02.21 07:41, David Hildenbrand wrote:

> >>

> >>>Am 04.02.2021 um 03:22 schrieb Richard Henderson <richard.henderson@linaro.org>:

> >>>

> >>>On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

> >>>>This commit breaks running certain s390x binaries, at least

> >>>>the "mount" command (or a library it uses) breaks.

> >>>>

> >>>>More details in this BZ:

> >>>>

> >>>>https://bugzilla.redhat.com/show_bug.cgi?id=1922248

> >>>>

> >>>>Could we revert this change since it seems to have caused other

> >>>>problems as well?

> >>>

> >>>Well, the other problems have been fixed (which were in fact latent, and could

> >>>have been produced by other means).  I would not like to sideline this patch

> >>>set indefinitely.

> >>>

> >>>Could you give me some help extracting the relevant binaries?  "Begin with an

> >>>s390x host" is a non-starter.

> >>>

> >>

> >>Hi,

> >>

> >>I‘m planning on reproducing it today or tomorrow. Especially, finding a reproducer and trying reproducing on x86-64 host.

> >

> >FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X

> >just fine from qcow2 (so "mount" seems to work in that environment as

> >expected). Maybe it's really s390x-host specific? I'll give it a try.

> >

> 

> F33 qcow2 [1] fails booting on an s390x/TCG host.


What did the failure look like?

> I tried "-cpu qemu" and "-qemu qemu=vx=off". The same image boots on

> x86-64/TCG host just fine.

> 

> 

> With

> 

> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

> Author: Richard Henderson <richard.henderson@linaro.org>

> Date:   Mon Mar 30 19:52:02 2020 -0700

> 

>     tcg/optimize: Adjust TempOptInfo allocation

> 

> The image boots just fine on s390x/TCG as well.


Let me try this in a minute on my original test machine.

Rich.

> 

> [1] https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

> 

> -- 

> Thanks,

> 

> David / dhildenb


-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-top is 'top' for virtual machines.  Tiny program with many
powerful monitoring features, net stats, disk stats, logging, etc.
http://people.redhat.com/~rjones/virt-top
David Hildenbrand Feb. 4, 2021, 9:12 a.m. UTC | #12
On 04.02.21 10:03, Richard W.M. Jones wrote:
> On Thu, Feb 04, 2021 at 09:38:45AM +0100, David Hildenbrand wrote:

>> On 04.02.21 08:55, David Hildenbrand wrote:

>>> On 04.02.21 07:41, David Hildenbrand wrote:

>>>>

>>>>> Am 04.02.2021 um 03:22 schrieb Richard Henderson <richard.henderson@linaro.org>:

>>>>>

>>>>> On 2/1/21 10:45 AM, Richard W.M. Jones wrote:

>>>>>> This commit breaks running certain s390x binaries, at least

>>>>>> the "mount" command (or a library it uses) breaks.

>>>>>>

>>>>>> More details in this BZ:

>>>>>>

>>>>>> https://bugzilla.redhat.com/show_bug.cgi?id=1922248

>>>>>>

>>>>>> Could we revert this change since it seems to have caused other

>>>>>> problems as well?

>>>>>

>>>>> Well, the other problems have been fixed (which were in fact latent, and could

>>>>> have been produced by other means).  I would not like to sideline this patch

>>>>> set indefinitely.

>>>>>

>>>>> Could you give me some help extracting the relevant binaries?  "Begin with an

>>>>> s390x host" is a non-starter.

>>>>>

>>>>

>>>> Hi,

>>>>

>>>> I‘m planning on reproducing it today or tomorrow. Especially, finding a reproducer and trying reproducing on x86-64 host.

>>>

>>> FWIW, on an x86-64 host, I can boot F32, Fedora rawhide, and RHEL8.X

>>> just fine from qcow2 (so "mount" seems to work in that environment as

>>> expected). Maybe it's really s390x-host specific? I'll give it a try.

>>>

>>

>> F33 qcow2 [1] fails booting on an s390x/TCG host.

> 

> What did the failure look like?


It starts booting just fine until

[   10.869011] Core dump to |/bin/false pipe failed
[   10.915968] systemd[1]: Finished Create list of static device nodes for the current kernel.
[   10.946424] systemd[1]: systemd-journald.service: Main process exited, code=killed, status=31/SYS
[   10.966677] systemd[1]: systemd-journald.service: Failed with result 'signal'.
[   11.017545] systemd[1]: Failed to start Journal Service.
[FAILED] Failed to start Journal Service.
See 'systemctl status systemd-journald.service' for details.

which repeats a couple of times. Then things go nuts

[   32.488899] systemd[1]: Failed to start Rule-based Manager for Device Events and Files.
[FAILED] Failed to start Rule-based…r for Device Events and Files.
See 'systemctl status systemd-udevd.service' for details.
[   32.501449] systemd[1]: systemd-udevd.service: Scheduled restart job, restart counter is at 1.
[   32.502134] systemd[1]: Stopped Rule-based Manager for Device Events and Files.


Looks also related to /dev / udev.

> 

>> I tried "-cpu qemu" and "-qemu qemu=vx=off". The same image boots on

>> x86-64/TCG host just fine.

>>

>>

>> With

>>

>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

>> Author: Richard Henderson <richard.henderson@linaro.org>

>> Date:   Mon Mar 30 19:52:02 2020 -0700

>>

>>      tcg/optimize: Adjust TempOptInfo allocation

>>

>> The image boots just fine on s390x/TCG as well.

> 

> Let me try this in a minute on my original test machine.


That's the commit exactly before the problematic one (didn't want to mess with reverts for now).

-- 
Thanks,

David / dhildenb
Richard W.M. Jones Feb. 4, 2021, 9:29 a.m. UTC | #13
> > commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

> > Author: Richard Henderson <richard.henderson@linaro.org>

> > Date:   Mon Mar 30 19:52:02 2020 -0700

> > 

> >     tcg/optimize: Adjust TempOptInfo allocation

> > 

> > The image boots just fine on s390x/TCG as well.

> 

> Let me try this in a minute on my original test machine.


I got the wrong end of the stick as David pointed out in the other email.

However I did test things again this morning (all on s390 host), and
current head (1ed9228f63ea4b) fails same as before ("mount" command
fails).

Also I downloaded:

  https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

and booted it on 1ed9228f63ea4b using this command:

  $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg -m 2048 -drive file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio -serial stdio

Lots of core dumps inside the guest, same as David saw.

I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust
TempOptInfo allocation"), rebuilt qemu, tested the same command and
cloud image, and that booted up much happier with no failures or core
dumps.

Isn't it kind of weird that this would only affect an s390 host?  I
don't understand why the host would make a difference if we're doing
TCG.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-df lists disk usage of guests without needing to install any
software inside the virtual machine.  Supports Linux and Windows.
http://people.redhat.com/~rjones/virt-df/
Richard W.M. Jones Feb. 4, 2021, 9:30 a.m. UTC | #14
I have this s390 machine for another 99 hours now, so if you want me
to test patches then send them my way.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org
David Hildenbrand Feb. 4, 2021, 9:37 a.m. UTC | #15
On 04.02.21 10:29, Richard W.M. Jones wrote:
>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

>>> Author: Richard Henderson <richard.henderson@linaro.org>

>>> Date:   Mon Mar 30 19:52:02 2020 -0700

>>>

>>>      tcg/optimize: Adjust TempOptInfo allocation

>>>

>>> The image boots just fine on s390x/TCG as well.

>>

>> Let me try this in a minute on my original test machine.

> 

> I got the wrong end of the stick as David pointed out in the other email.

> 

> However I did test things again this morning (all on s390 host), and

> current head (1ed9228f63ea4b) fails same as before ("mount" command

> fails).

> 

> Also I downloaded:

> 

>    https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

> 

> and booted it on 1ed9228f63ea4b using this command:

> 

>    $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg -m 2048 -drive file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio -serial stdio

> 

> Lots of core dumps inside the guest, same as David saw.

> 

> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

> TempOptInfo allocation"), rebuilt qemu, tested the same command and

> cloud image, and that booted up much happier with no failures or core

> dumps.

> 

> Isn't it kind of weird that this would only affect an s390 host?  I

> don't understand why the host would make a difference if we're doing

> TCG.


I assume an existing BUG in the s390x TCG backend ... which makes it 
harder to debug :)

-- 
Thanks,

David / dhildenb
Philippe Mathieu-Daudé Feb. 4, 2021, 4:04 p.m. UTC | #16
On 2/4/21 10:37 AM, David Hildenbrand wrote:
> On 04.02.21 10:29, Richard W.M. Jones wrote:

>>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

>>>> Author: Richard Henderson <richard.henderson@linaro.org>

>>>> Date:   Mon Mar 30 19:52:02 2020 -0700

>>>>

>>>>      tcg/optimize: Adjust TempOptInfo allocation

>>>>

>>>> The image boots just fine on s390x/TCG as well.

>>>

>>> Let me try this in a minute on my original test machine.

>>

>> I got the wrong end of the stick as David pointed out in the other email.

>>

>> However I did test things again this morning (all on s390 host), and

>> current head (1ed9228f63ea4b) fails same as before ("mount" command

>> fails).

>>

>> Also I downloaded:

>>

>>   

>> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

>>

>>

>> and booted it on 1ed9228f63ea4b using this command:

>>

>>    $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg

>> -m 2048 -drive

>> file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio

>> -serial stdio

>>

>> Lots of core dumps inside the guest, same as David saw.

>>

>> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

>> TempOptInfo allocation"), rebuilt qemu, tested the same command and

>> cloud image, and that booted up much happier with no failures or core

>> dumps.

>>

>> Isn't it kind of weird that this would only affect an s390 host?  I

>> don't understand why the host would make a difference if we're doing

>> TCG.

> 

> I assume an existing BUG in the s390x TCG backend ... which makes it

> harder to debug :)


This seems to fix it:

-- >8 --
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index 8517e552323..32d03dbfbaf 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,
TCGCond c, TCGReg r1,
                 op = (is_unsigned ? RIL_CLFI : RIL_CFI);
                 tcg_out_insn_RIL(s, op, r1, c2);
                 goto exit;
-            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {
-                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
-                tcg_out_insn_RIL(s, op, r1, c2);
-                goto exit;
+            } else if (is_unsigned) {
+                if (c2 == (uint32_t)c2) {
+                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);
+                    goto exit;
+                }
+            } else {
+                if (c2 == (int32_t)c2) {
+                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);
+                    goto exit;
+                }
             }
         }
---
Philippe Mathieu-Daudé Feb. 4, 2021, 4:08 p.m. UTC | #17
On Thu, Feb 4, 2021 at 5:04 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
> On 2/4/21 10:37 AM, David Hildenbrand wrote:

> > On 04.02.21 10:29, Richard W.M. Jones wrote:

> >>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

> >>>> Author: Richard Henderson <richard.henderson@linaro.org>

> >>>> Date:   Mon Mar 30 19:52:02 2020 -0700

> >>>>

> >>>>      tcg/optimize: Adjust TempOptInfo allocation

> >>>>

> >>>> The image boots just fine on s390x/TCG as well.

> >>>

> >>> Let me try this in a minute on my original test machine.

> >>

> >> I got the wrong end of the stick as David pointed out in the other email.

> >>

> >> However I did test things again this morning (all on s390 host), and

> >> current head (1ed9228f63ea4b) fails same as before ("mount" command

> >> fails).

> >>

> >> Also I downloaded:

> >>

> >>

> >> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

> >>

> >>

> >> and booted it on 1ed9228f63ea4b using this command:

> >>

> >>    $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg

> >> -m 2048 -drive

> >> file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio

> >> -serial stdio

> >>

> >> Lots of core dumps inside the guest, same as David saw.

> >>

> >> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

> >> TempOptInfo allocation"), rebuilt qemu, tested the same command and

> >> cloud image, and that booted up much happier with no failures or core

> >> dumps.

> >>

> >> Isn't it kind of weird that this would only affect an s390 host?  I

> >> don't understand why the host would make a difference if we're doing

> >> TCG.

> >

> > I assume an existing BUG in the s390x TCG backend ... which makes it

> > harder to debug :)

>

> This seems to fix it:

>

> -- >8 --

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e552323..32d03dbfbaf 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

> TCGCond c, TCGReg r1,

>                  op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                  tcg_out_insn_RIL(s, op, r1, c2);

>                  goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> -                tcg_out_insn_RIL(s, op, r1, c2);

> -                goto exit;

> +            } else if (is_unsigned) {

> +                if (c2 == (uint32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);


This path was working,

> +                    goto exit;

> +                }

> +            } else {

> +                if (c2 == (int32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);


but this one not ¯\_(ツ)_/¯

> +                    goto exit;

> +                }

>              }

>          }

> ---

>
David Hildenbrand Feb. 4, 2021, 4:12 p.m. UTC | #18
On 04.02.21 17:04, Philippe Mathieu-Daudé wrote:
> On 2/4/21 10:37 AM, David Hildenbrand wrote:

>> On 04.02.21 10:29, Richard W.M. Jones wrote:

>>>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

>>>>> Author: Richard Henderson <richard.henderson@linaro.org>

>>>>> Date:   Mon Mar 30 19:52:02 2020 -0700

>>>>>

>>>>>       tcg/optimize: Adjust TempOptInfo allocation

>>>>>

>>>>> The image boots just fine on s390x/TCG as well.

>>>>

>>>> Let me try this in a minute on my original test machine.

>>>

>>> I got the wrong end of the stick as David pointed out in the other email.

>>>

>>> However I did test things again this morning (all on s390 host), and

>>> current head (1ed9228f63ea4b) fails same as before ("mount" command

>>> fails).

>>>

>>> Also I downloaded:

>>>

>>>    

>>> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

>>>

>>>

>>> and booted it on 1ed9228f63ea4b using this command:

>>>

>>>     $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg

>>> -m 2048 -drive

>>> file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio

>>> -serial stdio

>>>

>>> Lots of core dumps inside the guest, same as David saw.

>>>

>>> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

>>> TempOptInfo allocation"), rebuilt qemu, tested the same command and

>>> cloud image, and that booted up much happier with no failures or core

>>> dumps.

>>>

>>> Isn't it kind of weird that this would only affect an s390 host?  I

>>> don't understand why the host would make a difference if we're doing

>>> TCG.

>>

>> I assume an existing BUG in the s390x TCG backend ... which makes it

>> harder to debug :)

> 

> This seems to fix it:

> 

> -- >8 --

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e552323..32d03dbfbaf 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

> TCGCond c, TCGReg r1,

>                   op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                   tcg_out_insn_RIL(s, op, r1, c2);

>                   goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> -                tcg_out_insn_RIL(s, op, r1, c2);

> -                goto exit;

> +            } else if (is_unsigned) {

> +                if (c2 == (uint32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);

> +                    goto exit;

> +                }

> +            } else {

> +                if (c2 == (int32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);

> +                    goto exit;

> +                }

>               }

>           }


Indeed, seems to work. Thanks!

... will try to review :)

-- 
Thanks,

David / dhildenb
David Hildenbrand Feb. 4, 2021, 4:29 p.m. UTC | #19
On 04.02.21 17:04, Philippe Mathieu-Daudé wrote:
> On 2/4/21 10:37 AM, David Hildenbrand wrote:

>> On 04.02.21 10:29, Richard W.M. Jones wrote:

>>>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

>>>>> Author: Richard Henderson <richard.henderson@linaro.org>

>>>>> Date:   Mon Mar 30 19:52:02 2020 -0700

>>>>>

>>>>>       tcg/optimize: Adjust TempOptInfo allocation

>>>>>

>>>>> The image boots just fine on s390x/TCG as well.

>>>>

>>>> Let me try this in a minute on my original test machine.

>>>

>>> I got the wrong end of the stick as David pointed out in the other email.

>>>

>>> However I did test things again this morning (all on s390 host), and

>>> current head (1ed9228f63ea4b) fails same as before ("mount" command

>>> fails).

>>>

>>> Also I downloaded:

>>>

>>>    

>>> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

>>>

>>>

>>> and booted it on 1ed9228f63ea4b using this command:

>>>

>>>     $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg

>>> -m 2048 -drive

>>> file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio

>>> -serial stdio

>>>

>>> Lots of core dumps inside the guest, same as David saw.

>>>

>>> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

>>> TempOptInfo allocation"), rebuilt qemu, tested the same command and

>>> cloud image, and that booted up much happier with no failures or core

>>> dumps.

>>>

>>> Isn't it kind of weird that this would only affect an s390 host?  I

>>> don't understand why the host would make a difference if we're doing

>>> TCG.

>>

>> I assume an existing BUG in the s390x TCG backend ... which makes it

>> harder to debug :)

> 

> This seems to fix it:

> 

> -- >8 --

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e552323..32d03dbfbaf 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

> TCGCond c, TCGReg r1,

>                   op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                   tcg_out_insn_RIL(s, op, r1, c2);

>                   goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> -                tcg_out_insn_RIL(s, op, r1, c2);

> -                goto exit;

> +            } else if (is_unsigned) {

> +                if (c2 == (uint32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);

> +                    goto exit;

> +                }

> +            } else {

> +                if (c2 == (int32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);

> +                    goto exit;

> +                }

>               }

>           }

> ---

> 


This works as well I think. Broken casting.

diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index 8517e55232..f41ca02492 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -1094,7 +1094,7 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                  op = (is_unsigned ? RIL_CLFI : RIL_CFI);
                  tcg_out_insn_RIL(s, op, r1, c2);
                  goto exit;
-            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {
+            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
                  op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

(int32_t)c2 will be converted to (uint32_t) first, then to (TCGArg).
Which is different than casting directly from (int32_t)c2 to (TCGArg).

Nasty.


-- 
Thanks,

David / dhildenb
Richard W.M. Jones Feb. 4, 2021, 4:48 p.m. UTC | #20
On Thu, Feb 04, 2021 at 05:04:22PM +0100, Philippe Mathieu-Daudé wrote:
> On 2/4/21 10:37 AM, David Hildenbrand wrote:

> > On 04.02.21 10:29, Richard W.M. Jones wrote:

> >>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

> >>>> Author: Richard Henderson <richard.henderson@linaro.org>

> >>>> Date:   Mon Mar 30 19:52:02 2020 -0700

> >>>>

> >>>>      tcg/optimize: Adjust TempOptInfo allocation

> >>>>

> >>>> The image boots just fine on s390x/TCG as well.

> >>>

> >>> Let me try this in a minute on my original test machine.

> >>

> >> I got the wrong end of the stick as David pointed out in the other email.

> >>

> >> However I did test things again this morning (all on s390 host), and

> >> current head (1ed9228f63ea4b) fails same as before ("mount" command

> >> fails).

> >>

> >> Also I downloaded:

> >>

> >>   

> >> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

> >>

> >>

> >> and booted it on 1ed9228f63ea4b using this command:

> >>

> >>    $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg

> >> -m 2048 -drive

> >> file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio

> >> -serial stdio

> >>

> >> Lots of core dumps inside the guest, same as David saw.

> >>

> >> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

> >> TempOptInfo allocation"), rebuilt qemu, tested the same command and

> >> cloud image, and that booted up much happier with no failures or core

> >> dumps.

> >>

> >> Isn't it kind of weird that this would only affect an s390 host?  I

> >> don't understand why the host would make a difference if we're doing

> >> TCG.

> > 

> > I assume an existing BUG in the s390x TCG backend ... which makes it

> > harder to debug :)

> 

> This seems to fix it:

> 

> -- >8 --

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e552323..32d03dbfbaf 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

> TCGCond c, TCGReg r1,

>                  op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                  tcg_out_insn_RIL(s, op, r1, c2);

>                  goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> -                tcg_out_insn_RIL(s, op, r1, c2);

> -                goto exit;

> +            } else if (is_unsigned) {

> +                if (c2 == (uint32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);

> +                    goto exit;

> +                }

> +            } else {

> +                if (c2 == (int32_t)c2) {

> +                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);

> +                    goto exit;

> +                }

>              }

>          }


Thanks - can confirm this fixes both observed problems here.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-builder quickly builds VMs from scratch
http://libguestfs.org/virt-builder.1.html
Eric Blake Feb. 4, 2021, 4:52 p.m. UTC | #21
On 2/4/21 10:04 AM, Philippe Mathieu-Daudé wrote:

>>> Isn't it kind of weird that this would only affect an s390 host?  I

>>> don't understand why the host would make a difference if we're doing

>>> TCG.

>>

>> I assume an existing BUG in the s390x TCG backend ... which makes it

>> harder to debug :)

> 

> This seems to fix it:

> 

> -- >8 --

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e552323..32d03dbfbaf 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

> TCGCond c, TCGReg r1,

>                  op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                  tcg_out_insn_RIL(s, op, r1, c2);

>                  goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {


In this code, you are comparing c2 to the type promotion of uint32_t and
int32_t.  That is, the conversion rules are as if you had done:

(common_type) c2 == (common_type) (uint32_t) (is_unsigned ? (uint32_t)c2
: (uint32_t)(int32_t)c2)

> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> -                tcg_out_insn_RIL(s, op, r1, c2);

> -                goto exit;

> +            } else if (is_unsigned) {

> +                if (c2 == (uint32_t)c2) {


whereas this is:

(common_type_unsigned)c2 == (common_type_unsigned)(uint32_t)c2

> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);

> +                    goto exit;

> +                }

> +            } else {

> +                if (c2 == (int32_t)c2) {


and this is:

(common_type_signed)c2 == (common_type_signed)(int32_t)c2

and the two may indeed use a different type.

I'm not sure (from the context shown here) what the type of c2 was, to
determine what the common types are, but as an example, on my 64-bit system,

(long)-1 == (int32_t)-1

is true (the 64-bit value (long)-1 is equal to the sign-extended 64-bit
value created from the signed 32-bit value (int32_t)-1), while

(unsigned long)-1 == (uint32_t)(int32_t)-1

is false (the 32-bit unsigned value (uint32_t) -1 promotes without sign
extension to the 64-bit (unsigned long) type, and 0xffffffffffffffff is
not equal to 0x00000000ffffffff).

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org
Eric Blake Feb. 4, 2021, 4:57 p.m. UTC | #22
On 2/4/21 10:29 AM, David Hildenbrand wrote:

>> +++ b/tcg/s390/tcg-target.c.inc

>> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

>> TCGCond c, TCGReg r1,

>>                   op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>>                   tcg_out_insn_RIL(s, op, r1, c2);

>>                   goto exit;

>> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 :

>> (int32_t)c2)) {

>> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

>> -                tcg_out_insn_RIL(s, op, r1, c2);

>> -                goto exit;

>> +            } else if (is_unsigned) {

>> +                if (c2 == (uint32_t)c2) {

>> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);

>> +                    goto exit;

>> +                }

>> +            } else {

>> +                if (c2 == (int32_t)c2) {

>> +                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);

>> +                    goto exit;

>> +                }

>>               }

>>           }

>> ---

>>

> 

> This works as well I think. Broken casting.

> 

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e55232..f41ca02492 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,7 +1094,7 @@ static int tgen_cmp(TCGContext *s, TCGType type,

> TCGCond c, TCGReg r1,

>                  op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                  tcg_out_insn_RIL(s, op, r1, c2);

>                  goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

> +            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 :

> (TCGArg)(int32_t)c2)) {


Yes, that also solves your problem in fewer lines of code, by doing the
round-trip parsing through the intermediate type and back to the desired
common type all at one expression, rather than separated at two
different points where intermediate type promotion rules interfere with
the work.

>                  op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> 

> (int32_t)c2 will be converted to (uint32_t) first, then to (TCGArg).

> Which is different than casting directly from (int32_t)c2 to (TCGArg).


Yep, the broken version was losing out on the desired sign extensions
because of the argument promotion rules of ?:

> 

> Nasty.

> 

> 


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org
Richard Henderson Feb. 4, 2021, 5:33 p.m. UTC | #23
On 2/4/21 6:29 AM, David Hildenbrand wrote:
> On 04.02.21 17:04, Philippe Mathieu-Daudé wrote:

>> On 2/4/21 10:37 AM, David Hildenbrand wrote:

>>> On 04.02.21 10:29, Richard W.M. Jones wrote:

>>>>>> commit 8f17a975e60b773d7c366a81c0d9bbe304f30859

>>>>>> Author: Richard Henderson <richard.henderson@linaro.org>

>>>>>> Date:   Mon Mar 30 19:52:02 2020 -0700

>>>>>>

>>>>>>       tcg/optimize: Adjust TempOptInfo allocation

>>>>>>

>>>>>> The image boots just fine on s390x/TCG as well.

>>>>>

>>>>> Let me try this in a minute on my original test machine.

>>>>

>>>> I got the wrong end of the stick as David pointed out in the other email.

>>>>

>>>> However I did test things again this morning (all on s390 host), and

>>>> current head (1ed9228f63ea4b) fails same as before ("mount" command

>>>> fails).

>>>>

>>>> Also I downloaded:

>>>>

>>>>   

>>>> https://dl.fedoraproject.org/pub/fedora-secondary/releases/33/Cloud/s390x/images/Fedora-Cloud-Base-33-1.2.s390x.qcow2

>>>>

>>>>

>>>>

>>>> and booted it on 1ed9228f63ea4b using this command:

>>>>

>>>>     $ ~/d/qemu/build/s390x-softmmu/qemu-system-s390x -machine accel=tcg

>>>> -m 2048 -drive

>>>> file=Fedora-Cloud-Base-33-1.2.s390x.qcow2,format=qcow2,if=virtio

>>>> -serial stdio

>>>>

>>>> Lots of core dumps inside the guest, same as David saw.

>>>>

>>>> I then reset qemu back to 8f17a975e60b773d ("tcg/optimize: Adjust

>>>> TempOptInfo allocation"), rebuilt qemu, tested the same command and

>>>> cloud image, and that booted up much happier with no failures or core

>>>> dumps.

>>>>

>>>> Isn't it kind of weird that this would only affect an s390 host?  I

>>>> don't understand why the host would make a difference if we're doing

>>>> TCG.

>>>

>>> I assume an existing BUG in the s390x TCG backend ... which makes it

>>> harder to debug :)

>>

>> This seems to fix it:

>>

>> -- >8 --

>> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

>> index 8517e552323..32d03dbfbaf 100644

>> --- a/tcg/s390/tcg-target.c.inc

>> +++ b/tcg/s390/tcg-target.c.inc

>> @@ -1094,10 +1094,16 @@ static int tgen_cmp(TCGContext *s, TCGType type,

>> TCGCond c, TCGReg r1,

>>                   op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>>                   tcg_out_insn_RIL(s, op, r1, c2);

>>                   goto exit;

>> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

>> -                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

>> -                tcg_out_insn_RIL(s, op, r1, c2);

>> -                goto exit;

>> +            } else if (is_unsigned) {

>> +                if (c2 == (uint32_t)c2) {

>> +                    tcg_out_insn_RIL(s, RIL_CLGFI, r1, c2);

>> +                    goto exit;

>> +                }

>> +            } else {

>> +                if (c2 == (int32_t)c2) {

>> +                    tcg_out_insn_RIL(s, RIL_CGFI, r1, c2);

>> +                    goto exit;

>> +                }

>>               }

>>           }

>> ---

>>

> 

> This works as well I think. Broken casting.

> 

> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc

> index 8517e55232..f41ca02492 100644

> --- a/tcg/s390/tcg-target.c.inc

> +++ b/tcg/s390/tcg-target.c.inc

> @@ -1094,7 +1094,7 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond

> c, TCGReg r1,

>                  op = (is_unsigned ? RIL_CLFI : RIL_CFI);

>                  tcg_out_insn_RIL(s, op, r1, c2);

>                  goto exit;

> -            } else if (c2 == (is_unsigned ? (uint32_t)c2 : (int32_t)c2)) {

> +            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 :

> (TCGArg)(int32_t)c2)) {

>                  op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);

> 

> (int32_t)c2 will be converted to (uint32_t) first, then to (TCGArg).

> Which is different than casting directly from (int32_t)c2 to (TCGArg).

> 

> Nasty.


Oh excellent.  Thanks for the find and analysis, guys.
I totally agree that this is a bug.

And it makes sense that extra constant propagation from the optimizer from the
patch in question might tickle this problem.



r~
diff mbox series

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 49bf1386c7..bda727d5ed 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -178,37 +178,6 @@  static bool args_are_copies(TCGArg arg1, TCGArg arg2)
     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 }
 
-static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
-{
-    const TCGOpDef *def;
-    TCGOpcode new_op;
-    uint64_t mask;
-    TempOptInfo *di = arg_info(dst);
-
-    def = &tcg_op_defs[op->opc];
-    if (def->flags & TCG_OPF_VECTOR) {
-        new_op = INDEX_op_dupi_vec;
-    } else if (def->flags & TCG_OPF_64BIT) {
-        new_op = INDEX_op_movi_i64;
-    } else {
-        new_op = INDEX_op_movi_i32;
-    }
-    op->opc = new_op;
-    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
-    op->args[0] = dst;
-    op->args[1] = val;
-
-    reset_temp(dst);
-    di->is_const = true;
-    di->val = val;
-    mask = val;
-    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
-        /* High bits of the destination are now garbage.  */
-        mask |= ~0xffffffffull;
-    }
-    di->mask = mask;
-}
-
 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
 {
     TCGTemp *dst_ts = arg_temp(dst);
@@ -259,6 +228,27 @@  static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
     }
 }
 
+static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
+                             TCGOp *op, TCGArg dst, uint64_t val)
+{
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    TCGType type;
+    TCGTemp *tv;
+
+    if (def->flags & TCG_OPF_VECTOR) {
+        type = TCGOP_VECL(op) + TCG_TYPE_V64;
+    } else if (def->flags & TCG_OPF_64BIT) {
+        type = TCG_TYPE_I64;
+    } else {
+        type = TCG_TYPE_I32;
+    }
+
+    /* Convert movi to mov with constant temp. */
+    tv = tcg_constant_internal(type, val);
+    init_ts_info(temps_used, tv);
+    tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
+}
+
 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 {
     uint64_t l64, h64;
@@ -622,7 +612,7 @@  void tcg_optimize(TCGContext *s)
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
 
-    bitmap_zero(temps_used.l, nb_temps);
+    memset(&temps_used, 0, sizeof(temps_used));
     for (i = 0; i < nb_temps; ++i) {
         s->temps[i].state_ptr = NULL;
     }
@@ -727,7 +717,7 @@  void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(rotr):
             if (arg_is_const(op->args[1])
                 && arg_info(op->args[1])->val == 0) {
-                tcg_opt_gen_movi(s, op, op->args[0], 0);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                 continue;
             }
             break;
@@ -1054,7 +1044,7 @@  void tcg_optimize(TCGContext *s)
 
         if (partmask == 0) {
             tcg_debug_assert(nb_oargs == 1);
-            tcg_opt_gen_movi(s, op, op->args[0], 0);
+            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
             continue;
         }
         if (affected == 0) {
@@ -1071,7 +1061,7 @@  void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(mulsh):
             if (arg_is_const(op->args[2])
                 && arg_info(op->args[2])->val == 0) {
-                tcg_opt_gen_movi(s, op, op->args[0], 0);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                 continue;
             }
             break;
@@ -1098,7 +1088,7 @@  void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(sub):
         CASE_OP_32_64_VEC(xor):
             if (args_are_copies(op->args[1], op->args[2])) {
-                tcg_opt_gen_movi(s, op, op->args[0], 0);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
                 continue;
             }
             break;
@@ -1115,14 +1105,14 @@  void tcg_optimize(TCGContext *s)
             break;
         CASE_OP_32_64(movi):
         case INDEX_op_dupi_vec:
-            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
+            tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);
             break;
 
         case INDEX_op_dup_vec:
             if (arg_is_const(op->args[1])) {
                 tmp = arg_info(op->args[1])->val;
                 tmp = dup_const(TCGOP_VECE(op), tmp);
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1132,7 +1122,7 @@  void tcg_optimize(TCGContext *s)
             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                 tmp = arg_info(op->args[1])->val;
                 if (tmp == arg_info(op->args[2])->val) {
-                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                     break;
                 }
             } else if (args_are_copies(op->args[1], op->args[2])) {
@@ -1160,7 +1150,7 @@  void tcg_optimize(TCGContext *s)
         case INDEX_op_extrh_i64_i32:
             if (arg_is_const(op->args[1])) {
                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1190,7 +1180,7 @@  void tcg_optimize(TCGContext *s)
             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                           arg_info(op->args[2])->val);
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1201,7 +1191,7 @@  void tcg_optimize(TCGContext *s)
                 TCGArg v = arg_info(op->args[1])->val;
                 if (v != 0) {
                     tmp = do_constant_folding(opc, v, 0);
-                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                    tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 } else {
                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                 }
@@ -1214,7 +1204,7 @@  void tcg_optimize(TCGContext *s)
                 tmp = deposit64(arg_info(op->args[1])->val,
                                 op->args[3], op->args[4],
                                 arg_info(op->args[2])->val);
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1223,7 +1213,7 @@  void tcg_optimize(TCGContext *s)
             if (arg_is_const(op->args[1])) {
                 tmp = extract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1232,7 +1222,7 @@  void tcg_optimize(TCGContext *s)
             if (arg_is_const(op->args[1])) {
                 tmp = sextract64(arg_info(op->args[1])->val,
                                  op->args[2], op->args[3]);
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1249,7 +1239,7 @@  void tcg_optimize(TCGContext *s)
                     tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                     ((uint32_t)v2 << (32 - shr)));
                 }
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1258,7 +1248,7 @@  void tcg_optimize(TCGContext *s)
             tmp = do_constant_folding_cond(opc, op->args[1],
                                            op->args[2], op->args[3]);
             if (tmp != 2) {
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1268,7 +1258,7 @@  void tcg_optimize(TCGContext *s)
                                            op->args[1], op->args[2]);
             if (tmp != 2) {
                 if (tmp) {
-                    bitmap_zero(temps_used.l, nb_temps);
+                    memset(&temps_used, 0, sizeof(temps_used));
                     op->opc = INDEX_op_br;
                     op->args[0] = op->args[3];
                 } else {
@@ -1314,7 +1304,7 @@  void tcg_optimize(TCGContext *s)
                 uint64_t a = ((uint64_t)ah << 32) | al;
                 uint64_t b = ((uint64_t)bh << 32) | bl;
                 TCGArg rl, rh;
-                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
+                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
 
                 if (opc == INDEX_op_add2_i32) {
                     a += b;
@@ -1324,8 +1314,8 @@  void tcg_optimize(TCGContext *s)
 
                 rl = op->args[0];
                 rh = op->args[1];
-                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
-                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
+                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
+                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
                 break;
             }
             goto do_default;
@@ -1336,12 +1326,12 @@  void tcg_optimize(TCGContext *s)
                 uint32_t b = arg_info(op->args[3])->val;
                 uint64_t r = (uint64_t)a * b;
                 TCGArg rl, rh;
-                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
+                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
 
                 rl = op->args[0];
                 rh = op->args[1];
-                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
-                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
+                tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
+                tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
                 break;
             }
             goto do_default;
@@ -1352,7 +1342,7 @@  void tcg_optimize(TCGContext *s)
             if (tmp != 2) {
                 if (tmp) {
             do_brcond_true:
-                    bitmap_zero(temps_used.l, nb_temps);
+                    memset(&temps_used, 0, sizeof(temps_used));
                     op->opc = INDEX_op_br;
                     op->args[0] = op->args[5];
                 } else {
@@ -1368,7 +1358,7 @@  void tcg_optimize(TCGContext *s)
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
             do_brcond_high:
-                bitmap_zero(temps_used.l, nb_temps);
+                memset(&temps_used, 0, sizeof(temps_used));
                 op->opc = INDEX_op_brcond_i32;
                 op->args[0] = op->args[1];
                 op->args[1] = op->args[3];
@@ -1394,7 +1384,7 @@  void tcg_optimize(TCGContext *s)
                     goto do_default;
                 }
             do_brcond_low:
-                bitmap_zero(temps_used.l, nb_temps);
+                memset(&temps_used, 0, sizeof(temps_used));
                 op->opc = INDEX_op_brcond_i32;
                 op->args[1] = op->args[2];
                 op->args[2] = op->args[4];
@@ -1429,7 +1419,7 @@  void tcg_optimize(TCGContext *s)
                                             op->args[5]);
             if (tmp != 2) {
             do_setcond_const:
-                tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
             } else if ((op->args[5] == TCG_COND_LT
                         || op->args[5] == TCG_COND_GE)
                        && arg_is_const(op->args[3])
@@ -1514,7 +1504,7 @@  void tcg_optimize(TCGContext *s)
                block, otherwise we only trash the output args.  "mask" is
                the non-zero bits mask for the first output arg.  */
             if (def->flags & TCG_OPF_BB_END) {
-                bitmap_zero(temps_used.l, nb_temps);
+                memset(&temps_used, 0, sizeof(temps_used));
             } else {
         do_reset_output:
                 for (i = 0; i < nb_oargs; i++) {