diff mbox series

[PULL,08/10] target/arm: Conditionalize some asserts on aarch32 support

Message ID 20181102171638.24069-9-peter.maydell@linaro.org
State Not Applicable
Headers show
Series target-arm queue | expand

Commit Message

Peter Maydell Nov. 2, 2018, 5:16 p.m. UTC
From: Richard Henderson <richard.henderson@linaro.org>


When populating id registers from kvm, on a host that doesn't support
aarch32 mode at all, neither arm_div nor jazelle will be supported either.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

Tested-by: Alex Bennée <alex.bennee@linaro.org>

Message-id: 20181102102025.3546-1-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

---
 target/arm/cpu.h |  5 +++++
 target/arm/cpu.c | 15 +++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

-- 
2.19.1

Comments

Laszlo Ersek May 24, 2019, 12:33 p.m. UTC | #1
Hi,

On 11/02/18 18:16, Peter Maydell wrote:
> From: Richard Henderson <richard.henderson@linaro.org>

> 

> When populating id registers from kvm, on a host that doesn't support

> aarch32 mode at all, neither arm_div nor jazelle will be supported either.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> Tested-by: Alex Bennée <alex.bennee@linaro.org>

> Message-id: 20181102102025.3546-1-richard.henderson@linaro.org

> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

> ---

>  target/arm/cpu.h |  5 +++++

>  target/arm/cpu.c | 15 +++++++++++++--

>  2 files changed, 18 insertions(+), 2 deletions(-)

> 

> diff --git a/target/arm/cpu.h b/target/arm/cpu.h

> index 8e6779936eb..b5eff79f73b 100644

> --- a/target/arm/cpu.h

> +++ b/target/arm/cpu.h

> @@ -3296,6 +3296,11 @@ static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)

>      return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;

>  }

>  

> +static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id)

> +{

> +    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2;

> +}

> +

>  static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)

>  {

>      return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;

> diff --git a/target/arm/cpu.c b/target/arm/cpu.c

> index 8f16e96b6c8..784a4c2dfcc 100644

> --- a/target/arm/cpu.c

> +++ b/target/arm/cpu.c

> @@ -774,6 +774,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>      CPUARMState *env = &cpu->env;

>      int pagebits;

>      Error *local_err = NULL;

> +    bool no_aa32 = false;

>  

>      /* If we needed to query the host kernel for the CPU features

>       * then it's possible that might have failed in the initfn, but

> @@ -820,6 +821,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>              set_feature(env, ARM_FEATURE_V7VE);

>          }

>      }

> +

> +    /*

> +     * There exist AArch64 cpus without AArch32 support.  When KVM

> +     * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.

> +     * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.

> +     */

> +    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {

> +        no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);

> +    }

> +

>      if (arm_feature(env, ARM_FEATURE_V7VE)) {

>          /* v7 Virtualization Extensions. In real hardware this implies

>           * EL2 and also the presence of the Security Extensions.

> @@ -829,7 +840,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>           * Security Extensions is ARM_FEATURE_EL3.

>           */

> -        assert(cpu_isar_feature(arm_div, cpu));

> +        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));


The assertion above fails on my AArch64 host (APM Mustang A3). Meaning
that my host CPU supports AArch32, but lacks "arm_div".

(My understanding is that this commit, i.e., 0f8d06f16c9d, relaxed the
assert originally added in commit 7e0cf8b47f0e ("target/arm: Convert
division from feature bits to isar0 tests", 2018-10-24). Can we relax it
even further?

Better yet: can we rework the code to emit a warning, rather than
aborting QEMU? Assertions are not the best tool IMHO for catching
unusual (or slightly non-conformant / early) hardware.)

Thanks,
Laszlo

>          set_feature(env, ARM_FEATURE_LPAE);

>          set_feature(env, ARM_FEATURE_V7);

>      }

> @@ -855,7 +866,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>      if (arm_feature(env, ARM_FEATURE_V6)) {

>          set_feature(env, ARM_FEATURE_V5);

>          if (!arm_feature(env, ARM_FEATURE_M)) {

> -            assert(cpu_isar_feature(jazelle, cpu));

> +            assert(no_aa32 || cpu_isar_feature(jazelle, cpu));

>              set_feature(env, ARM_FEATURE_AUXCR);

>          }

>      }

>
Laszlo Ersek May 24, 2019, 12:45 p.m. UTC | #2
On 05/24/19 14:33, Laszlo Ersek wrote:
> Hi,

>

> On 11/02/18 18:16, Peter Maydell wrote:

>> From: Richard Henderson <richard.henderson@linaro.org>

>>

>> When populating id registers from kvm, on a host that doesn't support

>> aarch32 mode at all, neither arm_div nor jazelle will be supported either.

>>

>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

>> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

>> Tested-by: Alex Bennée <alex.bennee@linaro.org>

>> Message-id: 20181102102025.3546-1-richard.henderson@linaro.org

>> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

>> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

>> ---

>>  target/arm/cpu.h |  5 +++++

>>  target/arm/cpu.c | 15 +++++++++++++--

>>  2 files changed, 18 insertions(+), 2 deletions(-)

>>

>> diff --git a/target/arm/cpu.h b/target/arm/cpu.h

>> index 8e6779936eb..b5eff79f73b 100644

>> --- a/target/arm/cpu.h

>> +++ b/target/arm/cpu.h

>> @@ -3296,6 +3296,11 @@ static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)

>>      return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;

>>  }

>>

>> +static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id)

>> +{

>> +    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2;

>> +}

>> +

>>  static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)

>>  {

>>      return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;

>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c

>> index 8f16e96b6c8..784a4c2dfcc 100644

>> --- a/target/arm/cpu.c

>> +++ b/target/arm/cpu.c

>> @@ -774,6 +774,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>      CPUARMState *env = &cpu->env;

>>      int pagebits;

>>      Error *local_err = NULL;

>> +    bool no_aa32 = false;

>>

>>      /* If we needed to query the host kernel for the CPU features

>>       * then it's possible that might have failed in the initfn, but

>> @@ -820,6 +821,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>              set_feature(env, ARM_FEATURE_V7VE);

>>          }

>>      }

>> +

>> +    /*

>> +     * There exist AArch64 cpus without AArch32 support.  When KVM

>> +     * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.

>> +     * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.

>> +     */

>> +    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {

>> +        no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);

>> +    }

>> +

>>      if (arm_feature(env, ARM_FEATURE_V7VE)) {

>>          /* v7 Virtualization Extensions. In real hardware this implies

>>           * EL2 and also the presence of the Security Extensions.

>> @@ -829,7 +840,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>>           * Security Extensions is ARM_FEATURE_EL3.

>>           */

>> -        assert(cpu_isar_feature(arm_div, cpu));

>> +        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>

> The assertion above fails on my AArch64 host (APM Mustang A3). Meaning

> that my host CPU supports AArch32, but lacks "arm_div".

>

> (My understanding is that this commit, i.e., 0f8d06f16c9d, relaxed the

> assert originally added in commit 7e0cf8b47f0e ("target/arm: Convert

> division from feature bits to isar0 tests", 2018-10-24). Can we relax it

> even further?

>

> Better yet: can we rework the code to emit a warning, rather than

> aborting QEMU? Assertions are not the best tool IMHO for catching

> unusual (or slightly non-conformant / early) hardware.)


To clarify, I intended to launch a 32-bit ARM guest (with KVM
acceleration) on my AArch64 host.

Libvirt generated the following QEMU command line:

LC_ALL=C \
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin \
QEMU_AUDIO_DRV=none \
/opt/qemu-installed-optimized/bin/qemu-system-aarch64 \
  -name guest=f28.32bit,debug-threads=on \
  -S \
  -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-f28.32bit/master-key.aes \
  -machine virt-4.1,accel=kvm,usb=off,dump-guest-core=off,gic-version=2 \
  -cpu host,aarch64=off \
  -drive file=/root/QEMU_EFI.fd.padded,if=pflash,format=raw,unit=0,readonly=on \
  -drive file=/var/lib/libvirt/qemu/nvram/f28.32bit_VARS.fd,if=pflash,format=raw,unit=1 \
  -m 8192 \
  -realtime mlock=off \
  -smp 8,sockets=8,cores=1,threads=1 \
  -uuid d525042e-1b37-4058-86ca-c6a2086e8485 \
  -no-user-config \
  -nodefaults \
  -chardev socket,id=charmonitor,fd=27,server,nowait \
  -mon chardev=charmonitor,id=monitor,mode=control \
  -rtc base=utc \
  -no-shutdown \
  -boot strict=on \
  -device pcie-root-port,port=0x8,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x1 \
  -device pcie-root-port,port=0x9,chassis=2,id=pci.2,bus=pcie.0,addr=0x1.0x1 \
  -device pcie-root-port,port=0xa,chassis=3,id=pci.3,bus=pcie.0,addr=0x1.0x2 \
  -device pcie-root-port,port=0xb,chassis=4,id=pci.4,bus=pcie.0,addr=0x1.0x3 \
  -device pcie-root-port,port=0xc,chassis=5,id=pci.5,bus=pcie.0,addr=0x1.0x4 \
  -device pcie-root-port,port=0xd,chassis=6,id=pci.6,bus=pcie.0,addr=0x1.0x5 \
  -device qemu-xhci,id=usb,bus=pci.1,addr=0x0 \
  -device virtio-scsi-pci,id=scsi0,bus=pci.2,addr=0x0 \
  -device virtio-serial-pci,id=virtio-serial0,bus=pci.3,addr=0x0 \
  -drive file=/var/lib/libvirt/images/f28.32bit.root.qcow2,format=qcow2,if=none,id=drive-scsi0-0-0-0,werror=enospc,cache=writeback,discard=unmap \
  -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1,write-cache=on \
  -drive file=/var/lib/libvirt/images/f28.32bit.home.qcow2,format=qcow2,if=none,id=drive-scsi0-0-0-1,werror=enospc,cache=writeback,discard=unmap \
  -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi0-0-0-1,id=scsi0-0-0-1,write-cache=on \
  -netdev tap,fd=29,id=hostnet0,vhost=on,vhostfd=30 \
  -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:6f:d1:c8,bus=pci.4,addr=0x0,romfile= \
  -chardev pty,id=charserial0 \
  -serial chardev:charserial0 \
  -chardev socket,id=charchannel0,fd=31,server,nowait \
  -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 \
  -device usb-tablet,id=input0,bus=usb.0,port=1 \
  -device usb-kbd,id=input1,bus=usb.0,port=2 \
  -vnc 127.0.0.1:0 \
  -device virtio-gpu-pci,id=video0,max_outputs=1,bus=pci.5,addr=0x0 \
  -object rng-random,id=objrng0,filename=/dev/urandom \
  -device virtio-rng-pci,rng=objrng0,id=rng0,max-bytes=1048576,period=1000,bus=pci.6,addr=0x0 \
  -msg timestamp=on

and then I got:

> qemu-system-aarch64: /root/src/upstream/qemu/target/arm/cpu.c:986:

> arm_cpu_realizefn: Assertion `no_aa32 || ({ ARMCPU *cpu_ = (cpu);

> isar_feature_arm_div(&cpu_->isar); })' failed.


QEMU was built at commit 8dc7fd56dd4f ("Merge remote-tracking branch
'remotes/philmd-gitlab/tags/fw_cfg-20190523-pull-request' into staging",
2019-05-23).

Thanks
Laszlo
Philippe Mathieu-Daudé May 24, 2019, 1:11 p.m. UTC | #3
On 5/24/19 2:33 PM, Laszlo Ersek wrote:
> Hi,

> 

> On 11/02/18 18:16, Peter Maydell wrote:

>> From: Richard Henderson <richard.henderson@linaro.org>

>>

>> When populating id registers from kvm, on a host that doesn't support

>> aarch32 mode at all, neither arm_div nor jazelle will be supported either.

>>

>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

>> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

>> Tested-by: Alex Bennée <alex.bennee@linaro.org>

>> Message-id: 20181102102025.3546-1-richard.henderson@linaro.org

>> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

>> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

>> ---

>>  target/arm/cpu.h |  5 +++++

>>  target/arm/cpu.c | 15 +++++++++++++--

>>  2 files changed, 18 insertions(+), 2 deletions(-)

>>

>> diff --git a/target/arm/cpu.h b/target/arm/cpu.h

>> index 8e6779936eb..b5eff79f73b 100644

>> --- a/target/arm/cpu.h

>> +++ b/target/arm/cpu.h

>> @@ -3296,6 +3296,11 @@ static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)

>>      return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;

>>  }

>>  

>> +static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id)

>> +{

>> +    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2;


FWIW here we check EL0, ...

>> +}

>> +

>>  static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)

>>  {

>>      return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;

>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c

>> index 8f16e96b6c8..784a4c2dfcc 100644

>> --- a/target/arm/cpu.c

>> +++ b/target/arm/cpu.c

>> @@ -774,6 +774,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>      CPUARMState *env = &cpu->env;

>>      int pagebits;

>>      Error *local_err = NULL;

>> +    bool no_aa32 = false;

>>  

>>      /* If we needed to query the host kernel for the CPU features

>>       * then it's possible that might have failed in the initfn, but

>> @@ -820,6 +821,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>              set_feature(env, ARM_FEATURE_V7VE);

>>          }

>>      }

>> +

>> +    /*

>> +     * There exist AArch64 cpus without AArch32 support.  When KVM

>> +     * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.


... then here the comment is about EL1.

>> +     * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.

>> +     */

>> +    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {

>> +        no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);

>> +    }

>> +

>>      if (arm_feature(env, ARM_FEATURE_V7VE)) {

>>          /* v7 Virtualization Extensions. In real hardware this implies

>>           * EL2 and also the presence of the Security Extensions.

>> @@ -829,7 +840,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>>           * Security Extensions is ARM_FEATURE_EL3.

>>           */

>> -        assert(cpu_isar_feature(arm_div, cpu));

>> +        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

> 

> The assertion above fails on my AArch64 host (APM Mustang A3). Meaning

> that my host CPU supports AArch32, but lacks "arm_div".

> 

> (My understanding is that this commit, i.e., 0f8d06f16c9d, relaxed the

> assert originally added in commit 7e0cf8b47f0e ("target/arm: Convert

> division from feature bits to isar0 tests", 2018-10-24). Can we relax it

> even further?

> 

> Better yet: can we rework the code to emit a warning, rather than

> aborting QEMU? Assertions are not the best tool IMHO for catching

> unusual (or slightly non-conformant / early) hardware.)

> 

> Thanks,

> Laszlo

> 

>>          set_feature(env, ARM_FEATURE_LPAE);

>>          set_feature(env, ARM_FEATURE_V7);

>>      }

>> @@ -855,7 +866,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>      if (arm_feature(env, ARM_FEATURE_V6)) {

>>          set_feature(env, ARM_FEATURE_V5);

>>          if (!arm_feature(env, ARM_FEATURE_M)) {

>> -            assert(cpu_isar_feature(jazelle, cpu));

>> +            assert(no_aa32 || cpu_isar_feature(jazelle, cpu));

>>              set_feature(env, ARM_FEATURE_AUXCR);

>>          }

>>      }

>>

>
Peter Maydell July 16, 2019, 12:03 p.m. UTC | #4
On Fri, 24 May 2019 at 13:33, Laszlo Ersek <lersek@redhat.com> wrote:
> On 11/02/18 18:16, Peter Maydell wrote:

> > @@ -829,7 +840,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

> >           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

> >           * Security Extensions is ARM_FEATURE_EL3.

> >           */

> > -        assert(cpu_isar_feature(arm_div, cpu));

> > +        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>

> The assertion above fails on my AArch64 host (APM Mustang A3). Meaning

> that my host CPU supports AArch32, but lacks "arm_div".


Hi; I just realized we left this assertion-failure bug report
unaddressed, so I had a look at it.

I tried to repro on my Mustang, but this works for me.
A CPU with AArch32 but without the Arm-mode division instructions
would be non-compliant (and very obviously so if tested), so
I suspect the actual problem is not with the hardware but with
the kernel not correctly reporting the ID registers to QEMU.
What kernel version are you using?

> Better yet: can we rework the code to emit a warning, rather than

> aborting QEMU? Assertions are not the best tool IMHO for catching

> unusual (or slightly non-conformant / early) hardware.)


The intention of the assertion really is to catch QEMU bugs
where we got the ID register values wrong in our emulated
CPUs. Perhaps we should relax all these assertions to only
testing if we're using TCG, not KVM ?

thanks
-- PMM
Richard Henderson July 16, 2019, 2:02 p.m. UTC | #5
On 7/16/19 12:03 PM, Peter Maydell wrote:
> The intention of the assertion really is to catch QEMU bugs

> where we got the ID register values wrong in our emulated

> CPUs. Perhaps we should relax all these assertions to only

> testing if we're using TCG, not KVM ?


Perhaps.  In some instances if ID register values are wrong we would then not
migrate properly, but none of the checks we're currently doing of this sort
would catch those particular cases.


r~
Peter Maydell July 16, 2019, 2:18 p.m. UTC | #6
On Tue, 16 Jul 2019 at 15:02, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 7/16/19 12:03 PM, Peter Maydell wrote:

> > The intention of the assertion really is to catch QEMU bugs

> > where we got the ID register values wrong in our emulated

> > CPUs. Perhaps we should relax all these assertions to only

> > testing if we're using TCG, not KVM ?

>

> Perhaps.  In some instances if ID register values are wrong we would then not

> migrate properly, but none of the checks we're currently doing of this sort

> would catch those particular cases.


In those cases we should probably print a warning and install
a migration-blocker, rather than asserting...

thanks
-- PMM
Richard Henderson July 16, 2019, 3:04 p.m. UTC | #7
On 7/16/19 2:18 PM, Peter Maydell wrote:
> On Tue, 16 Jul 2019 at 15:02, Richard Henderson

> <richard.henderson@linaro.org> wrote:

>>

>> On 7/16/19 12:03 PM, Peter Maydell wrote:

>>> The intention of the assertion really is to catch QEMU bugs

>>> where we got the ID register values wrong in our emulated

>>> CPUs. Perhaps we should relax all these assertions to only

>>> testing if we're using TCG, not KVM ?

>>

>> Perhaps.  In some instances if ID register values are wrong we would then not

>> migrate properly, but none of the checks we're currently doing of this sort

>> would catch those particular cases.

> 

> In those cases we should probably print a warning and install

> a migration-blocker, rather than asserting...


Ok, but I'm saying we don't assert for those either, at the moment.


r~
Laszlo Ersek July 16, 2019, 4:50 p.m. UTC | #8
On 07/16/19 14:03, Peter Maydell wrote:
> On Fri, 24 May 2019 at 13:33, Laszlo Ersek <lersek@redhat.com> wrote:

>> On 11/02/18 18:16, Peter Maydell wrote:

>>> @@ -829,7 +840,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>>>           * Security Extensions is ARM_FEATURE_EL3.

>>>           */

>>> -        assert(cpu_isar_feature(arm_div, cpu));

>>> +        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>>

>> The assertion above fails on my AArch64 host (APM Mustang A3). Meaning

>> that my host CPU supports AArch32, but lacks "arm_div".

> 

> Hi; I just realized we left this assertion-failure bug report

> unaddressed, so I had a look at it.


Yes, it's also tracked under LP#1830864; thanks for looking into it.

> 

> I tried to repro on my Mustang, but this works for me.

> A CPU with AArch32 but without the Arm-mode division instructions

> would be non-compliant (and very obviously so if tested), so

> I suspect the actual problem is not with the hardware but with

> the kernel not correctly reporting the ID registers to QEMU.

> What kernel version are you using?


So, I've just retested, with the QEMU binary I've left around from last
time. (This QEMU binary was built at upstream commit d247c8e7f4fc, with
Phil's v2 series applied on top, for regression testing:

[PATCH v2 0/9] hw/block/pflash_cfi01: Add DeviceReset() handler

http://mid.mail-archive.com/38281fa7-30f4-60ec-3357-3e1613c44dbe@redhat.com
)

The issue still reproduces, so it makes sense for me to look at the host
kernel version... Well, I'm afraid it won't help much, for an upstream
investigation:

  4.14.0-115.8.2.el7a.aarch64

This is the latest released kernel from "Red Hat Enterprise Linux for
ARM 64 7".

Thanks!
Laszlo

>> Better yet: can we rework the code to emit a warning, rather than

>> aborting QEMU? Assertions are not the best tool IMHO for catching

>> unusual (or slightly non-conformant / early) hardware.)

> 

> The intention of the assertion really is to catch QEMU bugs

> where we got the ID register values wrong in our emulated

> CPUs. Perhaps we should relax all these assertions to only

> testing if we're using TCG, not KVM ?

> 

> thanks

> -- PMM

>
Peter Maydell July 16, 2019, 4:59 p.m. UTC | #9
On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com> wrote:
> The issue still reproduces, so it makes sense for me to look at the host

> kernel version... Well, I'm afraid it won't help much, for an upstream

> investigation:

>

>   4.14.0-115.8.2.el7a.aarch64

>

> This is the latest released kernel from "Red Hat Enterprise Linux for

> ARM 64 7".


OK. (I'm using 4.15.0-51-generic from ubuntu).

Could you run with QEMU under gdb, and when it hits the
assertion go back up a stack frame to the arm_cpu_realizefn()
frame, and then "print /x cpu->isar" ? That should show us
what we think we've got as ID registers from the kernel.
(You might need to build QEMU with --enable-debug to get
useful enough debug info to do that, not sure.)

thanks
-- PMM
Laszlo Ersek July 16, 2019, 6:42 p.m. UTC | #10
On 07/16/19 18:59, Peter Maydell wrote:
> On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com> wrote:

>> The issue still reproduces, so it makes sense for me to look at the host

>> kernel version... Well, I'm afraid it won't help much, for an upstream

>> investigation:

>>

>>   4.14.0-115.8.2.el7a.aarch64

>>

>> This is the latest released kernel from "Red Hat Enterprise Linux for

>> ARM 64 7".

> 

> OK. (I'm using 4.15.0-51-generic from ubuntu).

> 

> Could you run with QEMU under gdb, and when it hits the

> assertion go back up a stack frame to the arm_cpu_realizefn()

> frame, and then "print /x cpu->isar" ? That should show us

> what we think we've got as ID registers from the kernel.

> (You might need to build QEMU with --enable-debug to get

> useful enough debug info to do that, not sure.)


(My qemu build script always builds QEMU in two configs, the difference
being --prefix and --enable-debug.)

This is what I got:

(gdb) frame 4
#4  0x00000000006a063c in arm_cpu_realizefn (dev=0x1761140,
    errp=0xffffffffe540)
    at .../qemu/target/arm/cpu.c:1159
1159            assert(no_aa32 || cpu_isar_feature(arm_div, cpu));
(gdb) print /x cpu->isar
$1 = {id_isar0 = 0x0, id_isar1 = 0x0, id_isar2 = 0x0, id_isar3 = 0x0,
  id_isar4 = 0x0, id_isar5 = 0x0, id_isar6 = 0x0, mvfr0 = 0x0,
  mvfr1 = 0x0, mvfr2 = 0x0, id_aa64isar0 = 0x0, id_aa64isar1 = 0x0,
  id_aa64pfr0 = 0x11, id_aa64pfr1 = 0x0, id_aa64mmfr0 = 0x0,
  id_aa64mmfr1 = 0x0}

Thanks!
Laszlo
Philippe Mathieu-Daudé July 16, 2019, 8:10 p.m. UTC | #11
On 7/16/19 8:42 PM, Laszlo Ersek wrote:
> On 07/16/19 18:59, Peter Maydell wrote:

>> On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com> wrote:

>>> The issue still reproduces, so it makes sense for me to look at the host

>>> kernel version... Well, I'm afraid it won't help much, for an upstream

>>> investigation:

>>>

>>>   4.14.0-115.8.2.el7a.aarch64

>>>

>>> This is the latest released kernel from "Red Hat Enterprise Linux for

>>> ARM 64 7".

>>

>> OK. (I'm using 4.15.0-51-generic from ubuntu).

>>

>> Could you run with QEMU under gdb, and when it hits the

>> assertion go back up a stack frame to the arm_cpu_realizefn()

>> frame, and then "print /x cpu->isar" ? That should show us

>> what we think we've got as ID registers from the kernel.

>> (You might need to build QEMU with --enable-debug to get

>> useful enough debug info to do that, not sure.)

> 

> (My qemu build script always builds QEMU in two configs, the difference

> being --prefix and --enable-debug.)

> 

> This is what I got:

> 

> (gdb) frame 4

> #4  0x00000000006a063c in arm_cpu_realizefn (dev=0x1761140,

>     errp=0xffffffffe540)

>     at .../qemu/target/arm/cpu.c:1159

> 1159            assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

> (gdb) print /x cpu->isar

> $1 = {id_isar0 = 0x0, id_isar1 = 0x0, id_isar2 = 0x0, id_isar3 = 0x0,

>   id_isar4 = 0x0, id_isar5 = 0x0, id_isar6 = 0x0, mvfr0 = 0x0,

>   mvfr1 = 0x0, mvfr2 = 0x0, id_aa64isar0 = 0x0, id_aa64isar1 = 0x0,

>   id_aa64pfr0 = 0x11, id_aa64pfr1 = 0x0, id_aa64mmfr0 = 0x0,

>   id_aa64mmfr1 = 0x0}


For ISAR0, DIVIDE=0

so cpu_isar_feature(arm_div, cpu)=false

For AA64PFR0, EL0=1, EL1=1.

EL0 = 1: EL0 can be executed in AArch64 state only.
EL1 = 1: EL1 can be executed in AArch64 state only.

so cpu_isar_feature(aa64_aa32, cpu)=false
then no_aa32=true

The commit description is "on a host that doesn't support aarch32 mode
at all, neither arm_div nor jazelle will be supported either."

Shouldn't we use a slighly different logic? Such:

-    assert(no_aa32 || cpu_isar_feature(arm_div, cpu));
+    assert(no_aa32 && !cpu_isar_feature(arm_div, cpu));
Laszlo Ersek July 17, 2019, 8:36 a.m. UTC | #12
On 07/16/19 22:10, Philippe Mathieu-Daudé wrote:
> On 7/16/19 8:42 PM, Laszlo Ersek wrote:

>> On 07/16/19 18:59, Peter Maydell wrote:

>>> On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com> wrote:

>>>> The issue still reproduces, so it makes sense for me to look at the host

>>>> kernel version... Well, I'm afraid it won't help much, for an upstream

>>>> investigation:

>>>>

>>>>   4.14.0-115.8.2.el7a.aarch64

>>>>

>>>> This is the latest released kernel from "Red Hat Enterprise Linux for

>>>> ARM 64 7".

>>>

>>> OK. (I'm using 4.15.0-51-generic from ubuntu).

>>>

>>> Could you run with QEMU under gdb, and when it hits the

>>> assertion go back up a stack frame to the arm_cpu_realizefn()

>>> frame, and then "print /x cpu->isar" ? That should show us

>>> what we think we've got as ID registers from the kernel.

>>> (You might need to build QEMU with --enable-debug to get

>>> useful enough debug info to do that, not sure.)

>>

>> (My qemu build script always builds QEMU in two configs, the difference

>> being --prefix and --enable-debug.)

>>

>> This is what I got:

>>

>> (gdb) frame 4

>> #4  0x00000000006a063c in arm_cpu_realizefn (dev=0x1761140,

>>     errp=0xffffffffe540)

>>     at .../qemu/target/arm/cpu.c:1159

>> 1159            assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>> (gdb) print /x cpu->isar

>> $1 = {id_isar0 = 0x0, id_isar1 = 0x0, id_isar2 = 0x0, id_isar3 = 0x0,

>>   id_isar4 = 0x0, id_isar5 = 0x0, id_isar6 = 0x0, mvfr0 = 0x0,

>>   mvfr1 = 0x0, mvfr2 = 0x0, id_aa64isar0 = 0x0, id_aa64isar1 = 0x0,

>>   id_aa64pfr0 = 0x11, id_aa64pfr1 = 0x0, id_aa64mmfr0 = 0x0,

>>   id_aa64mmfr1 = 0x0}

> 

> For ISAR0, DIVIDE=0

> 

> so cpu_isar_feature(arm_div, cpu)=false

> 

> For AA64PFR0, EL0=1, EL1=1.

> 

> EL0 = 1: EL0 can be executed in AArch64 state only.

> EL1 = 1: EL1 can be executed in AArch64 state only.

> 

> so cpu_isar_feature(aa64_aa32, cpu)=false

> then no_aa32=true

> 

> The commit description is "on a host that doesn't support aarch32 mode

> at all, neither arm_div nor jazelle will be supported either."

> 

> Shouldn't we use a slighly different logic? Such:

> 

> -    assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

> +    assert(no_aa32 && !cpu_isar_feature(arm_div, cpu));

> 


I'm unsure. The current formula seems to match the commit description.
Implication -- that is, "A implies B", (A-->B) -- is equivalent to (!A
|| B).

We have "no_aa32 || arm_div", which corresponds to "aa32 implies
arm_div" (aa32-->arm_div). And that seems to match exactly what Peter said.

The assert you suggest would fire on a host that supports at least one
of aa32 and arm_div (= the assertion would fail if (aa32 || arm_div)).
That would break on my host (hw+kernel) just the same, in the end. To
substitute the boolean values:

-    assert(false || false)
+    assert(false && true)

Thanks
Laszlo
Laszlo Ersek July 17, 2019, 9:22 a.m. UTC | #13
On 07/17/19 10:36, Laszlo Ersek wrote:
> On 07/16/19 22:10, Philippe Mathieu-Daudé wrote:

>> On 7/16/19 8:42 PM, Laszlo Ersek wrote:

>>> On 07/16/19 18:59, Peter Maydell wrote:

>>>> On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com>

>>>> wrote:

>>>>> The issue still reproduces, so it makes sense for me to look at

>>>>> the host kernel version... Well, I'm afraid it won't help much,

>>>>> for an upstream investigation:

>>>>>

>>>>>   4.14.0-115.8.2.el7a.aarch64

>>>>>

>>>>> This is the latest released kernel from "Red Hat Enterprise Linux

>>>>> for ARM 64 7".

>>>>

>>>> OK. (I'm using 4.15.0-51-generic from ubuntu).

>>>>

>>>> Could you run with QEMU under gdb, and when it hits the

>>>> assertion go back up a stack frame to the arm_cpu_realizefn()

>>>> frame, and then "print /x cpu->isar" ? That should show us

>>>> what we think we've got as ID registers from the kernel.

>>>> (You might need to build QEMU with --enable-debug to get

>>>> useful enough debug info to do that, not sure.)

>>>

>>> (My qemu build script always builds QEMU in two configs, the

>>> difference being --prefix and --enable-debug.)

>>>

>>> This is what I got:

>>>

>>> (gdb) frame 4

>>> #4  0x00000000006a063c in arm_cpu_realizefn (dev=0x1761140,

>>>     errp=0xffffffffe540)

>>>     at .../qemu/target/arm/cpu.c:1159

>>> 1159            assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>>> (gdb) print /x cpu->isar

>>> $1 = {id_isar0 = 0x0, id_isar1 = 0x0, id_isar2 = 0x0, id_isar3 = 0x0,

>>>   id_isar4 = 0x0, id_isar5 = 0x0, id_isar6 = 0x0, mvfr0 = 0x0,

>>>   mvfr1 = 0x0, mvfr2 = 0x0, id_aa64isar0 = 0x0, id_aa64isar1 = 0x0,

>>>   id_aa64pfr0 = 0x11, id_aa64pfr1 = 0x0, id_aa64mmfr0 = 0x0,

>>>   id_aa64mmfr1 = 0x0}

>>

>> For ISAR0, DIVIDE=0

>>

>> so cpu_isar_feature(arm_div, cpu)=false

>>

>> For AA64PFR0, EL0=1, EL1=1.

>>

>> EL0 = 1: EL0 can be executed in AArch64 state only.

>> EL1 = 1: EL1 can be executed in AArch64 state only.

>>

>> so cpu_isar_feature(aa64_aa32, cpu)=false

>> then no_aa32=true

>>

>> The commit description is "on a host that doesn't support aarch32

>> mode at all, neither arm_div nor jazelle will be supported either."

>>

>> Shouldn't we use a slighly different logic? Such:

>>

>> -    assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>> +    assert(no_aa32 && !cpu_isar_feature(arm_div, cpu));

>>

>

> I'm unsure. The current formula seems to match the commit description.

> Implication -- that is, "A implies B", (A-->B) -- is equivalent to (!A

> || B).

>

> We have "no_aa32 || arm_div", which corresponds to "aa32 implies

> arm_div" (aa32-->arm_div). And that seems to match exactly what Peter

> said.

>

> The assert you suggest would fire on a host that supports at least one

> of aa32 and arm_div (= the assertion would fail if (aa32 || arm_div)).

> That would break on my host (hw+kernel) just the same, in the end. To

> substitute the boolean values:

>

> -    assert(false || false)

> +    assert(false && true)


Hmmm wait a second. The ARMv8 ARM says, about ID_ISAR0_EL1:

> Divide, bits [27:24]

>

>     Indicates the implemented Divide instructions. Permitted values

>     are:

>     0000 None implemented.

>     0001 Adds SDIV and UDIV in the T32 instruction set.

>     0010 As for 0b0001, and adds SDIV and UDIV in the A32 instruction

>          set.

>     All other values are reserved.


So this means that (aa32 && !arm_div) *does* conform to the architecture
manual! And then, I understand where the bug is.

As I wrote above, the current C expression stands for:

  aa32 --> arm_div

which -- we see from the ARMv8 ARM -- is wrong.

Upon re-reading the commit message more carefully:

    on a host that doesn't support aarch32 mode at all, neither arm_div
    nor jazelle will be supported either

it's clear that the intent was *not* the implication encoded in the
source. Instead, the intent was the *reverse* implication, namely:

  !aa32 --> !arm_div    [1]

Or, equivalently (because, (A --> B) === (!A --> !B)):

  arm_div --> aa32      [2]

Now, if you encode any one of these (equivalent) formulae in C, with the
logical OR operator, you get:

- Starting from [1]:

  (A     --> B)        === (!A   || B)
  (!aa32 --> !arm_div) === (aa32 || !arm_div) === (!no_aa32 || !arm_div)

- Starting from [2]:

  (A       --> B)    === (!A       || B)
  (arm_div --> aa32) === (!arm_div || aa32) === (!arm_div || !no_aa32)

You can see that, regardless of whether we start with [1], or
equivalently, [2], we end up with the exact same predicate, logically
speaking. The final expressions only differ in C with regard to the
order of evaluation / shortcut behavior. We can pick whichever we prefer
(for whatever other reason).

FWIW, the language of the original commit message corresponds to [1].
So, if we want to stick with that, then the patch we need is:

> diff --git a/target/arm/cpu.c b/target/arm/cpu.c

> index e75a64a25a4b..ea84a3e11abb 100644

> --- a/target/arm/cpu.c

> +++ b/target/arm/cpu.c

> @@ -1382,8 +1382,13 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>           * include the various other features that V7VE implies.

>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>           * Security Extensions is ARM_FEATURE_EL3.

> +         *

> +         * Lack of aa32 support excludes arm_div support:

> +         *   no_aa32 --> !arm_div

> +         * Using the logical OR operator, the same is expressed as:

> +         *   !no_aa32 || !arm_div

>           */

> -        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

> +        assert(!no_aa32 || !cpu_isar_feature(arm_div, cpu));

>          set_feature(env, ARM_FEATURE_LPAE);

>          set_feature(env, ARM_FEATURE_V7);

>      }


If you guys agree, I can formally submit this patch.

Thanks
Laszlo
Laszlo Ersek July 17, 2019, 9:24 a.m. UTC | #14
On 07/17/19 11:22, Laszlo Ersek wrote:
> On 07/17/19 10:36, Laszlo Ersek wrote:

>> On 07/16/19 22:10, Philippe Mathieu-Daudé wrote:

>>> On 7/16/19 8:42 PM, Laszlo Ersek wrote:

>>>> On 07/16/19 18:59, Peter Maydell wrote:

>>>>> On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com>

>>>>> wrote:

>>>>>> The issue still reproduces, so it makes sense for me to look at

>>>>>> the host kernel version... Well, I'm afraid it won't help much,

>>>>>> for an upstream investigation:

>>>>>>

>>>>>>   4.14.0-115.8.2.el7a.aarch64

>>>>>>

>>>>>> This is the latest released kernel from "Red Hat Enterprise Linux

>>>>>> for ARM 64 7".

>>>>>

>>>>> OK. (I'm using 4.15.0-51-generic from ubuntu).

>>>>>

>>>>> Could you run with QEMU under gdb, and when it hits the

>>>>> assertion go back up a stack frame to the arm_cpu_realizefn()

>>>>> frame, and then "print /x cpu->isar" ? That should show us

>>>>> what we think we've got as ID registers from the kernel.

>>>>> (You might need to build QEMU with --enable-debug to get

>>>>> useful enough debug info to do that, not sure.)

>>>>

>>>> (My qemu build script always builds QEMU in two configs, the

>>>> difference being --prefix and --enable-debug.)

>>>>

>>>> This is what I got:

>>>>

>>>> (gdb) frame 4

>>>> #4  0x00000000006a063c in arm_cpu_realizefn (dev=0x1761140,

>>>>     errp=0xffffffffe540)

>>>>     at .../qemu/target/arm/cpu.c:1159

>>>> 1159            assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>>>> (gdb) print /x cpu->isar

>>>> $1 = {id_isar0 = 0x0, id_isar1 = 0x0, id_isar2 = 0x0, id_isar3 = 0x0,

>>>>   id_isar4 = 0x0, id_isar5 = 0x0, id_isar6 = 0x0, mvfr0 = 0x0,

>>>>   mvfr1 = 0x0, mvfr2 = 0x0, id_aa64isar0 = 0x0, id_aa64isar1 = 0x0,

>>>>   id_aa64pfr0 = 0x11, id_aa64pfr1 = 0x0, id_aa64mmfr0 = 0x0,

>>>>   id_aa64mmfr1 = 0x0}

>>>

>>> For ISAR0, DIVIDE=0

>>>

>>> so cpu_isar_feature(arm_div, cpu)=false

>>>

>>> For AA64PFR0, EL0=1, EL1=1.

>>>

>>> EL0 = 1: EL0 can be executed in AArch64 state only.

>>> EL1 = 1: EL1 can be executed in AArch64 state only.

>>>

>>> so cpu_isar_feature(aa64_aa32, cpu)=false

>>> then no_aa32=true

>>>

>>> The commit description is "on a host that doesn't support aarch32

>>> mode at all, neither arm_div nor jazelle will be supported either."

>>>

>>> Shouldn't we use a slighly different logic? Such:

>>>

>>> -    assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>>> +    assert(no_aa32 && !cpu_isar_feature(arm_div, cpu));

>>>

>>

>> I'm unsure. The current formula seems to match the commit description.

>> Implication -- that is, "A implies B", (A-->B) -- is equivalent to (!A

>> || B).

>>

>> We have "no_aa32 || arm_div", which corresponds to "aa32 implies

>> arm_div" (aa32-->arm_div). And that seems to match exactly what Peter

>> said.

>>

>> The assert you suggest would fire on a host that supports at least one

>> of aa32 and arm_div (= the assertion would fail if (aa32 || arm_div)).

>> That would break on my host (hw+kernel) just the same, in the end. To

>> substitute the boolean values:

>>

>> -    assert(false || false)

>> +    assert(false && true)

> 

> Hmmm wait a second. The ARMv8 ARM says, about ID_ISAR0_EL1:

> 

>> Divide, bits [27:24]

>>

>>     Indicates the implemented Divide instructions. Permitted values

>>     are:

>>     0000 None implemented.

>>     0001 Adds SDIV and UDIV in the T32 instruction set.

>>     0010 As for 0b0001, and adds SDIV and UDIV in the A32 instruction

>>          set.

>>     All other values are reserved.

> 

> So this means that (aa32 && !arm_div) *does* conform to the architecture

> manual! And then, I understand where the bug is.

> 

> As I wrote above, the current C expression stands for:

> 

>   aa32 --> arm_div

> 

> which -- we see from the ARMv8 ARM -- is wrong.

> 

> Upon re-reading the commit message more carefully:

> 

>     on a host that doesn't support aarch32 mode at all, neither arm_div

>     nor jazelle will be supported either

> 

> it's clear that the intent was *not* the implication encoded in the

> source. Instead, the intent was the *reverse* implication, namely:

> 

>   !aa32 --> !arm_div    [1]

> 

> Or, equivalently (because, (A --> B) === (!A --> !B)):

> 

>   arm_div --> aa32      [2]

> 

> Now, if you encode any one of these (equivalent) formulae in C, with the

> logical OR operator, you get:

> 

> - Starting from [1]:

> 

>   (A     --> B)        === (!A   || B)

>   (!aa32 --> !arm_div) === (aa32 || !arm_div) === (!no_aa32 || !arm_div)

> 

> - Starting from [2]:

> 

>   (A       --> B)    === (!A       || B)

>   (arm_div --> aa32) === (!arm_div || aa32) === (!arm_div || !no_aa32)

> 

> You can see that, regardless of whether we start with [1], or

> equivalently, [2], we end up with the exact same predicate, logically

> speaking. The final expressions only differ in C with regard to the

> order of evaluation / shortcut behavior. We can pick whichever we prefer

> (for whatever other reason).

> 

> FWIW, the language of the original commit message corresponds to [1].

> So, if we want to stick with that, then the patch we need is:

> 

>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c

>> index e75a64a25a4b..ea84a3e11abb 100644

>> --- a/target/arm/cpu.c

>> +++ b/target/arm/cpu.c

>> @@ -1382,8 +1382,13 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>           * include the various other features that V7VE implies.

>>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>>           * Security Extensions is ARM_FEATURE_EL3.

>> +         *

>> +         * Lack of aa32 support excludes arm_div support:

>> +         *   no_aa32 --> !arm_div

>> +         * Using the logical OR operator, the same is expressed as:

>> +         *   !no_aa32 || !arm_div

>>           */

>> -        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>> +        assert(!no_aa32 || !cpu_isar_feature(arm_div, cpu));

>>          set_feature(env, ARM_FEATURE_LPAE);

>>          set_feature(env, ARM_FEATURE_V7);

>>      }

> 

> If you guys agree, I can formally submit this patch.


NB: the same might apply to the "jazelle" feature; I didn't check.

Thanks
Laszlo
Laszlo Ersek July 17, 2019, 12:49 p.m. UTC | #15
On 07/17/19 11:22, Laszlo Ersek wrote:

> because, (A --> B) === (!A --> !B)


Obviously, it is impossible for me to write an email containing logical
formulae without at least one *crucial* typo.

The correct form of the above equivalence is:

  (A --> B) === (!B --> !A)

This typo does not affect the rest of my previous message -- while I
quoted the equivalence incorrectly, I did use it correctly.

Thanks & sorry
Laszlo
Laszlo Ersek July 17, 2019, 12:53 p.m. UTC | #16
On 07/17/19 11:22, Laszlo Ersek wrote:
> On 07/17/19 10:36, Laszlo Ersek wrote:


>> -        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>> +        assert(!no_aa32 || !cpu_isar_feature(arm_div, cpu));


BTW this can be pronounced in natural language as follows: "we either
have aa32 support, or if we don't, then we lack arm_div support too".

Thanks
Laszlo
Philippe Mathieu-Daudé July 17, 2019, 1:36 p.m. UTC | #17
On 7/17/19 11:22 AM, Laszlo Ersek wrote:
> On 07/17/19 10:36, Laszlo Ersek wrote:

>> On 07/16/19 22:10, Philippe Mathieu-Daudé wrote:

>>> On 7/16/19 8:42 PM, Laszlo Ersek wrote:

>>>> On 07/16/19 18:59, Peter Maydell wrote:

>>>>> On Tue, 16 Jul 2019 at 17:51, Laszlo Ersek <lersek@redhat.com>

>>>>> wrote:

>>>>>> The issue still reproduces, so it makes sense for me to look at

>>>>>> the host kernel version... Well, I'm afraid it won't help much,

>>>>>> for an upstream investigation:

>>>>>>

>>>>>>   4.14.0-115.8.2.el7a.aarch64

>>>>>>

>>>>>> This is the latest released kernel from "Red Hat Enterprise Linux

>>>>>> for ARM 64 7".

>>>>>

>>>>> OK. (I'm using 4.15.0-51-generic from ubuntu).

>>>>>

>>>>> Could you run with QEMU under gdb, and when it hits the

>>>>> assertion go back up a stack frame to the arm_cpu_realizefn()

>>>>> frame, and then "print /x cpu->isar" ? That should show us

>>>>> what we think we've got as ID registers from the kernel.

>>>>> (You might need to build QEMU with --enable-debug to get

>>>>> useful enough debug info to do that, not sure.)

>>>>

>>>> (My qemu build script always builds QEMU in two configs, the

>>>> difference being --prefix and --enable-debug.)

>>>>

>>>> This is what I got:

>>>>

>>>> (gdb) frame 4

>>>> #4  0x00000000006a063c in arm_cpu_realizefn (dev=0x1761140,

>>>>     errp=0xffffffffe540)

>>>>     at .../qemu/target/arm/cpu.c:1159

>>>> 1159            assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>>>> (gdb) print /x cpu->isar

>>>> $1 = {id_isar0 = 0x0, id_isar1 = 0x0, id_isar2 = 0x0, id_isar3 = 0x0,

>>>>   id_isar4 = 0x0, id_isar5 = 0x0, id_isar6 = 0x0, mvfr0 = 0x0,

>>>>   mvfr1 = 0x0, mvfr2 = 0x0, id_aa64isar0 = 0x0, id_aa64isar1 = 0x0,

>>>>   id_aa64pfr0 = 0x11, id_aa64pfr1 = 0x0, id_aa64mmfr0 = 0x0,

>>>>   id_aa64mmfr1 = 0x0}

>>>

>>> For ISAR0, DIVIDE=0

>>>

>>> so cpu_isar_feature(arm_div, cpu)=false

>>>

>>> For AA64PFR0, EL0=1, EL1=1.

>>>

>>> EL0 = 1: EL0 can be executed in AArch64 state only.

>>> EL1 = 1: EL1 can be executed in AArch64 state only.

>>>

>>> so cpu_isar_feature(aa64_aa32, cpu)=false

>>> then no_aa32=true

>>>

>>> The commit description is "on a host that doesn't support aarch32

>>> mode at all, neither arm_div nor jazelle will be supported either."

>>>

>>> Shouldn't we use a slighly different logic? Such:

>>>

>>> -    assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>>> +    assert(no_aa32 && !cpu_isar_feature(arm_div, cpu));

>>>

>>

>> I'm unsure. The current formula seems to match the commit description.

>> Implication -- that is, "A implies B", (A-->B) -- is equivalent to (!A

>> || B).

>>

>> We have "no_aa32 || arm_div", which corresponds to "aa32 implies

>> arm_div" (aa32-->arm_div). And that seems to match exactly what Peter

>> said.

>>

>> The assert you suggest would fire on a host that supports at least one

>> of aa32 and arm_div (= the assertion would fail if (aa32 || arm_div)).

>> That would break on my host (hw+kernel) just the same, in the end. To

>> substitute the boolean values:

>>

>> -    assert(false || false)

>> +    assert(false && true)

> 

> Hmmm wait a second. The ARMv8 ARM says, about ID_ISAR0_EL1:

> 

>> Divide, bits [27:24]

>>

>>     Indicates the implemented Divide instructions. Permitted values

>>     are:

>>     0000 None implemented.

>>     0001 Adds SDIV and UDIV in the T32 instruction set.

>>     0010 As for 0b0001, and adds SDIV and UDIV in the A32 instruction

>>          set.

>>     All other values are reserved.

> 

> So this means that (aa32 && !arm_div) *does* conform to the architecture

> manual! And then, I understand where the bug is.

> 

> As I wrote above, the current C expression stands for:

> 

>   aa32 --> arm_div

> 

> which -- we see from the ARMv8 ARM -- is wrong.

> 

> Upon re-reading the commit message more carefully:

> 

>     on a host that doesn't support aarch32 mode at all, neither arm_div

>     nor jazelle will be supported either

> 

> it's clear that the intent was *not* the implication encoded in the

> source. Instead, the intent was the *reverse* implication, namely:

> 

>   !aa32 --> !arm_div    [1]

> 

> Or, equivalently (because, (A --> B) === (!A --> !B)):


[Laszlo corrected this as:   (A --> B) === (!B --> !A)]

>   arm_div --> aa32      [2]

> 

> Now, if you encode any one of these (equivalent) formulae in C, with the

> logical OR operator, you get:

> 

> - Starting from [1]:

> 

>   (A     --> B)        === (!A   || B)

>   (!aa32 --> !arm_div) === (aa32 || !arm_div) === (!no_aa32 || !arm_div)

> 

> - Starting from [2]:

> 

>   (A       --> B)    === (!A       || B)

>   (arm_div --> aa32) === (!arm_div || aa32) === (!arm_div || !no_aa32)

> 

> You can see that, regardless of whether we start with [1], or

> equivalently, [2], we end up with the exact same predicate, logically

> speaking. The final expressions only differ in C with regard to the

> order of evaluation / shortcut behavior. We can pick whichever we prefer

> (for whatever other reason).


This makes sense.

I still wonder why this didn't assert on Peter's setup.

> 

> FWIW, the language of the original commit message corresponds to [1].

> So, if we want to stick with that, then the patch we need is:

> 

>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c

>> index e75a64a25a4b..ea84a3e11abb 100644

>> --- a/target/arm/cpu.c

>> +++ b/target/arm/cpu.c

>> @@ -1382,8 +1382,13 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

>>           * include the various other features that V7VE implies.

>>           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

>>           * Security Extensions is ARM_FEATURE_EL3.

>> +         *

>> +         * Lack of aa32 support excludes arm_div support:

>> +         *   no_aa32 --> !arm_div

>> +         * Using the logical OR operator, the same is expressed as:

>> +         *   !no_aa32 || !arm_div

>>           */

>> -        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

>> +        assert(!no_aa32 || !cpu_isar_feature(arm_div, cpu));

>>          set_feature(env, ARM_FEATURE_LPAE);

>>          set_feature(env, ARM_FEATURE_V7);

>>      }

> 

> If you guys agree, I can formally submit this patch.


Worthwhile, so it could get in for the next release.

Regards,

Phil.
Peter Maydell July 17, 2019, 1:45 p.m. UTC | #18
On Wed, 17 Jul 2019 at 10:22, Laszlo Ersek <lersek@redhat.com> wrote:
> Hmmm wait a second. The ARMv8 ARM says, about ID_ISAR0_EL1:

>

> > Divide, bits [27:24]

> >

> >     Indicates the implemented Divide instructions. Permitted values

> >     are:

> >     0000 None implemented.

> >     0001 Adds SDIV and UDIV in the T32 instruction set.

> >     0010 As for 0b0001, and adds SDIV and UDIV in the A32 instruction

> >          set.

> >     All other values are reserved.

>

> So this means that (aa32 && !arm_div) *does* conform to the architecture

> manual!


The architecture manual also says
"In ARMv8-A the only permitted value is 0010.", and in
earlier versions of the manual it says also that ARMv7VE
implies that we must have all the integer divide instructions.
And this assert is guarded by "if (arm_feature(env, ARM_FEATURE_V7VE))".
(This is what we're trying to test, really: something that's
providing AArch32 v7VE-or-better must include the divide insns.)

> Upon re-reading the commit message more carefully:

>

>     on a host that doesn't support aarch32 mode at all, neither arm_div

>     nor jazelle will be supported either


The point of this text is that "v8 AArch64 with no AArch32" is
an exception to the previous version of the check, which was
just "v7VE must mean we have the divide insns".

Similarly with Jazelle: what we were testing before commit 0f8d06f
was "v6 must imply Jazelle", which is true for everything except
the special case of "an AArch64 CPU with no AArch32 support" (which
can only happen for a KVM setup), and the test we are trying to
check after the commit is "v6 aarch32 must imply Jazelle".

> > diff --git a/target/arm/cpu.c b/target/arm/cpu.c

> > index e75a64a25a4b..ea84a3e11abb 100644

> > --- a/target/arm/cpu.c

> > +++ b/target/arm/cpu.c

> > @@ -1382,8 +1382,13 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

> >           * include the various other features that V7VE implies.

> >           * Presence of EL2 itself is ARM_FEATURE_EL2, and of the

> >           * Security Extensions is ARM_FEATURE_EL3.

> > +         *

> > +         * Lack of aa32 support excludes arm_div support:

> > +         *   no_aa32 --> !arm_div

> > +         * Using the logical OR operator, the same is expressed as:

> > +         *   !no_aa32 || !arm_div

> >           */

> > -        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));

> > +        assert(!no_aa32 || !cpu_isar_feature(arm_div, cpu));


This now fails to test what we wanted, because it will allow
an incorrect set of ID registers that define a v7VE CPU
without the arm_div feature to pass.

thanks
-- PMM
Peter Maydell July 17, 2019, 1:46 p.m. UTC | #19
On Wed, 17 Jul 2019 at 14:36, Philippe Mathieu-Daudé <philmd@redhat.com> wrote:
> I still wonder why this didn't assert on Peter's setup.


My setup does not assert because my host kernel correctly
provides the ID register values to QEMU. Laszlo's appears
to be providing all-zeroes, which then obviously breaks
assertions being made about the sanity of those ID register
values...

thanks
-- PMM
Laszlo Ersek July 17, 2019, 3:08 p.m. UTC | #20
On 07/17/19 15:46, Peter Maydell wrote:
> On Wed, 17 Jul 2019 at 14:36, Philippe Mathieu-Daudé <philmd@redhat.com> wrote:

>> I still wonder why this didn't assert on Peter's setup.

> 

> My setup does not assert because my host kernel correctly

> provides the ID register values to QEMU. Laszlo's appears

> to be providing all-zeroes, which then obviously breaks

> assertions being made about the sanity of those ID register

> values...


OK. Can you suggest a location that I should check in the host kernel?

Thanks!
Laszlo
Peter Maydell July 18, 2019, 12:30 p.m. UTC | #21
On Wed, 17 Jul 2019 at 16:08, Laszlo Ersek <lersek@redhat.com> wrote:
>

> On 07/17/19 15:46, Peter Maydell wrote:

> > On Wed, 17 Jul 2019 at 14:36, Philippe Mathieu-Daudé <philmd@redhat.com> wrote:

> >> I still wonder why this didn't assert on Peter's setup.

> >

> > My setup does not assert because my host kernel correctly

> > provides the ID register values to QEMU. Laszlo's appears

> > to be providing all-zeroes, which then obviously breaks

> > assertions being made about the sanity of those ID register

> > values...

>

> OK. Can you suggest a location that I should check in the host kernel?


I was about to write out the process of how we get these values
from the kernel, but as the first step of that I read through
QEMU's target/arm/kvm64.c:kvm_arm_get_host_cpu_features(),
which is the function which reads these values using the
KVM_GET_ONE_REG ioctl. It starts with an attempt to read
ID_AA64PFR0, and has a comment for the error-handling case:

        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a modern host kernel.
         */

I have 4.15, and don't hit this assert; you have 4.14 and do,
so I think you're going to be going through this codepath which
currently sets only ahcf->isar.id_aa64pfr0 and none of the other
ID register fields in the isar struct.

I'm not sure exactly which kernel commits added the ID register
reading support. (The relevant kernel code is in
arch/arm64/kvm/sys_regs.c I think.)

Anyway, I think we need to do at least one of:
 * enhance the "provide a minimal v8.0 default" code in this
   condition in kvm_arm_get_host_cpu_features() so that it
   populates the ID registers sufficiently to avoid asserts
   and other bad things
 * make the asserts on ID register oddnesses be only for TCG
   (ie where QEMU controls the values) and not for KVM

thanks
-- PMM
Laszlo Ersek July 18, 2019, 7:07 p.m. UTC | #22
On 07/18/19 14:30, Peter Maydell wrote:
> On Wed, 17 Jul 2019 at 16:08, Laszlo Ersek <lersek@redhat.com> wrote:

>>

>> On 07/17/19 15:46, Peter Maydell wrote:

>>> On Wed, 17 Jul 2019 at 14:36, Philippe Mathieu-Daudé <philmd@redhat.com> wrote:

>>>> I still wonder why this didn't assert on Peter's setup.

>>>

>>> My setup does not assert because my host kernel correctly

>>> provides the ID register values to QEMU. Laszlo's appears

>>> to be providing all-zeroes, which then obviously breaks

>>> assertions being made about the sanity of those ID register

>>> values...

>>

>> OK. Can you suggest a location that I should check in the host kernel?

> 

> I was about to write out the process of how we get these values

> from the kernel, but as the first step of that I read through

> QEMU's target/arm/kvm64.c:kvm_arm_get_host_cpu_features(),

> which is the function which reads these values using the

> KVM_GET_ONE_REG ioctl. It starts with an attempt to read

> ID_AA64PFR0, and has a comment for the error-handling case:

> 

>         /*

>          * Before v4.15, the kernel only exposed a limited number of system

>          * registers, not including any of the interesting AArch64 ID regs.

>          * For the most part we could leave these fields as zero with minimal

>          * effect, since this does not affect the values seen by the guest.

>          *

>          * However, it could cause problems down the line for QEMU,

>          * so provide a minimal v8.0 default.

>          *

>          * ??? Could read MIDR and use knowledge from cpu64.c.

>          * ??? Could map a page of memory into our temp guest and

>          *     run the tiniest of hand-crafted kernels to extract

>          *     the values seen by the guest.

>          * ??? Either of these sounds like too much effort just

>          *     to work around running a modern host kernel.

>          */

> 

> I have 4.15, and don't hit this assert; you have 4.14 and do,

> so I think you're going to be going through this codepath which

> currently sets only ahcf->isar.id_aa64pfr0 and none of the other

> ID register fields in the isar struct.

> 

> I'm not sure exactly which kernel commits added the ID register

> reading support. (The relevant kernel code is in

> arch/arm64/kvm/sys_regs.c I think.)


I compared that file between the downstream kernel source and upstream v4.15, and it looks like the following series (indeed released as part of v4.15) is what's missing down-stream, for this particular use case:

     1  27e64b4be4b8 regset: Add support for dynamically sized regsets
     2  94ef7ecbdf6f arm64: fpsimd: Correctly annotate exception helpers called from asm
     3  abf73988a7c2 arm64: signal: Verify extra data is user-readable in sys_rt_sigreturn
     4  93390c0a1b20 arm64: KVM: Hide unsupported AArch64 CPU features from guests
     5  b472db6cf8c6 arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON
     6  38b9aeb32fa7 arm64: Port deprecated instruction emulation to new sysctl interface
     7  9cf5b54fafed arm64: fpsimd: Simplify uses of {set,clear}_ti_thread_flag()
     8  672365649cca arm64/sve: System register and exception syndrome definitions
     9  1fc5dce78ad1 arm64/sve: Low-level SVE architectural state manipulation functions
    10  ddd25ad1fde8 arm64/sve: Kconfig update and conditional compilation support
    11  d0b8cd318788 arm64/sve: Signal frame and context structure definition
    12  22043a3c082a arm64/sve: Low-level CPU setup
    13  bc0ee4760364 arm64/sve: Core task context handling
    14  79ab047c75d6 arm64/sve: Support vector length resetting for new processes
    15  8cd969d28fd2 arm64/sve: Signal handling support
    16  7582e22038a2 arm64/sve: Backend logic for setting the vector length
    17  8f1eec57cdcc arm64: cpufeature: Move sys_caps_initialised declarations
    18  2e0f2478ea37 arm64/sve: Probe SVE capabilities and usable vector lengths
    19  1bd3f93641ec arm64/sve: Preserve SVE registers around kernel-mode NEON use
    20  fdfa976cae5c arm64/sve: Preserve SVE registers around EFI runtime service calls
    21  43d4da2c45b2 arm64/sve: ptrace and ELF coredump support
    22  2d2123bc7c7f arm64/sve: Add prctl controls for userspace vector length management
    23  4ffa09a939ab arm64/sve: Add sysctl to set the default vector length for new processes
    24  17eed27b02da arm64/sve: KVM: Prevent guests from using SVE
    25  aac45ffd1f8e arm64/sve: KVM: Treat guest SVE use as undefined instruction execution
    26  07d79fe7c223 arm64/sve: KVM: Hide SVE from CPU features exposed to guests
    27  43994d824e84 arm64/sve: Detect SVE and activate runtime support
    28  ce6990813f15 arm64/sve: Add documentation

The differences found by the simple "diff" that I mention above are mainly due to commit #4 (93390c0a1b20, "arm64: KVM: Hide unsupported AArch64 CPU features from guests", 2017-11-03).

I found a (likely non-final) version of the cover letter too, here: https://www.spinics.net/lists/arm-kernel/msg599528.html

I guess I should convince myself to install RHEL8 on the Mustang sometime...

Thanks!
Laszlo

> Anyway, I think we need to do at least one of:

>  * enhance the "provide a minimal v8.0 default" code in this

>    condition in kvm_arm_get_host_cpu_features() so that it

>    populates the ID registers sufficiently to avoid asserts

>    and other bad things

>  * make the asserts on ID register oddnesses be only for TCG

>    (ie where QEMU controls the values) and not for KVM

> 

> thanks

> -- PMM

>
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8e6779936eb..b5eff79f73b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3296,6 +3296,11 @@  static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
 }
 
+static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2;
+}
+
 static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
 {
     return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 8f16e96b6c8..784a4c2dfcc 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -774,6 +774,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     CPUARMState *env = &cpu->env;
     int pagebits;
     Error *local_err = NULL;
+    bool no_aa32 = false;
 
     /* If we needed to query the host kernel for the CPU features
      * then it's possible that might have failed in the initfn, but
@@ -820,6 +821,16 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
             set_feature(env, ARM_FEATURE_V7VE);
         }
     }
+
+    /*
+     * There exist AArch64 cpus without AArch32 support.  When KVM
+     * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.
+     * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.
+     */
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+        no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);
+    }
+
     if (arm_feature(env, ARM_FEATURE_V7VE)) {
         /* v7 Virtualization Extensions. In real hardware this implies
          * EL2 and also the presence of the Security Extensions.
@@ -829,7 +840,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
          * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
          * Security Extensions is ARM_FEATURE_EL3.
          */
-        assert(cpu_isar_feature(arm_div, cpu));
+        assert(no_aa32 || cpu_isar_feature(arm_div, cpu));
         set_feature(env, ARM_FEATURE_LPAE);
         set_feature(env, ARM_FEATURE_V7);
     }
@@ -855,7 +866,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     if (arm_feature(env, ARM_FEATURE_V6)) {
         set_feature(env, ARM_FEATURE_V5);
         if (!arm_feature(env, ARM_FEATURE_M)) {
-            assert(cpu_isar_feature(jazelle, cpu));
+            assert(no_aa32 || cpu_isar_feature(jazelle, cpu));
             set_feature(env, ARM_FEATURE_AUXCR);
         }
     }