diff mbox series

[v7] pflash: Require backend size to match device, improve errors

Message ID 20190308062455.29755-1-armbru@redhat.com
State New
Headers show
Series [v7] pflash: Require backend size to match device, improve errors | expand

Commit Message

Markus Armbruster March 8, 2019, 6:24 a.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>


We reject undersized backends with a rather enigmatic "failed to read
the initial flash content" error.

We happily accept oversized images, ignoring their tail.  Throwing
away parts of firmware that way is pretty much certain to end in an
even more enigmatic failure to boot.

Require the backend's size to match the device's size exactly.  Report
mismatch as "device requires N bytes, block backend 'NAME' provides M
bytes".

Improve the error for actual read failures to "can't read initial
flash content from block backend 'NAME'.

We disabled code to limit device sizes to 8, 16, 32 or 64MiB more than
a decade ago in commit 95d1f3edd5e and c8b153d7949, v0.9.1.  Bury.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Signed-off-by: Markus Armbruster <armbru@redhat.com>

---
 hw/block/pflash_cfi01.c | 31 ++++++++++++++++++++++---------
 hw/block/pflash_cfi02.c | 31 +++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 17 deletions(-)

-- 
2.17.2

Comments

Alex Bennée March 8, 2019, 10 a.m. UTC | #1
Markus Armbruster <armbru@redhat.com> writes:

> From: Alex Bennée <alex.bennee@linaro.org>

>

> We reject undersized backends with a rather enigmatic "failed to read

> the initial flash content" error.

>

> We happily accept oversized images, ignoring their tail.  Throwing

> away parts of firmware that way is pretty much certain to end in an

> even more enigmatic failure to boot.

>

> Require the backend's size to match the device's size exactly.  Report

> mismatch as "device requires N bytes, block backend 'NAME' provides M

> bytes".

>

> Improve the error for actual read failures to "can't read initial

> flash content from block backend 'NAME'.

>

> We disabled code to limit device sizes to 8, 16, 32 or 64MiB more than

> a decade ago in commit 95d1f3edd5e and c8b153d7949, v0.9.1.  Bury.

>

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Signed-off-by: Markus Armbruster <armbru@redhat.com>

> ---

>  hw/block/pflash_cfi01.c | 31 ++++++++++++++++++++++---------

>  hw/block/pflash_cfi02.c | 31 +++++++++++++++++++++++--------

>  2 files changed, 45 insertions(+), 17 deletions(-)

>

> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c

> index 9d1c356eb6..2e3a05c0b3 100644

> --- a/hw/block/pflash_cfi01.c

> +++ b/hw/block/pflash_cfi01.c

> @@ -730,13 +730,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>      }

>      device_len = sector_len_per_device * blocks_per_device;

>

> -    /* XXX: to be fixed */

> -#if 0

> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

> -        return NULL;

> -#endif

> -

>      memory_region_init_rom_device(

>          &pfl->mem, OBJECT(dev),

>          &pflash_cfi01_ops,

> @@ -763,12 +756,32 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>      }

>

>      if (pfl->blk) {

> +        /*

> +         * Validate the backing store is the right size for pflash

> +         * devices. If the user supplies a larger file we ignore the

> +         * tail.

> +         */


We need to drop the last sentence...

> +        int64_t backing_len = blk_getlength(pfl->blk);

> +

> +        if (backing_len < 0) {

> +            error_setg(errp, "can't get size of block backend '%s'",

> +                       blk_name(pfl->blk));

> +            return;

> +        }

> +        if (backing_len != total_len) {

> +            error_setg(errp, "device requires %" PRIu64 " bytes, "

> +                       "block backend '%s' provides %" PRIu64 " bytes",

> +                       total_len, blk_name(pfl->blk), backing_len);

> +            return;

> +        }

> +

>          /* read the initial flash content */

>          ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);

> -

>          if (ret < 0) {

>              vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

> -            error_setg(errp, "failed to read the initial flash content");

> +            error_setg(errp, "can't read initial flash content"

> +                       " from block backend '%s'",

> +                       blk_name(pfl->blk));

>              return;

>          }

>      }

> diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c

> index 33779ce807..45b88d65d8 100644

> --- a/hw/block/pflash_cfi02.c

> +++ b/hw/block/pflash_cfi02.c

> @@ -550,12 +550,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>      }

>

>      chip_len = pfl->sector_len * pfl->nb_blocs;

> -    /* XXX: to be fixed */

> -#if 0

> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

> -        return NULL;

> -#endif

>

>      memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ?

>                                    &pflash_cfi02_ops_be : &pflash_cfi02_ops_le,

> @@ -581,11 +575,32 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>      }

>

>      if (pfl->blk) {

> +        /*

> +         * Validate the backing store is the right size for pflash

> +         * devices. If the user supplies a larger file we ignore the

> +         * tail.

> +         */


And here.

> +        int64_t backing_len = blk_getlength(pfl->blk);

> +

> +        if (backing_len < 0) {

> +            error_setg(errp, "can't get size of block backend '%s'",

> +                       blk_name(pfl->blk));

> +            return;

> +        }

> +        if (backing_len != chip_len) {

> +            error_setg(errp, "device requires %" PRIu32 " bytes, "

> +                       "block backend '%s' provides %" PRIu64 " bytes",

> +                       chip_len, blk_name(pfl->blk), backing_len);

> +            return;

> +        }

> +

>          /* read the initial flash content */

>          ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);

>          if (ret < 0) {

> -            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));

> -            error_setg(errp, "failed to read the initial flash content");

> +            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

> +            error_setg(errp, "can't read initial flash content"

> +                       " from block backend '%s'",

> +                       blk_name(pfl->blk));

>              return;

>          }

>      }


Otherwise all good... not that a r-b means anything from me ;-)

--
Alex Bennée
Laszlo Ersek March 8, 2019, 11:47 a.m. UTC | #2
On 03/08/19 07:24, Markus Armbruster wrote:
> From: Alex Bennée <alex.bennee@linaro.org>

> 

> We reject undersized backends with a rather enigmatic "failed to read

> the initial flash content" error.

> 

> We happily accept oversized images, ignoring their tail.  Throwing

> away parts of firmware that way is pretty much certain to end in an

> even more enigmatic failure to boot.

> 

> Require the backend's size to match the device's size exactly.  Report

> mismatch as "device requires N bytes, block backend 'NAME' provides M

> bytes".

> 

> Improve the error for actual read failures to "can't read initial

> flash content from block backend 'NAME'.

> 

> We disabled code to limit device sizes to 8, 16, 32 or 64MiB more than

> a decade ago in commit 95d1f3edd5e and c8b153d7949, v0.9.1.  Bury.

> 

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Signed-off-by: Markus Armbruster <armbru@redhat.com>

> ---

>  hw/block/pflash_cfi01.c | 31 ++++++++++++++++++++++---------

>  hw/block/pflash_cfi02.c | 31 +++++++++++++++++++++++--------

>  2 files changed, 45 insertions(+), 17 deletions(-)

> 

> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c

> index 9d1c356eb6..2e3a05c0b3 100644

> --- a/hw/block/pflash_cfi01.c

> +++ b/hw/block/pflash_cfi01.c

> @@ -730,13 +730,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>      }

>      device_len = sector_len_per_device * blocks_per_device;

>  

> -    /* XXX: to be fixed */

> -#if 0

> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

> -        return NULL;

> -#endif

> -

>      memory_region_init_rom_device(

>          &pfl->mem, OBJECT(dev),

>          &pflash_cfi01_ops,

> @@ -763,12 +756,32 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>      }

>  

>      if (pfl->blk) {

> +        /*

> +         * Validate the backing store is the right size for pflash

> +         * devices. If the user supplies a larger file we ignore the

> +         * tail.

> +         */

> +        int64_t backing_len = blk_getlength(pfl->blk);

> +

> +        if (backing_len < 0) {

> +            error_setg(errp, "can't get size of block backend '%s'",

> +                       blk_name(pfl->blk));

> +            return;

> +        }

> +        if (backing_len != total_len) {

> +            error_setg(errp, "device requires %" PRIu64 " bytes, "

> +                       "block backend '%s' provides %" PRIu64 " bytes",

> +                       total_len, blk_name(pfl->blk), backing_len);

> +            return;

> +        }

> +

>          /* read the initial flash content */

>          ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);

> -

>          if (ret < 0) {

>              vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

> -            error_setg(errp, "failed to read the initial flash content");

> +            error_setg(errp, "can't read initial flash content"

> +                       " from block backend '%s'",

> +                       blk_name(pfl->blk));

>              return;

>          }

>      }

> diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c

> index 33779ce807..45b88d65d8 100644

> --- a/hw/block/pflash_cfi02.c

> +++ b/hw/block/pflash_cfi02.c

> @@ -550,12 +550,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>      }

>  

>      chip_len = pfl->sector_len * pfl->nb_blocs;

> -    /* XXX: to be fixed */

> -#if 0

> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

> -        return NULL;

> -#endif

>  

>      memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ?

>                                    &pflash_cfi02_ops_be : &pflash_cfi02_ops_le,

> @@ -581,11 +575,32 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>      }

>  

>      if (pfl->blk) {

> +        /*

> +         * Validate the backing store is the right size for pflash

> +         * devices. If the user supplies a larger file we ignore the

> +         * tail.

> +         */

> +        int64_t backing_len = blk_getlength(pfl->blk);

> +

> +        if (backing_len < 0) {

> +            error_setg(errp, "can't get size of block backend '%s'",

> +                       blk_name(pfl->blk));

> +            return;

> +        }

> +        if (backing_len != chip_len) {

> +            error_setg(errp, "device requires %" PRIu32 " bytes, "

> +                       "block backend '%s' provides %" PRIu64 " bytes",

> +                       chip_len, blk_name(pfl->blk), backing_len);

> +            return;

> +        }

> +

>          /* read the initial flash content */

>          ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);

>          if (ret < 0) {

> -            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));

> -            error_setg(errp, "failed to read the initial flash content");

> +            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

> +            error_setg(errp, "can't read initial flash content"

> +                       " from block backend '%s'",

> +                       blk_name(pfl->blk));

>              return;

>          }

>      }

> 


This one has got to be one of the longest bike-shedding sessions! :)

I'm fine with this patch, but I could suggest two improvements.

(1) When blk_getlength() fails, we could format the negative error code
returned by it into the error message.

(2) We could extract the common code to a new function in
"hw/block/block.c". (It says "Common code for block device models" on
the tin.)


I realize the vmstate_unregister_ram() rollbacks are not identical
between cfi01 and cfi02. However, that difference only draws my
attention to the fact that the unregister calls are not made on the
other error branches in this patch anyway!

So we should have a common function like this:

void pflash_cfi_read_initial_content(BlockBackend *blk, uint64_t size,
                                     void *buf, Error **errp)
{
    int64_t backing_len = blk_getlength(blk);
    int ret;

    if (backing_len < 0) {
        error_setg(...);
        return;
    }

    if (backing_len != size) {
        error_setg(...);
        return;
    }

    ret = blk_pread(blk, 0, buf, size);
    if (ret < 0) {
        error_setg(...);
    }
}

And at the call sites, we can do

    if (pfl->blk) {
        Error *local_err = NULL;

        pflash_cfi_read_initial_content(pfl->blk,
                                        total_len OR chip_len,
                                        pfl->storage,
                                        &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            vmstate_unregister_ram(...); // as appropriate for cfiXX
            return;
        }
    }

Thanks
Laszlo
Markus Armbruster March 8, 2019, 12:16 p.m. UTC | #3
Alex Bennée <alex.bennee@linaro.org> writes:

> Markus Armbruster <armbru@redhat.com> writes:

>

>> From: Alex Bennée <alex.bennee@linaro.org>

>>

>> We reject undersized backends with a rather enigmatic "failed to read

>> the initial flash content" error.

>>

>> We happily accept oversized images, ignoring their tail.  Throwing

>> away parts of firmware that way is pretty much certain to end in an

>> even more enigmatic failure to boot.

>>

>> Require the backend's size to match the device's size exactly.  Report

>> mismatch as "device requires N bytes, block backend 'NAME' provides M

>> bytes".

>>

>> Improve the error for actual read failures to "can't read initial

>> flash content from block backend 'NAME'.

>>

>> We disabled code to limit device sizes to 8, 16, 32 or 64MiB more than

>> a decade ago in commit 95d1f3edd5e and c8b153d7949, v0.9.1.  Bury.

>>

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> Signed-off-by: Markus Armbruster <armbru@redhat.com>

>> ---

>>  hw/block/pflash_cfi01.c | 31 ++++++++++++++++++++++---------

>>  hw/block/pflash_cfi02.c | 31 +++++++++++++++++++++++--------

>>  2 files changed, 45 insertions(+), 17 deletions(-)

>>

>> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c

>> index 9d1c356eb6..2e3a05c0b3 100644

>> --- a/hw/block/pflash_cfi01.c

>> +++ b/hw/block/pflash_cfi01.c

>> @@ -730,13 +730,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>>      }

>>      device_len = sector_len_per_device * blocks_per_device;

>>

>> -    /* XXX: to be fixed */

>> -#if 0

>> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

>> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

>> -        return NULL;

>> -#endif

>> -

>>      memory_region_init_rom_device(

>>          &pfl->mem, OBJECT(dev),

>>          &pflash_cfi01_ops,

>> @@ -763,12 +756,32 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>>      }

>>

>>      if (pfl->blk) {

>> +        /*

>> +         * Validate the backing store is the right size for pflash

>> +         * devices. If the user supplies a larger file we ignore the

>> +         * tail.

>> +         */

>

> We need to drop the last sentence...


D'oh!

>> +        int64_t backing_len = blk_getlength(pfl->blk);

>> +

>> +        if (backing_len < 0) {

>> +            error_setg(errp, "can't get size of block backend '%s'",

>> +                       blk_name(pfl->blk));

>> +            return;

>> +        }

>> +        if (backing_len != total_len) {

>> +            error_setg(errp, "device requires %" PRIu64 " bytes, "

>> +                       "block backend '%s' provides %" PRIu64 " bytes",

>> +                       total_len, blk_name(pfl->blk), backing_len);

>> +            return;

>> +        }

>> +

>>          /* read the initial flash content */

>>          ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);

>> -

>>          if (ret < 0) {

>>              vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

>> -            error_setg(errp, "failed to read the initial flash content");

>> +            error_setg(errp, "can't read initial flash content"

>> +                       " from block backend '%s'",

>> +                       blk_name(pfl->blk));

>>              return;

>>          }

>>      }

>> diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c

>> index 33779ce807..45b88d65d8 100644

>> --- a/hw/block/pflash_cfi02.c

>> +++ b/hw/block/pflash_cfi02.c

>> @@ -550,12 +550,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>>      }

>>

>>      chip_len = pfl->sector_len * pfl->nb_blocs;

>> -    /* XXX: to be fixed */

>> -#if 0

>> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

>> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

>> -        return NULL;

>> -#endif

>>

>>      memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ?

>>                                    &pflash_cfi02_ops_be : &pflash_cfi02_ops_le,

>> @@ -581,11 +575,32 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>>      }

>>

>>      if (pfl->blk) {

>> +        /*

>> +         * Validate the backing store is the right size for pflash

>> +         * devices. If the user supplies a larger file we ignore the

>> +         * tail.

>> +         */

>

> And here.

>

>> +        int64_t backing_len = blk_getlength(pfl->blk);

>> +

>> +        if (backing_len < 0) {

>> +            error_setg(errp, "can't get size of block backend '%s'",

>> +                       blk_name(pfl->blk));

>> +            return;

>> +        }

>> +        if (backing_len != chip_len) {

>> +            error_setg(errp, "device requires %" PRIu32 " bytes, "

>> +                       "block backend '%s' provides %" PRIu64 " bytes",

>> +                       chip_len, blk_name(pfl->blk), backing_len);

>> +            return;

>> +        }

>> +

>>          /* read the initial flash content */

>>          ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);

>>          if (ret < 0) {

>> -            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));

>> -            error_setg(errp, "failed to read the initial flash content");

>> +            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

>> +            error_setg(errp, "can't read initial flash content"

>> +                       " from block backend '%s'",

>> +                       blk_name(pfl->blk));

>>              return;

>>          }

>>      }

>

> Otherwise all good... not that a r-b means anything from me ;-)


v8 coming up...  thanks!
Markus Armbruster March 8, 2019, 12:28 p.m. UTC | #4
Laszlo Ersek <lersek@redhat.com> writes:

> On 03/08/19 07:24, Markus Armbruster wrote:

>> From: Alex Bennée <alex.bennee@linaro.org>

>> 

>> We reject undersized backends with a rather enigmatic "failed to read

>> the initial flash content" error.

>> 

>> We happily accept oversized images, ignoring their tail.  Throwing

>> away parts of firmware that way is pretty much certain to end in an

>> even more enigmatic failure to boot.

>> 

>> Require the backend's size to match the device's size exactly.  Report

>> mismatch as "device requires N bytes, block backend 'NAME' provides M

>> bytes".

>> 

>> Improve the error for actual read failures to "can't read initial

>> flash content from block backend 'NAME'.

>> 

>> We disabled code to limit device sizes to 8, 16, 32 or 64MiB more than

>> a decade ago in commit 95d1f3edd5e and c8b153d7949, v0.9.1.  Bury.

>> 

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> Signed-off-by: Markus Armbruster <armbru@redhat.com>

>> ---

>>  hw/block/pflash_cfi01.c | 31 ++++++++++++++++++++++---------

>>  hw/block/pflash_cfi02.c | 31 +++++++++++++++++++++++--------

>>  2 files changed, 45 insertions(+), 17 deletions(-)

>> 

>> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c

>> index 9d1c356eb6..2e3a05c0b3 100644

>> --- a/hw/block/pflash_cfi01.c

>> +++ b/hw/block/pflash_cfi01.c

>> @@ -730,13 +730,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>>      }

>>      device_len = sector_len_per_device * blocks_per_device;

>>  

>> -    /* XXX: to be fixed */

>> -#if 0

>> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

>> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

>> -        return NULL;

>> -#endif

>> -

>>      memory_region_init_rom_device(

>>          &pfl->mem, OBJECT(dev),

>>          &pflash_cfi01_ops,

>> @@ -763,12 +756,32 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>>      }

>>  

>>      if (pfl->blk) {

>> +        /*

>> +         * Validate the backing store is the right size for pflash

>> +         * devices. If the user supplies a larger file we ignore the

>> +         * tail.

>> +         */

>> +        int64_t backing_len = blk_getlength(pfl->blk);

>> +

>> +        if (backing_len < 0) {

>> +            error_setg(errp, "can't get size of block backend '%s'",

>> +                       blk_name(pfl->blk));

>> +            return;

>> +        }

>> +        if (backing_len != total_len) {

>> +            error_setg(errp, "device requires %" PRIu64 " bytes, "

>> +                       "block backend '%s' provides %" PRIu64 " bytes",

>> +                       total_len, blk_name(pfl->blk), backing_len);

>> +            return;

>> +        }

>> +

>>          /* read the initial flash content */

>>          ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);

>> -

>>          if (ret < 0) {

>>              vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

>> -            error_setg(errp, "failed to read the initial flash content");

>> +            error_setg(errp, "can't read initial flash content"

>> +                       " from block backend '%s'",

>> +                       blk_name(pfl->blk));

>>              return;

>>          }

>>      }

>> diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c

>> index 33779ce807..45b88d65d8 100644

>> --- a/hw/block/pflash_cfi02.c

>> +++ b/hw/block/pflash_cfi02.c

>> @@ -550,12 +550,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>>      }

>>  

>>      chip_len = pfl->sector_len * pfl->nb_blocs;

>> -    /* XXX: to be fixed */

>> -#if 0

>> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

>> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

>> -        return NULL;

>> -#endif

>>  

>>      memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ?

>>                                    &pflash_cfi02_ops_be : &pflash_cfi02_ops_le,

>> @@ -581,11 +575,32 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>>      }

>>  

>>      if (pfl->blk) {

>> +        /*

>> +         * Validate the backing store is the right size for pflash

>> +         * devices. If the user supplies a larger file we ignore the

>> +         * tail.

>> +         */

>> +        int64_t backing_len = blk_getlength(pfl->blk);

>> +

>> +        if (backing_len < 0) {

>> +            error_setg(errp, "can't get size of block backend '%s'",

>> +                       blk_name(pfl->blk));

>> +            return;

>> +        }

>> +        if (backing_len != chip_len) {

>> +            error_setg(errp, "device requires %" PRIu32 " bytes, "

>> +                       "block backend '%s' provides %" PRIu64 " bytes",

>> +                       chip_len, blk_name(pfl->blk), backing_len);

>> +            return;

>> +        }

>> +

>>          /* read the initial flash content */

>>          ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);

>>          if (ret < 0) {

>> -            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));

>> -            error_setg(errp, "failed to read the initial flash content");

>> +            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

>> +            error_setg(errp, "can't read initial flash content"

>> +                       " from block backend '%s'",

>> +                       blk_name(pfl->blk));

>>              return;

>>          }

>>      }

>> 

>

> This one has got to be one of the longest bike-shedding sessions! :)

>

> I'm fine with this patch, but I could suggest two improvements.

>

> (1) When blk_getlength() fails, we could format the negative error code

> returned by it into the error message.


I can do that.

> (2) We could extract the common code to a new function in

> "hw/block/block.c". (It says "Common code for block device models" on

> the tin.)


There's so much common code in these two files even before this patch...

> I realize the vmstate_unregister_ram() rollbacks are not identical

> between cfi01 and cfi02. However, that difference only draws my

> attention to the fact that the unregister calls are not made on the

> other error branches in this patch anyway!


You're right, that needs fixing.

> So we should have a common function like this:

>

> void pflash_cfi_read_initial_content(BlockBackend *blk, uint64_t size,

>                                      void *buf, Error **errp)

> {

>     int64_t backing_len = blk_getlength(blk);

>     int ret;

>

>     if (backing_len < 0) {

>         error_setg(...);

>         return;

>     }

>

>     if (backing_len != size) {

>         error_setg(...);

>         return;

>     }

>

>     ret = blk_pread(blk, 0, buf, size);

>     if (ret < 0) {

>         error_setg(...);

>     }

> }

>

> And at the call sites, we can do

>

>     if (pfl->blk) {

>         Error *local_err = NULL;

>

>         pflash_cfi_read_initial_content(pfl->blk,

>                                         total_len OR chip_len,

>                                         pfl->storage,

>                                         &local_err);

>         if (local_err) {

>             error_propagate(errp, local_err);

>             vmstate_unregister_ram(...); // as appropriate for cfiXX

>             return;

>         }

>     }


I'll give it a try for v8.  Thanks!
Kevin Wolf March 8, 2019, 1:35 p.m. UTC | #5
Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:
> Laszlo Ersek <lersek@redhat.com> writes:

> > This one has got to be one of the longest bike-shedding sessions! :)

> >

> > I'm fine with this patch, but I could suggest two improvements.

> >

> > (1) When blk_getlength() fails, we could format the negative error code

> > returned by it into the error message.

> 

> I can do that.


By using error_setg_errno(), I assume. Not throwing away error details
is always good.

> > (2) We could extract the common code to a new function in

> > "hw/block/block.c". (It says "Common code for block device models" on

> > the tin.)

> 

> There's so much common code in these two files even before this patch...


My understanding is that hw/block/block.c contains code that is
potentially useful to all kinds of block devices, not random code that
two specific similar devices happen to share.

If we want to deduplicate some code in the flash devices, without any
expectation that other devices will use it at some point, I'd rather
create a new source file hw/block/pflash_common.c or something like
that.

Kevin
Laszlo Ersek March 8, 2019, 2:09 p.m. UTC | #6
On 03/08/19 14:35, Kevin Wolf wrote:
> Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>> Laszlo Ersek <lersek@redhat.com> writes:

>>> This one has got to be one of the longest bike-shedding sessions! :)

>>>

>>> I'm fine with this patch, but I could suggest two improvements.

>>>

>>> (1) When blk_getlength() fails, we could format the negative error code

>>> returned by it into the error message.

>>

>> I can do that.

> 

> By using error_setg_errno(), I assume. Not throwing away error details

> is always good.

> 

>>> (2) We could extract the common code to a new function in

>>> "hw/block/block.c". (It says "Common code for block device models" on

>>> the tin.)

>>

>> There's so much common code in these two files even before this patch...

> 

> My understanding is that hw/block/block.c contains code that is

> potentially useful to all kinds of block devices, not random code that

> two specific similar devices happen to share.


Ah, OK.

> If we want to deduplicate some code in the flash devices, without any

> expectation that other devices will use it at some point, I'd rather

> create a new source file hw/block/pflash_common.c or something like

> that.


Sure, that makes a lot of sense then.

Cheers
Laszlo
Philippe Mathieu-Daudé March 8, 2019, 2:26 p.m. UTC | #7
On 3/8/19 2:35 PM, Kevin Wolf wrote:
> Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>> Laszlo Ersek <lersek@redhat.com> writes:

>>> This one has got to be one of the longest bike-shedding sessions! :)

>>>

>>> I'm fine with this patch, but I could suggest two improvements.

>>>

>>> (1) When blk_getlength() fails, we could format the negative error code

>>> returned by it into the error message.

>>

>> I can do that.

> 

> By using error_setg_errno(), I assume. Not throwing away error details

> is always good.

> 

>>> (2) We could extract the common code to a new function in

>>> "hw/block/block.c". (It says "Common code for block device models" on

>>> the tin.)

>>

>> There's so much common code in these two files even before this patch...

> 

> My understanding is that hw/block/block.c contains code that is

> potentially useful to all kinds of block devices, not random code that

> two specific similar devices happen to share.

> 

> If we want to deduplicate some code in the flash devices, without any

> expectation that other devices will use it at some point, I'd rather

> create a new source file hw/block/pflash_common.c or something like

> that.


Agreed, I like that suggestion too.
Actually we already have been talking about a such
cleanup/refactor/rewrite with Markus, but this will wait the next QEMU
dev cycle.
Markus Armbruster March 8, 2019, 2:29 p.m. UTC | #8
Kevin Wolf <kwolf@redhat.com> writes:

> Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>> Laszlo Ersek <lersek@redhat.com> writes:

>> > This one has got to be one of the longest bike-shedding sessions! :)

>> >

>> > I'm fine with this patch, but I could suggest two improvements.

>> >

>> > (1) When blk_getlength() fails, we could format the negative error code

>> > returned by it into the error message.

>> 

>> I can do that.

>

> By using error_setg_errno(), I assume. Not throwing away error details

> is always good.

>

>> > (2) We could extract the common code to a new function in

>> > "hw/block/block.c". (It says "Common code for block device models" on

>> > the tin.)

>> 

>> There's so much common code in these two files even before this patch...

>

> My understanding is that hw/block/block.c contains code that is

> potentially useful to all kinds of block devices, not random code that

> two specific similar devices happen to share.

>

> If we want to deduplicate some code in the flash devices, without any

> expectation that other devices will use it at some point, I'd rather

> create a new source file hw/block/pflash_common.c or something like

> that.


Yes.

The helper I came up with (appended) isn't really specific to flash
devices.  Would it be okay for hw/block/block.c even though only the two
flash devices use it for now?


bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,
                                 Error **errp)
{
    int64_t blk_len;
    int ret;

    blk_len = blk_getlength(blk);
    if (blk_len < 0) {
        error_setg_errno(errp, -blk_len,
                         "can't get size of block backend '%s'",
                         blk_name(blk));
        return false;
    }
    if (blk_len != size) {
        error_setg(errp, "device requires %" PRIu64 " bytes, "
                   "block backend '%s' provides %" PRIu64 " bytes",
                   size, blk_name(blk), blk_len);
        return false;
    }

    /* TODO for @size > BDRV_REQUEST_MAX_BYTES, we'd need to loop */
    assert(size <= BDRV_REQUEST_MAX_BYTES);
    ret = blk_pread(blk, 0, buf, size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "can't read block backend '%s'",
                         blk_name(blk));
        return false;
    }
    return true;
}
Philippe Mathieu-Daudé March 8, 2019, 2:40 p.m. UTC | #9
On 3/8/19 12:47 PM, Laszlo Ersek wrote:
> On 03/08/19 07:24, Markus Armbruster wrote:

>> From: Alex Bennée <alex.bennee@linaro.org>

>>

>> We reject undersized backends with a rather enigmatic "failed to read

>> the initial flash content" error.

>>

>> We happily accept oversized images, ignoring their tail.  Throwing

>> away parts of firmware that way is pretty much certain to end in an

>> even more enigmatic failure to boot.

>>

>> Require the backend's size to match the device's size exactly.  Report

>> mismatch as "device requires N bytes, block backend 'NAME' provides M

>> bytes".

>>

>> Improve the error for actual read failures to "can't read initial

>> flash content from block backend 'NAME'.

>>

>> We disabled code to limit device sizes to 8, 16, 32 or 64MiB more than

>> a decade ago in commit 95d1f3edd5e and c8b153d7949, v0.9.1.  Bury.

>>

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> Signed-off-by: Markus Armbruster <armbru@redhat.com>

>> ---

>>  hw/block/pflash_cfi01.c | 31 ++++++++++++++++++++++---------

>>  hw/block/pflash_cfi02.c | 31 +++++++++++++++++++++++--------

>>  2 files changed, 45 insertions(+), 17 deletions(-)

>>

>> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c

>> index 9d1c356eb6..2e3a05c0b3 100644

>> --- a/hw/block/pflash_cfi01.c

>> +++ b/hw/block/pflash_cfi01.c

>> @@ -730,13 +730,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>>      }

>>      device_len = sector_len_per_device * blocks_per_device;

>>  

>> -    /* XXX: to be fixed */

>> -#if 0

>> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

>> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

>> -        return NULL;

>> -#endif

>> -

>>      memory_region_init_rom_device(

>>          &pfl->mem, OBJECT(dev),

>>          &pflash_cfi01_ops,

>> @@ -763,12 +756,32 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)

>>      }

>>  

>>      if (pfl->blk) {

>> +        /*

>> +         * Validate the backing store is the right size for pflash

>> +         * devices. If the user supplies a larger file we ignore the

>> +         * tail.

>> +         */

>> +        int64_t backing_len = blk_getlength(pfl->blk);

>> +

>> +        if (backing_len < 0) {

>> +            error_setg(errp, "can't get size of block backend '%s'",

>> +                       blk_name(pfl->blk));

>> +            return;

>> +        }

>> +        if (backing_len != total_len) {

>> +            error_setg(errp, "device requires %" PRIu64 " bytes, "

>> +                       "block backend '%s' provides %" PRIu64 " bytes",

>> +                       total_len, blk_name(pfl->blk), backing_len);

>> +            return;

>> +        }

>> +

>>          /* read the initial flash content */

>>          ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);

>> -

>>          if (ret < 0) {

>>              vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

>> -            error_setg(errp, "failed to read the initial flash content");

>> +            error_setg(errp, "can't read initial flash content"

>> +                       " from block backend '%s'",

>> +                       blk_name(pfl->blk));

>>              return;

>>          }

>>      }

>> diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c

>> index 33779ce807..45b88d65d8 100644

>> --- a/hw/block/pflash_cfi02.c

>> +++ b/hw/block/pflash_cfi02.c

>> @@ -550,12 +550,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>>      }

>>  

>>      chip_len = pfl->sector_len * pfl->nb_blocs;

>> -    /* XXX: to be fixed */

>> -#if 0

>> -    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&

>> -        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))

>> -        return NULL;

>> -#endif

>>  

>>      memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ?

>>                                    &pflash_cfi02_ops_be : &pflash_cfi02_ops_le,

>> @@ -581,11 +575,32 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)

>>      }

>>  

>>      if (pfl->blk) {

>> +        /*

>> +         * Validate the backing store is the right size for pflash

>> +         * devices. If the user supplies a larger file we ignore the

>> +         * tail.

>> +         */

>> +        int64_t backing_len = blk_getlength(pfl->blk);

>> +

>> +        if (backing_len < 0) {

>> +            error_setg(errp, "can't get size of block backend '%s'",

>> +                       blk_name(pfl->blk));

>> +            return;

>> +        }

>> +        if (backing_len != chip_len) {

>> +            error_setg(errp, "device requires %" PRIu32 " bytes, "

>> +                       "block backend '%s' provides %" PRIu64 " bytes",

>> +                       chip_len, blk_name(pfl->blk), backing_len);

>> +            return;

>> +        }

>> +

>>          /* read the initial flash content */

>>          ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);

>>          if (ret < 0) {

>> -            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));

>> -            error_setg(errp, "failed to read the initial flash content");

>> +            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));

>> +            error_setg(errp, "can't read initial flash content"

>> +                       " from block backend '%s'",

>> +                       blk_name(pfl->blk));

>>              return;

>>          }

>>      }

>>

> 

> This one has got to be one of the longest bike-shedding sessions! :)

> 

> I'm fine with this patch, but I could suggest two improvements.

> 

> (1) When blk_getlength() fails, we could format the negative error code

> returned by it into the error message.


This one is a worthwhile a v8.

> (2) We could extract the common code to a new function in

> "hw/block/block.c". (It says "Common code for block device models" on

> the tin.)


Good idea, but I'd postpone that for the next dev cycle.

> I realize the vmstate_unregister_ram() rollbacks are not identical

> between cfi01 and cfi02. However, that difference only draws my

> attention to the fact that the unregister calls are not made on the

> other error branches in this patch anyway!

> 

> So we should have a common function like this:

> 

> void pflash_cfi_read_initial_content(BlockBackend *blk, uint64_t size,

>                                      void *buf, Error **errp)

> {

>     int64_t backing_len = blk_getlength(blk);

>     int ret;

> 

>     if (backing_len < 0) {

>         error_setg(...);

>         return;

>     }

> 

>     if (backing_len != size) {

>         error_setg(...);

>         return;

>     }

> 

>     ret = blk_pread(blk, 0, buf, size);

>     if (ret < 0) {

>         error_setg(...);

>     }

> }

> 

> And at the call sites, we can do

> 

>     if (pfl->blk) {

>         Error *local_err = NULL;

> 

>         pflash_cfi_read_initial_content(pfl->blk,

>                                         total_len OR chip_len,

>                                         pfl->storage,

>                                         &local_err);

>         if (local_err) {

>             error_propagate(errp, local_err);

>             vmstate_unregister_ram(...); // as appropriate for cfiXX

>             return;

>         }

>     }

> 

> Thanks

> Laszlo

>
Kevin Wolf March 8, 2019, 3:40 p.m. UTC | #10
Am 08.03.2019 um 15:29 hat Markus Armbruster geschrieben:
> Kevin Wolf <kwolf@redhat.com> writes:

> 

> > Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

> >> Laszlo Ersek <lersek@redhat.com> writes:

> >> > This one has got to be one of the longest bike-shedding sessions! :)

> >> >

> >> > I'm fine with this patch, but I could suggest two improvements.

> >> >

> >> > (1) When blk_getlength() fails, we could format the negative error code

> >> > returned by it into the error message.

> >> 

> >> I can do that.

> >

> > By using error_setg_errno(), I assume. Not throwing away error details

> > is always good.

> >

> >> > (2) We could extract the common code to a new function in

> >> > "hw/block/block.c". (It says "Common code for block device models" on

> >> > the tin.)

> >> 

> >> There's so much common code in these two files even before this patch...

> >

> > My understanding is that hw/block/block.c contains code that is

> > potentially useful to all kinds of block devices, not random code that

> > two specific similar devices happen to share.

> >

> > If we want to deduplicate some code in the flash devices, without any

> > expectation that other devices will use it at some point, I'd rather

> > create a new source file hw/block/pflash_common.c or something like

> > that.

> 

> Yes.

> 

> The helper I came up with (appended) isn't really specific to flash

> devices.  Would it be okay for hw/block/block.c even though only the two

> flash devices use it for now?


Hm, it feels more like a helper for devices that can't decide whether
they want to be a block device or not. Or that actually don't want to be
a block device, but use a BlockBackend anyway. Reading in the whole
image isn't something that a normal block device would do.

But yes, it doesn't have flash-specific knowledge, even though I hope
that it's functionality that will remain very specific to these two
devices.

So it's your call, I don't have a strong opinion either way.

> 

> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>                                  Error **errp)

> {

>     int64_t blk_len;

>     int ret;

> 

>     blk_len = blk_getlength(blk);

>     if (blk_len < 0) {

>         error_setg_errno(errp, -blk_len,

>                          "can't get size of block backend '%s'",

>                          blk_name(blk));

>         return false;

>     }

>     if (blk_len != size) {

>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>                    "block backend '%s' provides %" PRIu64 " bytes",

>                    size, blk_name(blk), blk_len);


Should size use HWADDR_PRIu?

I'm not sure if printing the BlockBackend name is a good idea because
hopefully one day the BlockBackend will be anonymous even for the flash
devices.

>         return false;

>     }

> 

>     /* TODO for @size > BDRV_REQUEST_MAX_BYTES, we'd need to loop */

>     assert(size <= BDRV_REQUEST_MAX_BYTES);


I don't think we'd ever want to read in more than 2 GB into a memory
buffer. Before we even get close to this point, the devices should be
reworked to be more like an actual block device and read only what is
actually accessed.

>     ret = blk_pread(blk, 0, buf, size);

>     if (ret < 0) {

>         error_setg_errno(errp, -ret, "can't read block backend '%s'",

>                          blk_name(blk));

>         return false;

>     }

>     return true;

> }


Kevin
Philippe Mathieu-Daudé March 8, 2019, 4:45 p.m. UTC | #11
On 3/8/19 4:40 PM, Kevin Wolf wrote:
> Am 08.03.2019 um 15:29 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>>

>>> Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>>>> Laszlo Ersek <lersek@redhat.com> writes:

>>>>> This one has got to be one of the longest bike-shedding sessions! :)

>>>>>

>>>>> I'm fine with this patch, but I could suggest two improvements.

>>>>>

>>>>> (1) When blk_getlength() fails, we could format the negative error code

>>>>> returned by it into the error message.

>>>>

>>>> I can do that.

>>>

>>> By using error_setg_errno(), I assume. Not throwing away error details

>>> is always good.

>>>

>>>>> (2) We could extract the common code to a new function in

>>>>> "hw/block/block.c". (It says "Common code for block device models" on

>>>>> the tin.)

>>>>

>>>> There's so much common code in these two files even before this patch...

>>>

>>> My understanding is that hw/block/block.c contains code that is

>>> potentially useful to all kinds of block devices, not random code that

>>> two specific similar devices happen to share.

>>>

>>> If we want to deduplicate some code in the flash devices, without any

>>> expectation that other devices will use it at some point, I'd rather

>>> create a new source file hw/block/pflash_common.c or something like

>>> that.

>>

>> Yes.

>>

>> The helper I came up with (appended) isn't really specific to flash

>> devices.  Would it be okay for hw/block/block.c even though only the two

>> flash devices use it for now?

> 

> Hm, it feels more like a helper for devices that can't decide whether

> they want to be a block device or not. Or that actually don't want to be

> a block device, but use a BlockBackend anyway. Reading in the whole

> image isn't something that a normal block device would do.

> 

> But yes, it doesn't have flash-specific knowledge, even though I hope

> that it's functionality that will remain very specific to these two

> devices.

> 

> So it's your call, I don't have a strong opinion either way.

> 

>>

>> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>>                                  Error **errp)

>> {

>>     int64_t blk_len;

>>     int ret;

>>

>>     blk_len = blk_getlength(blk);

>>     if (blk_len < 0) {

>>         error_setg_errno(errp, -blk_len,

>>                          "can't get size of block backend '%s'",

>>                          blk_name(blk));

>>         return false;

>>     }

>>     if (blk_len != size) {

>>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>>                    "block backend '%s' provides %" PRIu64 " bytes",

>>                    size, blk_name(blk), blk_len);

> 

> Should size use HWADDR_PRIu?

> 

> I'm not sure if printing the BlockBackend name is a good idea because

> hopefully one day the BlockBackend will be anonymous even for the flash

> devices.

> 

>>         return false;

>>     }

>>

>>     /* TODO for @size > BDRV_REQUEST_MAX_BYTES, we'd need to loop */

>>     assert(size <= BDRV_REQUEST_MAX_BYTES);

> 

> I don't think we'd ever want to read in more than 2 GB into a memory

> buffer. Before we even get close to this point, the devices should be

> reworked to be more like an actual block device and read only what is

> actually accessed.


[bikeshedding again]

So you eventually found the root problem of this device. It tries to use
files as 'raw' format block. Since the pflash devices provide write
access, we create a block device in memory.
We should:
- use raw files as ROM (without using a pflash device)
- enforce the pflash device to use block formatted files
- provide a tool to help converting raw file to pflash block, or improve
  the device documentation.

>>     ret = blk_pread(blk, 0, buf, size);

>>     if (ret < 0) {

>>         error_setg_errno(errp, -ret, "can't read block backend '%s'",

>>                          blk_name(blk));

>>         return false;

>>     }

>>     return true;

>> }

> 

> Kevin

>
Markus Armbruster March 8, 2019, 5:03 p.m. UTC | #12
Kevin Wolf <kwolf@redhat.com> writes:

> Am 08.03.2019 um 15:29 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>> 

>> > Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>> >> Laszlo Ersek <lersek@redhat.com> writes:

>> >> > This one has got to be one of the longest bike-shedding sessions! :)

>> >> >

>> >> > I'm fine with this patch, but I could suggest two improvements.

>> >> >

>> >> > (1) When blk_getlength() fails, we could format the negative error code

>> >> > returned by it into the error message.

>> >> 

>> >> I can do that.

>> >

>> > By using error_setg_errno(), I assume. Not throwing away error details

>> > is always good.

>> >

>> >> > (2) We could extract the common code to a new function in

>> >> > "hw/block/block.c". (It says "Common code for block device models" on

>> >> > the tin.)

>> >> 

>> >> There's so much common code in these two files even before this patch...

>> >

>> > My understanding is that hw/block/block.c contains code that is

>> > potentially useful to all kinds of block devices, not random code that

>> > two specific similar devices happen to share.

>> >

>> > If we want to deduplicate some code in the flash devices, without any

>> > expectation that other devices will use it at some point, I'd rather

>> > create a new source file hw/block/pflash_common.c or something like

>> > that.

>> 

>> Yes.

>> 

>> The helper I came up with (appended) isn't really specific to flash

>> devices.  Would it be okay for hw/block/block.c even though only the two

>> flash devices use it for now?

>

> Hm, it feels more like a helper for devices that can't decide whether

> they want to be a block device or not. Or that actually don't want to be

> a block device, but use a BlockBackend anyway.


I think the latter's precisely what the two pflash devices are.

>                                                Reading in the whole

> image isn't something that a normal block device would do.


No argument.

> But yes, it doesn't have flash-specific knowledge, even though I hope

> that it's functionality that will remain very specific to these two

> devices.

>

> So it's your call, I don't have a strong opinion either way.


Understood.

>> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>>                                  Error **errp)

>> {

>>     int64_t blk_len;

>>     int ret;

>> 

>>     blk_len = blk_getlength(blk);

>>     if (blk_len < 0) {

>>         error_setg_errno(errp, -blk_len,

>>                          "can't get size of block backend '%s'",

>>                          blk_name(blk));

>>         return false;

>>     }

>>     if (blk_len != size) {

>>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>>                    "block backend '%s' provides %" PRIu64 " bytes",

>>                    size, blk_name(blk), blk_len);

>

> Should size use HWADDR_PRIu?


Yes.

> I'm not sure if printing the BlockBackend name is a good idea because

> hopefully one day the BlockBackend will be anonymous even for the flash

> devices.


Hmm.  Tell me what else I can use to identify the troublemaker to the
user.

For the second error, I could sidestep the issue and point to frontend
instead (oh, now I have an even thornier issue).  The first and third
error are *backend* errors, only related to the frontend since the
frontend happens to use them.  I could perhaps identify them as "backend
of frontend F".  If we want to do that going forward, we need suitable
helpers, so we don't reinvent this wheel in every block frontend.

>>         return false;

>>     }

>> 

>>     /* TODO for @size > BDRV_REQUEST_MAX_BYTES, we'd need to loop */

>>     assert(size <= BDRV_REQUEST_MAX_BYTES);

>

> I don't think we'd ever want to read in more than 2 GB into a memory

> buffer. Before we even get close to this point, the devices should be

> reworked to be more like an actual block device and read only what is

> actually accessed.


I'll steal from that to turn the comment into a proper excuse for my
lazy assertion.  Thanks!

>>     ret = blk_pread(blk, 0, buf, size);

>>     if (ret < 0) {

>>         error_setg_errno(errp, -ret, "can't read block backend '%s'",

>>                          blk_name(blk));

>>         return false;

>>     }

>>     return true;

>> }

>

> Kevin
Kevin Wolf March 8, 2019, 5:18 p.m. UTC | #13
Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:
> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

> >>                                  Error **errp)

> >> {

> >>     int64_t blk_len;

> >>     int ret;

> >> 

> >>     blk_len = blk_getlength(blk);

> >>     if (blk_len < 0) {

> >>         error_setg_errno(errp, -blk_len,

> >>                          "can't get size of block backend '%s'",

> >>                          blk_name(blk));

> >>         return false;

> >>     }

> >>     if (blk_len != size) {

> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

> >>                    "block backend '%s' provides %" PRIu64 " bytes",

> >>                    size, blk_name(blk), blk_len);

> >

> > Should size use HWADDR_PRIu?

> 

> Yes.

> 

> > I'm not sure if printing the BlockBackend name is a good idea because

> > hopefully one day the BlockBackend will be anonymous even for the flash

> > devices.

> 

> Hmm.  Tell me what else I can use to identify the troublemaker to the

> user.


My hope was that a caller of this would prefix the right context. For
example, if the device were created by -device, the error message would
be prefixed with the whole "-device driver=pflash...:" string, which
gives enough context to the user.

Machine code that instantiates the device based on -drive should
probably do something similar.

Kevin
Philippe Mathieu-Daudé March 8, 2019, 6:51 p.m. UTC | #14
On 3/8/19 4:40 PM, Kevin Wolf wrote:
> Am 08.03.2019 um 15:29 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>>

>>> Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>>>> Laszlo Ersek <lersek@redhat.com> writes:

>>>>> This one has got to be one of the longest bike-shedding sessions! :)

>>>>>

>>>>> I'm fine with this patch, but I could suggest two improvements.

>>>>>

>>>>> (1) When blk_getlength() fails, we could format the negative error code

>>>>> returned by it into the error message.

>>>>

>>>> I can do that.

>>>

>>> By using error_setg_errno(), I assume. Not throwing away error details

>>> is always good.

>>>

>>>>> (2) We could extract the common code to a new function in

>>>>> "hw/block/block.c". (It says "Common code for block device models" on

>>>>> the tin.)

>>>>

>>>> There's so much common code in these two files even before this patch...

>>>

>>> My understanding is that hw/block/block.c contains code that is

>>> potentially useful to all kinds of block devices, not random code that

>>> two specific similar devices happen to share.

>>>

>>> If we want to deduplicate some code in the flash devices, without any

>>> expectation that other devices will use it at some point, I'd rather

>>> create a new source file hw/block/pflash_common.c or something like

>>> that.

>>

>> Yes.

>>

>> The helper I came up with (appended) isn't really specific to flash

>> devices.  Would it be okay for hw/block/block.c even though only the two

>> flash devices use it for now?

> 

> Hm, it feels more like a helper for devices that can't decide whether

> they want to be a block device or not. Or that actually don't want to be

> a block device, but use a BlockBackend anyway. Reading in the whole

> image isn't something that a normal block device would do.

> 

> But yes, it doesn't have flash-specific knowledge, even though I hope

> that it's functionality that will remain very specific to these two

> devices.

> 

> So it's your call, I don't have a strong opinion either way.

> 

>>

>> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>>                                  Error **errp)

>> {

>>     int64_t blk_len;

>>     int ret;

>>

>>     blk_len = blk_getlength(blk);

>>     if (blk_len < 0) {

>>         error_setg_errno(errp, -blk_len,

>>                          "can't get size of block backend '%s'",

>>                          blk_name(blk));

>>         return false;

>>     }

>>     if (blk_len != size) {

>>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>>                    "block backend '%s' provides %" PRIu64 " bytes",

>>                    size, blk_name(blk), blk_len);

> 

> Should size use HWADDR_PRIu?

> 

> I'm not sure if printing the BlockBackend name is a good idea because

> hopefully one day the BlockBackend will be anonymous even for the flash

> devices.

> 

>>         return false;

>>     }

>>

>>     /* TODO for @size > BDRV_REQUEST_MAX_BYTES, we'd need to loop */

>>     assert(size <= BDRV_REQUEST_MAX_BYTES);

> 

> I don't think we'd ever want to read in more than 2 GB into a memory

> buffer. Before we even get close to this point, the devices should be

> reworked to be more like an actual block device and read only what is

> actually accessed.


The biggest NOR available in the market is 256 MiB (bigger size is
barely ChipSelect MMIO-addressable).

Maybe you can use:

#define NOR_FLASH_MAX_BYTES (256 * MiB)

and refuse bigger flashes.

We could also check what is the widest chip-select range addressable
by all the supported architectures. I don't think it's worth it.

> 

>>     ret = blk_pread(blk, 0, buf, size);


OK, this function is named blk_check_size_and_read_all. Here we
read_all. Refactoring this device we should be able to read at
most sizeof(the biggest sector).

But this implies some serious work.

>>     if (ret < 0) {

>>         error_setg_errno(errp, -ret, "can't read block backend '%s'",

>>                          blk_name(blk));

>>         return false;

>>     }

>>     return true;

>> }

> 

> Kevin

>
Markus Armbruster March 9, 2019, 9:20 a.m. UTC | #15
Philippe Mathieu-Daudé <philmd@redhat.com> writes:

> On 3/8/19 4:40 PM, Kevin Wolf wrote:

>> Am 08.03.2019 um 15:29 hat Markus Armbruster geschrieben:

>>> Kevin Wolf <kwolf@redhat.com> writes:

>>>

>>>> Am 08.03.2019 um 13:28 hat Markus Armbruster geschrieben:

>>>>> Laszlo Ersek <lersek@redhat.com> writes:

>>>>>> This one has got to be one of the longest bike-shedding sessions! :)

>>>>>>

>>>>>> I'm fine with this patch, but I could suggest two improvements.

>>>>>>

>>>>>> (1) When blk_getlength() fails, we could format the negative error code

>>>>>> returned by it into the error message.

>>>>>

>>>>> I can do that.

>>>>

>>>> By using error_setg_errno(), I assume. Not throwing away error details

>>>> is always good.

>>>>

>>>>>> (2) We could extract the common code to a new function in

>>>>>> "hw/block/block.c". (It says "Common code for block device models" on

>>>>>> the tin.)

>>>>>

>>>>> There's so much common code in these two files even before this patch...

>>>>

>>>> My understanding is that hw/block/block.c contains code that is

>>>> potentially useful to all kinds of block devices, not random code that

>>>> two specific similar devices happen to share.

>>>>

>>>> If we want to deduplicate some code in the flash devices, without any

>>>> expectation that other devices will use it at some point, I'd rather

>>>> create a new source file hw/block/pflash_common.c or something like

>>>> that.

>>>

>>> Yes.

>>>

>>> The helper I came up with (appended) isn't really specific to flash

>>> devices.  Would it be okay for hw/block/block.c even though only the two

>>> flash devices use it for now?

>> 

>> Hm, it feels more like a helper for devices that can't decide whether

>> they want to be a block device or not. Or that actually don't want to be

>> a block device, but use a BlockBackend anyway. Reading in the whole

>> image isn't something that a normal block device would do.

>> 

>> But yes, it doesn't have flash-specific knowledge, even though I hope

>> that it's functionality that will remain very specific to these two

>> devices.

>> 

>> So it's your call, I don't have a strong opinion either way.

>> 

>>>

>>> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>>>                                  Error **errp)

>>> {

>>>     int64_t blk_len;

>>>     int ret;

>>>

>>>     blk_len = blk_getlength(blk);

>>>     if (blk_len < 0) {

>>>         error_setg_errno(errp, -blk_len,

>>>                          "can't get size of block backend '%s'",

>>>                          blk_name(blk));

>>>         return false;

>>>     }

>>>     if (blk_len != size) {

>>>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>>>                    "block backend '%s' provides %" PRIu64 " bytes",

>>>                    size, blk_name(blk), blk_len);

>> 

>> Should size use HWADDR_PRIu?

>> 

>> I'm not sure if printing the BlockBackend name is a good idea because

>> hopefully one day the BlockBackend will be anonymous even for the flash

>> devices.

>> 

>>>         return false;

>>>     }

>>>

>>>     /* TODO for @size > BDRV_REQUEST_MAX_BYTES, we'd need to loop */

>>>     assert(size <= BDRV_REQUEST_MAX_BYTES);

>> 

>> I don't think we'd ever want to read in more than 2 GB into a memory

>> buffer. Before we even get close to this point, the devices should be

>> reworked to be more like an actual block device and read only what is

>> actually accessed.

>

> The biggest NOR available in the market is 256 MiB (bigger size is

> barely ChipSelect MMIO-addressable).

>

> Maybe you can use:

>

> #define NOR_FLASH_MAX_BYTES (256 * MiB)

>

> and refuse bigger flashes.


The comment next to the definition of property "width" in pflash_cfi01.c
suggests the device model can emulate a bunch of flash chips wired
together:

    /* width here is the overall width of this QEMU device in bytes.
     * The QEMU device may be emulating a number of flash devices
     * wired up in parallel; the width of each individual flash
     * device should be specified via device-width. If the individual
     * devices have a maximum width which is greater than the width
     * they are being used for, this maximum width should be set via
     * max-device-width (which otherwise defaults to device-width).
     * So for instance a 32-bit wide QEMU flash device made from four
     * 16-bit flash devices used in 8-bit wide mode would be configured
     * with width = 4, device-width = 1, max-device-width = 2.
     *
     * If device-width is not specified we default to backwards
     * compatible behaviour which is a bad emulation of two
     * 16 bit devices making up a 32 bit wide QEMU device. This
     * is deprecated for new uses of this device.
     */

> We could also check what is the widest chip-select range addressable

> by all the supported architectures. I don't think it's worth it.

>

>> 

>>>     ret = blk_pread(blk, 0, buf, size);

>

> OK, this function is named blk_check_size_and_read_all. Here we

> read_all. Refactoring this device we should be able to read at

> most sizeof(the biggest sector).

>

> But this implies some serious work.


Here's another thing to consider: shadowing in RAM.  Attractive when you
got the RAM, and it's faster than (parallel) flash.  If you shadow
anyway, you can just as well use serial flash, and throw in compression.
Now, emulated pflash can execute code just as fast es emulated RAM.  The
question is what kind of hardware the firmware expects.  Even if it
supports a variety of hardware, it may still have preferences.

>>>     if (ret < 0) {

>>>         error_setg_errno(errp, -ret, "can't read block backend '%s'",

>>>                          blk_name(blk));

>>>         return false;

>>>     }

>>>     return true;

>>> }

>> 

>> Kevin

>>
Markus Armbruster March 18, 2019, 4:03 p.m. UTC | #16
Kevin Wolf <kwolf@redhat.com> writes:

> Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:

>> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>> >>                                  Error **errp)

>> >> {

>> >>     int64_t blk_len;

>> >>     int ret;

>> >> 

>> >>     blk_len = blk_getlength(blk);

>> >>     if (blk_len < 0) {

>> >>         error_setg_errno(errp, -blk_len,

>> >>                          "can't get size of block backend '%s'",

>> >>                          blk_name(blk));

>> >>         return false;

>> >>     }

>> >>     if (blk_len != size) {

>> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>> >>                    "block backend '%s' provides %" PRIu64 " bytes",

>> >>                    size, blk_name(blk), blk_len);

>> >

>> > Should size use HWADDR_PRIu?

>> 

>> Yes.

>> 

>> > I'm not sure if printing the BlockBackend name is a good idea because

>> > hopefully one day the BlockBackend will be anonymous even for the flash

>> > devices.

>> 

>> Hmm.  Tell me what else I can use to identify the troublemaker to the

>> user.

>

> My hope was that a caller of this would prefix the right context. For

> example, if the device were created by -device, the error message would

> be prefixed with the whole "-device driver=pflash...:" string, which

> gives enough context to the user.

>

> Machine code that instantiates the device based on -drive should

> probably do something similar.


I'm very much in favor of reporting errors like "where to fix it: what's
wrong".  Heck, I created infrastructure for it and put it to use.
Sadly, we're not even close to being able to using it as intended here.

Ideally, we'd annotate every bit of configuration with its location
information, and use that for error messages.

In reality, we make only half-hearted attempts here and there to keep
location information around.  It doesn't make it to realize():

    $ qemu-system-ppc64 -S -display none -M sam460ex -drive if=pflash,format=raw,file=1b.img
    qemu-system-ppc64: Initialization of device cfi.pflash01 failed: device requires 1048576 bytes, block backend 'pflash0' provides 512 bytes

As you can see, the qdev core prefixes the error with "Initialization of
device TYPE-NAME failed: " instead a location.  Better than nothing.
Ambiguous when there's more than one device of this type.

Say you decide to do the right thing *now*, i.e. get configuration
location information to realize().  What location information?  The
device being realized is a mandatory onboard device, created by code.
There is no configuration connected to it.

Then you think hard about the code, and realize that the -drive
if=pflash kind of configures this device (and its backend) anyway.  But
only "kind of", because the configuration is optional; the device is
created even when the user doesn't specify -drive if=pflash.  And then
there's still no configuration connected to it.

Even if a backend exists, it needn't be user-configured.  It could also
be created by default.  Again, we have nowhere to point to.

Even if we proclaim there won't ever be errors involving onboard devices
without backends or with default backends (haha), connecting the device
to the right piece of configuration is still tricky.  You're in a twisty
little maze of special cases, all different.

Now, if our machines were built entirely from configuration rather than
code, the ugly "no location" cases wouldn't exist.  With systematic
configuration location tracking, we could then do something like

    qemu-system-ppc64: /usr/share/qemu/ppc64/sam460ex.cfg:42: device requires 1048576 bytes, block backend provides 512 bytes
    qemu-system-ppc64: -drive if=pflash,format=raw,file=1b: info: this is the block backend

where /usr/share/qemu/ppc64/sam460ex.cfg:42 points to the spot that
configures the onboard cfi.pflash01.

For a device created with -device, we'd get

    qemu-system-ppc64: -device cfi.pflash01,drive=pfl0: device requires 1048576 bytes, block backend provides 512 bytes
    qemu-system-ppc64: -drive if=none,id=pfl0,format=raw,file=1b: info: this is the block backend

(hypothetical; cfi.pflash01 is not available with -device)

I hope you'll understand why I'm declining to drain this swamp right
now.

Naming the block backend helps the user and is simple.  You tell me
it'll break some day (if I understand you correctly).  Pity.  Any other
ideas on how to help the user that don't involve swamp draining?
Kevin Wolf March 18, 2019, 4:25 p.m. UTC | #17
Am 18.03.2019 um 17:03 hat Markus Armbruster geschrieben:
> Kevin Wolf <kwolf@redhat.com> writes:

> 

> > Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:

> >> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

> >> >>                                  Error **errp)

> >> >> {

> >> >>     int64_t blk_len;

> >> >>     int ret;

> >> >> 

> >> >>     blk_len = blk_getlength(blk);

> >> >>     if (blk_len < 0) {

> >> >>         error_setg_errno(errp, -blk_len,

> >> >>                          "can't get size of block backend '%s'",

> >> >>                          blk_name(blk));

> >> >>         return false;

> >> >>     }

> >> >>     if (blk_len != size) {

> >> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

> >> >>                    "block backend '%s' provides %" PRIu64 " bytes",

> >> >>                    size, blk_name(blk), blk_len);

> >> >

> >> > Should size use HWADDR_PRIu?

> >> 

> >> Yes.

> >> 

> >> > I'm not sure if printing the BlockBackend name is a good idea because

> >> > hopefully one day the BlockBackend will be anonymous even for the flash

> >> > devices.

> >> 

> >> Hmm.  Tell me what else I can use to identify the troublemaker to the

> >> user.

> >

> > My hope was that a caller of this would prefix the right context. For

> > example, if the device were created by -device, the error message would

> > be prefixed with the whole "-device driver=pflash...:" string, which

> > gives enough context to the user.

> >

> > Machine code that instantiates the device based on -drive should

> > probably do something similar.

> 

> I'm very much in favor of reporting errors like "where to fix it: what's

> wrong".  Heck, I created infrastructure for it and put it to use.

> Sadly, we're not even close to being able to using it as intended here.

> 

> Ideally, we'd annotate every bit of configuration with its location

> information, and use that for error messages.

> 

> In reality, we make only half-hearted attempts here and there to keep

> location information around.  It doesn't make it to realize():

> 

>     $ qemu-system-ppc64 -S -display none -M sam460ex -drive if=pflash,format=raw,file=1b.img

>     qemu-system-ppc64: Initialization of device cfi.pflash01 failed: device requires 1048576 bytes, block backend 'pflash0' provides 512 bytes


Good enough even without the 'pflash0' block backend name, honestly. If
I know that QEMU magically creates a block backend named 'pflash0', I
should also be able to figure out where 'cfi.pflash01' comes from.

> As you can see, the qdev core prefixes the error with "Initialization of

> device TYPE-NAME failed: " instead a location.  Better than nothing.

> Ambiguous when there's more than one device of this type.

> 

> Say you decide to do the right thing *now*, i.e. get configuration

> location information to realize().  What location information?  The

> device being realized is a mandatory onboard device, created by code.

> There is no configuration connected to it.

> 

> Then you think hard about the code, and realize that the -drive

> if=pflash kind of configures this device (and its backend) anyway.  But

> only "kind of", because the configuration is optional; the device is

> created even when the user doesn't specify -drive if=pflash.  And then

> there's still no configuration connected to it.

> 

> Even if a backend exists, it needn't be user-configured.  It could also

> be created by default.  Again, we have nowhere to point to.

> 

> Even if we proclaim there won't ever be errors involving onboard devices

> without backends or with default backends (haha), connecting the device

> to the right piece of configuration is still tricky.  You're in a twisty

> little maze of special cases, all different.

> 

> Now, if our machines were built entirely from configuration rather than

> code, the ugly "no location" cases wouldn't exist.  With systematic

> configuration location tracking, we could then do something like

> 

>     qemu-system-ppc64: /usr/share/qemu/ppc64/sam460ex.cfg:42: device requires 1048576 bytes, block backend provides 512 bytes

>     qemu-system-ppc64: -drive if=pflash,format=raw,file=1b: info: this is the block backend

> 

> where /usr/share/qemu/ppc64/sam460ex.cfg:42 points to the spot that

> configures the onboard cfi.pflash01.

> 

> For a device created with -device, we'd get

> 

>     qemu-system-ppc64: -device cfi.pflash01,drive=pfl0: device requires 1048576 bytes, block backend provides 512 bytes

>     qemu-system-ppc64: -drive if=none,id=pfl0,format=raw,file=1b: info: this is the block backend

> 

> (hypothetical; cfi.pflash01 is not available with -device)

> 

> I hope you'll understand why I'm declining to drain this swamp right

> now.


Yes, but I'll add the question if now is really the time to optimise
error messages for -drive.

> Naming the block backend helps the user and is simple.  You tell me

> it'll break some day (if I understand you correctly).  Pity.  Any other

> ideas on how to help the user that don't involve swamp draining?


I thought that the very work you're doing right now on pflash is
motivated by -blockdev support. The moment you achieve this goal, you'll
get an empty string as the block backend name here.

Of course, a message like "device requires 1048576 bytes, block backend
'' provides 512 bytes" is not the end of the world either. It's just a
decision whether our preferred interface with the best error messages is
-drive or -blockdev.

Kevin
Markus Armbruster March 18, 2019, 5:21 p.m. UTC | #18
Kevin Wolf <kwolf@redhat.com> writes:

> Am 18.03.2019 um 17:03 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>> 

>> > Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:

>> >> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>> >> >>                                  Error **errp)

>> >> >> {

>> >> >>     int64_t blk_len;

>> >> >>     int ret;

>> >> >> 

>> >> >>     blk_len = blk_getlength(blk);

>> >> >>     if (blk_len < 0) {

>> >> >>         error_setg_errno(errp, -blk_len,

>> >> >>                          "can't get size of block backend '%s'",

>> >> >>                          blk_name(blk));

>> >> >>         return false;

>> >> >>     }

>> >> >>     if (blk_len != size) {

>> >> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>> >> >>                    "block backend '%s' provides %" PRIu64 " bytes",

>> >> >>                    size, blk_name(blk), blk_len);

>> >> >

>> >> > Should size use HWADDR_PRIu?

>> >> 

>> >> Yes.

>> >> 

>> >> > I'm not sure if printing the BlockBackend name is a good idea because

>> >> > hopefully one day the BlockBackend will be anonymous even for the flash

>> >> > devices.

>> >> 

>> >> Hmm.  Tell me what else I can use to identify the troublemaker to the

>> >> user.

>> >

>> > My hope was that a caller of this would prefix the right context. For

>> > example, if the device were created by -device, the error message would

>> > be prefixed with the whole "-device driver=pflash...:" string, which

>> > gives enough context to the user.

>> >

>> > Machine code that instantiates the device based on -drive should

>> > probably do something similar.

>> 

>> I'm very much in favor of reporting errors like "where to fix it: what's

>> wrong".  Heck, I created infrastructure for it and put it to use.

>> Sadly, we're not even close to being able to using it as intended here.

>> 

>> Ideally, we'd annotate every bit of configuration with its location

>> information, and use that for error messages.

>> 

>> In reality, we make only half-hearted attempts here and there to keep

>> location information around.  It doesn't make it to realize():

>> 

>>     $ qemu-system-ppc64 -S -display none -M sam460ex -drive if=pflash,format=raw,file=1b.img

>>     qemu-system-ppc64: Initialization of device cfi.pflash01 failed: device requires 1048576 bytes, block backend 'pflash0' provides 512 bytes

>

> Good enough even without the 'pflash0' block backend name, honestly. If

> I know that QEMU magically creates a block backend named 'pflash0', I

> should also be able to figure out where 'cfi.pflash01' comes from.


    $ qemu-system-ppc64 -S -display none -M taihu -drive if=pflash,format=raw,file=eins.img -drive if=pflash,unit=1,format=raw,file=zwei.img
    qemu-system-ppc64: Initialization of device cfi.pflash02 failed: device requires 2097152 bytes, block backend provides 512 bytes

Which one's short, eins.img or zwei.img?

Good enough anyway?

>> As you can see, the qdev core prefixes the error with "Initialization of

>> device TYPE-NAME failed: " instead a location.  Better than nothing.

>> Ambiguous when there's more than one device of this type.

[...]
>> I hope you'll understand why I'm declining to drain this swamp right

>> now.

>

> Yes, but I'll add the question if now is really the time to optimise

> error messages for -drive.


It isn't, and ...

>> Naming the block backend helps the user and is simple.  You tell me

>> it'll break some day (if I understand you correctly).  Pity.  Any other

>> ideas on how to help the user that don't involve swamp draining?

>

> I thought that the very work you're doing right now on pflash is

> motivated by -blockdev support. The moment you achieve this goal, you'll

> get an empty string as the block backend name here.


... that's exactly why I'm asking for other ideas.

> Of course, a message like "device requires 1048576 bytes, block backend

> '' provides 512 bytes" is not the end of the world either. It's just a

> decision whether our preferred interface with the best error messages is

> -drive or -blockdev.


Given the sad state of location tracking, I'm afraid we do need to map
from BlockBackend to some text that helps the user with finding the
place to correct the problem.

Any ideas on that?
Kevin Wolf March 19, 2019, 9:03 a.m. UTC | #19
Am 18.03.2019 um 18:21 hat Markus Armbruster geschrieben:
> Kevin Wolf <kwolf@redhat.com> writes:

> 

> > Am 18.03.2019 um 17:03 hat Markus Armbruster geschrieben:

> >> Kevin Wolf <kwolf@redhat.com> writes:

> >> 

> >> > Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:

> >> >> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

> >> >> >>                                  Error **errp)

> >> >> >> {

> >> >> >>     int64_t blk_len;

> >> >> >>     int ret;

> >> >> >> 

> >> >> >>     blk_len = blk_getlength(blk);

> >> >> >>     if (blk_len < 0) {

> >> >> >>         error_setg_errno(errp, -blk_len,

> >> >> >>                          "can't get size of block backend '%s'",

> >> >> >>                          blk_name(blk));

> >> >> >>         return false;

> >> >> >>     }

> >> >> >>     if (blk_len != size) {

> >> >> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

> >> >> >>                    "block backend '%s' provides %" PRIu64 " bytes",

> >> >> >>                    size, blk_name(blk), blk_len);

> >> >> >

> >> >> > Should size use HWADDR_PRIu?

> >> >> 

> >> >> Yes.

> >> >> 

> >> >> > I'm not sure if printing the BlockBackend name is a good idea because

> >> >> > hopefully one day the BlockBackend will be anonymous even for the flash

> >> >> > devices.

> >> >> 

> >> >> Hmm.  Tell me what else I can use to identify the troublemaker to the

> >> >> user.

> >> >

> >> > My hope was that a caller of this would prefix the right context. For

> >> > example, if the device were created by -device, the error message would

> >> > be prefixed with the whole "-device driver=pflash...:" string, which

> >> > gives enough context to the user.

> >> >

> >> > Machine code that instantiates the device based on -drive should

> >> > probably do something similar.

> >> 

> >> I'm very much in favor of reporting errors like "where to fix it: what's

> >> wrong".  Heck, I created infrastructure for it and put it to use.

> >> Sadly, we're not even close to being able to using it as intended here.

> >> 

> >> Ideally, we'd annotate every bit of configuration with its location

> >> information, and use that for error messages.

> >> 

> >> In reality, we make only half-hearted attempts here and there to keep

> >> location information around.  It doesn't make it to realize():

> >> 

> >>     $ qemu-system-ppc64 -S -display none -M sam460ex -drive if=pflash,format=raw,file=1b.img

> >>     qemu-system-ppc64: Initialization of device cfi.pflash01 failed: device requires 1048576 bytes, block backend 'pflash0' provides 512 bytes

> >

> > Good enough even without the 'pflash0' block backend name, honestly. If

> > I know that QEMU magically creates a block backend named 'pflash0', I

> > should also be able to figure out where 'cfi.pflash01' comes from.

> 

>     $ qemu-system-ppc64 -S -display none -M taihu -drive if=pflash,format=raw,file=eins.img -drive if=pflash,unit=1,format=raw,file=zwei.img

>     qemu-system-ppc64: Initialization of device cfi.pflash02 failed: device requires 2097152 bytes, block backend provides 512 bytes

> 

> Which one's short, eins.img or zwei.img?

> 

> Good enough anyway?




> >> As you can see, the qdev core prefixes the error with "Initialization of

> >> device TYPE-NAME failed: " instead a location.  Better than nothing.

> >> Ambiguous when there's more than one device of this type.

> [...]

> >> I hope you'll understand why I'm declining to drain this swamp right

> >> now.

> >

> > Yes, but I'll add the question if now is really the time to optimise

> > error messages for -drive.

> 

> It isn't, and ...

> 

> >> Naming the block backend helps the user and is simple.  You tell me

> >> it'll break some day (if I understand you correctly).  Pity.  Any other

> >> ideas on how to help the user that don't involve swamp draining?

> >

> > I thought that the very work you're doing right now on pflash is

> > motivated by -blockdev support. The moment you achieve this goal, you'll

> > get an empty string as the block backend name here.

> 

> ... that's exactly why I'm asking for other ideas.

> 

> > Of course, a message like "device requires 1048576 bytes, block backend

> > '' provides 512 bytes" is not the end of the world either. It's just a

> > decision whether our preferred interface with the best error messages is

> > -drive or -blockdev.

> 

> Given the sad state of location tracking, I'm afraid we do need to map

> from BlockBackend to some text that helps the user with finding the

> place to correct the problem.

> 

> Any ideas on that?


If the given BlockBackend isn't empty, you could fall back to the node
name, i.e. bdrv_get_device_or_node_name(blk_bs(blk)).

If you want to report an error because it's empty, I think we actually
know that it's -drive because you can't create a BlockBackend with
-blockdev and flash devices don't create empty BlockBackends either. So
using blk_name() there looks fine.

Maybe we want a BlockBackend level function that returns the
BlockBackend name if it isn't empty, and the root node name otherwise?

Kevin
Markus Armbruster March 19, 2019, 10:34 a.m. UTC | #20
Kevin Wolf <kwolf@redhat.com> writes:

> Am 18.03.2019 um 18:21 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>> 

>> > Am 18.03.2019 um 17:03 hat Markus Armbruster geschrieben:

>> >> Kevin Wolf <kwolf@redhat.com> writes:

>> >> 

>> >> > Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:

>> >> >> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

>> >> >> >>                                  Error **errp)

>> >> >> >> {

>> >> >> >>     int64_t blk_len;

>> >> >> >>     int ret;

>> >> >> >> 

>> >> >> >>     blk_len = blk_getlength(blk);

>> >> >> >>     if (blk_len < 0) {

>> >> >> >>         error_setg_errno(errp, -blk_len,

>> >> >> >>                          "can't get size of block backend '%s'",

>> >> >> >>                          blk_name(blk));

>> >> >> >>         return false;

>> >> >> >>     }

>> >> >> >>     if (blk_len != size) {

>> >> >> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

>> >> >> >>                    "block backend '%s' provides %" PRIu64 " bytes",

>> >> >> >>                    size, blk_name(blk), blk_len);

>> >> >> >

>> >> >> > Should size use HWADDR_PRIu?

>> >> >> 

>> >> >> Yes.

>> >> >> 

>> >> >> > I'm not sure if printing the BlockBackend name is a good idea because

>> >> >> > hopefully one day the BlockBackend will be anonymous even for the flash

>> >> >> > devices.

>> >> >> 

>> >> >> Hmm.  Tell me what else I can use to identify the troublemaker to the

>> >> >> user.

>> >> >

>> >> > My hope was that a caller of this would prefix the right context. For

>> >> > example, if the device were created by -device, the error message would

>> >> > be prefixed with the whole "-device driver=pflash...:" string, which

>> >> > gives enough context to the user.

>> >> >

>> >> > Machine code that instantiates the device based on -drive should

>> >> > probably do something similar.

>> >> 

>> >> I'm very much in favor of reporting errors like "where to fix it: what's

>> >> wrong".  Heck, I created infrastructure for it and put it to use.

>> >> Sadly, we're not even close to being able to using it as intended here.

>> >> 

>> >> Ideally, we'd annotate every bit of configuration with its location

>> >> information, and use that for error messages.

>> >> 

>> >> In reality, we make only half-hearted attempts here and there to keep

>> >> location information around.  It doesn't make it to realize():

>> >> 

>> >>     $ qemu-system-ppc64 -S -display none -M sam460ex -drive if=pflash,format=raw,file=1b.img

>> >>     qemu-system-ppc64: Initialization of device cfi.pflash01 failed: device requires 1048576 bytes, block backend 'pflash0' provides 512 bytes

>> >

>> > Good enough even without the 'pflash0' block backend name, honestly. If

>> > I know that QEMU magically creates a block backend named 'pflash0', I

>> > should also be able to figure out where 'cfi.pflash01' comes from.

>> 

>>     $ qemu-system-ppc64 -S -display none -M taihu -drive if=pflash,format=raw,file=eins.img -drive if=pflash,unit=1,format=raw,file=zwei.img

>>     qemu-system-ppc64: Initialization of device cfi.pflash02 failed: device requires 2097152 bytes, block backend provides 512 bytes

>> 

>> Which one's short, eins.img or zwei.img?

>> 

>> Good enough anyway?

>

>

>

>> >> As you can see, the qdev core prefixes the error with "Initialization of

>> >> device TYPE-NAME failed: " instead a location.  Better than nothing.

>> >> Ambiguous when there's more than one device of this type.

>> [...]

>> >> I hope you'll understand why I'm declining to drain this swamp right

>> >> now.

>> >

>> > Yes, but I'll add the question if now is really the time to optimise

>> > error messages for -drive.

>> 

>> It isn't, and ...

>> 

>> >> Naming the block backend helps the user and is simple.  You tell me

>> >> it'll break some day (if I understand you correctly).  Pity.  Any other

>> >> ideas on how to help the user that don't involve swamp draining?

>> >

>> > I thought that the very work you're doing right now on pflash is

>> > motivated by -blockdev support. The moment you achieve this goal, you'll

>> > get an empty string as the block backend name here.

>> 

>> ... that's exactly why I'm asking for other ideas.

>> 

>> > Of course, a message like "device requires 1048576 bytes, block backend

>> > '' provides 512 bytes" is not the end of the world either. It's just a

>> > decision whether our preferred interface with the best error messages is

>> > -drive or -blockdev.

>> 

>> Given the sad state of location tracking, I'm afraid we do need to map

>> from BlockBackend to some text that helps the user with finding the

>> place to correct the problem.

>> 

>> Any ideas on that?

>

> If the given BlockBackend isn't empty, you could fall back to the node

> name, i.e. bdrv_get_device_or_node_name(blk_bs(blk)).

>

> If you want to report an error because it's empty, I think we actually

> know that it's -drive because you can't create a BlockBackend with

> -blockdev and flash devices don't create empty BlockBackends either. So

> using blk_name() there looks fine.


Now I'm confused.  You just convinced me I need more than blk_name().
Hmm, testing...

    $ qemu-system-x86_64 -nodefaults -S -display none -blockdev node-name=pflash0,driver=file,filename=eio.img -machine pflash0=pflash0
    qemu-system-x86_64: Initialization of device cfi.pflash01 failed: can't read block backend '': Input/output error

Yes, I need more.

> Maybe we want a BlockBackend level function that returns the

> BlockBackend name if it isn't empty, and the root node name otherwise?


Makes sense to me.

What about calling it blk_name()?  ;-P

This quick voodoo patch crashes:

diff --git a/block/block-backend.c b/block/block-backend.c
index edad02a0f2..3c2527f9c0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -642,7 +642,7 @@ void monitor_remove_blk(BlockBackend *blk)
  */
 const char *blk_name(const BlockBackend *blk)
 {
-    return blk->name ?: "";
+    return blk->name ?: bdrv_get_device_or_node_name(blk_bs(blk));
 }
 
 /*

It's also not const-correct.
Kevin Wolf March 19, 2019, 10:46 a.m. UTC | #21
Am 19.03.2019 um 11:34 hat Markus Armbruster geschrieben:
> Kevin Wolf <kwolf@redhat.com> writes:

> 

> > Am 18.03.2019 um 18:21 hat Markus Armbruster geschrieben:

> >> Kevin Wolf <kwolf@redhat.com> writes:

> >> 

> >> > Am 18.03.2019 um 17:03 hat Markus Armbruster geschrieben:

> >> >> Kevin Wolf <kwolf@redhat.com> writes:

> >> >> 

> >> >> > Am 08.03.2019 um 18:03 hat Markus Armbruster geschrieben:

> >> >> >> >> bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size,

> >> >> >> >>                                  Error **errp)

> >> >> >> >> {

> >> >> >> >>     int64_t blk_len;

> >> >> >> >>     int ret;

> >> >> >> >> 

> >> >> >> >>     blk_len = blk_getlength(blk);

> >> >> >> >>     if (blk_len < 0) {

> >> >> >> >>         error_setg_errno(errp, -blk_len,

> >> >> >> >>                          "can't get size of block backend '%s'",

> >> >> >> >>                          blk_name(blk));

> >> >> >> >>         return false;

> >> >> >> >>     }

> >> >> >> >>     if (blk_len != size) {

> >> >> >> >>         error_setg(errp, "device requires %" PRIu64 " bytes, "

> >> >> >> >>                    "block backend '%s' provides %" PRIu64 " bytes",

> >> >> >> >>                    size, blk_name(blk), blk_len);

> >> >> >> >

> >> >> >> > Should size use HWADDR_PRIu?

> >> >> >> 

> >> >> >> Yes.

> >> >> >> 

> >> >> >> > I'm not sure if printing the BlockBackend name is a good idea because

> >> >> >> > hopefully one day the BlockBackend will be anonymous even for the flash

> >> >> >> > devices.

> >> >> >> 

> >> >> >> Hmm.  Tell me what else I can use to identify the troublemaker to the

> >> >> >> user.

> >> >> >

> >> >> > My hope was that a caller of this would prefix the right context. For

> >> >> > example, if the device were created by -device, the error message would

> >> >> > be prefixed with the whole "-device driver=pflash...:" string, which

> >> >> > gives enough context to the user.

> >> >> >

> >> >> > Machine code that instantiates the device based on -drive should

> >> >> > probably do something similar.

> >> >> 

> >> >> I'm very much in favor of reporting errors like "where to fix it: what's

> >> >> wrong".  Heck, I created infrastructure for it and put it to use.

> >> >> Sadly, we're not even close to being able to using it as intended here.

> >> >> 

> >> >> Ideally, we'd annotate every bit of configuration with its location

> >> >> information, and use that for error messages.

> >> >> 

> >> >> In reality, we make only half-hearted attempts here and there to keep

> >> >> location information around.  It doesn't make it to realize():

> >> >> 

> >> >>     $ qemu-system-ppc64 -S -display none -M sam460ex -drive if=pflash,format=raw,file=1b.img

> >> >>     qemu-system-ppc64: Initialization of device cfi.pflash01 failed: device requires 1048576 bytes, block backend 'pflash0' provides 512 bytes

> >> >

> >> > Good enough even without the 'pflash0' block backend name, honestly. If

> >> > I know that QEMU magically creates a block backend named 'pflash0', I

> >> > should also be able to figure out where 'cfi.pflash01' comes from.

> >> 

> >>     $ qemu-system-ppc64 -S -display none -M taihu -drive if=pflash,format=raw,file=eins.img -drive if=pflash,unit=1,format=raw,file=zwei.img

> >>     qemu-system-ppc64: Initialization of device cfi.pflash02 failed: device requires 2097152 bytes, block backend provides 512 bytes

> >> 

> >> Which one's short, eins.img or zwei.img?

> >> 

> >> Good enough anyway?

> >

> >

> >

> >> >> As you can see, the qdev core prefixes the error with "Initialization of

> >> >> device TYPE-NAME failed: " instead a location.  Better than nothing.

> >> >> Ambiguous when there's more than one device of this type.

> >> [...]

> >> >> I hope you'll understand why I'm declining to drain this swamp right

> >> >> now.

> >> >

> >> > Yes, but I'll add the question if now is really the time to optimise

> >> > error messages for -drive.

> >> 

> >> It isn't, and ...

> >> 

> >> >> Naming the block backend helps the user and is simple.  You tell me

> >> >> it'll break some day (if I understand you correctly).  Pity.  Any other

> >> >> ideas on how to help the user that don't involve swamp draining?

> >> >

> >> > I thought that the very work you're doing right now on pflash is

> >> > motivated by -blockdev support. The moment you achieve this goal, you'll

> >> > get an empty string as the block backend name here.

> >> 

> >> ... that's exactly why I'm asking for other ideas.

> >> 

> >> > Of course, a message like "device requires 1048576 bytes, block backend

> >> > '' provides 512 bytes" is not the end of the world either. It's just a

> >> > decision whether our preferred interface with the best error messages is

> >> > -drive or -blockdev.

> >> 

> >> Given the sad state of location tracking, I'm afraid we do need to map

> >> from BlockBackend to some text that helps the user with finding the

> >> place to correct the problem.

> >> 

> >> Any ideas on that?

> >

> > If the given BlockBackend isn't empty, you could fall back to the node

> > name, i.e. bdrv_get_device_or_node_name(blk_bs(blk)).

> >

> > If you want to report an error because it's empty, I think we actually

> > know that it's -drive because you can't create a BlockBackend with

> > -blockdev and flash devices don't create empty BlockBackends either. So

> > using blk_name() there looks fine.

> 

> Now I'm confused.  You just convinced me I need more than blk_name().

> Hmm, testing...

> 

>     $ qemu-system-x86_64 -nodefaults -S -display none -blockdev node-name=pflash0,driver=file,filename=eio.img -machine pflash0=pflash0

>     qemu-system-x86_64: Initialization of device cfi.pflash01 failed: can't read block backend '': Input/output error

> 

> Yes, I need more.


This is not an empty BlockBackend, it has a root node inserted. You do
need more for non-empty BlockBackends.

> > Maybe we want a BlockBackend level function that returns the

> > BlockBackend name if it isn't empty, and the root node name otherwise?

> 

> Makes sense to me.

> 

> What about calling it blk_name()?  ;-P

> 

> This quick voodoo patch crashes:

> 

> diff --git a/block/block-backend.c b/block/block-backend.c

> index edad02a0f2..3c2527f9c0 100644

> --- a/block/block-backend.c

> +++ b/block/block-backend.c

> @@ -642,7 +642,7 @@ void monitor_remove_blk(BlockBackend *blk)

>   */

>  const char *blk_name(const BlockBackend *blk)

>  {

> -    return blk->name ?: "";

> +    return blk->name ?: bdrv_get_device_or_node_name(blk_bs(blk));

>  }


I don't know the backtrace of your crash, but I assume it is because
blk_name() is called for an empty BlockBackend, so blk_bs(blk) == NULL.
(By the way, it could be just bdrv_get_node_name() in this context,
because we already know that the device name doesn't exist, but that one
doesn't like NULL either.)

Either this assumption is wrong, or my analysis that flash devices never
have empty BlockBackends attached was wrong, or this is a call from a
different place and a new function called only from flash instead of
changing blk_name() for all callers might actually have worked.

Kevin
Markus Armbruster March 19, 2019, 1:25 p.m. UTC | #22
Kevin Wolf <kwolf@redhat.com> writes:

> Am 19.03.2019 um 11:34 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>> 

>> > Am 18.03.2019 um 18:21 hat Markus Armbruster geschrieben:

[...]
>> >> Given the sad state of location tracking, I'm afraid we do need to map

>> >> from BlockBackend to some text that helps the user with finding the

>> >> place to correct the problem.

>> >> 

>> >> Any ideas on that?

>> >

>> > If the given BlockBackend isn't empty, you could fall back to the node

>> > name, i.e. bdrv_get_device_or_node_name(blk_bs(blk)).

>> >

>> > If you want to report an error because it's empty, I think we actually

>> > know that it's -drive because you can't create a BlockBackend with

>> > -blockdev and flash devices don't create empty BlockBackends either. So

>> > using blk_name() there looks fine.

>> 

>> Now I'm confused.  You just convinced me I need more than blk_name().

>> Hmm, testing...

>> 

>>     $ qemu-system-x86_64 -nodefaults -S -display none -blockdev node-name=pflash0,driver=file,filename=eio.img -machine pflash0=pflash0

>>     qemu-system-x86_64: Initialization of device cfi.pflash01 failed: can't read block backend '': Input/output error

>> 

>> Yes, I need more.

>

> This is not an empty BlockBackend, it has a root node inserted. You do

> need more for non-empty BlockBackends.

>

>> > Maybe we want a BlockBackend level function that returns the

>> > BlockBackend name if it isn't empty, and the root node name otherwise?

>> 

>> Makes sense to me.

>> 

>> What about calling it blk_name()?  ;-P

>> 

>> This quick voodoo patch crashes:

>> 

>> diff --git a/block/block-backend.c b/block/block-backend.c

>> index edad02a0f2..3c2527f9c0 100644

>> --- a/block/block-backend.c

>> +++ b/block/block-backend.c

>> @@ -642,7 +642,7 @@ void monitor_remove_blk(BlockBackend *blk)

>>   */

>>  const char *blk_name(const BlockBackend *blk)

>>  {

>> -    return blk->name ?: "";

>> +    return blk->name ?: bdrv_get_device_or_node_name(blk_bs(blk));

>>  }

>

> I don't know the backtrace of your crash, but I assume it is because

> blk_name() is called for an empty BlockBackend, so blk_bs(blk) == NULL.


It's infinite recursion, actually.

    [Many stackframes snipped...]

Note blk=0x555556a7abb0

    #232830 0x0000555555d7d396 in blk_name (blk=0x555556a7abb0) at /work/armbru/qemu/block/block-backend.c:645
    #232831 0x0000555555d7c480 in blk_root_get_name (child=0x55555690f140) at /work/armbru/qemu/block/block-backend.c:148
    #232832 0x0000555555d1df59 in bdrv_get_parent_name (bs=0x555556a75690) at /work/armbru/qemu/block.c:4825
    #232833 0x0000555555d1dfcd in bdrv_get_device_or_node_name (bs=0x555556a75690) at /work/armbru/qemu/block.c:4847

Same blk=0x555556a7abb0

    #232834 0x0000555555d7d396 in blk_name (blk=0x555556a7abb0) at /work/armbru/qemu/block/block-backend.c:645
    #232835 0x0000555555aa5c61 in blk_check_size_and_read_all (blk=0x555556a7abb0, buf=0x7fffcde00000, size=131072, errp=0x7fffffffd760) at /work/armbru/qemu/hw/block/block.c:42
    #232836 0x0000555555aae61e in pflash_cfi01_realize (dev=0x555556a65f00, errp=0x7fffffffd760) at /work/armbru/qemu/hw/block/pflash_cfi01.c:760
    #232837 0x0000555555acf97d in device_set_realized (obj=0x555556a65f00, value=true, errp=0x7fffffffd920) at /work/armbru/qemu/hw/core/qdev.c:834
    #232838 0x0000555555d10324 in property_set_bool (obj=0x555556a65f00, v=0x555556bb7c60, name=0x555555fdc849 "realized", opaque=0x555556a66450, errp=0x7fffffffd920) at /work/armbru/qemu/qom/object.c:2074
    #232839 0x0000555555d0e59a in object_property_set (obj=0x555556a65f00, v=0x555556bb7c60, name=0x555555fdc849 "realized", errp=0x7fffffffd920) at /work/armbru/qemu/qom/object.c:1266
    #232840 0x0000555555d1166c in object_property_set_qobject (obj=0x555556a65f00, value=0x555556bb7c40, name=0x555555fdc849 "realized", errp=0x7fffffffd920) at /work/armbru/qemu/qom/qom-qobject.c:27
    #232841 0x0000555555d0e87f in object_property_set_bool (obj=0x555556a65f00, value=true, name=0x555555fdc849 "realized", errp=0x7fffffffd920) at /work/armbru/qemu/qom/object.c:1332
    #232842 0x0000555555ace64f in qdev_init_nofail (dev=0x555556a65f00) at /work/armbru/qemu/hw/core/qdev.c:321
    #232843 0x000055555596b5d0 in pc_system_flash_map (pcms=0x555556a38260, rom_memory=0x555556913f20) at /work/armbru/qemu/hw/i386/pc_sysfw.c:192
    #232844 0x000055555596bb1e in pc_system_firmware_init (pcms=0x555556a38260, rom_memory=0x555556913f20) at /work/armbru/qemu/hw/i386/pc_sysfw.c:322
    #232845 0x0000555555962876 in pc_memory_init (pcms=0x555556a38260, system_memory=0x5555569e1300, rom_memory=0x555556913f20, ram_memory=0x7fffffffdb28) at /work/armbru/qemu/hw/i386/pc.c:1812
    #232846 0x0000555555966176 in pc_init1 (machine=0x555556a38260, host_type=0x555555f9e52c "i440FX-pcihost", pci_type=0x555555f9e525 "i440FX") at /work/armbru/qemu/hw/i386/pc_piix.c:181
    #232847 0x0000555555966cee in pc_init_v4_0 (machine=0x555556a38260) at /work/armbru/qemu/hw/i386/pc_piix.c:438
    #232848 0x0000555555ad83b1 in machine_run_board_init (machine=0x555556a38260) at /work/armbru/qemu/hw/core/machine.c:1030
    #232849 0x0000555555a328d6 in main (argc=9, argv=0x7fffffffdf58, envp=0x7fffffffdfa8) at /work/armbru/qemu/vl.c:4463

> (By the way, it could be just bdrv_get_node_name() in this context,

> because we already know that the device name doesn't exist, but that one

> doesn't like NULL either.)


If I do that, no crash, and the error message looks decent.

> Either this assumption is wrong, or my analysis that flash devices never

> have empty BlockBackends attached was wrong, or this is a call from a

> different place and a new function called only from flash instead of

> changing blk_name() for all callers might actually have worked.


I think the remaining (and non-trivial) question is what blk_name() is
supposed to do.

Back when I created it, BlockBackend had a somewhat different role, and
blk_name() always returned a non-null, non-empty string.

(Except for a wart: the name becomes empty on HMP drive_del, but that
doesn't really matter here).

blk_name() changed from "you can rely on it to give you a name" to
"maybe it gives you a name, maybe not" in commit e5e785500bf "blockdev:
Separate BB name management".

Should we change it back to "can rely on it"?
Kevin Wolf March 19, 2019, 1:55 p.m. UTC | #23
Am 19.03.2019 um 14:25 hat Markus Armbruster geschrieben:
> >> > Maybe we want a BlockBackend level function that returns the

> >> > BlockBackend name if it isn't empty, and the root node name otherwise?

> >> 

> >> Makes sense to me.

> >> 

> >> What about calling it blk_name()?  ;-P

> >> 

> >> This quick voodoo patch crashes:

> >> 

> >> diff --git a/block/block-backend.c b/block/block-backend.c

> >> index edad02a0f2..3c2527f9c0 100644

> >> --- a/block/block-backend.c

> >> +++ b/block/block-backend.c

> >> @@ -642,7 +642,7 @@ void monitor_remove_blk(BlockBackend *blk)

> >>   */

> >>  const char *blk_name(const BlockBackend *blk)

> >>  {

> >> -    return blk->name ?: "";

> >> +    return blk->name ?: bdrv_get_device_or_node_name(blk_bs(blk));

> >>  }

> >

> > I don't know the backtrace of your crash, but I assume it is because

> > blk_name() is called for an empty BlockBackend, so blk_bs(blk) == NULL.

> 

> It's infinite recursion, actually.


Ah, yes, makes sense.

> > (By the way, it could be just bdrv_get_node_name() in this context,

> > because we already know that the device name doesn't exist, but that one

> > doesn't like NULL either.)

> 

> If I do that, no crash, and the error message looks decent.


Leaves the question whether blk_name() can ever be called for an
anonymous empty BlockBackend. If it can, we must explicitly handle that
case because it would crash with that code.

> > Either this assumption is wrong, or my analysis that flash devices never

> > have empty BlockBackends attached was wrong, or this is a call from a

> > different place and a new function called only from flash instead of

> > changing blk_name() for all callers might actually have worked.

> 

> I think the remaining (and non-trivial) question is what blk_name() is

> supposed to do.

> 

> Back when I created it, BlockBackend had a somewhat different role, and

> blk_name() always returned a non-null, non-empty string.

> 

> (Except for a wart: the name becomes empty on HMP drive_del, but that

> doesn't really matter here).

> 

> blk_name() changed from "you can rely on it to give you a name" to

> "maybe it gives you a name, maybe not" in commit e5e785500bf "blockdev:

> Separate BB name management".

> 

> Should we change it back to "can rely on it"?


You can change it to "returns a non-empty string for BlockBackends that
aren't both anonymous and empty". This doesn't sound like much of a
simplification to me. If you want to make it "returns a non-empty
string, period", you need to figure out what to do with anonymous
empty BlockBackends.

You have to consider a few more things if you want it to return some
meaningful string instead of just a string. Currently, if it returns a
non-empty string, you will get the BlockBackend back when you call
blk_by_name() with this string. If you start returning node names from
blk_name(), but leave blk_by_name() unchanged, you don't know whether
you'll get a BlockBackend when you call blk_by_name().

If you do change both functions, in the context of blk_by_name(), a
BlockBackend won't have a single name any more, but you can identify it
both by its actual name and the node name of its root node (if present).

I'll stop here, but making this change feels like it could have many
implications. None of these implications look actually bad, but in order
to stay consistent, a lot of places need to be changed. I'm not sure if
it's worth the effort.

Kevin
Markus Armbruster March 19, 2019, 3:34 p.m. UTC | #24
Kevin Wolf <kwolf@redhat.com> writes:

> Am 19.03.2019 um 14:25 hat Markus Armbruster geschrieben:

>> Kevin Wolf <kwolf@redhat.com> writes:

>> 

>> > Am 19.03.2019 um 11:34 hat Markus Armbruster geschrieben:

>> >> Kevin Wolf <kwolf@redhat.com> writes:

>> >> 

>> >> > Am 18.03.2019 um 18:21 hat Markus Armbruster geschrieben:

>> [...]

>> >> >> Given the sad state of location tracking, I'm afraid we do need to map

>> >> >> from BlockBackend to some text that helps the user with finding the

>> >> >> place to correct the problem.

>> >> >> 

>> >> >> Any ideas on that?

>> >> >

>> >> > If the given BlockBackend isn't empty, you could fall back to the node

>> >> > name, i.e. bdrv_get_device_or_node_name(blk_bs(blk)).

>> >> >

>> >> > If you want to report an error because it's empty, I think we actually

>> >> > know that it's -drive because you can't create a BlockBackend with

>> >> > -blockdev and flash devices don't create empty BlockBackends either. So

>> >> > using blk_name() there looks fine.

>> >> 

>> >> Now I'm confused.  You just convinced me I need more than blk_name().

>> >> Hmm, testing...

>> >> 

>> >>     $ qemu-system-x86_64 -nodefaults -S -display none -blockdev node-name=pflash0,driver=file,filename=eio.img -machine pflash0=pflash0

>> >>     qemu-system-x86_64: Initialization of device cfi.pflash01 failed: can't read block backend '': Input/output error

>> >> 

>> >> Yes, I need more.

>> >

>> > This is not an empty BlockBackend, it has a root node inserted. You do

>> > need more for non-empty BlockBackends.

>> >

>> >> > Maybe we want a BlockBackend level function that returns the

>> >> > BlockBackend name if it isn't empty, and the root node name otherwise?

>> >> 

>> >> Makes sense to me.


Anchor [*] for later.

>> >> What about calling it blk_name()?  ;-P

>> >> 

>> >> This quick voodoo patch crashes:

>> >> 

>> >> diff --git a/block/block-backend.c b/block/block-backend.c

>> >> index edad02a0f2..3c2527f9c0 100644

>> >> --- a/block/block-backend.c

>> >> +++ b/block/block-backend.c

>> >> @@ -642,7 +642,7 @@ void monitor_remove_blk(BlockBackend *blk)

>> >>   */

>> >>  const char *blk_name(const BlockBackend *blk)

>> >>  {

>> >> -    return blk->name ?: "";

>> >> +    return blk->name ?: bdrv_get_device_or_node_name(blk_bs(blk));

>> >>  }

>> >

>> > I don't know the backtrace of your crash, but I assume it is because

>> > blk_name() is called for an empty BlockBackend, so blk_bs(blk) == NULL.

>> 

>> It's infinite recursion, actually.

>

> Ah, yes, makes sense.

>

>> > (By the way, it could be just bdrv_get_node_name() in this context,

>> > because we already know that the device name doesn't exist, but that one

>> > doesn't like NULL either.)

>> 

>> If I do that, no crash, and the error message looks decent.

>

> Leaves the question whether blk_name() can ever be called for an

> anonymous empty BlockBackend. If it can, we must explicitly handle that

> case because it would crash with that code.


Outcome of the discussion below: we should not change blk_name().  You can
call the unchanged blk_name() safely for any BlockBackend, but its value
is useful only for named ones.

Leaves it unable to fill my need, so I circle back to [*]: maybe we want
a BlockBackend level function that returns some text that helps the user
with finding the place to correct the problem.

We'd still have to figure out when it could used safely, and when its
value would actually be useful.

I'm afraid I've fallen behind too far on block layer evolution to
actually code and document this function without a lot of handholding.
Perhaps you doing it would be less work for you.

Note that quite a few existing uses of blk_name() are for error messages
and such.  These are actually wrong (and getting wronger with -blockdev
becoming used more widely).  Fixing them would be a nice way to
introduce the function.

>> > Either this assumption is wrong, or my analysis that flash devices never

>> > have empty BlockBackends attached was wrong, or this is a call from a

>> > different place and a new function called only from flash instead of

>> > changing blk_name() for all callers might actually have worked.

>> 

>> I think the remaining (and non-trivial) question is what blk_name() is

>> supposed to do.

>> 

>> Back when I created it, BlockBackend had a somewhat different role, and

>> blk_name() always returned a non-null, non-empty string.

>> 

>> (Except for a wart: the name becomes empty on HMP drive_del, but that

>> doesn't really matter here).

>> 

>> blk_name() changed from "you can rely on it to give you a name" to

>> "maybe it gives you a name, maybe not" in commit e5e785500bf "blockdev:

>> Separate BB name management".

>> 

>> Should we change it back to "can rely on it"?

>

> You can change it to "returns a non-empty string for BlockBackends that

> aren't both anonymous and empty". This doesn't sound like much of a

> simplification to me. If you want to make it "returns a non-empty

> string, period", you need to figure out what to do with anonymous

> empty BlockBackends.

>

> You have to consider a few more things if you want it to return some

> meaningful string instead of just a string. Currently, if it returns a

> non-empty string, you will get the BlockBackend back when you call

> blk_by_name() with this string. If you start returning node names from

> blk_name(), but leave blk_by_name() unchanged, you don't know whether

> you'll get a BlockBackend when you call blk_by_name().

>

> If you do change both functions, in the context of blk_by_name(), a

> BlockBackend won't have a single name any more, but you can identify it

> both by its actual name and the node name of its root node (if present).

>

> I'll stop here, but making this change feels like it could have many

> implications. None of these implications look actually bad, but in order

> to stay consistent, a lot of places need to be changed. I'm not sure if

> it's worth the effort.


You're right, this is a swamp we should not enter.
diff mbox series

Patch

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 9d1c356eb6..2e3a05c0b3 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -730,13 +730,6 @@  static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
     }
     device_len = sector_len_per_device * blocks_per_device;
 
-    /* XXX: to be fixed */
-#if 0
-    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&
-        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))
-        return NULL;
-#endif
-
     memory_region_init_rom_device(
         &pfl->mem, OBJECT(dev),
         &pflash_cfi01_ops,
@@ -763,12 +756,32 @@  static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
     }
 
     if (pfl->blk) {
+        /*
+         * Validate the backing store is the right size for pflash
+         * devices. If the user supplies a larger file we ignore the
+         * tail.
+         */
+        int64_t backing_len = blk_getlength(pfl->blk);
+
+        if (backing_len < 0) {
+            error_setg(errp, "can't get size of block backend '%s'",
+                       blk_name(pfl->blk));
+            return;
+        }
+        if (backing_len != total_len) {
+            error_setg(errp, "device requires %" PRIu64 " bytes, "
+                       "block backend '%s' provides %" PRIu64 " bytes",
+                       total_len, blk_name(pfl->blk), backing_len);
+            return;
+        }
+
         /* read the initial flash content */
         ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
-
         if (ret < 0) {
             vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));
-            error_setg(errp, "failed to read the initial flash content");
+            error_setg(errp, "can't read initial flash content"
+                       " from block backend '%s'",
+                       blk_name(pfl->blk));
             return;
         }
     }
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index 33779ce807..45b88d65d8 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -550,12 +550,6 @@  static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     }
 
     chip_len = pfl->sector_len * pfl->nb_blocs;
-    /* XXX: to be fixed */
-#if 0
-    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&
-        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))
-        return NULL;
-#endif
 
     memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ?
                                   &pflash_cfi02_ops_be : &pflash_cfi02_ops_le,
@@ -581,11 +575,32 @@  static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     }
 
     if (pfl->blk) {
+        /*
+         * Validate the backing store is the right size for pflash
+         * devices. If the user supplies a larger file we ignore the
+         * tail.
+         */
+        int64_t backing_len = blk_getlength(pfl->blk);
+
+        if (backing_len < 0) {
+            error_setg(errp, "can't get size of block backend '%s'",
+                       blk_name(pfl->blk));
+            return;
+        }
+        if (backing_len != chip_len) {
+            error_setg(errp, "device requires %" PRIu32 " bytes, "
+                       "block backend '%s' provides %" PRIu64 " bytes",
+                       chip_len, blk_name(pfl->blk), backing_len);
+            return;
+        }
+
         /* read the initial flash content */
         ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
         if (ret < 0) {
-            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));
-            error_setg(errp, "failed to read the initial flash content");
+            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));
+            error_setg(errp, "can't read initial flash content"
+                       " from block backend '%s'",
+                       blk_name(pfl->blk));
             return;
         }
     }