diff mbox series

[07/14] block/blklogwrites: drop error propagation

Message ID 20200909185930.26524-8-vsementsov@virtuozzo.com
State New
Headers show
Series block: deal with errp: part I | expand

Commit Message

Vladimir Sementsov-Ogievskiy Sept. 9, 2020, 6:59 p.m. UTC
It's simple to avoid error propagation in blk_log_writes_open(), we
just need to refactor blk_log_writes_find_cur_log_sector() a bit.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/blklogwrites.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

Comments

Ari Sundholm Sept. 10, 2020, 9:01 p.m. UTC | #1
Hi Vladimir!

Thank you for working on this. My comments below.

On 9/9/20 9:59 PM, Vladimir Sementsov-Ogievskiy wrote:
> It's simple to avoid error propagation in blk_log_writes_open(), we
> just need to refactor blk_log_writes_find_cur_log_sector() a bit.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>   block/blklogwrites.c | 23 +++++++++++------------
>   1 file changed, 11 insertions(+), 12 deletions(-)
> 
> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
> index 7ef046cee9..c7da507b2d 100644
> --- a/block/blklogwrites.c
> +++ b/block/blklogwrites.c
> @@ -96,10 +96,10 @@ static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
>           sector_size < (1ull << 24);
>   }
>   
> -static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
> -                                                   uint32_t sector_size,
> -                                                   uint64_t nr_entries,
> -                                                   Error **errp)
> +static int64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,

I'd rather not change the return type for reasons detailed below.

> +                                                  uint32_t sector_size,
> +                                                  uint64_t nr_entries,
> +                                                  Error **errp)
>   {
>       uint64_t cur_sector = 1;
>       uint64_t cur_idx = 0;
> @@ -112,13 +112,13 @@ static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>           if (read_ret < 0) {
>               error_setg_errno(errp, -read_ret,
>                                "Failed to read log entry %"PRIu64, cur_idx);
> -            return (uint64_t)-1ull;
> +            return read_ret;

This is OK, provided the change in return type signedness is necessary 
in the first place.

>           }
>   
>           if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
>               error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
>                          le64_to_cpu(cur_entry.flags), cur_idx);
> -            return (uint64_t)-1ull;
> +            return -EINVAL;

This is OK, provided the return type signedness change is necessary, 
although we already do have errp to indicate any errors.

>           }
>   
>           /* Account for the sector of the entry itself */
> @@ -143,7 +143,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>   {
>       BDRVBlkLogWritesState *s = bs->opaque;
>       QemuOpts *opts;
> -    Error *local_err = NULL;
>       int ret;
>       uint64_t log_sector_size;
>       bool log_append;
> @@ -215,15 +214,15 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>           s->nr_entries = 0;
>   
>           if (blk_log_writes_sector_size_valid(log_sector_size)) {
> -            s->cur_log_sector =
> +            int64_t cur_log_sector =
>                   blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
> -                                    le64_to_cpu(log_sb.nr_entries), &local_err);
> -            if (local_err) {
> -                ret = -EINVAL;
> -                error_propagate(errp, local_err);
> +                                    le64_to_cpu(log_sb.nr_entries), errp);
> +            if (cur_log_sector < 0) {
> +                ret = cur_log_sector;

This changes the semantics slightly. Changing the return type to int64 
may theoretically cause valid return values to be interpreted as errors. 
Since we already do have a dedicated out pointer for the error value 
(errp), why not use it?

Assigning cur_log_sector to ret looks a bit odd to me. Why not use the 
original -EINVAL - or maybe some more appropriate errno value than -EINVAL?

I think the desired effect of this change could be achieved with less 
churn. How about just making just the following change in addition to 
adding ERRP_GUARD() to the beginning of the function and getting rid of 
local_err:

-                                    le64_to_cpu(log_sb.nr_entries), 
&local_err);
+                                    le64_to_cpu(log_sb.nr_entries), errp);
-            if (local_err) {
+            if (*errp) {
                  ret = -EINVAL;
-                error_propagate(errp, local_err);
>                   goto fail_log;
>               }
>   
> +            s->cur_log_sector = cur_log_sector;
>               s->nr_entries = le64_to_cpu(log_sb.nr_entries);
>           }
>       } else {
> 

Best regards,
Ari Sundholm
ari@tuxera.com
Markus Armbruster Sept. 11, 2020, 5:23 a.m. UTC | #2
Greg Kurz <groug@kaod.org> writes:

> On Wed,  9 Sep 2020 21:59:23 +0300
> Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:
>
>> It's simple to avoid error propagation in blk_log_writes_open(), we
>> just need to refactor blk_log_writes_find_cur_log_sector() a bit.
>> 
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>  block/blklogwrites.c | 23 +++++++++++------------
>>  1 file changed, 11 insertions(+), 12 deletions(-)
>> 
>> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
>> index 7ef046cee9..c7da507b2d 100644
>> --- a/block/blklogwrites.c
>> +++ b/block/blklogwrites.c
>> @@ -96,10 +96,10 @@ static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
>>          sector_size < (1ull << 24);
>>  }
>>  
>> -static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>> -                                                   uint32_t sector_size,
>> -                                                   uint64_t nr_entries,
>> -                                                   Error **errp)
>> +static int64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>> +                                                  uint32_t sector_size,
>> +                                                  uint64_t nr_entries,
>> +                                                  Error **errp)
>>  {
>>      uint64_t cur_sector = 1;
>>      uint64_t cur_idx = 0;
>> @@ -112,13 +112,13 @@ static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>>          if (read_ret < 0) {
>>              error_setg_errno(errp, -read_ret,
>>                               "Failed to read log entry %"PRIu64, cur_idx);
>> -            return (uint64_t)-1ull;
>> +            return read_ret;
>
> This changes the error status of blk_log_writes_open() from -EINVAL to
> whatever is returned by bdrv_pread(). I guess this is an improvement
> worth to be mentioned in the changelog.
>
>>          }
>>  
>>          if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
>>              error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
>>                         le64_to_cpu(cur_entry.flags), cur_idx);
>> -            return (uint64_t)-1ull;
>> +            return -EINVAL;
>>          }
>>  
>>          /* Account for the sector of the entry itself */
>> @@ -143,7 +143,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>  {
>>      BDRVBlkLogWritesState *s = bs->opaque;
>>      QemuOpts *opts;
>> -    Error *local_err = NULL;
>>      int ret;
>>      uint64_t log_sector_size;
>>      bool log_append;
>> @@ -215,15 +214,15 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>          s->nr_entries = 0;
>>  
>>          if (blk_log_writes_sector_size_valid(log_sector_size)) {
>> -            s->cur_log_sector =
>> +            int64_t cur_log_sector =
>>                  blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
>> -                                    le64_to_cpu(log_sb.nr_entries), &local_err);
>> -            if (local_err) {
>> -                ret = -EINVAL;
>> -                error_propagate(errp, local_err);
>> +                                    le64_to_cpu(log_sb.nr_entries), errp);
>> +            if (cur_log_sector < 0) {
>> +                ret = cur_log_sector;
>
> This is converting int64_t to int, which is usually not recommended.
> In practice this is probably okay because cur_log_sector is supposed
> to be a negative errno (ie. an int) in this case.

It isn't: blk_log_writes_find_cur_log_sector() returns (uint64_t)-1ull
on error.

Aside: returning uint64_t is awkward.  We commonly use a signed integer
for non-negative offset or negative error.

> Maybe make this clear with a an assert(cur_log_sector >= INT_MIN) ?

Unless we change blk_log_writes_find_cur_log_sector() to return a
negative errno code, we need to ret = -EINVAL here.  Let's keep this
series simple.

>>                  goto fail_log;
>>              }
>>  
>> +            s->cur_log_sector = cur_log_sector;
>>              s->nr_entries = le64_to_cpu(log_sb.nr_entries);
>>          }
>>      } else {
Markus Armbruster Sept. 11, 2020, 8:03 a.m. UTC | #3
Ari Sundholm <ari@tuxera.com> writes:

> Hi Vladimir!
>
> Thank you for working on this. My comments below.
>
> On 9/9/20 9:59 PM, Vladimir Sementsov-Ogievskiy wrote:
>> It's simple to avoid error propagation in blk_log_writes_open(), we
>> just need to refactor blk_log_writes_find_cur_log_sector() a bit.
>> Signed-off-by: Vladimir Sementsov-Ogievskiy
>> <vsementsov@virtuozzo.com>
>> ---
>>   block/blklogwrites.c | 23 +++++++++++------------
>>   1 file changed, 11 insertions(+), 12 deletions(-)
>> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
>> index 7ef046cee9..c7da507b2d 100644
>> --- a/block/blklogwrites.c
>> +++ b/block/blklogwrites.c
>> @@ -96,10 +96,10 @@ static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
>>           sector_size < (1ull << 24);
>>   }
>>   -static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild
>> *log,
>> -                                                   uint32_t sector_size,
>> -                                                   uint64_t nr_entries,
>> -                                                   Error **errp)
>> +static int64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>
> I'd rather not change the return type for reasons detailed below.
>
>> +                                                  uint32_t sector_size,
>> +                                                  uint64_t nr_entries,
>> +                                                  Error **errp)
>>   {
>>       uint64_t cur_sector = 1;
>>       uint64_t cur_idx = 0;
>> @@ -112,13 +112,13 @@ static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>>           if (read_ret < 0) {
>>               error_setg_errno(errp, -read_ret,
>>                                "Failed to read log entry %"PRIu64, cur_idx);
>> -            return (uint64_t)-1ull;
>> +            return read_ret;
>
> This is OK, provided the change in return type signedness is necessary
> in the first place.
>
>>           }
>>             if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
>>               error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
>>                          le64_to_cpu(cur_entry.flags), cur_idx);
>> -            return (uint64_t)-1ull;
>> +            return -EINVAL;
>
> This is OK, provided the return type signedness change is necessary,
> although we already do have errp to indicate any errors.
>
>>           }
>>             /* Account for the sector of the entry itself */
>> @@ -143,7 +143,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>   {
>>       BDRVBlkLogWritesState *s = bs->opaque;
>>       QemuOpts *opts;
>> -    Error *local_err = NULL;
>>       int ret;
>>       uint64_t log_sector_size;
>>       bool log_append;
>> @@ -215,15 +214,15 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>           s->nr_entries = 0;
>>             if (blk_log_writes_sector_size_valid(log_sector_size)) {
>> -            s->cur_log_sector =
>> +            int64_t cur_log_sector =
>>                   blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
>> -                                    le64_to_cpu(log_sb.nr_entries), &local_err);
>> -            if (local_err) {
>> -                ret = -EINVAL;
>> -                error_propagate(errp, local_err);
>> +                                    le64_to_cpu(log_sb.nr_entries), errp);
>> +            if (cur_log_sector < 0) {
>> +                ret = cur_log_sector;
>
> This changes the semantics slightly. Changing the return type to int64
> may theoretically cause valid return values to be interpreted as
> errors. Since we already do have a dedicated out pointer for the error
> value (errp), why not use it?

Error.h's big comment:

 * Error reporting system loosely patterned after Glib's GError.
 *
 * = Rules =
 [...]
 * - Whenever practical, also return a value that indicates success /
 *   failure.  This can make the error checking more concise, and can
 *   avoid useless error object creation and destruction.  Note that
 *   we still have many functions returning void.  We recommend
 *   • bool-valued functions return true on success / false on failure,
 *   • pointer-valued functions return non-null / null pointer, and
 *   • integer-valued functions return non-negative / negative.

blk_log_writes_find_cur_log_sector() does return such an error value
before the patch: (uint64_t)-1.

The caller does not use it to check for errors.  It uses @err instead.
Awkward, has to error_propagate().

Avoiding error_propagate() reduces the error handling boileplate.  It
also improves behavior when @errp is &error_abort: we get the abort
right where the error happens instead of where we propagate it.

Furthermore, caller has to make an error code (-EINVAL), because
returning (uint64_t)-1 throws it away.  Yes, a detailed error is stored
into @err, but you can't cleanly extract the error code.

Using a signed integer for returning "non-negative offset or negative
errno code" is pervasive, starting with read() and write().  It hasn't
been a problem there, and it hasn't been a problem in the block layer.
8 exbi-blocks should do for a while.  Should it become troublesome, we
won't solve the problem by going unsigned and adding one bit, we'll
double the width and add 64.

Additional rationale for returning recognizable error values:

commit e3fe3988d7851cac30abffae06d2f555ff7bee62
Author: Markus Armbruster <armbru@redhat.com>
Date:   Tue Jul 7 18:05:31 2020 +0200

    error: Document Error API usage rules
    
    This merely codifies existing practice, with one exception: the rule
    advising against returning void, where existing practice is mixed.
    
    When the Error API was created, we adopted the (unwritten) rule to
    return void when the function returns no useful value on success,
    unlike GError, which recommends to return true on success and false on
    error then.
    
    When a function returns a distinct error value, say false, a checked
    call that passes the error up looks like
    
        if (!frobnicate(..., errp)) {
            handle the error...
        }
    
    When it returns void, we need
    
        Error *err = NULL;
    
        frobnicate(..., &err);
        if (err) {
            handle the error...
            error_propagate(errp, err);
        }
    
    Not only is this more verbose, it also creates an Error object even
    when @errp is null, &error_abort or &error_fatal.
    
    People got tired of the additional boilerplate, and started to ignore
    the unwritten rule.  The result is confusion among developers about
    the preferred usage.
    
    Make the rule advising against returning void official by putting it
    in writing.  This will hopefully reduce confusion.
    [...]

Additional rationale for avoiding error_propagate():

commit ae7c80a7bd73685437bf6ba9d7c26098351f4166
Author: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date:   Tue Jul 7 18:50:30 2020 +0200

    error: New macro ERRP_GUARD()
    
    Introduce a new ERRP_GUARD() macro, to be used at start of functions
    with an errp OUT parameter.
    
    It has three goals:
    
    1. Fix issue with error_fatal and error_prepend/error_append_hint: the
    user can't see this additional information, because exit() happens in
    error_setg earlier than information is added. [Reported by Greg Kurz]
    
    2. Fix issue with error_abort and error_propagate: when we wrap
    error_abort by local_err+error_propagate, the resulting coredump will
    refer to error_propagate and not to the place where error happened.
    (the macro itself doesn't fix the issue, but it allows us to [3.] drop
    the local_err+error_propagate pattern, which will definitely fix the
    issue) [Reported by Kevin Wolf]
    
    3. Drop local_err+error_propagate pattern, which is used to workaround
    void functions with errp parameter, when caller wants to know resulting
    status. (Note: actually these functions could be merely updated to
    return int error code).
    [...]

> Assigning cur_log_sector to ret looks a bit odd to me. Why not use the
> original -EINVAL - or maybe some more appropriate errno value than
> -EINVAL?

We assign the error code returned by
blk_log_writes_find_cur_log_sector(), which seems proper to me.

Care to suggest a better variable name?

> I think the desired effect of this change could be achieved with less
> churn. How about just making just the following change in addition to 
> adding ERRP_GUARD() to the beginning of the function and getting rid
> of local_err:
>
> -                                    le64_to_cpu(log_sb.nr_entries),
>                                      &local_err);
> +                                    le64_to_cpu(log_sb.nr_entries), errp);
> -            if (local_err) {
> +            if (*errp) {
>                  ret = -EINVAL;
> -                error_propagate(errp, local_err);
>>                   goto fail_log;
>>               }
>>   +            s->cur_log_sector = cur_log_sector;
>>               s->nr_entries = le64_to_cpu(log_sb.nr_entries);
>>           }
>>       } else {

I prefer this patch.  But it's up to the block maintainers.
Ari Sundholm Sept. 16, 2020, 4:52 p.m. UTC | #4
Hi,

On 9/11/20 11:03 AM, Markus Armbruster wrote:
> Ari Sundholm <ari@tuxera.com> writes:

> 

>> Hi Vladimir!

>>

>> Thank you for working on this. My comments below.

>>

>> On 9/9/20 9:59 PM, Vladimir Sementsov-Ogievskiy wrote:

>>> It's simple to avoid error propagation in blk_log_writes_open(), we

>>> just need to refactor blk_log_writes_find_cur_log_sector() a bit.

>>> Signed-off-by: Vladimir Sementsov-Ogievskiy

>>> <vsementsov@virtuozzo.com>

>>> ---

>>>    block/blklogwrites.c | 23 +++++++++++------------

>>>    1 file changed, 11 insertions(+), 12 deletions(-)

>>> diff --git a/block/blklogwrites.c b/block/blklogwrites.c

>>> index 7ef046cee9..c7da507b2d 100644

>>> --- a/block/blklogwrites.c

>>> +++ b/block/blklogwrites.c

>>> @@ -96,10 +96,10 @@ static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)

>>>            sector_size < (1ull << 24);

>>>    }

>>>    -static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild

>>> *log,

>>> -                                                   uint32_t sector_size,

>>> -                                                   uint64_t nr_entries,

>>> -                                                   Error **errp)

>>> +static int64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,

>>

>> I'd rather not change the return type for reasons detailed below.

>>

>>> +                                                  uint32_t sector_size,

>>> +                                                  uint64_t nr_entries,

>>> +                                                  Error **errp)

>>>    {

>>>        uint64_t cur_sector = 1;

>>>        uint64_t cur_idx = 0;

>>> @@ -112,13 +112,13 @@ static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,

>>>            if (read_ret < 0) {

>>>                error_setg_errno(errp, -read_ret,

>>>                                 "Failed to read log entry %"PRIu64, cur_idx);

>>> -            return (uint64_t)-1ull;

>>> +            return read_ret;

>>

>> This is OK, provided the change in return type signedness is necessary

>> in the first place.

>>

>>>            }

>>>              if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {

>>>                error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,

>>>                           le64_to_cpu(cur_entry.flags), cur_idx);

>>> -            return (uint64_t)-1ull;

>>> +            return -EINVAL;

>>

>> This is OK, provided the return type signedness change is necessary,

>> although we already do have errp to indicate any errors.

>>

>>>            }

>>>              /* Account for the sector of the entry itself */

>>> @@ -143,7 +143,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,

>>>    {

>>>        BDRVBlkLogWritesState *s = bs->opaque;

>>>        QemuOpts *opts;

>>> -    Error *local_err = NULL;

>>>        int ret;

>>>        uint64_t log_sector_size;

>>>        bool log_append;

>>> @@ -215,15 +214,15 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,

>>>            s->nr_entries = 0;

>>>              if (blk_log_writes_sector_size_valid(log_sector_size)) {

>>> -            s->cur_log_sector =

>>> +            int64_t cur_log_sector =

>>>                    blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,

>>> -                                    le64_to_cpu(log_sb.nr_entries), &local_err);

>>> -            if (local_err) {

>>> -                ret = -EINVAL;

>>> -                error_propagate(errp, local_err);

>>> +                                    le64_to_cpu(log_sb.nr_entries), errp);

>>> +            if (cur_log_sector < 0) {

>>> +                ret = cur_log_sector;

>>

>> This changes the semantics slightly. Changing the return type to int64

>> may theoretically cause valid return values to be interpreted as

>> errors. Since we already do have a dedicated out pointer for the error

>> value (errp), why not use it?

> 

> Error.h's big comment:

> 

>   * Error reporting system loosely patterned after Glib's GError.

>   *

>   * = Rules =

>   [...]

>   * - Whenever practical, also return a value that indicates success /

>   *   failure.  This can make the error checking more concise, and can

>   *   avoid useless error object creation and destruction.  Note that

>   *   we still have many functions returning void.  We recommend

>   *   • bool-valued functions return true on success / false on failure,

>   *   • pointer-valued functions return non-null / null pointer, and

>   *   • integer-valued functions return non-negative / negative.

> 

> blk_log_writes_find_cur_log_sector() does return such an error value

> before the patch: (uint64_t)-1.

> 

> The caller does not use it to check for errors.  It uses @err instead.

> Awkward, has to error_propagate().

> 

> Avoiding error_propagate() reduces the error handling boileplate.  It

> also improves behavior when @errp is &error_abort: we get the abort

> right where the error happens instead of where we propagate it.

> 

> Furthermore, caller has to make an error code (-EINVAL), because

> returning (uint64_t)-1 throws it away.  Yes, a detailed error is stored

> into @err, but you can't cleanly extract the error code.

> 

> Using a signed integer for returning "non-negative offset or negative

> errno code" is pervasive, starting with read() and write().  It hasn't

> been a problem there, and it hasn't been a problem in the block layer.

> 8 exbi-blocks should do for a while.  Should it become troublesome, we

> won't solve the problem by going unsigned and adding one bit, we'll

> double the width and add 64.

> 


I am in complete agreement with eliminating error propagation within the 
blklogwrites driver. This was never a point of disagreement.

As error propagation is dropped in this patch, the awkwardness referred 
to above will be no more, making that a moot point.

My main issue was that this patch does more than just the mechanical 
transformation required. Changes that are not strictly necessary are 
made, and they slightly change the semantics while duplicating the error 
code and halving the range of the return type (instead of just returning 
*some* value in the absence of a possibility to return nothing, which 
will be thrown away by the caller anyway when an error has occurred).

However, as the consensus seems to be that it is best to change the 
return type to int64_t for consistency with the rest of the codebase, I 
will not object any further regarding that point. Having conceded that, 
this makes the difference between my preferred minimalistic approach and 
this patch insignificant, and it is not my intention to needlessly 
obstruct a perfectly fine patch series.

BTW, I do not think it would be difficult at all to extend the code in 
error.h to make it convenient to extract errno and/or win32 error values 
that have been explicitly provided, but this is a matter best discussed 
separately and left for a later patch.

As for general matters regarding error handling and separation between 
results and errors, I am open to discussing these off-list.

Best regards,
Ari Sundholm
ari@tuxera.com

> Additional rationale for returning recognizable error values:

> 

> commit e3fe3988d7851cac30abffae06d2f555ff7bee62

> Author: Markus Armbruster <armbru@redhat.com>

> Date:   Tue Jul 7 18:05:31 2020 +0200

> 

>      error: Document Error API usage rules

>      

>      This merely codifies existing practice, with one exception: the rule

>      advising against returning void, where existing practice is mixed.

>      

>      When the Error API was created, we adopted the (unwritten) rule to

>      return void when the function returns no useful value on success,

>      unlike GError, which recommends to return true on success and false on

>      error then.

>      

>      When a function returns a distinct error value, say false, a checked

>      call that passes the error up looks like

>      

>          if (!frobnicate(..., errp)) {

>              handle the error...

>          }

>      

>      When it returns void, we need

>      

>          Error *err = NULL;

>      

>          frobnicate(..., &err);

>          if (err) {

>              handle the error...

>              error_propagate(errp, err);

>          }

>      

>      Not only is this more verbose, it also creates an Error object even

>      when @errp is null, &error_abort or &error_fatal.

>      

>      People got tired of the additional boilerplate, and started to ignore

>      the unwritten rule.  The result is confusion among developers about

>      the preferred usage.

>      

>      Make the rule advising against returning void official by putting it

>      in writing.  This will hopefully reduce confusion.

>      [...]

> 

> Additional rationale for avoiding error_propagate():

> 

> commit ae7c80a7bd73685437bf6ba9d7c26098351f4166

> Author: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

> Date:   Tue Jul 7 18:50:30 2020 +0200

> 

>      error: New macro ERRP_GUARD()

>      

>      Introduce a new ERRP_GUARD() macro, to be used at start of functions

>      with an errp OUT parameter.

>      

>      It has three goals:

>      

>      1. Fix issue with error_fatal and error_prepend/error_append_hint: the

>      user can't see this additional information, because exit() happens in

>      error_setg earlier than information is added. [Reported by Greg Kurz]

>      

>      2. Fix issue with error_abort and error_propagate: when we wrap

>      error_abort by local_err+error_propagate, the resulting coredump will

>      refer to error_propagate and not to the place where error happened.

>      (the macro itself doesn't fix the issue, but it allows us to [3.] drop

>      the local_err+error_propagate pattern, which will definitely fix the

>      issue) [Reported by Kevin Wolf]

>      

>      3. Drop local_err+error_propagate pattern, which is used to workaround

>      void functions with errp parameter, when caller wants to know resulting

>      status. (Note: actually these functions could be merely updated to

>      return int error code).

>      [...]

> 

>> Assigning cur_log_sector to ret looks a bit odd to me. Why not use the

>> original -EINVAL - or maybe some more appropriate errno value than

>> -EINVAL?

> 

> We assign the error code returned by

> blk_log_writes_find_cur_log_sector(), which seems proper to me.

> 

> Care to suggest a better variable name?

> 

>> I think the desired effect of this change could be achieved with less

>> churn. How about just making just the following change in addition to

>> adding ERRP_GUARD() to the beginning of the function and getting rid

>> of local_err:

>>

>> -                                    le64_to_cpu(log_sb.nr_entries),

>>                                       &local_err);

>> +                                    le64_to_cpu(log_sb.nr_entries), errp);

>> -            if (local_err) {

>> +            if (*errp) {

>>                   ret = -EINVAL;

>> -                error_propagate(errp, local_err);

>>>                    goto fail_log;

>>>                }

>>>    +            s->cur_log_sector = cur_log_sector;

>>>                s->nr_entries = le64_to_cpu(log_sb.nr_entries);

>>>            }

>>>        } else {

> 

> I prefer this patch.  But it's up to the block maintainers.

>
Markus Armbruster Sept. 17, 2020, 7:12 a.m. UTC | #5
Ari Sundholm <ari@tuxera.com> writes:

> Hi,
>
> On 9/11/20 11:03 AM, Markus Armbruster wrote:
>> Ari Sundholm <ari@tuxera.com> writes:
>> 
>>> Hi Vladimir!
>>>
>>> Thank you for working on this. My comments below.
>>>
>>> On 9/9/20 9:59 PM, Vladimir Sementsov-Ogievskiy wrote:
>>>> It's simple to avoid error propagation in blk_log_writes_open(), we
>>>> just need to refactor blk_log_writes_find_cur_log_sector() a bit.
>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy
>>>> <vsementsov@virtuozzo.com>
>>>> ---
>>>>    block/blklogwrites.c | 23 +++++++++++------------
>>>>    1 file changed, 11 insertions(+), 12 deletions(-)
>>>> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
>>>> index 7ef046cee9..c7da507b2d 100644
>>>> --- a/block/blklogwrites.c
>>>> +++ b/block/blklogwrites.c
>>>> @@ -96,10 +96,10 @@ static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
>>>>            sector_size < (1ull << 24);
>>>>    }
>>>>    -static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild
>>>> *log,
>>>> -                                                   uint32_t sector_size,
>>>> -                                                   uint64_t nr_entries,
>>>> -                                                   Error **errp)
>>>> +static int64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>>>
>>> I'd rather not change the return type for reasons detailed below.
>>>
>>>> +                                                  uint32_t sector_size,
>>>> +                                                  uint64_t nr_entries,
>>>> +                                                  Error **errp)
>>>>    {
>>>>        uint64_t cur_sector = 1;
>>>>        uint64_t cur_idx = 0;
>>>> @@ -112,13 +112,13 @@ static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
>>>>            if (read_ret < 0) {
>>>>                error_setg_errno(errp, -read_ret,
>>>>                                 "Failed to read log entry %"PRIu64, cur_idx);
>>>> -            return (uint64_t)-1ull;
>>>> +            return read_ret;
>>>
>>> This is OK, provided the change in return type signedness is necessary
>>> in the first place.
>>>
>>>>            }
>>>>              if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
>>>>                error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
>>>>                           le64_to_cpu(cur_entry.flags), cur_idx);
>>>> -            return (uint64_t)-1ull;
>>>> +            return -EINVAL;
>>>
>>> This is OK, provided the return type signedness change is necessary,
>>> although we already do have errp to indicate any errors.
>>>
>>>>            }
>>>>              /* Account for the sector of the entry itself */
>>>> @@ -143,7 +143,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>>>    {
>>>>        BDRVBlkLogWritesState *s = bs->opaque;
>>>>        QemuOpts *opts;
>>>> -    Error *local_err = NULL;
>>>>        int ret;
>>>>        uint64_t log_sector_size;
>>>>        bool log_append;
>>>> @@ -215,15 +214,15 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>>>            s->nr_entries = 0;
>>>>              if (blk_log_writes_sector_size_valid(log_sector_size)) {
>>>> -            s->cur_log_sector =
>>>> +            int64_t cur_log_sector =
>>>>                    blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
>>>> -                                    le64_to_cpu(log_sb.nr_entries), &local_err);
>>>> -            if (local_err) {
>>>> -                ret = -EINVAL;
>>>> -                error_propagate(errp, local_err);
>>>> +                                    le64_to_cpu(log_sb.nr_entries), errp);
>>>> +            if (cur_log_sector < 0) {
>>>> +                ret = cur_log_sector;
>>>
>>> This changes the semantics slightly. Changing the return type to int64
>>> may theoretically cause valid return values to be interpreted as
>>> errors. Since we already do have a dedicated out pointer for the error
>>> value (errp), why not use it?
>> Error.h's big comment:
>>   * Error reporting system loosely patterned after Glib's GError.
>>   *
>>   * = Rules =
>>   [...]
>>   * - Whenever practical, also return a value that indicates success /
>>   *   failure.  This can make the error checking more concise, and can
>>   *   avoid useless error object creation and destruction.  Note that
>>   *   we still have many functions returning void.  We recommend
>>   *   • bool-valued functions return true on success / false on failure,
>>   *   • pointer-valued functions return non-null / null pointer, and
>>   *   • integer-valued functions return non-negative / negative.
>> blk_log_writes_find_cur_log_sector() does return such an error value
>> before the patch: (uint64_t)-1.
>> The caller does not use it to check for errors.  It uses @err
>> instead.
>> Awkward, has to error_propagate().
>> Avoiding error_propagate() reduces the error handling boileplate.
>> It
>> also improves behavior when @errp is &error_abort: we get the abort
>> right where the error happens instead of where we propagate it.
>> Furthermore, caller has to make an error code (-EINVAL), because
>> returning (uint64_t)-1 throws it away.  Yes, a detailed error is stored
>> into @err, but you can't cleanly extract the error code.
>> Using a signed integer for returning "non-negative offset or
>> negative
>> errno code" is pervasive, starting with read() and write().  It hasn't
>> been a problem there, and it hasn't been a problem in the block layer.
>> 8 exbi-blocks should do for a while.  Should it become troublesome, we
>> won't solve the problem by going unsigned and adding one bit, we'll
>> double the width and add 64.
>> 
>
> I am in complete agreement with eliminating error propagation within
> the blklogwrites driver. This was never a point of disagreement.
>
> As error propagation is dropped in this patch, the awkwardness
> referred to above will be no more, making that a moot point.
>
> My main issue was that this patch does more than just the mechanical
> transformation required. Changes that are not strictly necessary are 
> made, and they slightly change the semantics while duplicating the
> error code and halving the range of the return type (instead of just
> returning *some* value in the absence of a possibility to return
> nothing, which will be thrown away by the caller anyway when an error
> has occurred).

You have a point: the return type change should perhaps be a separate
patch.  Up to the maintainer.

> However, as the consensus seems to be that it is best to change the
> return type to int64_t for consistency with the rest of the codebase,
> I will not object any further regarding that point. Having conceded
> that, this makes the difference between my preferred minimalistic
> approach and this patch insignificant, and it is not my intention to
> needlessly obstruct a perfectly fine patch series.
>
> BTW, I do not think it would be difficult at all to extend the code in
> error.h to make it convenient to extract errno and/or win32 error
> values that have been explicitly provided, but this is a matter best
> discussed separately and left for a later patch.

Certainly feasible, but how would we deal with the fact that only some
Error objects have an error code?

> As for general matters regarding error handling and separation between
> results and errors, I am open to discussing these off-list.

Thanks!
diff mbox series

Patch

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index 7ef046cee9..c7da507b2d 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -96,10 +96,10 @@  static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
         sector_size < (1ull << 24);
 }
 
-static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
-                                                   uint32_t sector_size,
-                                                   uint64_t nr_entries,
-                                                   Error **errp)
+static int64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
+                                                  uint32_t sector_size,
+                                                  uint64_t nr_entries,
+                                                  Error **errp)
 {
     uint64_t cur_sector = 1;
     uint64_t cur_idx = 0;
@@ -112,13 +112,13 @@  static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
         if (read_ret < 0) {
             error_setg_errno(errp, -read_ret,
                              "Failed to read log entry %"PRIu64, cur_idx);
-            return (uint64_t)-1ull;
+            return read_ret;
         }
 
         if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
             error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
                        le64_to_cpu(cur_entry.flags), cur_idx);
-            return (uint64_t)-1ull;
+            return -EINVAL;
         }
 
         /* Account for the sector of the entry itself */
@@ -143,7 +143,6 @@  static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
 {
     BDRVBlkLogWritesState *s = bs->opaque;
     QemuOpts *opts;
-    Error *local_err = NULL;
     int ret;
     uint64_t log_sector_size;
     bool log_append;
@@ -215,15 +214,15 @@  static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
         s->nr_entries = 0;
 
         if (blk_log_writes_sector_size_valid(log_sector_size)) {
-            s->cur_log_sector =
+            int64_t cur_log_sector =
                 blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
-                                    le64_to_cpu(log_sb.nr_entries), &local_err);
-            if (local_err) {
-                ret = -EINVAL;
-                error_propagate(errp, local_err);
+                                    le64_to_cpu(log_sb.nr_entries), errp);
+            if (cur_log_sector < 0) {
+                ret = cur_log_sector;
                 goto fail_log;
             }
 
+            s->cur_log_sector = cur_log_sector;
             s->nr_entries = le64_to_cpu(log_sb.nr_entries);
         }
     } else {