diff mbox series

[v5,01/10] hyperv: Convert Hyper-V status codes to strings

Message ID 1740611284-27506-2-git-send-email-nunodasneves@linux.microsoft.com
State New
Headers show
Series Introduce /dev/mshv root partition driver | expand

Commit Message

Nuno Das Neves Feb. 26, 2025, 11:07 p.m. UTC
Introduce hv_result_to_string() for this purpose. This allows
hypercall failures to be debugged more easily with dmesg.

Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
 drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
 drivers/hv/hv_proc.c           | 13 ++++---
 include/asm-generic/mshyperv.h |  1 +
 3 files changed, 74 insertions(+), 5 deletions(-)

Comments

Stanislav Kinsburskii Feb. 26, 2025, 11:26 p.m. UTC | #1
On Wed, Feb 26, 2025 at 03:07:55PM -0800, Nuno Das Neves wrote:
> Introduce hv_result_to_string() for this purpose. This allows
> hypercall failures to be debugged more easily with dmesg.
> 
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>  drivers/hv/hv_proc.c           | 13 ++++---
>  include/asm-generic/mshyperv.h |  1 +
>  3 files changed, 74 insertions(+), 5 deletions(-)
> 

Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Easwar Hariharan Feb. 27, 2025, 4:22 a.m. UTC | #2
On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
> Introduce hv_result_to_string() for this purpose. This allows
> hypercall failures to be debugged more easily with dmesg.
> 

Let the commit message stand on its own, i.e. state that hv_result_to_string()
is introduced to convert hyper-v status codes to string.

> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>  drivers/hv/hv_proc.c           | 13 ++++---
>  include/asm-generic/mshyperv.h |  1 +
>  3 files changed, 74 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 9804adb4cc56..ce20818688fe 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>  	}
>  }
> +
> +const char *hv_result_to_string(u64 hv_status)
> +{
> +	switch (hv_result(hv_status)) {
> +	case HV_STATUS_SUCCESS:
> +		return "HV_STATUS_SUCCESS";
> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
> +	case HV_STATUS_INVALID_ALIGNMENT:
> +		return "HV_STATUS_INVALID_ALIGNMENT";
> +	case HV_STATUS_INVALID_PARAMETER:
> +		return "HV_STATUS_INVALID_PARAMETER";
> +	case HV_STATUS_ACCESS_DENIED:
> +		return "HV_STATUS_ACCESS_DENIED";
> +	case HV_STATUS_INVALID_PARTITION_STATE:
> +		return "HV_STATUS_INVALID_PARTITION_STATE";
> +	case HV_STATUS_OPERATION_DENIED:
> +		return "HV_STATUS_OPERATION_DENIED";
> +	case HV_STATUS_UNKNOWN_PROPERTY:
> +		return "HV_STATUS_UNKNOWN_PROPERTY";
> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
> +	case HV_STATUS_INSUFFICIENT_MEMORY:
> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
> +	case HV_STATUS_INVALID_PARTITION_ID:
> +		return "HV_STATUS_INVALID_PARTITION_ID";
> +	case HV_STATUS_INVALID_VP_INDEX:
> +		return "HV_STATUS_INVALID_VP_INDEX";
> +	case HV_STATUS_NOT_FOUND:
> +		return "HV_STATUS_NOT_FOUND";
> +	case HV_STATUS_INVALID_PORT_ID:
> +		return "HV_STATUS_INVALID_PORT_ID";
> +	case HV_STATUS_INVALID_CONNECTION_ID:
> +		return "HV_STATUS_INVALID_CONNECTION_ID";
> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
> +	case HV_STATUS_NOT_ACKNOWLEDGED:
> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
> +	case HV_STATUS_INVALID_VP_STATE:
> +		return "HV_STATUS_INVALID_VP_STATE";
> +	case HV_STATUS_NO_RESOURCES:
> +		return "HV_STATUS_NO_RESOURCES";
> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
> +	case HV_STATUS_INVALID_LP_INDEX:
> +		return "HV_STATUS_INVALID_LP_INDEX";
> +	case HV_STATUS_INVALID_REGISTER_VALUE:
> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
> +	case HV_STATUS_OPERATION_FAILED:
> +		return "HV_STATUS_OPERATION_FAILED";
> +	case HV_STATUS_TIME_OUT:
> +		return "HV_STATUS_TIME_OUT";
> +	case HV_STATUS_CALL_PENDING:
> +		return "HV_STATUS_CALL_PENDING";
> +	case HV_STATUS_VTL_ALREADY_ENABLED:
> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
> +	default:
> +		return "Unknown";
> +	};
> +	return "Unknown";

Unnecessary extra return since the default case already returns "Unknown"

> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> +

Extra line here ^

> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 2fae18e4f7d2..8fc30f509fa7 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>  				     page_count, 0, input_page, NULL);
>  	local_irq_restore(flags);
>  	if (!hv_result_success(status)) {
> -		pr_err("Failed to deposit pages: %lld\n", status);
> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
> +		       hv_result_to_string(status));
>  		ret = hv_result_to_errno(status);
>  		goto err_free_allocations;
>  	}
> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>  
>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>  			if (!hv_result_success(status)) {
> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
> -				       lp_index, apic_id, status);
> +				pr_err("%s: cpu %u apic ID %u, %s\n",
> +				       __func__, lp_index, apic_id,
> +				       hv_result_to_string(status));
>  				ret = hv_result_to_errno(status);
>  			}
>  			break;
> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
>  
>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>  			if (!hv_result_success(status)) {
> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
> -				       vp_index, flags, status);
> +				pr_err("%s: vcpu %u, lp %u, %s\n",
> +				       __func__, vp_index, flags,
> +				       hv_result_to_string(status));
>  				ret = hv_result_to_errno(status);
>  			}
>  			break;

There are more convertible instances in arch/x86/hyperv/irqdomain.c and drivers/iommu/hyperv-iommu.c

> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index b13b0cda4ac8..dc4729dba9ef 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>  	return __cpumask_to_vpset(vpset, cpus, func);
>  }
>  
> +const char *hv_result_to_string(u64 hv_status);
>  int hv_result_to_errno(u64 status);
>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>  bool hv_is_hyperv_initialized(void);
Roman Kisel Feb. 27, 2025, 11:08 p.m. UTC | #3
On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
[...]

> 
> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
> issue that was open for over 2 years for, partly, the utter uselessness of
> the hex return code of the hypercall.

Thanks for your efforts, and sorry to hear you had a frustrating
debugging experience (sounds like it).

Would be great to learn the details to understand how this function is
going to improve the situation:

1. How come the hex error code was useless, what is not matching
    anything in the Linux headers?
2. How having "Unknown" in the log can possibly be better?
3. Given that the select hv status codes and the proposed strings have
    1:1 correspondence, and there is the 1:N catch-all case for the
    "Unknown", how's that better?

> 
> [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
> 
> Thanks,
> Easwar (he/him)
Roman Kisel Feb. 27, 2025, 11:21 p.m. UTC | #4
On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
> On 2/27/2025 9:02 AM, Roman Kisel wrote:

[...]

> 
> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
> issue that was open for over 2 years for, partly, the utter uselessness of
> the hex return code of the hypercall.

What hypercall was that? I see

		storvsc_log_ratelimited(device, loglevel,
			"tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n",
			scsi_cmd_to_rq(request->cmd)->tag,
			stor_pkt->vm_srb.cdb[0],
			vstor_packet->vm_srb.scsi_status,
			vstor_packet->vm_srb.srb_status,
			vstor_packet->status);

in your patch where `vstor_packet->status` is claimed to be a hypercall
status? I'd be surprised if the hypervisor concerned itself with
the details of visualized SCSI storage. The VMM on the host might and
should.

I'll look through the code to gain more confidence in my suspicion that
calling the SCSI virt storage packet status a hv status causd the
frustration with debugging, and if no counter examples found, will send
a patch to fix that log statement above.

> 
> [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
> 
> Thanks,
> Easwar (he/him)
Easwar Hariharan Feb. 27, 2025, 11:25 p.m. UTC | #5
On 2/27/2025 3:08 PM, Roman Kisel wrote:
> 
> 
> On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
> [...]
> 
>>
>> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
>> issue that was open for over 2 years for, partly, the utter uselessness of
>> the hex return code of the hypercall.
> 
> Thanks for your efforts, and sorry to hear you had a frustrating
> debugging experience (sounds like it).

TBF, I didn't personally struggle with it for 2 years, IMHO, it was the opaqueness
of what the value meant that contributed to user pain.

> 
> Would be great to learn the details to understand how this function is
> going to improve the situation:
> 
> 1. How come the hex error code was useless, what is not matching
>    anything in the Linux headers?

It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS.

Coming from the PoV of a user, it would be a much more useful message to see:

[  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL

than 

[  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001

> 2. How having "Unknown" in the log can possibly be better?

IMHO, seeing "Unknown" in an error report means that there's a new return value
that needs to be mapped to errno in hv_status_to_errno() and updated here as well.

> 3. Given that the select hv status codes and the proposed strings have
>    1:1 correspondence, and there is the 1:N catch-all case for the
>    "Unknown", how's that better?
> 

I didn't really follow this question, but I suppose the answer to Q2 answers this as
well. If not, please expand and I'll try to answer.

Thanks,
Easwar (he/him)
Nuno Das Neves Feb. 27, 2025, 11:48 p.m. UTC | #6
On 2/26/2025 8:22 PM, Easwar Hariharan wrote:
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
>> Introduce hv_result_to_string() for this purpose. This allows
>> hypercall failures to be debugged more easily with dmesg.
>>
> 
> Let the commit message stand on its own, i.e. state that hv_result_to_string()
> is introduced to convert hyper-v status codes to string.
> 
I thought since the subject line is part of the commit message, this kind of
phrasing is ok. However I see that in my email client it is a little odd because
the subject line is a bit far removed from the rest of the message.

I'll change it :)

>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>>  drivers/hv/hv_proc.c           | 13 ++++---
>>  include/asm-generic/mshyperv.h |  1 +
>>  3 files changed, 74 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 9804adb4cc56..ce20818688fe 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>>  	}
>>  }
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> +	switch (hv_result(hv_status)) {
>> +	case HV_STATUS_SUCCESS:
>> +		return "HV_STATUS_SUCCESS";
>> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
>> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
>> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
>> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
>> +	case HV_STATUS_INVALID_ALIGNMENT:
>> +		return "HV_STATUS_INVALID_ALIGNMENT";
>> +	case HV_STATUS_INVALID_PARAMETER:
>> +		return "HV_STATUS_INVALID_PARAMETER";
>> +	case HV_STATUS_ACCESS_DENIED:
>> +		return "HV_STATUS_ACCESS_DENIED";
>> +	case HV_STATUS_INVALID_PARTITION_STATE:
>> +		return "HV_STATUS_INVALID_PARTITION_STATE";
>> +	case HV_STATUS_OPERATION_DENIED:
>> +		return "HV_STATUS_OPERATION_DENIED";
>> +	case HV_STATUS_UNKNOWN_PROPERTY:
>> +		return "HV_STATUS_UNKNOWN_PROPERTY";
>> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
>> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
>> +	case HV_STATUS_INSUFFICIENT_MEMORY:
>> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
>> +	case HV_STATUS_INVALID_PARTITION_ID:
>> +		return "HV_STATUS_INVALID_PARTITION_ID";
>> +	case HV_STATUS_INVALID_VP_INDEX:
>> +		return "HV_STATUS_INVALID_VP_INDEX";
>> +	case HV_STATUS_NOT_FOUND:
>> +		return "HV_STATUS_NOT_FOUND";
>> +	case HV_STATUS_INVALID_PORT_ID:
>> +		return "HV_STATUS_INVALID_PORT_ID";
>> +	case HV_STATUS_INVALID_CONNECTION_ID:
>> +		return "HV_STATUS_INVALID_CONNECTION_ID";
>> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
>> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
>> +	case HV_STATUS_NOT_ACKNOWLEDGED:
>> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
>> +	case HV_STATUS_INVALID_VP_STATE:
>> +		return "HV_STATUS_INVALID_VP_STATE";
>> +	case HV_STATUS_NO_RESOURCES:
>> +		return "HV_STATUS_NO_RESOURCES";
>> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
>> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
>> +	case HV_STATUS_INVALID_LP_INDEX:
>> +		return "HV_STATUS_INVALID_LP_INDEX";
>> +	case HV_STATUS_INVALID_REGISTER_VALUE:
>> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
>> +	case HV_STATUS_OPERATION_FAILED:
>> +		return "HV_STATUS_OPERATION_FAILED";
>> +	case HV_STATUS_TIME_OUT:
>> +		return "HV_STATUS_TIME_OUT";
>> +	case HV_STATUS_CALL_PENDING:
>> +		return "HV_STATUS_CALL_PENDING";
>> +	case HV_STATUS_VTL_ALREADY_ENABLED:
>> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
>> +	default:
>> +		return "Unknown";
>> +	};
>> +	return "Unknown";
> 
> Unnecessary extra return since the default case already returns "Unknown"
> 
Good point, I think I'd prefer to remove the first return and leave the
default case empty.

>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>> +
> 
> Extra line here ^
> 
Thanks

>> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
>> index 2fae18e4f7d2..8fc30f509fa7 100644
>> --- a/drivers/hv/hv_proc.c
>> +++ b/drivers/hv/hv_proc.c
>> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>>  				     page_count, 0, input_page, NULL);
>>  	local_irq_restore(flags);
>>  	if (!hv_result_success(status)) {
>> -		pr_err("Failed to deposit pages: %lld\n", status);
>> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
>> +		       hv_result_to_string(status));
>>  		ret = hv_result_to_errno(status);
>>  		goto err_free_allocations;
>>  	}
>> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>>  
>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>  			if (!hv_result_success(status)) {
>> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
>> -				       lp_index, apic_id, status);
>> +				pr_err("%s: cpu %u apic ID %u, %s\n",
>> +				       __func__, lp_index, apic_id,
>> +				       hv_result_to_string(status));
>>  				ret = hv_result_to_errno(status);
>>  			}
>>  			break;
>> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
>>  
>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>  			if (!hv_result_success(status)) {
>> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
>> -				       vp_index, flags, status);
>> +				pr_err("%s: vcpu %u, lp %u, %s\n",
>> +				       __func__, vp_index, flags,
>> +				       hv_result_to_string(status));
>>  				ret = hv_result_to_errno(status);
>>  			}
>>  			break;
> 
> There are more convertible instances in arch/x86/hyperv/irqdomain.c and drivers/iommu/hyperv-iommu.c
> 
Ah, thank you, happy to add those!

>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index b13b0cda4ac8..dc4729dba9ef 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>>  	return __cpumask_to_vpset(vpset, cpus, func);
>>  }
>>  
>> +const char *hv_result_to_string(u64 hv_status);
>>  int hv_result_to_errno(u64 status);
>>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>>  bool hv_is_hyperv_initialized(void);
Nuno Das Neves Feb. 28, 2025, 12:15 a.m. UTC | #7
On 2/27/2025 9:02 AM, Roman Kisel wrote:
> 
> 
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
> 
> [...]
> 
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> +    switch (hv_result(hv_status)) {
> 
> [...]
> 
>> +        return "HV_STATUS_VTL_ALREADY_ENABLED";
>> +    default:
>> +        return "Unknown";
>> +    };
>> +    return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> 
> Should we remove this and output the hexadecimal error code in ~3 places
> this function is used?
> 
I guess you're implying it's not worth adding such a function for only a
few places in the code? That is a good point, and a bit of an oversight
on my part while editing this series. Originally all the hypercall helper
functions in the driver code (10+ places) used this function as well, but
I removed those printks_()s as a temporary solution to limit the use of
printk in the driver code (as opposed to dev_printk() which is preferred).

I didn't think to remove *this* patch as a result of that change!
I do want to figure out a good way to add that logging back to the hypercall
helpers, so I do want to try and get some form of this patch in to aid
debugging hypercalls - it has been very very useful over time.

> The "Unknown" part would make debugging harder actually when something
> fails. I presume that the mainstream scenarios all work, and it is the
> edge cases that might fail, and these are likelier to produce "Unknown".
> 
That is a very good point. Ideally, we could log "Unknown" along with
the hex code instead of replacing it.

What do you think about keeping this function, but instead of using it
directly, introduce a "standard" way for logging hypercall errors which
can hopefully be used everywhere in the kernel?
e.g. a simple macro:
#define hv_hvcall_err(control, status)
do {
	u64 ___status = (status);
	pr_err("Hypercall: %#x err: %#x : %s", (control) & 0xFFFF, hv_result(___status), hv_result_to_string(___status));
} while (0)

I feel like this is the best of both worlds, and actually makes it even
easier to do this logging everywhere it is wanted (for me, that includes
all the /dev/mshv-related hypercalls).
We could add strings for the HVCALL_ values too, and/or include __func__
in the macro to aid in finding the context it was used in.

> Folks who actually debug failed hypercalls rarely have issues with
> looking up the error code, and printing "Unknown" to the log is worse
> than a hexadecimal. Like even the people who wrote the code got nothing
> to say about what is going on.
> 
Yep, totally agree having the hex code available can be valuable in
unexpected situations.
Roman Kisel Feb. 28, 2025, 4:40 p.m. UTC | #8
On 2/27/2025 4:15 PM, Nuno Das Neves wrote:
> On 2/27/2025 9:02 AM, Roman Kisel wrote:

[...]

> I guess you're implying it's not worth adding such a function for only a
> few places in the code? That is a good point, and a bit of an oversight
> on my part while editing this series. Originally all the hypercall helper
> functions in the driver code (10+ places) used this function as well, but
> I removed those printks_()s as a temporary solution to limit the use of
> printk in the driver code (as opposed to dev_printk() which is preferred).
> 
> I didn't think to remove *this* patch as a result of that change!
> I do want to figure out a good way to add that logging back to the hypercall
> helpers, so I do want to try and get some form of this patch in to aid
> debugging hypercalls - it has been very very useful over time.
> 

Right, I thought that the function looked more as a bring-up aid rather
than a full fledged solution to some problem.

>> The "Unknown" part would make debugging harder actually when something
>> fails. I presume that the mainstream scenarios all work, and it is the
>> edge cases that might fail, and these are likelier to produce "Unknown".
>>
> That is a very good point. Ideally, we could log "Unknown" along with
> the hex code instead of replacing it.
> 
> What do you think about keeping this function, but instead of using it
> directly, introduce a "standard" way for logging hypercall errors which
> can hopefully be used everywhere in the kernel?
> e.g. a simple macro:
> #define hv_hvcall_err(control, status)
> do {
> 	u64 ___status = (status);
> 	pr_err("Hypercall: %#x err: %#x : %s", (control) & 0xFFFF, hv_result(___status), hv_result_to_string(___status));
> } while (0)
> 
> I feel like this is the best of both worlds, and actually makes it even
> easier to do this logging everywhere it is wanted (for me, that includes
> all the /dev/mshv-related hypercalls).
> We could add strings for the HVCALL_ values too, and/or include __func__
> in the macro to aid in finding the context it was used in.
> 

That doesn’t seem to be common in the kernel from what I’ve seen in 
dmesg, although there is certainly a lot of appeal in that approach. 
However, we will have to remember to update the function each time when 
another status code is added not to leave things half-cooked.

Also it is a bit surprising the *kernel* should report that rather than 
the VMM from the user mode. E.g. the kernel does not report all errors 
on file open, file seek, etc. As I understand, the hv status codes are
later mapped to errno in a lossy manner, and errno is what the user mode
receives?

As long as the hex code is logged, I am fine with the change.

>> Folks who actually debug failed hypercalls rarely have issues with
>> looking up the error code, and printing "Unknown" to the log is worse
>> than a hexadecimal. Like even the people who wrote the code got nothing
>> to say about what is going on.
>>
> Yep, totally agree having the hex code available can be valuable in
> unexpected situations.
> 

Appreciate giving my concerns a thorough consideration!
Roman Kisel Feb. 28, 2025, 5:20 p.m. UTC | #9
On 2/27/2025 3:25 PM, Easwar Hariharan wrote:
> On 2/27/2025 3:08 PM, Roman Kisel wrote:

[...]

>> Would be great to learn the details to understand how this function is
>> going to improve the situation:
>>
>> 1. How come the hex error code was useless, what is not matching
>>     anything in the Linux headers?
> 
> It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS.
> 

That is what it looks like from the code, I posted the details in the
parallel thread.

Here is a fix:
https://lore.kernel.org/linux-hyperv/20250227233110.36596-1-romank@linux.microsoft.com/

Also I think the commit description in your patch

https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d

conflates the hypervisor (ours runs bare-metal, Type 1) and the VMMs
(Virtual Machine Monitors)+VSPs (Virtual Service Providers, e.g StorVSP
that implements SCSI) running in the host/root/dom0 partition.

> Coming from the PoV of a user, it would be a much more useful message to see:
> 
> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL
> 
> than
> 
> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001
> 

It is likely that the PoV of a user that you've mentioned is actually
a PoV of a (kernel) developer. It is hard to imagine that folks running
web sites, DB servers, LoBs, LLMs, etc. in Hyper-V VMs care about the
lowest software level of the virt stack in the form of the symbolic
name or the hex code. They need their VMs to be reliable or suggest
what the user may try if a configuration error is suspected.

To make the error log message useful to the user, the message should
mention ways of remediation or at least hint what might've gotten
wedged. Without that, that's only useful for the people who work with
the kernel code proper or the kernel interface to the user land.

So I'd think that the hex error codes from the hypervisor give the user
exactly as much as the error symbolic names do to get the system to the
desired state: nothing. Even less when the error reported "Unknown" :)

>> 2. How having "Unknown" in the log can possibly be better?
> 
> IMHO, seeing "Unknown" in an error report means that there's a new return value
> that needs to be mapped to errno in hv_status_to_errno() and updated here as well.
> 

It means that to the developer. To the user, it means the developers
messed something up and to make matters even worse they didn't leave any
breadcrumbs (e.g. the hex code) to see what's wrong to help the user and
themselves: there is just that "Unknown" thing in the log.

>> 3. Given that the select hv status codes and the proposed strings have
>>     1:1 correspondence, and there is the 1:N catch-all case for the
>>     "Unknown", how's that better?
>>
> 
> I didn't really follow this question, but I suppose the answer to Q2 answers this as
> well. If not, please expand and I'll try to answer.
>

Sorry about that chunk, hit "Send" without looking the e-mail over
another time. Appreciate the discussion very much!


> Thanks,
> Easwar (he/him)
Easwar Hariharan Feb. 28, 2025, 8:22 p.m. UTC | #10
On 2/28/2025 9:20 AM, Roman Kisel wrote:
> 
> 
> On 2/27/2025 3:25 PM, Easwar Hariharan wrote:
>> On 2/27/2025 3:08 PM, Roman Kisel wrote:
> 
> [...]
> 
>>> Would be great to learn the details to understand how this function is
>>> going to improve the situation:
>>>
>>> 1. How come the hex error code was useless, what is not matching
>>>     anything in the Linux headers?
>>
>> It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS.
>>
> 
> That is what it looks like from the code, I posted the details in the
> parallel thread.
> 
> Here is a fix:
> https://lore.kernel.org/linux-hyperv/20250227233110.36596-1-romank@linux.microsoft.com/
> 
> Also I think the commit description in your patch
> 
> https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
> 
> conflates the hypervisor (ours runs bare-metal, Type 1) and the VMMs
> (Virtual Machine Monitors)+VSPs (Virtual Service Providers, e.g StorVSP
> that implements SCSI) running in the host/root/dom0 partition.

Agreed, that was what I was led to believe, your patch would help with that
miscommunication, though not in its current form. See my review comment in that
thread.

> 
>> Coming from the PoV of a user, it would be a much more useful message to see:
>>
>> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL
>>
>> than
>>
>> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001
>>
> 
> It is likely that the PoV of a user that you've mentioned is actually
> a PoV of a (kernel) developer.

Actually, no, it's PoV of the WSL users that are having the discussion in
the linked github issue. FWIW, that issue also occurred in Azure with multiple
incidents coming into our queue because of the unusable flood of error messages.

> It is hard to imagine that folks running
> web sites, DB servers, LoBs, LLMs, etc. in Hyper-V VMs care about the
> lowest software level of the virt stack in the form of the symbolic
> name or the hex code. They need their VMs to be reliable or suggest
> what the user may try if a configuration error is suspected.
> 
> To make the error log message useful to the user, the message should
> mention ways of remediation or at least hint what might've gotten
> wedged. Without that, that's only useful for the people who work with
> the kernel code proper or the kernel interface to the user land.

There's a step between seeing the issue and fixing it that you're missing,
i.e. the reporting.

An issue that says "flood of hv_storvsc errors reporting status
unsuccessful" is better than the same without that status information:
https://github.com/microsoft/WSL/issues/9173

> 
> So I'd think that the hex error codes from the hypervisor give the user
> exactly as much as the error symbolic names do to get the system to the
> desired state: nothing. 
I continue to disagree, seeing HV_STATUS_NO_RESOURCES is better than 0x1D,
because the user may think to look at `top` or `free -h` or similar to see
what could be killed to improve the situation.

> Even less when the error reported "Unknown" :)

I agree on the uselessness of "Unknown" to the user, except as already mentioned
below, as a prompt for the code to be updated.

> 
>>> 2. How having "Unknown" in the log can possibly be better?
>>
>> IMHO, seeing "Unknown" in an error report means that there's a new return value
>> that needs to be mapped to errno in hv_status_to_errno() and updated here as well.
>>
> 
> It means that to the developer. To the user, it means the developers
> messed something up and to make matters even worse they didn't leave any
> breadcrumbs (e.g. the hex code) to see what's wrong to help the user and
> themselves: there is just that "Unknown" thing in the log.

I think Nuno's compromise addresses this very well, to also print the hex code.

> 
>>> 3. Given that the select hv status codes and the proposed strings have
>>>     1:1 correspondence, and there is the 1:N catch-all case for the
>>>     "Unknown", how's that better?
>>>
>>
>> I didn't really follow this question, but I suppose the answer to Q2 answers this as
>> well. If not, please expand and I'll try to answer.
>>
> 
> Sorry about that chunk, hit "Send" without looking the e-mail over
> another time. Appreciate the discussion very much!
> 
> 
>> Thanks,
>> Easwar (he/him)
>
Roman Kisel Feb. 28, 2025, 10:26 p.m. UTC | #11
On 2/28/2025 12:22 PM, Easwar Hariharan wrote:
> On 2/28/2025 9:20 AM, Roman Kisel wrote:
>>

[...]

>>
>> So I'd think that the hex error codes from the hypervisor give the user
>> exactly as much as the error symbolic names do to get the system to the
>> desired state: nothing.
> I continue to disagree, seeing HV_STATUS_NO_RESOURCES is better than 0x1D,
> because the user may think to look at `top` or `free -h` or similar to see
> what could be killed to improve the situation.
> 

I agree that the symbolic name might save the step of looking up the
error code in the headers. Now, the next step depends on how much the
user is into virt technologies (if at all). That is
to illustrate the point that a hint in the logs (or in the
Documentation) is crucial of what to do next.

The symbolic name might mislead; a hex code maybe with an addition of
"please look up what may fix this at <URL> or report the problem here
<URL>" would look better to _my imaginary_ customer :) That would be
as much friendly as possible, if the kernel needs to print any of that
at all. Likely the VMM in the user land if it gets that code as-is.

Thank you for the fair critique and the time!

[...]

>>> Thanks,
>>> Easwar (he/him)
>>
>
Michael Kelley March 6, 2025, 5:57 p.m. UTC | #12
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February 26, 2025 3:08 PM
> 
> Introduce hv_result_to_string() for this purpose. This allows
> hypercall failures to be debugged more easily with dmesg.
> 
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>  drivers/hv/hv_proc.c           | 13 ++++---
>  include/asm-generic/mshyperv.h |  1 +
>  3 files changed, 74 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 9804adb4cc56..ce20818688fe 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>  	}
>  }
> +
> +const char *hv_result_to_string(u64 hv_status)
> +{
> +	switch (hv_result(hv_status)) {
> +	case HV_STATUS_SUCCESS:
> +		return "HV_STATUS_SUCCESS";
> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
> +	case HV_STATUS_INVALID_ALIGNMENT:
> +		return "HV_STATUS_INVALID_ALIGNMENT";
> +	case HV_STATUS_INVALID_PARAMETER:
> +		return "HV_STATUS_INVALID_PARAMETER";
> +	case HV_STATUS_ACCESS_DENIED:
> +		return "HV_STATUS_ACCESS_DENIED";
> +	case HV_STATUS_INVALID_PARTITION_STATE:
> +		return "HV_STATUS_INVALID_PARTITION_STATE";
> +	case HV_STATUS_OPERATION_DENIED:
> +		return "HV_STATUS_OPERATION_DENIED";
> +	case HV_STATUS_UNKNOWN_PROPERTY:
> +		return "HV_STATUS_UNKNOWN_PROPERTY";
> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
> +	case HV_STATUS_INSUFFICIENT_MEMORY:
> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
> +	case HV_STATUS_INVALID_PARTITION_ID:
> +		return "HV_STATUS_INVALID_PARTITION_ID";
> +	case HV_STATUS_INVALID_VP_INDEX:
> +		return "HV_STATUS_INVALID_VP_INDEX";
> +	case HV_STATUS_NOT_FOUND:
> +		return "HV_STATUS_NOT_FOUND";
> +	case HV_STATUS_INVALID_PORT_ID:
> +		return "HV_STATUS_INVALID_PORT_ID";
> +	case HV_STATUS_INVALID_CONNECTION_ID:
> +		return "HV_STATUS_INVALID_CONNECTION_ID";
> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
> +	case HV_STATUS_NOT_ACKNOWLEDGED:
> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
> +	case HV_STATUS_INVALID_VP_STATE:
> +		return "HV_STATUS_INVALID_VP_STATE";
> +	case HV_STATUS_NO_RESOURCES:
> +		return "HV_STATUS_NO_RESOURCES";
> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
> +	case HV_STATUS_INVALID_LP_INDEX:
> +		return "HV_STATUS_INVALID_LP_INDEX";
> +	case HV_STATUS_INVALID_REGISTER_VALUE:
> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
> +	case HV_STATUS_OPERATION_FAILED:
> +		return "HV_STATUS_OPERATION_FAILED";
> +	case HV_STATUS_TIME_OUT:
> +		return "HV_STATUS_TIME_OUT";
> +	case HV_STATUS_CALL_PENDING:
> +		return "HV_STATUS_CALL_PENDING";
> +	case HV_STATUS_VTL_ALREADY_ENABLED:
> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
> +	default:
> +		return "Unknown";
> +	};
> +	return "Unknown";
> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> +
> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 2fae18e4f7d2..8fc30f509fa7 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32
> num_pages)
>  				     page_count, 0, input_page, NULL);
>  	local_irq_restore(flags);
>  	if (!hv_result_success(status)) {
> -		pr_err("Failed to deposit pages: %lld\n", status);
> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
> +		       hv_result_to_string(status));
>  		ret = hv_result_to_errno(status);
>  		goto err_free_allocations;
>  	}
> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
> 
>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>  			if (!hv_result_success(status)) {
> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
> -				       lp_index, apic_id, status);
> +				pr_err("%s: cpu %u apic ID %u, %s\n",
> +				       __func__, lp_index, apic_id,
> +				       hv_result_to_string(status));
>  				ret = hv_result_to_errno(status);
>  			}
>  			break;
> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index,
> u32 flags)
> 
>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>  			if (!hv_result_success(status)) {
> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
> -				       vp_index, flags, status);
> +				pr_err("%s: vcpu %u, lp %u, %s\n",
> +				       __func__, vp_index, flags,
> +				       hv_result_to_string(status));
>  				ret = hv_result_to_errno(status);
>  			}
>  			break;
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index b13b0cda4ac8..dc4729dba9ef 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>  	return __cpumask_to_vpset(vpset, cpus, func);
>  }
> 
> +const char *hv_result_to_string(u64 hv_status);
>  int hv_result_to_errno(u64 status);
>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>  bool hv_is_hyperv_initialized(void);
> --
> 2.34.1

I've read through the other comments on this patch. I definitely vote
for outputting both the hex code along with a string translation, which
could be empty if the hex code is unrecognized by the translation code.

I can see providing something like hv_hvcall_err() as Nuno proposed, since
that standardizes the text output. But I wonder if it would be too limiting.
For example, in the changes above, both hv_call_add_logical_proc() and
hv_call_create_vp() output additional debugging values, which we probably
don't want to give up.

Lastly, from an implementation standpoint, rather than using a big
switch statement, build a static array of entries that each have the
hex code and string equivalent. Then hv_result_to_string() loops through
the array looking for a match. This won't be any slower than the big switch
statement. I've seen other places in the kernel where string names are
output, and looking up the strings in a static array is the typical approach.
You'll have to work through the details and see if avoids being too clumsy,
but I think it will be OK.

Michael
Michael Kelley March 6, 2025, 6:09 p.m. UTC | #13
From: Michael Kelley <mhklinux@outlook.com> Sent: Thursday, March 6, 2025 9:58 AM

> 
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February
> 26, 2025 3:08 PM
> >
> > Introduce hv_result_to_string() for this purpose. This allows
> > hypercall failures to be debugged more easily with dmesg.
> >
> > Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> > ---
> >  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
> >  drivers/hv/hv_proc.c           | 13 ++++---
> >  include/asm-generic/mshyperv.h |  1 +
> >  3 files changed, 74 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> > index 9804adb4cc56..ce20818688fe 100644
> > --- a/drivers/hv/hv_common.c
> > +++ b/drivers/hv/hv_common.c
> > @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
> >  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
> >  	}
> >  }
> > +
> > +const char *hv_result_to_string(u64 hv_status)
> > +{
> > +	switch (hv_result(hv_status)) {
> > +	case HV_STATUS_SUCCESS:
> > +		return "HV_STATUS_SUCCESS";
> > +	case HV_STATUS_INVALID_HYPERCALL_CODE:
> > +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
> > +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
> > +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
> > +	case HV_STATUS_INVALID_ALIGNMENT:
> > +		return "HV_STATUS_INVALID_ALIGNMENT";
> > +	case HV_STATUS_INVALID_PARAMETER:
> > +		return "HV_STATUS_INVALID_PARAMETER";
> > +	case HV_STATUS_ACCESS_DENIED:
> > +		return "HV_STATUS_ACCESS_DENIED";
> > +	case HV_STATUS_INVALID_PARTITION_STATE:
> > +		return "HV_STATUS_INVALID_PARTITION_STATE";
> > +	case HV_STATUS_OPERATION_DENIED:
> > +		return "HV_STATUS_OPERATION_DENIED";
> > +	case HV_STATUS_UNKNOWN_PROPERTY:
> > +		return "HV_STATUS_UNKNOWN_PROPERTY";
> > +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
> > +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
> > +	case HV_STATUS_INSUFFICIENT_MEMORY:
> > +		return "HV_STATUS_INSUFFICIENT_MEMORY";
> > +	case HV_STATUS_INVALID_PARTITION_ID:
> > +		return "HV_STATUS_INVALID_PARTITION_ID";
> > +	case HV_STATUS_INVALID_VP_INDEX:
> > +		return "HV_STATUS_INVALID_VP_INDEX";
> > +	case HV_STATUS_NOT_FOUND:
> > +		return "HV_STATUS_NOT_FOUND";
> > +	case HV_STATUS_INVALID_PORT_ID:
> > +		return "HV_STATUS_INVALID_PORT_ID";
> > +	case HV_STATUS_INVALID_CONNECTION_ID:
> > +		return "HV_STATUS_INVALID_CONNECTION_ID";
> > +	case HV_STATUS_INSUFFICIENT_BUFFERS:
> > +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
> > +	case HV_STATUS_NOT_ACKNOWLEDGED:
> > +		return "HV_STATUS_NOT_ACKNOWLEDGED";
> > +	case HV_STATUS_INVALID_VP_STATE:
> > +		return "HV_STATUS_INVALID_VP_STATE";
> > +	case HV_STATUS_NO_RESOURCES:
> > +		return "HV_STATUS_NO_RESOURCES";
> > +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
> > +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
> > +	case HV_STATUS_INVALID_LP_INDEX:
> > +		return "HV_STATUS_INVALID_LP_INDEX";
> > +	case HV_STATUS_INVALID_REGISTER_VALUE:
> > +		return "HV_STATUS_INVALID_REGISTER_VALUE";
> > +	case HV_STATUS_OPERATION_FAILED:
> > +		return "HV_STATUS_OPERATION_FAILED";
> > +	case HV_STATUS_TIME_OUT:
> > +		return "HV_STATUS_TIME_OUT";
> > +	case HV_STATUS_CALL_PENDING:
> > +		return "HV_STATUS_CALL_PENDING";
> > +	case HV_STATUS_VTL_ALREADY_ENABLED:
> > +		return "HV_STATUS_VTL_ALREADY_ENABLED";
> > +	default:
> > +		return "Unknown";
> > +	};
> > +	return "Unknown";
> > +}
> > +EXPORT_SYMBOL_GPL(hv_result_to_string);
> > +
> > diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> > index 2fae18e4f7d2..8fc30f509fa7 100644
> > --- a/drivers/hv/hv_proc.c
> > +++ b/drivers/hv/hv_proc.c
> > @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32
> > num_pages)
> >  				     page_count, 0, input_page, NULL);
> >  	local_irq_restore(flags);
> >  	if (!hv_result_success(status)) {
> > -		pr_err("Failed to deposit pages: %lld\n", status);
> > +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
> > +		       hv_result_to_string(status));
> >  		ret = hv_result_to_errno(status);
> >  		goto err_free_allocations;
> >  	}
> > @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
> >
> >  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> >  			if (!hv_result_success(status)) {
> > -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
> > -				       lp_index, apic_id, status);
> > +				pr_err("%s: cpu %u apic ID %u, %s\n",
> > +				       __func__, lp_index, apic_id,
> > +				       hv_result_to_string(status));
> >  				ret = hv_result_to_errno(status);
> >  			}
> >  			break;
> > @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index,
> > u32 flags)
> >
> >  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> >  			if (!hv_result_success(status)) {
> > -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
> > -				       vp_index, flags, status);
> > +				pr_err("%s: vcpu %u, lp %u, %s\n",
> > +				       __func__, vp_index, flags,
> > +				       hv_result_to_string(status));
> >  				ret = hv_result_to_errno(status);
> >  			}
> >  			break;
> > diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> > index b13b0cda4ac8..dc4729dba9ef 100644
> > --- a/include/asm-generic/mshyperv.h
> > +++ b/include/asm-generic/mshyperv.h
> > @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
> >  	return __cpumask_to_vpset(vpset, cpus, func);
> >  }
> >
> > +const char *hv_result_to_string(u64 hv_status);
> >  int hv_result_to_errno(u64 status);
> >  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
> >  bool hv_is_hyperv_initialized(void);
> > --
> > 2.34.1
> 
> I've read through the other comments on this patch. I definitely vote
> for outputting both the hex code along with a string translation, which
> could be empty if the hex code is unrecognized by the translation code.
> 
> I can see providing something like hv_hvcall_err() as Nuno proposed, since
> that standardizes the text output. But I wonder if it would be too limiting.
> For example, in the changes above, both hv_call_add_logical_proc() and
> hv_call_create_vp() output additional debugging values, which we probably
> don't want to give up.
> 
> Lastly, from an implementation standpoint, rather than using a big
> switch statement, build a static array of entries that each have the
> hex code and string equivalent. Then hv_result_to_string() loops through
> the array looking for a match. This won't be any slower than the big switch
> statement. I've seen other places in the kernel where string names are
> output, and looking up the strings in a static array is the typical approach.
> You'll have to work through the details and see if avoids being too clumsy,
> but I think it will be OK.
> 

Better yet, also include the translated errno in each static array entry.
Then hv_result_to_errno() can do the same kind of lookup instead of
having its own switch statement. I did a quick look to see if the two
functions might be combined to do only a single lookup, but that looks
somewhat clumsy unless someone else spots a better way to handle it.
The cost of doing two lookups doesn't really matter in an error case.

FWIW, hv_result_to_errno() and the new hv_result_to_string() are both
slightly misnamed. The input argument is a full 64-bit hv_status, not the
smaller 16-bit result field. hv_status_to_errno() and hv_status_to_string()
would be more precise.

Michael
Nuno Das Neves March 6, 2025, 6:40 p.m. UTC | #14
On 3/6/2025 10:09 AM, Michael Kelley wrote:
> From: Michael Kelley <mhklinux@outlook.com> Sent: Thursday, March 6, 2025 9:58 AM
> 
>>
>> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February
>> 26, 2025 3:08 PM
>>>
>>> Introduce hv_result_to_string() for this purpose. This allows
>>> hypercall failures to be debugged more easily with dmesg.
>>>
>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>>> ---
>>>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>>>  drivers/hv/hv_proc.c           | 13 ++++---
>>>  include/asm-generic/mshyperv.h |  1 +
>>>  3 files changed, 74 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>>> index 9804adb4cc56..ce20818688fe 100644
>>> --- a/drivers/hv/hv_common.c
>>> +++ b/drivers/hv/hv_common.c
>>> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>>>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>>>  	}
>>>  }
>>> +
>>> +const char *hv_result_to_string(u64 hv_status)
>>> +{
>>> +	switch (hv_result(hv_status)) {
>>> +	case HV_STATUS_SUCCESS:
>>> +		return "HV_STATUS_SUCCESS";
>>> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
>>> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
>>> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
>>> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
>>> +	case HV_STATUS_INVALID_ALIGNMENT:
>>> +		return "HV_STATUS_INVALID_ALIGNMENT";
>>> +	case HV_STATUS_INVALID_PARAMETER:
>>> +		return "HV_STATUS_INVALID_PARAMETER";
>>> +	case HV_STATUS_ACCESS_DENIED:
>>> +		return "HV_STATUS_ACCESS_DENIED";
>>> +	case HV_STATUS_INVALID_PARTITION_STATE:
>>> +		return "HV_STATUS_INVALID_PARTITION_STATE";
>>> +	case HV_STATUS_OPERATION_DENIED:
>>> +		return "HV_STATUS_OPERATION_DENIED";
>>> +	case HV_STATUS_UNKNOWN_PROPERTY:
>>> +		return "HV_STATUS_UNKNOWN_PROPERTY";
>>> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
>>> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
>>> +	case HV_STATUS_INSUFFICIENT_MEMORY:
>>> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
>>> +	case HV_STATUS_INVALID_PARTITION_ID:
>>> +		return "HV_STATUS_INVALID_PARTITION_ID";
>>> +	case HV_STATUS_INVALID_VP_INDEX:
>>> +		return "HV_STATUS_INVALID_VP_INDEX";
>>> +	case HV_STATUS_NOT_FOUND:
>>> +		return "HV_STATUS_NOT_FOUND";
>>> +	case HV_STATUS_INVALID_PORT_ID:
>>> +		return "HV_STATUS_INVALID_PORT_ID";
>>> +	case HV_STATUS_INVALID_CONNECTION_ID:
>>> +		return "HV_STATUS_INVALID_CONNECTION_ID";
>>> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
>>> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
>>> +	case HV_STATUS_NOT_ACKNOWLEDGED:
>>> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
>>> +	case HV_STATUS_INVALID_VP_STATE:
>>> +		return "HV_STATUS_INVALID_VP_STATE";
>>> +	case HV_STATUS_NO_RESOURCES:
>>> +		return "HV_STATUS_NO_RESOURCES";
>>> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
>>> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
>>> +	case HV_STATUS_INVALID_LP_INDEX:
>>> +		return "HV_STATUS_INVALID_LP_INDEX";
>>> +	case HV_STATUS_INVALID_REGISTER_VALUE:
>>> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
>>> +	case HV_STATUS_OPERATION_FAILED:
>>> +		return "HV_STATUS_OPERATION_FAILED";
>>> +	case HV_STATUS_TIME_OUT:
>>> +		return "HV_STATUS_TIME_OUT";
>>> +	case HV_STATUS_CALL_PENDING:
>>> +		return "HV_STATUS_CALL_PENDING";
>>> +	case HV_STATUS_VTL_ALREADY_ENABLED:
>>> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
>>> +	default:
>>> +		return "Unknown";
>>> +	};
>>> +	return "Unknown";
>>> +}
>>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>>> +
>>> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
>>> index 2fae18e4f7d2..8fc30f509fa7 100644
>>> --- a/drivers/hv/hv_proc.c
>>> +++ b/drivers/hv/hv_proc.c
>>> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32
>>> num_pages)
>>>  				     page_count, 0, input_page, NULL);
>>>  	local_irq_restore(flags);
>>>  	if (!hv_result_success(status)) {
>>> -		pr_err("Failed to deposit pages: %lld\n", status);
>>> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
>>> +		       hv_result_to_string(status));
>>>  		ret = hv_result_to_errno(status);
>>>  		goto err_free_allocations;
>>>  	}
>>> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>>>
>>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>>  			if (!hv_result_success(status)) {
>>> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
>>> -				       lp_index, apic_id, status);
>>> +				pr_err("%s: cpu %u apic ID %u, %s\n",
>>> +				       __func__, lp_index, apic_id,
>>> +				       hv_result_to_string(status));
>>>  				ret = hv_result_to_errno(status);
>>>  			}
>>>  			break;
>>> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index,
>>> u32 flags)
>>>
>>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>>  			if (!hv_result_success(status)) {
>>> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
>>> -				       vp_index, flags, status);
>>> +				pr_err("%s: vcpu %u, lp %u, %s\n",
>>> +				       __func__, vp_index, flags,
>>> +				       hv_result_to_string(status));
>>>  				ret = hv_result_to_errno(status);
>>>  			}
>>>  			break;
>>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>>> index b13b0cda4ac8..dc4729dba9ef 100644
>>> --- a/include/asm-generic/mshyperv.h
>>> +++ b/include/asm-generic/mshyperv.h
>>> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>>>  	return __cpumask_to_vpset(vpset, cpus, func);
>>>  }
>>>
>>> +const char *hv_result_to_string(u64 hv_status);
>>>  int hv_result_to_errno(u64 status);
>>>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>>>  bool hv_is_hyperv_initialized(void);
>>> --
>>> 2.34.1
>>
>> I've read through the other comments on this patch. I definitely vote
>> for outputting both the hex code along with a string translation, which
>> could be empty if the hex code is unrecognized by the translation code.
>>
>> I can see providing something like hv_hvcall_err() as Nuno proposed, since
>> that standardizes the text output. But I wonder if it would be too limiting.
>> For example, in the changes above, both hv_call_add_logical_proc() and
>> hv_call_create_vp() output additional debugging values, which we probably
>> don't want to give up.
>>
>> Lastly, from an implementation standpoint, rather than using a big
>> switch statement, build a static array of entries that each have the
>> hex code and string equivalent. Then hv_result_to_string() loops through
>> the array looking for a match. This won't be any slower than the big switch
>> statement. I've seen other places in the kernel where string names are
>> output, and looking up the strings in a static array is the typical approach.
>> You'll have to work through the details and see if avoids being too clumsy,
>> but I think it will be OK.
>>
> 
> Better yet, also include the translated errno in each static array entry.
> Then hv_result_to_errno() can do the same kind of lookup instead of
> having its own switch statement. I did a quick look to see if the two
> functions might be combined to do only a single lookup, but that looks
> somewhat clumsy unless someone else spots a better way to handle it.
> The cost of doing two lookups doesn't really matter in an error case.
> 
> FWIW, hv_result_to_errno() and the new hv_result_to_string() are both
> slightly misnamed. The input argument is a full 64-bit hv_status, not the
> smaller 16-bit result field. hv_status_to_errno() and hv_status_to_string()
> would be more precise.
> 
Hmm, well I'll admit I was and still am rather confused on this point.

In the TLFS (section 3.8) the entire 64-bit return value is called the
"hypercall result value".
The 16-bit HV_STATUS part is *also* called the "result" in this section.
Later, in section 3.12, the 16-bit field is referred to as a "status value
field".
Furthermore, the name of the 16-bit value, itself, is HV_STATUS.

Despite the inconsistency, in my mind it makes the most sense that the
16-bit HV_STATUS part the "status" and the entire 64-bit return value the
"result". I am aware that elsewhere (and in the driver patches in this
series), the name "status" is used to refer to the entire 64-bit return
value.

These functions were actually called hv_status_to_errno() and hv_status_to_string()
in the past, but I changed them to use "result" by following my own logic, and I
thought this also matched the naming of hv_result() and hv_result_success().
However I now realize that the "result" in these names refers to the *output* of
these functions... they take a u64 status as a parameter after all..

So in the end I'm rather bothered by this whole situation. I can change these
names back to "status" (although hv_result_to_errno() is already merged, I
could send a fixup), or I could keep "result", which I think is a more
logical name for the 64-bit value, even though it somewhat contradicts how
the term is already used in the kernel.

Given it doesn't seem to be well-defined in the first place, I'm not really
sure the best route.

Nuno

> Michael
Michael Kelley March 6, 2025, 6:57 p.m. UTC | #15
From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Thursday, March 6, 2025 10:41 AM
> 
> On 3/6/2025 10:09 AM, Michael Kelley wrote:
> > From: Michael Kelley <mhklinux@outlook.com> Sent: Thursday, March 6, 2025 9:58 AM
> >

[snip]

> >> I've read through the other comments on this patch. I definitely vote
> >> for outputting both the hex code along with a string translation, which
> >> could be empty if the hex code is unrecognized by the translation code.
> >>
> >> I can see providing something like hv_hvcall_err() as Nuno proposed, since
> >> that standardizes the text output. But I wonder if it would be too limiting.
> >> For example, in the changes above, both hv_call_add_logical_proc() and
> >> hv_call_create_vp() output additional debugging values, which we probably
> >> don't want to give up.
> >>
> >> Lastly, from an implementation standpoint, rather than using a big
> >> switch statement, build a static array of entries that each have the
> >> hex code and string equivalent. Then hv_result_to_string() loops through
> >> the array looking for a match. This won't be any slower than the big switch
> >> statement. I've seen other places in the kernel where string names are
> >> output, and looking up the strings in a static array is the typical approach.
> >> You'll have to work through the details and see if avoids being too clumsy,
> >> but I think it will be OK.
> >>
> >
> > Better yet, also include the translated errno in each static array entry.
> > Then hv_result_to_errno() can do the same kind of lookup instead of
> > having its own switch statement. I did a quick look to see if the two
> > functions might be combined to do only a single lookup, but that looks
> > somewhat clumsy unless someone else spots a better way to handle it.
> > The cost of doing two lookups doesn't really matter in an error case.
> >
> > FWIW, hv_result_to_errno() and the new hv_result_to_string() are both
> > slightly misnamed. The input argument is a full 64-bit hv_status, not the
> > smaller 16-bit result field. hv_status_to_errno() and hv_status_to_string()
> > would be more precise.
> >
> Hmm, well I'll admit I was and still am rather confused on this point.
> 
> In the TLFS (section 3.8) the entire 64-bit return value is called the  "hypercall result value".
> The 16-bit HV_STATUS part is *also* called the "result" in this section.
> Later, in section 3.12, the 16-bit field is referred to as a "status value field".
> Furthermore, the name of the 16-bit value, itself, is HV_STATUS.
> 
> Despite the inconsistency, in my mind it makes the most sense that the
> 16-bit HV_STATUS part the "status" and the entire 64-bit return value the
> "result". I am aware that elsewhere (and in the driver patches in this
> series), the name "status" is used to refer to the entire 64-bit return
> value.
> 
> These functions were actually called hv_status_to_errno() and hv_status_to_string()
> in the past, but I changed them to use "result" by following my own logic, and I
> thought this also matched the naming of hv_result() and hv_result_success().
> However I now realize that the "result" in these names refers to the *output* of
> these functions... they take a u64 status as a parameter after all..
> 
> So in the end I'm rather bothered by this whole situation. I can change these
> names back to "status" (although hv_result_to_errno() is already merged, I
> could send a fixup), or I could keep "result", which I think is a more
> logical name for the 64-bit value, even though it somewhat contradicts how
> the term is already used in the kernel.
> 
> Given it doesn't seem to be well-defined in the first place, I'm not really
> sure the best route.
> 

Hmmm. You are right. I had in my mind that "status" is the full 64-bit
value, and "result" is the 16-bit error code. But that's certainly not
always the case. And as you point out, it doesn't comport with the TLFS,
and the TLFS itself is not consistent in the terminology.

Ignore my comment. The difference doesn't have any real impact. Leave
the sorting out for some other time. :-)

Michael
Nuno Das Neves March 7, 2025, 7:38 p.m. UTC | #16
On 3/6/2025 9:57 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Wednesday, February 26, 2025 3:08 PM
>>
>> Introduce hv_result_to_string() for this purpose. This allows
>> hypercall failures to be debugged more easily with dmesg.
>>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>>  drivers/hv/hv_proc.c           | 13 ++++---
>>  include/asm-generic/mshyperv.h |  1 +
>>  3 files changed, 74 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 9804adb4cc56..ce20818688fe 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>>  	}
>>  }
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> +	switch (hv_result(hv_status)) {
>> +	case HV_STATUS_SUCCESS:
>> +		return "HV_STATUS_SUCCESS";
>> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
>> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
>> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
>> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
>> +	case HV_STATUS_INVALID_ALIGNMENT:
>> +		return "HV_STATUS_INVALID_ALIGNMENT";
>> +	case HV_STATUS_INVALID_PARAMETER:
>> +		return "HV_STATUS_INVALID_PARAMETER";
>> +	case HV_STATUS_ACCESS_DENIED:
>> +		return "HV_STATUS_ACCESS_DENIED";
>> +	case HV_STATUS_INVALID_PARTITION_STATE:
>> +		return "HV_STATUS_INVALID_PARTITION_STATE";
>> +	case HV_STATUS_OPERATION_DENIED:
>> +		return "HV_STATUS_OPERATION_DENIED";
>> +	case HV_STATUS_UNKNOWN_PROPERTY:
>> +		return "HV_STATUS_UNKNOWN_PROPERTY";
>> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
>> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
>> +	case HV_STATUS_INSUFFICIENT_MEMORY:
>> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
>> +	case HV_STATUS_INVALID_PARTITION_ID:
>> +		return "HV_STATUS_INVALID_PARTITION_ID";
>> +	case HV_STATUS_INVALID_VP_INDEX:
>> +		return "HV_STATUS_INVALID_VP_INDEX";
>> +	case HV_STATUS_NOT_FOUND:
>> +		return "HV_STATUS_NOT_FOUND";
>> +	case HV_STATUS_INVALID_PORT_ID:
>> +		return "HV_STATUS_INVALID_PORT_ID";
>> +	case HV_STATUS_INVALID_CONNECTION_ID:
>> +		return "HV_STATUS_INVALID_CONNECTION_ID";
>> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
>> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
>> +	case HV_STATUS_NOT_ACKNOWLEDGED:
>> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
>> +	case HV_STATUS_INVALID_VP_STATE:
>> +		return "HV_STATUS_INVALID_VP_STATE";
>> +	case HV_STATUS_NO_RESOURCES:
>> +		return "HV_STATUS_NO_RESOURCES";
>> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
>> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
>> +	case HV_STATUS_INVALID_LP_INDEX:
>> +		return "HV_STATUS_INVALID_LP_INDEX";
>> +	case HV_STATUS_INVALID_REGISTER_VALUE:
>> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
>> +	case HV_STATUS_OPERATION_FAILED:
>> +		return "HV_STATUS_OPERATION_FAILED";
>> +	case HV_STATUS_TIME_OUT:
>> +		return "HV_STATUS_TIME_OUT";
>> +	case HV_STATUS_CALL_PENDING:
>> +		return "HV_STATUS_CALL_PENDING";
>> +	case HV_STATUS_VTL_ALREADY_ENABLED:
>> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
>> +	default:
>> +		return "Unknown";
>> +	};
>> +	return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>> +
>> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
>> index 2fae18e4f7d2..8fc30f509fa7 100644
>> --- a/drivers/hv/hv_proc.c
>> +++ b/drivers/hv/hv_proc.c
>> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32
>> num_pages)
>>  				     page_count, 0, input_page, NULL);
>>  	local_irq_restore(flags);
>>  	if (!hv_result_success(status)) {
>> -		pr_err("Failed to deposit pages: %lld\n", status);
>> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
>> +		       hv_result_to_string(status));
>>  		ret = hv_result_to_errno(status);
>>  		goto err_free_allocations;
>>  	}
>> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>>
>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>  			if (!hv_result_success(status)) {
>> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
>> -				       lp_index, apic_id, status);
>> +				pr_err("%s: cpu %u apic ID %u, %s\n",
>> +				       __func__, lp_index, apic_id,
>> +				       hv_result_to_string(status));
>>  				ret = hv_result_to_errno(status);
>>  			}
>>  			break;
>> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index,
>> u32 flags)
>>
>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>  			if (!hv_result_success(status)) {
>> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
>> -				       vp_index, flags, status);
>> +				pr_err("%s: vcpu %u, lp %u, %s\n",
>> +				       __func__, vp_index, flags,
>> +				       hv_result_to_string(status));
>>  				ret = hv_result_to_errno(status);
>>  			}
>>  			break;
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index b13b0cda4ac8..dc4729dba9ef 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>>  	return __cpumask_to_vpset(vpset, cpus, func);
>>  }
>>
>> +const char *hv_result_to_string(u64 hv_status);
>>  int hv_result_to_errno(u64 status);
>>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>>  bool hv_is_hyperv_initialized(void);
>> --
>> 2.34.1
> 
> I've read through the other comments on this patch. I definitely vote
> for outputting both the hex code along with a string translation, which
> could be empty if the hex code is unrecognized by the translation code.
> 
> I can see providing something like hv_hvcall_err() as Nuno proposed, since
> that standardizes the text output. But I wonder if it would be too limiting.
> For example, in the changes above, both hv_call_add_logical_proc() and
> hv_call_create_vp() output additional debugging values, which we probably
> don't want to give up.
> 

Good point - that is easy though, I'll add a __VA_ARGS__ to the macro so any
custom message can be printed alongside the other info.

> Lastly, from an implementation standpoint, rather than using a big
> switch statement, build a static array of entries that each have the
> hex code and string equivalent. Then hv_result_to_string() loops through
> the array looking for a match. This won't be any slower than the big switch
> statement. I've seen other places in the kernel where string names are
> output, and looking up the strings in a static array is the typical approach.
> You'll have to work through the details and see if avoids being too clumsy,
> but I think it will be OK.

I'll try it out, agree the perf difference probably won't be significant or
even matter much.

Thanks
Nuno

> 
> Michael
diff mbox series

Patch

diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 9804adb4cc56..ce20818688fe 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -740,3 +740,68 @@  void hv_identify_partition_type(void)
 			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
 	}
 }
+
+const char *hv_result_to_string(u64 hv_status)
+{
+	switch (hv_result(hv_status)) {
+	case HV_STATUS_SUCCESS:
+		return "HV_STATUS_SUCCESS";
+	case HV_STATUS_INVALID_HYPERCALL_CODE:
+		return "HV_STATUS_INVALID_HYPERCALL_CODE";
+	case HV_STATUS_INVALID_HYPERCALL_INPUT:
+		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
+	case HV_STATUS_INVALID_ALIGNMENT:
+		return "HV_STATUS_INVALID_ALIGNMENT";
+	case HV_STATUS_INVALID_PARAMETER:
+		return "HV_STATUS_INVALID_PARAMETER";
+	case HV_STATUS_ACCESS_DENIED:
+		return "HV_STATUS_ACCESS_DENIED";
+	case HV_STATUS_INVALID_PARTITION_STATE:
+		return "HV_STATUS_INVALID_PARTITION_STATE";
+	case HV_STATUS_OPERATION_DENIED:
+		return "HV_STATUS_OPERATION_DENIED";
+	case HV_STATUS_UNKNOWN_PROPERTY:
+		return "HV_STATUS_UNKNOWN_PROPERTY";
+	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
+		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
+	case HV_STATUS_INSUFFICIENT_MEMORY:
+		return "HV_STATUS_INSUFFICIENT_MEMORY";
+	case HV_STATUS_INVALID_PARTITION_ID:
+		return "HV_STATUS_INVALID_PARTITION_ID";
+	case HV_STATUS_INVALID_VP_INDEX:
+		return "HV_STATUS_INVALID_VP_INDEX";
+	case HV_STATUS_NOT_FOUND:
+		return "HV_STATUS_NOT_FOUND";
+	case HV_STATUS_INVALID_PORT_ID:
+		return "HV_STATUS_INVALID_PORT_ID";
+	case HV_STATUS_INVALID_CONNECTION_ID:
+		return "HV_STATUS_INVALID_CONNECTION_ID";
+	case HV_STATUS_INSUFFICIENT_BUFFERS:
+		return "HV_STATUS_INSUFFICIENT_BUFFERS";
+	case HV_STATUS_NOT_ACKNOWLEDGED:
+		return "HV_STATUS_NOT_ACKNOWLEDGED";
+	case HV_STATUS_INVALID_VP_STATE:
+		return "HV_STATUS_INVALID_VP_STATE";
+	case HV_STATUS_NO_RESOURCES:
+		return "HV_STATUS_NO_RESOURCES";
+	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
+		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
+	case HV_STATUS_INVALID_LP_INDEX:
+		return "HV_STATUS_INVALID_LP_INDEX";
+	case HV_STATUS_INVALID_REGISTER_VALUE:
+		return "HV_STATUS_INVALID_REGISTER_VALUE";
+	case HV_STATUS_OPERATION_FAILED:
+		return "HV_STATUS_OPERATION_FAILED";
+	case HV_STATUS_TIME_OUT:
+		return "HV_STATUS_TIME_OUT";
+	case HV_STATUS_CALL_PENDING:
+		return "HV_STATUS_CALL_PENDING";
+	case HV_STATUS_VTL_ALREADY_ENABLED:
+		return "HV_STATUS_VTL_ALREADY_ENABLED";
+	default:
+		return "Unknown";
+	};
+	return "Unknown";
+}
+EXPORT_SYMBOL_GPL(hv_result_to_string);
+
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index 2fae18e4f7d2..8fc30f509fa7 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -87,7 +87,8 @@  int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
 				     page_count, 0, input_page, NULL);
 	local_irq_restore(flags);
 	if (!hv_result_success(status)) {
-		pr_err("Failed to deposit pages: %lld\n", status);
+		pr_err("%s: Failed to deposit pages: %s\n", __func__,
+		       hv_result_to_string(status));
 		ret = hv_result_to_errno(status);
 		goto err_free_allocations;
 	}
@@ -137,8 +138,9 @@  int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 
 		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (!hv_result_success(status)) {
-				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
-				       lp_index, apic_id, status);
+				pr_err("%s: cpu %u apic ID %u, %s\n",
+				       __func__, lp_index, apic_id,
+				       hv_result_to_string(status));
 				ret = hv_result_to_errno(status);
 			}
 			break;
@@ -179,8 +181,9 @@  int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
 
 		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (!hv_result_success(status)) {
-				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
-				       vp_index, flags, status);
+				pr_err("%s: vcpu %u, lp %u, %s\n",
+				       __func__, vp_index, flags,
+				       hv_result_to_string(status));
 				ret = hv_result_to_errno(status);
 			}
 			break;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index b13b0cda4ac8..dc4729dba9ef 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -298,6 +298,7 @@  static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
 	return __cpumask_to_vpset(vpset, cpus, func);
 }
 
+const char *hv_result_to_string(u64 hv_status);
 int hv_result_to_errno(u64 status);
 void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
 bool hv_is_hyperv_initialized(void);