diff mbox series

libbpf: add support for printing BTF character arrays as strings

Message ID 20250531072031.2263491-1-blakejones@google.com
State New
Headers show
Series libbpf: add support for printing BTF character arrays as strings | expand

Commit Message

Blake Jones May 31, 2025, 7:20 a.m. UTC
The BTF dumper code currently displays arrays of characters as just that -
arrays, with each character formatted individually. Sometimes this is what
makes sense, but it's nice to be able to treat that array as a string.

This change adds a special case to the btf_dump functionality to allow
arrays of single-byte integer values to be printed as character strings.
Characters for which isprint() returns false are printed as hex-escaped
values. This is enabled when the new ".print_strings" is set to 1 in the
btf_dump_type_data_opts structure.

As an example, here's what it looks like to dump the string "hello" using
a few different field values for btf_dump_type_data_opts (.compact = 1):

- .print_strings = 0, .skip_names = 0:  (char[6])['h','e','l','l','o',]
- .print_strings = 0, .skip_names = 1:  ['h','e','l','l','o',]
- .print_strings = 1, .skip_names = 0:  (char[6])"hello"
- .print_strings = 1, .skip_names = 1:  "hello"

Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:

- .print_strings = 0:  ['h',-1,]
- .print_strings = 1:  "h\xff"

Signed-off-by: Blake Jones <blakejones@google.com>
---
 tools/lib/bpf/btf.h                           |   3 +-
 tools/lib/bpf/btf_dump.c                      |  51 ++++++++-
 .../selftests/bpf/prog_tests/btf_dump.c       | 102 ++++++++++++++++++
 3 files changed, 154 insertions(+), 2 deletions(-)

Comments

Alexei Starovoitov May 31, 2025, 6:20 p.m. UTC | #1
On Sat, May 31, 2025 at 12:20 AM Blake Jones <blakejones@google.com> wrote:
>
> The BTF dumper code currently displays arrays of characters as just that -
> arrays, with each character formatted individually. Sometimes this is what
> makes sense, but it's nice to be able to treat that array as a string.
>
> This change adds a special case to the btf_dump functionality to allow
> arrays of single-byte integer values to be printed as character strings.
> Characters for which isprint() returns false are printed as hex-escaped
> values. This is enabled when the new ".print_strings" is set to 1 in the
> btf_dump_type_data_opts structure.
>
> As an example, here's what it looks like to dump the string "hello" using
> a few different field values for btf_dump_type_data_opts (.compact = 1):
>
> - .print_strings = 0, .skip_names = 0:  (char[6])['h','e','l','l','o',]
> - .print_strings = 0, .skip_names = 1:  ['h','e','l','l','o',]
> - .print_strings = 1, .skip_names = 0:  (char[6])"hello"
> - .print_strings = 1, .skip_names = 1:  "hello"
>
> Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
>
> - .print_strings = 0:  ['h',-1,]
> - .print_strings = 1:  "h\xff"
>
> Signed-off-by: Blake Jones <blakejones@google.com>
> ---
>  tools/lib/bpf/btf.h                           |   3 +-
>  tools/lib/bpf/btf_dump.c                      |  51 ++++++++-
>  .../selftests/bpf/prog_tests/btf_dump.c       | 102 ++++++++++++++++++
>  3 files changed, 154 insertions(+), 2 deletions(-)

Please split selftests vs main libbpf parts.

> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index 4392451d634b..be8e8e26d245 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {
>         bool compact;           /* no newlines/indentation */
>         bool skip_names;        /* skip member/type names */
>         bool emit_zeroes;       /* show 0-valued fields */
> +       bool print_strings;     /* print char arrays as strings */
>         size_t :0;
>  };
> -#define btf_dump_type_data_opts__last_field emit_zeroes
> +#define btf_dump_type_data_opts__last_field print_strings
>
>  LIBBPF_API int
>  btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> index 460c3e57fadb..a07dd5accdd8 100644
> --- a/tools/lib/bpf/btf_dump.c
> +++ b/tools/lib/bpf/btf_dump.c
> @@ -75,6 +75,7 @@ struct btf_dump_data {
>         bool is_array_member;
>         bool is_array_terminated;
>         bool is_array_char;
> +       bool print_strings;

Looks useful, but make sure to add feature detection
to perf, since it has to work with old and new libbpf.

>  };
>
>  struct btf_dump {
> @@ -2028,6 +2029,50 @@ static int btf_dump_var_data(struct btf_dump *d,
>         return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
>  }
>
> +static int btf_dump_string_data(struct btf_dump *d,
> +                               const struct btf_type *t,
> +                               __u32 id,
> +                               const void *data)
> +{
> +       const struct btf_array *array = btf_array(t);
> +       __u32 i;
> +
> +       if (!btf_is_int(skip_mods_and_typedefs(d->btf, array->type, NULL)) ||
> +           btf__resolve_size(d->btf, array->type) != 1 ||
> +           !d->typed_dump->print_strings) {
> +               pr_warn("unexpected %s() call for array type %u\n",
> +                       __func__, array->type);
> +               return -EINVAL;
> +       }
> +
> +       btf_dump_data_pfx(d);
> +       btf_dump_printf(d, "\"");
> +
> +       for (i = 0; i < array->nelems; i++, data++) {
> +               char c;
> +
> +               if (data >= d->typed_dump->data_end)
> +                       return -E2BIG;
> +
> +               c = *(char *)data;
> +               if (c == '\0') {
> +                       /* When printing character arrays as strings, NUL bytes
> +                        * are always treated as string terminators; they are
> +                        * never printed.
> +                        */

Please use normal kernel style comments.
We're gradually getting away from networking style.

> +                       break;
> +               }
> +               if (isprint(c))
> +                       btf_dump_printf(d, "%c", c);
> +               else
> +                       btf_dump_printf(d, "\\x%02x", *(__u8 *)data);
> +       }
> +
> +       btf_dump_printf(d, "\"");
> +
> +       return 0;
> +}
> +
>  static int btf_dump_array_data(struct btf_dump *d,
>                                const struct btf_type *t,
>                                __u32 id,
> @@ -2055,8 +2100,11 @@ static int btf_dump_array_data(struct btf_dump *d,
>                  * char arrays, so if size is 1 and element is
>                  * printable as a char, we'll do that.
>                  */
> -               if (elem_size == 1)
> +               if (elem_size == 1) {
> +                       if (d->typed_dump->print_strings)
> +                               return btf_dump_string_data(d, t, id, data);
>                         d->typed_dump->is_array_char = true;
> +               }
>         }
>
>         /* note that we increment depth before calling btf_dump_print() below;
> @@ -2544,6 +2592,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
>         d->typed_dump->compact = OPTS_GET(opts, compact, false);
>         d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
>         d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
> +       d->typed_dump->print_strings = OPTS_GET(opts, print_strings, false);
>
>         ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> index c0a776feec23..70e51943f148 100644
> --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> @@ -879,6 +879,106 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
>                           "static int bpf_cgrp_storage_busy = (int)2", 2);
>  }
>
> +/*
> + * String-like types are generally not named, so they need to be
> + * found this way rather than via btf__find_by_name().
> + */

This is the correct style of comments.

> +static int find_char_array_type(struct btf *btf, int nelems)
> +{
> +       const int nr_types = btf__type_cnt(btf);
> +       const int char_type = btf__find_by_name(btf, "char");
> +
> +       for (int i = 1; i < nr_types; i++) {
> +               const struct btf_type *t;
> +               const struct btf_array *at;
> +
> +               t = btf__type_by_id(btf, i);
> +               if (btf_kind(t) != BTF_KIND_ARRAY)
> +                       continue;
> +
> +               at = btf_array(t);
> +               if (at->nelems == nelems && at->type == char_type)
> +                       return i;
> +       }
> +
> +       return -ENOENT;
> +}
> +
> +static int btf_dump_string_data(struct btf *btf, struct btf_dump *d,
> +                               char *str, struct btf_dump_type_data_opts *opts,
> +                               char *ptr, size_t ptr_sz,
> +                               const char *expected_val)
> +{
> +       char name[64];
> +       size_t type_sz;
> +       int type_id;
> +       int ret = 0;
> +
> +       snprintf(name, sizeof(name), "char[%zu]", ptr_sz);
> +       type_id = find_char_array_type(btf, ptr_sz);
> +       if (!ASSERT_GE(type_id, 0, "find type id"))
> +               return -ENOENT;
> +       type_sz = btf__resolve_size(btf, type_id);
> +       str[0] = '\0';
> +       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, opts);
> +       if (type_sz <= ptr_sz) {
> +               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
> +                       return -EINVAL;
> +       } else {
> +               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
> +                       return -EINVAL;
> +       }
> +       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
> +               return -EFAULT;
> +       return 0;
> +}
> +
> +static void test_btf_dump_string_data(struct btf *btf, struct btf_dump *d,
> +                                     char *str)
> +{
> +       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
> +
> +       opts.compact = true;
> +       opts.emit_zeroes = false;
> +       opts.print_strings = true;
> +
> +       opts.skip_names = false;
> +       btf_dump_string_data(btf, d, str, &opts, "foo", 4,
> +               "(char[4])\"foo\"");

We allow up to 100 characters per line.
Don't split it that short.

pw-bot: cr
Ian Rogers June 2, 2025, 3:05 p.m. UTC | #2
On Sat, May 31, 2025 at 11:20 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Sat, May 31, 2025 at 12:20 AM Blake Jones <blakejones@google.com> wrote:
> >
> > The BTF dumper code currently displays arrays of characters as just that -
> > arrays, with each character formatted individually. Sometimes this is what
> > makes sense, but it's nice to be able to treat that array as a string.
> >
> > This change adds a special case to the btf_dump functionality to allow
> > arrays of single-byte integer values to be printed as character strings.
> > Characters for which isprint() returns false are printed as hex-escaped
> > values. This is enabled when the new ".print_strings" is set to 1 in the
> > btf_dump_type_data_opts structure.
> >
> > As an example, here's what it looks like to dump the string "hello" using
> > a few different field values for btf_dump_type_data_opts (.compact = 1):
> >
> > - .print_strings = 0, .skip_names = 0:  (char[6])['h','e','l','l','o',]
> > - .print_strings = 0, .skip_names = 1:  ['h','e','l','l','o',]
> > - .print_strings = 1, .skip_names = 0:  (char[6])"hello"
> > - .print_strings = 1, .skip_names = 1:  "hello"
> >
> > Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
> >
> > - .print_strings = 0:  ['h',-1,]
> > - .print_strings = 1:  "h\xff"
> >
> > Signed-off-by: Blake Jones <blakejones@google.com>
> > ---
> >  tools/lib/bpf/btf.h                           |   3 +-
> >  tools/lib/bpf/btf_dump.c                      |  51 ++++++++-
> >  .../selftests/bpf/prog_tests/btf_dump.c       | 102 ++++++++++++++++++
> >  3 files changed, 154 insertions(+), 2 deletions(-)
>
> Please split selftests vs main libbpf parts.
>
> > diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> > index 4392451d634b..be8e8e26d245 100644
> > --- a/tools/lib/bpf/btf.h
> > +++ b/tools/lib/bpf/btf.h
> > @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {
> >         bool compact;           /* no newlines/indentation */
> >         bool skip_names;        /* skip member/type names */
> >         bool emit_zeroes;       /* show 0-valued fields */
> > +       bool print_strings;     /* print char arrays as strings */
> >         size_t :0;
> >  };
> > -#define btf_dump_type_data_opts__last_field emit_zeroes
> > +#define btf_dump_type_data_opts__last_field print_strings
> >
> >  LIBBPF_API int
> >  btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
> > diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> > index 460c3e57fadb..a07dd5accdd8 100644
> > --- a/tools/lib/bpf/btf_dump.c
> > +++ b/tools/lib/bpf/btf_dump.c
> > @@ -75,6 +75,7 @@ struct btf_dump_data {
> >         bool is_array_member;
> >         bool is_array_terminated;
> >         bool is_array_char;
> > +       bool print_strings;
>
> Looks useful, but make sure to add a feature detection
> to perf, since it has to work with old and new libbpf.

Just for clarity on this. We'll need a "libbpf-strings" feature like
the existing "libbpf" one:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/build/feature/test-libbpf.c?h=perf-tools-next

Currently these features are only used if perf is built with
LIBBPF_DYNAMIC=1 as part of the build arguments (i.e. it's not the
default):
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/Makefile.config?h=perf-tools-next#n580

If no suitable libbpf is detected then the build will error out. I
guess if feature-libbpf is present but not feature-libbpf-strings then
we'll need a perf #define so that the string feature won't cause
perf's build to fail. We could make it so that perf's build fails if
either feature-libbpf or feature-libbpf-strings is missing, but that's
likely too much for people using LIBBPF_DYNAMIC=1 today.

Thanks,
Ian

> >  };
> >
> >  struct btf_dump {
> > @@ -2028,6 +2029,50 @@ static int btf_dump_var_data(struct btf_dump *d,
> >         return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
> >  }
> >
> > +static int btf_dump_string_data(struct btf_dump *d,
> > +                               const struct btf_type *t,
> > +                               __u32 id,
> > +                               const void *data)
> > +{
> > +       const struct btf_array *array = btf_array(t);
> > +       __u32 i;
> > +
> > +       if (!btf_is_int(skip_mods_and_typedefs(d->btf, array->type, NULL)) ||
> > +           btf__resolve_size(d->btf, array->type) != 1 ||
> > +           !d->typed_dump->print_strings) {
> > +               pr_warn("unexpected %s() call for array type %u\n",
> > +                       __func__, array->type);
> > +               return -EINVAL;
> > +       }
> > +
> > +       btf_dump_data_pfx(d);
> > +       btf_dump_printf(d, "\"");
> > +
> > +       for (i = 0; i < array->nelems; i++, data++) {
> > +               char c;
> > +
> > +               if (data >= d->typed_dump->data_end)
> > +                       return -E2BIG;
> > +
> > +               c = *(char *)data;
> > +               if (c == '\0') {
> > +                       /* When printing character arrays as strings, NUL bytes
> > +                        * are always treated as string terminators; they are
> > +                        * never printed.
> > +                        */
>
> Please use normal kernel style comments.
> We're gradually getting away from networking style.
>
> > +                       break;
> > +               }
> > +               if (isprint(c))
> > +                       btf_dump_printf(d, "%c", c);
> > +               else
> > +                       btf_dump_printf(d, "\\x%02x", *(__u8 *)data);
> > +       }
> > +
> > +       btf_dump_printf(d, "\"");
> > +
> > +       return 0;
> > +}
> > +
> >  static int btf_dump_array_data(struct btf_dump *d,
> >                                const struct btf_type *t,
> >                                __u32 id,
> > @@ -2055,8 +2100,11 @@ static int btf_dump_array_data(struct btf_dump *d,
> >                  * char arrays, so if size is 1 and element is
> >                  * printable as a char, we'll do that.
> >                  */
> > -               if (elem_size == 1)
> > +               if (elem_size == 1) {
> > +                       if (d->typed_dump->print_strings)
> > +                               return btf_dump_string_data(d, t, id, data);
> >                         d->typed_dump->is_array_char = true;
> > +               }
> >         }
> >
> >         /* note that we increment depth before calling btf_dump_print() below;
> > @@ -2544,6 +2592,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
> >         d->typed_dump->compact = OPTS_GET(opts, compact, false);
> >         d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
> >         d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
> > +       d->typed_dump->print_strings = OPTS_GET(opts, print_strings, false);
> >
> >         ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> > index c0a776feec23..70e51943f148 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> > @@ -879,6 +879,106 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
> >                           "static int bpf_cgrp_storage_busy = (int)2", 2);
> >  }
> >
> > +/*
> > + * String-like types are generally not named, so they need to be
> > + * found this way rather than via btf__find_by_name().
> > + */
>
> This is the correct style of comments.
>
> > +static int find_char_array_type(struct btf *btf, int nelems)
> > +{
> > +       const int nr_types = btf__type_cnt(btf);
> > +       const int char_type = btf__find_by_name(btf, "char");
> > +
> > +       for (int i = 1; i < nr_types; i++) {
> > +               const struct btf_type *t;
> > +               const struct btf_array *at;
> > +
> > +               t = btf__type_by_id(btf, i);
> > +               if (btf_kind(t) != BTF_KIND_ARRAY)
> > +                       continue;
> > +
> > +               at = btf_array(t);
> > +               if (at->nelems == nelems && at->type == char_type)
> > +                       return i;
> > +       }
> > +
> > +       return -ENOENT;
> > +}
> > +
> > +static int btf_dump_string_data(struct btf *btf, struct btf_dump *d,
> > +                               char *str, struct btf_dump_type_data_opts *opts,
> > +                               char *ptr, size_t ptr_sz,
> > +                               const char *expected_val)
> > +{
> > +       char name[64];
> > +       size_t type_sz;
> > +       int type_id;
> > +       int ret = 0;
> > +
> > +       snprintf(name, sizeof(name), "char[%zu]", ptr_sz);
> > +       type_id = find_char_array_type(btf, ptr_sz);
> > +       if (!ASSERT_GE(type_id, 0, "find type id"))
> > +               return -ENOENT;
> > +       type_sz = btf__resolve_size(btf, type_id);
> > +       str[0] = '\0';
> > +       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, opts);
> > +       if (type_sz <= ptr_sz) {
> > +               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
> > +                       return -EINVAL;
> > +       } else {
> > +               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
> > +                       return -EINVAL;
> > +       }
> > +       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
> > +               return -EFAULT;
> > +       return 0;
> > +}
> > +
> > +static void test_btf_dump_string_data(struct btf *btf, struct btf_dump *d,
> > +                                     char *str)
> > +{
> > +       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
> > +
> > +       opts.compact = true;
> > +       opts.emit_zeroes = false;
> > +       opts.print_strings = true;
> > +
> > +       opts.skip_names = false;
> > +       btf_dump_string_data(btf, d, str, &opts, "foo", 4,
> > +               "(char[4])\"foo\"");
>
> we allow up to 100 char per line.
> Don't split it that short.
>
> pw-bot: cr
Alexei Starovoitov June 2, 2025, 6:39 p.m. UTC | #3
On Mon, Jun 2, 2025 at 8:05 AM Ian Rogers <irogers@google.com> wrote:
>
> On Sat, May 31, 2025 at 11:20 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Sat, May 31, 2025 at 12:20 AM Blake Jones <blakejones@google.com> wrote:
> > >
> > > The BTF dumper code currently displays arrays of characters as just that -
> > > arrays, with each character formatted individually. Sometimes this is what
> > > makes sense, but it's nice to be able to treat that array as a string.
> > >
> > > This change adds a special case to the btf_dump functionality to allow
> > > arrays of single-byte integer values to be printed as character strings.
> > > Characters for which isprint() returns false are printed as hex-escaped
> > > values. This is enabled when the new ".print_strings" is set to 1 in the
> > > btf_dump_type_data_opts structure.
> > >
> > > As an example, here's what it looks like to dump the string "hello" using
> > > a few different field values for btf_dump_type_data_opts (.compact = 1):
> > >
> > > - .print_strings = 0, .skip_names = 0:  (char[6])['h','e','l','l','o',]
> > > - .print_strings = 0, .skip_names = 1:  ['h','e','l','l','o',]
> > > - .print_strings = 1, .skip_names = 0:  (char[6])"hello"
> > > - .print_strings = 1, .skip_names = 1:  "hello"
> > >
> > > Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
> > >
> > > - .print_strings = 0:  ['h',-1,]
> > > - .print_strings = 1:  "h\xff"
> > >
> > > Signed-off-by: Blake Jones <blakejones@google.com>
> > > ---
> > >  tools/lib/bpf/btf.h                           |   3 +-
> > >  tools/lib/bpf/btf_dump.c                      |  51 ++++++++-
> > >  .../selftests/bpf/prog_tests/btf_dump.c       | 102 ++++++++++++++++++
> > >  3 files changed, 154 insertions(+), 2 deletions(-)
> >
> > Please split selftests vs main libbpf parts.
> >
> > > diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> > > index 4392451d634b..be8e8e26d245 100644
> > > --- a/tools/lib/bpf/btf.h
> > > +++ b/tools/lib/bpf/btf.h
> > > @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {
> > >         bool compact;           /* no newlines/indentation */
> > >         bool skip_names;        /* skip member/type names */
> > >         bool emit_zeroes;       /* show 0-valued fields */
> > > +       bool print_strings;     /* print char arrays as strings */
> > >         size_t :0;
> > >  };
> > > -#define btf_dump_type_data_opts__last_field emit_zeroes
> > > +#define btf_dump_type_data_opts__last_field print_strings
> > >
> > >  LIBBPF_API int
> > >  btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
> > > diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> > > index 460c3e57fadb..a07dd5accdd8 100644
> > > --- a/tools/lib/bpf/btf_dump.c
> > > +++ b/tools/lib/bpf/btf_dump.c
> > > @@ -75,6 +75,7 @@ struct btf_dump_data {
> > >         bool is_array_member;
> > >         bool is_array_terminated;
> > >         bool is_array_char;
> > > +       bool print_strings;
> >
> > Looks useful, but make sure to add a feature detection
> > to perf, since it has to work with old and new libbpf.
>
> Just for clarity on this. We'll need a "libbpf-strings" feature like
> the existing "libbpf" one:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/build/feature/test-libbpf.c?h=perf-tools-next
>
> Currently these features are only used if perf is built with
> LIBBPF_DYNAMIC=1 as part of the build arguments (i.e. it's not the
> default):
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/Makefile.config?h=perf-tools-next#n580
>
> If no suitable libbpf is detected then the build will error out. I
> guess if feature-libbpf is present but not feature-libbpf-strings then
> we'll need a perf #define so that the string feature won't cause
> perf's build to fail.

Yes. Something like this.
It will also allow libbpf and perf patches to land in parallel.
Blake Jones June 2, 2025, 8:02 p.m. UTC | #4
Hi Alexei,

Thanks for taking a look at this.

On Sat, May 31, 2025 at 11:20 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> Please split selftests vs main libbpf parts.
> [...]
> Please use normal kernel style comments.
> We're gradually getting away from networking style.
> [...]
> we allow up to 100 char per line.
> Don't split it that short.

I'll clean these up and send out a new patch set.

Blake
Blake Jones June 2, 2025, 8:26 p.m. UTC | #5
On Mon, Jun 2, 2025 at 11:39 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> > If no suitable libbpf is detected then the build will error out. I
> > guess if feature-libbpf is present but not feature-libbpf-strings then
> > we'll need a perf #define so that the string feature won't cause
> > perf's build to fail.
>
> Yes. Something like this.
> It will also allow libbpf and perf patches to land in parallel.

Ah, so I could test the perf changes using this libbpf, even though
it wouldn't be present in the same source tree until they're merged?
That's great - if I have to do a bit more work to reduce the overall
merge latency I'm happy to do that.

Blake
Andrii Nakryiko June 3, 2025, 12:06 a.m. UTC | #6
On Sat, May 31, 2025 at 12:20 AM Blake Jones <blakejones@google.com> wrote:
>
> The BTF dumper code currently displays arrays of characters as just that -
> arrays, with each character formatted individually. Sometimes this is what
> makes sense, but it's nice to be able to treat that array as a string.
>
> This change adds a special case to the btf_dump functionality to allow
> arrays of single-byte integer values to be printed as character strings.
> Characters for which isprint() returns false are printed as hex-escaped
> values. This is enabled when the new ".print_strings" is set to 1 in the
> btf_dump_type_data_opts structure.
>
> As an example, here's what it looks like to dump the string "hello" using
> a few different field values for btf_dump_type_data_opts (.compact = 1):
>
> - .print_strings = 0, .skip_names = 0:  (char[6])['h','e','l','l','o',]
> - .print_strings = 0, .skip_names = 1:  ['h','e','l','l','o',]
> - .print_strings = 1, .skip_names = 0:  (char[6])"hello"
> - .print_strings = 1, .skip_names = 1:  "hello"
>
> Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
>
> - .print_strings = 0:  ['h',-1,]
> - .print_strings = 1:  "h\xff"
>
> Signed-off-by: Blake Jones <blakejones@google.com>
> ---
>  tools/lib/bpf/btf.h                           |   3 +-
>  tools/lib/bpf/btf_dump.c                      |  51 ++++++++-
>  .../selftests/bpf/prog_tests/btf_dump.c       | 102 ++++++++++++++++++
>  3 files changed, 154 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index 4392451d634b..be8e8e26d245 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {
>         bool compact;           /* no newlines/indentation */
>         bool skip_names;        /* skip member/type names */
>         bool emit_zeroes;       /* show 0-valued fields */
> +       bool print_strings;     /* print char arrays as strings */

let's use "emit_strings" naming, so it's consistent with emit_zeroes?

>         size_t :0;
>  };
> -#define btf_dump_type_data_opts__last_field emit_zeroes
> +#define btf_dump_type_data_opts__last_field print_strings
>
>  LIBBPF_API int
>  btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> index 460c3e57fadb..a07dd5accdd8 100644
> --- a/tools/lib/bpf/btf_dump.c
> +++ b/tools/lib/bpf/btf_dump.c
> @@ -75,6 +75,7 @@ struct btf_dump_data {
>         bool is_array_member;
>         bool is_array_terminated;
>         bool is_array_char;
> +       bool print_strings;

ditto, emit_strings (and maybe put it next to emit_zeroes then)

>  };
>
>  struct btf_dump {
> @@ -2028,6 +2029,50 @@ static int btf_dump_var_data(struct btf_dump *d,
>         return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
>  }
>
> +static int btf_dump_string_data(struct btf_dump *d,
> +                               const struct btf_type *t,
> +                               __u32 id,
> +                               const void *data)
> +{
> +       const struct btf_array *array = btf_array(t);
> +       __u32 i;
> +
> +       if (!btf_is_int(skip_mods_and_typedefs(d->btf, array->type, NULL)) ||
> +           btf__resolve_size(d->btf, array->type) != 1 ||
> +           !d->typed_dump->print_strings) {
> +               pr_warn("unexpected %s() call for array type %u\n",
> +                       __func__, array->type);
> +               return -EINVAL;
> +       }
> +

IMO, a bit too defensive. You literally checked that we have char[] in
the caller, I think it's fine not to double-check that here, let's
drop this

> +       btf_dump_data_pfx(d);
> +       btf_dump_printf(d, "\"");
> +
> +       for (i = 0; i < array->nelems; i++, data++) {
> +               char c;
> +
> +               if (data >= d->typed_dump->data_end)
> +                       return -E2BIG;
> +
> +               c = *(char *)data;
> +               if (c == '\0') {
> +                       /* When printing character arrays as strings, NUL bytes
> +                        * are always treated as string terminators; they are
> +                        * never printed.
> +                        */
> +                       break;

what if there are non-zero characters after the terminating zero?
should we keep going and if there is any non-zero one, still emit
them? or maybe that should be an extra option?... When capturing some
data and dumping, it might be important to know all the contents (it
might be garbage or not, but you'll still see non-garbage values
before \0, so maybe it's fine to always do it?)

> +               }
> +               if (isprint(c))
> +                       btf_dump_printf(d, "%c", c);
> +               else
> +                       btf_dump_printf(d, "\\x%02x", *(__u8 *)data);
> +       }
> +
> +       btf_dump_printf(d, "\"");
> +
> +       return 0;
> +}
> +

[...]

> +/*
> + * String-like types are generally not named, so they need to be
> + * found this way rather than via btf__find_by_name().
> + */
> +static int find_char_array_type(struct btf *btf, int nelems)
> +{
> +       const int nr_types = btf__type_cnt(btf);
> +       const int char_type = btf__find_by_name(btf, "char");
> +
> +       for (int i = 1; i < nr_types; i++) {
> +               const struct btf_type *t;
> +               const struct btf_array *at;
> +
> +               t = btf__type_by_id(btf, i);
> +               if (btf_kind(t) != BTF_KIND_ARRAY)

btf_is_array()

> +                       continue;
> +
> +               at = btf_array(t);
> +               if (at->nelems == nelems && at->type == char_type)
> +                       return i;
> +       }
> +
> +       return -ENOENT;
> +}
> +
> +static int btf_dump_string_data(struct btf *btf, struct btf_dump *d,
> +                               char *str, struct btf_dump_type_data_opts *opts,
> +                               char *ptr, size_t ptr_sz,
> +                               const char *expected_val)
> +{
> +       char name[64];
> +       size_t type_sz;
> +       int type_id;
> +       int ret = 0;
> +
> +       snprintf(name, sizeof(name), "char[%zu]", ptr_sz);
> +       type_id = find_char_array_type(btf, ptr_sz);

instead of trying to find a suitable type in kernel BTF, just generate
a tiny custom BTF with necessary char[N] types? see btf__add_xxx()
usage for an example.

> +       if (!ASSERT_GE(type_id, 0, "find type id"))
> +               return -ENOENT;
> +       type_sz = btf__resolve_size(btf, type_id);
> +       str[0] = '\0';
> +       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, opts);
> +       if (type_sz <= ptr_sz) {
> +               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
> +                       return -EINVAL;
> +       } else {
> +               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
> +                       return -EINVAL;
> +       }
> +       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
> +               return -EFAULT;
> +       return 0;
> +}
> +

[...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 4392451d634b..be8e8e26d245 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -326,9 +326,10 @@  struct btf_dump_type_data_opts {
 	bool compact;		/* no newlines/indentation */
 	bool skip_names;	/* skip member/type names */
 	bool emit_zeroes;	/* show 0-valued fields */
+	bool print_strings;	/* print char arrays as strings */
 	size_t :0;
 };
-#define btf_dump_type_data_opts__last_field emit_zeroes
+#define btf_dump_type_data_opts__last_field print_strings
 
 LIBBPF_API int
 btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 460c3e57fadb..a07dd5accdd8 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -75,6 +75,7 @@  struct btf_dump_data {
 	bool is_array_member;
 	bool is_array_terminated;
 	bool is_array_char;
+	bool print_strings;
 };
 
 struct btf_dump {
@@ -2028,6 +2029,50 @@  static int btf_dump_var_data(struct btf_dump *d,
 	return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
 }
 
+/* Print the one-byte-integer array @t, stored at @data, as a quoted string.
+ * Output stops at the first NUL byte or after array->nelems bytes; bytes that
+ * fail isprint() are written as \xNN. Returns 0, -EINVAL when called for any
+ * other array type or with print_strings off, or -E2BIG on reaching data_end.
+ */
+static int btf_dump_string_data(struct btf_dump *d, const struct btf_type *t,
+				__u32 id, const void *data)
+{
+	const struct btf_array *array = btf_array(t);
+	/* Walk a byte pointer: arithmetic on void * is only a GNU extension. */
+	const unsigned char *bytes = data;
+	__u32 i;
+
+	if (!btf_is_int(skip_mods_and_typedefs(d->btf, array->type, NULL)) ||
+	    btf__resolve_size(d->btf, array->type) != 1 ||
+	    !d->typed_dump->print_strings) {
+		pr_warn("unexpected %s() call for array type %u\n",
+			__func__, array->type);
+		return -EINVAL;
+	}
+
+	btf_dump_data_pfx(d);
+	btf_dump_printf(d, "\"");
+
+	for (i = 0; i < array->nelems; i++) {
+		if ((const void *)(bytes + i) >= d->typed_dump->data_end)
+			return -E2BIG;
+
+		/* NUL always terminates the string and is never printed. */
+		if (bytes[i] == '\0')
+			break;
+
+		/* <ctype.h> needs an unsigned char value; plain char can be < 0. */
+		if (isprint(bytes[i]))
+			btf_dump_printf(d, "%c", bytes[i]);
+		else
+			btf_dump_printf(d, "\\x%02x", bytes[i]);
+	}
+
+	btf_dump_printf(d, "\"");
+
+	return 0;
+}
+
 static int btf_dump_array_data(struct btf_dump *d,
 			       const struct btf_type *t,
 			       __u32 id,
@@ -2055,8 +2100,11 @@  static int btf_dump_array_data(struct btf_dump *d,
 		 * char arrays, so if size is 1 and element is
 		 * printable as a char, we'll do that.
 		 */
-		if (elem_size == 1)
+		if (elem_size == 1) {
+			if (d->typed_dump->print_strings)
+				return btf_dump_string_data(d, t, id, data);
 			d->typed_dump->is_array_char = true;
+		}
 	}
 
 	/* note that we increment depth before calling btf_dump_print() below;
@@ -2544,6 +2592,7 @@  int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 	d->typed_dump->compact = OPTS_GET(opts, compact, false);
 	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
 	d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+	d->typed_dump->print_strings = OPTS_GET(opts, print_strings, false);
 
 	ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index c0a776feec23..70e51943f148 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -879,6 +879,106 @@  static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
 			  "static int bpf_cgrp_storage_busy = (int)2", 2);
 }
 
+/*
+ * Array types are generally not named, so find char[nelems] by scanning all
+ * types instead of using btf__find_by_name(). Returns its type ID or a negative error code.
+ */
+static int find_char_array_type(struct btf *btf, int nelems)
+{
+	const int nr_types = btf__type_cnt(btf);
+	const int char_type = btf__find_by_name(btf, "char");
+
+	if (char_type < 0)	/* no "char" type: nothing can match */
+		return char_type;
+
+	for (int i = 1; i < nr_types; i++) {
+		const struct btf_type *t = btf__type_by_id(btf, i);
+
+		if (!btf_is_array(t))
+			continue;
+		if (btf_array(t)->nelems == (__u32)nelems &&
+		    btf_array(t)->type == (__u32)char_type)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+/* Dump the @ptr_sz bytes at @ptr as the char[ptr_sz] type found in @btf and
+ * check that the text left in @str equals @expected_val; 0 means it matched.
+ */
+static int btf_dump_string_data(struct btf *btf, struct btf_dump *d,
+				char *str, struct btf_dump_type_data_opts *opts,
+				char *ptr, size_t ptr_sz,
+				const char *expected_val)
+{
+	size_t type_sz;
+	int type_id, ret;
+
+	type_id = find_char_array_type(btf, ptr_sz);
+	if (!ASSERT_GE(type_id, 0, "find type id"))
+		return -ENOENT;
+	type_sz = btf__resolve_size(btf, type_id);
+	str[0] = '\0';
+	ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, opts);
+	if (type_sz <= ptr_sz) {
+		if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+			return -EINVAL;
+	} else {
+		if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+			return -EINVAL;
+	}
+	if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+		return -EFAULT;
+	return 0;
+}
+
+/* Check string-mode dumps of char[4]: with and without type names, with
+ * emit_zeroes and compact toggled, with non-printable bytes, and with missing
+ * or embedded NUL terminators.
+ */
+static void test_btf_dump_string_data(struct btf *btf, struct btf_dump *d,
+				      char *str)
+{
+	DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+	char food[4] = { 'f', 'o', 'o', 'd' };
+	char embed[4] = { 'f', 'o', '\0', 'd' };
+
+	opts.compact = true;
+	opts.emit_zeroes = false;
+	opts.print_strings = true;
+
+	opts.skip_names = false;
+	btf_dump_string_data(btf, d, str, &opts, "foo", 4,
+		"(char[4])\"foo\"");
+
+	opts.skip_names = true;
+	btf_dump_string_data(btf, d, str, &opts, "foo", 4,
+		"\"foo\"");
+
+	/* Flipping emit_zeroes on should have no effect. */
+	opts.emit_zeroes = true;
+	btf_dump_string_data(btf, d, str, &opts, "foo", 4,
+		"\"foo\"");
+
+	/* Turning compact off should have no effect either. */
+	opts.compact = false;
+	btf_dump_string_data(btf, d, str, &opts, "foo", 4,
+		"\"foo\"");
+
+	/* Non-printable characters come out as hex. */
+	btf_dump_string_data(btf, d, str, &opts, "fo\xff", 4,
+		"\"fo\\xff\"");
+	btf_dump_string_data(btf, d, str, &opts, "fo\x7", 4,
+		"\"fo\\x07\"");
+
+	/* Should get printed properly even though there's no NUL. */
+	btf_dump_string_data(btf, d, str, &opts, food, 4, "\"food\"");
+
+	/* The embedded NUL should terminate the string. */
+	btf_dump_string_data(btf, d, str, &opts, embed, 4, "\"fo\"");
+}
+
 static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
 			     const char *name, const char *expected_val,
 			     void *data, size_t data_sz)
@@ -970,6 +1070,8 @@  void test_btf_dump() {
 		test_btf_dump_struct_data(btf, d, str);
 	if (test__start_subtest("btf_dump: var_data"))
 		test_btf_dump_var_data(btf, d, str);
+	if (test__start_subtest("btf_dump: string_data"))
+		test_btf_dump_string_data(btf, d, str);
 	btf_dump__free(d);
 	btf__free(btf);