Message ID | e5f2fbe4f492d37569c389aebcb91168f98783ba.1720774981.git.maciej.wieczor-retman@intel.com |
---|---|
State | New |
Headers | show |
Series | [v4,1/2] selftests/resctrl: Adjust effective L3 cache size with SNC enabled | expand |
Hi Maciej, On 7/12/24 2:04 AM, Maciej Wieczor-Retman wrote: > Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA > nodes. Systems may support splitting into either two, three or four > nodes. > > When SNC mode is enabled the effective amount of L3 cache available > for allocation is divided by the number of nodes per L3. > > Detect which SNC mode is active by comparing the number of CPUs > that share a cache with CPU0, with the number of CPUs on node0. > > Signed-off-by: Tony Luck <tony.luck@intel.com> > Co-developed-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com> > Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com> Since you are "From:" author there is no need for a "Co-developed-by" for you. Tony does need one. Please check: "When to use Acked-by:, Cc:, and Co-developed-by:" in Documentation/process/submitting-patches.rst (checkpatch.pl also warns about this). > --- > Changelog v4: > - Make returned value a static local variable so the function only runs > the logic once. (Reinette) > > Changelog v3: > - Add comparison between present and online cpus to test if the > calculated SNC mode is credible. (Reinette) > - Added comment to cache size modification to better explain why it is > needed there. (Reinette) > - Fix facts in patch message. (Reinette) > - Change snc_ways() to snc_nodes_per_l3_cache(). (Reinette) > > tools/testing/selftests/resctrl/resctrl.h | 4 ++ > tools/testing/selftests/resctrl/resctrlfs.c | 73 +++++++++++++++++++++ > 2 files changed, 77 insertions(+) > > diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h > index 2dda56084588..851b37c9c38a 100644 > --- a/tools/testing/selftests/resctrl/resctrl.h > +++ b/tools/testing/selftests/resctrl/resctrl.h > @@ -11,6 +11,7 @@ > #include <signal.h> > #include <dirent.h> > #include <stdbool.h> > +#include <ctype.h> > #include <sys/stat.h> > #include <sys/ioctl.h> > #include <sys/mount.h> > @@ -43,6 +44,8 @@ > > #define DEFAULT_SPAN (250 * MB) > > +#define MAX_SNC 4 > + > /* > * user_params: User supplied parameters > * @cpu: CPU number to which the benchmark will be bound to > @@ -120,6 +123,7 @@ extern volatile int *value_sink; > > extern char llc_occup_path[1024]; > > +int snc_nodes_per_l3_cache(void); > int get_vendor(void); > bool check_resctrlfs_support(void); > int filter_dmesg(void); > diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c > index 250c320349a7..803dd415984c 100644 > --- a/tools/testing/selftests/resctrl/resctrlfs.c > +++ b/tools/testing/selftests/resctrl/resctrlfs.c > @@ -156,6 +156,68 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id) > return 0; > } > > +/* > + * Count number of CPUs in a /sys bit map "bit map" -> "bitmap" > + */ > +static unsigned int count_sys_bitmap_bits(char *name) > +{ > + FILE *fp = fopen(name, "r"); > + int count = 0, c; > + > + if (!fp) > + return 0; > + > + while ((c = fgetc(fp)) != EOF) { > + if (!isxdigit(c)) > + continue; > + switch (c) { > + case 'f': > + count++; > + case '7': case 'b': case 'd': case 'e': > + count++; > + case '3': case '5': case '6': case '9': case 'a': case 'c': > + count++; > + case '1': case '2': case '4': case '8': > + count++; > + } > + } > + fclose(fp); > + > + return count; > +} > + > +/* > + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0. > + * If some CPUs are offline the numbers may not be exact multiples of each > + * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of > + * CPU0 but offline CPUs from other nodes will only make the cache_cpus value > + * lower. Still try to get the ratio right by preventing the second possibility. This all seems unnecessary since the next patch sets snc_mode to 1 if there are any offline CPUs. Seems more appropriate to move the offline CPU handling to this patch. > + */ > +int snc_nodes_per_l3_cache(void) > +{ > + int node_cpus, cache_cpus, i; > + static int snc_mode; > + > + if (!snc_mode) { > + node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap"); > + cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map"); > + > + if (!node_cpus || !cache_cpus) { > + ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n"); > + return 1; > + } > + > + for (i = 1; i <= MAX_SNC ; i++) { (nit: unnecessary space) > + if (i * node_cpus >= cache_cpus) { > + snc_mode = i; > + break; > + } This is irrelevant after the subsequent patch but note that there are scenarios where above loop cannot set snc_mode and the function will thus return 0 since snc_mode is not initialized. This is a problem in division done by following hunk. > + } > + } > + > + return snc_mode; > +} > + > /* > * get_cache_size - Get cache size for a specified CPU > * @cpu_no: CPU number > @@ -211,6 +273,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size > break; > } > > + /* > + * The amount of cache represented by each bit in the masks > + * in the schemata file is reduced by a factor equal to SNC > + * nodes per L3 cache. > + * E.g. on a SNC-2 system with a 100MB L3 cache a test that > + * allocates memory from its local SNC node (default behavior > + * without using libnuma) will only see 50 MB llc_occupancy > + * with a fully populated L3 mask in the schemata file. > + */ > + if (cache_num == 3) > + *cache_size /= snc_nodes_per_l3_cache(); > return 0; > } > Reinette
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 2dda56084588..851b37c9c38a 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -11,6 +11,7 @@ #include <signal.h> #include <dirent.h> #include <stdbool.h> +#include <ctype.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/mount.h> @@ -43,6 +44,8 @@ #define DEFAULT_SPAN (250 * MB) +#define MAX_SNC 4 + /* * user_params: User supplied parameters * @cpu: CPU number to which the benchmark will be bound to @@ -120,6 +123,7 @@ extern volatile int *value_sink; extern char llc_occup_path[1024]; +int snc_nodes_per_l3_cache(void); int get_vendor(void); bool check_resctrlfs_support(void); int filter_dmesg(void); diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 250c320349a7..803dd415984c 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -156,6 +156,68 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id) return 0; } +/* + * Count number of CPUs in a /sys bit map + */ +static unsigned int count_sys_bitmap_bits(char *name) +{ + FILE *fp = fopen(name, "r"); + int count = 0, c; + + if (!fp) + return 0; + + while ((c = fgetc(fp)) != EOF) { + if (!isxdigit(c)) + continue; + switch (c) { + case 'f': + count++; + case '7': case 'b': case 'd': case 'e': + count++; + case '3': case '5': case '6': case '9': case 'a': case 'c': + count++; + case '1': case '2': case '4': case '8': + count++; + } + } + fclose(fp); + + return count; +} + +/* + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0. + * If some CPUs are offline the numbers may not be exact multiples of each + * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of + * CPU0 but offline CPUs from other nodes will only make the cache_cpus value + * lower. Still try to get the ratio right by preventing the second possibility. + */ +int snc_nodes_per_l3_cache(void) +{ + int node_cpus, cache_cpus, i; + static int snc_mode; + + if (!snc_mode) { + node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap"); + cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map"); + + if (!node_cpus || !cache_cpus) { + ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n"); + return 1; + } + + for (i = 1; i <= MAX_SNC ; i++) { + if (i * node_cpus >= cache_cpus) { + snc_mode = i; + break; + } + } + } + + return snc_mode; +} + /* * get_cache_size - Get cache size for a specified CPU * @cpu_no: CPU number @@ -211,6 +273,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size break; } + /* + * The amount of cache represented by each bit in the masks + * in the schemata file is reduced by a factor equal to SNC + * nodes per L3 cache. + * E.g. on a SNC-2 system with a 100MB L3 cache a test that + * allocates memory from its local SNC node (default behavior + * without using libnuma) will only see 50 MB llc_occupancy + * with a fully populated L3 mask in the schemata file. + */ + if (cache_num == 3) + *cache_size /= snc_nodes_per_l3_cache(); return 0; }