@@ -11,6 +11,7 @@
#include <signal.h>
#include <dirent.h>
#include <stdbool.h>
+#include <ctype.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
@@ -43,6 +44,8 @@
#define DEFAULT_SPAN (250 * MB)
+#define MAX_SNC 4
+
/*
* user_params: User supplied parameters
* @cpu: CPU number to which the benchmark will be bound to
@@ -120,6 +123,7 @@ extern volatile int *value_sink;
extern char llc_occup_path[1024];
+int snc_nodes_per_l3_cache(void);
int get_vendor(void);
bool check_resctrlfs_support(void);
int filter_dmesg(void);
@@ -156,6 +156,65 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
return 0;
}
+/*
+ * Count number of CPUs in a /sys bit map
+ */
+static unsigned int count_sys_bitmap_bits(char *name)
+{
+ FILE *fp = fopen(name, "r");
+ int count = 0, c;
+
+ if (!fp)
+ return 0;
+
+ while ((c = fgetc(fp)) != EOF) {
+ if (!isxdigit(c))
+ continue;
+ switch (c) {
+ case 'f':
+ count++;
+ case '7': case 'b': case 'd': case 'e':
+ count++;
+ case '3': case '5': case '6': case '9': case 'a': case 'c':
+ count++;
+ case '1': case '2': case '4': case '8':
+ count++;
+ }
+ }
+ fclose(fp);
+
+ return count;
+}
+
+/*
+ * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
+ * If some CPUs are offline the numbers may not be exact multiples of each
+ * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
+ * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
+ * lower. Still try to get the ratio right by preventing the second possibility.
+ */
+int snc_nodes_per_l3_cache(void)
+{
+ int node_cpus, cache_cpus, i, ret = 1;
+
+ node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
+ cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
+
+ if (!node_cpus || !cache_cpus) {
+ ksft_print_msg("Could not determine Sub-NUMA Cluster mode!\n");
+ return 1;
+ }
+
+ for (i = 1; i <= MAX_SNC ; i++) {
+ if (i * node_cpus >= cache_cpus) {
+ ret = i;
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* get_cache_size - Get cache size for a specified CPU
* @cpu_no: CPU number
@@ -211,6 +270,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
break;
}
+ /*
+ * The amount of cache represented by each bit in the masks
+ * in the schemata file is reduced by a factor equal to SNC
+ * nodes per L3 cache.
+ * E.g. on a SNC-2 system with a 100MB L3 cache a test that
+ * allocates memory from its local SNC node (default behavior
+ * without using libnuma) will only see 50 MB llc_occupancy
+ * with a fully populated L3 mask in the schemata file.
+ */
+ if (cache_num == 3)
+ *cache_size /= snc_nodes_per_l3_cache();
return 0;
}