@@ -63,6 +63,7 @@
#define IRQ_M_TIMER 7
#define IRQ_S_EXT 9
#define IRQ_M_EXT 11
+#define IRQ_PMU_OVF 13
/* Exception causes */
#define EXC_INST_MISALIGNED 0
@@ -151,6 +152,8 @@
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
+#define CSR_SSCOUNTOVF 0xda0
+
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
#define CSR_STVEC 0x105
@@ -212,7 +215,10 @@
# define RV_IRQ_SOFT IRQ_S_SOFT
# define RV_IRQ_TIMER IRQ_S_TIMER
# define RV_IRQ_EXT IRQ_S_EXT
-#endif /* CONFIG_RISCV_M_MODE */
+# define RV_IRQ_PMU IRQ_PMU_OVF
+# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT)
@@ -50,6 +50,7 @@ extern unsigned long elf_hwcap;
* available logical extension id.
*/
enum riscv_isa_ext_id {
+ RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
};
@@ -71,6 +71,7 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
}
static struct riscv_isa_ext_data isa_ext_arr[] = {
+ __RISCV_ISA_EXT_DATA(sscofpmf, sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA("", "", RISCV_ISA_EXT_MAX),
};
@@ -11,8 +11,13 @@
#include <linux/mod_devicetable.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
#include <asm/sbi.h>
+#include <asm/hwcap.h>
union sbi_pmu_ctr_info {
unsigned long value;
@@ -33,6 +38,7 @@ union sbi_pmu_ctr_info {
* per_cpu in case of harts with different pmu counters
*/
static union sbi_pmu_ctr_info *pmu_ctr_list;
+static unsigned int riscv_pmu_irq;
struct pmu_event_data {
union {
@@ -450,33 +456,223 @@ static int pmu_sbi_get_ctrinfo(int nctr)
return 0;
}
+static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
+{
+ int idx = 0;
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ unsigned long cmask = 0;
+ union sbi_pmu_ctr_info *info;
+
+ /* We should only stop the used hardware counters */
+ for_each_set_bit(idx, cpu_hw_evt->used_event_ctrs, RISCV_MAX_COUNTERS) {
+ info = &pmu_ctr_list[idx];
+ if (info->type != SBI_PMU_CTR_TYPE_FW)
+ cmask |= (1 << idx);
+ }
+ /* No need to check the error here as we can't do anything about the error */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0, cmask, 0, 0, 0, 0);
+}
+
+/**
+ * This function starts all the used counters in two step approach.
+ * Any counter that did not overflow can be start in a single step
+ * while the overflowed counters need to be started with updated initialization
+ * value.
+ */
+static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+ unsigned long ctr_ovf_mask)
+{
+ int idx = 0;
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct perf_event *event;
+ unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+ unsigned long ctr_start_mask = 0;
+ uint64_t max_period;
+ struct hw_perf_event *hwc;
+ u64 init_val = 0;
+
+ ctr_start_mask = cpu_hw_evt->used_event_ctrs[0] & ~ctr_ovf_mask;
+
+ /* Start all the counters that did not overflow in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
+ 0, 0, 0, 0);
+
+ /* Reinitialize and start all the counter that overflowed */
+ while (ctr_ovf_mask) {
+ if (ctr_ovf_mask & 0x01) {
+ event = cpu_hw_evt->events[idx];
+ hwc = &event->hw;
+ max_period = riscv_pmu_ctr_get_width_mask(event);
+ init_val = local64_read(&hwc->prev_count) & max_period;
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
+ flag, init_val, 0, 0);
+ }
+ ctr_ovf_mask = ctr_ovf_mask >> 1;
+ idx++;
+ }
+}
+
+static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
+{
+ struct perf_sample_data data;
+ struct pt_regs *regs;
+ struct hw_perf_event *hw_evt;
+ union sbi_pmu_ctr_info *info;
+ int lidx, hidx, fidx;
+ struct riscv_pmu *pmu;
+ struct perf_event *event;
+ struct cpu_hw_events *cpu_hw_evt = dev;
+ unsigned long overflow;
+ unsigned long overflowed_ctrs = 0;
+
+ fidx = find_first_bit(cpu_hw_evt->used_event_ctrs, RISCV_MAX_COUNTERS);
+ event = cpu_hw_evt->events[fidx];
+ if (!event) {
+ csr_clear(CSR_SIP, SIP_LCOFIP);
+ return IRQ_NONE;
+ }
+
+ pmu = to_riscv_pmu(event->pmu);
+ pmu_sbi_stop_all(pmu);
+
+ /* Overflow status register should only be read after counter are stopped */
+ overflow = csr_read(CSR_SSCOUNTOVF);
+
+ /**
+ * Overflow interrupt pending bit should only be cleared after stopping
+ * all the counters to avoid any race condition.
+ */
+ csr_clear(CSR_SIP, SIP_LCOFIP);
+
+ /* No overflow bit is set */
+ if (!overflow)
+ return IRQ_NONE;
+
+ regs = get_irq_regs();
+
+ for_each_set_bit(lidx, cpu_hw_evt->used_event_ctrs, RISCV_MAX_COUNTERS) {
+ struct perf_event *event = cpu_hw_evt->events[lidx];
+
+ /* Skip if invalid event or user did not request a sampling */
+ if (!event || !is_sampling_event(event))
+ continue;
+
+ info = &pmu_ctr_list[lidx];
+ /* Firmware counter don't support overflow yet */
+ if (!info || info->type == SBI_PMU_CTR_TYPE_FW)
+ continue;
+
+ /* compute hardware counter index */
+ hidx = info->csr - CSR_CYCLE;
+ /* check if the corresponding bit is set in sscountovf */
+ if (!(overflow & (1 << hidx)))
+ continue;
+
+ /*
+ * Keep a track of overflowed counters so that they can be started
+ * with updated initial value.
+ */
+ overflowed_ctrs |= 1 << lidx;
+ hw_evt = &event->hw;
+ riscv_pmu_event_update(event);
+ perf_sample_data_init(&data, 0, hw_evt->last_period);
+ if (riscv_pmu_event_set_period(event)) {
+ /*
+ * Unlike other ISAs, RISC-V don't have to disable interrupts
+ * to avoid throttling here. As per the specification, the
+ * interrupt remains disabled until the OF bit is set.
+ * Interrupts are enabled again only during the start.
+ * TODO: We will need to stop the guest counters once
+ * virtualization support is added.
+ */
+ perf_event_overflow(event, &data, regs);
+ }
+ }
+ pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+
+ return IRQ_HANDLED;
+}
+
static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
+ struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
/* Enable the access for TIME csr only from the user mode now */
csr_write(CSR_SCOUNTEREN, 0x2);
/* Stop all the counters so that they can be enabled from perf */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
- 0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
+ pmu_sbi_stop_all(pmu);
+
+ if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+ hw_events->irq = riscv_pmu_irq;
+ csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
+ csr_set(CSR_IE, BIT(RV_IRQ_PMU));
+ enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
+ }
return 0;
}
static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
{
+ if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+ disable_percpu_irq(riscv_pmu_irq);
+ csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
+ }
+
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
return 0;
}
+static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
+{
+ int ret;
+ struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
+ struct device_node *cpu, *child;
+ struct irq_domain *domain = NULL;
+
+ if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+ return -EOPNOTSUPP;
+
+ for_each_of_cpu_node(cpu) {
+ child = of_get_compatible_child(cpu, "riscv,cpu-intc");
+ if (!child) {
+ pr_err("Failed to find INTC node\n");
+ return -ENODEV;
+ }
+ domain = irq_find_host(child);
+ of_node_put(child);
+ if (domain)
+ break;
+ }
+ if (!domain) {
+ pr_err("Failed to find INTC IRQ root domain\n");
+ return -ENODEV;
+ }
+
+ riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
+ if (!riscv_pmu_irq) {
+ pr_err("Failed to map PMU interrupt for node\n");
+ return -ENODEV;
+ }
+
+ ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
+ if (ret) {
+ pr_err("registering percpu irq failed [%d]\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
int num_counters;
- int ret;
+ int ret = -ENODEV;
pr_info("SBI PMU extension is available\n");
/* Notify legacy implementation that SBI pmu is available*/
@@ -488,13 +684,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
num_counters = pmu_sbi_find_num_ctrs();
if (num_counters < 0) {
pr_err("SBI PMU extension doesn't provide any counters\n");
- return -ENODEV;
+ goto out_free;
}
/* cache all the information about counters now */
if (pmu_sbi_get_ctrinfo(num_counters))
- return -ENODEV;
+ goto out_free;
+ ret = pmu_sbi_setup_irqs(pmu, pdev);
+ if (ret < 0) {
+ pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+ }
pmu->num_counters = num_counters;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -515,6 +717,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
}
return 0;
+
+out_free:
+ kfree(pmu);
+ return ret;
}
static struct platform_driver pmu_sbi_driver = {
@@ -544,8 +750,6 @@ static int __init pmu_sbi_devinit(void)
}
ret = platform_driver_register(&pmu_sbi_driver);
- if (ret)
- return ret;
pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
if (IS_ERR(pdev)) {
@@ -29,6 +29,8 @@
struct cpu_hw_events {
/* currently enabled events */
int n_events;
+ /* Counter overflow interrupt */
+ int irq;
/* currently enabled events */
struct perf_event *events[RISCV_MAX_COUNTERS];
/* currently enabled counters */