diff mbox series

[v5,3/9] RISC-V: Add a perf core library for pmu drivers

Message ID 20211225054647.1750577-4-atishp@rivosinc.com
State New
Headers show
Series Improve RISC-V Perf support using SBI PMU and sscofpmf extension | expand

Commit Message

Atish Patra Dec. 25, 2021, 5:46 a.m. UTC
From: Atish Patra <atish.patra@wdc.com>

Implement a perf core library that can support all the essential perf
features in future. It can also accommodate any type of PMU implementation
in future. Currently, both SBI based perf driver and legacy driver
implemented uses the library. Most of the common perf functionalities
are kept in this core library wile PMU specific driver can implement PMU
specific features. For example, the SBI specific functionality will be
implemented in the SBI specific driver.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
 drivers/perf/Kconfig           |  10 +
 drivers/perf/Makefile          |   1 +
 drivers/perf/riscv_pmu.c       | 328 +++++++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h     |   1 +
 include/linux/perf/riscv_pmu.h |  65 +++++++
 5 files changed, 405 insertions(+)
 create mode 100644 drivers/perf/riscv_pmu.c
 create mode 100644 include/linux/perf/riscv_pmu.h

Comments

Anup Patel Jan. 18, 2022, 5:01 a.m. UTC | #1
On Sat, Dec 25, 2021 at 11:17 AM Atish Patra <atishp@atishpatra.org> wrote:
>
> From: Atish Patra <atish.patra@wdc.com>
>
> Implement a perf core library that can support all the essential perf
> features in future. It can also accommodate any type of PMU implementation
> in future. Currently, both SBI based perf driver and legacy driver
> implemented uses the library. Most of the common perf functionalities
> are kept in this core library wile PMU specific driver can implement PMU
> specific features. For example, the SBI specific functionality will be
> implemented in the SBI specific driver.
>
> Signed-off-by: Atish Patra <atish.patra@wdc.com>
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>  drivers/perf/Kconfig           |  10 +
>  drivers/perf/Makefile          |   1 +
>  drivers/perf/riscv_pmu.c       | 328 +++++++++++++++++++++++++++++++++
>  include/linux/cpuhotplug.h     |   1 +
>  include/linux/perf/riscv_pmu.h |  65 +++++++
>  5 files changed, 405 insertions(+)
>  create mode 100644 drivers/perf/riscv_pmu.c
>  create mode 100644 include/linux/perf/riscv_pmu.h
>
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index 77522e5efe11..03ca0315df73 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -56,6 +56,16 @@ config ARM_PMU
>           Say y if you want to use CPU performance monitors on ARM-based
>           systems.
>
> +config RISCV_PMU
> +       depends on RISCV
> +       bool "RISC-V PMU framework"
> +       default y
> +       help
> +         Say y if you want to use CPU performance monitors on RISCV-based
> +         systems. This provides the core PMU framework that abstracts common
> +         PMU functionalities in a core library so that different PMU drivers
> +         can reuse it.
> +
>  config ARM_PMU_ACPI
>         depends on ARM_PMU && ACPI
>         def_bool y
> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
> index 5260b116c7da..76e5c50e24bb 100644
> --- a/drivers/perf/Makefile
> +++ b/drivers/perf/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
>  obj-$(CONFIG_HISI_PMU) += hisilicon/
>  obj-$(CONFIG_QCOM_L2_PMU)      += qcom_l2_pmu.o
>  obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
> +obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
>  obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>  obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
> diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
> new file mode 100644
> index 000000000000..83c3e8d085c7
> --- /dev/null
> +++ b/drivers/perf/riscv_pmu.c
> @@ -0,0 +1,328 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * RISC-V performance counter support.
> + *
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + *
> + * This implementation is based on old RISC-V perf and ARM perf event code
> + * which are in turn based on sparc64 and x86 code.
> + */
> +
> +#include <linux/cpumask.h>
> +#include <linux/irq.h>
> +#include <linux/irqdesc.h>
> +#include <linux/perf/riscv_pmu.h>
> +#include <linux/printk.h>
> +#include <linux/smp.h>
> +
> +static unsigned long csr_read_num(int csr_num)
> +{
> +#define switchcase_csr_read(__csr_num, __val)          {\
> +       case __csr_num:                                 \
> +               __val = csr_read(__csr_num);            \
> +               break; }
> +#define switchcase_csr_read_2(__csr_num, __val)                {\
> +       switchcase_csr_read(__csr_num + 0, __val)        \
> +       switchcase_csr_read(__csr_num + 1, __val)}
> +#define switchcase_csr_read_4(__csr_num, __val)                {\
> +       switchcase_csr_read_2(__csr_num + 0, __val)      \
> +       switchcase_csr_read_2(__csr_num + 2, __val)}
> +#define switchcase_csr_read_8(__csr_num, __val)                {\
> +       switchcase_csr_read_4(__csr_num + 0, __val)      \
> +       switchcase_csr_read_4(__csr_num + 4, __val)}
> +#define switchcase_csr_read_16(__csr_num, __val)       {\
> +       switchcase_csr_read_8(__csr_num + 0, __val)      \
> +       switchcase_csr_read_8(__csr_num + 8, __val)}
> +#define switchcase_csr_read_32(__csr_num, __val)       {\
> +       switchcase_csr_read_16(__csr_num + 0, __val)     \
> +       switchcase_csr_read_16(__csr_num + 16, __val)}
> +
> +       unsigned long ret = 0;
> +
> +       switch (csr_num) {
> +       switchcase_csr_read_32(CSR_CYCLE, ret)
> +       switchcase_csr_read_32(CSR_CYCLEH, ret)
> +       default :
> +               break;
> +       }
> +
> +       return ret;
> +#undef switchcase_csr_read_32
> +#undef switchcase_csr_read_16
> +#undef switchcase_csr_read_8
> +#undef switchcase_csr_read_4
> +#undef switchcase_csr_read_2
> +#undef switchcase_csr_read
> +}
> +
> +/*
> + * Read the CSR of a corresponding counter.
> + */
> +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
> +{
> +       if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
> +          (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
> +               pr_err("Invalid performance counter csr %lx\n", csr);
> +               return -EINVAL;
> +       }
> +
> +       return csr_read_num(csr);
> +}
> +
> +uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event)

Use u64 inplace of uint64_t for consistency.

> +{
> +       int cwidth;
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       struct hw_perf_event *hwc = &event->hw;
> +
> +       if (!rvpmu->ctr_get_width)
> +       /**
> +        * If the pmu driver doesn't support counter width, set it to default maximum
> +        * allowed by the specification.
> +        */

Improve indentation of this comment block and ensure it does not go
beyond 80 characters.

> +               cwidth = 63;
> +       else {
> +               if (hwc->idx == -1)
> +                       /* Handle init case where idx is not initialized yet */
> +                       cwidth = rvpmu->ctr_get_width(0);
> +               else
> +                       cwidth = rvpmu->ctr_get_width(hwc->idx);
> +       }
> +
> +       return GENMASK_ULL(cwidth, 0);
> +}
> +
> +u64 riscv_pmu_event_update(struct perf_event *event)
> +{
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       struct hw_perf_event *hwc = &event->hw;
> +       u64 prev_raw_count, new_raw_count;
> +       unsigned long cmask;
> +       u64 oldval, delta;
> +
> +       if (!rvpmu->ctr_read)
> +               return 0;
> +
> +       cmask = riscv_pmu_ctr_get_width_mask(event);
> +
> +       do {
> +               prev_raw_count = local64_read(&hwc->prev_count);
> +               new_raw_count = rvpmu->ctr_read(event);
> +               oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
> +                                        new_raw_count);
> +       } while (oldval != prev_raw_count);
> +
> +       delta = (new_raw_count - prev_raw_count) & cmask;
> +       local64_add(delta, &event->count);
> +       local64_sub(delta, &hwc->period_left);
> +
> +       return delta;
> +}
> +
> +static void riscv_pmu_stop(struct perf_event *event, int flags)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> +       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
> +
> +       if (!(hwc->state & PERF_HES_STOPPED)) {
> +               if (rvpmu->ctr_stop) {
> +                       rvpmu->ctr_stop(event, 0);
> +                       hwc->state |= PERF_HES_STOPPED;
> +               }
> +               riscv_pmu_event_update(event);
> +               hwc->state |= PERF_HES_UPTODATE;
> +       }
> +}
> +
> +int riscv_pmu_event_set_period(struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       s64 left = local64_read(&hwc->period_left);
> +       s64 period = hwc->sample_period;
> +       int overflow = 0;
> +       uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
> +
> +       if (unlikely(left <= -period)) {
> +               left = period;
> +               local64_set(&hwc->period_left, left);
> +               hwc->last_period = period;
> +               overflow = 1;
> +       }
> +
> +       if (unlikely(left <= 0)) {
> +               left += period;
> +               local64_set(&hwc->period_left, left);
> +               hwc->last_period = period;
> +               overflow = 1;
> +       }
> +
> +       /*
> +        * Limit the maximum period to prevent the counter value
> +        * from overtaking the one we are about to program. In
> +        * effect we are reducing max_period to account for
> +        * interrupt latency (and we are being very conservative).
> +        */
> +       if (left > (max_period >> 1))
> +               left = (max_period >> 1);
> +
> +       local64_set(&hwc->prev_count, (u64)-left);
> +       perf_event_update_userpage(event);
> +
> +       return overflow;
> +}
> +
> +static void riscv_pmu_start(struct perf_event *event, int flags)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
> +       u64 init_val;
> +
> +       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> +               return;
> +
> +       if (flags & PERF_EF_RELOAD) {
> +               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> +
> +               /*
> +                * Set the counter to the period to the next interrupt here,
> +                * if you have any.
> +                */

This comment block is not followed by any code.

Do you expect code to be added in future ?

> +       }
> +
> +       hwc->state = 0;
> +       riscv_pmu_event_set_period(event);
> +       init_val = local64_read(&hwc->prev_count) & max_period;
> +       rvpmu->ctr_start(event, init_val);
> +       perf_event_update_userpage(event);
> +}
> +
> +static int riscv_pmu_add(struct perf_event *event, int flags)
> +{
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +       struct hw_perf_event *hwc = &event->hw;
> +       int idx;
> +
> +       idx = rvpmu->ctr_get_idx(event);
> +       if (idx < 0)
> +               return idx;
> +
> +       hwc->idx = idx;
> +       cpuc->events[idx] = event;
> +       cpuc->n_events++;
> +       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +       if (flags & PERF_EF_START)
> +               riscv_pmu_start(event, PERF_EF_RELOAD);
> +
> +       /* Propagate our changes to the userspace mapping. */
> +       perf_event_update_userpage(event);
> +
> +       return 0;
> +}
> +
> +static void riscv_pmu_del(struct perf_event *event, int flags)
> +{
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +       struct hw_perf_event *hwc = &event->hw;
> +
> +       riscv_pmu_stop(event, PERF_EF_UPDATE);
> +       cpuc->events[hwc->idx] = NULL;
> +       /* The firmware need to reset the counter mapping */
> +       if (rvpmu->ctr_stop)
> +               rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
> +       cpuc->n_events--;
> +       if (rvpmu->ctr_clear_idx)
> +               rvpmu->ctr_clear_idx(event);
> +       perf_event_update_userpage(event);
> +       hwc->idx = -1;
> +}
> +
> +static void riscv_pmu_read(struct perf_event *event)
> +{
> +       riscv_pmu_event_update(event);
> +}
> +
> +static int riscv_pmu_event_init(struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       int mapped_event;
> +       u64 event_config = 0;
> +       uint64_t cmask;
> +
> +       hwc->flags = 0;
> +       mapped_event = rvpmu->event_map(event, &event_config);
> +       if (mapped_event < 0) {
> +               pr_debug("event %x:%llx not supported\n", event->attr.type,
> +                        event->attr.config);
> +               return mapped_event;
> +       }
> +
> +       /*
> +        * idx is set to -1 because the index of a general event should not be
> +        * decided until binding to some counter in pmu->add().
> +        * config will contain the information about counter CSR
> +        * the idx will contain the counter index
> +        */
> +       hwc->config = event_config;
> +       hwc->idx = -1;
> +       hwc->event_base = mapped_event;
> +
> +       if (!is_sampling_event(event)) {
> +               /*
> +                * For non-sampling runs, limit the sample_period to half
> +                * of the counter width. That way, the new counter value
> +                * is far less likely to overtake the previous one unless
> +                * you have some serious IRQ latency issues.
> +                */
> +               cmask = riscv_pmu_ctr_get_width_mask(event);
> +               hwc->sample_period  =  cmask >> 1;
> +               hwc->last_period    = hwc->sample_period;
> +               local64_set(&hwc->period_left, hwc->sample_period);
> +       }
> +
> +       return 0;
> +}
> +
> +struct riscv_pmu *riscv_pmu_alloc(void)
> +{
> +       struct riscv_pmu *pmu;
> +       int cpuid, i;
> +       struct cpu_hw_events *cpuc;
> +
> +       pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
> +       if (!pmu)
> +               goto out;
> +
> +       pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
> +       if (!pmu->hw_events) {
> +               pr_info("failed to allocate per-cpu PMU data.\n");
> +               goto out_free_pmu;
> +       }
> +
> +       for_each_possible_cpu(cpuid) {
> +               cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
> +               cpuc->n_events = 0;
> +               for (i = 0; i < RISCV_MAX_COUNTERS; i++)
> +                       cpuc->events[i] = NULL;
> +       }
> +       pmu->pmu = (struct pmu) {
> +               .event_init     = riscv_pmu_event_init,
> +               .add            = riscv_pmu_add,
> +               .del            = riscv_pmu_del,
> +               .start          = riscv_pmu_start,
> +               .stop           = riscv_pmu_stop,
> +               .read           = riscv_pmu_read,
> +       };
> +
> +       return pmu;
> +
> +out_free_pmu:
> +       kfree(pmu);
> +out:
> +       return NULL;
> +}
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 39cf84a30b9f..1743b8a4624d 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -123,6 +123,7 @@ enum cpuhp_state {
>         CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
>         CPUHP_AP_PERF_ARM_ACPI_STARTING,
>         CPUHP_AP_PERF_ARM_STARTING,
> +       CPUHP_AP_PERF_RISCV_STARTING,

First of all, CPUHP_AP_PERF_RISCV_STARTING should not
be part of this patch.

In general, we should avoid adding RISC-V specific HP state
if CPUHP_AP_ONLINE_DYN is sufficient.

>         CPUHP_AP_ARM_L2X0_STARTING,
>         CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
>         CPUHP_AP_ARM_ARCH_TIMER_STARTING,
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> new file mode 100644
> index 000000000000..0d8979765d79
> --- /dev/null
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2018 SiFive
> + * Copyright (C) 2018 Andes Technology Corporation
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + *
> + */
> +
> +#ifndef _ASM_RISCV_PERF_EVENT_H
> +#define _ASM_RISCV_PERF_EVENT_H
> +
> +#include <linux/perf_event.h>
> +#include <linux/ptrace.h>
> +#include <linux/interrupt.h>
> +
> +#ifdef CONFIG_RISCV_PMU
> +
> +/*
> + * The RISCV_MAX_COUNTERS parameter should be specified.
> + */
> +
> +#define RISCV_MAX_COUNTERS     64
> +#define RISCV_OP_UNSUPP                (-EOPNOTSUPP)
> +#define RISCV_PMU_PDEV_NAME    "riscv-pmu"
> +
> +#define RISCV_PMU_STOP_FLAG_RESET 1
> +
> +struct cpu_hw_events {
> +       /* currently enabled events */
> +       int                     n_events;
> +       /* currently enabled events */
> +       struct perf_event       *events[RISCV_MAX_COUNTERS];
> +       /* currently enabled counters */
> +       DECLARE_BITMAP(used_event_ctrs, RISCV_MAX_COUNTERS);
> +};
> +
> +struct riscv_pmu {
> +       struct pmu      pmu;
> +       char            *name;
> +
> +       irqreturn_t     (*handle_irq)(int irq_num, void *dev);
> +
> +       int             num_counters;
> +       u64             (*ctr_read)(struct perf_event *event);
> +       int             (*ctr_get_idx)(struct perf_event *event);
> +       int             (*ctr_get_width)(int idx);
> +       void            (*ctr_clear_idx)(struct perf_event *event);
> +       void            (*ctr_start)(struct perf_event *event, u64 init_val);
> +       void            (*ctr_stop)(struct perf_event *event, unsigned long flag);
> +       int             (*event_map)(struct perf_event *event, u64 *config);
> +
> +       struct cpu_hw_events    __percpu *hw_events;
> +       struct hlist_node       node;
> +};
> +
> +#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
> +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr);
> +int riscv_pmu_event_set_period(struct perf_event *event);
> +uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event);
> +u64 riscv_pmu_event_update(struct perf_event *event);
> +struct riscv_pmu *riscv_pmu_alloc(void);
> +
> +#endif /* CONFIG_RISCV_PMU */
> +
> +#endif /* _ASM_RISCV_PERF_EVENT_H */
> --
> 2.33.1
>

Apart from minor comments above, it looks good to me.

Reviewed-by: Anup Patel <anup@brainfault.org>

Regards,
Anup
diff mbox series

Patch

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 77522e5efe11..03ca0315df73 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -56,6 +56,16 @@  config ARM_PMU
 	  Say y if you want to use CPU performance monitors on ARM-based
 	  systems.
 
+config RISCV_PMU
+	depends on RISCV
+	bool "RISC-V PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on RISCV-based
+	  systems. This provides the core PMU framework that abstracts common
+	  PMU functionalities in a core library so that different PMU drivers
+	  can reuse it.
+
 config ARM_PMU_ACPI
 	depends on ARM_PMU && ACPI
 	def_bool y
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 5260b116c7da..76e5c50e24bb 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -10,6 +10,7 @@  obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
new file mode 100644
index 000000000000..83c3e8d085c7
--- /dev/null
+++ b/drivers/perf/riscv_pmu.c
@@ -0,0 +1,328 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This implementation is based on old RISC-V perf and ARM perf event code
+ * which are in turn based on sparc64 and x86 code.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val)		{\
+	case __csr_num:					\
+		__val = csr_read(__csr_num);		\
+		break; }
+#define switchcase_csr_read_2(__csr_num, __val)		{\
+	switchcase_csr_read(__csr_num + 0, __val)	 \
+	switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val)		{\
+	switchcase_csr_read_2(__csr_num + 0, __val)	 \
+	switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val)		{\
+	switchcase_csr_read_4(__csr_num + 0, __val)	 \
+	switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val)	{\
+	switchcase_csr_read_8(__csr_num + 0, __val)	 \
+	switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val)	{\
+	switchcase_csr_read_16(__csr_num + 0, __val)	 \
+	switchcase_csr_read_16(__csr_num + 16, __val)}
+
+	unsigned long ret = 0;
+
+	switch (csr_num) {
+	switchcase_csr_read_32(CSR_CYCLE, ret)
+	switchcase_csr_read_32(CSR_CYCLEH, ret)
+	default :
+		break;
+	}
+
+	return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+/*
+ * Read the CSR of a corresponding counter.
+ */
+unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
+{
+	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
+	   (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
+		pr_err("Invalid performance counter csr %lx\n", csr);
+		return -EINVAL;
+	}
+
+	return csr_read_num(csr);
+}
+
+uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event)
+{
+	int cwidth;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!rvpmu->ctr_get_width)
+	/**
+	 * If the pmu driver doesn't support counter width, set it to default maximum
+	 * allowed by the specification.
+	 */
+		cwidth = 63;
+	else {
+		if (hwc->idx == -1)
+			/* Handle init case where idx is not initialized yet */
+			cwidth = rvpmu->ctr_get_width(0);
+		else
+			cwidth = rvpmu->ctr_get_width(hwc->idx);
+	}
+
+	return GENMASK_ULL(cwidth, 0);
+}
+
+u64 riscv_pmu_event_update(struct perf_event *event)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_raw_count, new_raw_count;
+	unsigned long cmask;
+	u64 oldval, delta;
+
+	if (!rvpmu->ctr_read)
+		return 0;
+
+	cmask = riscv_pmu_ctr_get_width_mask(event);
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = rvpmu->ctr_read(event);
+		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					 new_raw_count);
+	} while (oldval != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & cmask;
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return delta;
+}
+
+static void riscv_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		if (rvpmu->ctr_stop) {
+			rvpmu->ctr_stop(event, 0);
+			hwc->state |= PERF_HES_STOPPED;
+		}
+		riscv_pmu_event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+int riscv_pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int overflow = 0;
+	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
+
+	local64_set(&hwc->prev_count, (u64)-left);
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
+static void riscv_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
+	u64 init_val;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+		/*
+		 * Set the counter to the period to the next interrupt here,
+		 * if you have any.
+		 */
+	}
+
+	hwc->state = 0;
+	riscv_pmu_event_set_period(event);
+	init_val = local64_read(&hwc->prev_count) & max_period;
+	rvpmu->ctr_start(event, init_val);
+	perf_event_update_userpage(event);
+}
+
+static int riscv_pmu_add(struct perf_event *event, int flags)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	idx = rvpmu->ctr_get_idx(event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	cpuc->events[idx] = event;
+	cpuc->n_events++;
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		riscv_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void riscv_pmu_del(struct perf_event *event, int flags)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	riscv_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[hwc->idx] = NULL;
+	/* The firmware need to reset the counter mapping */
+	if (rvpmu->ctr_stop)
+		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
+	cpuc->n_events--;
+	if (rvpmu->ctr_clear_idx)
+		rvpmu->ctr_clear_idx(event);
+	perf_event_update_userpage(event);
+	hwc->idx = -1;
+}
+
+static void riscv_pmu_read(struct perf_event *event)
+{
+	riscv_pmu_event_update(event);
+}
+
+static int riscv_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	int mapped_event;
+	u64 event_config = 0;
+	uint64_t cmask;
+
+	hwc->flags = 0;
+	mapped_event = rvpmu->event_map(event, &event_config);
+	if (mapped_event < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapped_event;
+	}
+
+	/*
+	 * idx is set to -1 because the index of a general event should not be
+	 * decided until binding to some counter in pmu->add().
+	 * config will contain the information about counter CSR
+	 * the idx will contain the counter index
+	 */
+	hwc->config = event_config;
+	hwc->idx = -1;
+	hwc->event_base = mapped_event;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		cmask = riscv_pmu_ctr_get_width_mask(event);
+		hwc->sample_period  =  cmask >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return 0;
+}
+
+struct riscv_pmu *riscv_pmu_alloc(void)
+{
+	struct riscv_pmu *pmu;
+	int cpuid, i;
+	struct cpu_hw_events *cpuc;
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		goto out;
+
+	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
+	if (!pmu->hw_events) {
+		pr_info("failed to allocate per-cpu PMU data.\n");
+		goto out_free_pmu;
+	}
+
+	for_each_possible_cpu(cpuid) {
+		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
+		cpuc->n_events = 0;
+		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
+			cpuc->events[i] = NULL;
+	}
+	pmu->pmu = (struct pmu) {
+		.event_init	= riscv_pmu_event_init,
+		.add		= riscv_pmu_add,
+		.del		= riscv_pmu_del,
+		.start		= riscv_pmu_start,
+		.stop		= riscv_pmu_stop,
+		.read		= riscv_pmu_read,
+	};
+
+	return pmu;
+
+out_free_pmu:
+	kfree(pmu);
+out:
+	return NULL;
+}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 39cf84a30b9f..1743b8a4624d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -123,6 +123,7 @@  enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
 	CPUHP_AP_PERF_ARM_ACPI_STARTING,
 	CPUHP_AP_PERF_ARM_STARTING,
+	CPUHP_AP_PERF_RISCV_STARTING,
 	CPUHP_AP_ARM_L2X0_STARTING,
 	CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
 	CPUHP_AP_ARM_ARCH_TIMER_STARTING,
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
new file mode 100644
index 000000000000..0d8979765d79
--- /dev/null
+++ b/include/linux/perf/riscv_pmu.h
@@ -0,0 +1,65 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 SiFive
+ * Copyright (C) 2018 Andes Technology Corporation
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ */
+
+#ifndef _ASM_RISCV_PERF_EVENT_H
+#define _ASM_RISCV_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+
+#ifdef CONFIG_RISCV_PMU
+
+/*
+ * The RISCV_MAX_COUNTERS parameter should be specified.
+ */
+
+#define RISCV_MAX_COUNTERS	64
+#define RISCV_OP_UNSUPP		(-EOPNOTSUPP)
+#define RISCV_PMU_PDEV_NAME	"riscv-pmu"
+
+#define RISCV_PMU_STOP_FLAG_RESET 1
+
+struct cpu_hw_events {
+	/* currently enabled events */
+	int			n_events;
+	/* currently enabled events */
+	struct perf_event	*events[RISCV_MAX_COUNTERS];
+	/* currently enabled counters */
+	DECLARE_BITMAP(used_event_ctrs, RISCV_MAX_COUNTERS);
+};
+
+struct riscv_pmu {
+	struct pmu	pmu;
+	char		*name;
+
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+
+	int		num_counters;
+	u64		(*ctr_read)(struct perf_event *event);
+	int		(*ctr_get_idx)(struct perf_event *event);
+	int		(*ctr_get_width)(int idx);
+	void		(*ctr_clear_idx)(struct perf_event *event);
+	void		(*ctr_start)(struct perf_event *event, u64 init_val);
+	void		(*ctr_stop)(struct perf_event *event, unsigned long flag);
+	int		(*event_map)(struct perf_event *event, u64 *config);
+
+	struct cpu_hw_events	__percpu *hw_events;
+	struct hlist_node	node;
+};
+
+#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
+unsigned long riscv_pmu_ctr_read_csr(unsigned long csr);
+int riscv_pmu_event_set_period(struct perf_event *event);
+uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event);
+u64 riscv_pmu_event_update(struct perf_event *event);
+struct riscv_pmu *riscv_pmu_alloc(void);
+
+#endif /* CONFIG_RISCV_PMU */
+
+#endif /* _ASM_RISCV_PERF_EVENT_H */