@@ -22,10 +22,259 @@
#include "qemu/range.h"
#include "sysemu/sysemu.h"
#include "exec/memory.h"
+#include "qemu/queue.h"
#include "hw/sysbus.h"
#include "trace.h"
#include "hw/platform-bus.h"
+static void vfio_intp_interrupt(VFIOINTp *intp);
+typedef void (*eventfd_user_side_handler_t)(VFIOINTp *intp);
+static int vfio_set_trigger_eventfd(VFIOINTp *intp,
+ eventfd_user_side_handler_t handler);
+
+/*
+ * Functions only used when eventfd are handled on user-side
+ * ie. without irqfd
+ */
+
+/**
+ * vfio_platform_eoi - IRQ completion routine
+ * @vbasedev: the VFIO device
+ *
+ * de-asserts the active virtual IRQ and unmask the physical IRQ
+ * (masked by the VFIO driver). Handle pending IRQs if any.
+ * eoi function is called on the first access to any MMIO region
+ * after an IRQ was triggered. It is assumed this access corresponds
+ * to the IRQ status register reset. With such a mechanism, a single
+ * IRQ can be handled at a time since there is no way to know which
+ * IRQ was completed by the guest (we would need additional details
+ * about the IRQ status register mask)
+ */
+static void vfio_platform_eoi(VFIODevice *vbasedev)
+{
+ VFIOINTp *intp;
+ VFIOPlatformDevice *vdev =
+ container_of(vbasedev, VFIOPlatformDevice, vbasedev);
+
+ qemu_mutex_lock(&vdev->intp_mutex);
+ QLIST_FOREACH(intp, &vdev->intp_list, next) {
+ if (intp->state == VFIO_IRQ_ACTIVE) {
+ trace_vfio_platform_eoi(intp->pin,
+ event_notifier_get_fd(&intp->interrupt));
+ intp->state = VFIO_IRQ_INACTIVE;
+
+ /* deassert the virtual IRQ and unmask physical one */
+ qemu_set_irq(intp->qemuirq, 0);
+ vfio_unmask_single_irqindex(vbasedev, intp->pin);
+
+ /* a single IRQ can be active at a time */
+ break;
+ }
+ }
+ /* in case there are pending IRQs, handle them one at a time */
+ if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
+ intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
+ trace_vfio_platform_eoi_handle_pending(intp->pin);
+ qemu_mutex_unlock(&vdev->intp_mutex);
+ vfio_intp_interrupt(intp);
+ qemu_mutex_lock(&vdev->intp_mutex);
+ QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
+ qemu_mutex_unlock(&vdev->intp_mutex);
+ } else {
+ qemu_mutex_unlock(&vdev->intp_mutex);
+ }
+}
+
+/**
+ * vfio_mmap_set_enabled - enable/disable the fast path mode
+ * @vdev: the VFIO platform device
+ * @enabled: the target mmap state
+ *
+ * true ~ fast path = MMIO region is mmaped (no KVM TRAP)
+ * false ~ slow path = MMIO region is trapped and region callbacks
+ * are called slow path enables to trap the IRQ status register
+ * guest reset
+*/
+
+static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
+{
+ VFIORegion *region;
+ int i;
+
+ trace_vfio_platform_mmap_set_enabled(enabled);
+
+ for (i = 0; i < vdev->vbasedev.num_regions; i++) {
+ region = vdev->regions[i];
+
+ /* register space is unmapped to trap EOI */
+ memory_region_set_enabled(®ion->mmap_mem, enabled);
+ }
+}
+
+/**
+ * vfio_intp_mmap_enable - timer function, restores the fast path
+ * if there is no more active IRQ
+ * @opaque: actually points to the VFIO platform device
+ *
+ * Called on mmap timer timout, this function checks whether the
+ * IRQ is still active and in the negative restores the fast path.
+ * by construction a single eventfd is handled at a time.
+ * if the IRQ is still active, the timer is restarted.
+ */
+static void vfio_intp_mmap_enable(void *opaque)
+{
+ VFIOINTp *tmp;
+ VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
+
+ qemu_mutex_lock(&vdev->intp_mutex);
+ QLIST_FOREACH(tmp, &vdev->intp_list, next) {
+ if (tmp->state == VFIO_IRQ_ACTIVE) {
+ trace_vfio_platform_intp_mmap_enable(tmp->pin);
+ /* re-program the timer to check active status later */
+ timer_mod(vdev->mmap_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ vdev->mmap_timeout);
+ qemu_mutex_unlock(&vdev->intp_mutex);
+ return;
+ }
+ }
+ vfio_mmap_set_enabled(vdev, true);
+ qemu_mutex_unlock(&vdev->intp_mutex);
+}
+
+/**
+ * vfio_intp_interrupt - The user-side eventfd handler
+ * @opaque: opaque pointer which in practice is the VFIOINTp*
+ *
+ * the function can be entered
+ * - in event handler context: this IRQ is inactive
+ * in that case, the vIRQ is injected into the guest if there
+ * is no other active or pending IRQ.
+ * - in IOhandler context: this IRQ is pending.
+ * there is no ACTIVE IRQ
+ */
+static void vfio_intp_interrupt(VFIOINTp *intp)
+{
+ int ret;
+ VFIOINTp *tmp;
+ VFIOPlatformDevice *vdev = intp->vdev;
+ bool delay_handling = false;
+
+ qemu_mutex_lock(&vdev->intp_mutex);
+ if (intp->state == VFIO_IRQ_INACTIVE) {
+ QLIST_FOREACH(tmp, &vdev->intp_list, next) {
+ if (tmp->state == VFIO_IRQ_ACTIVE ||
+ tmp->state == VFIO_IRQ_PENDING) {
+ delay_handling = true;
+ break;
+ }
+ }
+ }
+ if (delay_handling) {
+ /*
+ * the new IRQ gets a pending status and is pushed in
+ * the pending queue
+ */
+ intp->state = VFIO_IRQ_PENDING;
+ trace_vfio_intp_interrupt_set_pending(intp->pin);
+ QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
+ intp, pqnext);
+ ret = event_notifier_test_and_clear(&intp->interrupt);
+ qemu_mutex_unlock(&vdev->intp_mutex);
+ return;
+ }
+
+ /* no active IRQ, the new IRQ can be forwarded to the guest */
+ trace_vfio_platform_intp_interrupt(intp->pin,
+ event_notifier_get_fd(&intp->interrupt));
+
+ if (intp->state == VFIO_IRQ_INACTIVE) {
+ ret = event_notifier_test_and_clear(&intp->interrupt);
+ if (!ret) {
+ error_report("Error when clearing fd=%d (ret = %d)\n",
+ event_notifier_get_fd(&intp->interrupt), ret);
+ }
+ } /* else this is a pending IRQ that moves to ACTIVE state */
+
+ intp->state = VFIO_IRQ_ACTIVE;
+
+ /* sets slow path */
+ vfio_mmap_set_enabled(vdev, false);
+
+ /* trigger the virtual IRQ */
+ qemu_set_irq(intp->qemuirq, 1);
+
+ /* schedule the mmap timer which will restore mmap path after EOI*/
+ if (vdev->mmap_timeout) {
+ timer_mod(vdev->mmap_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ vdev->mmap_timeout);
+ }
+ qemu_mutex_unlock(&vdev->intp_mutex);
+}
+
+/**
+ * vfio_start_eventfd_injection - starts the virtual IRQ injection using
+ * user-side handled eventfds
+ * @intp: the IRQ struct pointer
+ */
+
+static int vfio_start_eventfd_injection(VFIOINTp *intp)
+{
+ int ret;
+ VFIODevice *vbasedev = &intp->vdev->vbasedev;
+
+ vfio_mask_single_irqindex(vbasedev, intp->pin);
+
+ ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
+ if (ret) {
+ error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
+ vfio_unmask_single_irqindex(vbasedev, intp->pin);
+ return ret;
+ }
+ vfio_unmask_single_irqindex(vbasedev, intp->pin);
+ return 0;
+}
+
+/*
+ * Functions used whatever the injection method
+ */
+
+/**
+ * vfio_set_trigger_eventfd - set VFIO eventfd handling
+ * ie. program the VFIO driver to associates a given IRQ index
+ * with a fd handler
+ *
+ * @intp: IRQ struct pointer
+ * @handler: handler to be called on eventfd trigger
+ */
+static int vfio_set_trigger_eventfd(VFIOINTp *intp,
+ eventfd_user_side_handler_t handler)
+{
+ VFIODevice *vbasedev = &intp->vdev->vbasedev;
+ struct vfio_irq_set *irq_set;
+ int argsz, ret;
+ int32_t *pfd;
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = intp->pin;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+ *pfd = event_notifier_get_fd(&intp->interrupt);
+ qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ g_free(irq_set);
+ if (ret < 0) {
+ error_report("vfio: Failed to set trigger eventfd: %m");
+ qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
+ }
+ return ret;
+}
+
/* not implemented yet */
static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
{
@@ -39,6 +288,40 @@ static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
}
/**
+ * vfio_init_intp - allocate, initialize the IRQ struct pointer
+ * and add it into the list of IRQ
+ * @vbasedev: the VFIO device
+ * @index: VFIO device IRQ index
+ */
+static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, unsigned int index)
+{
+ int ret;
+ VFIOPlatformDevice *vdev =
+ container_of(vbasedev, VFIOPlatformDevice, vbasedev);
+ SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
+ VFIOINTp *intp;
+
+ /* allocate and populate a new VFIOINTp structure put in a queue list */
+ intp = g_malloc0(sizeof(*intp));
+ intp->vdev = vdev;
+ intp->pin = index;
+ intp->state = VFIO_IRQ_INACTIVE;
+ sysbus_init_irq(sbdev, &intp->qemuirq);
+
+ /* Get an eventfd for trigger */
+ ret = event_notifier_init(&intp->interrupt, 0);
+ if (ret) {
+ g_free(intp);
+ error_report("vfio: Error: trigger event_notifier_init failed ");
+ return NULL;
+ }
+
+ /* store the new intp in qlist */
+ QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
+ return intp;
+}
+
+/**
* vfio_populate_device - initialize MMIO region and IRQ
* @vbasedev: the VFIO device
*
@@ -47,7 +330,9 @@ static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
*/
static int vfio_populate_device(VFIODevice *vbasedev)
{
+ struct vfio_irq_info irq = { .argsz = sizeof(irq) };
struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+ VFIOINTp *intp, *tmp;
int i, ret = -1;
VFIOPlatformDevice *vdev =
container_of(vbasedev, VFIOPlatformDevice, vbasedev);
@@ -80,7 +365,37 @@ static int vfio_populate_device(VFIODevice *vbasedev)
(unsigned long)vdev->regions[i]->fd_offset);
}
+ vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ vfio_intp_mmap_enable, vdev);
+
+ QSIMPLEQ_INIT(&vdev->pending_intp_queue);
+
+ for (i = 0; i < vbasedev->num_irqs; i++) {
+ irq.index = i;
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+ if (ret) {
+ error_printf("vfio: error getting device %s irq info",
+ vbasedev->name);
+ goto irq_err;
+ } else {
+ trace_vfio_platform_populate_interrupts(irq.index,
+ irq.count,
+ irq.flags);
+ intp = vfio_init_intp(vbasedev, irq.index);
+ if (!intp) {
+ error_report("vfio: Error installing IRQ %d up", i);
+ goto irq_err;
+ }
+ }
+ }
return 0;
+irq_err:
+ timer_del(vdev->mmap_timer);
+ QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
+ QLIST_REMOVE(intp, next);
+ g_free(intp);
+ }
error:
for (i = 0; i < vbasedev->num_regions; i++) {
g_free(vdev->regions[i]);
@@ -93,6 +408,7 @@ error:
static VFIODeviceOps vfio_platform_ops = {
.vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
.vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
+ .vfio_eoi = vfio_platform_eoi,
.vfio_populate_device = vfio_populate_device,
};
@@ -220,6 +536,7 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
vbasedev->ops = &vfio_platform_ops;
+ vdev->start_irq_fn = vfio_start_eventfd_injection;
trace_vfio_platform_realize(vbasedev->name, vdev->compat);
@@ -243,6 +560,8 @@ static const VMStateDescription vfio_platform_vmstate = {
static Property vfio_platform_dev_properties[] = {
DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
+ DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
+ mmap_timeout, 1100),
DEFINE_PROP_END_OF_LIST(),
};
@@ -18,16 +18,49 @@
#include "hw/sysbus.h"
#include "hw/vfio/vfio-common.h"
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "hw/irq.h"
#define TYPE_VFIO_PLATFORM "vfio-platform"
+enum {
+ VFIO_IRQ_INACTIVE = 0,
+ VFIO_IRQ_PENDING = 1,
+ VFIO_IRQ_ACTIVE = 2,
+ /* VFIO_IRQ_ACTIVE_AND_PENDING cannot happen with VFIO */
+};
+
+typedef struct VFIOINTp {
+ QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */
+ QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */
+ EventNotifier interrupt; /* eventfd triggered on interrupt */
+ EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+ qemu_irq qemuirq;
+ struct VFIOPlatformDevice *vdev; /* back pointer to device */
+ int state; /* inactive, pending, active */
+ bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+ uint8_t pin; /* index */
+ uint32_t virtualID; /* virtual IRQ */
+} VFIOINTp;
+
+typedef int (*start_irq_fn_t)(VFIOINTp *intp);
+
typedef struct VFIOPlatformDevice {
SysBusDevice sbdev;
VFIODevice vbasedev; /* not a QOM object */
VFIORegion **regions;
+ QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQ */
+ /* queue of pending IRQ */
+ QSIMPLEQ_HEAD(pending_intp_queue, VFIOINTp) pending_intp_queue;
char *compat; /* compatibility string */
+ uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+ QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
+ start_irq_fn_t start_irq_fn;
+ QemuMutex intp_mutex;
} VFIOPlatformDevice;
+
typedef struct VFIOPlatformDeviceClass {
/*< private >*/
SysBusDeviceClass parent_class;
This patch adds the code requested to assign interrupts to a guest. The interrupts are mediated through user handled eventfds only. The mechanics to start the IRQ handling is not yet there through. Signed-off-by: Eric Auger <eric.auger@linaro.org> --- v8 -> v9: - free irq related resources in case of error in vfio_populate_device --- hw/vfio/platform.c | 319 ++++++++++++++++++++++++++++++++++++++++ include/hw/vfio/vfio-platform.h | 33 +++++ 2 files changed, 352 insertions(+)