@@ -24,6 +24,8 @@
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "hw/vfio/pci.h"
+
#ifndef DEBUG_S390PCI_BUS
#define DEBUG_S390PCI_BUS 0
#endif
@@ -737,6 +739,42 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
object_unref(OBJECT(iommu));
}
+static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice *vdev)
+{
+ int id = vdev->group->container->fd;
+ S390PCIDMACount *cnt;
+
+ if (vdev->group->container->dma_limit == 0) {
+ return NULL;
+ }
+
+ QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
+ if (cnt->id == id) {
+ cnt->users++;
+ return cnt;
+ }
+ }
+
+ cnt = g_new0(S390PCIDMACount, 1);
+ cnt->id = id;
+ cnt->users = 1;
+ cnt->avail = vdev->group->container->dma_limit;
+ QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
+ return cnt;
+}
+
+static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
+{
+ if (cnt == NULL) {
+ return;
+ }
+
+ cnt->users--;
+ if (cnt->users == 0) {
+ QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
+ }
+}
+
static void s390_pcihost_realize(DeviceState *dev, Error **errp)
{
PCIBus *b;
@@ -764,6 +802,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
s->bus_no = 0;
QTAILQ_INIT(&s->pending_sei);
QTAILQ_INIT(&s->zpci_devs);
+ QTAILQ_INIT(&s->zpci_dma_limit);
css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
S390_ADAPTER_SUPPRESSIBLE, errp);
@@ -902,6 +941,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
{
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
PCIDevice *pdev = NULL;
+ VFIOPCIDevice *vpdev = NULL;
S390PCIBusDevice *pbdev = NULL;
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
@@ -941,17 +981,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
}
}
+ pbdev->pdev = pdev;
+ pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
+ pbdev->iommu->pbdev = pbdev;
+ pbdev->state = ZPCI_FS_DISABLED;
+
if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
pbdev->fh |= FH_SHM_VFIO;
+ vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ pbdev->iommu->dma_limit = s390_start_dma_count(s,
+ &vpdev->vbasedev);
} else {
pbdev->fh |= FH_SHM_EMUL;
}
- pbdev->pdev = pdev;
- pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
- pbdev->iommu->pbdev = pbdev;
- pbdev->state = ZPCI_FS_DISABLED;
-
if (s390_pci_msix_init(pbdev)) {
error_setg(errp, "MSI-X support is mandatory "
"in the S390 architecture");
@@ -1004,6 +1047,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev->fid = 0;
QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
g_hash_table_remove(s->zpci_table, &pbdev->idx);
+ if (pbdev->iommu->dma_limit) {
+ s390_end_dma_count(s, pbdev->iommu->dma_limit);
+ }
qdev_unrealize(dev);
}
}
@@ -265,6 +265,13 @@ typedef struct S390IOTLBEntry {
uint64_t perm;
} S390IOTLBEntry;
+typedef struct S390PCIDMACount {
+ int id;
+ int users;
+ uint32_t avail;
+ QTAILQ_ENTRY(S390PCIDMACount) link;
+} S390PCIDMACount;
+
typedef struct S390PCIBusDevice S390PCIBusDevice;
typedef struct S390PCIIOMMU {
Object parent_obj;
@@ -277,6 +284,7 @@ typedef struct S390PCIIOMMU {
uint64_t pba;
uint64_t pal;
GHashTable *iotlb;
+ S390PCIDMACount *dma_limit;
} S390PCIIOMMU;
typedef struct S390PCIIOMMUTable {
@@ -352,6 +360,7 @@ typedef struct S390pciState {
GHashTable *zpci_table;
QTAILQ_HEAD(, SeiContainer) pending_sei;
QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
+ QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
} S390pciState;
S390pciState *s390_get_phb(void);
@@ -32,6 +32,9 @@
} \
} while (0)
+#define INC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++;
+#define DEC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--;
+
static void s390_set_status_code(CPUS390XState *env,
uint8_t r, uint64_t status_code)
{
@@ -572,7 +575,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
return 0;
}
-static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
+static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
+ S390IOTLBEntry *entry)
{
S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
IOMMUTLBEntry notify = {
@@ -585,14 +589,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
if (entry->perm == IOMMU_NONE) {
if (!cache) {
- return;
+ goto out;
}
g_hash_table_remove(iommu->iotlb, &entry->iova);
+ INC_DMA_AVAIL(iommu);
} else {
if (cache) {
if (cache->perm == entry->perm &&
cache->translated_addr == entry->translated_addr) {
- return;
+ goto out;
}
notify.perm = IOMMU_NONE;
@@ -606,9 +611,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
cache->len = PAGE_SIZE;
cache->perm = entry->perm;
g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
+ DEC_DMA_AVAIL(iommu);
}
memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
+
+out:
+ return iommu->dma_limit ? iommu->dma_limit->avail : 1;
}
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
@@ -620,6 +629,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
S390PCIIOMMU *iommu;
S390IOTLBEntry entry;
hwaddr start, end;
+ uint32_t dma_avail;
if (env->psw.mask & PSW_MASK_PSTATE) {
s390_program_interrupt(env, PGM_PRIVILEGED, ra);
@@ -675,8 +685,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
}
start += entry.len;
- while (entry.iova < start && entry.iova < end) {
- s390_pci_update_iotlb(iommu, &entry);
+ dma_avail = 1; /* Assume non-zero dma_avail to start */
+ while (entry.iova < start && entry.iova < end && dma_avail > 0) {
+ dma_avail = s390_pci_update_iotlb(iommu, &entry);
entry.iova += PAGE_SIZE;
entry.translated_addr += PAGE_SIZE;
}
@@ -689,7 +700,13 @@ err:
s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
} else {
pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++;
- setcc(cpu, ZPCI_PCI_LS_OK);
+ if (dma_avail > 0) {
+ setcc(cpu, ZPCI_PCI_LS_OK);
+ } else {
+ /* vfio DMA mappings are exhausted, trigger a RPCIT */
+ setcc(cpu, ZPCI_PCI_LS_ERR);
+ s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES);
+ }
}
return 0;
}
@@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp {
#define ZPCI_STPCIFC_ST_INVAL_DMAAS 28
#define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
+/* Refresh PCI Translations status codes */
+#define ZPCI_RPCIT_ST_INSUFF_RES 16
+
/* FIB function controls */
#define ZPCI_FIB_FC_ENABLED 0x80
#define ZPCI_FIB_FC_ERROR 0x40
When an s390 guest is using lazy unmapping, it can result in a very large number of oustanding DMA requests, far beyond the default limit configured for vfio. Let's track DMA usage similar to vfio in the host, and trigger the guest to flush their DMA mappings before vfio runs out. Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> --- hw/s390x/s390-pci-bus.c | 56 +++++++++++++++++++++++++++++++++++++++++++----- hw/s390x/s390-pci-bus.h | 9 ++++++++ hw/s390x/s390-pci-inst.c | 29 +++++++++++++++++++------ hw/s390x/s390-pci-inst.h | 3 +++ 4 files changed, 86 insertions(+), 11 deletions(-)