!454 [sync] PR-453: generate code patches with openeuler !245 !247 !243

From: @openeuler-sync-bot 
Reviewed-by: @imxcc 
Signed-off-by: @imxcc
This commit is contained in:
openeuler-ci-bot 2022-02-12 12:42:49 +00:00 committed by Gitee
commit 0c597c01e6
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
52 changed files with 5606 additions and 1 deletions

View File

@ -0,0 +1,23 @@
From 00c4115a1388ee72295b99fce1f6ad49bf761134 Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Thu, 10 Feb 2022 17:08:08 +0800
Subject: [PATCH] bios-tables-test: Allow changes to q35/SSDT.dimmpxm file
List tests/data/acpi/q35/SSDT.dimmpxm as an expected file allowed to
be changed in tests/qtest/bios-tables-test-allowed-diff.h.
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..81148a604f 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
/* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/SSDT.dimmpxm",
--
2.27.0
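
For background, the allowed-diff header is just a list of C string literals, which lets the test suite include it directly into an array. A minimal sketch of that pattern follows; the array and helper names are illustrative and not necessarily those used by tests/qtest/bios-tables-test.c.

/* Illustrative sketch: consume the allowed-diff list as a string array. */
#include <string.h>
#include <stdbool.h>

static const char *allowed_diff_tables[] = {
#include "tests/qtest/bios-tables-test-allowed-diff.h"
    NULL
};

/* A table listed here may differ from its reference blob without failing. */
static bool table_change_allowed(const char *aml_file)
{
    for (int i = 0; allowed_diff_tables[i]; i++) {
        if (!strcmp(allowed_diff_tables[i], aml_file)) {
            return true;
        }
    }
    return false;
}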

View File

@ -0,0 +1,33 @@
From 876d18c962f0ead31d8458cd7ac19178be78455c Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 19 Mar 2021 12:22:48 -0400
Subject: [PATCH] hw/arm/smmu-common: Allow domain invalidation for
NH_ALL/NSNH_ALL
NH_ALL/NSNH_ALL corresponds to a domain-granularity invalidation,
i.e. the whole notifier range gets invalidated, whatever the ASID.
So let's set the granularity to IOMMU_INV_GRAN_DOMAIN to allow
the consumer to benefit from this information if it can.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Suggested-by: chenxiang (M) <chenxiang66@hisilicon.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmu-common.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 3a1ecf81d6..2ec4222c93 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -477,6 +477,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n)
event.entry.iova = n->start;
event.entry.perm = IOMMU_NONE;
event.entry.addr_mask = n->end - n->start;
+ event.entry.granularity = IOMMU_INV_GRAN_DOMAIN;
memory_region_notify_iommu_one(n, &event);
}
--
2.27.0

View File

@ -0,0 +1,32 @@
From 5a759ab19d508361053e388694546216705d173b Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 28 Aug 2018 09:21:53 -0400
Subject: [PATCH] hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute
The SMMUv3 has the peculiarity of translating MSI
transactions. Let's advertise the corresponding
attribute.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9b87d16217..12f354a0d5 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1596,6 +1596,9 @@ static int smmuv3_get_attr(IOMMUMemoryRegion *iommu,
if (attr == IOMMU_ATTR_VFIO_NESTED) {
*(bool *) data = true;
return 0;
+ } else if (attr == IOMMU_ATTR_MSI_TRANSLATE) {
+ *(bool *) data = true;
+ return 0;
}
return -EINVAL;
}
--
2.27.0
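
Consumers (e.g. the VFIO code) can query this attribute through the existing memory_region_iommu_get_attr() helper. A minimal sketch, assuming an IOMMUMemoryRegion pointer obtained elsewhere; the wrapper name is hypothetical.

#include "qemu/osdep.h"
#include "exec/memory.h"

/* Sketch: ask the vIOMMU whether MSIs are translated like ordinary DMA. */
static bool viommu_translates_msi(IOMMUMemoryRegion *iommu_mr)
{
    bool msi_translate = false;

    /* memory_region_iommu_get_attr() returns 0 on success and
     * -EINVAL when the attribute is not supported. */
    if (memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_MSI_TRANSLATE,
                                     &msi_translate)) {
        return false;
    }
    return msi_translate;
}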

View File

@ -0,0 +1,37 @@
From dc126664134989975ce9ab9e7d5d2c8916628bf6 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 18 Mar 2020 11:17:36 +0100
Subject: [PATCH] hw/arm/smmuv3: Allow MAP notifiers
We now have all the bricks to support nested paging. This
uses MAP notifiers to map the MSIs. So let's allow MAP
notifiers to be registered.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9aeb420428..45f21c53fe 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1628,14 +1628,6 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
return -EINVAL;
}
- if (new & IOMMU_NOTIFIER_MAP) {
- error_setg(errp,
- "device %02x.%02x.%x requires iommu MAP notifier which is "
- "not currently supported", pci_bus_num(sdev->bus),
- PCI_SLOT(sdev->devfn), PCI_FUNC(sdev->devfn));
- return -EINVAL;
- }
-
if (old == IOMMU_NOTIFIER_NONE) {
trace_smmuv3_notify_flag_add(iommu->parent_obj.name);
QLIST_INSERT_HEAD(&s->devices_with_notifiers, sdev, next);
--
2.27.0
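
With this restriction removed, a notifier covering both MAP and UNMAP events can be registered on the SMMUv3 IOMMU memory region through the generic notifier API. A hedged sketch follows; the callback name and the covered range are illustrative.

#include "qemu/osdep.h"
#include "exec/memory.h"

/* entry->perm == IOMMU_NONE signals an invalidation (UNMAP);
 * any other permission signals a new mapping (MAP). */
static void my_iommu_notify(IOMMUNotifier *n, IOMMUTLBEntry *entry)
{
}

static void register_map_unmap_notifier(IOMMUMemoryRegion *iommu_mr,
                                        IOMMUNotifier *n, Error **errp)
{
    iommu_notifier_init(n, my_iommu_notify, IOMMU_NOTIFIER_ALL,
                        0, HWADDR_MAX, 0);
    memory_region_register_iommu_notifier(MEMORY_REGION(iommu_mr), n, errp);
}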

View File

@ -0,0 +1,34 @@
From dcda615b3d9b1acffee3d31d57974cc9e4bd0dee Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 4 Sep 2018 08:48:33 -0400
Subject: [PATCH] hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA
invalidation
When the guest invalidates one S1 entry, it passes the ASID.
When propagating this invalidation down to the host, the ASID
information must also be passed. So let's fill the arch_id field
introduced for that purpose and set the flags accordingly to
indicate its presence.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 3416f6a639..696c588f08 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -833,6 +833,8 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
event.entry.iova = iova;
event.entry.addr_mask = num_pages * (1 << granule) - 1;
event.entry.perm = IOMMU_NONE;
+ event.entry.flags = IOMMU_INV_FLAGS_ARCHID;
+ event.entry.arch_id = asid;
memory_region_notify_iommu_one(n, &event);
}
--
2.27.0

View File

@ -0,0 +1,77 @@
From c219274b7b6a472d7340a4f72a052ba33ed19659 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 14 Mar 2019 09:55:13 -0400
Subject: [PATCH] hw/arm/smmuv3: Fill the IOTLBEntry leaf field on NH_VA
invalidation
Let's propagate the leaf attribute throughout the invalidation path.
This hint is used to reduce the scope of the invalidations to the
last level of translation. Not enforcing it induces large performance
penalties in nested mode.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 696c588f08..ad816e850c 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -800,7 +800,7 @@ epilogue:
static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
IOMMUNotifier *n,
int asid, dma_addr_t iova,
- uint8_t tg, uint64_t num_pages)
+ uint8_t tg, uint64_t num_pages, bool leaf)
{
SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
IOMMUTLBEvent event = {};
@@ -835,6 +835,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
event.entry.perm = IOMMU_NONE;
event.entry.flags = IOMMU_INV_FLAGS_ARCHID;
event.entry.arch_id = asid;
+ event.entry.leaf = leaf;
memory_region_notify_iommu_one(n, &event);
}
@@ -866,7 +867,7 @@ static void smmuv3_notify_asid(IOMMUMemoryRegion *mr,
/* invalidate an asid/iova range tuple in all mr's */
static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova,
- uint8_t tg, uint64_t num_pages)
+ uint8_t tg, uint64_t num_pages, bool leaf)
{
SMMUDevice *sdev;
@@ -878,7 +879,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova,
tg, num_pages);
IOMMU_NOTIFIER_FOREACH(n, mr) {
- smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages);
+ smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages, leaf);
}
}
}
@@ -903,7 +904,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
if (!tg) {
trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
- smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1);
+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1, leaf);
smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
return;
}
@@ -921,7 +922,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
num_pages = (mask + 1) >> granule;
trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
- smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages, leaf);
smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
addr += mask + 1;
}
--
2.27.0

View File

@ -0,0 +1,107 @@
From d31c754470b4b651d0e19c66738fbcc8fc6abf3c Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 13 Sep 2018 14:24:45 +0200
Subject: [PATCH] hw/arm/smmuv3: Implement fault injection
We convert iommu_fault structs received from the kernel
into the data struct used by the emulation code and record
the events into the virtual event queue.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 71 insertions(+)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 58139f707d..9aeb420428 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1660,6 +1660,76 @@ static int smmuv3_get_attr(IOMMUMemoryRegion *iommu,
return -EINVAL;
}
+struct iommu_fault;
+
+static inline int
+smmuv3_inject_faults(IOMMUMemoryRegion *iommu_mr, int count,
+ struct iommu_fault *buf)
+{
+#ifdef __linux__
+ SMMUDevice *sdev = container_of(iommu_mr, SMMUDevice, iommu);
+ SMMUv3State *s3 = sdev->smmu;
+ uint32_t sid = smmu_get_sid(sdev);
+ int i;
+
+ for (i = 0; i < count; i++) {
+ SMMUEventInfo info = {};
+ struct iommu_fault_unrecoverable *record;
+
+ if (buf[i].type != IOMMU_FAULT_DMA_UNRECOV) {
+ continue;
+ }
+
+ info.sid = sid;
+ record = &buf[i].event;
+
+ switch (record->reason) {
+ case IOMMU_FAULT_REASON_PASID_INVALID:
+ info.type = SMMU_EVT_C_BAD_SUBSTREAMID;
+ /* TODO further fill info.u.c_bad_substream */
+ break;
+ case IOMMU_FAULT_REASON_PASID_FETCH:
+ info.type = SMMU_EVT_F_CD_FETCH;
+ break;
+ case IOMMU_FAULT_REASON_BAD_PASID_ENTRY:
+ info.type = SMMU_EVT_C_BAD_CD;
+ /* TODO further fill info.u.c_bad_cd */
+ break;
+ case IOMMU_FAULT_REASON_WALK_EABT:
+ info.type = SMMU_EVT_F_WALK_EABT;
+ info.u.f_walk_eabt.addr = record->addr;
+ info.u.f_walk_eabt.addr2 = record->fetch_addr;
+ break;
+ case IOMMU_FAULT_REASON_PTE_FETCH:
+ info.type = SMMU_EVT_F_TRANSLATION;
+ info.u.f_translation.addr = record->addr;
+ break;
+ case IOMMU_FAULT_REASON_OOR_ADDRESS:
+ info.type = SMMU_EVT_F_ADDR_SIZE;
+ info.u.f_addr_size.addr = record->addr;
+ break;
+ case IOMMU_FAULT_REASON_ACCESS:
+ info.type = SMMU_EVT_F_ACCESS;
+ info.u.f_access.addr = record->addr;
+ break;
+ case IOMMU_FAULT_REASON_PERMISSION:
+ info.type = SMMU_EVT_F_PERMISSION;
+ info.u.f_permission.addr = record->addr;
+ break;
+ default:
+ warn_report("%s Unexpected fault reason received from host: %d",
+ __func__, record->reason);
+ continue;
+ }
+
+ smmuv3_record_event(s3, &info);
+ }
+ return 0;
+#else
+ return -1;
+#endif
+}
+
static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
void *data)
{
@@ -1668,6 +1738,7 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
imrc->translate = smmuv3_translate;
imrc->notify_flag_changed = smmuv3_notify_flag_changed;
imrc->get_attr = smmuv3_get_attr;
+ imrc->inject_faults = smmuv3_inject_faults;
}
static const TypeInfo smmuv3_type_info = {
--
2.27.0

View File

@ -0,0 +1,107 @@
From de53feaa37a267a21ed30a642e1e64c5fcfbc4a4 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Sun, 14 Feb 2021 12:30:57 -0500
Subject: [PATCH] hw/arm/smmuv3: Improve stage1 ASID invalidation
At the moment the ASID invalidation command (CMD_TLBI_NH_ASID) is
propagated as a domain invalidation (the whole notifier range
is invalidated independently of any ASID information).
The new granularity field now allows us to be more precise and
restrict the invalidation to a particular ASID. Set the corresponding
fields and flag.
We still keep the iova and addr_mask settings for consumers that
do not support the new fields, like VHOST.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
hw/arm/trace-events | 1 +
2 files changed, 43 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 94e2c658f8..da5dac1ba5 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -836,6 +836,31 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
memory_region_notify_iommu_one(n, &event);
}
+/**
+ * smmuv3_notify_asid - call the notifier @n for a given asid
+ *
+ * @mr: IOMMU mr region handle
+ * @n: notifier to be called
+ * @asid: address space ID or negative value if we don't care
+ */
+static void smmuv3_notify_asid(IOMMUMemoryRegion *mr,
+ IOMMUNotifier *n, int asid)
+{
+ IOMMUTLBEvent event = {};
+
+ event.type = IOMMU_NOTIFIER_UNMAP;
+ event.entry.target_as = &address_space_memory;
+ event.entry.perm = IOMMU_NONE;
+ event.entry.granularity = IOMMU_INV_GRAN_PASID;
+ event.entry.flags = IOMMU_INV_FLAGS_ARCHID;
+ event.entry.arch_id = asid;
+ event.entry.iova = n->start;
+ event.entry.addr_mask = n->end - n->start;
+
+ memory_region_notify_iommu_one(n, &event);
+}
+
+
/* invalidate an asid/iova range tuple in all mr's */
static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova,
uint8_t tg, uint64_t num_pages)
@@ -913,6 +938,22 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
return true;
}
+static void smmuv3_s1_asid_inval(SMMUState *s, uint16_t asid)
+{
+ SMMUDevice *sdev;
+
+ trace_smmuv3_s1_asid_inval(asid);
+ QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) {
+ IOMMUMemoryRegion *mr = &sdev->iommu;
+ IOMMUNotifier *n;
+
+ IOMMU_NOTIFIER_FOREACH(n, mr) {
+ smmuv3_notify_asid(mr, n, asid);
+ }
+ }
+ smmu_iotlb_inv_asid(s, asid);
+}
+
static int smmuv3_cmdq_consume(SMMUv3State *s)
{
SMMUState *bs = ARM_SMMU(s);
@@ -1027,8 +1068,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
uint16_t asid = CMD_ASID(&cmd);
trace_smmuv3_cmdq_tlbi_nh_asid(asid);
- smmu_inv_notifiers_all(&s->smmu_state);
- smmu_iotlb_inv_asid(bs, asid);
+ smmuv3_s1_asid_inval(bs, asid);
break;
}
case SMMU_CMD_TLBI_NH_ALL:
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 2dee296c8f..1447ad5a90 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -46,6 +46,7 @@ smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x"
smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d"
+smmuv3_s1_asid_inval(int asid) "asid=%d"
smmuv3_cmdq_tlbi_nh(void) ""
smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d"
smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
--
2.27.0

View File

@ -0,0 +1,161 @@
From 2e5929ec2a35a7a227dc7ba70a557a84993a366d Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 9 Aug 2018 21:04:19 +0200
Subject: [PATCH] hw/arm/smmuv3: Pass stage 1 configurations to the host
In case PASID PciOps are set for the device, we call
the set_pasid_table() callback on each STE update.
This allows passing the guest stage 1 configuration
to the host and applying it at the physical level.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmu-internal.h | 1 +
hw/arm/smmuv3.c | 71 ++++++++++++++++++++++++++++++++++++------
hw/arm/trace-events | 1 +
3 files changed, 64 insertions(+), 9 deletions(-)
diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
index 2d75b31953..5ef8c598c6 100644
--- a/hw/arm/smmu-internal.h
+++ b/hw/arm/smmu-internal.h
@@ -105,6 +105,7 @@ typedef struct SMMUIOTLBPageInvInfo {
} SMMUIOTLBPageInvInfo;
typedef struct SMMUSIDRange {
+ SMMUState *state;
uint32_t start;
uint32_t end;
} SMMUSIDRange;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ad816e850c..58139f707d 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -16,6 +16,10 @@
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#ifdef __linux__
+#include "linux/iommu.h"
+#endif
+
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "hw/irq.h"
@@ -928,6 +932,61 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
}
}
+static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
+{
+#ifdef __linux__
+ IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
+ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
+ .inval_ste_allowed = true};
+ IOMMUConfig iommu_config = {};
+ SMMUTransCfg *cfg;
+ SMMUDevice *sdev;
+
+ if (!mr) {
+ return;
+ }
+
+ sdev = container_of(mr, SMMUDevice, iommu);
+
+ /* flush QEMU config cache */
+ smmuv3_flush_config(sdev);
+
+ if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) {
+ return;
+ }
+
+ cfg = smmuv3_get_config(sdev, &event);
+
+ if (!cfg) {
+ return;
+ }
+
+ iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config);
+ iommu_config.pasid_cfg.version = PASID_TABLE_CFG_VERSION_1;
+ iommu_config.pasid_cfg.format = IOMMU_PASID_FORMAT_SMMUV3;
+ iommu_config.pasid_cfg.base_ptr = cfg->s1ctxptr;
+ iommu_config.pasid_cfg.pasid_bits = 0;
+ iommu_config.pasid_cfg.vendor_data.smmuv3.version = PASID_TABLE_SMMUV3_CFG_VERSION_1;
+
+ if (cfg->disabled || cfg->bypassed) {
+ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_BYPASS;
+ } else if (cfg->aborted) {
+ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_ABORT;
+ } else {
+ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_TRANSLATE;
+ }
+
+ trace_smmuv3_notify_config_change(mr->parent_obj.name,
+ iommu_config.pasid_cfg.config,
+ iommu_config.pasid_cfg.base_ptr);
+
+ if (pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config)) {
+ error_report("Failed to pass PASID table to host for iommu mr %s (%m)",
+ mr->parent_obj.name);
+ }
+#endif
+}
+
static gboolean
smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
{
@@ -938,6 +997,7 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
if (sid < sid_range->start || sid > sid_range->end) {
return false;
}
+ smmuv3_notify_config_change(sid_range->state, sid);
trace_smmuv3_config_cache_inv(sid);
return true;
}
@@ -1008,22 +1068,14 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
case SMMU_CMD_CFGI_STE:
{
uint32_t sid = CMD_SID(&cmd);
- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
- SMMUDevice *sdev;
if (CMD_SSEC(&cmd)) {
cmd_error = SMMU_CERROR_ILL;
break;
}
- if (!mr) {
- break;
- }
-
trace_smmuv3_cmdq_cfgi_ste(sid);
- sdev = container_of(mr, SMMUDevice, iommu);
- smmuv3_flush_config(sdev);
-
+ smmuv3_notify_config_change(bs, sid);
break;
}
case SMMU_CMD_CFGI_STE_RANGE: /* same as SMMU_CMD_CFGI_ALL */
@@ -1038,6 +1090,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
}
mask = (1ULL << (range + 1)) - 1;
+ sid_range.state = bs;
sid_range.start = sid & ~mask;
sid_range.end = sid_range.start + mask;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 1447ad5a90..d9851d663e 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -53,4 +53,5 @@ smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
+smmuv3_notify_config_change(const char *name, uint8_t config, uint64_t s1ctxptr) "iommu mr=%s config=%d s1ctxptr=0x%"PRIx64
--
2.27.0
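
On the receiving side, a device that registered PCIPASIDOps (see the "pci: introduce PCIPASIDOps to PCIDevice" patch below) gets the decoded stage 1 configuration through its set_pasid_table() callback. A minimal sketch of such a callback follows; the function name is hypothetical and the host-side plumbing is only hinted at, assuming the patched linux/iommu.h UAPI is available.

#include "qemu/osdep.h"
#include "hw/iommu/iommu.h"
#include "hw/pci/pci.h"

/* Hypothetical consumer: forward the guest CD/PASID table GPA to the host. */
static int my_vfio_set_pasid_table(PCIBus *bus, int32_t devfn,
                                   IOMMUConfig *config)
{
#ifdef __linux__
    struct iommu_pasid_table_config *cfg = &config->pasid_cfg;

    if (cfg->config == IOMMU_PASID_CONFIG_TRANSLATE) {
        /* e.g. hand cfg->base_ptr (the guest s1ctxptr) to the host
         * through a VFIO ioctl */
    }
    return 0;
#else
    return -ENOENT;
#endif
}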

View File

@ -0,0 +1,110 @@
From 1b95c995f032c21bf6607dda8ede0f5856bb190a Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Tue, 11 May 2021 10:08:16 +0800
Subject: [PATCH] hw/arm/smmuv3: Post-load stage 1 configurations to the host
In nested mode, we call the set_pasid_table() callback on each
STE update to pass the guest stage 1 configuration to the host
and apply it at the physical level.
In the case of live migration, we need to manually call
set_pasid_table() to load the guest stage 1 configurations into
the host. If this operation fails, the migration fails.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 33 ++++++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 45f21c53fe..291e3a12e8 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -932,7 +932,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
}
}
-static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
+static int smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
{
#ifdef __linux__
IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
@@ -941,9 +941,10 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
IOMMUConfig iommu_config = {};
SMMUTransCfg *cfg;
SMMUDevice *sdev;
+ int ret;
if (!mr) {
- return;
+ return 0;
}
sdev = container_of(mr, SMMUDevice, iommu);
@@ -952,13 +953,13 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
smmuv3_flush_config(sdev);
if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) {
- return;
+ return 0;
}
cfg = smmuv3_get_config(sdev, &event);
if (!cfg) {
- return;
+ return 0;
}
iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config);
@@ -980,10 +981,13 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid)
iommu_config.pasid_cfg.config,
iommu_config.pasid_cfg.base_ptr);
- if (pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config)) {
+ ret = pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config);
+ if (ret) {
error_report("Failed to pass PASID table to host for iommu mr %s (%m)",
mr->parent_obj.name);
}
+
+ return ret;
#endif
}
@@ -1553,6 +1557,24 @@ static void smmu_realize(DeviceState *d, Error **errp)
smmu_init_irq(s, dev);
}
+static int smmuv3_post_load(void *opaque, int version_id)
+{
+ SMMUv3State *s3 = opaque;
+ SMMUState *s = &(s3->smmu_state);
+ SMMUDevice *sdev;
+ int ret = 0;
+
+ QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) {
+ uint32_t sid = smmu_get_sid(sdev);
+ ret = smmuv3_notify_config_change(s, sid);
+ if (ret) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
static const VMStateDescription vmstate_smmuv3_queue = {
.name = "smmuv3_queue",
.version_id = 1,
@@ -1571,6 +1593,7 @@ static const VMStateDescription vmstate_smmuv3 = {
.version_id = 1,
.minimum_version_id = 1,
.priority = MIG_PRI_IOMMU,
+ .post_load = smmuv3_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT32(features, SMMUv3State),
VMSTATE_UINT8(sid_size, SMMUv3State),
--
2.27.0

View File

@ -0,0 +1,45 @@
From f937ce4124d57eea27d516957a2efa0e7fbdf198 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 9 Aug 2018 20:56:44 +0200
Subject: [PATCH] hw/arm/smmuv3: Store the PASID table GPA in the translation
config
For VFIO integration we will need to pass the Context Descriptor (CD)
table GPA to the host. The CD table is also referred to as the PASID
table. Its GPA corresponds to the s1ctrptr field of the Stream Table
Entry. So let's decode and store it in the configuration structure.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 1 +
include/hw/arm/smmu-common.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 12f354a0d5..3416f6a639 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -358,6 +358,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
"SMMUv3 S1 stalling fault model not allowed yet\n");
goto bad_ste;
}
+ cfg->s1ctxptr = STE_CTXPTR(ste);
return 0;
bad_ste:
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 706be3c6d0..d578339935 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -76,6 +76,7 @@ typedef struct SMMUTransCfg {
uint8_t tbi; /* Top Byte Ignore */
uint16_t asid;
SMMUTransTableInfo tt[2];
+ dma_addr_t s1ctxptr;
uint32_t iotlb_hits; /* counts IOTLB hits for this asid */
uint32_t iotlb_misses; /* counts IOTLB misses for this asid */
} SMMUTransCfg;
--
2.27.0

View File

@ -0,0 +1,41 @@
From 9169beed83ea77059a7240aae5621dcfb3178cba Mon Sep 17 00:00:00 2001
From: Prasad J Pandit <address@hidden>
Date: Mon, 21 Jun 2021 09:22:35 +0800
Subject: [PATCH] ide: ahci: add check to avoid null dereference
(CVE-2019-12067)
Fix CVE-2019-12067
The AHCI emulator, while committing a DMA buffer in ahci_commit_buf(),
may do a NULL dereference if the command header 'ad->cur_cmd'
is null. Add a check to avoid it.
Reported-by: Bugs SysSec <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
Signed-off-by: Jiajie Li <lijiajie11@huawei.com>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
hw/ide/ahci.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index a94c6e26fb..256b58026a 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1459,8 +1459,10 @@ static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes)
{
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
- tx_bytes += le32_to_cpu(ad->cur_cmd->status);
- ad->cur_cmd->status = cpu_to_le32(tx_bytes);
+ if (ad->cur_cmd) {
+ tx_bytes += le32_to_cpu(ad->cur_cmd->status);
+ ad->cur_cmd->status = cpu_to_le32(tx_bytes);
+ }
}
static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write)
--
2.27.0

View File

@ -0,0 +1,53 @@
From 5e312f7b41ec48dc7dc9805af9f52aa8ed393bf9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 9 Jul 2019 12:20:12 +0200
Subject: [PATCH] iommu: Introduce generic header
This header is meant to expose data types used by
several IOMMU devices, such as structs for SVA and
nested stage configuration.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
include/hw/iommu/iommu.h | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 include/hw/iommu/iommu.h
diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h
new file mode 100644
index 0000000000..12092bda7b
--- /dev/null
+++ b/include/hw/iommu/iommu.h
@@ -0,0 +1,28 @@
+/*
+ * common header for iommu devices
+ *
+ * Copyright Red Hat, Inc. 2019
+ *
+ * Authors:
+ * Eric Auger <eric.auger@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_HW_IOMMU_IOMMU_H
+#define QEMU_HW_IOMMU_IOMMU_H
+#ifdef __linux__
+#include <linux/iommu.h>
+#endif
+
+typedef struct IOMMUConfig {
+ union {
+#ifdef __linux__
+ struct iommu_pasid_table_config pasid_cfg;
+#endif
+ };
+} IOMMUConfig;
+
+
+#endif /* QEMU_HW_IOMMU_IOMMU_H */
--
2.27.0

View File

@ -0,0 +1,82 @@
From 40512773625a4f8ddd96a5af924f119b89a14706 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Sat, 8 May 2021 17:31:03 +0800
Subject: [PATCH] linux-headers: update against 5.10 and manual clear vfio
dirty log series
The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl flags
VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and
VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in
the kernel; update the header to add them.
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
linux-headers/linux/vfio.h | 36 +++++++++++++++++++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index e680594f27..f4ff038e8c 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -52,6 +52,16 @@
/* Supports the vaddr flag for DMA map and unmap */
#define VFIO_UPDATE_VADDR 10
+/*
+ * The vfio_iommu driver may support user clears dirty log manually, which means
+ * dirty log can be requested to not cleared automatically after dirty log is
+ * copied to userspace, it's user's duty to clear dirty log.
+ *
+ * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP.
+ */
+#define VFIO_DIRTY_LOG_MANUAL_CLEAR 11
+
/*
* The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between
@@ -1196,8 +1206,30 @@ struct vfio_iommu_type1_dma_unmap {
* actual bitmap. If dirty pages logging is not enabled, an error will be
* returned.
*
- * Only one of the flags _START, _STOP and _GET may be specified at a time.
+ * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost same as
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that it requires underlying
+ * dirty bitmap is not cleared automatically. The user can clear it manually by
+ * calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set.
*
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set,
+ * instructs the IOMMU driver to clear the dirty status of pages in a bitmap
+ * for IOMMU container for a given IOVA range. The user must specify the IOVA
+ * range, the bitmap and the pgsize through the structure
+ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface
+ * supports clearing a bitmap of the smallest supported pgsize only and can be
+ * modified in future to clear a bitmap of any specified supported pgsize. The
+ * user must provide a memory area for the bitmap memory and specify its size
+ * in bitmap.size. One bit is used to represent one page consecutively starting
+ * from iova offset. The user should provide page size in bitmap.pgsize field.
+ * A bit set in the bitmap indicates that the page at that offset from iova is
+ * cleared the dirty status, and dirty tracking is re-enabled for that page. The
+ * caller must set argsz to a value including the size of structure
+ * vfio_iommu_dirty_bitmap_get, but excluing the size of the actual bitmap. If
+ * dirty pages logging is not enabled, an error will be returned. Note: user
+ * should clear dirty log before handle corresponding dirty pages.
+ *
+ * Only one of the flags _START, _STOP, _GET, _GET_NOCLEAR_, and _CLEAR may be
+ * specified at a time.
*/
struct vfio_iommu_type1_dirty_bitmap {
__u32 argsz;
@@ -1205,6 +1237,8 @@ struct vfio_iommu_type1_dirty_bitmap {
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR (1 << 3)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP (1 << 4)
__u8 data[];
};
--
2.27.0
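
For reference, a hedged userspace-style sketch of how the two flags added by this header update are intended to be used together: fetch the dirty bitmap without clearing it, consume it, then clear it explicitly. The container fd, the IOVA range and the bitmap buffer are placeholders, and the flags only exist with the patched linux/vfio.h.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int get_then_clear_dirty_bitmap(int container_fd, __u64 iova,
                                       __u64 size, __u64 pgsize,
                                       __u64 *bitmap, __u64 bitmap_size)
{
    size_t argsz = sizeof(struct vfio_iommu_type1_dirty_bitmap) +
                   sizeof(struct vfio_iommu_type1_dirty_bitmap_get);
    char buf[argsz];
    struct vfio_iommu_type1_dirty_bitmap *db = (void *)buf;
    struct vfio_iommu_type1_dirty_bitmap_get *range = (void *)db->data;

    memset(buf, 0, argsz);
    db->argsz = argsz;
    range->iova = iova;
    range->size = size;
    range->bitmap.pgsize = pgsize;
    range->bitmap.size = bitmap_size;
    range->bitmap.data = bitmap;

    /* fetch the bitmap but leave the kernel's copy dirty */
    db->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR;
    if (ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, db)) {
        return -1;
    }

    /* ... handle the dirty pages reported in @bitmap ... */

    /* now clear the dirty status for the same range */
    db->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP;
    return ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, db);
}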

View File

@ -0,0 +1,170 @@
From d0ed3afacd2af1cbfcfb615471ade3c8c4185c00 Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Tue, 8 Feb 2022 15:48:01 +0800
Subject: [PATCH] log: Add some logs on VM runtime path
Add logs on the VM runtime path, to make troubleshooting easier.
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
hw/virtio/virtio-pci.c | 2 ++
hw/virtio/virtio.c | 14 ++++++++++++--
monitor/monitor.c | 9 +++++++++
qapi/qmp-dispatch.c | 15 +++++++++++++++
softmmu/qdev-monitor.c | 4 +++-
5 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 750aa47ec1..38a5dc1ba8 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1772,7 +1772,9 @@ static void virtio_pci_device_unplugged(DeviceState *d)
VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
bool modern = virtio_pci_modern(proxy);
bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+ qemu_log("unplug device name: %s\n", !vdev ? "NULL" : vdev->name);
virtio_pci_stop_ioeventfd(proxy);
if (modern) {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index ea7c079fb0..9b4ac58a16 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1945,7 +1945,14 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
k->set_status(vdev, val);
}
vdev->status = val;
-
+ if (val) {
+ qemu_log("%s device status is %d that means %s\n",
+ vdev->name, val,
+ (val & VIRTIO_CONFIG_S_DRIVER_OK) ? "DRIVER OK" :
+ (val & VIRTIO_CONFIG_S_DRIVER) ? "DRIVER" :
+ (val & VIRTIO_CONFIG_S_ACKNOWLEDGE) ? "ACKNOWLEDGE" :
+ (val & VIRTIO_CONFIG_S_FAILED) ? "FAILED" : "UNKNOWN");
+ }
return 0;
}
@@ -2389,8 +2396,11 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
break;
}
- if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
+ if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
+ qemu_log("unacceptable queue_size (%d) or num (%d)\n",
+ queue_size, i);
abort();
+ }
vdev->vq[i].vring.num = queue_size;
vdev->vq[i].vring.num_default = queue_size;
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 21c7a68758..013c628695 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -29,6 +29,7 @@
#include "qapi/qapi-emit-events.h"
#include "qapi/qapi-visit-control.h"
#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qjson.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "sysemu/qtest.h"
@@ -318,6 +319,7 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict)
{
Monitor *mon;
MonitorQMP *qmp_mon;
+ GString *json;
trace_monitor_protocol_event_emit(event, qdict);
QTAILQ_FOREACH(mon, &mon_list, entry) {
@@ -328,6 +330,13 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict)
qmp_mon = container_of(mon, MonitorQMP, common);
if (qmp_mon->commands != &qmp_cap_negotiation_commands) {
qmp_send_response(qmp_mon, qdict);
+ json = qobject_to_json(QOBJECT(qdict));
+ if (json) {
+ if (!strstr(json->str, "RTC_CHANGE")) {
+ qemu_log("%s\n", json->str);
+ }
+ g_string_free(json, true);
+ }
}
}
}
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index d378bccac7..bb005594d3 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -25,6 +25,7 @@
#include "qapi/qmp/qbool.h"
#include "qemu/coroutine.h"
#include "qemu/main-loop.h"
+#include "qemu/log.h"
Visitor *qobject_input_visitor_new_qmp(QObject *obj)
{
@@ -147,6 +148,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
QObject *id;
QObject *ret = NULL;
QDict *rsp = NULL;
+ GString *json;
dict = qobject_to(QDict, request);
if (!dict) {
@@ -204,6 +206,19 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
qobject_ref(args);
}
+ json = qobject_to_json(QOBJECT(args));
+ if (json) {
+ if ((strcmp(command, "query-block-jobs") != 0)
+ && (strcmp(command, "query-migrate") != 0)
+ && (strcmp(command, "query-blockstats") != 0)
+ && (strcmp(command, "query-balloon") != 0)
+ && (strcmp(command, "set_password") != 0)) {
+ qemu_log("qmp_cmd_name: %s, arguments: %s\n",
+ command, json->str);
+ }
+ g_string_free(json, true);
+ }
+
assert(!(oob && qemu_in_coroutine()));
assert(monitor_cur() == NULL);
if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) {
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index 01f3834db5..dfd6429bf3 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -36,6 +36,7 @@
#include "qemu/option.h"
#include "qemu/qemu-print.h"
#include "qemu/option_int.h"
+#include "qemu/log.h"
#include "sysemu/block-backend.h"
#include "migration/misc.h"
#include "migration/migration.h"
@@ -635,6 +636,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
if (path != NULL) {
bus = qbus_find(path, errp);
if (!bus) {
+ error_setg(errp, "can not find bus for %s", driver);
return NULL;
}
if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) {
@@ -707,7 +709,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
if (*errp) {
goto err_del_dev;
}
-
+ qemu_log("add qdev %s:%s success\n", driver, dev->id ? dev->id : "none");
if (!qdev_realize(DEVICE(dev), bus, errp)) {
goto err_del_dev;
}
--
2.27.0

View File

@ -0,0 +1,32 @@
From 062923fd4e6d11e1b724f2dd059f8b0c6e65bf7a Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 25 Mar 2019 16:35:05 +0100
Subject: [PATCH] memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region
attribute
We introduce a new IOMMU Memory Region attribute, IOMMU_ATTR_MSI_TRANSLATE,
which tells whether the virtual IOMMU translates MSIs. ARM SMMU
will expose this attribute since, as opposed to Intel DMAR, MSIs
are translated like any other DMA request.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
include/exec/memory.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 864bcaeb01..76ef99ed27 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -323,6 +323,7 @@ typedef struct MemoryRegionClass {
enum IOMMUMemoryRegionAttr {
IOMMU_ATTR_SPAPR_TCE_FD,
IOMMU_ATTR_VFIO_NESTED,
+ IOMMU_ATTR_MSI_TRANSLATE,
};
/*
--
2.27.0

View File

@ -0,0 +1,72 @@
From b380e3e0c30fb68dbbfb1397f3c374adfff77ac4 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 1 Jul 2019 11:30:30 +0200
Subject: [PATCH] memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region
attribute
We introduce a new IOMMU Memory Region attribute,
IOMMU_ATTR_VFIO_NESTED, which tells whether the virtual IOMMU
requires HW nested paging for VFIO integration.
The current Intel virtual IOMMU device supports "Caching
Mode" and does not require two stages at the physical level to be
integrated with VFIO. However, SMMUv3 does not implement such a
"caching mode" and requires the use of HW nested paging.
As such, SMMUv3 is the first IOMMU device to advertise this
attribute.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmuv3.c | 12 ++++++++++++
include/exec/memory.h | 3 ++-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index da5dac1ba5..9b87d16217 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1589,6 +1589,17 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
return 0;
}
+static int smmuv3_get_attr(IOMMUMemoryRegion *iommu,
+ enum IOMMUMemoryRegionAttr attr,
+ void *data)
+{
+ if (attr == IOMMU_ATTR_VFIO_NESTED) {
+ *(bool *) data = true;
+ return 0;
+ }
+ return -EINVAL;
+}
+
static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
void *data)
{
@@ -1596,6 +1607,7 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
imrc->translate = smmuv3_translate;
imrc->notify_flag_changed = smmuv3_notify_flag_changed;
+ imrc->get_attr = smmuv3_get_attr;
}
static const TypeInfo smmuv3_type_info = {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c3180075e1..864bcaeb01 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -321,7 +321,8 @@ typedef struct MemoryRegionClass {
enum IOMMUMemoryRegionAttr {
- IOMMU_ATTR_SPAPR_TCE_FD
+ IOMMU_ATTR_SPAPR_TCE_FD,
+ IOMMU_ATTR_VFIO_NESTED,
};
/*
--
2.27.0

View File

@ -0,0 +1,184 @@
From da97cef20d4ee5a8f3942953836b35e7f7dd974f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 4 Sep 2018 08:43:05 -0400
Subject: [PATCH] memory: Add new fields in IOTLBEntry
The current IOTLBEntry is too simple to interact with
some physical IOMMUs. IOTLBs can be invalidated at different
granularities: domain, PASID, address. The current IOTLB entry only offers
page-selective invalidation. Let's add a granularity field
that conveys this information.
TLB entries are usually tagged with some IDs such as the ASID
or PASID. When propagating an invalidation command from the
guest to the host, we need to pass those IDs.
We also add a leaf field which indicates, in case of an invalidation
notification, whether only cache entries for the last level of
translation need to be invalidated.
A flags field is introduced to indicate whether those fields are set.
To ensure that existing users do not pass stale values in those new
fields, initialize the IOMMUTLBEvents when needed.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/arm/smmu-common.c | 2 +-
hw/arm/smmuv3.c | 2 +-
hw/i386/intel_iommu.c | 6 +++---
hw/ppc/spapr_iommu.c | 2 +-
hw/virtio/virtio-iommu.c | 4 ++--
include/exec/memory.h | 36 +++++++++++++++++++++++++++++++++++-
6 files changed, 43 insertions(+), 9 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 0459850a93..3a1ecf81d6 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -470,7 +470,7 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
/* Unmap the whole notifier's range */
static void smmu_unmap_notifier_range(IOMMUNotifier *n)
{
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
event.type = IOMMU_NOTIFIER_UNMAP;
event.entry.target_as = &address_space_memory;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 01b60bee49..94e2c658f8 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -802,7 +802,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
uint8_t tg, uint64_t num_pages)
{
SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
uint8_t granule;
if (!tg) {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f584449d8d..fae282ef5e 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1193,7 +1193,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
uint32_t offset;
uint64_t slpte;
uint64_t subpage_size, subpage_mask;
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
uint64_t iova = start;
uint64_t iova_next;
int ret = 0;
@@ -2425,7 +2425,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
VTDAddressSpace *vtd_dev_as;
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
struct VTDBus *vtd_bus;
hwaddr addr;
uint64_t sz;
@@ -3481,7 +3481,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
size = remain = end - start + 1;
while (remain >= VTD_PAGE_SIZE) {
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
uint64_t mask = dma_aligned_pow2_mask(start, end, s->aw_bits);
uint64_t size = mask + 1;
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index db01071858..454df25d44 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -449,7 +449,7 @@ static void spapr_tce_reset(DeviceState *dev)
static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba,
target_ulong tce)
{
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 1b23e8e18c..83ed2b82e6 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -129,7 +129,7 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end, hwaddr paddr,
uint32_t flags)
{
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
flags & VIRTIO_IOMMU_MAP_F_WRITE);
@@ -154,7 +154,7 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end)
{
- IOMMUTLBEvent event;
+ IOMMUTLBEvent event = {};
uint64_t delta = virt_end - virt_start;
if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 20f1b27377..c3180075e1 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -113,14 +113,48 @@ typedef enum {
IOMMU_RW = 3,
} IOMMUAccessFlags;
+/* Granularity of the cache invalidation */
+typedef enum {
+ IOMMU_INV_GRAN_ADDR = 0,
+ IOMMU_INV_GRAN_PASID,
+ IOMMU_INV_GRAN_DOMAIN,
+} IOMMUInvGranularity;
+
#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
+/**
+ * struct IOMMUTLBEntry - IOMMU TLB entry
+ *
+ * Structure used when performing a translation or when notifying MAP or
+ * UNMAP (invalidation) events
+ *
+ * @target_as: target address space
+ * @iova: IO virtual address (input)
+ * @translated_addr: translated address (output)
+ * @addr_mask: address mask (0xfff means 4K binding), must be multiple of 2
+ * @perm: permission flag of the mapping (NONE encodes no mapping or
+ * invalidation notification)
+ * @granularity: granularity of the invalidation
+ * @flags: informs whether the following fields are set
+ * @arch_id: architecture specific ID tagging the TLB
+ * @pasid: PASID tagging the TLB
+ * @leaf: when @perm is NONE, indicates whether only caches for the last
+ * level of translation need to be invalidated.
+ */
struct IOMMUTLBEntry {
AddressSpace *target_as;
hwaddr iova;
hwaddr translated_addr;
- hwaddr addr_mask; /* 0xfff = 4k translation */
+ hwaddr addr_mask;
IOMMUAccessFlags perm;
+ IOMMUInvGranularity granularity;
+#define IOMMU_INV_FLAGS_PASID (1 << 0)
+#define IOMMU_INV_FLAGS_ARCHID (1 << 1)
+#define IOMMU_INV_FLAGS_LEAF (1 << 2)
+ uint32_t flags;
+ uint32_t arch_id;
+ uint32_t pasid;
+ bool leaf;
};
/*
--
2.27.0
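
A notifier consumer that understands the new fields is expected to look at the flags and granularity before acting on the tags. A minimal sketch of that logic follows; the handling itself is illustrative.

#include "qemu/osdep.h"
#include "exec/memory.h"

/* Sketch: interpret an UNMAP/invalidation notification with the new fields. */
static void handle_unmap_event(IOMMUTLBEntry *entry)
{
    if (entry->granularity == IOMMU_INV_GRAN_DOMAIN) {
        /* invalidate the whole domain, whatever the ASID/PASID */
        return;
    }
    if (entry->flags & IOMMU_INV_FLAGS_ARCHID) {
        /* restrict the invalidation to entry->arch_id (e.g. the guest ASID) */
    }
    if (entry->leaf) {
        /* only last-level translation entries need to be invalidated */
    }
    /* otherwise fall back to the iova/addr_mask range */
}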

View File

@ -0,0 +1,88 @@
From d2dce19165f133935ff72e209f19bc43ab4d1421 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 13 Sep 2018 14:13:04 +0200
Subject: [PATCH] memory: Introduce IOMMU Memory Region inject_faults API
This new API allows injecting @count iommu_faults into
the IOMMU memory region.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
include/exec/memory.h | 24 ++++++++++++++++++++++++
softmmu/memory.c | 10 ++++++++++
2 files changed, 34 insertions(+)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 76ef99ed27..3e84d62e40 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -103,6 +103,8 @@ struct MemoryRegionSection {
bool nonvolatile;
};
+struct iommu_fault;
+
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
/* See address_space_translate: bit 0 is read, bit 1 is write. */
@@ -523,6 +525,19 @@ struct IOMMUMemoryRegionClass {
int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
uint64_t page_size_mask,
Error **errp);
+
+ /*
+ * Inject @count faults into the IOMMU memory region
+ *
+ * Optional method: if this method is not provided, then
+ * memory_region_injection_faults() will return -ENOENT
+ *
+ * @iommu: the IOMMU memory region to inject the faults in
+ * @count: number of faults to inject
+ * @buf: fault buffer
+ */
+ int (*inject_faults)(IOMMUMemoryRegion *iommu, int count,
+ struct iommu_fault *buf);
};
typedef struct RamDiscardListener RamDiscardListener;
@@ -1819,6 +1834,15 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr);
int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
uint64_t page_size_mask,
Error **errp);
+/**
+ * memory_region_inject_faults : inject @count faults stored in @buf
+ *
+ * @iommu_mr: the IOMMU memory region
+ * @count: number of faults to be injected
+ * @buf: buffer containing the faults
+ */
+int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count,
+ struct iommu_fault *buf);
/**
* memory_region_name: get a memory region's name
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7340e19ff5..9f98209ab2 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -2111,6 +2111,16 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
rdmc->unregister_listener(rdm, rdl);
}
+int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count,
+ struct iommu_fault *buf)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+ if (!imrc->inject_faults) {
+ return -ENOENT;
+ }
+ return imrc->inject_faults(iommu_mr, count, buf);
+}
+
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
uint8_t mask = 1 << client;
--
2.27.0
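
The intended caller is the VFIO code that drains faults from the host. A hedged sketch of how a batch of kernel iommu_fault records would be handed to the vIOMMU through the new helper; buffer management is simplified and the function name is hypothetical.

#include "qemu/osdep.h"
#include "exec/memory.h"
#ifdef __linux__
#include <linux/iommu.h>
#endif

/* Sketch: forward @count host faults to the vIOMMU backing @iommu_mr. */
static void forward_host_faults(IOMMUMemoryRegion *iommu_mr,
                                struct iommu_fault *buf, int count)
{
    int ret = memory_region_inject_faults(iommu_mr, count, buf);

    if (ret == -ENOENT) {
        /* this vIOMMU does not implement fault injection */
    } else if (ret) {
        /* injection failed: report or drop the faults */
    }
}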

View File

@ -0,0 +1,97 @@
From f5af9ac3c9af4602812060759f6f95da8725314b Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Thu, 10 Feb 2022 11:18:13 +0800
Subject: [PATCH] monitor: Discard BLOCK_IO_ERROR event when VM rebooted
A throttled event like QAPI_EVENT_BLOCK_IO_ERROR may be queued
to limit the event rate. The event may still be delivered when the VM
is rebooted if it was queued in the *monitor_qapi_event_state* hash table,
which may cause the VM to pause and other related problems,
such as SeaBIOS getting blocked during virtio-scsi initialization:
vring_add_buf(vq, sg, out_num, in_num, 0, 0);
vring_kick(vp, vq, 1);
------------> VM paused here <-----------
/* Wait for reply */
while (!vring_more_used(vq)) usleep(5);
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
include/monitor/monitor.h | 2 ++
monitor/monitor.c | 30 ++++++++++++++++++++++++++++++
softmmu/runstate.c | 1 +
3 files changed, 33 insertions(+)
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 12d395d62d..847445f972 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -56,4 +56,6 @@ void monitor_register_hmp(const char *name, bool info,
void monitor_register_hmp_info_hrt(const char *name,
HumanReadableText *(*handler)(Error **errp));
+void monitor_qapi_event_discard_io_error(void);
+
#endif /* MONITOR_H */
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 013c628695..fb4ae9531c 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -34,6 +34,9 @@
#include "qemu/option.h"
#include "sysemu/qtest.h"
#include "trace.h"
+#include "qemu/log.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qobject.h"
/*
* To prevent flooding clients, events can be throttled. The
@@ -767,6 +770,33 @@ int monitor_init_opts(QemuOpts *opts, Error **errp)
return ret;
}
+void monitor_qapi_event_discard_io_error(void)
+{
+ GHashTableIter event_iter;
+ MonitorQAPIEventState *evstate;
+ gpointer key, value;
+ GString *json;
+
+ qemu_mutex_lock(&monitor_lock);
+ g_hash_table_iter_init(&event_iter, monitor_qapi_event_state);
+ while (g_hash_table_iter_next(&event_iter, &key, &value)) {
+ evstate = key;
+ /* Only QAPI_EVENT_BLOCK_IO_ERROR is discarded */
+ if (evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) {
+ g_hash_table_iter_remove(&event_iter);
+ json = qobject_to_json(QOBJECT(evstate->qdict));
+ qemu_log(" %s event discarded\n", json->str);
+ timer_del(evstate->timer);
+ timer_free(evstate->timer);
+ qobject_unref(evstate->data);
+ qobject_unref(evstate->qdict);
+ g_string_free(json, true);
+ g_free(evstate);
+ }
+ }
+ qemu_mutex_unlock(&monitor_lock);
+}
+
QemuOptsList qemu_mon_opts = {
.name = "mon",
.implied_opt_name = "chardev",
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 10d9b7365a..5736d908db 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -448,6 +448,7 @@ void qemu_system_reset(ShutdownCause reason)
qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
}
cpu_synchronize_all_post_reset();
+ monitor_qapi_event_discard_io_error();
}
/*
--
2.27.0

View File

@ -0,0 +1,29 @@
From 44f45b5c163efed5387dac40e229e0a50bf5921a Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Thu, 10 Feb 2022 11:35:58 +0800
Subject: [PATCH] monitor: limit io error qmp event to at most once per 60s
The rate of BLOCK IO ERROR events may be very high (thousands per
second). If we report all of them, the log file will be flooded
with BLOCK IO ERROR events. So throttle them to at most once per 60s.
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
monitor/monitor.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/monitor/monitor.c b/monitor/monitor.c
index fb4ae9531c..621e79eb66 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -300,6 +300,7 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
[QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS },
[QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS },
[QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
+ [QAPI_EVENT_BLOCK_IO_ERROR] = { 60L * 1000 * SCALE_MS },
};
/*
--
2.27.0

View File

@ -0,0 +1,58 @@
From 5db012b1116d21c64da88ad206b3589ddf5f219b Mon Sep 17 00:00:00 2001
From: zhouli57 <zhouli57@huawei.com>
Date: Sat, 18 Dec 2021 09:39:57 +0800
Subject: [PATCH] net: eepro100: validate various address values
(CVE-2021-20255)
Fix CVE-2021-20255; sync the patch from the ostms platform.
patch link: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg06098.html
Signed-off-by: zhouli57 <zhouli57@huawei.com>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
hw/net/eepro100.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index 16e95ef9cc..2474cf3dc2 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -279,6 +279,9 @@ typedef struct {
/* Quasi static device properties (no need to save them). */
uint16_t stats_size;
bool has_extended_tcb_support;
+
+ /* Flag to avoid recursions. */
+ bool busy;
} EEPRO100State;
/* Word indices in EEPROM. */
@@ -837,6 +840,14 @@ static void action_command(EEPRO100State *s)
Therefore we limit the number of iterations. */
unsigned max_loop_count = 16;
+ if (s->busy) {
+ /* Prevent recursions. */
+ logout("recursion in %s:%u\n", __FILE__, __LINE__);
+ return;
+ }
+
+ s->busy = true;
+
for (;;) {
bool bit_el;
bool bit_s;
@@ -933,6 +944,7 @@ static void action_command(EEPRO100State *s)
}
TRACE(OTHER, logout("CU list empty\n"));
/* List is empty. Now CU is idle or suspended. */
+ s->busy = false;
}
static void eepro100_cu_command(EEPRO100State * s, uint8_t val)
--
2.27.0

View File

@ -0,0 +1,86 @@
From 228345cfa59c764e725e2d3680a4bc3ecb237609 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 6 Nov 2020 14:34:35 +0100
Subject: [PATCH] pci: Add return_page_response pci ops
Add a new PCI operation that allows returning page responses
to registered VFIO devices.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/pci/pci.c | 16 ++++++++++++++++
include/hw/iommu/iommu.h | 8 ++++++++
include/hw/pci/pci.h | 4 ++++
3 files changed, 28 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4a9374c025..64db325d6b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2793,6 +2793,22 @@ int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn,
return -ENOENT;
}
+int pci_device_return_page_response(PCIBus *bus, int32_t devfn,
+ IOMMUPageResponse *resp)
+{
+ PCIDevice *dev;
+
+ if (!bus) {
+ return -EINVAL;
+ }
+
+ dev = bus->devices[devfn];
+ if (dev && dev->pasid_ops && dev->pasid_ops->return_page_response) {
+ return dev->pasid_ops->return_page_response(bus, devfn, resp);
+ }
+ return -ENOENT;
+}
+
static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
{
Range *range = opaque;
diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h
index 12092bda7b..5890f095b1 100644
--- a/include/hw/iommu/iommu.h
+++ b/include/hw/iommu/iommu.h
@@ -24,5 +24,13 @@ typedef struct IOMMUConfig {
};
} IOMMUConfig;
+typedef struct IOMMUPageResponse {
+ union {
+#ifdef __linux__
+ struct iommu_page_response resp;
+#endif
+ };
+} IOMMUPageResponse;
+
#endif /* QEMU_HW_IOMMU_IOMMU_H */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index abffa12a99..809eb32f4a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -268,6 +268,8 @@ typedef struct PCIReqIDCache PCIReqIDCache;
struct PCIPASIDOps {
int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
+ int (*return_page_response)(PCIBus *bus, int32_t devfn,
+ IOMMUPageResponse *resp);
};
typedef struct PCIPASIDOps PCIPASIDOps;
@@ -508,6 +510,8 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops);
bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn);
int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
+int pci_device_return_page_response(PCIBus *bus, int32_t devfn,
+ IOMMUPageResponse *resp);
static inline void
pci_set_byte(uint8_t *config, uint8_t val)
--
2.27.0
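
Together with set_pasid_table from the PCIPASIDOps patch, a passthrough device can register both callbacks once at realize time via pci_setup_pasid_ops(). A minimal sketch follows; both callback bodies are placeholders.

#include "qemu/osdep.h"
#include "hw/iommu/iommu.h"
#include "hw/pci/pci.h"

static int my_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config)
{
    return 0; /* placeholder: program the host PASID table here */
}

static int my_return_page_response(PCIBus *bus, int32_t devfn,
                                   IOMMUPageResponse *resp)
{
    return 0; /* placeholder: hand the page response back to the host */
}

static PCIPASIDOps my_pasid_ops = {
    .set_pasid_table = my_set_pasid_table,
    .return_page_response = my_return_page_response,
};

/* Call once per device, e.g. from the device's realize handler. */
static void my_device_setup_pasid_ops(PCIDevice *pdev)
{
    pci_setup_pasid_ops(pdev, &my_pasid_ops);
}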

View File

@ -0,0 +1,51 @@
From 92da19fb18c234bb8872b9d8f7dedcc73e5fcafb Mon Sep 17 00:00:00 2001
From: Prasad J Pandit <pjp@fedoraproject.org>
Date: Wed, 14 Oct 2020 15:00:20 +0800
Subject: [PATCH] pci: check bus pointer before dereference
fix CVE-2020-25742
patch link: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg05294.html
While mapping the IRQ level in the pci_change_irq_level() routine,
it does not check whether pci_get_bus() returned a valid pointer,
which may lead to a NULL pointer dereference. Add a check to
avoid it.
-> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Flsi_nullptr1
==1183858==Hint: address points to the zero page.
#0 pci_change_irq_level hw/pci/pci.c:259
#1 pci_irq_handler hw/pci/pci.c:1445
#2 pci_set_irq hw/pci/pci.c:1463
#3 lsi_set_irq hw/scsi/lsi53c895a.c:488
#4 lsi_update_irq hw/scsi/lsi53c895a.c:523
#5 lsi_script_scsi_interrupt hw/scsi/lsi53c895a.c:554
#6 lsi_execute_script hw/scsi/lsi53c895a.c:1149
#7 lsi_reg_writeb hw/scsi/lsi53c895a.c:1984
#8 lsi_io_write hw/scsi/lsi53c895a.c:2146
...
Reported-by: Ruhr-University <bugs-syssec@rub.de>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
hw/pci/pci.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e5993c1ef5..6d1c39a9de 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -270,6 +270,9 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change)
PCIBus *bus;
for (;;) {
bus = pci_get_bus(pci_dev);
+ if (!bus) {
+ return;
+ }
irq_num = bus->map_irq(pci_dev, irq_num);
if (bus->set_irq)
break;
--
2.27.0


@@ -0,0 +1,127 @@
From c71485494970e7aa986be2b05bf7e2847017e264 Mon Sep 17 00:00:00 2001
From: Liu Yi L <yi.l.liu@intel.com>
Date: Fri, 5 Jul 2019 19:01:36 +0800
Subject: [PATCH] pci: introduce PCIPASIDOps to PCIDevice
This patch introduces PCIPASIDOps for IOMMU related operations.
https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00078.html
https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00940.html
So far, setting up virt-SVA for an assigned SVA-capable device requires
configuring host translation structures for a specific PASID (e.g. binding
the guest page table to the host and enabling nested translation in the
host). Besides, the vIOMMU emulator needs to forward the guest's cache
invalidations to the host since host nested translation is enabled; e.g. on
VT-d the guest owns the 1st-level translation table, so 1st-level cache
invalidations must be propagated to the host.
This patch introduces the PCIPASIDOps callbacks (starting with
set_pasid_table) to support such operations. The implementations of the
callbacks are provided by device passthrough modules such as vfio.
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/pci/pci.c | 34 ++++++++++++++++++++++++++++++++++
include/hw/pci/pci.h | 11 +++++++++++
2 files changed, 45 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e5993c1ef5..4a9374c025 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2759,6 +2759,40 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
bus->iommu_opaque = opaque;
}
+void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
+{
+ assert(ops && !dev->pasid_ops);
+ dev->pasid_ops = ops;
+}
+
+bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn)
+{
+ PCIDevice *dev;
+
+ if (!bus) {
+ return false;
+ }
+
+ dev = bus->devices[devfn];
+ return !!(dev && dev->pasid_ops);
+}
+
+int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn,
+ IOMMUConfig *config)
+{
+ PCIDevice *dev;
+
+ if (!bus) {
+ return -EINVAL;
+ }
+
+ dev = bus->devices[devfn];
+ if (dev && dev->pasid_ops && dev->pasid_ops->set_pasid_table) {
+ return dev->pasid_ops->set_pasid_table(bus, devfn, config);
+ }
+ return -ENOENT;
+}
+
static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
{
Range *range = opaque;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e7cdf2d5ec..abffa12a99 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -9,6 +9,7 @@
#include "hw/pci/pcie.h"
#include "qom/object.h"
+#include "hw/iommu/iommu.h"
extern bool pci_available;
@@ -265,6 +266,11 @@ struct PCIReqIDCache {
};
typedef struct PCIReqIDCache PCIReqIDCache;
+struct PCIPASIDOps {
+ int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
+};
+typedef struct PCIPASIDOps PCIPASIDOps;
+
struct PCIDevice {
DeviceState qdev;
bool partially_hotplugged;
@@ -361,6 +367,7 @@ struct PCIDevice {
/* ID of standby device in net_failover pair */
char *failover_pair_id;
uint32_t acpi_index;
+ PCIPASIDOps *pasid_ops;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -498,6 +505,10 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops);
+bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn);
+int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config);
+
static inline void
pci_set_byte(uint8_t *config, uint8_t val)
{
--
2.27.0
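
As a usage illustration (not part of the series), a pass-through backend would typically register these callbacks at realize time roughly as in the sketch below. The device and callback names are made up; pci_setup_pasid_ops(), PCIPASIDOps and IOMMUConfig are the ones introduced by this patch.

```c
/* Sketch: hypothetical backend registering PCIPASIDOps. */
static int my_passthru_set_pasid_table(PCIBus *bus, int32_t devfn,
                                       IOMMUConfig *config)
{
    /* Propagate the guest PASID table config to the host IOMMU here
     * (mechanism assumed, e.g. a VFIO ioctl). */
    return 0;
}

static PCIPASIDOps my_passthru_pasid_ops = {
    .set_pasid_table = my_passthru_set_pasid_table,
};

static void my_passthru_realize(PCIDevice *pdev, Error **errp)
{
    /* ... usual realize work ... */
    pci_setup_pasid_ops(pdev, &my_passthru_pasid_ops);
}
```

The vIOMMU then reaches the callback through pci_device_set_pasid_table(), which returns -ENOENT when the device registered no such callback.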


@@ -0,0 +1,31 @@
From ada323e932c83271184a6ddba1cfd74a29378963 Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Thu, 29 Jul 2021 15:24:48 +0800
Subject: [PATCH] qdev/monitors: Fix reundant error_setg of qdev_add_device
There is a redundant error_setg() call in qdev_add_device(). When
hot-plugging a device, if the corresponding bus doesn't exist, the
extra call triggers the assertion "assert(*errp == NULL)".
Fixes: 515a7970490 (log: Add some logs on VM runtime path)
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
softmmu/qdev-monitor.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index dfd6429bf3..4a20f5dbd7 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -636,7 +636,6 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
if (path != NULL) {
bus = qbus_find(path, errp);
if (!bus) {
- error_setg(errp, "can not find bus for %s", driver);
return NULL;
}
if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) {
--
2.27.0

qemu.spec

@@ -1,6 +1,6 @@
Name: qemu
Version: 6.2.0
Release: 11
Release: 12
Epoch: 2
Summary: QEMU is a generic and open source machine emulator and virtualizer
License: GPLv2 and BSD and MIT and CC-BY-SA-4.0
@@ -86,6 +86,57 @@ Patch0073: seabios-increase-the-seabios-high-mem-zone-size.patch
Patch0074: seabios-increase-the-seabios-minibiostable.patch
Patch0075: IPv6-add-support-for-IPv6-protocol.patch
Patch0076: Use-post-increment-only-in-inffast.c.patch
Patch0077: util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch
Patch0078: log-Add-some-logs-on-VM-runtime-path.patch
Patch0079: qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch
Patch0080: bios-tables-test-Allow-changes-to-q35-SSDT.dimmpxm-f.patch
Patch0081: smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch
Patch0082: net-eepro100-validate-various-address-valuesi-CVE-20.patch
Patch0083: pci-check-bus-pointer-before-dereference.patch
Patch0084: ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch
Patch0085: tap-return-err-when-tap-TUNGETIFF-fail.patch
Patch0086: xhci-check-reg-to-avoid-OOB-read.patch
Patch0087: monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch
Patch0088: monitor-limit-io-error-qmp-event-to-at-most-once-per.patch
Patch0089: linux-headers-update-against-5.10-and-manual-clear-v.patch
Patch0090: vfio-Maintain-DMA-mapping-range-for-the-container.patch
Patch0091: vfio-migration-Add-support-for-manual-clear-vfio-dir.patch
Patch0092: update-linux-headers-Import-iommu.h.patch
Patch0093: vfio.h-and-iommu.h-header-update-against-5.10.patch
Patch0094: memory-Add-new-fields-in-IOTLBEntry.patch
Patch0095: hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch
Patch0096: hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch
Patch0097: memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch
Patch0098: memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch
Patch0099: memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch
Patch0100: iommu-Introduce-generic-header.patch
Patch0101: pci-introduce-PCIPASIDOps-to-PCIDevice.patch
Patch0102: vfio-Force-nested-if-iommu-requires-it.patch
Patch0103: vfio-Introduce-hostwin_from_range-helper.patch
Patch0104: vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch
Patch0105: vfio-Set-up-nested-stage-mappings.patch
Patch0106: vfio-Pass-stage-1-MSI-bindings-to-the-host.patch
Patch0107: vfio-Helper-to-get-IRQ-info-including-capabilities.patch
Patch0108: vfio-pci-Register-handler-for-iommu-fault.patch
Patch0109: vfio-pci-Set-up-the-DMA-FAULT-region.patch
Patch0110: vfio-pci-Implement-the-DMA-fault-handler.patch
Patch0111: hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch
Patch0112: hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch
Patch0113: hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch
Patch0114: hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch
Patch0115: hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch
Patch0116: hw-arm-smmuv3-Implement-fault-injection.patch
Patch0117: hw-arm-smmuv3-Allow-MAP-notifiers.patch
Patch0118: pci-Add-return_page_response-pci-ops.patch
Patch0119: vfio-pci-Implement-return_page_response-page-respons.patch
Patch0120: vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch
Patch0121: vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch
Patch0122: vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch
Patch0123: vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch
Patch0124: vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch
Patch0125: hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch
Patch0126: vfio-common-Fix-incorrect-address-alignment-in-vfio_.patch
Patch0127: vfio-common-Add-address-alignment-check-in-vfio_list.patch
BuildRequires: flex
BuildRequires: gcc
@@ -530,6 +581,63 @@ getent passwd qemu >/dev/null || \
%endif
%changelog
* Sat Feb 12 2022 Chen Qun <kuhn.chenqun@huawei.com>
- linux-headers: update against 5.10 and manual clear vfio dirty log series
- vfio: Maintain DMA mapping range for the container
- vfio/migration: Add support for manual clear vfio dirty log
- update-linux-headers: Import iommu.h
- vfio.h and iommu.h header update against 5.10
- memory: Add new fields in IOTLBEntry
- hw/arm/smmuv3: Improve stage1 ASID invalidation
- hw/arm/smmu-common: Allow domain invalidation for NH_ALL/NSNH_ALL
- memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region attribute
- memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region attribute
- memory: Introduce IOMMU Memory Region inject_faults API
- iommu: Introduce generic header
- pci: introduce PCIPASIDOps to PCIDevice
- vfio: Force nested if iommu requires it
- vfio: Introduce hostwin_from_range helper
- vfio: Introduce helpers to DMA map/unmap a RAM section
- vfio: Set up nested stage mappings
- vfio: Pass stage 1 MSI bindings to the host
- vfio: Helper to get IRQ info including capabilities
- vfio/pci: Register handler for iommu fault
- vfio/pci: Set up the DMA FAULT region
- vfio/pci: Implement the DMA fault handler
- hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute
- hw/arm/smmuv3: Store the PASID table GPA in the translation config
- hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA invalidation
- hw/arm/smmuv3: Fill the IOTLBEntry leaf field on NH_VA invalidation
- hw/arm/smmuv3: Pass stage 1 configurations to the host
- hw/arm/smmuv3: Implement fault injection
- hw/arm/smmuv3: Allow MAP notifiers
- pci: Add return_page_response pci ops
- vfio/pci: Implement return_page_response page response callback
- vfio/common: Avoid unmap ram section at vfio_listener_region_del() in nested mode
- vfio: Introduce helpers to mark dirty pages of a RAM section
- vfio: Add vfio_prereg_listener_log_sync in nested stage
- vfio: Add vfio_prereg_listener_log_clear to re-enable mark dirty pages
- vfio: Add vfio_prereg_listener_global_log_start/stop in nested stage
- hw/arm/smmuv3: Post-load stage 1 configurations to the host
- vfio/common: Fix incorrect address alignment in vfio_dma_map_ram_section
- vfio/common: Add address alignment check in vfio_listener_region_del
* Sat Feb 12 2022 Chen Qun <kuhn.chenqun@huawei.com>
- log: Add some logs on VM runtime path
- qdev/monitors: Fix reundant error_setg of qdev_add_device
- bios-tables-test: Allow changes to q35/SSDT.dimmpxm file
- smbios: Add missing member of type 4 for smbios 3.0
- net: eepro100: validate various address valuesi(CVE-2021-20255)
- pci: check bus pointer before dereference
- ide: ahci: add check to avoid null dereference (CVE-2019-12067)
- tap: return err when tap TUNGETIFF fail
- xhci: check reg to avoid OOB read
- monitor: Discard BLOCK_IO_ERROR event when VM rebooted
- monitor: limit io error qmp event to at most once per 60s
* Sat Feb 12 2022 Chen Qun <kuhn.chenqun@huawei.com>
- util/log: add CONFIG_DISABLE_QEMU_LOG macro
* Sat Feb 12 2022 Yan Wang <wangyan122@huawei.com>
- ipxe: IPv6 add support for IPv6 protocol
- u-boot: Use post increment only in inffast.c


@@ -0,0 +1,56 @@
From 937e22eda2480a64095928ee8df0d37b3313bb64 Mon Sep 17 00:00:00 2001
From: Ying Fang <fangying1@huawei.com>
Date: Tue, 14 Apr 2020 14:53:44 +0800
Subject: [PATCH] smbios: Add missing member of type 4 for smbios 3.0
According to the SMBIOS 3.0 spec, processor information (type 4) gains
three new members (Core Count 2, Core Enabled 2, Thread Count 2). Without
these three members we cannot get the correct CPU frequency from DMI,
because the length check of the Processor Information entry in DMI fails.
The corresponding kernel code looks like:
if (dm->type == DMI_ENTRY_PROCESSOR &&
dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
u16 val = (u16)get_unaligned((const u16 *)
(dmi_data + DMI_PROCESSOR_MAX_SPEED));
*mhz = val > *mhz ? val : *mhz;
}
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
hw/smbios/smbios.c | 4 +++-
include/hw/firmware/smbios.h | 3 +++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 7397e56737..66be9aee09 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -688,7 +688,9 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
t->thread_count = ms->smp.threads;
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
t->processor_family2 = cpu_to_le16(0x01); /* Other */
-
+ t->corecount2 = 0;
+ t->enabledcorecount2 = 0;
+ t->threadcount2 = 0;
SMBIOS_BUILD_TABLE_POST;
smbios_type4_count++;
}
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
index 5a0dd0c8cf..5a696cf75a 100644
--- a/include/hw/firmware/smbios.h
+++ b/include/hw/firmware/smbios.h
@@ -193,6 +193,9 @@ struct smbios_type_4 {
uint8_t thread_count;
uint16_t processor_characteristics;
uint16_t processor_family2;
+ uint16_t corecount2;
+ uint16_t enabledcorecount2;
+ uint16_t threadcount2;
} QEMU_PACKED;
/* SMBIOS type 11 - OEM strings */
--
2.27.0


@@ -0,0 +1,30 @@
From 48a38f409a25f26605d65346c8ed9403c4b36c80 Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Thu, 10 Feb 2022 10:28:59 +0800
Subject: [PATCH] tap: return err when tap TUNGETIFF fail
When hot-plugging an OVS kernel netcard, even if the tap TUNGETIFF
ioctl failed, the hotplug would go on and eventually lead to a QEMU
assert. The failure should instead jump to free_fail.
Signed-off-by: miaoyubo <miaoyubo@huawei.com>
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
net/tap.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/tap.c b/net/tap.c
index f716be3e3f..c5cbeaa7a2 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -900,6 +900,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
if (i == 0) {
vnet_hdr = tap_probe_vnet_hdr(fd, errp);
if (vnet_hdr < 0) {
+ ret = -1;
goto free_fail;
}
} else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
--
2.27.0


@@ -0,0 +1,29 @@
From 694acf3c321908d26ce508842b7bd076664ffbc6 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 9 May 2019 10:23:42 -0400
Subject: [PATCH] update-linux-headers: Import iommu.h
Update the script to import the new iommu.h uapi header.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
scripts/update-linux-headers.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index fea4d6eb65..acde610733 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -144,7 +144,7 @@ done
rm -rf "$output/linux-headers/linux"
mkdir -p "$output/linux-headers/linux"
-for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
+for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h iommu.h \
psci.h psp-sev.h userfaultfd.h mman.h; do
cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
done
--
2.27.0


@@ -0,0 +1,41 @@
From 05462305ec8b9ce5b414ede1e7e680b16d1a08ad Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Fri, 11 Feb 2022 18:20:59 +0800
Subject: [PATCH] util/log: add CONFIG_DISABLE_QEMU_LOG macro
Use the CONFIG_DISABLE_QEMU_LOG macro to compile out the body of
the qemu_log() function.
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
util/log.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/util/log.c b/util/log.c
index 2ee1500bee..ed3029fe5c 100644
--- a/util/log.c
+++ b/util/log.c
@@ -34,6 +34,12 @@ int qemu_loglevel;
static int log_append = 0;
static GArray *debug_regions;
+#ifdef CONFIG_DISABLE_QEMU_LOG
+int qemu_log(const char *fmt, ...)
+{
+ return 0;
+}
+#else
/* Return the number of characters emitted. */
int qemu_log(const char *fmt, ...)
{
@@ -56,6 +62,7 @@ int qemu_log(const char *fmt, ...)
rcu_read_unlock();
return ret;
}
+#endif
static void __attribute__((__constructor__)) qemu_logfile_init(void)
{
--
2.27.0
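
The same compile-time stubbing pattern can be shown in a self-contained form. The names below (CONFIG_DISABLE_MY_LOG, my_log) are made up for the example and are not QEMU's; only the pattern matches the change above.

```c
#include <stdarg.h>
#include <stdio.h>

#ifdef CONFIG_DISABLE_MY_LOG
int my_log(const char *fmt, ...)
{
    return 0;                        /* logging compiled out */
}
#else
int my_log(const char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);
    ret = vfprintf(stderr, fmt, ap);
    va_end(ap);
    return ret;
}
#endif

int main(void)
{
    return my_log("hello %d\n", 42) < 0;
}
```

Note that a function stub still evaluates the call arguments at runtime; a macro expanding to nothing would avoid even that, at the cost of the symbol no longer being addressable.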


@@ -0,0 +1,71 @@
From 287c63ab540533f1f9642e753c091caa7e6e2511 Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Tue, 11 May 2021 10:08:15 +0800
Subject: [PATCH] vfio: Add vfio_prereg_listener_global_log_start/stop in
nested stage
In nested mode, we set up stage 2 and stage 1 separately:
vfio_memory_prereg_listener is used for stage 2 and
vfio_memory_listener is used for stage 1. So it is odd to call
the global_log_start/stop interface in vfio_memory_listener to switch
dirty tracking, although this won't cause any errors. Adding the
global_log_start/stop interface to vfio_memory_prereg_listener
separates stage 2 from stage 1.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 20c820aa74..65f3979492 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1501,6 +1501,17 @@ static void vfio_listener_log_global_start(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ /* For nested mode, vfio_prereg_listener is used to start dirty tracking */
+ if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) {
+ vfio_set_dirty_page_tracking(container, true);
+ }
+}
+
+static void vfio_prereg_listener_log_global_start(MemoryListener *listener)
+{
+ VFIOContainer *container =
+ container_of(listener, VFIOContainer, prereg_listener);
+
vfio_set_dirty_page_tracking(container, true);
}
@@ -1508,6 +1519,17 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ /* For nested mode, vfio_prereg_listener is used to stop dirty tracking */
+ if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) {
+ vfio_set_dirty_page_tracking(container, false);
+ }
+}
+
+static void vfio_prereg_listener_log_global_stop(MemoryListener *listener)
+{
+ VFIOContainer *container =
+ container_of(listener, VFIOContainer, prereg_listener);
+
vfio_set_dirty_page_tracking(container, false);
}
@@ -1922,6 +1944,8 @@ static const MemoryListener vfio_memory_listener = {
static MemoryListener vfio_memory_prereg_listener = {
.region_add = vfio_prereg_listener_region_add,
.region_del = vfio_prereg_listener_region_del,
+ .log_global_start = vfio_prereg_listener_log_global_start,
+ .log_global_stop = vfio_prereg_listener_log_global_stop,
.log_sync = vfio_prereg_listener_log_sync,
.log_clear = vfio_prereg_listener_log_clear,
};
--
2.27.0


@@ -0,0 +1,84 @@
From 7086df6d90cd698a3e20cf4cf6e9a834f168cd8f Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Sat, 31 Jul 2021 09:40:24 +0800
Subject: [PATCH] vfio: Add vfio_prereg_listener_log_clear to re-enable mark
dirty pages
When tracking dirty pages, we just need to pay attention to stage 2
mappings. Legacy vfio_listener_log_clear cannot be used in nested
stage. This patch adds vfio_prereg_listener_log_clear to re-enable
marking of dirty pages in nested mode.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 2506cd57ee..20c820aa74 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1857,6 +1857,43 @@ static int vfio_physical_log_clear(VFIOContainer *container,
return ret;
}
+static void vfio_prereg_listener_log_clear(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container =
+ container_of(listener, VFIOContainer, prereg_listener);
+
+ if (!memory_region_is_ram(section->mr)) {
+ return;
+ }
+
+ vfio_physical_log_clear(container, section);
+}
+
+static int vfio_clear_dirty_bitmap(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ if (memory_region_is_iommu(section->mr)) {
+ /*
+ * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are
+ * set up separately. It is inappropriate to pass 'giova' to kernel
+ * to get dirty pages. We only need to focus on stage 2 mapping when
+ * marking dirty pages.
+ */
+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ return 0;
+ }
+
+ /*
+ * TODO: x86. With the log_clear() interface added, x86 may inplement
+ * its own method.
+ */
+ }
+
+ /* Here we assume that memory_region_is_ram(section->mr) == true */
+ return vfio_physical_log_clear(container, section);
+}
+
static void vfio_listener_log_clear(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1868,7 +1905,7 @@ static void vfio_listener_log_clear(MemoryListener *listener,
}
if (vfio_devices_all_dirty_tracking(container)) {
- vfio_physical_log_clear(container, section);
+ vfio_clear_dirty_bitmap(container, section);
}
}
@@ -1886,6 +1923,7 @@ static MemoryListener vfio_memory_prereg_listener = {
.region_add = vfio_prereg_listener_region_add,
.region_del = vfio_prereg_listener_region_del,
.log_sync = vfio_prereg_listener_log_sync,
+ .log_clear = vfio_prereg_listener_log_clear,
};
static void vfio_listener_release(VFIOContainer *container)
--
2.27.0


@@ -0,0 +1,74 @@
From f4523389bf57593484308124e06d67855bb79315 Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Tue, 11 May 2021 10:08:14 +0800
Subject: [PATCH] vfio: Add vfio_prereg_listener_log_sync in nested stage
In nested mode, we set up the stage 2 (gpa->hpa) and stage 1
(giova->gpa) separately by vfio_prereg_listener_region_add()
and vfio_listener_region_add(). So when marking dirty pages
we just need to pay attention to stage 2 mappings.
Legacy vfio_listener_log_sync cannot be used in nested stage.
This patch adds vfio_prereg_listener_log_sync to mark dirty
pages in nested mode.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6136b1ef61..2506cd57ee 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1579,6 +1579,22 @@ static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container,
int128_get64(section->size), ram_addr);
}
+static void vfio_prereg_listener_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container =
+ container_of(listener, VFIOContainer, prereg_listener);
+
+ if (!memory_region_is_ram(section->mr) ||
+ !container->dirty_pages_supported) {
+ return;
+ }
+
+ if (vfio_devices_all_dirty_tracking(container)) {
+ vfio_dma_sync_ram_section_dirty_bitmap(container, section);
+ }
+}
+
typedef struct {
IOMMUNotifier n;
VFIOGuestIOMMU *giommu;
@@ -1666,6 +1682,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
if (memory_region_is_iommu(section->mr)) {
VFIOGuestIOMMU *giommu;
+ /*
+ * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are
+ * set up separately. It is inappropriate to pass 'giova' to kernel
+ * to get dirty pages. We only need to focus on stage 2 mapping when
+ * marking dirty pages.
+ */
+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ return 0;
+ }
+
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
if (MEMORY_REGION(giommu->iommu) == section->mr &&
giommu->n.start == section->offset_within_region) {
@@ -1859,6 +1885,7 @@ static const MemoryListener vfio_memory_listener = {
static MemoryListener vfio_memory_prereg_listener = {
.region_add = vfio_prereg_listener_region_add,
.region_del = vfio_prereg_listener_region_del,
+ .log_sync = vfio_prereg_listener_log_sync,
};
static void vfio_listener_release(VFIOContainer *container)
--
2.27.0


@@ -0,0 +1,101 @@
From e7eef5af743a53f0415267ebe9bba2e5f0e05816 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 28 Aug 2018 16:16:20 +0200
Subject: [PATCH] vfio: Force nested if iommu requires it
In case we detect the address space is translated by
a virtual IOMMU which requires HW nested paging to
integrate with VFIO, let's set up the container with
the VFIO_TYPE1_NESTING_IOMMU iommu_type.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6cb91e7ffd..d7533637c9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -2045,27 +2045,38 @@ static void vfio_put_address_space(VFIOAddressSpace *space)
* vfio_get_iommu_type - selects the richest iommu_type (v2 first)
*/
static int vfio_get_iommu_type(VFIOContainer *container,
+ bool want_nested,
Error **errp)
{
- int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+ int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
+ VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
- int i;
+ int i, ret = -EINVAL;
for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
- return iommu_types[i];
+ if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
+ continue;
+ }
+ ret = iommu_types[i];
+ break;
}
}
- error_setg(errp, "No available IOMMU models");
- return -EINVAL;
+ if (ret < 0) {
+ error_setg(errp, "No available IOMMU models");
+ } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
+ error_setg(errp, "Nested mode requested but not supported");
+ ret = -EINVAL;
+ }
+ return ret;
}
static int vfio_init_container(VFIOContainer *container, int group_fd,
- Error **errp)
+ bool want_nested, Error **errp)
{
int iommu_type, dirty_log_manual_clear, ret;
- iommu_type = vfio_get_iommu_type(container, errp);
+ iommu_type = vfio_get_iommu_type(container, want_nested, errp);
if (iommu_type < 0) {
return iommu_type;
}
@@ -2177,6 +2188,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
VFIOContainer *container;
int ret, fd;
VFIOAddressSpace *space;
+ IOMMUMemoryRegion *iommu_mr;
+ bool nested = false;
+
+ if (memory_region_is_iommu(as->root)) {
+ iommu_mr = IOMMU_MEMORY_REGION(as->root);
+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED,
+ (void *)&nested);
+ }
space = vfio_get_address_space(as);
@@ -2257,7 +2276,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
QLIST_INIT(&container->vrdl_list);
QLIST_INIT(&container->dma_list);
- ret = vfio_init_container(container, group->fd, errp);
+ ret = vfio_init_container(container, group->fd, nested, errp);
if (ret) {
goto free_container_exit;
}
@@ -2269,6 +2288,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
}
switch (container->iommu_type) {
+ case VFIO_TYPE1_NESTING_IOMMU:
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
{
--
2.27.0
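
The probing order can also be exercised from plain user space. The standalone sketch below assumes a uapi vfio.h that defines VFIO_TYPE1_NESTING_IOMMU and simply reports the first supported type, in the same preference order as vfio_get_iommu_type() above.

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vfio.h>

int main(void)
{
    int types[] = { VFIO_TYPE1_NESTING_IOMMU,
                    VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU };
    int fd = open("/dev/vfio/vfio", O_RDWR);

    if (fd < 0) {
        perror("open /dev/vfio/vfio");
        return 1;
    }
    for (unsigned i = 0; i < sizeof(types) / sizeof(types[0]); i++) {
        /* VFIO_CHECK_EXTENSION returns non-zero if the type is supported. */
        if (ioctl(fd, VFIO_CHECK_EXTENSION, types[i])) {
            printf("first supported IOMMU type: %d\n", types[i]);
            break;
        }
    }
    close(fd);
    return 0;
}
```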


@@ -0,0 +1,178 @@
From a4336765c99a876743c0ead89997ad6f97d7b442 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 20 Jun 2019 16:39:57 +0200
Subject: [PATCH] vfio: Helper to get IRQ info including capabilities
As done for vfio regions, add helpers to retrieve irq info
including their optional capabilities.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 97 +++++++++++++++++++++++++++++++++++
hw/vfio/trace-events | 1 +
include/hw/vfio/vfio-common.h | 7 +++
3 files changed, 105 insertions(+)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 1f78af121d..d05a485808 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1919,6 +1919,25 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
return true;
}
+struct vfio_info_cap_header *
+vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id)
+{
+ struct vfio_info_cap_header *hdr;
+ void *ptr = info;
+
+ if (!(info->flags & VFIO_IRQ_INFO_FLAG_CAPS)) {
+ return NULL;
+ }
+
+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+ if (hdr->id == id) {
+ return hdr;
+ }
+ }
+
+ return NULL;
+}
+
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
struct vfio_region_info *info)
{
@@ -2887,6 +2906,33 @@ retry:
return 0;
}
+int vfio_get_irq_info(VFIODevice *vbasedev, int index,
+ struct vfio_irq_info **info)
+{
+ size_t argsz = sizeof(struct vfio_irq_info);
+
+ *info = g_malloc0(argsz);
+
+ (*info)->index = index;
+retry:
+ (*info)->argsz = argsz;
+
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, *info)) {
+ g_free(*info);
+ *info = NULL;
+ return -errno;
+ }
+
+ if ((*info)->argsz > argsz) {
+ argsz = (*info)->argsz;
+ *info = g_realloc(*info, argsz);
+
+ goto retry;
+ }
+
+ return 0;
+}
+
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info)
{
@@ -2922,6 +2968,42 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
return -ENODEV;
}
+int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type,
+ uint32_t subtype, struct vfio_irq_info **info)
+{
+ int i;
+
+ for (i = 0; i < vbasedev->num_irqs; i++) {
+ struct vfio_info_cap_header *hdr;
+ struct vfio_irq_info_cap_type *cap_type;
+
+ if (vfio_get_irq_info(vbasedev, i, info)) {
+ continue;
+ }
+
+ hdr = vfio_get_irq_info_cap(*info, VFIO_IRQ_INFO_CAP_TYPE);
+ if (!hdr) {
+ g_free(*info);
+ continue;
+ }
+
+ cap_type = container_of(hdr, struct vfio_irq_info_cap_type, header);
+
+ trace_vfio_get_dev_irq(vbasedev->name, i,
+ cap_type->type, cap_type->subtype);
+
+ if (cap_type->type == type && cap_type->subtype == subtype) {
+ return 0;
+ }
+
+ g_free(*info);
+ }
+
+ *info = NULL;
+ return -ENODEV;
+}
+
+
bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
struct vfio_region_info *info = NULL;
@@ -2937,6 +3019,21 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
return ret;
}
+bool vfio_has_irq_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
+{
+ struct vfio_region_info *info = NULL;
+ bool ret = false;
+
+ if (!vfio_get_region_info(vbasedev, region, &info)) {
+ if (vfio_get_region_info_cap(info, cap_type)) {
+ ret = true;
+ }
+ g_free(info);
+ }
+
+ return ret;
+}
+
/*
* Interfaces for IBM EEH (Enhanced Error Handling)
*/
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 35bd415d6d..f5fe201ab5 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -117,6 +117,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
+vfio_get_dev_irq(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
vfio_dma_unmap_overflow_workaround(void) ""
vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d"
vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index a838a939e4..7fdca26fa0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -254,6 +254,13 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
unsigned int *avail);
struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
+int vfio_get_irq_info(VFIODevice *vbasedev, int index,
+ struct vfio_irq_info **info);
+int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type,
+ uint32_t subtype, struct vfio_irq_info **info);
+bool vfio_has_irq_cap(VFIODevice *vbasedev, int irq, uint16_t cap_type);
+struct vfio_info_cap_header *
+vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id);
#endif
extern const MemoryListener vfio_prereg_listener;
--
2.27.0
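
The argsz grow-and-retry handshake used by the new vfio_get_irq_info() is a general VFIO pattern. A standalone sketch of the same loop, written against VFIO_DEVICE_GET_REGION_INFO (whose capability chain is in mainline uapi headers), looks like this; the helper name is made up.

```c
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Sketch: grow the buffer until the kernel-reported argsz fits, so any
 * capability chain past the fixed struct is captured as well. */
struct vfio_region_info *get_region_info(int device_fd, unsigned int index)
{
    size_t argsz = sizeof(struct vfio_region_info);
    struct vfio_region_info *info = calloc(1, argsz);

    if (!info) {
        return NULL;
    }
    info->index = index;
retry:
    info->argsz = argsz;
    if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info)) {
        free(info);
        return NULL;
    }
    if (info->argsz > argsz) {       /* capabilities need more room */
        struct vfio_region_info *bigger;

        argsz = info->argsz;
        bigger = realloc(info, argsz);
        if (!bigger) {
            free(info);
            return NULL;
        }
        info = bigger;
        goto retry;
    }
    return info;
}
```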


@@ -0,0 +1,280 @@
From dab969657d8ff8b175856f91b035b74849cf69ba Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 30 Aug 2018 15:04:25 +0200
Subject: [PATCH] vfio: Introduce helpers to DMA map/unmap a RAM section
Let's introduce two helpers that DMA map/unmap a RAM section.
These helpers will be called from another call site for nested
stage setup. This also makes the structure of
vfio_listener_region_add/del() clearer.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 206 +++++++++++++++++++++++++------------------
hw/vfio/trace-events | 4 +-
2 files changed, 123 insertions(+), 87 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index d358789f19..b3dc090840 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -922,13 +922,130 @@ hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
return NULL;
}
+static int vfio_dma_map_ram_section(VFIOContainer *container,
+ MemoryRegionSection *section, Error **err)
+{
+ VFIOHostDMAWindow *hostwin;
+ Int128 llend, llsize;
+ hwaddr iova, end;
+ void *vaddr;
+ int ret;
+
+ assert(memory_region_is_ram(section->mr));
+
+ iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+ llend = int128_make64(section->offset_within_address_space);
+ llend = int128_add(llend, section->size);
+ llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+ end = int128_get64(int128_sub(llend, int128_one()));
+
+ vaddr = memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region +
+ (iova - section->offset_within_address_space);
+
+ hostwin = hostwin_from_range(container, iova, end);
+ if (!hostwin) {
+ error_setg(err, "Container %p can't map guest IOVA region"
+ " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
+ return -EFAULT;
+ }
+
+ trace_vfio_dma_map_ram(iova, end, vaddr);
+
+ llsize = int128_sub(llend, int128_make64(iova));
+
+ if (memory_region_is_ram_device(section->mr)) {
+ hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+
+ if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
+ trace_vfio_listener_region_add_no_dma_map(
+ memory_region_name(section->mr),
+ section->offset_within_address_space,
+ int128_getlo(section->size),
+ pgmask + 1);
+ return 0;
+ }
+ }
+
+ ret = vfio_dma_map(container, iova, int128_get64(llsize),
+ vaddr, section->readonly);
+ if (ret) {
+ error_setg(err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx", %p) = %d (%m)",
+ container, iova, int128_get64(llsize), vaddr, ret);
+ if (memory_region_is_ram_device(section->mr)) {
+ /* Allow unexpected mappings not to be fatal for RAM devices */
+ error_report_err(*err);
+ return 0;
+ }
+ return ret;
+ }
+ return 0;
+}
+
+static void vfio_dma_unmap_ram_section(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ Int128 llend, llsize;
+ hwaddr iova, end;
+ bool try_unmap = true;
+ int ret;
+
+ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
+ llend = int128_make64(section->offset_within_address_space);
+ llend = int128_add(llend, section->size);
+ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
+
+ if (int128_ge(int128_make64(iova), llend)) {
+ return;
+ }
+ end = int128_get64(int128_sub(llend, int128_one()));
+
+ llsize = int128_sub(llend, int128_make64(iova));
+
+ trace_vfio_dma_unmap_ram(iova, end);
+
+ if (memory_region_is_ram_device(section->mr)) {
+ hwaddr pgmask;
+ VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
+
+ assert(hostwin); /* or region_add() would have failed */
+
+ pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+ try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
+ } else if (memory_region_has_ram_discard_manager(section->mr)) {
+ vfio_unregister_ram_discard_listener(container, section);
+ /* Unregistering will trigger an unmap. */
+ try_unmap = false;
+ }
+
+ if (try_unmap) {
+ if (int128_eq(llsize, int128_2_64())) {
+ /* The unmap ioctl doesn't accept a full 64-bit span. */
+ llsize = int128_rshift(llsize, 1);
+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
+ if (ret) {
+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%m)",
+ container, iova, int128_get64(llsize), ret);
+ }
+ iova += int128_get64(llsize);
+ }
+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
+ if (ret) {
+ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%m)",
+ container, iova, int128_get64(llsize), ret);
+ }
+ }
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
hwaddr iova, end;
- Int128 llend, llsize;
- void *vaddr;
+ Int128 llend;
int ret;
VFIOHostDMAWindow *hostwin;
Error *err = NULL;
@@ -1092,38 +1209,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
return;
}
- vaddr = memory_region_get_ram_ptr(section->mr) +
- section->offset_within_region +
- (iova - section->offset_within_address_space);
-
- trace_vfio_listener_region_add_ram(iova, end, vaddr);
-
- llsize = int128_sub(llend, int128_make64(iova));
-
- if (memory_region_is_ram_device(section->mr)) {
- hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
-
- if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
- trace_vfio_listener_region_add_no_dma_map(
- memory_region_name(section->mr),
- section->offset_within_address_space,
- int128_getlo(section->size),
- pgmask + 1);
- return;
- }
- }
-
- ret = vfio_dma_map(container, iova, int128_get64(llsize),
- vaddr, section->readonly);
- if (ret) {
- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, int128_get64(llsize), vaddr, ret);
- if (memory_region_is_ram_device(section->mr)) {
- /* Allow unexpected mappings not to be fatal for RAM devices */
- error_report_err(err);
- return;
- }
+ if (vfio_dma_map_ram_section(container, section, &err)) {
goto fail;
}
@@ -1157,10 +1243,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- hwaddr iova, end;
- Int128 llend, llsize;
- int ret;
- bool try_unmap = true;
if (vfio_listener_skipped_section(section)) {
trace_vfio_listener_region_del_skip(
@@ -1200,53 +1282,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
*/
}
- iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
- llend = int128_make64(section->offset_within_address_space);
- llend = int128_add(llend, section->size);
- llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
-
- if (int128_ge(int128_make64(iova), llend)) {
- return;
- }
- end = int128_get64(int128_sub(llend, int128_one()));
-
- llsize = int128_sub(llend, int128_make64(iova));
-
- trace_vfio_listener_region_del(iova, end);
-
- if (memory_region_is_ram_device(section->mr)) {
- hwaddr pgmask;
- VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
-
- assert(hostwin); /* or region_add() would have failed */
-
- pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
- try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
- } else if (memory_region_has_ram_discard_manager(section->mr)) {
- vfio_unregister_ram_discard_listener(container, section);
- /* Unregistering will trigger an unmap. */
- try_unmap = false;
- }
-
- if (try_unmap) {
- if (int128_eq(llsize, int128_2_64())) {
- /* The unmap ioctl doesn't accept a full 64-bit span. */
- llsize = int128_rshift(llsize, 1);
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
- if (ret) {
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, int128_get64(llsize), ret);
- }
- iova += int128_get64(llsize);
- }
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
- if (ret) {
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, int128_get64(llsize), ret);
- }
- }
+ vfio_dma_unmap_ram_section(container, section);
memory_region_unref(section->mr);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 0ef1b5f4a6..a37563a315 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -99,10 +99,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
-vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
+vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
-vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
+vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
vfio_disconnect_container(int fd) "close container->fd=%d"
vfio_put_group(int fd) "close group->fd=%d"
vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
--
2.27.0
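
The range arithmetic in vfio_dma_map_ram_section() rounds the start of the section up and the end down, so only pages fully covered by the section get mapped. A standalone sketch of the same computation, assuming a 4 KiB page size for the example:

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE     4096ULL
#define PAGE_MASK     (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
    uint64_t offset = 0x1234;         /* offset_within_address_space */
    uint64_t size   = 0x10000;        /* section size */

    uint64_t iova  = PAGE_ALIGN(offset);          /* start, rounded up   */
    uint64_t llend = (offset + size) & PAGE_MASK; /* exclusive end, down */
    uint64_t end   = llend - 1;                   /* inclusive end       */

    printf("map [0x%llx .. 0x%llx], length 0x%llx\n",
           (unsigned long long)iova, (unsigned long long)end,
           (unsigned long long)(llend - iova));
    return 0;
}
```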


@@ -0,0 +1,64 @@
From 1675d767aa9bd496178b4d74e01a40dbbd97eccb Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Tue, 11 May 2021 10:08:13 +0800
Subject: [PATCH] vfio: Introduce helpers to mark dirty pages of a RAM section
Extract part of the code from vfio_sync_dirty_bitmap into a new
helper that marks dirty pages of a RAM section. This helper will
be called for the nested stage.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index bdfcc854fe..6136b1ef61 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1566,6 +1566,19 @@ err_out:
return ret;
}
+static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ ram_addr_t ram_addr;
+
+ ram_addr = memory_region_get_ram_addr(section->mr) +
+ section->offset_within_region;
+
+ return vfio_get_dirty_bitmap(container,
+ REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
+ int128_get64(section->size), ram_addr);
+}
+
typedef struct {
IOMMUNotifier n;
VFIOGuestIOMMU *giommu;
@@ -1650,8 +1663,6 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
MemoryRegionSection *section)
{
- ram_addr_t ram_addr;
-
if (memory_region_is_iommu(section->mr)) {
VFIOGuestIOMMU *giommu;
@@ -1682,12 +1693,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
}
- ram_addr = memory_region_get_ram_addr(section->mr) +
- section->offset_within_region;
-
- return vfio_get_dirty_bitmap(container,
- REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
- int128_get64(section->size), ram_addr);
+ return vfio_dma_sync_ram_section_dirty_bitmap(container, section);
}
static void vfio_listener_log_sync(MemoryListener *listener,
--
2.27.0


@@ -0,0 +1,89 @@
From 85232739b4852f1a51dde58c9007ed0deb17c2f2 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 22 Mar 2019 18:05:23 +0100
Subject: [PATCH] vfio: Introduce hostwin_from_range helper
Let's introduce a hostwin_from_range() helper that returns the
hostwin encapsulating an IOVA range or NULL if none is found.
This improves the readability of callers and removes the usage
of hostwin_found.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 36 +++++++++++++++++-------------------
1 file changed, 17 insertions(+), 19 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index d7533637c9..d358789f19 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -909,6 +909,19 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
g_free(vrdl);
}
+static VFIOHostDMAWindow *
+hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
+{
+ VFIOHostDMAWindow *hostwin;
+
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
+ return hostwin;
+ }
+ }
+ return NULL;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -918,7 +931,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
void *vaddr;
int ret;
VFIOHostDMAWindow *hostwin;
- bool hostwin_found;
Error *err = NULL;
if (vfio_listener_skipped_section(section)) {
@@ -1011,15 +1023,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
#endif
}
- hostwin_found = false;
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
- hostwin_found = true;
- break;
- }
- }
-
- if (!hostwin_found) {
+ hostwin = hostwin_from_range(container, iova, end);
+ if (!hostwin) {
error_setg(&err, "Container %p can't map guest IOVA region"
" 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
goto fail;
@@ -1211,16 +1216,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
if (memory_region_is_ram_device(section->mr)) {
hwaddr pgmask;
- VFIOHostDMAWindow *hostwin;
- bool hostwin_found = false;
+ VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
- hostwin_found = true;
- break;
- }
- }
- assert(hostwin_found); /* or region_add() would have failed */
+ assert(hostwin); /* or region_add() would have failed */
pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
--
2.27.0


@@ -0,0 +1,191 @@
From ac1bf3edcd2b807cf81ada500716f13b1394d58e Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Sat, 8 May 2021 17:31:04 +0800
Subject: [PATCH] vfio: Maintain DMA mapping range for the container
When synchronizing the dirty bitmap from kernel VFIO we do it in a
per-iova-range fashion and we allocate the userspace bitmap for each
ioctl. This patch introduces `struct VFIODMARange` to describe a range of
the given DMA mapping with respect to a VFIO_IOMMU_MAP_DMA operation, and
makes the bitmap cache of this range persistent so that we don't need to
g_try_malloc0() every time. Note that the new structure is almost a copy of
`struct vfio_iommu_type1_dma_map` but is only used internally by QEMU.
More importantly, the cached per-iova-range dirty bitmap will be further
used when we add support for CLEAR_BITMAP, where it guarantees that we
don't clear any unknown dirty bits, which could otherwise be a severe
data-loss issue for the migration code.
It's pretty intuitive to maintain a bitmap per container since we perform
log_sync at this granularity. But I don't know how to deal with things like
memory hot-{un}plug, sparse DMA mappings, etc. Suggestions welcome.
* yet something to-do:
- can't work with guest viommu
- no locks
- etc
[ The idea and even the commit message are largely inherited from kvm side.
See commit 9f4bf4baa8b820c7930e23c9566c9493db7e1d25. ]
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Kunkun Jiang <jinagkunkun@huawei.com>
---
hw/vfio/common.c | 62 +++++++++++++++++++++++++++++++----
include/hw/vfio/vfio-common.h | 9 +++++
2 files changed, 65 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 080046e3f5..86ea784919 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -445,6 +445,29 @@ unmap_exit:
return ret;
}
+static VFIODMARange *vfio_lookup_match_range(VFIOContainer *container,
+ hwaddr start_addr, hwaddr size)
+{
+ VFIODMARange *qrange;
+
+ QLIST_FOREACH(qrange, &container->dma_list, next) {
+ if (qrange->iova == start_addr && qrange->size == size) {
+ return qrange;
+ }
+ }
+ return NULL;
+}
+
+static void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange)
+{
+ uint64_t pages, size;
+
+ pages = REAL_HOST_PAGE_ALIGN(qrange->size) / qemu_real_host_page_size;
+ size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE;
+
+ qrange->bitmap = g_malloc0(size);
+}
+
/*
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
*/
@@ -458,12 +481,29 @@ static int vfio_dma_unmap(VFIOContainer *container,
.iova = iova,
.size = size,
};
+ VFIODMARange *qrange;
if (iotlb && container->dirty_pages_supported &&
vfio_devices_all_running_and_saving(container)) {
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
+ /*
+ * unregister the DMA range
+ *
+ * It seems that the memory layer will give us the same section as the one
+ * used in region_add(). Otherwise it'll be complicated to manipulate the
+ * bitmap across region_{add,del}. Is there any guarantee?
+ *
+ * But there is really not such a restriction on the kernel interface
+ * (VFIO_IOMMU_DIRTY_PAGES_FLAG_{UN}MAP_DMA, etc).
+ */
+ qrange = vfio_lookup_match_range(container, iova, size);
+ assert(qrange);
+ g_free(qrange->bitmap);
+ QLIST_REMOVE(qrange, next);
+ g_free(qrange);
+
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
/*
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
@@ -500,6 +540,14 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
.iova = iova,
.size = size,
};
+ VFIODMARange *qrange;
+
+ qrange = g_malloc0(sizeof(*qrange));
+ qrange->iova = iova;
+ qrange->size = size;
+ QLIST_INSERT_HEAD(&container->dma_list, qrange, next);
+ /* XXX allocate the dirty bitmap on demand */
+ vfio_dma_range_init_dirty_bitmap(qrange);
if (!readonly) {
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
@@ -1256,9 +1304,14 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
{
struct vfio_iommu_type1_dirty_bitmap *dbitmap;
struct vfio_iommu_type1_dirty_bitmap_get *range;
+ VFIODMARange *qrange;
uint64_t pages;
int ret;
+ qrange = vfio_lookup_match_range(container, iova, size);
+ /* the same as vfio_dma_unmap() */
+ assert(qrange);
+
dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1277,11 +1330,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size;
range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
BITS_PER_BYTE;
- range->bitmap.data = g_try_malloc0(range->bitmap.size);
- if (!range->bitmap.data) {
- ret = -ENOMEM;
- goto err_out;
- }
+
+ range->bitmap.data = (__u64 *)qrange->bitmap;
ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
if (ret) {
@@ -1297,7 +1347,6 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
range->bitmap.size, ram_addr);
err_out:
- g_free(range->bitmap.data);
g_free(dbitmap);
return ret;
@@ -2061,6 +2110,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
QLIST_INIT(&container->vrdl_list);
+ QLIST_INIT(&container->dma_list);
ret = vfio_init_container(container, group->fd, errp);
if (ret) {
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 8af11b0a76..20b9c8a1d3 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -76,6 +76,14 @@ typedef struct VFIOAddressSpace {
struct VFIOGroup;
+typedef struct VFIODMARange {
+ QLIST_ENTRY(VFIODMARange) next;
+ hwaddr iova;
+ size_t size;
+ void *vaddr; /* unused */
+ unsigned long *bitmap; /* dirty bitmap cache for this range */
+} VFIODMARange;
+
typedef struct VFIOContainer {
VFIOAddressSpace *space;
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
@@ -93,6 +101,7 @@ typedef struct VFIOContainer {
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
QLIST_HEAD(, VFIOGroup) group_list;
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
+ QLIST_HEAD(, VFIODMARange) dma_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
--
2.27.0
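
The per-range bitmap sizing in vfio_dma_range_init_dirty_bitmap() reserves one bit per host page and rounds the bit count up to a whole number of 64-bit words. A standalone sketch of that computation, assuming a 4 KiB host page size:

```c
#include <stdint.h>
#include <stdio.h>

#define HOST_PAGE_SIZE 4096ULL
#define BITS_PER_BYTE  8ULL

static uint64_t dirty_bitmap_bytes(uint64_t mapping_size)
{
    uint64_t pages = (mapping_size + HOST_PAGE_SIZE - 1) / HOST_PAGE_SIZE;
    uint64_t align = sizeof(uint64_t) * BITS_PER_BYTE;        /* 64 bits */
    uint64_t bits  = (pages + align - 1) / align * align;

    return bits / BITS_PER_BYTE;
}

int main(void)
{
    /* A 1 GiB mapping needs 256 Ki tracking bits, i.e. 32 KiB of bitmap. */
    printf("%llu bytes\n",
           (unsigned long long)dirty_bitmap_bytes(1ULL << 30));
    return 0;
}
```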


@@ -0,0 +1,262 @@
From 8b4fbe869f8a1f510896c86067d2e4fc3dc82eb9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 14 Aug 2018 08:08:11 -0400
Subject: [PATCH] vfio: Pass stage 1 MSI bindings to the host
We register the stage 1 MSI bindings when enabling the vectors
and unregister them on MSI disable.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 59 +++++++++++++++++++++++++++
hw/vfio/pci.c | 76 ++++++++++++++++++++++++++++++++++-
hw/vfio/trace-events | 2 +
include/hw/vfio/vfio-common.h | 12 ++++++
4 files changed, 147 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 58f8a43a43..1f78af121d 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -774,6 +774,65 @@ static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
}
+int vfio_iommu_set_msi_binding(VFIOContainer *container, int n,
+ IOMMUTLBEntry *iotlb)
+{
+ struct vfio_iommu_type1_set_msi_binding ustruct;
+ VFIOMSIBinding *binding;
+ int ret;
+
+ QLIST_FOREACH(binding, &container->msibinding_list, next) {
+ if (binding->index == n) {
+ return 0;
+ }
+ }
+
+ ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding);
+ ustruct.iova = iotlb->iova;
+ ustruct.flags = VFIO_IOMMU_BIND_MSI;
+ ustruct.gpa = iotlb->translated_addr;
+ ustruct.size = iotlb->addr_mask + 1;
+ ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct);
+ if (ret) {
+ error_report("%s: failed to register the stage1 MSI binding (%m)",
+ __func__);
+ return ret;
+ }
+ binding = g_new0(VFIOMSIBinding, 1);
+ binding->iova = ustruct.iova;
+ binding->gpa = ustruct.gpa;
+ binding->size = ustruct.size;
+ binding->index = n;
+
+ QLIST_INSERT_HEAD(&container->msibinding_list, binding, next);
+ return 0;
+}
+
+int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n)
+{
+ struct vfio_iommu_type1_set_msi_binding ustruct;
+ VFIOMSIBinding *binding, *tmp;
+ int ret;
+
+ ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding);
+ QLIST_FOREACH_SAFE(binding, &container->msibinding_list, next, tmp) {
+ if (binding->index != n) {
+ continue;
+ }
+ ustruct.flags = VFIO_IOMMU_UNBIND_MSI;
+ ustruct.iova = binding->iova;
+ ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct);
+ if (ret) {
+ error_report("Failed to unregister the stage1 MSI binding "
+ "for iova=0x%"PRIx64" (%m)", binding->iova);
+ }
+ QLIST_REMOVE(binding, next);
+ g_free(binding);
+ return ret;
+ }
+ return 0;
+}
+
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ae5e014e5d..99c52a0944 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -365,6 +365,65 @@ static void vfio_msi_interrupt(void *opaque)
notify(&vdev->pdev, nr);
}
+static bool vfio_iommu_require_msi_binding(IOMMUMemoryRegion *iommu_mr)
+{
+ bool msi_translate = false, nested = false;
+
+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_MSI_TRANSLATE,
+ (void *)&msi_translate);
+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED,
+ (void *)&nested);
+ if (!nested || !msi_translate) {
+ return false;
+ }
+ return true;
+}
+
+static int vfio_register_msi_binding(VFIOPCIDevice *vdev,
+ int vector_n, bool set)
+{
+ VFIOContainer *container = vdev->vbasedev.group->container;
+ PCIDevice *dev = &vdev->pdev;
+ AddressSpace *as = pci_device_iommu_address_space(dev);
+ IOMMUMemoryRegionClass *imrc;
+ IOMMUMemoryRegion *iommu_mr;
+ IOMMUTLBEntry entry;
+ MSIMessage msg;
+
+ if (as == &address_space_memory) {
+ return 0;
+ }
+
+ iommu_mr = IOMMU_MEMORY_REGION(as->root);
+ if (!vfio_iommu_require_msi_binding(iommu_mr)) {
+ return 0;
+ }
+
+ /* MSI doorbell address is translated by an IOMMU */
+
+ if (!set) { /* unregister */
+ trace_vfio_unregister_msi_binding(vdev->vbasedev.name, vector_n);
+
+ return vfio_iommu_unset_msi_binding(container, vector_n);
+ }
+
+ msg = pci_get_msi_message(dev, vector_n);
+ imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
+
+ rcu_read_lock();
+ entry = imrc->translate(iommu_mr, msg.address, IOMMU_WO, 0);
+ rcu_read_unlock();
+
+ if (entry.perm == IOMMU_NONE) {
+ return -ENOENT;
+ }
+
+ trace_vfio_register_msi_binding(vdev->vbasedev.name, vector_n,
+ msg.address, entry.translated_addr);
+
+ return vfio_iommu_set_msi_binding(container, vector_n, &entry);
+}
+
static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
{
struct vfio_irq_set *irq_set;
@@ -382,7 +441,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
fds = (int32_t *)&irq_set->data;
for (i = 0; i < vdev->nr_vectors; i++) {
- int fd = -1;
+ int ret, fd = -1;
/*
* MSI vs MSI-X - The guest has direct access to MSI mask and pending
@@ -391,6 +450,12 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
* KVM signaling path only when configured and unmasked.
*/
if (vdev->msi_vectors[i].use) {
+ ret = vfio_register_msi_binding(vdev, i, true);
+ if (ret) {
+ error_report("%s failed to register S1 MSI binding "
+ "for vector %d(%d)", vdev->vbasedev.name, i, ret);
+ goto out;
+ }
if (vdev->msi_vectors[i].virq < 0 ||
(msix && msix_is_masked(&vdev->pdev, i))) {
fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
@@ -404,6 +469,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
+out:
g_free(irq_set);
return ret;
@@ -718,7 +784,8 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
static void vfio_msix_disable(VFIOPCIDevice *vdev)
{
- int i;
+ int ret, i;
+
msix_unset_vector_notifiers(&vdev->pdev);
@@ -730,6 +797,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
if (vdev->msi_vectors[i].use) {
vfio_msix_vector_release(&vdev->pdev, i);
msix_vector_unuse(&vdev->pdev, i);
+ ret = vfio_register_msi_binding(vdev, i, false);
+ if (ret) {
+ error_report("%s: failed to unregister S1 MSI binding "
+ "for vector %d(%d)", vdev->vbasedev.name, i, ret);
+ }
}
}
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 20069935f5..35bd415d6d 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -120,6 +120,8 @@ vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype
vfio_dma_unmap_overflow_workaround(void) ""
vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d"
vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d"
+vfio_register_msi_binding(const char *name, int vector, uint64_t giova, uint64_t gdb) "%s: register vector %d gIOVA=0x%"PRIx64 "-> gDB=0x%"PRIx64" stage 1 mapping"
+vfio_unregister_msi_binding(const char *name, int vector) "%s: unregister vector %d stage 1 mapping"
# platform.c
vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 0234f5e1b1..a838a939e4 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -74,6 +74,14 @@ typedef struct VFIOAddressSpace {
QLIST_ENTRY(VFIOAddressSpace) list;
} VFIOAddressSpace;
+typedef struct VFIOMSIBinding {
+ int index;
+ hwaddr iova;
+ hwaddr gpa;
+ hwaddr size;
+ QLIST_ENTRY(VFIOMSIBinding) next;
+} VFIOMSIBinding;
+
struct VFIOGroup;
typedef struct VFIODMARange {
@@ -103,6 +111,7 @@ typedef struct VFIOContainer {
QLIST_HEAD(, VFIOGroup) group_list;
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
QLIST_HEAD(, VFIODMARange) dma_list;
+ QLIST_HEAD(, VFIOMSIBinding) msibinding_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
@@ -222,6 +231,9 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
void vfio_put_group(VFIOGroup *group);
int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev, Error **errp);
+int vfio_iommu_set_msi_binding(VFIOContainer *container, int n,
+ IOMMUTLBEntry *entry);
+int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n);
extern const MemoryRegionOps vfio_region_ops;
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
--
2.27.0
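Condensing the per-vector flow this patch implements into one sketch (the
address_space_memory check, RCU locking and tracing are elided; the calls are
taken from the hunks above, while the wrapper name itself is illustrative):

static int sketch_bind_msi_vector(VFIOPCIDevice *vdev, int vector)
{
    VFIOContainer *container = vdev->vbasedev.group->container;
    AddressSpace *as = pci_device_iommu_address_space(&vdev->pdev);
    IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(as->root);
    IOMMUMemoryRegionClass *imrc =
        memory_region_get_iommu_class_nocheck(iommu_mr);
    MSIMessage msg = pci_get_msi_message(&vdev->pdev, vector);
    IOMMUTLBEntry entry;

    /* translate the guest MSI doorbell IOVA through the vIOMMU stage 1 */
    entry = imrc->translate(iommu_mr, msg.address, IOMMU_WO, 0);
    if (entry.perm == IOMMU_NONE) {
        return -ENOENT;
    }

    /* hand the gIOVA -> gDB (GPA) binding to the host kernel */
    return vfio_iommu_set_msi_binding(container, vector, &entry);
}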

View File

@ -0,0 +1,281 @@
From 96581a5ee46e89dbc9e1ebe247b00adefb1c7a41 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 29 Aug 2018 18:10:12 +0200
Subject: [PATCH] vfio: Set up nested stage mappings
In nested mode, legacy vfio_iommu_map_notify cannot be used as
there is no "caching" mode and we do not trap on map.
On Intel, vfio_iommu_map_notify was used to DMA map the RAM
through the host single stage.
With nested mode, we need to set up the stage 2 and the stage 1
separately. This patch introduces a prereg_listener to set up
the stage 2 mapping.
The stage 1 mapping, owned by the guest, is passed to the host
when the guest invalidates the stage 1 configuration, through
a dedicated PCIPASIDOps callback. Guest IOTLB invalidations
are cascaded down to the host through another IOMMU MR UNMAP
notifier.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 139 +++++++++++++++++++++++++++++++++++++++++--
hw/vfio/pci.c | 21 +++++++
hw/vfio/trace-events | 2 +
3 files changed, 157 insertions(+), 5 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b3dc090840..58f8a43a43 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -707,6 +707,73 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
return true;
}
+/* Propagate a guest IOTLB invalidation to the host (nested mode) */
+static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+ VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+ struct vfio_iommu_type1_cache_invalidate ustruct = {};
+ VFIOContainer *container = giommu->container;
+ int ret;
+
+ assert(iotlb->perm == IOMMU_NONE);
+
+ ustruct.argsz = sizeof(ustruct);
+ ustruct.flags = 0;
+ ustruct.info.argsz = sizeof(struct iommu_cache_invalidate_info);
+ ustruct.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1;
+ ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB;
+
+ switch (iotlb->granularity) {
+ case IOMMU_INV_GRAN_DOMAIN:
+ ustruct.info.granularity = IOMMU_INV_GRANU_DOMAIN;
+ break;
+ case IOMMU_INV_GRAN_PASID:
+ {
+ struct iommu_inv_pasid_info *pasid_info;
+ int archid = -1;
+
+ pasid_info = &ustruct.info.granu.pasid_info;
+ ustruct.info.granularity = IOMMU_INV_GRANU_PASID;
+ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) {
+ pasid_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID;
+ archid = iotlb->arch_id;
+ }
+ pasid_info->archid = archid;
+ trace_vfio_iommu_asid_inv_iotlb(archid);
+ break;
+ }
+ case IOMMU_INV_GRAN_ADDR:
+ {
+ hwaddr start = iotlb->iova + giommu->iommu_offset;
+ struct iommu_inv_addr_info *addr_info;
+ size_t size = iotlb->addr_mask + 1;
+ int archid = -1;
+
+ addr_info = &ustruct.info.granu.addr_info;
+ ustruct.info.granularity = IOMMU_INV_GRANU_ADDR;
+ if (iotlb->leaf) {
+ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_LEAF;
+ }
+ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) {
+ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID;
+ archid = iotlb->arch_id;
+ }
+ addr_info->archid = archid;
+ addr_info->addr = start;
+ addr_info->granule_size = size;
+ addr_info->nb_granules = 1;
+ trace_vfio_iommu_addr_inv_iotlb(archid, start, size,
+ 1, iotlb->leaf);
+ break;
+ }
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct);
+ if (ret) {
+ error_report("%p: failed to invalidate CACHE (%d)", container, ret);
+ }
+}
+
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
@@ -1040,6 +1107,35 @@ static void vfio_dma_unmap_ram_section(VFIOContainer *container,
}
}
+static void vfio_prereg_listener_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container =
+ container_of(listener, VFIOContainer, prereg_listener);
+ Error *err = NULL;
+
+ if (!memory_region_is_ram(section->mr)) {
+ return;
+ }
+
+ vfio_dma_map_ram_section(container, section, &err);
+ if (err) {
+ error_report_err(err);
+ }
+}
+static void vfio_prereg_listener_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container =
+ container_of(listener, VFIOContainer, prereg_listener);
+
+ if (!memory_region_is_ram(section->mr)) {
+ return;
+ }
+
+ vfio_dma_unmap_ram_section(container, section);
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1150,9 +1246,10 @@ static void vfio_listener_region_add(MemoryListener *listener,
memory_region_ref(section->mr);
if (memory_region_is_iommu(section->mr)) {
+ IOMMUNotify notify;
VFIOGuestIOMMU *giommu;
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
- int iommu_idx;
+ int iommu_idx, flags;
trace_vfio_listener_region_add_iommu(iova, end);
/*
@@ -1171,8 +1268,18 @@ static void vfio_listener_region_add(MemoryListener *listener,
llend = int128_sub(llend, int128_one());
iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
MEMTXATTRS_UNSPECIFIED);
- iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
- IOMMU_NOTIFIER_IOTLB_EVENTS,
+
+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ /* IOTLB unmap notifier to propagate guest IOTLB invalidations */
+ flags = IOMMU_NOTIFIER_UNMAP;
+ notify = vfio_iommu_unmap_notify;
+ } else {
+ /* MAP/UNMAP IOTLB notifier */
+ flags = IOMMU_NOTIFIER_IOTLB_EVENTS;
+ notify = vfio_iommu_map_notify;
+ }
+
+ iommu_notifier_init(&giommu->n, notify, flags,
section->offset_within_region,
int128_get64(llend),
iommu_idx);
@@ -1192,7 +1299,9 @@ static void vfio_listener_region_add(MemoryListener *listener,
goto fail;
}
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
- memory_region_iommu_replay(giommu->iommu, &giommu->n);
+ if (flags & IOMMU_NOTIFIER_MAP) {
+ memory_region_iommu_replay(giommu->iommu, &giommu->n);
+ }
return;
}
@@ -1672,10 +1781,16 @@ static const MemoryListener vfio_memory_listener = {
.log_clear = vfio_listener_log_clear,
};
+static MemoryListener vfio_memory_prereg_listener = {
+ .region_add = vfio_prereg_listener_region_add,
+ .region_del = vfio_prereg_listener_region_del,
+};
+
static void vfio_listener_release(VFIOContainer *container)
{
memory_listener_unregister(&container->listener);
- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
+ container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
memory_listener_unregister(&container->prereg_listener);
}
}
@@ -2351,6 +2466,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
vfio_get_iommu_info_migration(container, info);
}
g_free(info);
+
+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ container->prereg_listener = vfio_memory_prereg_listener;
+ memory_listener_register(&container->prereg_listener,
+ &address_space_memory);
+ if (container->error) {
+ memory_listener_unregister(&container->prereg_listener);
+ ret = -1;
+ error_propagate_prepend(errp, container->error,
+ "RAM memory listener initialization failed "
+ "for container");
+ goto free_container_exit;
+ }
+ }
break;
}
case VFIO_SPAPR_TCE_v2_IOMMU:
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7b45353ce2..ae5e014e5d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2797,6 +2797,25 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
+static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn,
+ IOMMUConfig *config)
+{
+ PCIDevice *pdev = bus->devices[devfn];
+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOContainer *container = vdev->vbasedev.group->container;
+ struct vfio_iommu_type1_set_pasid_table info;
+
+ info.argsz = sizeof(info);
+ info.flags = VFIO_PASID_TABLE_FLAG_SET;
+ memcpy(&info.config, &config->pasid_cfg, sizeof(config->pasid_cfg));
+
+ return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
+}
+
+static PCIPASIDOps vfio_pci_pasid_ops = {
+ .set_pasid_table = vfio_iommu_set_pasid_table,
+};
+
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3108,6 +3127,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
+ pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops);
+
return;
out_deregister:
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index a37563a315..20069935f5 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -118,6 +118,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
vfio_dma_unmap_overflow_workaround(void) ""
+vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d"
+vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d"
# platform.c
vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
--
2.27.0
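The nested path above packs each guest IOTLB invalidation into the
cache-invalidate uapi; for the simplest case, a domain-wide invalidation, the
request reduces to the sketch below (field values as used in
vfio_iommu_unmap_notify(), error handling elided, function name illustrative):

static int sketch_invalidate_domain(VFIOContainer *container)
{
    struct vfio_iommu_type1_cache_invalidate ustruct = {
        .argsz = sizeof(ustruct),
        .info = {
            .argsz       = sizeof(struct iommu_cache_invalidate_info),
            .version     = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1,
            .cache       = IOMMU_CACHE_INV_TYPE_IOTLB,
            .granularity = IOMMU_INV_GRANU_DOMAIN,
        },
    };

    /* one ioctl per guest invalidation, relayed to the physical IOMMU */
    return ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct);
}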

View File

@ -0,0 +1,53 @@
From 00c553f53657bf4bc165d859187215dba7110246 Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Tue, 14 Sep 2021 14:21:46 +0800
Subject: [PATCH] vfio/common: Add address alignment check in
vfio_listener_region_del
Both vfio_listener_region_add and vfio_listener_region_del have
reference counting operations on ram section->mr. If the 'iova'
and 'llend' of the ram section do not pass the alignment
check, the ram section should not be mapped or unmapped. It means
that the reference counting should not be changed.
However, the address alignment check is missing in
vfio_listener_region_del. As a result, memory_region_unref() is
called unconditionally, which causes unintended problems in some
scenarios.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 89c49f5508..4d45c2b625 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1411,6 +1411,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ hwaddr iova;
+ Int128 llend;
if (vfio_listener_skipped_section(section)) {
trace_vfio_listener_region_del_skip(
@@ -1460,6 +1462,14 @@ static void vfio_listener_region_del(MemoryListener *listener,
*/
}
+ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
+ llend = int128_make64(section->offset_within_address_space);
+ llend = int128_add(llend, section->size);
+ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
+ if (int128_ge(int128_make64(iova), llend)) {
+ return;
+ }
+
vfio_dma_unmap_ram_section(container, section);
memory_region_unref(section->mr);
--
2.27.0
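As a worked example of the new check, assume 64K host pages and a ram section
at offset_within_address_space 0x1000 with size 0x2000: iova =
REAL_HOST_PAGE_ALIGN(0x1000) = 0x10000 while llend = (0x1000 + 0x2000) &
~0xffff = 0, so int128_ge(iova, llend) holds and the function now returns
before touching the mapping or the reference count, mirroring what
vfio_listener_region_add does for the same section.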

View File

@ -0,0 +1,39 @@
From 9d7b782a0b2c5288e82f3064b4c5b7bf18887280 Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Sat, 31 Jul 2021 10:02:18 +0800
Subject: [PATCH] vfio/common: Avoid unmap ram section at
vfio_listener_region_del() in nested mode
The ram section will be unmapped at vfio_prereg_listener_region_del()
in nested mode. So let's avoid unmapping the ram section at
vfio_listener_region_del().
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index d05a485808..bdfcc854fe 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1441,6 +1441,16 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
}
+ /*
+ * In nested mode, stage 2 (gpa->hpa) and the stage 1
+ * (giova->gpa) are set separately. The ram section
+ * will be unmapped in vfio_prereg_listener_region_del().
+ * Hence it doesn't need to unmap ram section here.
+ */
+ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ return;
+ }
+
/*
* FIXME: We assume the one big unmap below is adequate to
* remove any individual page mappings in the IOMMU which
--
2.27.0

View File

@ -0,0 +1,40 @@
From c2a4ce033db6ab74256e28da382c797a98047d4b Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Tue, 7 Sep 2021 15:14:12 +0800
Subject: [PATCH] vfio/common: Fix incorrect address alignment in
vfio_dma_map_ram_section
The 'iova' will be passed to the host kernel for mapping to the
HPA, so it depends on the host page size and TARGET_PAGE_ALIGN
should be replaced by REAL_HOST_PAGE_ALIGN. In the case of a
large granularity (64K), the map may return early for an MMIO RAM
section, and because of the inconsistency with
vfio_dma_unmap_ram_section it may trigger the 'assert(qrange)'
in vfio_dma_unmap.
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
---
hw/vfio/common.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 65f3979492..89c49f5508 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1059,10 +1059,10 @@ static int vfio_dma_map_ram_section(VFIOContainer *container,
assert(memory_region_is_ram(section->mr));
- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
llend = int128_make64(section->offset_within_address_space);
llend = int128_add(llend, section->size);
- llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
end = int128_get64(int128_sub(llend, int128_one()));
vaddr = memory_region_get_ram_ptr(section->mr) +
--
2.27.0
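For example, with 64K host pages and a 4K-aligned MMIO RAM section at offset
0x8000 of size 0x4000, the old code computed iova = TARGET_PAGE_ALIGN(0x8000)
= 0x8000 and mapped [0x8000, 0xc000), while the host-page-aligned unmap path
computes different bounds, so the (iova, size) later handed to vfio_dma_unmap
cannot match the VFIODMARange recorded at map time and 'assert(qrange)'
fires; with REAL_HOST_PAGE_ALIGN on both sides such a sub-host-page section
is handled consistently.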

View File

@ -0,0 +1,224 @@
From 815258f81a660ad87272191dca4a9726cb2bf5b2 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Sat, 8 May 2021 17:31:05 +0800
Subject: [PATCH] vfio/migration: Add support for manual clear vfio dirty log
The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl
flags VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and
VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in
the kernel; tweak the userspace side to use them.
Check if the kernel supports VFIO_DIRTY_LOG_MANUAL_CLEAR and
provide the log_clear() hook for vfio_memory_listener. If the
kernel supports it, deliver the clear request to the kernel.
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/common.c | 149 +++++++++++++++++++++++++++++++++-
include/hw/vfio/vfio-common.h | 1 +
2 files changed, 148 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 86ea784919..6cb91e7ffd 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1315,7 +1315,9 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+ dbitmap->flags = container->dirty_log_manual_clear ?
+ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR :
+ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
range->iova = iova;
range->size = size;
@@ -1491,6 +1493,141 @@ static void vfio_listener_log_sync(MemoryListener *listener,
}
}
+/*
+ * I'm not sure if there's any alignment requirement for the CLEAR_BITMAP
+ * ioctl. But copy from kvm side and align {start, size} with 64 pages.
+ *
+ * I think the code can be simplified a lot if no alignment requirement.
+ */
+#define VFIO_CLEAR_LOG_SHIFT 6
+#define VFIO_CLEAR_LOG_ALIGN (qemu_real_host_page_size << VFIO_CLEAR_LOG_SHIFT)
+#define VFIO_CLEAR_LOG_MASK (-VFIO_CLEAR_LOG_ALIGN)
+
+static int vfio_log_clear_one_range(VFIOContainer *container,
+ VFIODMARange *qrange, uint64_t start, uint64_t size)
+{
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap_get *range;
+
+ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
+
+ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
+ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP;
+ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
+
+ /*
+ * Now let's deal with the actual bitmap, which is almost the same
+ * as the kvm side.
+ */
+ uint64_t end, bmap_start, start_delta, bmap_npages;
+ unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size;
+ int ret;
+
+ bmap_start = start & VFIO_CLEAR_LOG_MASK;
+ start_delta = start - bmap_start;
+ bmap_start /= psize;
+
+ bmap_npages = DIV_ROUND_UP(size + start_delta, VFIO_CLEAR_LOG_ALIGN)
+ << VFIO_CLEAR_LOG_SHIFT;
+ end = qrange->size / psize;
+ if (bmap_npages > end - bmap_start) {
+ bmap_npages = end - bmap_start;
+ }
+ start_delta /= psize;
+
+ if (start_delta) {
+ bmap_clear = bitmap_new(bmap_npages);
+ bitmap_copy_with_src_offset(bmap_clear, qrange->bitmap,
+ bmap_start, start_delta + size / psize);
+ bitmap_clear(bmap_clear, 0, start_delta);
+ range->bitmap.data = (__u64 *)bmap_clear;
+ } else {
+ range->bitmap.data = (__u64 *)(qrange->bitmap + BIT_WORD(bmap_start));
+ }
+
+ range->iova = qrange->iova + bmap_start * psize;
+ range->size = bmap_npages * psize;
+ range->bitmap.size = ROUND_UP(bmap_npages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ range->bitmap.pgsize = qemu_real_host_page_size;
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+ if (ret) {
+ error_report("Failed to clear dirty log for iova: 0x%"PRIx64
+ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
+ (uint64_t)range->size, errno);
+ goto err_out;
+ }
+
+ bitmap_clear(qrange->bitmap, bmap_start + start_delta, size / psize);
+err_out:
+ g_free(bmap_clear);
+ g_free(dbitmap);
+ return 0;
+}
+
+static int vfio_physical_log_clear(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ uint64_t start, size, offset, count;
+ VFIODMARange *qrange;
+ int ret = 0;
+
+ if (!container->dirty_log_manual_clear) {
+ /* No need to do explicit clear */
+ return ret;
+ }
+
+ start = section->offset_within_address_space;
+ size = int128_get64(section->size);
+
+ if (!size) {
+ return ret;
+ }
+
+ QLIST_FOREACH(qrange, &container->dma_list, next) {
+ /*
+ * Discard ranges that do not overlap the section (e.g., the
+ * Memory BAR regions of the device)
+ */
+ if (qrange->iova > start + size - 1 ||
+ start > qrange->iova + qrange->size - 1) {
+ continue;
+ }
+
+ if (start >= qrange->iova) {
+ /* The range starts before section or is aligned to it. */
+ offset = start - qrange->iova;
+ count = MIN(qrange->size - offset, size);
+ } else {
+ /* The range starts after section. */
+ offset = 0;
+ count = MIN(qrange->size, size - (qrange->iova - start));
+ }
+ ret = vfio_log_clear_one_range(container, qrange, offset, count);
+ if (ret < 0) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void vfio_listener_log_clear(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+
+ if (vfio_listener_skipped_section(section) ||
+ !container->dirty_pages_supported) {
+ return;
+ }
+
+ if (vfio_devices_all_dirty_tracking(container)) {
+ vfio_physical_log_clear(container, section);
+ }
+}
+
static const MemoryListener vfio_memory_listener = {
.name = "vfio",
.region_add = vfio_listener_region_add,
@@ -1498,6 +1635,7 @@ static const MemoryListener vfio_memory_listener = {
.log_global_start = vfio_listener_log_global_start,
.log_global_stop = vfio_listener_log_global_stop,
.log_sync = vfio_listener_log_sync,
+ .log_clear = vfio_listener_log_clear,
};
static void vfio_listener_release(VFIOContainer *container)
@@ -1925,7 +2063,7 @@ static int vfio_get_iommu_type(VFIOContainer *container,
static int vfio_init_container(VFIOContainer *container, int group_fd,
Error **errp)
{
- int iommu_type, ret;
+ int iommu_type, dirty_log_manual_clear, ret;
iommu_type = vfio_get_iommu_type(container, errp);
if (iommu_type < 0) {
@@ -1954,6 +2092,13 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
}
container->iommu_type = iommu_type;
+
+ dirty_log_manual_clear = ioctl(container->fd, VFIO_CHECK_EXTENSION,
+ VFIO_DIRTY_LOG_MANUAL_CLEAR);
+ if (dirty_log_manual_clear) {
+ container->dirty_log_manual_clear = dirty_log_manual_clear;
+ }
+
return 0;
}
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 20b9c8a1d3..0234f5e1b1 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -93,6 +93,7 @@ typedef struct VFIOContainer {
Error *error;
bool initialized;
bool dirty_pages_supported;
+ bool dirty_log_manual_clear;
uint64_t dirty_pgsizes;
uint64_t max_dirty_bitmap_size;
unsigned long pgsizes;
--
2.27.0
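With manual clear enabled, log_sync reports dirty pages through the
..._GET_BITMAP_NOCLEAR flavour and they stay dirty in the kernel until the
listener's log_clear hook runs. Stripped of the 64-page alignment handling in
vfio_log_clear_one_range(), the clear step amounts to the sketch below (buffer
sizing copied from the patch, error handling elided, function name
illustrative):

static int sketch_clear_dirty(VFIOContainer *container, VFIODMARange *qrange,
                              uint64_t iova, uint64_t size)
{
    struct vfio_iommu_type1_dirty_bitmap *dbitmap;
    struct vfio_iommu_type1_dirty_bitmap_get *range;
    uint64_t psize = qemu_real_host_page_size;
    int ret;

    dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
    dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
    dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP;
    range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;

    range->iova = iova;
    range->size = size;
    range->bitmap.pgsize = psize;
    range->bitmap.size = ROUND_UP(size / psize,
                                  sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE;
    /* the bits to clear come from the bitmap cached in the VFIODMARange */
    range->bitmap.data = (__u64 *)qrange->bitmap;

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
    g_free(dbitmap);
    return ret;
}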

View File

@ -0,0 +1,199 @@
From 6bbf810edebdb89a6958519ee3adfb1888520231 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 6 Nov 2020 12:03:29 -0500
Subject: [PATCH] vfio/pci: Implement return_page_response page response
callback
This patch implements the page response path. The
response is written into the page response ring buffer and then
the header's head index is updated. This path is not used
by this series. It is introduced here as a POC for vSVA/ARM
integration.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/pci.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 2 +
2 files changed, 125 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c54e62fe8f..8e24f9c7d1 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2693,6 +2693,61 @@ out:
g_free(fault_region_info);
}
+static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error **errp)
+{
+ struct vfio_region_info *fault_region_info = NULL;
+ struct vfio_region_info_cap_fault *cap_fault;
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ struct vfio_info_cap_header *hdr;
+ char *fault_region_name;
+ int ret;
+
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
+ VFIO_REGION_TYPE_NESTED,
+ VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE,
+ &fault_region_info);
+ if (ret) {
+ goto out;
+ }
+
+ hdr = vfio_get_region_info_cap(fault_region_info,
+ VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE);
+ if (!hdr) {
+ error_setg(errp, "failed to retrieve DMA FAULT RESPONSE capability");
+ goto out;
+ }
+ cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
+ header);
+ if (cap_fault->version != 1) {
+ error_setg(errp, "Unsupported DMA FAULT RESPONSE API version %d",
+ cap_fault->version);
+ goto out;
+ }
+
+ fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d",
+ vbasedev->name,
+ fault_region_info->index);
+
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+ &vdev->dma_fault_response_region,
+ fault_region_info->index,
+ fault_region_name);
+ g_free(fault_region_name);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "failed to set up the DMA FAULT RESPONSE region %d",
+ fault_region_info->index);
+ goto out;
+ }
+
+ ret = vfio_region_mmap(&vdev->dma_fault_response_region);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT RESPONSE queue");
+ }
+out:
+ g_free(fault_region_info);
+}
+
static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2768,6 +2823,12 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return;
}
+ vfio_init_fault_response_regions(vdev, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
@@ -2946,8 +3007,68 @@ static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn,
return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
}
+static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn,
+ IOMMUPageResponse *resp)
+{
+ PCIDevice *pdev = bus->devices[devfn];
+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ struct iommu_page_response *response = &resp->resp;
+ struct vfio_region_dma_fault_response header;
+ struct iommu_page_response *queue;
+ char *queue_buffer = NULL;
+ ssize_t bytes;
+
+ if (!vdev->dma_fault_response_region.mem) {
+ return -EINVAL;
+ }
+
+ /* read the header */
+ bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
+ vdev->dma_fault_response_region.fd_offset);
+ if (bytes != sizeof(header)) {
+ error_report("%s unable to read the fault region header (0x%lx)",
+ __func__, bytes);
+ return -1;
+ }
+
+ /* Normally the fault queue is mmapped */
+ queue = (struct iommu_page_response *)vdev->dma_fault_response_region.mmaps[0].mmap;
+ if (!queue) {
+ size_t queue_size = header.nb_entries * header.entry_size;
+
+ error_report("%s: fault queue not mmapped: slower fault handling",
+ vdev->vbasedev.name);
+
+ queue_buffer = g_malloc(queue_size);
+ bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
+ vdev->dma_fault_response_region.fd_offset + header.offset);
+ if (bytes != queue_size) {
+ error_report("%s unable to read the fault queue (0x%lx)",
+ __func__, bytes);
+ return -1;
+ }
+
+ queue = (struct iommu_page_response *)queue_buffer;
+ }
+ /* deposit the new response in the queue and increment the head */
+ memcpy(queue + header.head, response, header.entry_size);
+
+ vdev->fault_response_head_index =
+ (vdev->fault_response_head_index + 1) % header.nb_entries;
+ bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index, 4,
+ vdev->dma_fault_response_region.fd_offset);
+ if (bytes != 4) {
+ error_report("%s unable to write the fault response region head index (0x%lx)",
+ __func__, bytes);
+ }
+ g_free(queue_buffer);
+
+ return 0;
+}
+
static PCIPASIDOps vfio_pci_pasid_ops = {
.set_pasid_table = vfio_iommu_set_pasid_table,
+ .return_page_response = vfio_iommu_return_page_response,
};
static void vfio_dma_fault_notifier_handler(void *opaque)
@@ -3411,6 +3532,7 @@ static void vfio_instance_finalize(Object *obj)
vfio_display_finalize(vdev);
vfio_bars_finalize(vdev);
vfio_region_finalize(&vdev->dma_fault_region);
+ vfio_region_finalize(&vdev->dma_fault_response_region);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
/*
@@ -3432,6 +3554,7 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_unregister_err_notifier(vdev);
vfio_unregister_ext_irq_notifiers(vdev);
vfio_region_exit(&vdev->dma_fault_region);
+ vfio_region_exit(&vdev->dma_fault_response_region);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 03ac8919ef..61b3bf1303 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -147,6 +147,8 @@ struct VFIOPCIDevice {
VFIOPCIExtIRQ *ext_irqs;
VFIORegion dma_fault_region;
uint32_t fault_tail_index;
+ VFIORegion dma_fault_response_region;
+ uint32_t fault_response_head_index;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
uint32_t device_id;
--
2.27.0
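The response region is a ring produced by userspace: QEMU deposits the
response at the current head slot of the mmapped queue and then publishes the
new head index back to the region header, as condensed below from
vfio_iommu_return_page_response() (variables as set up in the patch):

/* write the response into the head slot of the mmapped ring */
memcpy(queue + header.head, response, header.entry_size);

/* advance and publish the head index, wrapping at nb_entries */
vdev->fault_response_head_index =
    (vdev->fault_response_head_index + 1) % header.nb_entries;
pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index, 4,
       vdev->dma_fault_response_region.fd_offset);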

View File

@ -0,0 +1,96 @@
From d33cc7eccb68c6a1488804c94ff5c1197ee0fc6e Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 5 Mar 2019 16:35:32 +0100
Subject: [PATCH] vfio/pci: Implement the DMA fault handler
Whenever the eventfd is triggered, we retrieve the DMA fault(s)
from the mmapped fault region and inject them into the IOMMU
memory region.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 1 +
2 files changed, 51 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 76bc9d3506..c54e62fe8f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2953,10 +2953,60 @@ static PCIPASIDOps vfio_pci_pasid_ops = {
static void vfio_dma_fault_notifier_handler(void *opaque)
{
VFIOPCIExtIRQ *ext_irq = opaque;
+ VFIOPCIDevice *vdev = ext_irq->vdev;
+ PCIDevice *pdev = &vdev->pdev;
+ AddressSpace *as = pci_device_iommu_address_space(pdev);
+ IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(as->root);
+ struct vfio_region_dma_fault header;
+ struct iommu_fault *queue;
+ char *queue_buffer = NULL;
+ ssize_t bytes;
if (!event_notifier_test_and_clear(&ext_irq->notifier)) {
return;
}
+
+ bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
+ vdev->dma_fault_region.fd_offset);
+ if (bytes != sizeof(header)) {
+ error_report("%s unable to read the fault region header (0x%lx)",
+ __func__, bytes);
+ return;
+ }
+
+ /* Normally the fault queue is mmapped */
+ queue = (struct iommu_fault *)vdev->dma_fault_region.mmaps[0].mmap;
+ if (!queue) {
+ size_t queue_size = header.nb_entries * header.entry_size;
+
+ error_report("%s: fault queue not mmapped: slower fault handling",
+ vdev->vbasedev.name);
+
+ queue_buffer = g_malloc(queue_size);
+ bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
+ vdev->dma_fault_region.fd_offset + header.offset);
+ if (bytes != queue_size) {
+ error_report("%s unable to read the fault queue (0x%lx)",
+ __func__, bytes);
+ return;
+ }
+
+ queue = (struct iommu_fault *)queue_buffer;
+ }
+
+ while (vdev->fault_tail_index != header.head) {
+ memory_region_inject_faults(iommu_mr, 1,
+ &queue[vdev->fault_tail_index]);
+ vdev->fault_tail_index =
+ (vdev->fault_tail_index + 1) % header.nb_entries;
+ }
+ bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_tail_index, 4,
+ vdev->dma_fault_region.fd_offset);
+ if (bytes != 4) {
+ error_report("%s unable to write the fault region tail index (0x%lx)",
+ __func__, bytes);
+ }
+ g_free(queue_buffer);
}
static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index eef91065f1..03ac8919ef 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -146,6 +146,7 @@ struct VFIOPCIDevice {
EventNotifier req_notifier;
VFIOPCIExtIRQ *ext_irqs;
VFIORegion dma_fault_region;
+ uint32_t fault_tail_index;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
uint32_t device_id;
--
2.27.0
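The fault region works the other way around: the host produces entries and
advances the head stored in the region header, while QEMU keeps a tail index,
injects every pending fault into the vIOMMU and then writes the tail back, as
condensed below from the handler above (variables as set up in the patch):

/* drain every fault the host has produced since the last wakeup */
while (vdev->fault_tail_index != header.head) {
    memory_region_inject_faults(iommu_mr, 1, &queue[vdev->fault_tail_index]);
    vdev->fault_tail_index = (vdev->fault_tail_index + 1) % header.nb_entries;
}
/* publish the new tail so the host can reuse the consumed slots */
pwrite(vdev->vbasedev.fd, &vdev->fault_tail_index, 4,
       vdev->dma_fault_region.fd_offset);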

View File

@ -0,0 +1,168 @@
From 574455d1363e818905e05cd23ef0948e83a16a51 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 13 Dec 2018 04:39:30 -0500
Subject: [PATCH] vfio/pci: Register handler for iommu fault
We use the new extended IRQ VFIO_IRQ_TYPE_NESTED type and
VFIO_IRQ_SUBTYPE_DMA_FAULT subtype to set/unset
a notifier for physical DMA faults. The associated eventfd is
triggered, in nested mode, whenever a fault is detected at IOMMU
physical level.
The actual handler will be implemented in subsequent patches.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/pci.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++-
hw/vfio/pci.h | 7 +++++
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 99c52a0944..37a70932c6 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2888,6 +2888,76 @@ static PCIPASIDOps vfio_pci_pasid_ops = {
.set_pasid_table = vfio_iommu_set_pasid_table,
};
+static void vfio_dma_fault_notifier_handler(void *opaque)
+{
+ VFIOPCIExtIRQ *ext_irq = opaque;
+
+ if (!event_notifier_test_and_clear(&ext_irq->notifier)) {
+ return;
+ }
+}
+
+static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev,
+ uint32_t type, uint32_t subtype,
+ IOHandler *handler)
+{
+ int32_t fd, ext_irq_index, index;
+ struct vfio_irq_info *irq_info;
+ Error *err = NULL;
+ EventNotifier *n;
+ int ret;
+
+ ret = vfio_get_dev_irq_info(&vdev->vbasedev, type, subtype, &irq_info);
+ if (ret) {
+ return ret;
+ }
+ index = irq_info->index;
+ ext_irq_index = irq_info->index - VFIO_PCI_NUM_IRQS;
+ g_free(irq_info);
+
+ vdev->ext_irqs[ext_irq_index].vdev = vdev;
+ vdev->ext_irqs[ext_irq_index].index = index;
+ n = &vdev->ext_irqs[ext_irq_index].notifier;
+
+ ret = event_notifier_init(n, 0);
+ if (ret) {
+ error_report("vfio: Unable to init event notifier for ext irq %d(%d)",
+ ext_irq_index, ret);
+ return ret;
+ }
+
+ fd = event_notifier_get_fd(n);
+ qemu_set_fd_handler(fd, vfio_dma_fault_notifier_handler, NULL,
+ &vdev->ext_irqs[ext_irq_index]);
+
+ ret = vfio_set_irq_signaling(&vdev->vbasedev, index, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err);
+ if (ret) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ qemu_set_fd_handler(fd, NULL, NULL, vdev);
+ event_notifier_cleanup(n);
+ }
+ return ret;
+}
+
+static void vfio_unregister_ext_irq_notifiers(VFIOPCIDevice *vdev)
+{
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ Error *err = NULL;
+ int i;
+
+ for (i = 0; i < vbasedev->num_irqs - VFIO_PCI_NUM_IRQS; i++) {
+ if (vfio_set_irq_signaling(vbasedev, i + VFIO_PCI_NUM_IRQS , 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
+ qemu_set_fd_handler(event_notifier_get_fd(&vdev->ext_irqs[i].notifier),
+ NULL, NULL, vdev);
+ event_notifier_cleanup(&vdev->ext_irqs[i].notifier);
+ }
+ g_free(vdev->ext_irqs);
+}
+
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -2898,7 +2968,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ssize_t len;
struct stat st;
int groupid;
- int i, ret;
+ int i, ret, nb_ext_irqs;
bool is_mdev;
if (!vdev->vbasedev.sysfsdev) {
@@ -2986,6 +3056,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ nb_ext_irqs = vdev->vbasedev.num_irqs - VFIO_PCI_NUM_IRQS;
+ if (nb_ext_irqs > 0) {
+ vdev->ext_irqs = g_new0(VFIOPCIExtIRQ, nb_ext_irqs);
+ }
+
vfio_populate_device(vdev, &err);
if (err) {
error_propagate(errp, err);
@@ -3197,6 +3272,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
+ vfio_register_ext_irq_handler(vdev, VFIO_IRQ_TYPE_NESTED,
+ VFIO_IRQ_SUBTYPE_DMA_FAULT,
+ vfio_dma_fault_notifier_handler);
vfio_setup_resetfn_quirk(vdev);
pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops);
@@ -3239,6 +3317,7 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
+ vfio_unregister_ext_irq_notifiers(vdev);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 64777516d1..a8b06737fb 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -114,6 +114,12 @@ typedef struct VFIOMSIXInfo {
unsigned long *pending;
} VFIOMSIXInfo;
+typedef struct VFIOPCIExtIRQ {
+ struct VFIOPCIDevice *vdev;
+ EventNotifier notifier;
+ uint32_t index;
+} VFIOPCIExtIRQ;
+
#define TYPE_VFIO_PCI "vfio-pci"
OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
@@ -138,6 +144,7 @@ struct VFIOPCIDevice {
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
+ VFIOPCIExtIRQ *ext_irqs;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
uint32_t device_id;
--
2.27.0
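As a concrete example of the index bookkeeping above: a device reporting
num_irqs == VFIO_PCI_NUM_IRQS + 1 exposes exactly one extended IRQ, so
ext_irqs is a one-element array, the DMA fault IRQ is reported at index
VFIO_PCI_NUM_IRQS, and it lands in ext_irqs[0] because ext_irq_index =
index - VFIO_PCI_NUM_IRQS = 0.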

View File

@ -0,0 +1,132 @@
From e701d0fef4fbb7935d6aa7d22d82eb2dcfee2431 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 13 Dec 2018 10:57:53 -0500
Subject: [PATCH] vfio/pci: Set up the DMA FAULT region
Set up the fault region which is composed of the actual fault
queue (mmappable) and a header used to handle it. The fault
queue is mmapped.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
hw/vfio/pci.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 1 +
2 files changed, 65 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 37a70932c6..76bc9d3506 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2638,11 +2638,67 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
return 0;
}
+static void vfio_init_fault_regions(VFIOPCIDevice *vdev, Error **errp)
+{
+ struct vfio_region_info *fault_region_info = NULL;
+ struct vfio_region_info_cap_fault *cap_fault;
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ struct vfio_info_cap_header *hdr;
+ char *fault_region_name;
+ int ret;
+
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
+ VFIO_REGION_TYPE_NESTED,
+ VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
+ &fault_region_info);
+ if (ret) {
+ goto out;
+ }
+
+ hdr = vfio_get_region_info_cap(fault_region_info,
+ VFIO_REGION_INFO_CAP_DMA_FAULT);
+ if (!hdr) {
+ error_setg(errp, "failed to retrieve DMA FAULT capability");
+ goto out;
+ }
+ cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
+ header);
+ if (cap_fault->version != 1) {
+ error_setg(errp, "Unsupported DMA FAULT API version %d",
+ cap_fault->version);
+ goto out;
+ }
+
+ fault_region_name = g_strdup_printf("%s DMA FAULT %d",
+ vbasedev->name,
+ fault_region_info->index);
+
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+ &vdev->dma_fault_region,
+ fault_region_info->index,
+ fault_region_name);
+ g_free(fault_region_name);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "failed to set up the DMA FAULT region %d",
+ fault_region_info->index);
+ goto out;
+ }
+
+ ret = vfio_region_mmap(&vdev->dma_fault_region);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT queue");
+ }
+out:
+ g_free(fault_region_info);
+}
+
static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
struct vfio_region_info *reg_info;
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
+ Error *err = NULL;
int i, ret = -1;
/* Sanity check device */
@@ -2706,6 +2762,12 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
}
}
+ vfio_init_fault_regions(vdev, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
@@ -3298,6 +3360,7 @@ static void vfio_instance_finalize(Object *obj)
vfio_display_finalize(vdev);
vfio_bars_finalize(vdev);
+ vfio_region_finalize(&vdev->dma_fault_region);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
/*
@@ -3318,6 +3381,7 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
vfio_unregister_ext_irq_notifiers(vdev);
+ vfio_region_exit(&vdev->dma_fault_region);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a8b06737fb..eef91065f1 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -145,6 +145,7 @@ struct VFIOPCIDevice {
EventNotifier err_notifier;
EventNotifier req_notifier;
VFIOPCIExtIRQ *ext_irqs;
+ VFIORegion dma_fault_region;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
uint32_t device_id;
--
2.27.0
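For reference, the region set up here has the two-part layout the fault
handler relies on: a small header at the start of the region, accessed with
pread/pwrite at dma_fault_region.fd_offset, and the fault queue itself, which
is normally reached through the mmapped area (dma_fault_region.mmaps[0].mmap)
and is only read with pread as a slower fallback when the mmap is
unavailable.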

View File

@ -0,0 +1,701 @@
From 36b65d7312a343cb636e6963b8262dce9420ebc6 Mon Sep 17 00:00:00 2001
From: Kunkun Jiang <jiangkunkun@huawei.com>
Date: Fri, 30 Jul 2021 09:15:31 +0800
Subject: [PATCH] vfio.h and iommu.h header update against 5.10
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
linux-headers/linux/iommu.h | 395 ++++++++++++++++++++++++++++++++++++
linux-headers/linux/vfio.h | 220 +++++++++++++++++++-
2 files changed, 613 insertions(+), 2 deletions(-)
create mode 100644 linux-headers/linux/iommu.h
diff --git a/linux-headers/linux/iommu.h b/linux-headers/linux/iommu.h
new file mode 100644
index 0000000000..773b7dc2d6
--- /dev/null
+++ b/linux-headers/linux/iommu.h
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOMMU user API definitions
+ */
+
+#ifndef IOMMU_H
+#define IOMMU_H
+
+#include <linux/types.h>
+
+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+ IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */
+ IOMMU_FAULT_PAGE_REQ, /* page request fault */
+};
+
+enum iommu_fault_reason {
+ IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+ /* Could not access the PASID table (fetch caused external abort) */
+ IOMMU_FAULT_REASON_PASID_FETCH,
+
+ /* PASID entry is invalid or has configuration errors */
+ IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+ /*
+ * PASID is out of range (e.g. exceeds the maximum PASID
+ * supported by the IOMMU) or disabled.
+ */
+ IOMMU_FAULT_REASON_PASID_INVALID,
+
+ /*
+ * An external abort occurred fetching (or updating) a translation
+ * table descriptor
+ */
+ IOMMU_FAULT_REASON_WALK_EABT,
+
+ /*
+ * Could not access the page table entry (Bad address),
+ * actual translation fault
+ */
+ IOMMU_FAULT_REASON_PTE_FETCH,
+
+ /* Protection flag check failed */
+ IOMMU_FAULT_REASON_PERMISSION,
+
+ /* access flag check failed */
+ IOMMU_FAULT_REASON_ACCESS,
+
+ /* Output address of a translation stage caused Address Size fault */
+ IOMMU_FAULT_REASON_OOR_ADDRESS,
+};
+
+/**
+ * struct iommu_fault_unrecoverable - Unrecoverable fault data
+ * @reason: reason of the fault, from &enum iommu_fault_reason
+ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values)
+ * @pasid: Process Address Space ID
+ * @perm: requested permission access using by the incoming transaction
+ * (IOMMU_FAULT_PERM_* values)
+ * @addr: offending page address
+ * @fetch_addr: address that caused a fetch abort, if any
+ */
+struct iommu_fault_unrecoverable {
+ __u32 reason;
+#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1)
+#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2)
+ __u32 flags;
+ __u32 pasid;
+ __u32 perm;
+ __u64 addr;
+ __u64 fetch_addr;
+};
+
+/**
+ * struct iommu_fault_page_request - Page Request data
+ * @flags: encodes whether the corresponding fields are valid and whether this
+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
+ * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
+ * must have the same PASID value as the page request. When it is clear,
+ * the page response should not have a PASID.
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_fault_page_request {
+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
+#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 perm;
+ __u64 addr;
+ __u64 private_data[2];
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ * @type: fault type from &enum iommu_fault_type
+ * @padding: reserved for future use (should be zero)
+ * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV
+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
+ * @padding2: sets the fault size to allow for future extensions
+ */
+struct iommu_fault {
+ __u32 type;
+ __u32 padding;
+ union {
+ struct iommu_fault_unrecoverable event;
+ struct iommu_fault_page_request prm;
+ __u8 padding2[56];
+ };
+};
+
+/**
+ * enum iommu_page_response_code - Return status of fault handlers
+ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is "Success" in PCI PRI.
+ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ * this device if possible. This is "Response Failure" in PCI PRI.
+ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is "Invalid Request" in PCI PRI.
+ */
+enum iommu_page_response_code {
+ IOMMU_PAGE_RESP_SUCCESS = 0,
+ IOMMU_PAGE_RESP_INVALID,
+ IOMMU_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_page_response - Generic page response information
+ * @argsz: User filled size of this data
+ * @version: API version of this structure
+ * @flags: encodes whether the corresponding fields are valid
+ * (IOMMU_FAULT_PAGE_RESPONSE_* values)
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @code: response code from &enum iommu_page_response_code
+ */
+struct iommu_page_response {
+ __u32 argsz;
+#define IOMMU_PAGE_RESP_VERSION_1 1
+ __u32 version;
+#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0)
+ __u32 flags;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 code;
+};
+
+/* defines the granularity of the invalidation */
+enum iommu_inv_granularity {
+ IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */
+ IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */
+ IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */
+ IOMMU_INV_GRANU_NR, /* number of invalidation granularities */
+};
+
+/**
+ * struct iommu_inv_addr_info - Address Selective Invalidation Structure
+ *
+ * @flags: indicates the granularity of the address-selective invalidation
+ * - If the PASID bit is set, the @pasid field is populated and the invalidation
+ * relates to cache entries tagged with this PASID and matching the address
+ * range.
+ * - If ARCHID bit is set, @archid is populated and the invalidation relates
+ * to cache entries tagged with this architecture specific ID and matching
+ * the address range.
+ * - Both PASID and ARCHID can be set as they may tag different caches.
+ * - If neither PASID or ARCHID is set, global addr invalidation applies.
+ * - The LEAF flag indicates whether only the leaf PTE caching needs to be
+ * invalidated and other paging structure caches can be preserved.
+ * @pasid: process address space ID
+ * @archid: architecture-specific ID
+ * @addr: first stage/level input address
+ * @granule_size: page/block size of the mapping in bytes
+ * @nb_granules: number of contiguous granules to be invalidated
+ */
+struct iommu_inv_addr_info {
+#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0)
+#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1)
+#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2)
+ __u32 flags;
+ __u32 archid;
+ __u64 pasid;
+ __u64 addr;
+ __u64 granule_size;
+ __u64 nb_granules;
+};
+
+/**
+ * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure
+ *
+ * @flags: indicates the granularity of the PASID-selective invalidation
+ * - If the PASID bit is set, the @pasid field is populated and the invalidation
+ * relates to cache entries tagged with this PASID and matching the address
+ * range.
+ * - If the ARCHID bit is set, the @archid is populated and the invalidation
+ * relates to cache entries tagged with this architecture specific ID and
+ * matching the address range.
+ * - Both PASID and ARCHID can be set as they may tag different caches.
+ * - At least one of PASID or ARCHID must be set.
+ * @pasid: process address space ID
+ * @archid: architecture-specific ID
+ */
+struct iommu_inv_pasid_info {
+#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0)
+#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1)
+ __u32 flags;
+ __u32 archid;
+ __u64 pasid;
+};
+
+/**
+ * struct iommu_cache_invalidate_info - First level/stage invalidation
+ * information
+ * @argsz: User filled size of this data
+ * @version: API version of this structure
+ * @cache: bitfield that allows to select which caches to invalidate
+ * @granularity: defines the lowest granularity used for the invalidation:
+ * domain > PASID > addr
+ * @padding: reserved for future use (should be zero)
+ * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID
+ * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR
+ *
+ * Not all the combinations of cache/granularity are valid:
+ *
+ * +--------------+---------------+---------------+---------------+
+ * | type / | DEV_IOTLB | IOTLB | PASID |
+ * | granularity | | | cache |
+ * +==============+===============+===============+===============+
+ * | DOMAIN | N/A | Y | Y |
+ * +--------------+---------------+---------------+---------------+
+ * | PASID | Y | Y | Y |
+ * +--------------+---------------+---------------+---------------+
+ * | ADDR | Y | Y | N/A |
+ * +--------------+---------------+---------------+---------------+
+ *
+ * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than
+ * @version and @cache.
+ *
+ * If multiple cache types are invalidated simultaneously, they all
+ * must support the used granularity.
+ */
+struct iommu_cache_invalidate_info {
+ __u32 argsz;
+#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1
+ __u32 version;
+/* IOMMU paging structure cache */
+#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */
+#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */
+#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */
+#define IOMMU_CACHE_INV_TYPE_NR (3)
+ __u8 cache;
+ __u8 granularity;
+ __u8 padding[6];
+ union {
+ struct iommu_inv_pasid_info pasid_info;
+ struct iommu_inv_addr_info addr_info;
+ } granu;
+};
+
+/**
+ * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest
+ * SVA binding.
+ *
+ * @flags: VT-d PASID table entry attributes
+ * @pat: Page attribute table data to compute effective memory type
+ * @emt: Extended memory type
+ *
+ * Only guest vIOMMU selectable and effective options are passed down to
+ * the host IOMMU.
+ */
+struct iommu_gpasid_bind_data_vtd {
+#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */
+#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */
+#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */
+#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */
+#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */
+#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */
+#define IOMMU_SVA_VTD_GPASID_LAST (1 << 6)
+ __u64 flags;
+ __u32 pat;
+ __u32 emt;
+};
+
+#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \
+ IOMMU_SVA_VTD_GPASID_EMTE | \
+ IOMMU_SVA_VTD_GPASID_PCD | \
+ IOMMU_SVA_VTD_GPASID_PWT)
+
+/**
+ * struct iommu_gpasid_bind_data - Information about device and guest PASID binding
+ * @argsz: User filled size of this data
+ * @version: Version of this data structure
+ * @format: PASID table entry format
+ * @flags: Additional information on guest bind request
+ * @gpgd: Guest page directory base of the guest mm to bind
+ * @hpasid: Process address space ID used for the guest mm in host IOMMU
+ * @gpasid: Process address space ID used for the guest mm in guest IOMMU
+ * @addr_width: Guest virtual address width
+ * @padding: Reserved for future use (should be zero)
+ * @vtd: Intel VT-d specific data
+ *
+ * Guest to host PASID mapping can be an identity or non-identity, where guest
+ * has its own PASID space. For non-identify mapping, guest to host PASID lookup
+ * is needed when VM programs guest PASID into an assigned device. VMM may
+ * trap such PASID programming then request host IOMMU driver to convert guest
+ * PASID to host PASID based on this bind data.
+ */
+struct iommu_gpasid_bind_data {
+ __u32 argsz;
+#define IOMMU_GPASID_BIND_VERSION_1 1
+ __u32 version;
+#define IOMMU_PASID_FORMAT_INTEL_VTD 1
+#define IOMMU_PASID_FORMAT_LAST 2
+ __u32 format;
+ __u32 addr_width;
+#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */
+ __u64 flags;
+ __u64 gpgd;
+ __u64 hpasid;
+ __u64 gpasid;
+ __u8 padding[8];
+ /* Vendor specific data */
+ union {
+ struct iommu_gpasid_bind_data_vtd vtd;
+ } vendor;
+};
+
+/**
+ * struct iommu_pasid_smmuv3 - ARM SMMUv3 Stream Table Entry stage 1 related
+ * information
+ * @version: API version of this structure
+ * @s1fmt: STE s1fmt (format of the CD table: single CD, linear table
+ * or 2-level table)
+ * @s1dss: STE s1dss (specifies the behavior when @pasid_bits != 0
+ * and no PASID is passed along with the incoming transaction)
+ * @padding: reserved for future use (should be zero)
+ *
+ * The PASID table is referred to as the Context Descriptor (CD) table on ARM
+ * SMMUv3. Please refer to the ARM SMMU 3.x spec (ARM IHI 0070A) for full
+ * details.
+ */
+struct iommu_pasid_smmuv3 {
+#define PASID_TABLE_SMMUV3_CFG_VERSION_1 1
+ __u32 version;
+ __u8 s1fmt;
+ __u8 s1dss;
+ __u8 padding[2];
+};
+
+/**
+ * struct iommu_pasid_table_config - PASID table data used to bind guest PASID
+ * table to the host IOMMU
+ * @argsz: User filled size of this data
+ * @version: API version to prepare for future extensions
+ * @base_ptr: guest physical address of the PASID table
+ * @format: format of the PASID table
+ * @pasid_bits: number of PASID bits used in the PASID table
+ * @config: indicates whether the guest translation stage must
+ * be translated, bypassed or aborted.
+ * @padding: reserved for future use (should be zero)
+ * @vendor_data.smmuv3: table information when @format is
+ * %IOMMU_PASID_FORMAT_SMMUV3
+ */
+struct iommu_pasid_table_config {
+ __u32 argsz;
+#define PASID_TABLE_CFG_VERSION_1 1
+ __u32 version;
+ __u64 base_ptr;
+#define IOMMU_PASID_FORMAT_SMMUV3 1
+ __u32 format;
+ __u8 pasid_bits;
+#define IOMMU_PASID_CONFIG_TRANSLATE 1
+#define IOMMU_PASID_CONFIG_BYPASS 2
+#define IOMMU_PASID_CONFIG_ABORT 3
+ __u8 config;
+ __u8 padding[2];
+ union {
+ struct iommu_pasid_smmuv3 smmuv3;
+ } vendor_data;
+};
+
+#endif /* _UAPI_IOMMU_H */
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index f4ff038e8c..cf8e208fac 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#include <linux/iommu.h>
#define VFIO_API_VERSION 0
@@ -334,6 +335,7 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_TYPE_GFX (1)
#define VFIO_REGION_TYPE_CCW (2)
#define VFIO_REGION_TYPE_MIGRATION (3)
+#define VFIO_REGION_TYPE_NESTED (4)
/* sub-types for VFIO_REGION_TYPE_PCI_* */
@@ -362,6 +364,10 @@ struct vfio_region_info_cap_type {
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
+/* sub-types for VFIO_REGION_TYPE_NESTED */
+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT (1)
+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE (2)
+
/**
* struct vfio_region_gfx_edid - EDID region layout.
*
@@ -721,11 +727,30 @@ struct vfio_irq_info {
#define VFIO_IRQ_INFO_MASKABLE (1 << 1)
#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2)
#define VFIO_IRQ_INFO_NORESIZE (1 << 3)
+#define VFIO_IRQ_INFO_FLAG_CAPS (1 << 4) /* Info supports caps */
__u32 index; /* IRQ index */
__u32 count; /* Number of IRQs within this index */
+ __u32 cap_offset; /* Offset within info struct of first cap */
};
#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9)
+/*
+ * The irq type capability allows IRQs unique to a specific device or
+ * class of devices to be exposed.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IRQ_INFO_CAP_TYPE 3
+
+struct vfio_irq_info_cap_type {
+ struct vfio_info_cap_header header;
+ __u32 type; /* global per bus driver */
+ __u32 subtype; /* type specific */
+};
+
+#define VFIO_IRQ_TYPE_NESTED (1)
+#define VFIO_IRQ_SUBTYPE_DMA_FAULT (1)
+
/**
* VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
*
@@ -827,7 +852,8 @@ enum {
VFIO_PCI_MSIX_IRQ_INDEX,
VFIO_PCI_ERR_IRQ_INDEX,
VFIO_PCI_REQ_IRQ_INDEX,
- VFIO_PCI_NUM_IRQS
+ VFIO_PCI_NUM_IRQS = 5 /* Fixed user ABI, IRQ indexes >=5 use */
+ /* device specific cap to define content */
};
/*
@@ -1012,6 +1038,68 @@ struct vfio_device_feature {
*/
#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
+/*
+ * Capability exposed by the DMA fault region
+ * @version: ABI version
+ */
+#define VFIO_REGION_INFO_CAP_DMA_FAULT 6
+
+struct vfio_region_info_cap_fault {
+ struct vfio_info_cap_header header;
+ __u32 version;
+};
+
+/*
+ * Capability exposed by the DMA fault response region
+ * @version: ABI version
+ */
+#define VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE 7
+
+struct vfio_region_info_cap_fault_response {
+ struct vfio_info_cap_header header;
+ __u32 version;
+};
+
+/*
+ * DMA Fault Region Layout
+ * @tail: index relative to the start of the ring buffer at which the
+ * consumer finds the next item in the buffer
+ * @entry_size: fault ring buffer entry size in bytes
+ * @nb_entries: max capacity of the fault ring buffer
+ * @offset: ring buffer offset relative to the start of the region
+ * @head: index relative to the start of the ring buffer at which the
+ * producer (kernel) inserts items into the buffers
+ */
+struct vfio_region_dma_fault {
+ /* Write-Only */
+ __u32 tail;
+ /* Read-Only */
+ __u32 entry_size;
+ __u32 nb_entries;
+ __u32 offset;
+ __u32 head;
+};
+
+/*
+ * DMA Fault Response Region Layout
+ * @head: index relative to the start of the ring buffer at which the
+ * producer (userspace) inserts responses into the buffer
+ * @entry_size: fault ring buffer entry size in bytes
+ * @nb_entries: max capacity of the fault ring buffer
+ * @offset: ring buffer offset relative to the start of the region
+ * @tail: index relative to the start of the ring buffer at which the
+ * consumer (kernel) finds the next item in the buffer
+ */
+struct vfio_region_dma_fault_response {
+ /* Write-Only */
+ __u32 head;
+ /* Read-Only */
+ __u32 entry_size;
+ __u32 nb_entries;
+ __u32 offset;
+ __u32 tail;
+};
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
@@ -1124,7 +1212,7 @@ struct vfio_iommu_type1_dma_map {
struct vfio_bitmap {
__u64 pgsize; /* page size for bitmap in bytes */
__u64 size; /* in bytes */
- __u64 *data; /* one bit per page */
+ __u64 *data; /* one bit per page */
};
/**
@@ -1250,6 +1338,134 @@ struct vfio_iommu_type1_dirty_bitmap_get {
#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
+/*
+ * VFIO_IOMMU_BIND_PROCESS
+ *
+ * Allocate a PASID for a process address space, and use it to attach this
+ * process to all devices in the container. Devices can then tag their DMA
+ * traffic with the returned @pasid to perform transactions on the associated
+ * virtual address space. Mapping and unmapping buffers is performed by standard
+ * functions such as mmap and malloc.
+ *
+ * If flag is VFIO_IOMMU_BIND_PID, @pid contains the pid of a foreign process to
+ * bind. Otherwise the current task is bound. Given that the caller owns the
+ * device, setting this flag grants the caller read and write permissions on the
+ * entire address space of foreign process described by @pid. Therefore,
+ * permission to perform the bind operation on a foreign process is governed by
+ * the ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check. See man ptrace(2)
+ * for more information.
+ *
+ * On success, VFIO writes a Process Address Space ID (PASID) into @pasid. This
+ * ID is unique to a process and can be used on all devices in the container.
+ *
+ * On fork, the child inherits the device fd and can use the bonds set up by its
+ * parent. Consequently, the child has R/W access on the address spaces bound by
+ * its parent. After an execv, the device fd is closed and the child doesn't
+ * have access to the address space anymore.
+ *
+ * To remove a bond between process and container, VFIO_IOMMU_UNBIND ioctl is
+ * issued with the same parameters. If a pid was specified in VFIO_IOMMU_BIND,
+ * it should also be present for VFIO_IOMMU_UNBIND. Otherwise unbind the current
+ * task from the container.
+ */
+struct vfio_iommu_type1_bind_process {
+ __u32 flags;
+#define VFIO_IOMMU_BIND_PID (1 << 0)
+ __u32 pasid;
+ __s32 pid;
+};
+
+/*
+ * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes
+ * vfio_iommu_type1_bind_process in data.
+ */
+struct vfio_iommu_type1_bind {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IOMMU_BIND_PROCESS (1 << 0)
+ __u8 data[];
+};
+
+/*
+ * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind)
+ *
+ * Manage address spaces of devices in this container. Initially a TYPE1
+ * container can only have one address space, managed with
+ * VFIO_IOMMU_MAP/UNMAP_DMA.
+ *
+ * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both MAP/UNMAP
+ * and BIND ioctls at the same time. MAP/UNMAP acts on the stage-2 (host) page
+ * tables, and BIND manages the stage-1 (guest) page tables. Other types of
+ * IOMMU may allow MAP/UNMAP and BIND to coexist, where MAP/UNMAP controls
+ * non-PASID traffic and BIND controls PASID traffic. But this depends on the
+ * underlying IOMMU architecture and isn't guaranteed.
+ *
+ * Availability of this feature depends on the device, its bus, the underlying
+ * IOMMU and the CPU architecture.
+ *
+ * returns: 0 on success, -errno on failure.
+ */
+#define VFIO_IOMMU_BIND _IO(VFIO_TYPE, VFIO_BASE + 22)
+
+/*
+ * VFIO_IOMMU_UNBIND - _IOWR(VFIO_TYPE, VFIO_BASE + 23, struct vfio_iommu_bind)
+ *
+ * Undo what was done by the corresponding VFIO_IOMMU_BIND ioctl.
+ */
+#define VFIO_IOMMU_UNBIND _IO(VFIO_TYPE, VFIO_BASE + 23)
+
+/*
+ * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 18,
+ * struct vfio_iommu_type1_set_pasid_table)
+ *
+ * The SET operation passes a PASID table to the host while the
+ * UNSET operation detaches the one currently programmed. It is
+ * allowed to "SET" the table several times without unsetting as
+ * long as the table config does not stay IOMMU_PASID_CONFIG_TRANSLATE.
+ */
+struct vfio_iommu_type1_set_pasid_table {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
+#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
+ struct iommu_pasid_table_config config; /* used on SET */
+};
+
+#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/**
+ * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19,
+ * struct vfio_iommu_type1_cache_invalidate)
+ *
+ * Propagate guest IOMMU cache invalidation to the host.
+ */
+struct vfio_iommu_type1_cache_invalidate {
+ __u32 argsz;
+ __u32 flags;
+ struct iommu_cache_invalidate_info info;
+};
+#define VFIO_IOMMU_CACHE_INVALIDATE _IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/**
+ * VFIO_IOMMU_SET_MSI_BINDING - _IOWR(VFIO_TYPE, VFIO_BASE + 20,
+ * struct vfio_iommu_type1_set_msi_binding)
+ *
+ * Pass a stage 1 MSI doorbell mapping to the host so that the
+ * latter can build a nested stage-2 mapping, or conversely tear
+ * down a previously bound stage 1 MSI binding.
+ */
+struct vfio_iommu_type1_set_msi_binding {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IOMMU_BIND_MSI (1 << 0)
+#define VFIO_IOMMU_UNBIND_MSI (1 << 1)
+ __u64 iova; /* MSI guest IOVA */
+ /* Fields below are used on BIND */
+ __u64 gpa; /* MSI guest physical address */
+ __u64 size; /* size of stage1 mapping (bytes) */
+};
+#define VFIO_IOMMU_SET_MSI_BINDING _IO(VFIO_TYPE, VFIO_BASE + 20)
+
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
/*
--
2.27.0
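Not part of the patch above: the following is a rough userspace sketch, compilable only against these patched headers, of how a VMM might drive the new ioctls on a VFIO container opened with the VFIO_TYPE1_NESTING_IOMMU type. The container fd, guest CD table address, pasid_bits value, and MSI doorbell addresses are illustrative placeholders, and IOMMU_INV_GRANU_DOMAIN comes from the granularity enum defined earlier in iommu.h.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/iommu.h>
#include <linux/vfio.h>

/* Hand the guest's CD (PASID) table to the host IOMMU. pasid_bits and the
 * SMMUv3 fields are placeholder values, not mandated by the patch. */
static int set_guest_pasid_table(int container_fd, __u64 cd_table_gpa)
{
    struct vfio_iommu_type1_set_pasid_table spt;

    memset(&spt, 0, sizeof(spt));
    spt.argsz = sizeof(spt);
    spt.flags = VFIO_PASID_TABLE_FLAG_SET;
    spt.config.argsz = sizeof(spt.config);
    spt.config.version = PASID_TABLE_CFG_VERSION_1;
    spt.config.base_ptr = cd_table_gpa;
    spt.config.format = IOMMU_PASID_FORMAT_SMMUV3;
    spt.config.pasid_bits = 16;                       /* placeholder */
    spt.config.config = IOMMU_PASID_CONFIG_TRANSLATE;
    spt.config.vendor_data.smmuv3.version = PASID_TABLE_SMMUV3_CFG_VERSION_1;

    return ioctl(container_fd, VFIO_IOMMU_SET_PASID_TABLE, &spt);
}

/* Forward a domain-wide guest IOTLB invalidation; per the header comment,
 * IOMMU_INV_GRANU_DOMAIN needs no argument beyond version and cache. */
static int invalidate_guest_iotlb(int container_fd)
{
    struct vfio_iommu_type1_cache_invalidate ci;

    memset(&ci, 0, sizeof(ci));
    ci.argsz = sizeof(ci);
    ci.info.argsz = sizeof(ci.info);
    ci.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1;
    ci.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB;
    ci.info.granularity = IOMMU_INV_GRANU_DOMAIN;

    return ioctl(container_fd, VFIO_IOMMU_CACHE_INVALIDATE, &ci);
}

/* Register a guest MSI doorbell (IOVA -> GPA) so the host can build the
 * nested stage-2 mapping for it. */
static int bind_guest_msi(int container_fd, __u64 iova, __u64 gpa, __u64 size)
{
    struct vfio_iommu_type1_set_msi_binding msi;

    memset(&msi, 0, sizeof(msi));
    msi.argsz = sizeof(msi);
    msi.flags = VFIO_IOMMU_BIND_MSI;
    msi.iova = iova;
    msi.gpa = gpa;
    msi.size = size;

    return ioctl(container_fd, VFIO_IOMMU_SET_MSI_BINDING, &msi);
}

As the PASID table comment above notes, the SET call may be repeated to update the table configuration, and the same container handle is reused for the invalidation and MSI-binding paths.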

View File

@ -0,0 +1,47 @@
From a95ada20170af0a71529c1583846e402cdbb850b Mon Sep 17 00:00:00 2001
From: Yan Wang <wangyan122@huawei.com>
Date: Thu, 10 Feb 2022 10:41:40 +0800
Subject: [PATCH] xhci: check reg to avoid OOB read
Add a sanity check on the interrupter index derived from the register
offset, to avoid an out-of-bounds read of the xhci->intr[] array.
Signed-off-by: Yan Wang <wangyan122@huawei.com>
---
hw/usb/hcd-xhci.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index e01700039b..08cd63e159 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -27,6 +27,7 @@
#include "hw/qdev-properties.h"
#include "trace.h"
#include "qapi/error.h"
+#include "qemu/log.h"
#include "hcd-xhci.h"
@@ -3017,14 +3018,17 @@ static void xhci_runtime_write(void *ptr, hwaddr reg,
XHCIInterrupter *intr;
int v;
- trace_usb_xhci_runtime_write(reg, val);
-
if (reg < 0x20) {
trace_usb_xhci_unimplemented("runtime write", reg);
return;
}
v = (reg - 0x20) / 0x20;
+ if (v >= xhci->numintrs) {
+ qemu_log("intr nr out of range (%d >= %d)\n", v, xhci->numintrs);
+ return;
+ }
intr = &xhci->intr[v];
+ trace_usb_xhci_runtime_write(reg, val);
switch (reg & 0x1f) {
case 0x00: /* IMAN */
--
2.27.0
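For context on the check added above, and not part of the patch: each interrupter owns a 0x20-byte block of runtime registers starting at offset 0x20, so the interrupter index is computed directly from the guest-controlled offset. A small standalone illustration, assuming a hypothetical controller configured with 8 interrupters:

#include <stdio.h>

/* Illustrative only: how a guest-chosen runtime-register offset maps to an
 * interrupter index, and why the new bound check is needed. numintrs == 8 is
 * an assumed configuration, not taken from the patch. */
int main(void)
{
    unsigned int numintrs = 8;
    unsigned long reg = 0x20 + 8 * 0x20;  /* first offset past the last interrupter */
    unsigned long v = (reg - 0x20) / 0x20;

    if (v >= numintrs) {
        /* without the check, xhci->intr[8] would be read out of bounds */
        printf("offset 0x%lx -> interrupter %lu: rejected (only %u interrupters)\n",
               reg, v, numintrs);
    }
    return 0;
}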