iommu/amd: Handle parallel invalidate_range_start/end calls correctly
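Add a counter to the pasid_state so that the original page-table
is not restored until every invalidate_range_start/invalidate_range_end
section has finished. The empty page-table is installed only when the
counter goes from 0 to 1, and the mm's page-table is restored only
when the counter drops back to 0.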
Signed-off-by: Joerg Roedel <jroedel@suse.de>
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index c65f3ec..d4daa05 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -45,6 +45,8 @@
struct pasid_state {
struct list_head list; /* For global state-list */
atomic_t count; /* Reference count */
+ atomic_t mmu_notifier_count; /* Counting nested mmu_notifier
+ calls */
struct task_struct *task; /* Task bound to this PASID */
struct mm_struct *mm; /* mm_struct for the faults */
struct mmu_notifier mn; /* mmu_otifier handle */
@@ -433,8 +435,11 @@
pasid_state = mn_to_state(mn);
dev_state = pasid_state->device_state;
- amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
- __pa(empty_page_table));
+ if (atomic_add_return(1, &pasid_state->mmu_notifier_count) == 1) {
+ amd_iommu_domain_set_gcr3(dev_state->domain,
+ pasid_state->pasid,
+ __pa(empty_page_table));
+ }
}
static void mn_invalidate_range_end(struct mmu_notifier *mn,
@@ -447,8 +452,11 @@
pasid_state = mn_to_state(mn);
dev_state = pasid_state->device_state;
- amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
- __pa(pasid_state->mm->pgd));
+ if (atomic_dec_and_test(&pasid_state->mmu_notifier_count)) {
+ amd_iommu_domain_set_gcr3(dev_state->domain,
+ pasid_state->pasid,
+ __pa(pasid_state->mm->pgd));
+ }
}
static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -642,6 +650,7 @@
goto out;
atomic_set(&pasid_state->count, 1);
+ atomic_set(&pasid_state->mmu_notifier_count, 0);
init_waitqueue_head(&pasid_state->wq);
spin_lock_init(&pasid_state->lock);