drm/i915: ppgtt binding/unbinding support

This adds support to bind/unbind objects and wires it up. Objects are
only put into the ppgtt when necessary, i.e. at execbuf time.

Objects are still unconditionally put into the global gtt.

v2: Kill the quick hack and explicitly pass cache_level to ppgtt_bind
like for the global gtt function. Noticed by Chris Wilson.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 03a9e49..35c8b53 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -850,6 +850,8 @@
 
 	unsigned int cache_level:2;
 
+	unsigned int has_aliasing_ppgtt_mapping:1;
+
 	struct page **pages;
 
 	/**
@@ -1249,6 +1251,11 @@
 /* i915_gem_gtt.c */
 int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
+void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
+			    struct drm_i915_gem_object *obj,
+			    enum i915_cache_level cache_level);
+void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
+			      struct drm_i915_gem_object *obj);
 
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 27fe07a..5909299 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2020,6 +2020,7 @@
 int
 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
+	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 	int ret = 0;
 
 	if (obj->gtt_space == NULL)
@@ -2064,6 +2065,11 @@
 	trace_i915_gem_object_unbind(obj);
 
 	i915_gem_gtt_unbind_object(obj);
+	if (obj->has_aliasing_ppgtt_mapping) {
+		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
+		obj->has_aliasing_ppgtt_mapping = 0;
+	}
+
 	i915_gem_object_put_pages_gtt(obj);
 
 	list_del_init(&obj->gtt_list);
@@ -2882,6 +2888,8 @@
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
+	struct drm_device *dev = obj->base.dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
 	if (obj->cache_level == cache_level)
@@ -2910,6 +2918,9 @@
 		}
 
 		i915_gem_gtt_rebind_object(obj, cache_level);
+		if (obj->has_aliasing_ppgtt_mapping)
+			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+					       obj, cache_level);
 	}
 
 	if (cache_level == I915_CACHE_NONE) {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b964998..9835b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -515,6 +515,7 @@
 			    struct drm_file *file,
 			    struct list_head *objects)
 {
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	int ret, retry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
@@ -623,6 +624,14 @@
 			}
 
 			i915_gem_object_unpin(obj);
+
+			/* ... and ensure ppgtt mapping exist if needed. */
+			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+						       obj, obj->cache_level);
+
+				obj->has_aliasing_ppgtt_mapping = 1;
+			}
 		}
 
 		if (ret != -ENOSPC || retry > 1)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f408f8c..2eacd78 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -34,22 +34,31 @@
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
-	int i, j;
 	uint32_t *pt_vaddr;
 	uint32_t scratch_pte;
+	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
+	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned last_pte, i;
 
 	scratch_pte = GEN6_PTE_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
 	scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;
 
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[i]);
+	while (num_entries) {
+		last_pte = first_pte + num_entries;
+		if (last_pte > I915_PPGTT_PT_ENTRIES)
+			last_pte = I915_PPGTT_PT_ENTRIES;
 
-		for (j = 0; j < I915_PPGTT_PT_ENTRIES; j++)
-			pt_vaddr[j] = scratch_pte;
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+
+		for (i = first_pte; i < last_pte; i++)
+			pt_vaddr[i] = scratch_pte;
 
 		kunmap_atomic(pt_vaddr);
-	}
 
+		num_entries -= last_pte - first_pte;
+		first_pte = 0;
+		act_pd++;
+	}
 }
 
 int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
@@ -168,6 +177,131 @@
 	kfree(ppgtt);
 }
 
+static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
+					 struct scatterlist *sg_list,
+					 unsigned sg_len,
+					 unsigned first_entry,
+					 uint32_t pte_flags)
+{
+	uint32_t *pt_vaddr, pte;
+	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
+	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned i, j, m, segment_len;
+	dma_addr_t page_addr;
+	struct scatterlist *sg;
+
+	/* init sg walking */
+	sg = sg_list;
+	i = 0;
+	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+	m = 0;
+
+	while (i < sg_len) {
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+
+		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
+			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
+			pt_vaddr[j] = pte | pte_flags;
+
+			/* grab the next page */
+			m++;
+			if (m == segment_len) {
+				sg = sg_next(sg);
+				i++;
+				if (i == sg_len)
+					break;
+
+				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+				m = 0;
+			}
+		}
+
+		kunmap_atomic(pt_vaddr);
+
+		first_pte = 0;
+		act_pd++;
+	}
+}
+
+static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
+				    unsigned first_entry, unsigned num_entries,
+				    struct page **pages, uint32_t pte_flags)
+{
+	uint32_t *pt_vaddr, pte;
+	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
+	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned last_pte, i;
+	dma_addr_t page_addr;
+
+	while (num_entries) {
+		last_pte = first_pte + num_entries;
+		last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
+
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+
+		for (i = first_pte; i < last_pte; i++) {
+			page_addr = page_to_phys(*pages);
+			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
+			pt_vaddr[i] = pte | pte_flags;
+
+			pages++;
+		}
+
+		kunmap_atomic(pt_vaddr);
+
+		num_entries -= last_pte - first_pte;
+		first_pte = 0;
+		act_pd++;
+	}
+}
+
+void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
+			    struct drm_i915_gem_object *obj,
+			    enum i915_cache_level cache_level)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t pte_flags = GEN6_PTE_VALID;
+
+	switch (cache_level) {
+	case I915_CACHE_LLC_MLC:
+		pte_flags |= GEN6_PTE_CACHE_LLC_MLC;
+		break;
+	case I915_CACHE_LLC:
+		pte_flags |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		pte_flags |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		BUG();
+	}
+
+	if (dev_priv->mm.gtt->needs_dmar) {
+		BUG_ON(!obj->sg_list);
+
+		i915_ppgtt_insert_sg_entries(ppgtt,
+					     obj->sg_list,
+					     obj->num_sg,
+					     obj->gtt_space->start >> PAGE_SHIFT,
+					     pte_flags);
+	} else
+		i915_ppgtt_insert_pages(ppgtt,
+					obj->gtt_space->start >> PAGE_SHIFT,
+					obj->base.size >> PAGE_SHIFT,
+					obj->pages,
+					pte_flags);
+}
+
+void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
+			      struct drm_i915_gem_object *obj)
+{
+	i915_ppgtt_clear_range(ppgtt,
+			       obj->gtt_space->start >> PAGE_SHIFT,
+			       obj->base.size >> PAGE_SHIFT);
+}
+
 /* XXX kill agp_type! */
 static unsigned int cache_level_to_agp_type(struct drm_device *dev,
 					    enum i915_cache_level cache_level)