Merge branch 'drm-nouveau-fixes-3.9' of git://anongit.freedesktop.org/git/nouveau/linux-2.6

misc regression fixes from Ben.

* 'drm-nouveau-fixes-3.9' of git://anongit.freedesktop.org/git/nouveau/linux-2.6:
  drm/nv50-: prevent some races between modesetting and page flipping
  drm/nouveau/i2c: drop parent refcount when creating ports
  drm/nv84: fix regression in page flipping
  drm/nouveau: Fix typo in init_idx_addr_latched().
  drm/nouveau: Disable AGP on PowerPC again.
  drm/nve0/graph: some random reg moved on kepler
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
index 61cec0f..4857f91 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
@@ -350,7 +350,7 @@
 		nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
-	nv_wr32(priv, GPC_BCAST(0x1bd4), magicgpc918);
+	nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918);
 	nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800));
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
index 2cc1e6a..9c41b58 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
@@ -869,7 +869,7 @@
 		init->offset += 2;
 
 		init_wr32(init, dreg, idata);
-		init_mask(init, creg, ~mask, data | idata);
+		init_mask(init, creg, ~mask, data | iaddr);
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
index a114a0e..2e98e8a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
@@ -142,6 +142,7 @@
 	/* drop port's i2c subdev refcount, i2c handles this itself */
 	if (ret == 0) {
 		list_add_tail(&port->head, &i2c->ports);
+		atomic_dec(&parent->refcount);
 		atomic_dec(&engine->refcount);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_agp.c b/drivers/gpu/drm/nouveau/nouveau_agp.c
index d28430c..6e7a55f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_agp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_agp.c
@@ -47,6 +47,18 @@
 	if (drm->agp.stat == UNKNOWN) {
 		if (!nouveau_agpmode)
 			return false;
+#ifdef __powerpc__
+		/* Disable AGP by default on all PowerPC machines for
+		 * now -- At least some UniNorth-2 AGP bridges are
+		 * known to be broken: DMA from the host to the card
+		 * works just fine, but writeback from the card to the
+		 * host goes straight to memory untranslated bypassing
+		 * the GATT somehow, making them quite painful to deal
+		 * with...
+		 */
+		if (nouveau_agpmode == -1)
+			return false;
+#endif
 		return true;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index a6237c9..87a5a56 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -55,9 +55,9 @@
 
 /* offsets in shared sync bo of various structures */
 #define EVO_SYNC(c, o) ((c) * 0x0100 + (o))
-#define EVO_MAST_NTFY     EVO_SYNC(  0, 0x00)
-#define EVO_FLIP_SEM0(c)  EVO_SYNC((c), 0x00)
-#define EVO_FLIP_SEM1(c)  EVO_SYNC((c), 0x10)
+#define EVO_MAST_NTFY     EVO_SYNC(      0, 0x00)
+#define EVO_FLIP_SEM0(c)  EVO_SYNC((c) + 1, 0x00)
+#define EVO_FLIP_SEM1(c)  EVO_SYNC((c) + 1, 0x10)
 
 #define EVO_CORE_HANDLE      (0xd1500000)
 #define EVO_CHAN_HANDLE(t,i) (0xd15c0000 | (((t) & 0x00ff) << 8) | (i))
@@ -341,10 +341,8 @@
 
 struct nv50_sync {
 	struct nv50_dmac base;
-	struct {
-		u32 offset;
-		u16 value;
-	} sem;
+	u32 addr;
+	u32 data;
 };
 
 struct nv50_ovly {
@@ -471,13 +469,33 @@
 	return nv50_disp(dev)->sync;
 }
 
+struct nv50_display_flip {
+	struct nv50_disp *disp;
+	struct nv50_sync *chan;
+};
+
+static bool
+nv50_display_flip_wait(void *data)
+{
+	struct nv50_display_flip *flip = data;
+	if (nouveau_bo_rd32(flip->disp->sync, flip->chan->addr / 4) ==
+					      flip->chan->data);
+		return true;
+	usleep_range(1, 2);
+	return false;
+}
+
 void
 nv50_display_flip_stop(struct drm_crtc *crtc)
 {
-	struct nv50_sync *sync = nv50_sync(crtc);
+	struct nouveau_device *device = nouveau_dev(crtc->dev);
+	struct nv50_display_flip flip = {
+		.disp = nv50_disp(crtc->dev),
+		.chan = nv50_sync(crtc),
+	};
 	u32 *push;
 
-	push = evo_wait(sync, 8);
+	push = evo_wait(flip.chan, 8);
 	if (push) {
 		evo_mthd(push, 0x0084, 1);
 		evo_data(push, 0x00000000);
@@ -487,8 +505,10 @@
 		evo_data(push, 0x00000000);
 		evo_mthd(push, 0x0080, 1);
 		evo_data(push, 0x00000000);
-		evo_kick(push, sync);
+		evo_kick(push, flip.chan);
 	}
+
+	nv_wait_cb(device, nv50_display_flip_wait, &flip);
 }
 
 int
@@ -496,11 +516,10 @@
 		       struct nouveau_channel *chan, u32 swap_interval)
 {
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
-	struct nv50_disp *disp = nv50_disp(crtc->dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv50_sync *sync = nv50_sync(crtc);
+	int head = nv_crtc->index, ret;
 	u32 *push;
-	int ret;
 
 	swap_interval <<= 4;
 	if (swap_interval == 0)
@@ -510,58 +529,64 @@
 	if (unlikely(push == NULL))
 		return -EBUSY;
 
-	/* synchronise with the rendering channel, if necessary */
-	if (likely(chan)) {
+	if (chan && nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) {
+		ret = RING_SPACE(chan, 8);
+		if (ret)
+			return ret;
+
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
+		OUT_RING  (chan, NvEvoSema0 + head);
+		OUT_RING  (chan, sync->addr ^ 0x10);
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
+		OUT_RING  (chan, sync->data + 1);
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2);
+		OUT_RING  (chan, sync->addr);
+		OUT_RING  (chan, sync->data);
+	} else
+	if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
+		u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+		ret = RING_SPACE(chan, 12);
+		if (ret)
+			return ret;
+
+		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
+		OUT_RING  (chan, chan->vram);
+		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(addr ^ 0x10));
+		OUT_RING  (chan, lower_32_bits(addr ^ 0x10));
+		OUT_RING  (chan, sync->data + 1);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
+		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(addr));
+		OUT_RING  (chan, lower_32_bits(addr));
+		OUT_RING  (chan, sync->data);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+	} else
+	if (chan) {
+		u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
 		ret = RING_SPACE(chan, 10);
 		if (ret)
 			return ret;
 
-		if (nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) {
-			BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-			OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
-			OUT_RING  (chan, sync->sem.offset);
-			BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
-			OUT_RING  (chan, 0xf00d0000 | sync->sem.value);
-			BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2);
-			OUT_RING  (chan, sync->sem.offset ^ 0x10);
-			OUT_RING  (chan, 0x74b1e000);
-			BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
-			OUT_RING  (chan, NvSema);
-		} else
-		if (nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
-			u64 offset = nv84_fence_crtc(chan, nv_crtc->index);
-			offset += sync->sem.offset;
+		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(addr ^ 0x10));
+		OUT_RING  (chan, lower_32_bits(addr ^ 0x10));
+		OUT_RING  (chan, sync->data + 1);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG |
+				 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
+		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
+		OUT_RING  (chan, upper_32_bits(addr));
+		OUT_RING  (chan, lower_32_bits(addr));
+		OUT_RING  (chan, sync->data);
+		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL |
+				 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
+	}
 
-			BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-			OUT_RING  (chan, upper_32_bits(offset));
-			OUT_RING  (chan, lower_32_bits(offset));
-			OUT_RING  (chan, 0xf00d0000 | sync->sem.value);
-			OUT_RING  (chan, 0x00000002);
-			BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-			OUT_RING  (chan, upper_32_bits(offset));
-			OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
-			OUT_RING  (chan, 0x74b1e000);
-			OUT_RING  (chan, 0x00000001);
-		} else {
-			u64 offset = nv84_fence_crtc(chan, nv_crtc->index);
-			offset += sync->sem.offset;
-
-			BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-			OUT_RING  (chan, upper_32_bits(offset));
-			OUT_RING  (chan, lower_32_bits(offset));
-			OUT_RING  (chan, 0xf00d0000 | sync->sem.value);
-			OUT_RING  (chan, 0x00001002);
-			BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-			OUT_RING  (chan, upper_32_bits(offset));
-			OUT_RING  (chan, lower_32_bits(offset ^ 0x10));
-			OUT_RING  (chan, 0x74b1e000);
-			OUT_RING  (chan, 0x00001001);
-		}
-
+	if (chan) {
+		sync->addr ^= 0x10;
+		sync->data++;
 		FIRE_RING (chan);
 	} else {
-		nouveau_bo_wr32(disp->sync, sync->sem.offset / 4,
-				0xf00d0000 | sync->sem.value);
 		evo_sync(crtc->dev);
 	}
 
@@ -575,9 +600,9 @@
 		evo_data(push, 0x40000000);
 	}
 	evo_mthd(push, 0x0088, 4);
-	evo_data(push, sync->sem.offset);
-	evo_data(push, 0xf00d0000 | sync->sem.value);
-	evo_data(push, 0x74b1e000);
+	evo_data(push, sync->addr);
+	evo_data(push, sync->data++);
+	evo_data(push, sync->data);
 	evo_data(push, NvEvoSync);
 	evo_mthd(push, 0x00a0, 2);
 	evo_data(push, 0x00000000);
@@ -605,9 +630,6 @@
 	evo_mthd(push, 0x0080, 1);
 	evo_data(push, 0x00000000);
 	evo_kick(push, sync);
-
-	sync->sem.offset ^= 0x10;
-	sync->sem.value++;
 	return 0;
 }
 
@@ -1379,7 +1401,8 @@
 	if (ret)
 		goto out;
 
-	head->sync.sem.offset = EVO_SYNC(1 + index, 0x00);
+	head->sync.addr = EVO_FLIP_SEM0(index);
+	head->sync.data = 0x00000000;
 
 	/* allocate overlay resources */
 	ret = nv50_pioc_create(disp->core, NV50_DISP_OIMM_CLASS, index,
@@ -2112,15 +2135,23 @@
 int
 nv50_display_init(struct drm_device *dev)
 {
-	u32 *push = evo_wait(nv50_mast(dev), 32);
-	if (push) {
-		evo_mthd(push, 0x0088, 1);
-		evo_data(push, NvEvoSync);
-		evo_kick(push, nv50_mast(dev));
-		return 0;
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct drm_crtc *crtc;
+	u32 *push;
+
+	push = evo_wait(nv50_mast(dev), 32);
+	if (!push)
+		return -EBUSY;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct nv50_sync *sync = nv50_sync(crtc);
+		nouveau_bo_wr32(disp->sync, sync->addr / 4, sync->data);
 	}
 
-	return -EBUSY;
+	evo_mthd(push, 0x0088, 1);
+	evo_data(push, NvEvoSync);
+	evo_kick(push, nv50_mast(dev));
+	return 0;
 }
 
 void