Merge branch 'for-airlied-next' of git://people.freedesktop.org/~mlankhorst/linux into drm-next
Merge the move to generic fences for TTM-using drivers.
* 'for-airlied-next' of git://people.freedesktop.org/~mlankhorst/linux:
drm/nouveau: use shared fences for readable objects
drm/nouveau: Keep only a single list for validation.
drm/ttm: use rcu in core ttm
drm/vmwgfx: use rcu in vmw_user_dmabuf_synccpu_grab
drm/radeon: use rcu waits in some ioctls
drm/nouveau: use rcu in nouveau_gem_ioctl_cpu_prep
drm/ttm: flip the switch, and convert to dma_fence
drm/qxl: rework to new fence interface
drm/nouveau: rework to new fence interface
drm/vmwgfx: rework to new fence interface, v2
drm/vmwgfx: get rid of different types of fence_flags entirely
drm/radeon: use common fence implementation for fences, v4
drm/ttm: kill off some members to ttm_validate_buffer
drm/ttm: add interruptible parameter to ttm_eu_reserve_buffers
drm/ttm: kill fence_lock
drm/ttm: call ttm_bo_wait while inside a reservation
drm/nouveau: require reservations for nouveau_fence_sync and nouveau_bo_fence
drm/nouveau: add reservation to nouveau_gem_ioctl_cpu_prep
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0591ca0..eea74b1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -88,13 +88,13 @@
static void
nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
- struct nouveau_fence *fence)
+ struct fence *fence)
{
struct nouveau_drm *drm = nouveau_drm(dev);
if (tile) {
spin_lock(&drm->tile.lock);
- tile->fence = nouveau_fence_ref(fence);
+ tile->fence = (struct nouveau_fence *)fence_get(fence);
tile->used = false;
spin_unlock(&drm->tile.lock);
}
@@ -970,13 +970,14 @@
}
mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
- ret = nouveau_fence_sync(bo->sync_obj, chan);
+ ret = nouveau_fence_sync(nouveau_bo(bo), chan, true);
if (ret == 0) {
ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
if (ret == 0) {
ret = nouveau_fence_new(chan, false, &fence);
if (ret == 0) {
- ret = ttm_bo_move_accel_cleanup(bo, fence,
+ ret = ttm_bo_move_accel_cleanup(bo,
+ &fence->base,
evict,
no_wait_gpu,
new_mem);
@@ -1167,8 +1168,9 @@
{
struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct drm_device *dev = drm->dev;
+ struct fence *fence = reservation_object_get_excl(bo->resv);
- nv10_bo_put_tile_region(dev, *old_tile, bo->sync_obj);
+ nv10_bo_put_tile_region(dev, *old_tile, fence);
*old_tile = new_tile;
}
@@ -1212,9 +1214,7 @@
}
/* Fallback to software copy. */
- spin_lock(&bo->bdev->fence_lock);
ret = ttm_bo_wait(bo, true, intr, no_wait_gpu);
- spin_unlock(&bo->bdev->fence_lock);
if (ret == 0)
ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
@@ -1458,47 +1458,14 @@
}
void
-nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
+nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool exclusive)
{
- struct nouveau_fence *new_fence = nouveau_fence_ref(fence);
- struct nouveau_fence *old_fence = NULL;
+ struct reservation_object *resv = nvbo->bo.resv;
- spin_lock(&nvbo->bo.bdev->fence_lock);
- old_fence = nvbo->bo.sync_obj;
- nvbo->bo.sync_obj = new_fence;
- spin_unlock(&nvbo->bo.bdev->fence_lock);
-
- nouveau_fence_unref(&old_fence);
-}
-
-static void
-nouveau_bo_fence_unref(void **sync_obj)
-{
- nouveau_fence_unref((struct nouveau_fence **)sync_obj);
-}
-
-static void *
-nouveau_bo_fence_ref(void *sync_obj)
-{
- return nouveau_fence_ref(sync_obj);
-}
-
-static bool
-nouveau_bo_fence_signalled(void *sync_obj)
-{
- return nouveau_fence_done(sync_obj);
-}
-
-static int
-nouveau_bo_fence_wait(void *sync_obj, bool lazy, bool intr)
-{
- return nouveau_fence_wait(sync_obj, lazy, intr);
-}
-
-static int
-nouveau_bo_fence_flush(void *sync_obj)
-{
- return 0;
+ if (exclusive)
+ reservation_object_add_excl_fence(resv, &fence->base);
+ else if (fence)
+ reservation_object_add_shared_fence(resv, &fence->base);
}
struct ttm_bo_driver nouveau_bo_driver = {
@@ -1511,11 +1478,6 @@
.move_notify = nouveau_bo_move_ntfy,
.move = nouveau_bo_move,
.verify_access = nouveau_bo_verify_access,
- .sync_obj_signaled = nouveau_bo_fence_signalled,
- .sync_obj_wait = nouveau_bo_fence_wait,
- .sync_obj_flush = nouveau_bo_fence_flush,
- .sync_obj_unref = nouveau_bo_fence_unref,
- .sync_obj_ref = nouveau_bo_fence_ref,
.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
.io_mem_free = &nouveau_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 4ef88e8..ae95b2d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -78,7 +78,7 @@
void nouveau_bo_wr16(struct nouveau_bo *, unsigned index, u16 val);
u32 nouveau_bo_rd32(struct nouveau_bo *, unsigned index);
void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
-void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
+void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *, bool exclusive);
int nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
bool no_wait_gpu);
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 65b4fd5..a9ec525 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -658,7 +658,7 @@
spin_unlock_irqrestore(&dev->event_lock, flags);
/* Synchronize with the old framebuffer */
- ret = nouveau_fence_sync(old_bo->bo.sync_obj, chan);
+ ret = nouveau_fence_sync(old_bo, chan, false);
if (ret)
goto fail;
@@ -717,19 +717,24 @@
}
mutex_lock(&cli->mutex);
+ ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL);
+ if (ret)
+ goto fail_unpin;
/* synchronise rendering channel with the kernel's channel */
- spin_lock(&new_bo->bo.bdev->fence_lock);
- fence = nouveau_fence_ref(new_bo->bo.sync_obj);
- spin_unlock(&new_bo->bo.bdev->fence_lock);
- ret = nouveau_fence_sync(fence, chan);
- nouveau_fence_unref(&fence);
- if (ret)
+ ret = nouveau_fence_sync(new_bo, chan, false);
+ if (ret) {
+ ttm_bo_unreserve(&new_bo->bo);
goto fail_unpin;
+ }
- ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL);
- if (ret)
- goto fail_unpin;
+ if (new_bo != old_bo) {
+ ttm_bo_unreserve(&new_bo->bo);
+
+ ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL);
+ if (ret)
+ goto fail_unpin;
+ }
/* Initialize a page flip struct */
*s = (struct nouveau_page_flip_state)
@@ -775,7 +780,7 @@
/* Update the crtc struct and cleanup */
crtc->primary->fb = fb;
- nouveau_bo_fence(old_bo, fence);
+ nouveau_bo_fence(old_bo, fence, false);
ttm_bo_unreserve(&old_bo->bo);
if (old_bo != new_bo)
nouveau_bo_unpin(old_bo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 0a93114..decfe6c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -28,6 +28,7 @@
#include <linux/ktime.h>
#include <linux/hrtimer.h>
+#include <trace/events/fence.h>
#include <nvif/notify.h>
#include <nvif/event.h>
@@ -36,123 +37,210 @@
#include "nouveau_dma.h"
#include "nouveau_fence.h"
-struct fence_work {
- struct work_struct base;
- struct list_head head;
- void (*func)(void *);
- void *data;
-};
+static const struct fence_ops nouveau_fence_ops_uevent;
+static const struct fence_ops nouveau_fence_ops_legacy;
+
+static inline struct nouveau_fence *
+from_fence(struct fence *fence)
+{
+ return container_of(fence, struct nouveau_fence, base);
+}
+
+static inline struct nouveau_fence_chan *
+nouveau_fctx(struct nouveau_fence *fence)
+{
+ return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
+}
static void
nouveau_fence_signal(struct nouveau_fence *fence)
{
- struct fence_work *work, *temp;
+ fence_signal_locked(&fence->base);
+ list_del(&fence->head);
- list_for_each_entry_safe(work, temp, &fence->work, head) {
- schedule_work(&work->base);
- list_del(&work->head);
+ if (test_bit(FENCE_FLAG_USER_BITS, &fence->base.flags)) {
+ struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+
+ if (!--fctx->notify_ref)
+ nvif_notify_put(&fctx->notify);
}
- fence->channel = NULL;
- list_del(&fence->head);
+ fence_put(&fence->base);
+}
+
+static struct nouveau_fence *
+nouveau_local_fence(struct fence *fence, struct nouveau_drm *drm) {
+ struct nouveau_fence_priv *priv = (void*)drm->fence;
+
+ if (fence->ops != &nouveau_fence_ops_legacy &&
+ fence->ops != &nouveau_fence_ops_uevent)
+ return NULL;
+
+ if (fence->context < priv->context_base ||
+ fence->context >= priv->context_base + priv->contexts)
+ return NULL;
+
+ return from_fence(fence);
}
void
nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
{
- struct nouveau_fence *fence, *fnext;
- spin_lock(&fctx->lock);
- list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
+ struct nouveau_fence *fence;
+
+ nvif_notify_fini(&fctx->notify);
+
+ spin_lock_irq(&fctx->lock);
+ while (!list_empty(&fctx->pending)) {
+ fence = list_entry(fctx->pending.next, typeof(*fence), head);
+
+ nouveau_fence_signal(fence);
+ fence->channel = NULL;
+ }
+ spin_unlock_irq(&fctx->lock);
+}
+
+static void
+nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
+{
+ struct nouveau_fence *fence;
+
+ u32 seq = fctx->read(chan);
+
+ while (!list_empty(&fctx->pending)) {
+ fence = list_entry(fctx->pending.next, typeof(*fence), head);
+
+ if ((int)(seq - fence->base.seqno) < 0)
+ return;
+
nouveau_fence_signal(fence);
}
- spin_unlock(&fctx->lock);
+}
+
+static int
+nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
+{
+ struct nouveau_fence_chan *fctx =
+ container_of(notify, typeof(*fctx), notify);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fctx->lock, flags);
+ if (!list_empty(&fctx->pending)) {
+ struct nouveau_fence *fence;
+
+ fence = list_entry(fctx->pending.next, typeof(*fence), head);
+ nouveau_fence_update(fence->channel, fctx);
+ }
+ spin_unlock_irqrestore(&fctx->lock, flags);
+
+ /* Always return keep here. NVIF refcount is handled with nouveau_fence_update */
+ return NVIF_NOTIFY_KEEP;
}
void
-nouveau_fence_context_new(struct nouveau_fence_chan *fctx)
+nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
{
+ struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
+ int ret;
+
INIT_LIST_HEAD(&fctx->flip);
INIT_LIST_HEAD(&fctx->pending);
spin_lock_init(&fctx->lock);
+ fctx->context = priv->context_base + chan->chid;
+
+ if (!priv->uevent)
+ return;
+
+ ret = nvif_notify_init(chan->object, NULL,
+ nouveau_fence_wait_uevent_handler, false,
+ G82_CHANNEL_DMA_V0_NTFY_UEVENT,
+ &(struct nvif_notify_uevent_req) { },
+ sizeof(struct nvif_notify_uevent_req),
+ sizeof(struct nvif_notify_uevent_rep),
+ &fctx->notify);
+
+ WARN_ON(ret);
}
+struct nouveau_fence_work {
+ struct work_struct work;
+ struct fence_cb cb;
+ void (*func)(void *);
+ void *data;
+};
+
static void
nouveau_fence_work_handler(struct work_struct *kwork)
{
- struct fence_work *work = container_of(kwork, typeof(*work), base);
+ struct nouveau_fence_work *work = container_of(kwork, typeof(*work), work);
work->func(work->data);
kfree(work);
}
-void
-nouveau_fence_work(struct nouveau_fence *fence,
- void (*func)(void *), void *data)
+static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb)
{
- struct nouveau_channel *chan = fence->channel;
- struct nouveau_fence_chan *fctx;
- struct fence_work *work = NULL;
+ struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb);
- if (nouveau_fence_done(fence)) {
- func(data);
- return;
- }
-
- fctx = chan->fence;
- work = kmalloc(sizeof(*work), GFP_KERNEL);
- if (!work) {
- WARN_ON(nouveau_fence_wait(fence, false, false));
- func(data);
- return;
- }
-
- spin_lock(&fctx->lock);
- if (!fence->channel) {
- spin_unlock(&fctx->lock);
- kfree(work);
- func(data);
- return;
- }
-
- INIT_WORK(&work->base, nouveau_fence_work_handler);
- work->func = func;
- work->data = data;
- list_add(&work->head, &fence->work);
- spin_unlock(&fctx->lock);
+ schedule_work(&work->work);
}
-static void
-nouveau_fence_update(struct nouveau_channel *chan)
+void
+nouveau_fence_work(struct fence *fence,
+ void (*func)(void *), void *data)
{
- struct nouveau_fence_chan *fctx = chan->fence;
- struct nouveau_fence *fence, *fnext;
+ struct nouveau_fence_work *work;
- spin_lock(&fctx->lock);
- list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
- if (fctx->read(chan) < fence->sequence)
- break;
+ if (fence_is_signaled(fence))
+ goto err;
- nouveau_fence_signal(fence);
- nouveau_fence_unref(&fence);
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ WARN_ON(nouveau_fence_wait((struct nouveau_fence *)fence,
+ false, false));
+ goto err;
}
- spin_unlock(&fctx->lock);
+
+ INIT_WORK(&work->work, nouveau_fence_work_handler);
+ work->func = func;
+ work->data = data;
+
+ if (fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0)
+ goto err_free;
+ return;
+
+err_free:
+ kfree(work);
+err:
+ func(data);
}
int
nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
{
struct nouveau_fence_chan *fctx = chan->fence;
+ struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
int ret;
fence->channel = chan;
fence->timeout = jiffies + (15 * HZ);
- fence->sequence = ++fctx->sequence;
+ if (priv->uevent)
+ fence_init(&fence->base, &nouveau_fence_ops_uevent,
+ &fctx->lock,
+ priv->context_base + chan->chid, ++fctx->sequence);
+ else
+ fence_init(&fence->base, &nouveau_fence_ops_legacy,
+ &fctx->lock,
+ priv->context_base + chan->chid, ++fctx->sequence);
+
+ trace_fence_emit(&fence->base);
ret = fctx->emit(fence);
if (!ret) {
- kref_get(&fence->kref);
- spin_lock(&fctx->lock);
+ fence_get(&fence->base);
+ spin_lock_irq(&fctx->lock);
+ nouveau_fence_update(chan, fctx);
list_add_tail(&fence->head, &fctx->pending);
- spin_unlock(&fctx->lock);
+ spin_unlock_irq(&fctx->lock);
}
return ret;
@@ -161,115 +249,71 @@
bool
nouveau_fence_done(struct nouveau_fence *fence)
{
- if (fence->channel)
- nouveau_fence_update(fence->channel);
- return !fence->channel;
-}
+ if (fence->base.ops == &nouveau_fence_ops_legacy ||
+ fence->base.ops == &nouveau_fence_ops_uevent) {
+ struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+ unsigned long flags;
-struct nouveau_fence_wait {
- struct nouveau_fence_priv *priv;
- struct nvif_notify notify;
-};
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+ return true;
-static int
-nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
-{
- struct nouveau_fence_wait *wait =
- container_of(notify, typeof(*wait), notify);
- wake_up_all(&wait->priv->waiting);
- return NVIF_NOTIFY_KEEP;
-}
-
-static int
-nouveau_fence_wait_uevent(struct nouveau_fence *fence, bool intr)
-
-{
- struct nouveau_channel *chan = fence->channel;
- struct nouveau_fence_priv *priv = chan->drm->fence;
- struct nouveau_fence_wait wait = { .priv = priv };
- int ret = 0;
-
- ret = nvif_notify_init(chan->object, NULL,
- nouveau_fence_wait_uevent_handler, false,
- G82_CHANNEL_DMA_V0_NTFY_UEVENT,
- &(struct nvif_notify_uevent_req) {
- },
- sizeof(struct nvif_notify_uevent_req),
- sizeof(struct nvif_notify_uevent_rep),
- &wait.notify);
- if (ret)
- return ret;
-
- nvif_notify_get(&wait.notify);
-
- if (fence->timeout) {
- unsigned long timeout = fence->timeout - jiffies;
-
- if (time_before(jiffies, fence->timeout)) {
- if (intr) {
- ret = wait_event_interruptible_timeout(
- priv->waiting,
- nouveau_fence_done(fence),
- timeout);
- } else {
- ret = wait_event_timeout(priv->waiting,
- nouveau_fence_done(fence),
- timeout);
- }
- }
-
- if (ret >= 0) {
- fence->timeout = jiffies + ret;
- if (time_after_eq(jiffies, fence->timeout))
- ret = -EBUSY;
- }
- } else {
- if (intr) {
- ret = wait_event_interruptible(priv->waiting,
- nouveau_fence_done(fence));
- } else {
- wait_event(priv->waiting, nouveau_fence_done(fence));
- }
+ spin_lock_irqsave(&fctx->lock, flags);
+ nouveau_fence_update(fence->channel, fctx);
+ spin_unlock_irqrestore(&fctx->lock, flags);
}
-
- nvif_notify_fini(&wait.notify);
- if (unlikely(ret < 0))
- return ret;
-
- return 0;
+ return fence_is_signaled(&fence->base);
}
-int
-nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
+static long
+nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait)
{
- struct nouveau_channel *chan = fence->channel;
- struct nouveau_fence_priv *priv = chan ? chan->drm->fence : NULL;
+ struct nouveau_fence *fence = from_fence(f);
unsigned long sleep_time = NSEC_PER_MSEC / 1000;
- ktime_t t;
- int ret = 0;
-
- while (priv && priv->uevent && lazy && !nouveau_fence_done(fence)) {
- ret = nouveau_fence_wait_uevent(fence, intr);
- if (ret < 0)
- return ret;
- }
+ unsigned long t = jiffies, timeout = t + wait;
while (!nouveau_fence_done(fence)) {
- if (fence->timeout && time_after_eq(jiffies, fence->timeout)) {
- ret = -EBUSY;
- break;
+ ktime_t kt;
+
+ t = jiffies;
+
+ if (wait != MAX_SCHEDULE_TIMEOUT && time_after_eq(t, timeout)) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
}
__set_current_state(intr ? TASK_INTERRUPTIBLE :
TASK_UNINTERRUPTIBLE);
- if (lazy) {
- t = ktime_set(0, sleep_time);
- schedule_hrtimeout(&t, HRTIMER_MODE_REL);
- sleep_time *= 2;
- if (sleep_time > NSEC_PER_MSEC)
- sleep_time = NSEC_PER_MSEC;
+
+ kt = ktime_set(0, sleep_time);
+ schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
+ sleep_time *= 2;
+ if (sleep_time > NSEC_PER_MSEC)
+ sleep_time = NSEC_PER_MSEC;
+
+ if (intr && signal_pending(current))
+ return -ERESTARTSYS;
+ }
+
+ __set_current_state(TASK_RUNNING);
+
+ return timeout - t;
+}
+
+static int
+nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr)
+{
+ int ret = 0;
+
+ while (!nouveau_fence_done(fence)) {
+ if (time_after_eq(jiffies, fence->timeout)) {
+ ret = -EBUSY;
+ break;
}
+ __set_current_state(intr ?
+ TASK_INTERRUPTIBLE :
+ TASK_UNINTERRUPTIBLE);
+
if (intr && signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -281,47 +325,86 @@
}
int
-nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
+{
+ long ret;
+
+ if (!lazy)
+ return nouveau_fence_wait_busy(fence, intr);
+
+ ret = fence_wait_timeout(&fence->base, intr, 15 * HZ);
+ if (ret < 0)
+ return ret;
+ else if (!ret)
+ return -EBUSY;
+ else
+ return 0;
+}
+
+int
+nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive)
{
struct nouveau_fence_chan *fctx = chan->fence;
- struct nouveau_channel *prev;
- int ret = 0;
+ struct fence *fence;
+ struct reservation_object *resv = nvbo->bo.resv;
+ struct reservation_object_list *fobj;
+ struct nouveau_fence *f;
+ int ret = 0, i;
- prev = fence ? fence->channel : NULL;
- if (prev) {
- if (unlikely(prev != chan && !nouveau_fence_done(fence))) {
- ret = fctx->sync(fence, prev, chan);
- if (unlikely(ret))
- ret = nouveau_fence_wait(fence, true, false);
- }
+ if (!exclusive) {
+ ret = reservation_object_reserve_shared(resv);
+
+ if (ret)
+ return ret;
+ }
+
+ fobj = reservation_object_get_list(resv);
+ fence = reservation_object_get_excl(resv);
+
+ if (fence && (!exclusive || !fobj || !fobj->shared_count)) {
+ struct nouveau_channel *prev = NULL;
+
+ f = nouveau_local_fence(fence, chan->drm);
+ if (f)
+ prev = f->channel;
+
+ if (!prev || (prev != chan && (ret = fctx->sync(f, prev, chan))))
+ ret = fence_wait(fence, true);
+
+ return ret;
+ }
+
+ if (!exclusive || !fobj)
+ return ret;
+
+ for (i = 0; i < fobj->shared_count && !ret; ++i) {
+ struct nouveau_channel *prev = NULL;
+
+ fence = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(resv));
+
+ f = nouveau_local_fence(fence, chan->drm);
+ if (f)
+ prev = f->channel;
+
+ if (!prev || (ret = fctx->sync(f, prev, chan)))
+ ret = fence_wait(fence, true);
+
+ if (ret)
+ break;
}
return ret;
}
-static void
-nouveau_fence_del(struct kref *kref)
-{
- struct nouveau_fence *fence = container_of(kref, typeof(*fence), kref);
- kfree(fence);
-}
-
void
nouveau_fence_unref(struct nouveau_fence **pfence)
{
if (*pfence)
- kref_put(&(*pfence)->kref, nouveau_fence_del);
+ fence_put(&(*pfence)->base);
*pfence = NULL;
}
-struct nouveau_fence *
-nouveau_fence_ref(struct nouveau_fence *fence)
-{
- if (fence)
- kref_get(&fence->kref);
- return fence;
-}
-
int
nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
struct nouveau_fence **pfence)
@@ -336,9 +419,7 @@
if (!fence)
return -ENOMEM;
- INIT_LIST_HEAD(&fence->work);
fence->sysmem = sysmem;
- kref_init(&fence->kref);
ret = nouveau_fence_emit(fence, chan);
if (ret)
@@ -347,3 +428,92 @@
*pfence = fence;
return ret;
}
+
+static const char *nouveau_fence_get_get_driver_name(struct fence *fence)
+{
+ return "nouveau";
+}
+
+static const char *nouveau_fence_get_timeline_name(struct fence *f)
+{
+ struct nouveau_fence *fence = from_fence(f);
+ struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+
+ return fence->channel ? fctx->name : "dead channel";
+}
+
+/*
+ * In an ideal world, read would not assume the channel context is still alive.
+ * This function may be called from another device, running into free memory as a
+ * result. The drm node should still be there, so we can derive the index from
+ * the fence context.
+ */
+static bool nouveau_fence_is_signaled(struct fence *f)
+{
+ struct nouveau_fence *fence = from_fence(f);
+ struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+ struct nouveau_channel *chan = fence->channel;
+
+ return (int)(fctx->read(chan) - fence->base.seqno) >= 0;
+}
+
+static bool nouveau_fence_no_signaling(struct fence *f)
+{
+ struct nouveau_fence *fence = from_fence(f);
+
+ /*
+ * caller should have a reference on the fence,
+ * else fence could get freed here
+ */
+ WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1);
+
+ /*
+ * This needs uevents to work correctly, but fence_add_callback relies on
+ * being able to enable signaling. It will still get signaled eventually,
+ * just not right away.
+ */
+ if (nouveau_fence_is_signaled(f)) {
+ list_del(&fence->head);
+
+ fence_put(&fence->base);
+ return false;
+ }
+
+ return true;
+}
+
+static const struct fence_ops nouveau_fence_ops_legacy = {
+ .get_driver_name = nouveau_fence_get_get_driver_name,
+ .get_timeline_name = nouveau_fence_get_timeline_name,
+ .enable_signaling = nouveau_fence_no_signaling,
+ .signaled = nouveau_fence_is_signaled,
+ .wait = nouveau_fence_wait_legacy,
+ .release = NULL
+};
+
+static bool nouveau_fence_enable_signaling(struct fence *f)
+{
+ struct nouveau_fence *fence = from_fence(f);
+ struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+ bool ret;
+
+ if (!fctx->notify_ref++)
+ nvif_notify_get(&fctx->notify);
+
+ ret = nouveau_fence_no_signaling(f);
+ if (ret)
+ set_bit(FENCE_FLAG_USER_BITS, &fence->base.flags);
+ else if (!--fctx->notify_ref)
+ nvif_notify_put(&fctx->notify);
+
+ return ret;
+}
+
+static const struct fence_ops nouveau_fence_ops_uevent = {
+ .get_driver_name = nouveau_fence_get_get_driver_name,
+ .get_timeline_name = nouveau_fence_get_timeline_name,
+ .enable_signaling = nouveau_fence_enable_signaling,
+ .signaled = nouveau_fence_is_signaled,
+ .wait = fence_default_wait,
+ .release = NULL
+};
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index c57bb61..986c813 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -1,33 +1,35 @@
#ifndef __NOUVEAU_FENCE_H__
#define __NOUVEAU_FENCE_H__
+#include <linux/fence.h>
+#include <nvif/notify.h>
+
struct nouveau_drm;
+struct nouveau_bo;
struct nouveau_fence {
+ struct fence base;
+
struct list_head head;
- struct list_head work;
- struct kref kref;
bool sysmem;
struct nouveau_channel *channel;
unsigned long timeout;
- u32 sequence;
};
int nouveau_fence_new(struct nouveau_channel *, bool sysmem,
struct nouveau_fence **);
-struct nouveau_fence *
-nouveau_fence_ref(struct nouveau_fence *);
void nouveau_fence_unref(struct nouveau_fence **);
int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
bool nouveau_fence_done(struct nouveau_fence *);
-void nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
+void nouveau_fence_work(struct fence *, void (*)(void *), void *);
int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
-int nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *);
+int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive);
struct nouveau_fence_chan {
+ spinlock_t lock;
struct list_head pending;
struct list_head flip;
@@ -38,8 +40,12 @@
int (*emit32)(struct nouveau_channel *, u64, u32);
int (*sync32)(struct nouveau_channel *, u64, u32);
- spinlock_t lock;
u32 sequence;
+ u32 context;
+ char name[24];
+
+ struct nvif_notify notify;
+ int notify_ref;
};
struct nouveau_fence_priv {
@@ -49,13 +55,13 @@
int (*context_new)(struct nouveau_channel *);
void (*context_del)(struct nouveau_channel *);
- wait_queue_head_t waiting;
+ u32 contexts, context_base;
bool uevent;
};
#define nouveau_fence(drm) ((struct nouveau_fence_priv *)(drm)->fence)
-void nouveau_fence_context_new(struct nouveau_fence_chan *);
+void nouveau_fence_context_new(struct nouveau_channel *, struct nouveau_fence_chan *);
void nouveau_fence_context_del(struct nouveau_fence_chan *);
int nv04_fence_create(struct nouveau_drm *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 292a677..b7dbd16 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -98,17 +98,23 @@
nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
{
const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM;
- struct nouveau_fence *fence = NULL;
+ struct reservation_object *resv = nvbo->bo.resv;
+ struct reservation_object_list *fobj;
+ struct fence *fence = NULL;
+
+ fobj = reservation_object_get_list(resv);
list_del(&vma->head);
- if (mapped) {
- spin_lock(&nvbo->bo.bdev->fence_lock);
- fence = nouveau_fence_ref(nvbo->bo.sync_obj);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
- }
+ if (fobj && fobj->shared_count > 1)
+ ttm_bo_wait(&nvbo->bo, true, false, false);
+ else if (fobj && fobj->shared_count == 1)
+ fence = rcu_dereference_protected(fobj->shared[0],
+ reservation_object_held(resv));
+ else
+ fence = reservation_object_get_excl(nvbo->bo.resv);
- if (fence) {
+ if (fence && mapped) {
nouveau_fence_work(fence, nouveau_gem_object_delete, vma);
} else {
if (mapped)
@@ -116,7 +122,6 @@
nouveau_vm_put(vma);
kfree(vma);
}
- nouveau_fence_unref(&fence);
}
void
@@ -288,24 +293,23 @@
}
struct validate_op {
- struct list_head vram_list;
- struct list_head gart_list;
- struct list_head both_list;
+ struct list_head list;
struct ww_acquire_ctx ticket;
};
static void
-validate_fini_list(struct list_head *list, struct nouveau_fence *fence,
- struct ww_acquire_ctx *ticket)
+validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence,
+ struct drm_nouveau_gem_pushbuf_bo *pbbo)
{
- struct list_head *entry, *tmp;
struct nouveau_bo *nvbo;
+ struct drm_nouveau_gem_pushbuf_bo *b;
- list_for_each_safe(entry, tmp, list) {
- nvbo = list_entry(entry, struct nouveau_bo, entry);
+ while (!list_empty(&op->list)) {
+ nvbo = list_entry(op->list.next, struct nouveau_bo, entry);
+ b = &pbbo[nvbo->pbbo_index];
if (likely(fence))
- nouveau_bo_fence(nvbo, fence);
+ nouveau_bo_fence(nvbo, fence, !!b->write_domains);
if (unlikely(nvbo->validate_mapped)) {
ttm_bo_kunmap(&nvbo->kmap);
@@ -314,23 +318,16 @@
list_del(&nvbo->entry);
nvbo->reserved_by = NULL;
- ttm_bo_unreserve_ticket(&nvbo->bo, ticket);
+ ttm_bo_unreserve_ticket(&nvbo->bo, &op->ticket);
drm_gem_object_unreference_unlocked(&nvbo->gem);
}
}
static void
-validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence)
+validate_fini(struct validate_op *op, struct nouveau_fence *fence,
+ struct drm_nouveau_gem_pushbuf_bo *pbbo)
{
- validate_fini_list(&op->vram_list, fence, &op->ticket);
- validate_fini_list(&op->gart_list, fence, &op->ticket);
- validate_fini_list(&op->both_list, fence, &op->ticket);
-}
-
-static void
-validate_fini(struct validate_op *op, struct nouveau_fence *fence)
-{
- validate_fini_no_ticket(op, fence);
+ validate_fini_no_ticket(op, fence, pbbo);
ww_acquire_fini(&op->ticket);
}
@@ -344,6 +341,9 @@
int trycnt = 0;
int ret, i;
struct nouveau_bo *res_bo = NULL;
+ LIST_HEAD(gart_list);
+ LIST_HEAD(vram_list);
+ LIST_HEAD(both_list);
ww_acquire_init(&op->ticket, &reservation_ww_class);
retry:
@@ -360,9 +360,8 @@
gem = drm_gem_object_lookup(dev, file_priv, b->handle);
if (!gem) {
NV_PRINTK(error, cli, "Unknown handle 0x%08x\n", b->handle);
- ww_acquire_done(&op->ticket);
- validate_fini(op, NULL);
- return -ENOENT;
+ ret = -ENOENT;
+ break;
}
nvbo = nouveau_gem_object(gem);
if (nvbo == res_bo) {
@@ -375,14 +374,16 @@
NV_PRINTK(error, cli, "multiple instances of buffer %d on "
"validation list\n", b->handle);
drm_gem_object_unreference_unlocked(gem);
- ww_acquire_done(&op->ticket);
- validate_fini(op, NULL);
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
ret = ttm_bo_reserve(&nvbo->bo, true, false, true, &op->ticket);
if (ret) {
- validate_fini_no_ticket(op, NULL);
+ list_splice_tail_init(&vram_list, &op->list);
+ list_splice_tail_init(&gart_list, &op->list);
+ list_splice_tail_init(&both_list, &op->list);
+ validate_fini_no_ticket(op, NULL, NULL);
if (unlikely(ret == -EDEADLK)) {
ret = ttm_bo_reserve_slowpath(&nvbo->bo, true,
&op->ticket);
@@ -390,12 +391,9 @@
res_bo = nvbo;
}
if (unlikely(ret)) {
- ww_acquire_done(&op->ticket);
- ww_acquire_fini(&op->ticket);
- drm_gem_object_unreference_unlocked(gem);
if (ret != -ERESTARTSYS)
NV_PRINTK(error, cli, "fail reserve\n");
- return ret;
+ break;
}
}
@@ -404,45 +402,32 @@
nvbo->pbbo_index = i;
if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) &&
(b->valid_domains & NOUVEAU_GEM_DOMAIN_GART))
- list_add_tail(&nvbo->entry, &op->both_list);
+ list_add_tail(&nvbo->entry, &both_list);
else
if (b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM)
- list_add_tail(&nvbo->entry, &op->vram_list);
+ list_add_tail(&nvbo->entry, &vram_list);
else
if (b->valid_domains & NOUVEAU_GEM_DOMAIN_GART)
- list_add_tail(&nvbo->entry, &op->gart_list);
+ list_add_tail(&nvbo->entry, &gart_list);
else {
NV_PRINTK(error, cli, "invalid valid domains: 0x%08x\n",
b->valid_domains);
- list_add_tail(&nvbo->entry, &op->both_list);
- ww_acquire_done(&op->ticket);
- validate_fini(op, NULL);
- return -EINVAL;
+ list_add_tail(&nvbo->entry, &both_list);
+ ret = -EINVAL;
+ break;
}
if (nvbo == res_bo)
goto retry;
}
ww_acquire_done(&op->ticket);
- return 0;
-}
-
-static int
-validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
-{
- struct nouveau_fence *fence = NULL;
- int ret = 0;
-
- spin_lock(&nvbo->bo.bdev->fence_lock);
- fence = nouveau_fence_ref(nvbo->bo.sync_obj);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
-
- if (fence) {
- ret = nouveau_fence_sync(fence, chan);
- nouveau_fence_unref(&fence);
- }
-
+ list_splice_tail(&vram_list, &op->list);
+ list_splice_tail(&gart_list, &op->list);
+ list_splice_tail(&both_list, &op->list);
+ if (ret)
+ validate_fini(op, NULL, NULL);
return ret;
+
}
static int
@@ -474,9 +459,10 @@
return ret;
}
- ret = validate_sync(chan, nvbo);
+ ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains);
if (unlikely(ret)) {
- NV_PRINTK(error, cli, "fail post-validate sync\n");
+ if (ret != -ERESTARTSYS)
+ NV_PRINTK(error, cli, "fail post-validate sync\n");
return ret;
}
@@ -513,11 +499,9 @@
struct validate_op *op, int *apply_relocs)
{
struct nouveau_cli *cli = nouveau_cli(file_priv);
- int ret, relocs = 0;
+ int ret;
- INIT_LIST_HEAD(&op->vram_list);
- INIT_LIST_HEAD(&op->gart_list);
- INIT_LIST_HEAD(&op->both_list);
+ INIT_LIST_HEAD(&op->list);
if (nr_buffers == 0)
return 0;
@@ -529,34 +513,14 @@
return ret;
}
- ret = validate_list(chan, cli, &op->vram_list, pbbo, user_buffers);
+ ret = validate_list(chan, cli, &op->list, pbbo, user_buffers);
if (unlikely(ret < 0)) {
if (ret != -ERESTARTSYS)
- NV_PRINTK(error, cli, "validate vram_list\n");
- validate_fini(op, NULL);
+ NV_PRINTK(error, cli, "validating bo list\n");
+ validate_fini(op, NULL, NULL);
return ret;
}
- relocs += ret;
-
- ret = validate_list(chan, cli, &op->gart_list, pbbo, user_buffers);
- if (unlikely(ret < 0)) {
- if (ret != -ERESTARTSYS)
- NV_PRINTK(error, cli, "validate gart_list\n");
- validate_fini(op, NULL);
- return ret;
- }
- relocs += ret;
-
- ret = validate_list(chan, cli, &op->both_list, pbbo, user_buffers);
- if (unlikely(ret < 0)) {
- if (ret != -ERESTARTSYS)
- NV_PRINTK(error, cli, "validate both_list\n");
- validate_fini(op, NULL);
- return ret;
- }
- relocs += ret;
-
- *apply_relocs = relocs;
+ *apply_relocs = ret;
return 0;
}
@@ -659,9 +623,7 @@
data |= r->vor;
}
- spin_lock(&nvbo->bo.bdev->fence_lock);
- ret = ttm_bo_wait(&nvbo->bo, false, false, false);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
+ ret = ttm_bo_wait(&nvbo->bo, true, false, false);
if (ret) {
NV_PRINTK(error, cli, "reloc wait_idle failed: %d\n", ret);
break;
@@ -839,7 +801,7 @@
}
out:
- validate_fini(&op, fence);
+ validate_fini(&op, fence, bo);
nouveau_fence_unref(&fence);
out_prevalid:
@@ -884,17 +846,29 @@
struct drm_gem_object *gem;
struct nouveau_bo *nvbo;
bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT);
- int ret = -EINVAL;
+ bool write = !!(req->flags & NOUVEAU_GEM_CPU_PREP_WRITE);
+ int ret;
gem = drm_gem_object_lookup(dev, file_priv, req->handle);
if (!gem)
return -ENOENT;
nvbo = nouveau_gem_object(gem);
- spin_lock(&nvbo->bo.bdev->fence_lock);
- ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
- spin_unlock(&nvbo->bo.bdev->fence_lock);
+ if (no_wait)
+ ret = reservation_object_test_signaled_rcu(nvbo->bo.resv, write) ? 0 : -EBUSY;
+ else {
+ long lret;
+
+ lret = reservation_object_wait_timeout_rcu(nvbo->bo.resv, write, true, 30 * HZ);
+ if (!lret)
+ ret = -EBUSY;
+ else if (lret > 0)
+ ret = 0;
+ else
+ ret = lret;
+ }
drm_gem_object_unreference_unlocked(gem);
+
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 239c2c5a..4484131 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -41,7 +41,7 @@
int ret = RING_SPACE(chan, 2);
if (ret == 0) {
BEGIN_NV04(chan, NvSubSw, 0x0150, 1);
- OUT_RING (chan, fence->sequence);
+ OUT_RING (chan, fence->base.seqno);
FIRE_RING (chan);
}
return ret;
@@ -75,7 +75,7 @@
{
struct nv04_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
if (fctx) {
- nouveau_fence_context_new(&fctx->base);
+ nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv04_fence_emit;
fctx->base.sync = nv04_fence_sync;
fctx->base.read = nv04_fence_read;
@@ -105,5 +105,7 @@
priv->base.dtor = nv04_fence_destroy;
priv->base.context_new = nv04_fence_context_new;
priv->base.context_del = nv04_fence_context_del;
+ priv->base.contexts = 15;
+ priv->base.context_base = fence_context_alloc(priv->base.contexts);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
index 4faaf0a..737d066 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.c
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -33,7 +33,7 @@
int ret = RING_SPACE(chan, 2);
if (ret == 0) {
BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
- OUT_RING (chan, fence->sequence);
+ OUT_RING (chan, fence->base.seqno);
FIRE_RING (chan);
}
return ret;
@@ -75,7 +75,7 @@
if (!fctx)
return -ENOMEM;
- nouveau_fence_context_new(&fctx->base);
+ nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv10_fence_emit;
fctx->base.read = nv10_fence_read;
fctx->base.sync = nv10_fence_sync;
@@ -106,6 +106,8 @@
priv->base.dtor = nv10_fence_destroy;
priv->base.context_new = nv10_fence_context_new;
priv->base.context_del = nv10_fence_context_del;
+ priv->base.contexts = 31;
+ priv->base.context_base = fence_context_alloc(priv->base.contexts);
spin_lock_init(&priv->lock);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index ca90747..6f9a1f8 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -84,7 +84,7 @@
if (!fctx)
return -ENOMEM;
- nouveau_fence_context_new(&fctx->base);
+ nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv10_fence_emit;
fctx->base.read = nv10_fence_read;
fctx->base.sync = nv17_fence_sync;
@@ -124,6 +124,8 @@
priv->base.resume = nv17_fence_resume;
priv->base.context_new = nv17_fence_context_new;
priv->base.context_del = nv10_fence_context_del;
+ priv->base.contexts = 31;
+ priv->base.context_base = fence_context_alloc(priv->base.contexts);
spin_lock_init(&priv->lock);
ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index 195cf51..08fad36 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -46,7 +46,7 @@
if (!fctx)
return -ENOMEM;
- nouveau_fence_context_new(&fctx->base);
+ nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv10_fence_emit;
fctx->base.read = nv10_fence_read;
fctx->base.sync = nv17_fence_sync;
@@ -95,6 +95,8 @@
priv->base.resume = nv17_fence_resume;
priv->base.context_new = nv50_fence_context_new;
priv->base.context_del = nv10_fence_context_del;
+ priv->base.contexts = 127;
+ priv->base.context_base = fence_context_alloc(priv->base.contexts);
spin_lock_init(&priv->lock);
ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 933a779..a2f2808 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -82,7 +82,7 @@
else
addr += fctx->vma.offset;
- return fctx->base.emit32(chan, addr, fence->sequence);
+ return fctx->base.emit32(chan, addr, fence->base.seqno);
}
static int
@@ -97,7 +97,7 @@
else
addr += fctx->vma.offset;
- return fctx->base.sync32(chan, addr, fence->sequence);
+ return fctx->base.sync32(chan, addr, fence->base.seqno);
}
static u32
@@ -139,12 +139,13 @@
if (!fctx)
return -ENOMEM;
- nouveau_fence_context_new(&fctx->base);
+ nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv84_fence_emit;
fctx->base.sync = nv84_fence_sync;
fctx->base.read = nv84_fence_read;
fctx->base.emit32 = nv84_fence_emit32;
fctx->base.sync32 = nv84_fence_sync32;
+ fctx->base.sequence = nv84_fence_read(chan);
ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
if (ret == 0) {
@@ -168,13 +169,12 @@
static bool
nv84_fence_suspend(struct nouveau_drm *drm)
{
- struct nouveau_fifo *pfifo = nvkm_fifo(&drm->device);
struct nv84_fence_priv *priv = drm->fence;
int i;
- priv->suspend = vmalloc((pfifo->max + 1) * sizeof(u32));
+ priv->suspend = vmalloc(priv->base.contexts * sizeof(u32));
if (priv->suspend) {
- for (i = 0; i <= pfifo->max; i++)
+ for (i = 0; i < priv->base.contexts; i++)
priv->suspend[i] = nouveau_bo_rd32(priv->bo, i*4);
}
@@ -184,12 +184,11 @@
static void
nv84_fence_resume(struct nouveau_drm *drm)
{
- struct nouveau_fifo *pfifo = nvkm_fifo(&drm->device);
struct nv84_fence_priv *priv = drm->fence;
int i;
if (priv->suspend) {
- for (i = 0; i <= pfifo->max; i++)
+ for (i = 0; i < priv->base.contexts; i++)
nouveau_bo_wr32(priv->bo, i*4, priv->suspend[i]);
vfree(priv->suspend);
priv->suspend = NULL;
@@ -229,10 +228,11 @@
priv->base.context_new = nv84_fence_context_new;
priv->base.context_del = nv84_fence_context_del;
- init_waitqueue_head(&priv->base.waiting);
+ priv->base.contexts = pfifo->max + 1;
+ priv->base.context_base = fence_context_alloc(priv->base.contexts);
priv->base.uevent = true;
- ret = nouveau_bo_new(drm->dev, 16 * (pfifo->max + 1), 0,
+ ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
TTM_PL_FLAG_VRAM, 0, 0, NULL, &priv->bo);
if (ret == 0) {
ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
@@ -246,7 +246,7 @@
}
if (ret == 0)
- ret = nouveau_bo_new(drm->dev, 16 * (pfifo->max + 1), 0,
+ ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
TTM_PL_FLAG_TT, 0, 0, NULL,
&priv->bo_gart);
if (ret == 0) {
diff --git a/drivers/gpu/drm/qxl/Makefile b/drivers/gpu/drm/qxl/Makefile
index ea046ba..ac0d748 100644
--- a/drivers/gpu/drm/qxl/Makefile
+++ b/drivers/gpu/drm/qxl/Makefile
@@ -4,6 +4,6 @@
ccflags-y := -Iinclude/drm
-qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_fence.o qxl_release.o
+qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_release.o
obj-$(CONFIG_DRM_QXL)+= qxl.o
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index eb89653..9782364 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -620,17 +620,10 @@
if (ret == -EBUSY)
return -EBUSY;
- if (surf->fence.num_active_releases > 0 && stall == false) {
- qxl_bo_unreserve(surf);
- return -EBUSY;
- }
-
if (stall)
mutex_unlock(&qdev->surf_evict_mutex);
- spin_lock(&surf->tbo.bdev->fence_lock);
ret = ttm_bo_wait(&surf->tbo, true, true, !stall);
- spin_unlock(&surf->tbo.bdev->fence_lock);
if (stall)
mutex_lock(&qdev->surf_evict_mutex);
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
index c3c2bbd..a4a63fd 100644
--- a/drivers/gpu/drm/qxl/qxl_debugfs.c
+++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
@@ -57,11 +57,21 @@
struct qxl_device *qdev = node->minor->dev->dev_private;
struct qxl_bo *bo;
+ spin_lock(&qdev->release_lock);
list_for_each_entry(bo, &qdev->gem.objects, list) {
- seq_printf(m, "size %ld, pc %d, sync obj %p, num releases %d\n",
- (unsigned long)bo->gem_base.size, bo->pin_count,
- bo->tbo.sync_obj, bo->fence.num_active_releases);
+ struct reservation_object_list *fobj;
+ int rel;
+
+ rcu_read_lock();
+ fobj = rcu_dereference(bo->tbo.resv->fence);
+ rel = fobj ? fobj->shared_count : 0;
+ rcu_read_unlock();
+
+ seq_printf(m, "size %ld, pc %d, num releases %d\n",
+ (unsigned long)bo->gem_base.size,
+ bo->pin_count, rel);
}
+ spin_unlock(&qdev->release_lock);
return 0;
}
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index f6022b7..a8be876 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -31,6 +31,7 @@
* Definitions taken from spice-protocol, plus kernel driver specific bits.
*/
+#include <linux/fence.h>
#include <linux/workqueue.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
@@ -95,13 +96,6 @@
QXL_INTERRUPT_IO_CMD |\
QXL_INTERRUPT_CLIENT_MONITORS_CONFIG)
-struct qxl_fence {
- struct qxl_device *qdev;
- uint32_t num_active_releases;
- uint32_t *release_ids;
- struct radix_tree_root tree;
-};
-
struct qxl_bo {
/* Protected by gem.mutex */
struct list_head list;
@@ -113,13 +107,13 @@
unsigned pin_count;
void *kptr;
int type;
+
/* Constant after initialization */
struct drm_gem_object gem_base;
bool is_primary; /* is this now a primary surface */
bool hw_surf_alloc;
struct qxl_surface surf;
uint32_t surface_id;
- struct qxl_fence fence; /* per bo fence - list of releases */
struct qxl_release *surf_create;
};
#define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base)
@@ -191,6 +185,8 @@
* spice-protocol/qxl_dev.h */
#define QXL_MAX_RES 96
struct qxl_release {
+ struct fence base;
+
int id;
int type;
uint32_t release_offset;
@@ -284,7 +280,9 @@
uint8_t slot_gen_bits;
uint64_t va_slot_mask;
+ spinlock_t release_lock;
struct idr release_idr;
+ uint32_t release_seqno;
spinlock_t release_idr_lock;
struct mutex async_io_mutex;
unsigned int last_sent_io_cmd;
@@ -561,10 +559,4 @@
void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freeing);
int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf);
-/* qxl_fence.c */
-void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id);
-int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id);
-int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence);
-void qxl_fence_fini(struct qxl_fence *qfence);
-
#endif
diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c
deleted file mode 100644
index ae59e91..0000000
--- a/drivers/gpu/drm/qxl/qxl_fence.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright 2013 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- * Alon Levy
- */
-
-
-#include "qxl_drv.h"
-
-/* QXL fencing-
-
- When we submit operations to the GPU we pass a release reference to the GPU
- with them, the release reference is then added to the release ring when
- the GPU is finished with that particular operation and has removed it from
- its tree.
-
- So we have can have multiple outstanding non linear fences per object.
-
- From a TTM POV we only care if the object has any outstanding releases on
- it.
-
- we wait until all outstanding releases are processeed.
-
- sync object is just a list of release ids that represent that fence on
- that buffer.
-
- we just add new releases onto the sync object attached to the object.
-
- This currently uses a radix tree to store the list of release ids.
-
- For some reason every so often qxl hw fails to release, things go wrong.
-*/
-/* must be called with the fence lock held */
-void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id)
-{
- radix_tree_insert(&qfence->tree, rel_id, qfence);
- qfence->num_active_releases++;
-}
-
-int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id)
-{
- void *ret;
- int retval = 0;
- struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
-
- spin_lock(&bo->tbo.bdev->fence_lock);
-
- ret = radix_tree_delete(&qfence->tree, rel_id);
- if (ret == qfence)
- qfence->num_active_releases--;
- else {
- DRM_DEBUG("didn't find fence in radix tree for %d\n", rel_id);
- retval = -ENOENT;
- }
- spin_unlock(&bo->tbo.bdev->fence_lock);
- return retval;
-}
-
-
-int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence)
-{
- qfence->qdev = qdev;
- qfence->num_active_releases = 0;
- INIT_RADIX_TREE(&qfence->tree, GFP_ATOMIC);
- return 0;
-}
-
-void qxl_fence_fini(struct qxl_fence *qfence)
-{
- kfree(qfence->release_ids);
- qfence->num_active_releases = 0;
-}
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index fd88eb4..7234561 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -223,6 +223,7 @@
idr_init(&qdev->release_idr);
spin_lock_init(&qdev->release_idr_lock);
+ spin_lock_init(&qdev->release_lock);
idr_init(&qdev->surf_id_idr);
spin_lock_init(&qdev->surf_id_idr_lock);
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index adad12d..69c104c 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -36,7 +36,6 @@
qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
qxl_surface_evict(qdev, bo, false);
- qxl_fence_fini(&bo->fence);
mutex_lock(&qdev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&qdev->gem.mutex);
@@ -102,7 +101,6 @@
bo->type = domain;
bo->pin_count = pinned ? 1 : 0;
bo->surface_id = 0;
- qxl_fence_init(qdev, &bo->fence);
INIT_LIST_HEAD(&bo->list);
if (surf)
diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h
index 83a4232..37af1bc 100644
--- a/drivers/gpu/drm/qxl/qxl_object.h
+++ b/drivers/gpu/drm/qxl/qxl_object.h
@@ -76,12 +76,10 @@
}
return r;
}
- spin_lock(&bo->tbo.bdev->fence_lock);
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
- if (bo->tbo.sync_obj)
- r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
- spin_unlock(&bo->tbo.bdev->fence_lock);
+
+ r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
ttm_bo_unreserve(&bo->tbo);
return r;
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 14e776f..15158c5 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -21,6 +21,7 @@
*/
#include "qxl_drv.h"
#include "qxl_object.h"
+#include <trace/events/fence.h>
/*
* drawable cmd cache - allocate a bunch of VRAM pages, suballocate
@@ -39,6 +40,88 @@
static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE };
static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO };
+/* fence_ops.get_driver_name hook: every qxl fence reports the name "qxl". */
+static const char *qxl_get_driver_name(struct fence *fence)
+{
+ return "qxl";
+}
+
+/*
+ * fence_ops.get_timeline_name hook: all qxl fences share the fixed
+ * timeline name "release".
+ */
+static const char *qxl_get_timeline_name(struct fence *fence)
+{
+ return "release";
+}
+
+/* fence_ops.enable_signaling hook: does no work and always returns true. */
+static bool qxl_nop_signaling(struct fence *fence)
+{
+ /* fences are always automatically signaled, so just pretend we did this.. */
+ return true;
+}
+
+/*
+ * qxl_fence_wait - fence_ops.wait hook for qxl release fences
+ * @fence: fence embedded in a struct qxl_release (recovered via container_of)
+ * @intr: not referenced by this implementation
+ * @timeout: added to the current jiffies value to form the deadline
+ *
+ * Polls rather than sleeping on an event: each pass calls
+ * qxl_io_notify_oom() and then qxl_queue_garbage_collect() until the fence
+ * reports signaled, the deadline passes, or the pass budget is used up.
+ *
+ * Returns 0 when the deadline has passed, otherwise the number of jiffies
+ * left until the deadline. Note that the "signaled" exit is also taken
+ * without the fence being signaled: once sc reaches 4 for non-drawable
+ * releases, and once sc exceeds 300 (after a warning) for drawable ones.
+ */
+static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout)
+{
+ struct qxl_device *qdev;
+ struct qxl_release *release;
+ int count = 0, sc = 0;
+ bool have_drawable_releases;
+ unsigned long cur, end = jiffies + timeout;
+
+ /* fence->lock points at qdev->release_lock, so it identifies the device */
+ qdev = container_of(fence->lock, struct qxl_device, release_lock);
+ release = container_of(fence, struct qxl_release, base);
+ have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
+
+retry:
+ /* sc counts polling passes; it drives the back-off and give-up limits */
+ sc++;
+
+ if (fence_is_signaled_locked(fence))
+ goto signaled;
+
+ qxl_io_notify_oom(qdev);
+
+ /* at most 11 collection passes; stop early when one returns false */
+ for (count = 0; count < 11; count++) {
+ if (!qxl_queue_garbage_collect(qdev, true))
+ break;
+
+ if (fence_is_signaled_locked(fence))
+ goto signaled;
+ }
+
+ if (fence_is_signaled_locked(fence))
+ goto signaled;
+
+ if (have_drawable_releases || sc < 4) {
+ if (sc > 2)
+ /* back off */
+ usleep_range(500, 1000);
+
+ /* deadline passed while still unsignaled: report a timeout */
+ if (time_after(jiffies, end))
+ return 0;
+
+ if (have_drawable_releases && sc > 300) {
+ FENCE_WARN(fence, "failed to wait on release %d "
+ "after spincount %d\n",
+ fence->context & ~0xf0000000, sc);
+ goto signaled;
+ }
+ goto retry;
+ }
+ /*
+ * yeah, original sync_obj_wait gave up after 3 spins when
+ * have_drawable_releases is not set.
+ */
+
+signaled:
+ /* NOTE(review): yields 0 when cur == end even if the fence signaled */
+ cur = jiffies;
+ if (time_after(cur, end))
+ return 0;
+ return end - cur;
+}
+
+/* Callback table passed to fence_init() for every qxl release fence. */
+static const struct fence_ops qxl_fence_ops = {
+ .get_driver_name = qxl_get_driver_name,
+ .get_timeline_name = qxl_get_timeline_name,
+ .enable_signaling = qxl_nop_signaling,
+ .wait = qxl_fence_wait,
+};
+
static uint64_t
qxl_release_alloc(struct qxl_device *qdev, int type,
struct qxl_release **ret)
@@ -46,13 +129,13 @@
struct qxl_release *release;
int handle;
size_t size = sizeof(*release);
- int idr_ret;
release = kmalloc(size, GFP_KERNEL);
if (!release) {
DRM_ERROR("Out of memory\n");
return 0;
}
+ release->base.ops = NULL;
release->type = type;
release->release_offset = 0;
release->surface_release_id = 0;
@@ -60,44 +143,59 @@
idr_preload(GFP_KERNEL);
spin_lock(&qdev->release_idr_lock);
- idr_ret = idr_alloc(&qdev->release_idr, release, 1, 0, GFP_NOWAIT);
+ handle = idr_alloc(&qdev->release_idr, release, 1, 0, GFP_NOWAIT);
+ release->base.seqno = ++qdev->release_seqno;
spin_unlock(&qdev->release_idr_lock);
idr_preload_end();
- handle = idr_ret;
- if (idr_ret < 0)
- goto release_fail;
+ if (handle < 0) {
+ kfree(release);
+ *ret = NULL;
+ return handle;
+ }
*ret = release;
QXL_INFO(qdev, "allocated release %lld\n", handle);
release->id = handle;
-release_fail:
-
return handle;
}
+/*
+ * qxl_release_free_list - drop every buffer object queued on @release
+ * @release: release whose ->bos list is emptied
+ *
+ * The loop this helper replaces in qxl_release_free() dropped one bo
+ * reference per list entry with qxl_bo_unref() before freeing the entry.
+ * Keep doing that here: only unlinking and freeing the entry would leave
+ * the bo reference held by the entry behind and leak the buffer object.
+ */
+static void
+qxl_release_free_list(struct qxl_release *release)
+{
+ while (!list_empty(&release->bos)) {
+ struct qxl_bo_list *entry;
+ struct qxl_bo *bo;
+
+ entry = container_of(release->bos.next,
+ struct qxl_bo_list, tv.head);
+
+ /* drop the reference the list entry holds on its bo */
+ bo = to_qxl_bo(entry->tv.bo);
+ qxl_bo_unref(&bo);
+
+ list_del(&entry->tv.head);
+ kfree(entry);
+ }
+}
+
void
qxl_release_free(struct qxl_device *qdev,
struct qxl_release *release)
{
- struct qxl_bo_list *entry, *tmp;
QXL_INFO(qdev, "release %d, type %d\n", release->id,
release->type);
if (release->surface_release_id)
qxl_surface_id_dealloc(qdev, release->surface_release_id);
- list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) {
- struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
- QXL_INFO(qdev, "release %llx\n",
- drm_vma_node_offset_addr(&entry->tv.bo->vma_node)
- - DRM_FILE_OFFSET);
- qxl_fence_remove_release(&bo->fence, release->id);
- qxl_bo_unref(&bo);
- kfree(entry);
- }
spin_lock(&qdev->release_idr_lock);
idr_remove(&qdev->release_idr, release->id);
spin_unlock(&qdev->release_idr_lock);
- kfree(release);
+
+ /*
+ * base.ops is cleared when the release is allocated, so a non-NULL
+ * value presumably means fence_init() has run on it (confirm). Such a
+ * release is signaled and dropped through fence_put() instead of being
+ * kfree()d directly; a release that never became a fence is freed here.
+ */
+ if (release->base.ops) {
+ WARN_ON(list_empty(&release->bos));
+ qxl_release_free_list(release);
+
+ fence_signal(&release->base);
+ fence_put(&release->base);
+ } else {
+ qxl_release_free_list(release);
+ kfree(release);
+ }
}
static int qxl_release_bo_alloc(struct qxl_device *qdev,
@@ -142,6 +240,10 @@
return ret;
}
+ ret = reservation_object_reserve_shared(bo->tbo.resv);
+ if (ret)
+ return ret;
+
/* allocate a surface for reserved + validated buffers */
ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
if (ret)
@@ -159,7 +261,7 @@
if (list_is_singular(&release->bos))
return 0;
- ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos);
+ ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos, !no_intr);
if (ret)
return ret;
@@ -199,6 +301,8 @@
/* stash the release after the create command */
idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
+ if (idr_ret < 0)
+ return idr_ret;
bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo));
(*release)->release_offset = create_rel->release_offset + 64;
@@ -239,6 +343,11 @@
}
idr_ret = qxl_release_alloc(qdev, type, release);
+ if (idr_ret < 0) {
+ if (rbo)
+ *rbo = NULL;
+ return idr_ret;
+ }
mutex_lock(&qdev->release_mutex);
if (qdev->current_release_bo_offset[cur_idx] + 1 >= releases_per_bo[cur_idx]) {
@@ -319,12 +428,13 @@
void qxl_release_fence_buffer_objects(struct qxl_release *release)
{
- struct ttm_validate_buffer *entry;
struct ttm_buffer_object *bo;
struct ttm_bo_global *glob;
struct ttm_bo_device *bdev;
struct ttm_bo_driver *driver;
struct qxl_bo *qbo;
+ struct ttm_validate_buffer *entry;
+ struct qxl_device *qdev;
/* if only one object on the release its the release itself
since these objects are pinned no need to reserve */
@@ -333,26 +443,32 @@
bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo;
bdev = bo->bdev;
+ qdev = container_of(bdev, struct qxl_device, mman.bdev);
+
+ /*
+ * Since we never really allocated a context and we don't want to conflict,
+ * set the highest bits. This will break if we really allow exporting of dma-bufs.
+ */
+ fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock,
+ release->id | 0xf0000000, release->base.seqno);
+ trace_fence_emit(&release->base);
+
driver = bdev->driver;
glob = bo->glob;
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->fence_lock);
+ /* acquire release_lock to protect bo->resv->fence and its contents */
+ spin_lock(&qdev->release_lock);
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
qbo = to_qxl_bo(bo);
- if (!entry->bo->sync_obj)
- entry->bo->sync_obj = &qbo->fence;
-
- qxl_fence_add_release_locked(&qbo->fence, release->id);
-
+ reservation_object_add_shared_fence(bo->resv, &release->base);
ttm_bo_add_to_lru(bo);
__ttm_bo_unreserve(bo);
- entry->reserved = false;
}
- spin_unlock(&bdev->fence_lock);
+ spin_unlock(&qdev->release_lock);
spin_unlock(&glob->lru_lock);
ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index f66c59b..abe945a 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -357,92 +357,6 @@
return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
}
-
-static int qxl_sync_obj_wait(void *sync_obj,
- bool lazy, bool interruptible)
-{
- struct qxl_fence *qfence = (struct qxl_fence *)sync_obj;
- int count = 0, sc = 0;
- struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
-
- if (qfence->num_active_releases == 0)
- return 0;
-
-retry:
- if (sc == 0) {
- if (bo->type == QXL_GEM_DOMAIN_SURFACE)
- qxl_update_surface(qfence->qdev, bo);
- } else if (sc >= 1) {
- qxl_io_notify_oom(qfence->qdev);
- }
-
- sc++;
-
- for (count = 0; count < 10; count++) {
- bool ret;
- ret = qxl_queue_garbage_collect(qfence->qdev, true);
- if (ret == false)
- break;
-
- if (qfence->num_active_releases == 0)
- return 0;
- }
-
- if (qfence->num_active_releases) {
- bool have_drawable_releases = false;
- void **slot;
- struct radix_tree_iter iter;
- int release_id;
-
- radix_tree_for_each_slot(slot, &qfence->tree, &iter, 0) {
- struct qxl_release *release;
-
- release_id = iter.index;
- release = qxl_release_from_id_locked(qfence->qdev, release_id);
- if (release == NULL)
- continue;
-
- if (release->type == QXL_RELEASE_DRAWABLE)
- have_drawable_releases = true;
- }
-
- qxl_queue_garbage_collect(qfence->qdev, true);
-
- if (have_drawable_releases || sc < 4) {
- if (sc > 2)
- /* back off */
- usleep_range(500, 1000);
- if (have_drawable_releases && sc > 300) {
- WARN(1, "sync obj %d still has outstanding releases %d %d %d %ld %d\n", sc, bo->surface_id, bo->is_primary, bo->pin_count, (unsigned long)bo->gem_base.size, qfence->num_active_releases);
- return -EBUSY;
- }
- goto retry;
- }
- }
- return 0;
-}
-
-static int qxl_sync_obj_flush(void *sync_obj)
-{
- return 0;
-}
-
-static void qxl_sync_obj_unref(void **sync_obj)
-{
- *sync_obj = NULL;
-}
-
-static void *qxl_sync_obj_ref(void *sync_obj)
-{
- return sync_obj;
-}
-
-static bool qxl_sync_obj_signaled(void *sync_obj)
-{
- struct qxl_fence *qfence = (struct qxl_fence *)sync_obj;
- return (qfence->num_active_releases == 0);
-}
-
static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
{
@@ -469,16 +383,9 @@
.verify_access = &qxl_verify_access,
.io_mem_reserve = &qxl_ttm_io_mem_reserve,
.io_mem_free = &qxl_ttm_io_mem_free,
- .sync_obj_signaled = &qxl_sync_obj_signaled,
- .sync_obj_wait = &qxl_sync_obj_wait,
- .sync_obj_flush = &qxl_sync_obj_flush,
- .sync_obj_unref = &qxl_sync_obj_unref,
- .sync_obj_ref = &qxl_sync_obj_ref,
.move_notify = &qxl_bo_move_notify,
};
-
-
int qxl_ttm_init(struct qxl_device *qdev)
{
int r;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 83a2461..d80dc54 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -66,6 +66,7 @@
#include <linux/kref.h>
#include <linux/interval_tree.h>
#include <linux/hashtable.h>
+#include <linux/fence.h>
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
@@ -354,17 +355,19 @@
/* sync_seq is protected by ring emission lock */
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
- bool initialized;
+ bool initialized, delayed_irq;
struct delayed_work lockup_work;
};
struct radeon_fence {
+ /* generic fence; to_radeon_fence() maps it back via container_of() */
+ struct fence base;
+
struct radeon_device *rdev;
- struct kref kref;
- /* protected by radeon_fence.lock */
uint64_t seq;
/* RB, DMA, etc. */
unsigned ring;
+
+ /* wait-queue entry; radeon_fence_check_signaled() recovers the fence from it */
+ wait_queue_t fence_wake;
};
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -782,6 +785,7 @@
int radeon_irq_kms_init(struct radeon_device *rdev);
void radeon_irq_kms_fini(struct radeon_device *rdev);
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
@@ -2308,6 +2312,7 @@
struct radeon_mman mman;
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
wait_queue_head_t fence_queue;
+ unsigned fence_context;
struct mutex ring_lock;
struct radeon_ring ring[RADEON_NUM_RINGS];
bool ib_pool_ready;
@@ -2441,7 +2446,17 @@
/*
* Cast helper
*/
-#define to_radeon_fence(p) ((struct radeon_fence *)(p))
+extern const struct fence_ops radeon_fence_ops;
+
+/*
+ * to_radeon_fence - map a generic fence to the radeon_fence embedding it
+ * @f: fence to convert
+ *
+ * Returns the containing radeon_fence when @f uses radeon_fence_ops, and
+ * NULL for a fence with any other ops table.
+ *
+ * NOTE(review): @f is dereferenced unconditionally, so callers presumably
+ * never pass NULL -- confirm.
+ */
+static inline struct radeon_fence *to_radeon_fence(struct fence *f)
+{
+ struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
+
+ if (__f->base.ops == &radeon_fence_ops)
+ return __f;
+
+ return NULL;
+}
/*
* Registers read & write functions.
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index bd328cb..6e3d1c8 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -253,11 +253,17 @@
int i;
for (i = 0; i < p->nrelocs; i++) {
+ struct reservation_object *resv;
+ struct fence *fence;
+
if (!p->relocs[i].robj)
continue;
+ resv = p->relocs[i].robj->tbo.resv;
+ fence = reservation_object_get_excl(resv);
+
radeon_semaphore_sync_to(p->ib.semaphore,
- p->relocs[i].robj->tbo.sync_obj);
+ (struct radeon_fence *)fence);
}
}
@@ -427,7 +433,7 @@
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
- parser->ib.fence);
+ &parser->ib.fence->base);
} else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index d30f1cc..e84a76e 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1253,6 +1253,7 @@
for (i = 0; i < RADEON_NUM_RINGS; i++) {
rdev->ring[i].idx = i;
}
+ rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
radeon_family_name[rdev->family], pdev->vendor, pdev->device,
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bd0d687..bc894c1 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -476,11 +476,6 @@
obj = new_radeon_fb->obj;
new_rbo = gem_to_radeon_bo(obj);
- spin_lock(&new_rbo->tbo.bdev->fence_lock);
- if (new_rbo->tbo.sync_obj)
- work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj);
- spin_unlock(&new_rbo->tbo.bdev->fence_lock);
-
/* pin the new buffer */
DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n",
work->old_rbo, new_rbo);
@@ -499,6 +494,7 @@
DRM_ERROR("failed to pin new rbo buffer before flip\n");
goto cleanup;
}
+ work->fence = (struct radeon_fence *)fence_get(reservation_object_get_excl(new_rbo->tbo.resv));
radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL);
radeon_bo_unreserve(new_rbo);
@@ -582,7 +578,6 @@
drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
radeon_fence_unref(&work->fence);
kfree(work);
-
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index ecdba3a..af9f2d6 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -130,15 +130,18 @@
struct radeon_fence **fence,
int ring)
{
+ u64 seq;
+
/* we are protected by the ring emission mutex */
*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
if ((*fence) == NULL) {
return -ENOMEM;
}
- kref_init(&((*fence)->kref));
(*fence)->rdev = rdev;
- (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
+ /*
+ * Advance sync_seq only after the allocation succeeded: bumping it
+ * before kmalloc() would, on -ENOMEM, consume a sequence number that
+ * is never emitted on the ring.
+ */
+ (*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
(*fence)->ring = ring;
+ fence_init(&(*fence)->base, &radeon_fence_ops,
+ &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
radeon_fence_schedule_check(rdev, ring);
@@ -146,6 +149,41 @@
}
/**
+ * radeon_fence_check_signaled - callback from fence_queue
+ *
+ * @wait: wait queue entry; must be the fence_wake member of a radeon_fence
+ * @mode: not used by this callback
+ * @flags: not used by this callback
+ * @key: not used by this callback
+ *
+ * This function is called with the fence_queue lock held, which is also
+ * used for the fence locking itself, so the variants that do not take the
+ * lock themselves (fence_signal_locked, __remove_wait_queue) are used.
+ *
+ * Once the ring's last_seq has reached the fence's sequence number the
+ * fence is signaled, the sw irq reference for the ring is dropped, the
+ * entry is removed from fence_queue and one fence reference is put.
+ * Otherwise the entry stays queued.
+ *
+ * Always returns 0.
+ */
+static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+ struct radeon_fence *fence;
+ u64 seq;
+
+ fence = container_of(wait, struct radeon_fence, fence_wake);
+
+ /*
+ * We cannot use radeon_fence_process here because we're already
+ * in the waitqueue, in a call from wake_up_all.
+ */
+ seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
+ if (seq >= fence->seq) {
+ int ret = fence_signal_locked(&fence->base);
+
+ if (!ret)
+ FENCE_TRACE(&fence->base, "signaled from irq context\n");
+ else
+ FENCE_TRACE(&fence->base, "was already signaled\n");
+
+ radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
+ __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
+ fence_put(&fence->base);
+ } else
+ FENCE_TRACE(&fence->base, "pending\n");
+ return 0;
+}
+
+/**
* radeon_fence_activity - check for fence activity
*
* @rdev: radeon_device pointer
@@ -242,6 +280,15 @@
return;
}
+ if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
+ unsigned long irqflags;
+
+ fence_drv->delayed_irq = false;
+ spin_lock_irqsave(&rdev->irq.lock, irqflags);
+ radeon_irq_set(rdev);
+ spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+ }
+
if (radeon_fence_activity(rdev, ring))
wake_up_all(&rdev->fence_queue);
@@ -276,21 +323,6 @@
}
/**
- * radeon_fence_destroy - destroy a fence
- *
- * @kref: fence kref
- *
- * Frees the fence object (all asics).
- */
-static void radeon_fence_destroy(struct kref *kref)
-{
- struct radeon_fence *fence;
-
- fence = container_of(kref, struct radeon_fence, kref);
- kfree(fence);
-}
-
-/**
* radeon_fence_seq_signaled - check if a fence sequence number has signaled
*
* @rdev: radeon device pointer
@@ -318,6 +350,75 @@
return false;
}
+/*
+ * fence_ops.signaled callback: report whether the fence's sequence number
+ * has been reached on its ring.
+ *
+ * The ring's last_seq is compared first without taking any lock. If that
+ * is not sufficient and exclusive_lock can be taken for reading without
+ * blocking, radeon_fence_process() is run and last_seq is compared once
+ * more. Returns false when neither comparison succeeds.
+ */
+static bool radeon_fence_is_signaled(struct fence *f)
+{
+ struct radeon_fence *fence = to_radeon_fence(f);
+ struct radeon_device *rdev = fence->rdev;
+ unsigned ring = fence->ring;
+ u64 seq = fence->seq;
+
+ if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+ return true;
+ }
+
+ if (down_read_trylock(&rdev->exclusive_lock)) {
+ radeon_fence_process(rdev, ring);
+ up_read(&rdev->exclusive_lock);
+
+ if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * radeon_fence_enable_signaling - enable signalling on fence
+ * @f: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ *
+ * Returns false when the fence's sequence number has already been reached
+ * (no callback is queued in that case), true once the callback is queued.
+ */
+static bool radeon_fence_enable_signaling(struct fence *f)
+{
+ struct radeon_fence *fence = to_radeon_fence(f);
+ struct radeon_device *rdev = fence->rdev;
+
+ if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
+ return false;
+
+ if (down_read_trylock(&rdev->exclusive_lock)) {
+ radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+
+ if (radeon_fence_activity(rdev, fence->ring))
+ wake_up_all_locked(&rdev->fence_queue);
+
+ /* did fence get signaled after we enabled the sw irq? */
+ if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
+ radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+ up_read(&rdev->exclusive_lock);
+ return false;
+ }
+
+ up_read(&rdev->exclusive_lock);
+ } else {
+ /* we're probably in a lockup, let's not fiddle too much */
+ if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
+ rdev->fence_drv[fence->ring].delayed_irq = true;
+ radeon_fence_schedule_check(rdev, fence->ring);
+ }
+
+ fence->fence_wake.flags = 0;
+ fence->fence_wake.private = NULL;
+ fence->fence_wake.func = radeon_fence_check_signaled;
+ __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
+ /* reference held while queued; radeon_fence_check_signaled() puts it */
+ fence_get(f);
+
+ FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
+ return true;
+}
+
/**
* radeon_fence_signaled - check if a fence has signaled
*
@@ -330,8 +431,15 @@
{
if (!fence)
return true;
- if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
+
+ if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
+ int ret;
+
+ ret = fence_signal(&fence->base);
+ if (!ret)
+ FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
return true;
+ }
return false;
}
@@ -433,17 +541,15 @@
uint64_t seq[RADEON_NUM_RINGS] = {};
long r;
- if (fence == NULL) {
- WARN(1, "Querying an invalid fence : %p !\n", fence);
- return -EINVAL;
- }
-
seq[fence->ring] = fence->seq;
r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
return r;
}
+ r = fence_signal(&fence->base);
+ if (!r)
+ FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
return 0;
}
@@ -557,7 +663,7 @@
*/
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
- kref_get(&fence->kref);
+ fence_get(&fence->base);
return fence;
}
@@ -574,7 +680,7 @@
*fence = NULL;
if (tmp) {
- kref_put(&tmp->kref, radeon_fence_destroy);
+ fence_put(&tmp->base);
}
}
@@ -887,3 +993,72 @@
return 0;
#endif
}
+
+/* fence_ops.get_driver_name callback: always reports "radeon". */
+static const char *radeon_fence_get_driver_name(struct fence *fence)
+{
+ return "radeon";
+}
+
+/*
+ * fence_ops.get_timeline_name callback: map the ring index stored in the
+ * fence to a constant name. An unrecognised ring index triggers a one-time
+ * warning and yields "radeon.unk".
+ */
+static const char *radeon_fence_get_timeline_name(struct fence *f)
+{
+ struct radeon_fence *fence = to_radeon_fence(f);
+ switch (fence->ring) {
+ case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
+ case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
+ case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
+ case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
+ case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
+ case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
+ case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
+ case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
+ default: WARN_ON_ONCE(1); return "radeon.unk";
+ }
+}
+
+/* Test FENCE_FLAG_SIGNALED_BIT in the embedded struct fence's flags. */
+static inline bool radeon_test_signaled(struct radeon_fence *fence)
+{
+ return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
+}
+
+/*
+ * fence_ops.wait callback for radeon fences.
+ *
+ * @f: fence to wait on
+ * @intr: use the interruptible wait variant when true
+ * @t: timeout passed through to wait_event_*timeout()
+ *
+ * Sleeps on rdev->fence_queue until the fence's signaled bit is set,
+ * rdev->needs_reset is seen, or the timeout runs out.
+ *
+ * Returns -EDEADLK when the wait ended with time remaining but the fence
+ * was not signaled (i.e. needs_reset ended the wait); otherwise returns
+ * whatever the wait_event_*timeout() call produced.
+ */
+static signed long radeon_fence_default_wait(struct fence *f, bool intr,
+ signed long t)
+{
+ struct radeon_fence *fence = to_radeon_fence(f);
+ struct radeon_device *rdev = fence->rdev;
+ bool signaled;
+
+ fence_enable_sw_signaling(&fence->base);
+
+ /*
+ * This function has to return -EDEADLK, but cannot hold
+ * exclusive_lock during the wait because some callers
+ * may already hold it. This means checking needs_reset without
+ * lock, and not fiddling with any gpu internals.
+ *
+ * The callback installed with fence_enable_sw_signaling will
+ * run before our wait_event_*timeout call, so we will see
+ * both the signaled fence and the changes to needs_reset.
+ */
+
+ if (intr)
+ t = wait_event_interruptible_timeout(rdev->fence_queue,
+ ((signaled = radeon_test_signaled(fence)) ||
+ rdev->needs_reset), t);
+ else
+ t = wait_event_timeout(rdev->fence_queue,
+ ((signaled = radeon_test_signaled(fence)) ||
+ rdev->needs_reset), t);
+
+ if (t > 0 && !signaled)
+ return -EDEADLK;
+ return t;
+}
+
+/*
+ * Callback table passed to fence_init() for every radeon fence. No
+ * driver-specific release callback is supplied.
+ */
+const struct fence_ops radeon_fence_ops = {
+ .get_driver_name = radeon_fence_get_driver_name,
+ .get_timeline_name = radeon_fence_get_timeline_name,
+ .enable_signaling = radeon_fence_enable_signaling,
+ .signaled = radeon_fence_is_signaled,
+ .wait = radeon_fence_default_wait,
+ .release = NULL,
+};
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 01b5894..4b7c8ec 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -94,7 +94,7 @@
{
struct radeon_bo *robj;
uint32_t domain;
- int r;
+ long r;
/* FIXME: reeimplement */
robj = gem_to_radeon_bo(gobj);
@@ -110,9 +110,12 @@
}
if (domain == RADEON_GEM_DOMAIN_CPU) {
/* Asking for cpu access wait for object idle */
- r = radeon_bo_wait(robj, NULL, false);
- if (r) {
- printk(KERN_ERR "Failed to wait for object !\n");
+ r = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ);
+ if (!r)
+ r = -EBUSY;
+
+ if (r < 0 && r != -EINTR) {
+ printk(KERN_ERR "Failed to wait for object: %li\n", r);
return r;
}
}
@@ -449,15 +452,22 @@
struct drm_radeon_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
- int r;
+ int r = 0;
uint32_t cur_placement = 0;
+ long ret;
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL) {
return -ENOENT;
}
robj = gem_to_radeon_bo(gobj);
- r = radeon_bo_wait(robj, &cur_placement, false);
+
+ ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ);
+ if (ret == 0)
+ r = -EBUSY;
+ else if (ret < 0)
+ r = ret;
+
/* Flush HDP cache via MMIO if necessary */
if (rdev->asic->mmio_hdp_flush &&
radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM)
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index f0bff4b..7784911 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -324,6 +324,21 @@
}
/**
+ * radeon_irq_kms_sw_irq_get_delayed - take a software interrupt reference
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring whose interrupt reference count is incremented
+ *
+ * Only increments the ring_int reference count for @ring; no interrupt
+ * state is programmed here. Returns true when this was the first
+ * reference, so the caller knows the interrupt still has to be enabled
+ * later (radeon_fence_enable_signaling records that in delayed_irq).
+ */
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring)
+{
+ return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1;
+}
+
+/**
* radeon_irq_kms_sw_irq_put - disable software interrupt
*
* @rdev: radeon device pointer
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index 0157bc2..a69bd44 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -122,6 +122,7 @@
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct radeon_bo *bo;
+ struct fence *fence;
int r;
bo = container_of(it, struct radeon_bo, mn_it);
@@ -133,8 +134,9 @@
continue;
}
- if (bo->tbo.sync_obj) {
- r = radeon_fence_wait(bo->tbo.sync_obj, false);
+ fence = reservation_object_get_excl(bo->tbo.resv);
+ if (fence) {
+ r = radeon_fence_wait((struct radeon_fence *)fence, false);
if (r)
DRM_ERROR("(%d) failed to wait for user bo\n", r);
}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index c97a424..aadbd36 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -482,7 +482,7 @@
u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
- r = ttm_eu_reserve_buffers(ticket, head);
+ r = ttm_eu_reserve_buffers(ticket, head, true);
if (unlikely(r != 0)) {
return r;
}
@@ -779,12 +779,10 @@
r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
if (unlikely(r != 0))
return r;
- spin_lock(&bo->tbo.bdev->fence_lock);
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
- if (bo->tbo.sync_obj)
- r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
- spin_unlock(&bo->tbo.bdev->fence_lock);
+
+ r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
ttm_bo_unreserve(&bo->tbo);
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 822eb36..62d1f4d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -270,12 +270,12 @@
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
/* sync other rings */
- fence = bo->sync_obj;
+ fence = (struct radeon_fence *)reservation_object_get_excl(bo->resv);
r = radeon_copy(rdev, old_start, new_start,
new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
&fence);
/* FIXME: handle copy error */
- r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
+ r = ttm_bo_move_accel_cleanup(bo, &fence->base,
evict, no_wait_gpu, new_mem);
radeon_fence_unref(&fence);
return r;
@@ -488,31 +488,6 @@
{
}
-static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
-{
- return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
-}
-
-static int radeon_sync_obj_flush(void *sync_obj)
-{
- return 0;
-}
-
-static void radeon_sync_obj_unref(void **sync_obj)
-{
- radeon_fence_unref((struct radeon_fence **)sync_obj);
-}
-
-static void *radeon_sync_obj_ref(void *sync_obj)
-{
- return radeon_fence_ref((struct radeon_fence *)sync_obj);
-}
-
-static bool radeon_sync_obj_signaled(void *sync_obj)
-{
- return radeon_fence_signaled((struct radeon_fence *)sync_obj);
-}
-
/*
* TTM backend functions.
*/
@@ -847,11 +822,6 @@
.evict_flags = &radeon_evict_flags,
.move = &radeon_bo_move,
.verify_access = &radeon_verify_access,
- .sync_obj_signaled = &radeon_sync_obj_signaled,
- .sync_obj_wait = &radeon_sync_obj_wait,
- .sync_obj_flush = &radeon_sync_obj_flush,
- .sync_obj_unref = &radeon_sync_obj_unref,
- .sync_obj_ref = &radeon_sync_obj_ref,
.move_notify = &radeon_bo_move_notify,
.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
.io_mem_reserve = &radeon_ttm_io_mem_reserve,
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 5729e9b..ba4f389 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -400,6 +400,7 @@
{
int32_t *msg, msg_type, handle;
unsigned img_size = 0;
+ struct fence *f;
void *ptr;
int i, r;
@@ -409,8 +410,9 @@
return -EINVAL;
}
- if (bo->tbo.sync_obj) {
- r = radeon_fence_wait(bo->tbo.sync_obj, false);
+ f = reservation_object_get_excl(bo->tbo.resv);
+ if (f) {
+ r = radeon_fence_wait((struct radeon_fence *)f, false);
if (r) {
DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
return r;
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 4751c67..671ee56 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -399,7 +399,7 @@
INIT_LIST_HEAD(&head);
list_add(&tv.head, &head);
- r = ttm_eu_reserve_buffers(&ticket, &head);
+ r = ttm_eu_reserve_buffers(&ticket, &head, true);
if (r)
return r;
@@ -424,7 +424,7 @@
if (r)
goto error;
- ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
+ ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
radeon_ib_free(rdev, &ib);
return 0;
@@ -693,8 +693,14 @@
incr, R600_PTE_VALID);
if (ib.length_dw != 0) {
+ struct fence *fence;
+
radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
+
+ fence = reservation_object_get_excl(pd->tbo.resv);
+ radeon_semaphore_sync_to(ib.semaphore,
+ (struct radeon_fence *)fence);
+
radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
WARN_ON(ib.length_dw > ndw);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
@@ -820,8 +826,11 @@
struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
unsigned nptes;
uint64_t pte;
+ struct fence *fence;
- radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
+ fence = reservation_object_get_excl(pt->tbo.resv);
+ radeon_semaphore_sync_to(ib->semaphore,
+ (struct radeon_fence *)fence);
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b992ec3..a11969a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -40,6 +40,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/atomic.h>
+#include <linux/reservation.h>
#define TTM_ASSERT_LOCKED(param)
#define TTM_DEBUG(fmt, arg...)
@@ -142,7 +143,6 @@
BUG_ON(atomic_read(&bo->list_kref.refcount));
BUG_ON(atomic_read(&bo->kref.refcount));
BUG_ON(atomic_read(&bo->cpu_writers));
- BUG_ON(bo->sync_obj != NULL);
BUG_ON(bo->mem.mm_node != NULL);
BUG_ON(!list_empty(&bo->lru));
BUG_ON(!list_empty(&bo->ddestroy));
@@ -403,36 +403,48 @@
ww_mutex_unlock (&bo->resv->lock);
}
+/*
+ * Call fence_enable_sw_signaling() on the exclusive fence and on each
+ * shared fence of @bo's reservation object, skipping any fence whose ops
+ * provide a ->signaled callback.
+ *
+ * The shared list is dereferenced under reservation_object_held(), which
+ * expects bo->resv to be held by the caller.
+ */
+static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
+{
+ struct reservation_object_list *fobj;
+ struct fence *fence;
+ int i;
+
+ fobj = reservation_object_get_list(bo->resv);
+ fence = reservation_object_get_excl(bo->resv);
+ if (fence && !fence->ops->signaled)
+ fence_enable_sw_signaling(fence);
+
+ for (i = 0; fobj && i < fobj->shared_count; ++i) {
+ fence = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(bo->resv));
+
+ if (!fence->ops->signaled)
+ fence_enable_sw_signaling(fence);
+ }
+}
+
static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_global *glob = bo->glob;
- struct ttm_bo_driver *driver = bdev->driver;
- void *sync_obj = NULL;
int put_count;
int ret;
spin_lock(&glob->lru_lock);
ret = __ttm_bo_reserve(bo, false, true, false, NULL);
- spin_lock(&bdev->fence_lock);
- (void) ttm_bo_wait(bo, false, false, true);
- if (!ret && !bo->sync_obj) {
- spin_unlock(&bdev->fence_lock);
- put_count = ttm_bo_del_from_lru(bo);
-
- spin_unlock(&glob->lru_lock);
- ttm_bo_cleanup_memtype_use(bo);
-
- ttm_bo_list_ref_sub(bo, put_count, true);
-
- return;
- }
- if (bo->sync_obj)
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
- spin_unlock(&bdev->fence_lock);
-
if (!ret) {
+ if (!ttm_bo_wait(bo, false, false, true)) {
+ put_count = ttm_bo_del_from_lru(bo);
+
+ spin_unlock(&glob->lru_lock);
+ ttm_bo_cleanup_memtype_use(bo);
+
+ ttm_bo_list_ref_sub(bo, put_count, true);
+
+ return;
+ } else
+ ttm_bo_flush_all_fences(bo);
/*
* Make NO_EVICT bos immediately available to
@@ -451,10 +463,6 @@
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&glob->lru_lock);
- if (sync_obj) {
- driver->sync_obj_flush(sync_obj);
- driver->sync_obj_unref(&sync_obj);
- }
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
}
@@ -475,44 +483,26 @@
bool interruptible,
bool no_wait_gpu)
{
- struct ttm_bo_device *bdev = bo->bdev;
- struct ttm_bo_driver *driver = bdev->driver;
struct ttm_bo_global *glob = bo->glob;
int put_count;
int ret;
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, true);
if (ret && !no_wait_gpu) {
- void *sync_obj;
-
- /*
- * Take a reference to the fence and unreserve,
- * at this point the buffer should be dead, so
- * no new sync objects can be attached.
- */
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
- spin_unlock(&bdev->fence_lock);
-
- __ttm_bo_unreserve(bo);
+ long lret;
+ ww_mutex_unlock(&bo->resv->lock);
spin_unlock(&glob->lru_lock);
- ret = driver->sync_obj_wait(sync_obj, false, interruptible);
- driver->sync_obj_unref(&sync_obj);
- if (ret)
- return ret;
+ lret = reservation_object_wait_timeout_rcu(bo->resv,
+ true,
+ interruptible,
+ 30 * HZ);
- /*
- * remove sync_obj with ttm_bo_wait, the wait should be
- * finished, and no new wait object should have been added.
- */
- spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, false, true);
- WARN_ON(ret);
- spin_unlock(&bdev->fence_lock);
- if (ret)
- return ret;
+ if (lret < 0)
+ return lret;
+ else if (lret == 0)
+ return -EBUSY;
spin_lock(&glob->lru_lock);
ret = __ttm_bo_reserve(bo, false, true, false, NULL);
@@ -529,8 +519,14 @@
spin_unlock(&glob->lru_lock);
return 0;
}
- } else
- spin_unlock(&bdev->fence_lock);
+
+ /*
+ * remove sync_obj with ttm_bo_wait, the wait should be
+ * finished, and no new wait object should have been added.
+ */
+ ret = ttm_bo_wait(bo, false, false, true);
+ WARN_ON(ret);
+ }
if (ret || unlikely(list_empty(&bo->ddestroy))) {
__ttm_bo_unreserve(bo);
@@ -668,9 +664,7 @@
struct ttm_placement placement;
int ret = 0;
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
- spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS) {
@@ -961,7 +955,6 @@
{
int ret = 0;
struct ttm_mem_reg mem;
- struct ttm_bo_device *bdev = bo->bdev;
lockdep_assert_held(&bo->resv->lock.base);
@@ -970,9 +963,7 @@
* Have the driver move function wait for idle when necessary,
* instead of doing it here.
*/
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
- spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
mem.num_pages = bo->num_pages;
@@ -1462,7 +1453,6 @@
bdev->glob = glob;
bdev->need_dma32 = need_dma32;
bdev->val_seq = 0;
- spin_lock_init(&bdev->fence_lock);
mutex_lock(&glob->device_list_mutex);
list_add_tail(&bdev->device_list, &glob->device_list);
mutex_unlock(&glob->device_list_mutex);
@@ -1515,65 +1505,56 @@
EXPORT_SYMBOL(ttm_bo_unmap_virtual);
-
int ttm_bo_wait(struct ttm_buffer_object *bo,
bool lazy, bool interruptible, bool no_wait)
{
- struct ttm_bo_driver *driver = bo->bdev->driver;
- struct ttm_bo_device *bdev = bo->bdev;
- void *sync_obj;
- int ret = 0;
+ struct reservation_object_list *fobj;
+ struct reservation_object *resv;
+ struct fence *excl;
+ long timeout = 15 * HZ;
+ int i;
- if (likely(bo->sync_obj == NULL))
- return 0;
+ resv = bo->resv;
+ fobj = reservation_object_get_list(resv);
+ excl = reservation_object_get_excl(resv);
+ if (excl) {
+ if (!fence_is_signaled(excl)) {
+ if (no_wait)
+ return -EBUSY;
- while (bo->sync_obj) {
-
- if (driver->sync_obj_signaled(bo->sync_obj)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
- continue;
- }
-
- if (no_wait)
- return -EBUSY;
-
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
- spin_unlock(&bdev->fence_lock);
- ret = driver->sync_obj_wait(sync_obj,
- lazy, interruptible);
- if (unlikely(ret != 0)) {
- driver->sync_obj_unref(&sync_obj);
- spin_lock(&bdev->fence_lock);
- return ret;
- }
- spin_lock(&bdev->fence_lock);
- if (likely(bo->sync_obj == sync_obj)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING,
- &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&sync_obj);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
- } else {
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&sync_obj);
- spin_lock(&bdev->fence_lock);
+ timeout = fence_wait_timeout(excl,
+ interruptible, timeout);
}
}
+
+ for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) {
+ struct fence *fence;
+ fence = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(resv));
+
+ if (!fence_is_signaled(fence)) {
+ if (no_wait)
+ return -EBUSY;
+
+ timeout = fence_wait_timeout(fence,
+ interruptible, timeout);
+ }
+ }
+
+ if (timeout < 0)
+ return timeout;
+
+ if (timeout == 0)
+ return -EBUSY;
+
+ reservation_object_add_excl_fence(resv, NULL);
+ clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
return 0;
}
EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
{
- struct ttm_bo_device *bdev = bo->bdev;
int ret = 0;
/*
@@ -1583,9 +1564,7 @@
ret = ttm_bo_reserve(bo, true, no_wait, false, NULL);
if (unlikely(ret != 0))
return ret;
- spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, true, no_wait);
- spin_unlock(&bdev->fence_lock);
if (likely(ret == 0))
atomic_inc(&bo->cpu_writers);
ttm_bo_unreserve(bo);
@@ -1642,9 +1621,7 @@
* Wait for GPU, then move to system cached.
*/
- spin_lock(&bo->bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bo->bdev->fence_lock);
if (unlikely(ret != 0))
goto out;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 30e5d90..824af90 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
+#include <linux/reservation.h>
void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
{
@@ -444,8 +445,6 @@
struct ttm_buffer_object **new_obj)
{
struct ttm_buffer_object *fbo;
- struct ttm_bo_device *bdev = bo->bdev;
- struct ttm_bo_driver *driver = bdev->driver;
int ret;
fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
@@ -466,12 +465,6 @@
drm_vma_node_reset(&fbo->vma_node);
atomic_set(&fbo->cpu_writers, 0);
- spin_lock(&bdev->fence_lock);
- if (bo->sync_obj)
- fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
- else
- fbo->sync_obj = NULL;
- spin_unlock(&bdev->fence_lock);
kref_init(&fbo->list_kref);
kref_init(&fbo->kref);
fbo->destroy = &ttm_transfered_destroy;
@@ -644,30 +637,20 @@
EXPORT_SYMBOL(ttm_bo_kunmap);
int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
- void *sync_obj,
+ struct fence *fence,
bool evict,
bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct ttm_bo_device *bdev = bo->bdev;
- struct ttm_bo_driver *driver = bdev->driver;
struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
struct ttm_buffer_object *ghost_obj;
- void *tmp_obj = NULL;
- spin_lock(&bdev->fence_lock);
- if (bo->sync_obj) {
- tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- }
- bo->sync_obj = driver->sync_obj_ref(sync_obj);
+ reservation_object_add_excl_fence(bo->resv, fence);
if (evict) {
ret = ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bdev->fence_lock);
- if (tmp_obj)
- driver->sync_obj_unref(&tmp_obj);
if (ret)
return ret;
@@ -688,14 +671,13 @@
*/
set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- if (tmp_obj)
- driver->sync_obj_unref(&tmp_obj);
ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
return ret;
+ reservation_object_add_excl_fence(ghost_obj->resv, fence);
+
/**
* If we're not moving to fixed memory, the TTM object
* needs to stay alive. Otherwhise hang it on the ghost
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 0ce48e5..d05437f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -45,10 +45,8 @@
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct ttm_bo_device *bdev = bo->bdev;
int ret = 0;
- spin_lock(&bdev->fence_lock);
if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
goto out_unlock;
@@ -82,7 +80,6 @@
VM_FAULT_NOPAGE;
out_unlock:
- spin_unlock(&bdev->fence_lock);
return ret;
}
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index e8dac87..adafc0f 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -32,20 +32,12 @@
#include <linux/sched.h>
#include <linux/module.h>
-static void ttm_eu_backoff_reservation_locked(struct list_head *list)
+static void ttm_eu_backoff_reservation_reverse(struct list_head *list,
+ struct ttm_validate_buffer *entry)
{
- struct ttm_validate_buffer *entry;
-
- list_for_each_entry(entry, list, head) {
+ list_for_each_entry_continue_reverse(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- if (!entry->reserved)
- continue;
- entry->reserved = false;
- if (entry->removed) {
- ttm_bo_add_to_lru(bo);
- entry->removed = false;
- }
__ttm_bo_unreserve(bo);
}
}
@@ -56,27 +48,9 @@
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- if (!entry->reserved)
- continue;
+ unsigned put_count = ttm_bo_del_from_lru(bo);
- if (!entry->removed) {
- entry->put_count = ttm_bo_del_from_lru(bo);
- entry->removed = true;
- }
- }
-}
-
-static void ttm_eu_list_ref_sub(struct list_head *list)
-{
- struct ttm_validate_buffer *entry;
-
- list_for_each_entry(entry, list, head) {
- struct ttm_buffer_object *bo = entry->bo;
-
- if (entry->put_count) {
- ttm_bo_list_ref_sub(bo, entry->put_count, true);
- entry->put_count = 0;
- }
+ ttm_bo_list_ref_sub(bo, put_count, true);
}
}
@@ -91,11 +65,18 @@
entry = list_first_entry(list, struct ttm_validate_buffer, head);
glob = entry->bo->glob;
+
spin_lock(&glob->lru_lock);
- ttm_eu_backoff_reservation_locked(list);
+ list_for_each_entry(entry, list, head) {
+ struct ttm_buffer_object *bo = entry->bo;
+
+ ttm_bo_add_to_lru(bo);
+ __ttm_bo_unreserve(bo);
+ }
+ spin_unlock(&glob->lru_lock);
+
if (ticket)
ww_acquire_fini(ticket);
- spin_unlock(&glob->lru_lock);
}
EXPORT_SYMBOL(ttm_eu_backoff_reservation);
@@ -112,7 +93,7 @@
*/
int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
- struct list_head *list)
+ struct list_head *list, bool intr)
{
struct ttm_bo_global *glob;
struct ttm_validate_buffer *entry;
@@ -121,60 +102,55 @@
if (list_empty(list))
return 0;
- list_for_each_entry(entry, list, head) {
- entry->reserved = false;
- entry->put_count = 0;
- entry->removed = false;
- }
-
entry = list_first_entry(list, struct ttm_validate_buffer, head);
glob = entry->bo->glob;
if (ticket)
ww_acquire_init(ticket, &reservation_ww_class);
-retry:
+
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- /* already slowpath reserved? */
- if (entry->reserved)
+ ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true,
+ ticket);
+ if (!ret && unlikely(atomic_read(&bo->cpu_writers) > 0)) {
+ __ttm_bo_unreserve(bo);
+
+ ret = -EBUSY;
+ }
+
+ if (!ret)
continue;
- ret = __ttm_bo_reserve(bo, true, (ticket == NULL), true,
- ticket);
+ /* uh oh, we lost out, drop every reservation and try
+ * to only reserve this buffer, then start over if
+ * this succeeds.
+ */
+ ttm_eu_backoff_reservation_reverse(list, entry);
- if (ret == -EDEADLK) {
- /* uh oh, we lost out, drop every reservation and try
- * to only reserve this buffer, then start over if
- * this succeeds.
- */
- BUG_ON(ticket == NULL);
- spin_lock(&glob->lru_lock);
- ttm_eu_backoff_reservation_locked(list);
- spin_unlock(&glob->lru_lock);
- ttm_eu_list_ref_sub(list);
+ if (ret == -EDEADLK && intr) {
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
ticket);
- if (unlikely(ret != 0)) {
- if (ret == -EINTR)
- ret = -ERESTARTSYS;
- goto err_fini;
- }
-
- entry->reserved = true;
- if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
- ret = -EBUSY;
- goto err;
- }
- goto retry;
- } else if (ret)
- goto err;
-
- entry->reserved = true;
- if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
- ret = -EBUSY;
- goto err;
+ } else if (ret == -EDEADLK) {
+ ww_mutex_lock_slow(&bo->resv->lock, ticket);
+ ret = 0;
}
+
+ if (unlikely(ret != 0)) {
+ if (ret == -EINTR)
+ ret = -ERESTARTSYS;
+ if (ticket) {
+ ww_acquire_done(ticket);
+ ww_acquire_fini(ticket);
+ }
+ return ret;
+ }
+
+ /* move this item to the front of the list,
+ * forces correct iteration of the loop without keeping track
+ */
+ list_del(&entry->head);
+ list_add(&entry->head, list);
}
if (ticket)
@@ -182,25 +158,12 @@
spin_lock(&glob->lru_lock);
ttm_eu_del_from_lru_locked(list);
spin_unlock(&glob->lru_lock);
- ttm_eu_list_ref_sub(list);
return 0;
-
-err:
- spin_lock(&glob->lru_lock);
- ttm_eu_backoff_reservation_locked(list);
- spin_unlock(&glob->lru_lock);
- ttm_eu_list_ref_sub(list);
-err_fini:
- if (ticket) {
- ww_acquire_done(ticket);
- ww_acquire_fini(ticket);
- }
- return ret;
}
EXPORT_SYMBOL(ttm_eu_reserve_buffers);
void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
- struct list_head *list, void *sync_obj)
+ struct list_head *list, struct fence *fence)
{
struct ttm_validate_buffer *entry;
struct ttm_buffer_object *bo;
@@ -217,24 +180,15 @@
glob = bo->glob;
spin_lock(&glob->lru_lock);
- spin_lock(&bdev->fence_lock);
list_for_each_entry(entry, list, head) {
bo = entry->bo;
- entry->old_sync_obj = bo->sync_obj;
- bo->sync_obj = driver->sync_obj_ref(sync_obj);
+ reservation_object_add_excl_fence(bo->resv, fence);
ttm_bo_add_to_lru(bo);
__ttm_bo_unreserve(bo);
- entry->reserved = false;
}
- spin_unlock(&bdev->fence_lock);
spin_unlock(&glob->lru_lock);
if (ticket)
ww_acquire_fini(ticket);
-
- list_for_each_entry(entry, list, head) {
- if (entry->old_sync_obj)
- driver->sync_obj_unref(&entry->old_sync_obj);
- }
}
EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 37c093c..cff2bf9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -802,44 +802,6 @@
}
/**
- * FIXME: We're using the old vmware polling method to sync.
- * Do this with fences instead.
- */
-
-static void *vmw_sync_obj_ref(void *sync_obj)
-{
-
- return (void *)
- vmw_fence_obj_reference((struct vmw_fence_obj *) sync_obj);
-}
-
-static void vmw_sync_obj_unref(void **sync_obj)
-{
- vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
-}
-
-static int vmw_sync_obj_flush(void *sync_obj)
-{
- vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
- return 0;
-}
-
-static bool vmw_sync_obj_signaled(void *sync_obj)
-{
- return vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
- DRM_VMW_FENCE_FLAG_EXEC);
-
-}
-
-static int vmw_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
-{
- return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
- DRM_VMW_FENCE_FLAG_EXEC,
- lazy, interruptible,
- VMW_FENCE_WAIT_TIMEOUT);
-}
-
-/**
* vmw_move_notify - TTM move_notify_callback
*
* @bo: The TTM buffer object about to move.
@@ -863,11 +825,7 @@
*/
static void vmw_swap_notify(struct ttm_buffer_object *bo)
{
- struct ttm_bo_device *bdev = bo->bdev;
-
- spin_lock(&bdev->fence_lock);
ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bdev->fence_lock);
}
@@ -880,11 +838,6 @@
.evict_flags = vmw_evict_flags,
.move = NULL,
.verify_access = vmw_verify_access,
- .sync_obj_signaled = vmw_sync_obj_signaled,
- .sync_obj_wait = vmw_sync_obj_wait,
- .sync_obj_flush = vmw_sync_obj_flush,
- .sync_obj_unref = vmw_sync_obj_unref,
- .sync_obj_ref = vmw_sync_obj_ref,
.move_notify = vmw_move_notify,
.swap_notify = vmw_swap_notify,
.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 99f7317..4ee799b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -342,7 +342,6 @@
uint32_t *cmd_bounce;
uint32_t cmd_bounce_size;
struct list_head resource_list;
- uint32_t fence_flags;
struct ttm_buffer_object *cur_query_bo;
struct list_head res_relocations;
uint32_t *buf_start;
@@ -704,6 +703,7 @@
extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
uint32_t *seqno);
+extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 7bfdaa1..0ceaddc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -346,13 +346,10 @@
++sw_context->cur_val_buf;
val_buf = &vval_buf->base;
val_buf->bo = ttm_bo_reference(bo);
- val_buf->reserved = false;
list_add_tail(&val_buf->head, &sw_context->validate_nodes);
vval_buf->validate_as_mob = validate_as_mob;
}
- sw_context->fence_flags |= DRM_VMW_FENCE_FLAG_EXEC;
-
if (p_val_node)
*p_val_node = val_node;
@@ -2338,13 +2335,9 @@
if (p_handle != NULL)
ret = vmw_user_fence_create(file_priv, dev_priv->fman,
- sequence,
- DRM_VMW_FENCE_FLAG_EXEC,
- p_fence, p_handle);
+ sequence, p_fence, p_handle);
else
- ret = vmw_fence_create(dev_priv->fman, sequence,
- DRM_VMW_FENCE_FLAG_EXEC,
- p_fence);
+ ret = vmw_fence_create(dev_priv->fman, sequence, p_fence);
if (unlikely(ret != 0 && !synced)) {
(void) vmw_fallback_wait(dev_priv, false, false,
@@ -2396,7 +2389,7 @@
BUG_ON(fence == NULL);
fence_rep.handle = fence_handle;
- fence_rep.seqno = fence->seqno;
+ fence_rep.seqno = fence->base.seqno;
vmw_update_seqno(dev_priv, &dev_priv->fifo);
fence_rep.passed_seqno = dev_priv->last_read_seqno;
}
@@ -2417,8 +2410,7 @@
ttm_ref_object_base_unref(vmw_fp->tfile,
fence_handle, TTM_REF_USAGE);
DRM_ERROR("Fence copy error. Syncing.\n");
- (void) vmw_fence_obj_wait(fence, fence->signal_mask,
- false, false,
+ (void) vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
}
}
@@ -2470,7 +2462,6 @@
sw_context->fp = vmw_fpriv(file_priv);
sw_context->cur_reloc = 0;
sw_context->cur_val_buf = 0;
- sw_context->fence_flags = 0;
INIT_LIST_HEAD(&sw_context->resource_list);
sw_context->cur_query_bo = dev_priv->pinned_bo;
sw_context->last_query_ctx = NULL;
@@ -2496,7 +2487,7 @@
if (unlikely(ret != 0))
goto out_err_nores;
- ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes);
+ ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true);
if (unlikely(ret != 0))
goto out_err;
@@ -2684,10 +2675,7 @@
query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
list_add_tail(&query_val.head, &validate_list);
- do {
- ret = ttm_eu_reserve_buffers(&ticket, &validate_list);
- } while (ret == -ERESTARTSYS);
-
+ ret = ttm_eu_reserve_buffers(&ticket, &validate_list, false);
if (unlikely(ret != 0)) {
vmw_execbuf_unpin_panic(dev_priv);
goto out_no_reserve;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 436b013..197164f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -35,7 +35,7 @@
struct vmw_private *dev_priv;
spinlock_t lock;
struct list_head fence_list;
- struct work_struct work;
+ struct work_struct work, ping_work;
u32 user_fence_size;
u32 fence_size;
u32 event_fence_action_size;
@@ -46,6 +46,7 @@
bool goal_irq_on; /* Protected by @goal_irq_mutex */
bool seqno_valid; /* Protected by @lock, and may not be set to true
without the @goal_irq_mutex held. */
+ unsigned ctx;
};
struct vmw_user_fence {
@@ -80,6 +81,12 @@
uint32_t *tv_usec;
};
+static struct vmw_fence_manager *
+fman_from_fence(struct vmw_fence_obj *fence)
+{
+ return container_of(fence->base.lock, struct vmw_fence_manager, lock);
+}
+
/**
* Note on fencing subsystem usage of irqs:
* Typically the vmw_fences_update function is called
@@ -102,25 +109,143 @@
* objects with actions attached to them.
*/
-static void vmw_fence_obj_destroy_locked(struct kref *kref)
+static void vmw_fence_obj_destroy(struct fence *f)
{
struct vmw_fence_obj *fence =
- container_of(kref, struct vmw_fence_obj, kref);
+ container_of(f, struct vmw_fence_obj, base);
- struct vmw_fence_manager *fman = fence->fman;
- unsigned int num_fences;
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
+ unsigned long irq_flags;
+ spin_lock_irqsave(&fman->lock, irq_flags);
list_del_init(&fence->head);
- num_fences = --fman->num_fence_objects;
- spin_unlock_irq(&fman->lock);
- if (fence->destroy)
- fence->destroy(fence);
- else
- kfree(fence);
-
- spin_lock_irq(&fman->lock);
+ --fman->num_fence_objects;
+ spin_unlock_irqrestore(&fman->lock, irq_flags);
+ fence->destroy(fence);
}
+static const char *vmw_fence_get_driver_name(struct fence *f)
+{
+ return "vmwgfx";
+}
+
+static const char *vmw_fence_get_timeline_name(struct fence *f)
+{
+ return "svga";
+}
+
+static void vmw_fence_ping_func(struct work_struct *work)
+{
+ struct vmw_fence_manager *fman =
+ container_of(work, struct vmw_fence_manager, ping_work);
+
+ vmw_fifo_ping_host(fman->dev_priv, SVGA_SYNC_GENERIC);
+}
+
+static bool vmw_fence_enable_signaling(struct fence *f)
+{
+ struct vmw_fence_obj *fence =
+ container_of(f, struct vmw_fence_obj, base);
+
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
+ struct vmw_private *dev_priv = fman->dev_priv;
+
+ __le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+ if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
+ return false;
+
+ if (mutex_trylock(&dev_priv->hw_mutex)) {
+ vmw_fifo_ping_host_locked(dev_priv, SVGA_SYNC_GENERIC);
+ mutex_unlock(&dev_priv->hw_mutex);
+ } else
+ schedule_work(&fman->ping_work);
+
+ return true;
+}
+
+struct vmwgfx_wait_cb {
+ struct fence_cb base;
+ struct task_struct *task;
+};
+
+static void
+vmwgfx_wait_cb(struct fence *fence, struct fence_cb *cb)
+{
+ struct vmwgfx_wait_cb *wait =
+ container_of(cb, struct vmwgfx_wait_cb, base);
+
+ wake_up_process(wait->task);
+}
+
+static void __vmw_fences_update(struct vmw_fence_manager *fman);
+
+static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
+{
+ struct vmw_fence_obj *fence =
+ container_of(f, struct vmw_fence_obj, base);
+
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
+ struct vmw_private *dev_priv = fman->dev_priv;
+ struct vmwgfx_wait_cb cb;
+ long ret = timeout;
+ unsigned long irq_flags;
+
+ if (likely(vmw_fence_obj_signaled(fence)))
+ return timeout;
+
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_seqno_waiter_add(dev_priv);
+
+ spin_lock_irqsave(f->lock, irq_flags);
+
+ if (intr && signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ goto out;
+ }
+
+ cb.base.func = vmwgfx_wait_cb;
+ cb.task = current;
+ list_add(&cb.base.node, &f->cb_list);
+
+ while (ret > 0) {
+ __vmw_fences_update(fman);
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &f->flags))
+ break;
+
+ if (intr)
+ __set_current_state(TASK_INTERRUPTIBLE);
+ else
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irqrestore(f->lock, irq_flags);
+
+ ret = schedule_timeout(ret);
+
+ spin_lock_irqsave(f->lock, irq_flags);
+ if (ret > 0 && intr && signal_pending(current))
+ ret = -ERESTARTSYS;
+ }
+
+ if (!list_empty(&cb.base.node))
+ list_del(&cb.base.node);
+ __set_current_state(TASK_RUNNING);
+
+out:
+ spin_unlock_irqrestore(f->lock, irq_flags);
+
+ vmw_seqno_waiter_remove(dev_priv);
+
+ return ret;
+}
+
+static struct fence_ops vmw_fence_ops = {
+ .get_driver_name = vmw_fence_get_driver_name,
+ .get_timeline_name = vmw_fence_get_timeline_name,
+ .enable_signaling = vmw_fence_enable_signaling,
+ .wait = vmw_fence_wait,
+ .release = vmw_fence_obj_destroy,
+};
+
/**
* Execute signal actions on fences recently signaled.
@@ -180,12 +305,14 @@
INIT_LIST_HEAD(&fman->fence_list);
INIT_LIST_HEAD(&fman->cleanup_list);
INIT_WORK(&fman->work, &vmw_fence_work_func);
+ INIT_WORK(&fman->ping_work, &vmw_fence_ping_func);
fman->fifo_down = true;
fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
fman->event_fence_action_size =
ttm_round_pot(sizeof(struct vmw_event_fence_action));
mutex_init(&fman->goal_irq_mutex);
+ fman->ctx = fence_context_alloc(1);
return fman;
}
@@ -196,6 +323,7 @@
bool lists_empty;
(void) cancel_work_sync(&fman->work);
+ (void) cancel_work_sync(&fman->ping_work);
spin_lock_irqsave(&fman->lock, irq_flags);
lists_empty = list_empty(&fman->fence_list) &&
@@ -207,23 +335,16 @@
}
static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
- struct vmw_fence_obj *fence,
- u32 seqno,
- uint32_t mask,
+ struct vmw_fence_obj *fence, u32 seqno,
void (*destroy) (struct vmw_fence_obj *fence))
{
unsigned long irq_flags;
- unsigned int num_fences;
int ret = 0;
- fence->seqno = seqno;
+ fence_init(&fence->base, &vmw_fence_ops, &fman->lock,
+ fman->ctx, seqno);
INIT_LIST_HEAD(&fence->seq_passed_actions);
- fence->fman = fman;
- fence->signaled = 0;
- fence->signal_mask = mask;
- kref_init(&fence->kref);
fence->destroy = destroy;
- init_waitqueue_head(&fence->queue);
spin_lock_irqsave(&fman->lock, irq_flags);
if (unlikely(fman->fifo_down)) {
@@ -231,7 +352,7 @@
goto out_unlock;
}
list_add_tail(&fence->head, &fman->fence_list);
- num_fences = ++fman->num_fence_objects;
+ ++fman->num_fence_objects;
out_unlock:
spin_unlock_irqrestore(&fman->lock, irq_flags);
@@ -239,38 +360,6 @@
}
-struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
-{
- if (unlikely(fence == NULL))
- return NULL;
-
- kref_get(&fence->kref);
- return fence;
-}
-
-/**
- * vmw_fence_obj_unreference
- *
- * Note that this function may not be entered with disabled irqs since
- * it may re-enable them in the destroy function.
- *
- */
-void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
-{
- struct vmw_fence_obj *fence = *fence_p;
- struct vmw_fence_manager *fman;
-
- if (unlikely(fence == NULL))
- return;
-
- fman = fence->fman;
- *fence_p = NULL;
- spin_lock_irq(&fman->lock);
- BUG_ON(atomic_read(&fence->kref.refcount) == 0);
- kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
- spin_unlock_irq(&fman->lock);
-}
-
static void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
struct list_head *list)
{
@@ -326,7 +415,7 @@
list_for_each_entry(fence, &fman->fence_list, head) {
if (!list_empty(&fence->seq_passed_actions)) {
fman->seqno_valid = true;
- iowrite32(fence->seqno,
+ iowrite32(fence->base.seqno,
fifo_mem + SVGA_FIFO_FENCE_GOAL);
break;
}
@@ -353,27 +442,27 @@
*/
static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
{
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
u32 goal_seqno;
__le32 __iomem *fifo_mem;
- if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC)
+ if (fence_is_signaled_locked(&fence->base))
return false;
- fifo_mem = fence->fman->dev_priv->mmio_virt;
+ fifo_mem = fman->dev_priv->mmio_virt;
goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
- if (likely(fence->fman->seqno_valid &&
- goal_seqno - fence->seqno < VMW_FENCE_WRAP))
+ if (likely(fman->seqno_valid &&
+ goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
return false;
- iowrite32(fence->seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
- fence->fman->seqno_valid = true;
+ iowrite32(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+ fman->seqno_valid = true;
return true;
}
-void vmw_fences_update(struct vmw_fence_manager *fman)
+static void __vmw_fences_update(struct vmw_fence_manager *fman)
{
- unsigned long flags;
struct vmw_fence_obj *fence, *next_fence;
struct list_head action_list;
bool needs_rerun;
@@ -382,32 +471,25 @@
seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
rerun:
- spin_lock_irqsave(&fman->lock, flags);
list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
- if (seqno - fence->seqno < VMW_FENCE_WRAP) {
+ if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
list_del_init(&fence->head);
- fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+ fence_signal_locked(&fence->base);
INIT_LIST_HEAD(&action_list);
list_splice_init(&fence->seq_passed_actions,
&action_list);
vmw_fences_perform_actions(fman, &action_list);
- wake_up_all(&fence->queue);
} else
break;
}
- needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
-
- if (!list_empty(&fman->cleanup_list))
- (void) schedule_work(&fman->work);
- spin_unlock_irqrestore(&fman->lock, flags);
-
/*
* Rerun if the fence goal seqno was updated, and the
* hardware might have raced with that update, so that
* we missed a fence_goal irq.
*/
+ needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
if (unlikely(needs_rerun)) {
new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
if (new_seqno != seqno) {
@@ -415,79 +497,58 @@
goto rerun;
}
}
+
+ if (!list_empty(&fman->cleanup_list))
+ (void) schedule_work(&fman->work);
}
-bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
- uint32_t flags)
+void vmw_fences_update(struct vmw_fence_manager *fman)
{
- struct vmw_fence_manager *fman = fence->fman;
unsigned long irq_flags;
- uint32_t signaled;
spin_lock_irqsave(&fman->lock, irq_flags);
- signaled = fence->signaled;
+ __vmw_fences_update(fman);
spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
- flags &= fence->signal_mask;
- if ((signaled & flags) == flags)
+bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence)
+{
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
+
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
return 1;
- if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0)
- vmw_fences_update(fman);
+ vmw_fences_update(fman);
- spin_lock_irqsave(&fman->lock, irq_flags);
- signaled = fence->signaled;
- spin_unlock_irqrestore(&fman->lock, irq_flags);
-
- return ((signaled & flags) == flags);
+ return fence_is_signaled(&fence->base);
}
-int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
- uint32_t flags, bool lazy,
+int vmw_fence_obj_wait(struct vmw_fence_obj *fence, bool lazy,
bool interruptible, unsigned long timeout)
{
- struct vmw_private *dev_priv = fence->fman->dev_priv;
- long ret;
+ long ret = fence_wait_timeout(&fence->base, interruptible, timeout);
- if (likely(vmw_fence_obj_signaled(fence, flags)))
+ if (likely(ret > 0))
return 0;
-
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
- vmw_seqno_waiter_add(dev_priv);
-
- if (interruptible)
- ret = wait_event_interruptible_timeout
- (fence->queue,
- vmw_fence_obj_signaled(fence, flags),
- timeout);
+ else if (ret == 0)
+ return -EBUSY;
else
- ret = wait_event_timeout
- (fence->queue,
- vmw_fence_obj_signaled(fence, flags),
- timeout);
-
- vmw_seqno_waiter_remove(dev_priv);
-
- if (unlikely(ret == 0))
- ret = -EBUSY;
- else if (likely(ret > 0))
- ret = 0;
-
- return ret;
+ return ret;
}
void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
{
- struct vmw_private *dev_priv = fence->fman->dev_priv;
+ struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;
vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
}
static void vmw_fence_destroy(struct vmw_fence_obj *fence)
{
- struct vmw_fence_manager *fman = fence->fman;
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
- kfree(fence);
+ fence_free(&fence->base);
+
/*
* Free kernel space accounting.
*/
@@ -497,7 +558,6 @@
int vmw_fence_create(struct vmw_fence_manager *fman,
uint32_t seqno,
- uint32_t mask,
struct vmw_fence_obj **p_fence)
{
struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
@@ -515,7 +575,7 @@
goto out_no_object;
}
- ret = vmw_fence_obj_init(fman, fence, seqno, mask,
+ ret = vmw_fence_obj_init(fman, fence, seqno,
vmw_fence_destroy);
if (unlikely(ret != 0))
goto out_err_init;
@@ -535,7 +595,7 @@
{
struct vmw_user_fence *ufence =
container_of(fence, struct vmw_user_fence, fence);
- struct vmw_fence_manager *fman = fence->fman;
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
ttm_base_object_kfree(ufence, base);
/*
@@ -559,7 +619,6 @@
int vmw_user_fence_create(struct drm_file *file_priv,
struct vmw_fence_manager *fman,
uint32_t seqno,
- uint32_t mask,
struct vmw_fence_obj **p_fence,
uint32_t *p_handle)
{
@@ -586,7 +645,7 @@
}
ret = vmw_fence_obj_init(fman, &ufence->fence, seqno,
- mask, vmw_user_fence_destroy);
+ vmw_user_fence_destroy);
if (unlikely(ret != 0)) {
kfree(ufence);
goto out_no_object;
@@ -629,7 +688,6 @@
void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
{
- unsigned long irq_flags;
struct list_head action_list;
int ret;
@@ -638,35 +696,32 @@
* restart when we've released the fman->lock.
*/
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_irq(&fman->lock);
fman->fifo_down = true;
while (!list_empty(&fman->fence_list)) {
struct vmw_fence_obj *fence =
list_entry(fman->fence_list.prev, struct vmw_fence_obj,
head);
- kref_get(&fence->kref);
+ fence_get(&fence->base);
spin_unlock_irq(&fman->lock);
- ret = vmw_fence_obj_wait(fence, fence->signal_mask,
- false, false,
+ ret = vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
if (unlikely(ret != 0)) {
list_del_init(&fence->head);
- fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+ fence_signal(&fence->base);
INIT_LIST_HEAD(&action_list);
list_splice_init(&fence->seq_passed_actions,
&action_list);
vmw_fences_perform_actions(fman, &action_list);
- wake_up_all(&fence->queue);
}
- spin_lock_irq(&fman->lock);
-
BUG_ON(!list_empty(&fence->head));
- kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+ fence_put(&fence->base);
+ spin_lock_irq(&fman->lock);
}
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_irq(&fman->lock);
}
void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
@@ -716,14 +771,14 @@
timeout = jiffies;
if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie)) {
- ret = ((vmw_fence_obj_signaled(fence, arg->flags)) ?
+ ret = ((vmw_fence_obj_signaled(fence)) ?
0 : -EBUSY);
goto out;
}
timeout = (unsigned long)arg->kernel_cookie - timeout;
- ret = vmw_fence_obj_wait(fence, arg->flags, arg->lazy, true, timeout);
+ ret = vmw_fence_obj_wait(fence, arg->lazy, true, timeout);
out:
ttm_base_object_unref(&base);
@@ -758,12 +813,12 @@
}
fence = &(container_of(base, struct vmw_user_fence, base)->fence);
- fman = fence->fman;
+ fman = fman_from_fence(fence);
- arg->signaled = vmw_fence_obj_signaled(fence, arg->flags);
+ arg->signaled = vmw_fence_obj_signaled(fence);
+
+ arg->signaled_flags = arg->flags;
spin_lock_irq(&fman->lock);
-
- arg->signaled_flags = fence->signaled;
arg->passed_seqno = dev_priv->last_read_seqno;
spin_unlock_irq(&fman->lock);
@@ -876,7 +931,7 @@
{
struct vmw_event_fence_action *eaction =
container_of(action, struct vmw_event_fence_action, action);
- struct vmw_fence_manager *fman = eaction->fence->fman;
+ struct vmw_fence_manager *fman = fman_from_fence(eaction->fence);
unsigned long irq_flags;
spin_lock_irqsave(&fman->lock, irq_flags);
@@ -900,7 +955,7 @@
static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
struct vmw_fence_action *action)
{
- struct vmw_fence_manager *fman = fence->fman;
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
unsigned long irq_flags;
bool run_update = false;
@@ -908,7 +963,7 @@
spin_lock_irqsave(&fman->lock, irq_flags);
fman->pending_actions[action->type]++;
- if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC) {
+ if (fence_is_signaled_locked(&fence->base)) {
struct list_head action_list;
INIT_LIST_HEAD(&action_list);
@@ -960,7 +1015,7 @@
bool interruptible)
{
struct vmw_event_fence_action *eaction;
- struct vmw_fence_manager *fman = fence->fman;
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
unsigned long irq_flags;
@@ -1000,7 +1055,8 @@
bool interruptible)
{
struct vmw_event_fence_pending *event;
- struct drm_device *dev = fence->fman->dev_priv->dev;
+ struct vmw_fence_manager *fman = fman_from_fence(fence);
+ struct drm_device *dev = fman->dev_priv->dev;
unsigned long irq_flags;
int ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index faf2e78..26a4add 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -27,6 +27,8 @@
#ifndef _VMWGFX_FENCE_H_
+#include <linux/fence.h>
+
#define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
struct vmw_private;
@@ -50,16 +52,11 @@
};
struct vmw_fence_obj {
- struct kref kref;
- u32 seqno;
+ struct fence base;
- struct vmw_fence_manager *fman;
struct list_head head;
- uint32_t signaled;
- uint32_t signal_mask;
struct list_head seq_passed_actions;
void (*destroy)(struct vmw_fence_obj *fence);
- wait_queue_head_t queue;
};
extern struct vmw_fence_manager *
@@ -67,17 +64,29 @@
extern void vmw_fence_manager_takedown(struct vmw_fence_manager *fman);
-extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
+static inline void
+vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
+{
+ struct vmw_fence_obj *fence = *fence_p;
-extern struct vmw_fence_obj *
-vmw_fence_obj_reference(struct vmw_fence_obj *fence);
+ *fence_p = NULL;
+ if (fence)
+ fence_put(&fence->base);
+}
+
+static inline struct vmw_fence_obj *
+vmw_fence_obj_reference(struct vmw_fence_obj *fence)
+{
+ if (fence)
+ fence_get(&fence->base);
+ return fence;
+}
extern void vmw_fences_update(struct vmw_fence_manager *fman);
-extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
- uint32_t flags);
+extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence);
-extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence, uint32_t flags,
+extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
bool lazy,
bool interruptible, unsigned long timeout);
@@ -85,13 +94,11 @@
extern int vmw_fence_create(struct vmw_fence_manager *fman,
uint32_t seqno,
- uint32_t mask,
struct vmw_fence_obj **p_fence);
extern int vmw_user_fence_create(struct drm_file *file_priv,
struct vmw_fence_manager *fman,
uint32_t sequence,
- uint32_t mask,
struct vmw_fence_obj **p_fence,
uint32_t *p_handle);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 6ccd993..d9b4e69 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -160,16 +160,21 @@
return vmw_fifo_send_fence(dev_priv, &dummy);
}
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+void vmw_fifo_ping_host_locked(struct vmw_private *dev_priv, uint32_t reason)
{
__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
- mutex_lock(&dev_priv->hw_mutex);
-
if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
iowrite32(1, fifo_mem + SVGA_FIFO_BUSY);
vmw_write(dev_priv, SVGA_REG_SYNC, reason);
}
+}
+
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+{
+ mutex_lock(&dev_priv->hw_mutex);
+
+ vmw_fifo_ping_host_locked(dev_priv, reason);
mutex_unlock(&dev_priv->hw_mutex);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index a432c0d..ff0e03b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -567,13 +567,18 @@
int ret;
if (flags & drm_vmw_synccpu_allow_cs) {
- struct ttm_bo_device *bdev = bo->bdev;
+ bool nonblock = !!(flags & drm_vmw_synccpu_dontblock);
+ long lret;
- spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, true,
- !!(flags & drm_vmw_synccpu_dontblock));
- spin_unlock(&bdev->fence_lock);
- return ret;
+ if (nonblock)
+ return reservation_object_test_signaled_rcu(bo->resv, true) ? 0 : -EBUSY;
+
+ lret = reservation_object_wait_timeout_rcu(bo->resv, true, true, MAX_SCHEDULE_TIMEOUT);
+ if (!lret)
+ return -EBUSY;
+ else if (lret < 0)
+ return lret;
+ return 0;
}
ret = ttm_bo_synccpu_write_grab
@@ -1215,7 +1220,7 @@
INIT_LIST_HEAD(&val_list);
val_buf->bo = ttm_bo_reference(&res->backup->base);
list_add_tail(&val_buf->head, &val_list);
- ret = ttm_eu_reserve_buffers(NULL, &val_list);
+ ret = ttm_eu_reserve_buffers(NULL, &val_list, interruptible);
if (unlikely(ret != 0))
goto out_no_reserve;
@@ -1419,25 +1424,16 @@
struct vmw_fence_obj *fence)
{
struct ttm_bo_device *bdev = bo->bdev;
- struct ttm_bo_driver *driver = bdev->driver;
- struct vmw_fence_obj *old_fence_obj;
+
struct vmw_private *dev_priv =
container_of(bdev, struct vmw_private, bdev);
- if (fence == NULL)
+ if (fence == NULL) {
vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
- else
- driver->sync_obj_ref(fence);
-
- spin_lock(&bdev->fence_lock);
-
- old_fence_obj = bo->sync_obj;
- bo->sync_obj = fence;
-
- spin_unlock(&bdev->fence_lock);
-
- if (old_fence_obj)
- vmw_fence_obj_unreference(&old_fence_obj);
+ reservation_object_add_excl_fence(bo->resv, &fence->base);
+ fence_put(&fence->base);
+ } else
+ reservation_object_add_excl_fence(bo->resv, &fence->base);
}
/**
@@ -1475,7 +1471,6 @@
if (mem->mem_type != VMW_PL_MOB) {
struct vmw_resource *res, *n;
- struct ttm_bo_device *bdev = bo->bdev;
struct ttm_validate_buffer val_buf;
val_buf.bo = bo;
@@ -1491,9 +1486,7 @@
list_del_init(&res->mob_head);
}
- spin_lock(&bdev->fence_lock);
(void) ttm_bo_wait(bo, false, false, false);
- spin_unlock(&bdev->fence_lock);
}
}
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e3d39c8..70b4491 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -173,7 +173,6 @@
* @lru: List head for the lru list.
* @ddestroy: List head for the delayed destroy list.
* @swap: List head for swap LRU list.
- * @sync_obj: Pointer to a synchronization object.
* @priv_flags: Flags describing buffer object internal state.
* @vma_node: Address space manager node.
* @offset: The current GPU offset, which can have different meanings
@@ -237,13 +236,9 @@
struct list_head io_reserve_lru;
/**
- * Members protected by struct buffer_object_device::fence_lock
- * In addition, setting sync_obj to anything else
- * than NULL requires bo::reserved to be held. This allows for
- * checking NULL while reserved but not holding the mentioned lock.
+ * Members protected by a bo reservation.
*/
- void *sync_obj;
unsigned long priv_flags;
struct drm_vma_offset_node vma_node;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 5c8bb56..142d752 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -312,11 +312,6 @@
* @move: Callback for a driver to hook in accelerated functions to
* move a buffer.
* If set to NULL, a potentially slow memcpy() move is used.
- * @sync_obj_signaled: See ttm_fence_api.h
- * @sync_obj_wait: See ttm_fence_api.h
- * @sync_obj_flush: See ttm_fence_api.h
- * @sync_obj_unref: See ttm_fence_api.h
- * @sync_obj_ref: See ttm_fence_api.h
*/
struct ttm_bo_driver {
@@ -418,23 +413,6 @@
int (*verify_access) (struct ttm_buffer_object *bo,
struct file *filp);
- /**
- * In case a driver writer dislikes the TTM fence objects,
- * the driver writer can replace those with sync objects of
- * his / her own. If it turns out that no driver writer is
- * using these. I suggest we remove these hooks and plug in
- * fences directly. The bo driver needs the following functionality:
- * See the corresponding functions in the fence object API
- * documentation.
- */
-
- bool (*sync_obj_signaled) (void *sync_obj);
- int (*sync_obj_wait) (void *sync_obj,
- bool lazy, bool interruptible);
- int (*sync_obj_flush) (void *sync_obj);
- void (*sync_obj_unref) (void **sync_obj);
- void *(*sync_obj_ref) (void *sync_obj);
-
/* hook to notify driver about a driver move so it
* can do tiling things */
void (*move_notify)(struct ttm_buffer_object *bo,
@@ -521,8 +499,6 @@
*
* @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
* @man: An array of mem_type_managers.
- * @fence_lock: Protects the synchronizing members on *all* bos belonging
- * to this device.
* @vma_manager: Address space manager
* lru_lock: Spinlock that protects the buffer+device lru lists and
* ddestroy lists.
@@ -542,7 +518,6 @@
struct ttm_bo_global *glob;
struct ttm_bo_driver *driver;
struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
- spinlock_t fence_lock;
/*
* Protected by internal locks.
@@ -1025,7 +1000,7 @@
* ttm_bo_move_accel_cleanup.
*
* @bo: A pointer to a struct ttm_buffer_object.
- * @sync_obj: A sync object that signals when moving is complete.
+ * @fence: A fence object that signals when moving is complete.
* @evict: This is an evict move. Don't return until the buffer is idle.
* @no_wait_gpu: Return immediately if the GPU is busy.
* @new_mem: struct ttm_mem_reg indicating where to move.
@@ -1039,7 +1014,7 @@
*/
extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
- void *sync_obj,
+ struct fence *fence,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem);
/**
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 16db7d0..ff11a42 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -39,19 +39,11 @@
*
* @head: list head for thread-private list.
* @bo: refcounted buffer object pointer.
- * @reserved: Indicates whether @bo has been reserved for validation.
- * @removed: Indicates whether @bo has been removed from lru lists.
- * @put_count: Number of outstanding references on bo::list_kref.
- * @old_sync_obj: Pointer to a sync object about to be unreferenced
*/
struct ttm_validate_buffer {
struct list_head head;
struct ttm_buffer_object *bo;
- bool reserved;
- bool removed;
- int put_count;
- void *old_sync_obj;
};
/**
@@ -73,6 +65,7 @@
* @ticket: [out] ww_acquire_ctx filled in by call, or NULL if only
* non-blocking reserves should be tried.
* @list: thread private list of ttm_validate_buffer structs.
+ * @intr: should the wait be interruptible
*
* Tries to reserve bos pointed to by the list entries for validation.
* If the function returns 0, all buffers are marked as "unfenced",
@@ -84,9 +77,9 @@
* CPU write reservations to be cleared, and for other threads to
* unreserve their buffers.
*
- * This function may return -ERESTART or -EAGAIN if the calling process
- * receives a signal while waiting. In that case, no buffers on the list
- * will be reserved upon return.
+ * If intr is set to true, this function may return -ERESTARTSYS if the
+ * calling process receives a signal while waiting. In that case, no
+ * buffers on the list will be reserved upon return.
*
* Buffers reserved by this function should be unreserved by
* a call to either ttm_eu_backoff_reservation() or
@@ -95,14 +88,14 @@
*/
extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
- struct list_head *list);
+ struct list_head *list, bool intr);
/**
* function ttm_eu_fence_buffer_objects.
*
* @ticket: ww_acquire_ctx from reserve call
* @list: thread private list of ttm_validate_buffer structs.
- * @sync_obj: The new sync object for the buffers.
+ * @fence: The new exclusive fence for the buffers.
*
* This function should be called when command submission is complete, and
* it will add a new sync object to bos pointed to by entries on @list.
@@ -111,6 +104,7 @@
*/
extern void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
- struct list_head *list, void *sync_obj);
+ struct list_head *list,
+ struct fence *fence);
#endif