Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2015 Broadcom |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License version 2 as |
| 6 | * published by the Free Software Foundation. |
| 7 | */ |
| 8 | |
| 9 | /* DOC: VC4 GEM BO management support. |
| 10 | * |
| 11 | * The VC4 GPU architecture (both scanout and rendering) has direct |
| 12 | * access to system memory with no MMU in between. To support it, we |
| 13 | * use the GEM CMA helper functions to allocate contiguous ranges of |
| 14 | * physical memory for our BOs. |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 15 | * |
| 16 | * Since the CMA allocator is very slow, we keep a cache of recently |
| 17 | * freed BOs around so that the kernel's allocation of objects for 3D |
| 18 | * rendering can return quickly. |
Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 19 | */ |
| 20 | |
| 21 | #include "vc4_drv.h" |
| 22 | |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 23 | static void vc4_bo_stats_dump(struct vc4_dev *vc4) |
Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 24 | { |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 25 | DRM_INFO("num bos allocated: %d\n", |
| 26 | vc4->bo_stats.num_allocated); |
| 27 | DRM_INFO("size bos allocated: %dkb\n", |
| 28 | vc4->bo_stats.size_allocated / 1024); |
| 29 | DRM_INFO("num bos used: %d\n", |
| 30 | vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached); |
| 31 | DRM_INFO("size bos used: %dkb\n", |
| 32 | (vc4->bo_stats.size_allocated - |
| 33 | vc4->bo_stats.size_cached) / 1024); |
| 34 | DRM_INFO("num bos cached: %d\n", |
| 35 | vc4->bo_stats.num_cached); |
| 36 | DRM_INFO("size bos cached: %dkb\n", |
| 37 | vc4->bo_stats.size_cached / 1024); |
| 38 | } |
| 39 | |
#ifdef CONFIG_DEBUG_FS
/* Prints the BO statistics into the given seq_file.
 *
 * The stats are copied while holding bo_lock so that the values
 * printed below are consistent with each other; the printing itself
 * happens without the lock.
 *
 * Always returns 0.
 */
int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct vc4_dev *vc4 = to_vc4_dev(node->minor->dev);
	struct vc4_bo_stats snap;

	mutex_lock(&vc4->bo_lock);
	snap = vc4->bo_stats;
	mutex_unlock(&vc4->bo_lock);

	seq_printf(m, "num bos allocated: %d\n", snap.num_allocated);
	seq_printf(m, "size bos allocated: %dkb\n",
		   snap.size_allocated / 1024);

	/* "Used" is everything allocated that is not sitting in the cache. */
	seq_printf(m, "num bos used: %d\n",
		   snap.num_allocated - snap.num_cached);
	seq_printf(m, "size bos used: %dkb\n",
		   (snap.size_allocated - snap.size_cached) / 1024);

	seq_printf(m, "num bos cached: %d\n", snap.num_cached);
	seq_printf(m, "size bos cached: %dkb\n", snap.size_cached / 1024);

	return 0;
}
#endif
| 69 | |
| 70 | static uint32_t bo_page_index(size_t size) |
| 71 | { |
| 72 | return (size / PAGE_SIZE) - 1; |
| 73 | } |
| 74 | |
| 75 | /* Must be called with bo_lock held. */ |
| 76 | static void vc4_bo_destroy(struct vc4_bo *bo) |
| 77 | { |
| 78 | struct drm_gem_object *obj = &bo->base.base; |
| 79 | struct vc4_dev *vc4 = to_vc4_dev(obj->dev); |
| 80 | |
| 81 | vc4->bo_stats.num_allocated--; |
| 82 | vc4->bo_stats.size_allocated -= obj->size; |
| 83 | drm_gem_cma_free_object(obj); |
| 84 | } |
| 85 | |
| 86 | /* Must be called with bo_lock held. */ |
| 87 | static void vc4_bo_remove_from_cache(struct vc4_bo *bo) |
| 88 | { |
| 89 | struct drm_gem_object *obj = &bo->base.base; |
| 90 | struct vc4_dev *vc4 = to_vc4_dev(obj->dev); |
| 91 | |
| 92 | vc4->bo_stats.num_cached--; |
| 93 | vc4->bo_stats.size_cached -= obj->size; |
| 94 | |
| 95 | list_del(&bo->unref_head); |
| 96 | list_del(&bo->size_head); |
| 97 | } |
| 98 | |
| 99 | static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, |
| 100 | size_t size) |
| 101 | { |
| 102 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 103 | uint32_t page_index = bo_page_index(size); |
| 104 | |
| 105 | if (vc4->bo_cache.size_list_size <= page_index) { |
| 106 | uint32_t new_size = max(vc4->bo_cache.size_list_size * 2, |
| 107 | page_index + 1); |
| 108 | struct list_head *new_list; |
| 109 | uint32_t i; |
| 110 | |
| 111 | new_list = kmalloc_array(new_size, sizeof(struct list_head), |
| 112 | GFP_KERNEL); |
| 113 | if (!new_list) |
| 114 | return NULL; |
| 115 | |
| 116 | /* Rebase the old cached BO lists to their new list |
| 117 | * head locations. |
| 118 | */ |
| 119 | for (i = 0; i < vc4->bo_cache.size_list_size; i++) { |
| 120 | struct list_head *old_list = |
| 121 | &vc4->bo_cache.size_list[i]; |
| 122 | |
| 123 | if (list_empty(old_list)) |
| 124 | INIT_LIST_HEAD(&new_list[i]); |
| 125 | else |
| 126 | list_replace(old_list, &new_list[i]); |
| 127 | } |
| 128 | /* And initialize the brand new BO list heads. */ |
| 129 | for (i = vc4->bo_cache.size_list_size; i < new_size; i++) |
| 130 | INIT_LIST_HEAD(&new_list[i]); |
| 131 | |
| 132 | kfree(vc4->bo_cache.size_list); |
| 133 | vc4->bo_cache.size_list = new_list; |
| 134 | vc4->bo_cache.size_list_size = new_size; |
| 135 | } |
| 136 | |
| 137 | return &vc4->bo_cache.size_list[page_index]; |
| 138 | } |
| 139 | |
| 140 | void vc4_bo_cache_purge(struct drm_device *dev) |
| 141 | { |
| 142 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 143 | |
| 144 | mutex_lock(&vc4->bo_lock); |
| 145 | while (!list_empty(&vc4->bo_cache.time_list)) { |
| 146 | struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list, |
| 147 | struct vc4_bo, unref_head); |
| 148 | vc4_bo_remove_from_cache(bo); |
| 149 | vc4_bo_destroy(bo); |
| 150 | } |
| 151 | mutex_unlock(&vc4->bo_lock); |
| 152 | } |
| 153 | |
| 154 | static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, |
| 155 | uint32_t size) |
| 156 | { |
| 157 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 158 | uint32_t page_index = bo_page_index(size); |
| 159 | struct vc4_bo *bo = NULL; |
| 160 | |
| 161 | size = roundup(size, PAGE_SIZE); |
| 162 | |
| 163 | mutex_lock(&vc4->bo_lock); |
| 164 | if (page_index >= vc4->bo_cache.size_list_size) |
| 165 | goto out; |
| 166 | |
| 167 | if (list_empty(&vc4->bo_cache.size_list[page_index])) |
| 168 | goto out; |
| 169 | |
| 170 | bo = list_first_entry(&vc4->bo_cache.size_list[page_index], |
| 171 | struct vc4_bo, size_head); |
| 172 | vc4_bo_remove_from_cache(bo); |
| 173 | kref_init(&bo->base.base.refcount); |
| 174 | |
| 175 | out: |
| 176 | mutex_unlock(&vc4->bo_lock); |
| 177 | return bo; |
| 178 | } |
| 179 | |
| 180 | /** |
| 181 | * vc4_gem_create_object - Implementation of driver->gem_create_object. |
| 182 | * |
| 183 | * This lets the CMA helpers allocate object structs for us, and keep |
| 184 | * our BO stats correct. |
| 185 | */ |
| 186 | struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) |
| 187 | { |
| 188 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 189 | struct vc4_bo *bo; |
| 190 | |
| 191 | bo = kzalloc(sizeof(*bo), GFP_KERNEL); |
| 192 | if (!bo) |
| 193 | return ERR_PTR(-ENOMEM); |
| 194 | |
| 195 | mutex_lock(&vc4->bo_lock); |
| 196 | vc4->bo_stats.num_allocated++; |
| 197 | vc4->bo_stats.size_allocated += size; |
| 198 | mutex_unlock(&vc4->bo_lock); |
| 199 | |
| 200 | return &bo->base.base; |
| 201 | } |
| 202 | |
| 203 | struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, |
| 204 | bool from_cache) |
| 205 | { |
| 206 | size_t size = roundup(unaligned_size, PAGE_SIZE); |
| 207 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 208 | struct drm_gem_cma_object *cma_obj; |
| 209 | |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 210 | if (size == 0) |
Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 211 | return NULL; |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 212 | |
| 213 | /* First, try to get a vc4_bo from the kernel BO cache. */ |
| 214 | if (from_cache) { |
| 215 | struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size); |
| 216 | |
| 217 | if (bo) |
| 218 | return bo; |
| 219 | } |
| 220 | |
| 221 | cma_obj = drm_gem_cma_create(dev, size); |
| 222 | if (IS_ERR(cma_obj)) { |
| 223 | /* |
| 224 | * If we've run out of CMA memory, kill the cache of |
| 225 | * CMA allocations we've got laying around and try again. |
| 226 | */ |
| 227 | vc4_bo_cache_purge(dev); |
| 228 | |
| 229 | cma_obj = drm_gem_cma_create(dev, size); |
| 230 | if (IS_ERR(cma_obj)) { |
| 231 | DRM_ERROR("Failed to allocate from CMA:\n"); |
| 232 | vc4_bo_stats_dump(vc4); |
| 233 | return NULL; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | return to_vc4_bo(&cma_obj->base); |
Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 238 | } |
| 239 | |
| 240 | int vc4_dumb_create(struct drm_file *file_priv, |
| 241 | struct drm_device *dev, |
| 242 | struct drm_mode_create_dumb *args) |
| 243 | { |
| 244 | int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); |
| 245 | struct vc4_bo *bo = NULL; |
| 246 | int ret; |
| 247 | |
| 248 | if (args->pitch < min_pitch) |
| 249 | args->pitch = min_pitch; |
| 250 | |
| 251 | if (args->size < args->pitch * args->height) |
| 252 | args->size = args->pitch * args->height; |
| 253 | |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 254 | bo = vc4_bo_create(dev, args->size, false); |
Eric Anholt | c8b75bc | 2015-03-02 13:01:12 -0800 | [diff] [blame] | 255 | if (!bo) |
| 256 | return -ENOMEM; |
| 257 | |
| 258 | ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); |
| 259 | drm_gem_object_unreference_unlocked(&bo->base.base); |
| 260 | |
| 261 | return ret; |
| 262 | } |
Eric Anholt | c826a6e | 2015-10-09 20:25:07 -0700 | [diff] [blame^] | 263 | |
| 264 | /* Must be called with bo_lock held. */ |
| 265 | static void vc4_bo_cache_free_old(struct drm_device *dev) |
| 266 | { |
| 267 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 268 | unsigned long expire_time = jiffies - msecs_to_jiffies(1000); |
| 269 | |
| 270 | while (!list_empty(&vc4->bo_cache.time_list)) { |
| 271 | struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list, |
| 272 | struct vc4_bo, unref_head); |
| 273 | if (time_before(expire_time, bo->free_time)) { |
| 274 | mod_timer(&vc4->bo_cache.time_timer, |
| 275 | round_jiffies_up(jiffies + |
| 276 | msecs_to_jiffies(1000))); |
| 277 | return; |
| 278 | } |
| 279 | |
| 280 | vc4_bo_remove_from_cache(bo); |
| 281 | vc4_bo_destroy(bo); |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | /* Called on the last userspace/kernel unreference of the BO. Returns |
| 286 | * it to the BO cache if possible, otherwise frees it. |
| 287 | * |
| 288 | * Note that this is called with the struct_mutex held. |
| 289 | */ |
| 290 | void vc4_free_object(struct drm_gem_object *gem_bo) |
| 291 | { |
| 292 | struct drm_device *dev = gem_bo->dev; |
| 293 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 294 | struct vc4_bo *bo = to_vc4_bo(gem_bo); |
| 295 | struct list_head *cache_list; |
| 296 | |
| 297 | mutex_lock(&vc4->bo_lock); |
| 298 | /* If the object references someone else's memory, we can't cache it. |
| 299 | */ |
| 300 | if (gem_bo->import_attach) { |
| 301 | vc4_bo_destroy(bo); |
| 302 | goto out; |
| 303 | } |
| 304 | |
| 305 | /* Don't cache if it was publicly named. */ |
| 306 | if (gem_bo->name) { |
| 307 | vc4_bo_destroy(bo); |
| 308 | goto out; |
| 309 | } |
| 310 | |
| 311 | cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size); |
| 312 | if (!cache_list) { |
| 313 | vc4_bo_destroy(bo); |
| 314 | goto out; |
| 315 | } |
| 316 | |
| 317 | bo->free_time = jiffies; |
| 318 | list_add(&bo->size_head, cache_list); |
| 319 | list_add(&bo->unref_head, &vc4->bo_cache.time_list); |
| 320 | |
| 321 | vc4->bo_stats.num_cached++; |
| 322 | vc4->bo_stats.size_cached += gem_bo->size; |
| 323 | |
| 324 | vc4_bo_cache_free_old(dev); |
| 325 | |
| 326 | out: |
| 327 | mutex_unlock(&vc4->bo_lock); |
| 328 | } |
| 329 | |
| 330 | static void vc4_bo_cache_time_work(struct work_struct *work) |
| 331 | { |
| 332 | struct vc4_dev *vc4 = |
| 333 | container_of(work, struct vc4_dev, bo_cache.time_work); |
| 334 | struct drm_device *dev = vc4->dev; |
| 335 | |
| 336 | mutex_lock(&vc4->bo_lock); |
| 337 | vc4_bo_cache_free_old(dev); |
| 338 | mutex_unlock(&vc4->bo_lock); |
| 339 | } |
| 340 | |
| 341 | static void vc4_bo_cache_time_timer(unsigned long data) |
| 342 | { |
| 343 | struct drm_device *dev = (struct drm_device *)data; |
| 344 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 345 | |
| 346 | schedule_work(&vc4->bo_cache.time_work); |
| 347 | } |
| 348 | |
| 349 | void vc4_bo_cache_init(struct drm_device *dev) |
| 350 | { |
| 351 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 352 | |
| 353 | mutex_init(&vc4->bo_lock); |
| 354 | |
| 355 | INIT_LIST_HEAD(&vc4->bo_cache.time_list); |
| 356 | |
| 357 | INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work); |
| 358 | setup_timer(&vc4->bo_cache.time_timer, |
| 359 | vc4_bo_cache_time_timer, |
| 360 | (unsigned long)dev); |
| 361 | } |
| 362 | |
| 363 | void vc4_bo_cache_destroy(struct drm_device *dev) |
| 364 | { |
| 365 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
| 366 | |
| 367 | del_timer(&vc4->bo_cache.time_timer); |
| 368 | cancel_work_sync(&vc4->bo_cache.time_work); |
| 369 | |
| 370 | vc4_bo_cache_purge(dev); |
| 371 | |
| 372 | if (vc4->bo_stats.num_allocated) { |
| 373 | DRM_ERROR("Destroying BO cache while BOs still allocated:\n"); |
| 374 | vc4_bo_stats_dump(vc4); |
| 375 | } |
| 376 | } |