// SPDX-License-Identifier: GPL-2.0
/* Watch queue and general notification mechanism, built on pipes
 *
 * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * See Documentation/watch_queue.rst
 */
| 9 | |
| 10 | #define pr_fmt(fmt) "watchq: " fmt |
| 11 | #include <linux/module.h> |
| 12 | #include <linux/init.h> |
| 13 | #include <linux/sched.h> |
| 14 | #include <linux/slab.h> |
| 15 | #include <linux/printk.h> |
| 16 | #include <linux/miscdevice.h> |
| 17 | #include <linux/fs.h> |
| 18 | #include <linux/mm.h> |
| 19 | #include <linux/pagemap.h> |
| 20 | #include <linux/poll.h> |
| 21 | #include <linux/uaccess.h> |
| 22 | #include <linux/vmalloc.h> |
| 23 | #include <linux/file.h> |
| 24 | #include <linux/security.h> |
| 25 | #include <linux/cred.h> |
| 26 | #include <linux/sched/signal.h> |
| 27 | #include <linux/watch_queue.h> |
| 28 | #include <linux/pipe_fs_i.h> |
| 29 | |
| 30 | MODULE_DESCRIPTION("Watch queue"); |
| 31 | MODULE_AUTHOR("Red Hat, Inc."); |
| 32 | MODULE_LICENSE("GPL"); |
| 33 | |
| 34 | #define WATCH_QUEUE_NOTE_SIZE 128 |
| 35 | #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) |
| 36 | |
| 37 | static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, |
| 38 | struct pipe_buffer *buf) |
| 39 | { |
| 40 | struct watch_queue *wqueue = (struct watch_queue *)buf->private; |
| 41 | struct page *page; |
| 42 | unsigned int bit; |
| 43 | |
| 44 | /* We need to work out which note within the page this refers to, but |
| 45 | * the note might have been maximum size, so merely ANDing the offset |
| 46 | * off doesn't work. OTOH, the note must've been more than zero size. |
| 47 | */ |
| 48 | bit = buf->offset + buf->len; |
| 49 | if ((bit & (WATCH_QUEUE_NOTE_SIZE - 1)) == 0) |
| 50 | bit -= WATCH_QUEUE_NOTE_SIZE; |
| 51 | bit /= WATCH_QUEUE_NOTE_SIZE; |
| 52 | |
| 53 | page = buf->page; |
| 54 | bit += page->index; |
| 55 | |
| 56 | set_bit(bit, wqueue->notes_bitmap); |
| 57 | } |
| 58 | |
Linus Torvalds | 6c32978 | 2020-06-13 09:56:21 -0700 | [diff] [blame] | 59 | // No try_steal function => no stealing |
| 60 | #define watch_queue_pipe_buf_try_steal NULL |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 61 | |
| 62 | /* New data written to a pipe may be appended to a buffer with this type. */ |
| 63 | static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 64 | .release = watch_queue_pipe_buf_release, |
Linus Torvalds | 6c32978 | 2020-06-13 09:56:21 -0700 | [diff] [blame] | 65 | .try_steal = watch_queue_pipe_buf_try_steal, |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 66 | .get = generic_pipe_buf_get, |
| 67 | }; |
| 68 | |
| 69 | /* |
| 70 | * Post a notification to a watch queue. |
| 71 | */ |
| 72 | static bool post_one_notification(struct watch_queue *wqueue, |
| 73 | struct watch_notification *n) |
| 74 | { |
| 75 | void *p; |
| 76 | struct pipe_inode_info *pipe = wqueue->pipe; |
| 77 | struct pipe_buffer *buf; |
| 78 | struct page *page; |
| 79 | unsigned int head, tail, mask, note, offset, len; |
| 80 | bool done = false; |
| 81 | |
| 82 | if (!pipe) |
| 83 | return false; |
| 84 | |
| 85 | spin_lock_irq(&pipe->rd_wait.lock); |
| 86 | |
| 87 | if (wqueue->defunct) |
| 88 | goto out; |
| 89 | |
| 90 | mask = pipe->ring_size - 1; |
| 91 | head = pipe->head; |
| 92 | tail = pipe->tail; |
| 93 | if (pipe_full(head, tail, pipe->ring_size)) |
| 94 | goto lost; |
| 95 | |
| 96 | note = find_first_bit(wqueue->notes_bitmap, wqueue->nr_notes); |
| 97 | if (note >= wqueue->nr_notes) |
| 98 | goto lost; |
| 99 | |
| 100 | page = wqueue->notes[note / WATCH_QUEUE_NOTES_PER_PAGE]; |
| 101 | offset = note % WATCH_QUEUE_NOTES_PER_PAGE * WATCH_QUEUE_NOTE_SIZE; |
| 102 | get_page(page); |
| 103 | len = n->info & WATCH_INFO_LENGTH; |
| 104 | p = kmap_atomic(page); |
| 105 | memcpy(p + offset, n, len); |
| 106 | kunmap_atomic(p); |
| 107 | |
| 108 | buf = &pipe->bufs[head & mask]; |
| 109 | buf->page = page; |
| 110 | buf->private = (unsigned long)wqueue; |
| 111 | buf->ops = &watch_queue_pipe_buf_ops; |
| 112 | buf->offset = offset; |
| 113 | buf->len = len; |
David Howells | 8cfba76 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 114 | buf->flags = PIPE_BUF_FLAG_WHOLE; |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 115 | pipe->head = head + 1; |
| 116 | |
| 117 | if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { |
| 118 | spin_unlock_irq(&pipe->rd_wait.lock); |
| 119 | BUG(); |
| 120 | } |
| 121 | wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); |
| 122 | done = true; |
| 123 | |
| 124 | out: |
| 125 | spin_unlock_irq(&pipe->rd_wait.lock); |
| 126 | if (done) |
| 127 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
| 128 | return done; |
| 129 | |
| 130 | lost: |
David Howells | e7d553d | 2020-01-14 17:07:12 +0000 | [diff] [blame] | 131 | buf = &pipe->bufs[(head - 1) & mask]; |
| 132 | buf->flags |= PIPE_BUF_FLAG_LOSS; |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 133 | goto out; |
| 134 | } |
| 135 | |
| 136 | /* |
| 137 | * Apply filter rules to a notification. |
| 138 | */ |
| 139 | static bool filter_watch_notification(const struct watch_filter *wf, |
| 140 | const struct watch_notification *n) |
| 141 | { |
| 142 | const struct watch_type_filter *wt; |
| 143 | unsigned int st_bits = sizeof(wt->subtype_filter[0]) * 8; |
| 144 | unsigned int st_index = n->subtype / st_bits; |
| 145 | unsigned int st_bit = 1U << (n->subtype % st_bits); |
| 146 | int i; |
| 147 | |
| 148 | if (!test_bit(n->type, wf->type_filter)) |
| 149 | return false; |
| 150 | |
| 151 | for (i = 0; i < wf->nr_filters; i++) { |
| 152 | wt = &wf->filters[i]; |
| 153 | if (n->type == wt->type && |
| 154 | (wt->subtype_filter[st_index] & st_bit) && |
| 155 | (n->info & wt->info_mask) == wt->info_filter) |
| 156 | return true; |
| 157 | } |
| 158 | |
| 159 | return false; /* If there is a filter, the default is to reject. */ |
| 160 | } |
| 161 | |
| 162 | /** |
| 163 | * __post_watch_notification - Post an event notification |
| 164 | * @wlist: The watch list to post the event to. |
| 165 | * @n: The notification record to post. |
| 166 | * @cred: The creds of the process that triggered the notification. |
| 167 | * @id: The ID to match on the watch. |
| 168 | * |
| 169 | * Post a notification of an event into a set of watch queues and let the users |
| 170 | * know. |
| 171 | * |
| 172 | * The size of the notification should be set in n->info & WATCH_INFO_LENGTH and |
| 173 | * should be in units of sizeof(*n). |
| 174 | */ |
| 175 | void __post_watch_notification(struct watch_list *wlist, |
| 176 | struct watch_notification *n, |
| 177 | const struct cred *cred, |
| 178 | u64 id) |
| 179 | { |
| 180 | const struct watch_filter *wf; |
| 181 | struct watch_queue *wqueue; |
| 182 | struct watch *watch; |
| 183 | |
| 184 | if (((n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT) == 0) { |
| 185 | WARN_ON(1); |
| 186 | return; |
| 187 | } |
| 188 | |
| 189 | rcu_read_lock(); |
| 190 | |
| 191 | hlist_for_each_entry_rcu(watch, &wlist->watchers, list_node) { |
| 192 | if (watch->id != id) |
| 193 | continue; |
| 194 | n->info &= ~WATCH_INFO_ID; |
| 195 | n->info |= watch->info_id; |
| 196 | |
| 197 | wqueue = rcu_dereference(watch->queue); |
| 198 | wf = rcu_dereference(wqueue->filter); |
| 199 | if (wf && !filter_watch_notification(wf, n)) |
| 200 | continue; |
| 201 | |
| 202 | if (security_post_notification(watch->cred, cred, n) < 0) |
| 203 | continue; |
| 204 | |
| 205 | post_one_notification(wqueue, n); |
| 206 | } |
| 207 | |
| 208 | rcu_read_unlock(); |
| 209 | } |
| 210 | EXPORT_SYMBOL(__post_watch_notification); |
| 211 | |
| 212 | /* |
| 213 | * Allocate sufficient pages to preallocation for the requested number of |
| 214 | * notifications. |
| 215 | */ |
| 216 | long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) |
| 217 | { |
| 218 | struct watch_queue *wqueue = pipe->watch_queue; |
| 219 | struct page **pages; |
| 220 | unsigned long *bitmap; |
| 221 | unsigned long user_bufs; |
| 222 | unsigned int bmsize; |
| 223 | int ret, i, nr_pages; |
| 224 | |
| 225 | if (!wqueue) |
| 226 | return -ENODEV; |
| 227 | if (wqueue->notes) |
| 228 | return -EBUSY; |
| 229 | |
| 230 | if (nr_notes < 1 || |
| 231 | nr_notes > 512) /* TODO: choose a better hard limit */ |
| 232 | return -EINVAL; |
| 233 | |
| 234 | nr_pages = (nr_notes + WATCH_QUEUE_NOTES_PER_PAGE - 1); |
| 235 | nr_pages /= WATCH_QUEUE_NOTES_PER_PAGE; |
| 236 | user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_pages); |
| 237 | |
| 238 | if (nr_pages > pipe->max_usage && |
| 239 | (too_many_pipe_buffers_hard(user_bufs) || |
| 240 | too_many_pipe_buffers_soft(user_bufs)) && |
| 241 | pipe_is_unprivileged_user()) { |
| 242 | ret = -EPERM; |
| 243 | goto error; |
| 244 | } |
| 245 | |
| 246 | ret = pipe_resize_ring(pipe, nr_notes); |
| 247 | if (ret < 0) |
| 248 | goto error; |
| 249 | |
| 250 | pages = kcalloc(sizeof(struct page *), nr_pages, GFP_KERNEL); |
| 251 | if (!pages) |
| 252 | goto error; |
| 253 | |
| 254 | for (i = 0; i < nr_pages; i++) { |
| 255 | pages[i] = alloc_page(GFP_KERNEL); |
| 256 | if (!pages[i]) |
| 257 | goto error_p; |
| 258 | pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE; |
| 259 | } |
| 260 | |
| 261 | bmsize = (nr_notes + BITS_PER_LONG - 1) / BITS_PER_LONG; |
| 262 | bmsize *= sizeof(unsigned long); |
| 263 | bitmap = kmalloc(bmsize, GFP_KERNEL); |
| 264 | if (!bitmap) |
| 265 | goto error_p; |
| 266 | |
| 267 | memset(bitmap, 0xff, bmsize); |
| 268 | wqueue->notes = pages; |
| 269 | wqueue->notes_bitmap = bitmap; |
| 270 | wqueue->nr_pages = nr_pages; |
| 271 | wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; |
| 272 | return 0; |
| 273 | |
| 274 | error_p: |
| 275 | for (i = 0; i < nr_pages; i++) |
| 276 | __free_page(pages[i]); |
| 277 | kfree(pages); |
| 278 | error: |
| 279 | (void) account_pipe_buffers(pipe->user, nr_pages, pipe->nr_accounted); |
| 280 | return ret; |
| 281 | } |
| 282 | |
| 283 | /* |
| 284 | * Set the filter on a watch queue. |
| 285 | */ |
| 286 | long watch_queue_set_filter(struct pipe_inode_info *pipe, |
| 287 | struct watch_notification_filter __user *_filter) |
| 288 | { |
| 289 | struct watch_notification_type_filter *tf; |
| 290 | struct watch_notification_filter filter; |
| 291 | struct watch_type_filter *q; |
| 292 | struct watch_filter *wfilter; |
| 293 | struct watch_queue *wqueue = pipe->watch_queue; |
| 294 | int ret, nr_filter = 0, i; |
| 295 | |
| 296 | if (!wqueue) |
| 297 | return -ENODEV; |
| 298 | |
| 299 | if (!_filter) { |
| 300 | /* Remove the old filter */ |
| 301 | wfilter = NULL; |
| 302 | goto set; |
| 303 | } |
| 304 | |
| 305 | /* Grab the user's filter specification */ |
| 306 | if (copy_from_user(&filter, _filter, sizeof(filter)) != 0) |
| 307 | return -EFAULT; |
| 308 | if (filter.nr_filters == 0 || |
| 309 | filter.nr_filters > 16 || |
| 310 | filter.__reserved != 0) |
| 311 | return -EINVAL; |
| 312 | |
| 313 | tf = memdup_user(_filter->filters, filter.nr_filters * sizeof(*tf)); |
| 314 | if (IS_ERR(tf)) |
| 315 | return PTR_ERR(tf); |
| 316 | |
| 317 | ret = -EINVAL; |
| 318 | for (i = 0; i < filter.nr_filters; i++) { |
| 319 | if ((tf[i].info_filter & ~tf[i].info_mask) || |
| 320 | tf[i].info_mask & WATCH_INFO_LENGTH) |
| 321 | goto err_filter; |
| 322 | /* Ignore any unknown types */ |
| 323 | if (tf[i].type >= sizeof(wfilter->type_filter) * 8) |
| 324 | continue; |
| 325 | nr_filter++; |
| 326 | } |
| 327 | |
| 328 | /* Now we need to build the internal filter from only the relevant |
| 329 | * user-specified filters. |
| 330 | */ |
| 331 | ret = -ENOMEM; |
| 332 | wfilter = kzalloc(struct_size(wfilter, filters, nr_filter), GFP_KERNEL); |
| 333 | if (!wfilter) |
| 334 | goto err_filter; |
| 335 | wfilter->nr_filters = nr_filter; |
| 336 | |
| 337 | q = wfilter->filters; |
| 338 | for (i = 0; i < filter.nr_filters; i++) { |
| 339 | if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) |
| 340 | continue; |
| 341 | |
| 342 | q->type = tf[i].type; |
| 343 | q->info_filter = tf[i].info_filter; |
| 344 | q->info_mask = tf[i].info_mask; |
| 345 | q->subtype_filter[0] = tf[i].subtype_filter[0]; |
| 346 | __set_bit(q->type, wfilter->type_filter); |
| 347 | q++; |
| 348 | } |
| 349 | |
| 350 | kfree(tf); |
| 351 | set: |
| 352 | pipe_lock(pipe); |
| 353 | wfilter = rcu_replace_pointer(wqueue->filter, wfilter, |
| 354 | lockdep_is_held(&pipe->mutex)); |
| 355 | pipe_unlock(pipe); |
| 356 | if (wfilter) |
| 357 | kfree_rcu(wfilter, rcu); |
| 358 | return 0; |
| 359 | |
| 360 | err_filter: |
| 361 | kfree(tf); |
| 362 | return ret; |
| 363 | } |
| 364 | |
| 365 | static void __put_watch_queue(struct kref *kref) |
| 366 | { |
| 367 | struct watch_queue *wqueue = |
| 368 | container_of(kref, struct watch_queue, usage); |
| 369 | struct watch_filter *wfilter; |
| 370 | int i; |
| 371 | |
| 372 | for (i = 0; i < wqueue->nr_pages; i++) |
| 373 | __free_page(wqueue->notes[i]); |
| 374 | |
| 375 | wfilter = rcu_access_pointer(wqueue->filter); |
| 376 | if (wfilter) |
| 377 | kfree_rcu(wfilter, rcu); |
| 378 | kfree_rcu(wqueue, rcu); |
| 379 | } |
| 380 | |
| 381 | /** |
| 382 | * put_watch_queue - Dispose of a ref on a watchqueue. |
| 383 | * @wqueue: The watch queue to unref. |
| 384 | */ |
| 385 | void put_watch_queue(struct watch_queue *wqueue) |
| 386 | { |
| 387 | kref_put(&wqueue->usage, __put_watch_queue); |
| 388 | } |
| 389 | EXPORT_SYMBOL(put_watch_queue); |
| 390 | |
| 391 | static void free_watch(struct rcu_head *rcu) |
| 392 | { |
| 393 | struct watch *watch = container_of(rcu, struct watch, rcu); |
| 394 | |
| 395 | put_watch_queue(rcu_access_pointer(watch->queue)); |
David Howells | 29e44f4 | 2020-08-17 11:07:28 +0100 | [diff] [blame] | 396 | atomic_dec(&watch->cred->user->nr_watches); |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 397 | put_cred(watch->cred); |
| 398 | } |
| 399 | |
| 400 | static void __put_watch(struct kref *kref) |
| 401 | { |
| 402 | struct watch *watch = container_of(kref, struct watch, usage); |
| 403 | |
| 404 | call_rcu(&watch->rcu, free_watch); |
| 405 | } |
| 406 | |
| 407 | /* |
| 408 | * Discard a watch. |
| 409 | */ |
| 410 | static void put_watch(struct watch *watch) |
| 411 | { |
| 412 | kref_put(&watch->usage, __put_watch); |
| 413 | } |
| 414 | |
| 415 | /** |
| 416 | * init_watch_queue - Initialise a watch |
| 417 | * @watch: The watch to initialise. |
| 418 | * @wqueue: The queue to assign. |
| 419 | * |
| 420 | * Initialise a watch and set the watch queue. |
| 421 | */ |
| 422 | void init_watch(struct watch *watch, struct watch_queue *wqueue) |
| 423 | { |
| 424 | kref_init(&watch->usage); |
| 425 | INIT_HLIST_NODE(&watch->list_node); |
| 426 | INIT_HLIST_NODE(&watch->queue_node); |
| 427 | rcu_assign_pointer(watch->queue, wqueue); |
| 428 | } |
| 429 | |
| 430 | /** |
| 431 | * add_watch_to_object - Add a watch on an object to a watch list |
| 432 | * @watch: The watch to add |
| 433 | * @wlist: The watch list to add to |
| 434 | * |
| 435 | * @watch->queue must have been set to point to the queue to post notifications |
| 436 | * to and the watch list of the object to be watched. @watch->cred must also |
| 437 | * have been set to the appropriate credentials and a ref taken on them. |
| 438 | * |
| 439 | * The caller must pin the queue and the list both and must hold the list |
| 440 | * locked against racing watch additions/removals. |
| 441 | */ |
| 442 | int add_watch_to_object(struct watch *watch, struct watch_list *wlist) |
| 443 | { |
| 444 | struct watch_queue *wqueue = rcu_access_pointer(watch->queue); |
| 445 | struct watch *w; |
| 446 | |
| 447 | hlist_for_each_entry(w, &wlist->watchers, list_node) { |
| 448 | struct watch_queue *wq = rcu_access_pointer(w->queue); |
| 449 | if (wqueue == wq && watch->id == w->id) |
| 450 | return -EBUSY; |
| 451 | } |
| 452 | |
| 453 | watch->cred = get_current_cred(); |
| 454 | rcu_assign_pointer(watch->watch_list, wlist); |
| 455 | |
David Howells | 29e44f4 | 2020-08-17 11:07:28 +0100 | [diff] [blame] | 456 | if (atomic_inc_return(&watch->cred->user->nr_watches) > |
| 457 | task_rlimit(current, RLIMIT_NOFILE)) { |
| 458 | atomic_dec(&watch->cred->user->nr_watches); |
| 459 | put_cred(watch->cred); |
| 460 | return -EAGAIN; |
| 461 | } |
| 462 | |
David Howells | c73be61 | 2020-01-14 17:07:11 +0000 | [diff] [blame] | 463 | spin_lock_bh(&wqueue->lock); |
| 464 | kref_get(&wqueue->usage); |
| 465 | kref_get(&watch->usage); |
| 466 | hlist_add_head(&watch->queue_node, &wqueue->watches); |
| 467 | spin_unlock_bh(&wqueue->lock); |
| 468 | |
| 469 | hlist_add_head(&watch->list_node, &wlist->watchers); |
| 470 | return 0; |
| 471 | } |
| 472 | EXPORT_SYMBOL(add_watch_to_object); |
| 473 | |
| 474 | /** |
| 475 | * remove_watch_from_object - Remove a watch or all watches from an object. |
| 476 | * @wlist: The watch list to remove from |
| 477 | * @wq: The watch queue of interest (ignored if @all is true) |
| 478 | * @id: The ID of the watch to remove (ignored if @all is true) |
| 479 | * @all: True to remove all objects |
| 480 | * |
| 481 | * Remove a specific watch or all watches from an object. A notification is |
| 482 | * sent to the watcher to tell them that this happened. |
| 483 | */ |
| 484 | int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq, |
| 485 | u64 id, bool all) |
| 486 | { |
| 487 | struct watch_notification_removal n; |
| 488 | struct watch_queue *wqueue; |
| 489 | struct watch *watch; |
| 490 | int ret = -EBADSLT; |
| 491 | |
| 492 | rcu_read_lock(); |
| 493 | |
| 494 | again: |
| 495 | spin_lock(&wlist->lock); |
| 496 | hlist_for_each_entry(watch, &wlist->watchers, list_node) { |
| 497 | if (all || |
| 498 | (watch->id == id && rcu_access_pointer(watch->queue) == wq)) |
| 499 | goto found; |
| 500 | } |
| 501 | spin_unlock(&wlist->lock); |
| 502 | goto out; |
| 503 | |
| 504 | found: |
| 505 | ret = 0; |
| 506 | hlist_del_init_rcu(&watch->list_node); |
| 507 | rcu_assign_pointer(watch->watch_list, NULL); |
| 508 | spin_unlock(&wlist->lock); |
| 509 | |
| 510 | /* We now own the reference on watch that used to belong to wlist. */ |
| 511 | |
| 512 | n.watch.type = WATCH_TYPE_META; |
| 513 | n.watch.subtype = WATCH_META_REMOVAL_NOTIFICATION; |
| 514 | n.watch.info = watch->info_id | watch_sizeof(n.watch); |
| 515 | n.id = id; |
| 516 | if (id != 0) |
| 517 | n.watch.info = watch->info_id | watch_sizeof(n); |
| 518 | |
| 519 | wqueue = rcu_dereference(watch->queue); |
| 520 | |
| 521 | /* We don't need the watch list lock for the next bit as RCU is |
| 522 | * protecting *wqueue from deallocation. |
| 523 | */ |
| 524 | if (wqueue) { |
| 525 | post_one_notification(wqueue, &n.watch); |
| 526 | |
| 527 | spin_lock_bh(&wqueue->lock); |
| 528 | |
| 529 | if (!hlist_unhashed(&watch->queue_node)) { |
| 530 | hlist_del_init_rcu(&watch->queue_node); |
| 531 | put_watch(watch); |
| 532 | } |
| 533 | |
| 534 | spin_unlock_bh(&wqueue->lock); |
| 535 | } |
| 536 | |
| 537 | if (wlist->release_watch) { |
| 538 | void (*release_watch)(struct watch *); |
| 539 | |
| 540 | release_watch = wlist->release_watch; |
| 541 | rcu_read_unlock(); |
| 542 | (*release_watch)(watch); |
| 543 | rcu_read_lock(); |
| 544 | } |
| 545 | put_watch(watch); |
| 546 | |
| 547 | if (all && !hlist_empty(&wlist->watchers)) |
| 548 | goto again; |
| 549 | out: |
| 550 | rcu_read_unlock(); |
| 551 | return ret; |
| 552 | } |
| 553 | EXPORT_SYMBOL(remove_watch_from_object); |
| 554 | |
| 555 | /* |
| 556 | * Remove all the watches that are contributory to a queue. This has the |
| 557 | * potential to race with removal of the watches by the destruction of the |
| 558 | * objects being watched or with the distribution of notifications. |
| 559 | */ |
| 560 | void watch_queue_clear(struct watch_queue *wqueue) |
| 561 | { |
| 562 | struct watch_list *wlist; |
| 563 | struct watch *watch; |
| 564 | bool release; |
| 565 | |
| 566 | rcu_read_lock(); |
| 567 | spin_lock_bh(&wqueue->lock); |
| 568 | |
| 569 | /* Prevent new additions and prevent notifications from happening */ |
| 570 | wqueue->defunct = true; |
| 571 | |
| 572 | while (!hlist_empty(&wqueue->watches)) { |
| 573 | watch = hlist_entry(wqueue->watches.first, struct watch, queue_node); |
| 574 | hlist_del_init_rcu(&watch->queue_node); |
| 575 | /* We now own a ref on the watch. */ |
| 576 | spin_unlock_bh(&wqueue->lock); |
| 577 | |
| 578 | /* We can't do the next bit under the queue lock as we need to |
| 579 | * get the list lock - which would cause a deadlock if someone |
| 580 | * was removing from the opposite direction at the same time or |
| 581 | * posting a notification. |
| 582 | */ |
| 583 | wlist = rcu_dereference(watch->watch_list); |
| 584 | if (wlist) { |
| 585 | void (*release_watch)(struct watch *); |
| 586 | |
| 587 | spin_lock(&wlist->lock); |
| 588 | |
| 589 | release = !hlist_unhashed(&watch->list_node); |
| 590 | if (release) { |
| 591 | hlist_del_init_rcu(&watch->list_node); |
| 592 | rcu_assign_pointer(watch->watch_list, NULL); |
| 593 | |
| 594 | /* We now own a second ref on the watch. */ |
| 595 | } |
| 596 | |
| 597 | release_watch = wlist->release_watch; |
| 598 | spin_unlock(&wlist->lock); |
| 599 | |
| 600 | if (release) { |
| 601 | if (release_watch) { |
| 602 | rcu_read_unlock(); |
| 603 | /* This might need to call dput(), so |
| 604 | * we have to drop all the locks. |
| 605 | */ |
| 606 | (*release_watch)(watch); |
| 607 | rcu_read_lock(); |
| 608 | } |
| 609 | put_watch(watch); |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | put_watch(watch); |
| 614 | spin_lock_bh(&wqueue->lock); |
| 615 | } |
| 616 | |
| 617 | spin_unlock_bh(&wqueue->lock); |
| 618 | rcu_read_unlock(); |
| 619 | } |
| 620 | |
| 621 | /** |
| 622 | * get_watch_queue - Get a watch queue from its file descriptor. |
| 623 | * @fd: The fd to query. |
| 624 | */ |
| 625 | struct watch_queue *get_watch_queue(int fd) |
| 626 | { |
| 627 | struct pipe_inode_info *pipe; |
| 628 | struct watch_queue *wqueue = ERR_PTR(-EINVAL); |
| 629 | struct fd f; |
| 630 | |
| 631 | f = fdget(fd); |
| 632 | if (f.file) { |
| 633 | pipe = get_pipe_info(f.file, false); |
| 634 | if (pipe && pipe->watch_queue) { |
| 635 | wqueue = pipe->watch_queue; |
| 636 | kref_get(&wqueue->usage); |
| 637 | } |
| 638 | fdput(f); |
| 639 | } |
| 640 | |
| 641 | return wqueue; |
| 642 | } |
| 643 | EXPORT_SYMBOL(get_watch_queue); |
| 644 | |
| 645 | /* |
| 646 | * Initialise a watch queue |
| 647 | */ |
| 648 | int watch_queue_init(struct pipe_inode_info *pipe) |
| 649 | { |
| 650 | struct watch_queue *wqueue; |
| 651 | |
| 652 | wqueue = kzalloc(sizeof(*wqueue), GFP_KERNEL); |
| 653 | if (!wqueue) |
| 654 | return -ENOMEM; |
| 655 | |
| 656 | wqueue->pipe = pipe; |
| 657 | kref_init(&wqueue->usage); |
| 658 | spin_lock_init(&wqueue->lock); |
| 659 | INIT_HLIST_HEAD(&wqueue->watches); |
| 660 | |
| 661 | pipe->watch_queue = wqueue; |
| 662 | return 0; |
| 663 | } |