/* Simple I/O model for guests, based on shared memory.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
| 18 | #include <linux/types.h> |
| 19 | #include <linux/futex.h> |
| 20 | #include <linux/jhash.h> |
| 21 | #include <linux/mm.h> |
| 22 | #include <linux/highmem.h> |
| 23 | #include <linux/uaccess.h> |
| 24 | #include "lg.h" |
| 25 | |
| 26 | static struct list_head dma_hash[61]; |
| 27 | |
| 28 | void lguest_io_init(void) |
| 29 | { |
| 30 | unsigned int i; |
| 31 | |
| 32 | for (i = 0; i < ARRAY_SIZE(dma_hash); i++) |
| 33 | INIT_LIST_HEAD(&dma_hash[i]); |
| 34 | } |
| 35 | |
| 36 | /* FIXME: allow multi-page lengths. */ |
| 37 | static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) |
| 38 | { |
| 39 | unsigned int i; |
| 40 | |
| 41 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { |
| 42 | if (!dma->len[i]) |
| 43 | return 1; |
| 44 | if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) |
| 45 | goto kill; |
| 46 | if (dma->len[i] > PAGE_SIZE) |
| 47 | goto kill; |
| 48 | /* We could do over a page, but is it worth it? */ |
| 49 | if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) |
| 50 | goto kill; |
| 51 | } |
| 52 | return 1; |
| 53 | |
| 54 | kill: |
| 55 | kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); |
| 56 | return 0; |
| 57 | } |
| 58 | |
| 59 | static unsigned int hash(const union futex_key *key) |
| 60 | { |
| 61 | return jhash2((u32*)&key->both.word, |
| 62 | (sizeof(key->both.word)+sizeof(key->both.ptr))/4, |
| 63 | key->both.offset) |
| 64 | % ARRAY_SIZE(dma_hash); |
| 65 | } |
| 66 | |
| 67 | static inline int key_eq(const union futex_key *a, const union futex_key *b) |
| 68 | { |
| 69 | return (a->both.word == b->both.word |
| 70 | && a->both.ptr == b->both.ptr |
| 71 | && a->both.offset == b->both.offset); |
| 72 | } |
| 73 | |
| 74 | /* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ |
| 75 | static void unlink_dma(struct lguest_dma_info *dmainfo) |
| 76 | { |
| 77 | BUG_ON(!mutex_is_locked(&lguest_lock)); |
| 78 | dmainfo->interrupt = 0; |
| 79 | list_del(&dmainfo->list); |
| 80 | drop_futex_key_refs(&dmainfo->key); |
| 81 | } |
| 82 | |
| 83 | static int unbind_dma(struct lguest *lg, |
| 84 | const union futex_key *key, |
| 85 | unsigned long dmas) |
| 86 | { |
| 87 | int i, ret = 0; |
| 88 | |
| 89 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
| 90 | if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { |
| 91 | unlink_dma(&lg->dma[i]); |
| 92 | ret = 1; |
| 93 | break; |
| 94 | } |
| 95 | } |
| 96 | return ret; |
| 97 | } |
| 98 | |
| 99 | int bind_dma(struct lguest *lg, |
| 100 | unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) |
| 101 | { |
| 102 | unsigned int i; |
| 103 | int ret = 0; |
| 104 | union futex_key key; |
| 105 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; |
| 106 | |
| 107 | if (interrupt >= LGUEST_IRQS) |
| 108 | return 0; |
| 109 | |
| 110 | mutex_lock(&lguest_lock); |
| 111 | down_read(fshared); |
| 112 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
| 113 | kill_guest(lg, "bad dma key %#lx", ukey); |
| 114 | goto unlock; |
| 115 | } |
| 116 | get_futex_key_refs(&key); |
| 117 | |
| 118 | if (interrupt == 0) |
| 119 | ret = unbind_dma(lg, &key, dmas); |
| 120 | else { |
| 121 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
| 122 | if (lg->dma[i].interrupt) |
| 123 | continue; |
| 124 | |
| 125 | lg->dma[i].dmas = dmas; |
| 126 | lg->dma[i].num_dmas = numdmas; |
| 127 | lg->dma[i].next_dma = 0; |
| 128 | lg->dma[i].key = key; |
| 129 | lg->dma[i].guestid = lg->guestid; |
| 130 | lg->dma[i].interrupt = interrupt; |
| 131 | list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); |
| 132 | ret = 1; |
| 133 | goto unlock; |
| 134 | } |
| 135 | } |
| 136 | drop_futex_key_refs(&key); |
| 137 | unlock: |
| 138 | up_read(fshared); |
| 139 | mutex_unlock(&lguest_lock); |
| 140 | return ret; |
| 141 | } |
| 142 | |
| 143 | /* lgread from another guest */ |
| 144 | static int lgread_other(struct lguest *lg, |
| 145 | void *buf, u32 addr, unsigned bytes) |
| 146 | { |
| 147 | if (!lguest_address_ok(lg, addr, bytes) |
| 148 | || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { |
| 149 | memset(buf, 0, bytes); |
| 150 | kill_guest(lg, "bad address in registered DMA struct"); |
| 151 | return 0; |
| 152 | } |
| 153 | return 1; |
| 154 | } |
| 155 | |
| 156 | /* lgwrite to another guest */ |
| 157 | static int lgwrite_other(struct lguest *lg, u32 addr, |
| 158 | const void *buf, unsigned bytes) |
| 159 | { |
| 160 | if (!lguest_address_ok(lg, addr, bytes) |
| 161 | || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) |
| 162 | != bytes)) { |
| 163 | kill_guest(lg, "bad address writing to registered DMA"); |
| 164 | return 0; |
| 165 | } |
| 166 | return 1; |
| 167 | } |
| 168 | |
| 169 | static u32 copy_data(struct lguest *srclg, |
| 170 | const struct lguest_dma *src, |
| 171 | const struct lguest_dma *dst, |
| 172 | struct page *pages[]) |
| 173 | { |
| 174 | unsigned int totlen, si, di, srcoff, dstoff; |
| 175 | void *maddr = NULL; |
| 176 | |
| 177 | totlen = 0; |
| 178 | si = di = 0; |
| 179 | srcoff = dstoff = 0; |
| 180 | while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] |
| 181 | && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { |
| 182 | u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); |
| 183 | |
| 184 | if (!maddr) |
| 185 | maddr = kmap(pages[di]); |
| 186 | |
| 187 | /* FIXME: This is not completely portable, since |
| 188 | archs do different things for copy_to_user_page. */ |
| 189 | if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, |
| 190 | (void *__user)src->addr[si], len) != 0) { |
| 191 | kill_guest(srclg, "bad address in sending DMA"); |
| 192 | totlen = 0; |
| 193 | break; |
| 194 | } |
| 195 | |
| 196 | totlen += len; |
| 197 | srcoff += len; |
| 198 | dstoff += len; |
| 199 | if (srcoff == src->len[si]) { |
| 200 | si++; |
| 201 | srcoff = 0; |
| 202 | } |
| 203 | if (dstoff == dst->len[di]) { |
| 204 | kunmap(pages[di]); |
| 205 | maddr = NULL; |
| 206 | di++; |
| 207 | dstoff = 0; |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | if (maddr) |
| 212 | kunmap(pages[di]); |
| 213 | |
| 214 | return totlen; |
| 215 | } |
| 216 | |
| 217 | /* Src is us, ie. current. */ |
| 218 | static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, |
| 219 | struct lguest *dstlg, const struct lguest_dma *dst) |
| 220 | { |
| 221 | int i; |
| 222 | u32 ret; |
| 223 | struct page *pages[LGUEST_MAX_DMA_SECTIONS]; |
| 224 | |
| 225 | if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) |
| 226 | return 0; |
| 227 | |
| 228 | /* First get the destination pages */ |
| 229 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { |
| 230 | if (dst->len[i] == 0) |
| 231 | break; |
| 232 | if (get_user_pages(dstlg->tsk, dstlg->mm, |
| 233 | dst->addr[i], 1, 1, 1, pages+i, NULL) |
| 234 | != 1) { |
| 235 | kill_guest(dstlg, "Error mapping DMA pages"); |
| 236 | ret = 0; |
| 237 | goto drop_pages; |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | /* Now copy until we run out of src or dst. */ |
| 242 | ret = copy_data(srclg, src, dst, pages); |
| 243 | |
| 244 | drop_pages: |
| 245 | while (--i >= 0) |
| 246 | put_page(pages[i]); |
| 247 | return ret; |
| 248 | } |
| 249 | |
| 250 | static int dma_transfer(struct lguest *srclg, |
| 251 | unsigned long udma, |
| 252 | struct lguest_dma_info *dst) |
| 253 | { |
| 254 | struct lguest_dma dst_dma, src_dma; |
| 255 | struct lguest *dstlg; |
| 256 | u32 i, dma = 0; |
| 257 | |
| 258 | dstlg = &lguests[dst->guestid]; |
| 259 | /* Get our dma list. */ |
| 260 | lgread(srclg, &src_dma, udma, sizeof(src_dma)); |
| 261 | |
| 262 | /* We can't deadlock against them dmaing to us, because this |
| 263 | * is all under the lguest_lock. */ |
| 264 | down_read(&dstlg->mm->mmap_sem); |
| 265 | |
| 266 | for (i = 0; i < dst->num_dmas; i++) { |
| 267 | dma = (dst->next_dma + i) % dst->num_dmas; |
| 268 | if (!lgread_other(dstlg, &dst_dma, |
| 269 | dst->dmas + dma * sizeof(struct lguest_dma), |
| 270 | sizeof(dst_dma))) { |
| 271 | goto fail; |
| 272 | } |
| 273 | if (!dst_dma.used_len) |
| 274 | break; |
| 275 | } |
| 276 | if (i != dst->num_dmas) { |
| 277 | unsigned long used_lenp; |
| 278 | unsigned int ret; |
| 279 | |
| 280 | ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); |
| 281 | /* Put used length in src. */ |
| 282 | lgwrite_u32(srclg, |
| 283 | udma+offsetof(struct lguest_dma, used_len), ret); |
| 284 | if (ret == 0 && src_dma.len[0] != 0) |
| 285 | goto fail; |
| 286 | |
| 287 | /* Make sure destination sees contents before length. */ |
| 288 | wmb(); |
| 289 | used_lenp = dst->dmas |
| 290 | + dma * sizeof(struct lguest_dma) |
| 291 | + offsetof(struct lguest_dma, used_len); |
| 292 | lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); |
| 293 | dst->next_dma++; |
| 294 | } |
| 295 | up_read(&dstlg->mm->mmap_sem); |
| 296 | |
| 297 | /* Do this last so dst doesn't simply sleep on lock. */ |
| 298 | set_bit(dst->interrupt, dstlg->irqs_pending); |
| 299 | wake_up_process(dstlg->tsk); |
| 300 | return i == dst->num_dmas; |
| 301 | |
| 302 | fail: |
| 303 | up_read(&dstlg->mm->mmap_sem); |
| 304 | return 0; |
| 305 | } |
| 306 | |
| 307 | void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) |
| 308 | { |
| 309 | union futex_key key; |
| 310 | int empty = 0; |
| 311 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; |
| 312 | |
| 313 | again: |
| 314 | mutex_lock(&lguest_lock); |
| 315 | down_read(fshared); |
| 316 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
| 317 | kill_guest(lg, "bad sending DMA key"); |
| 318 | goto unlock; |
| 319 | } |
| 320 | /* Shared mapping? Look for other guests... */ |
| 321 | if (key.shared.offset & 1) { |
| 322 | struct lguest_dma_info *i; |
| 323 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { |
| 324 | if (i->guestid == lg->guestid) |
| 325 | continue; |
| 326 | if (!key_eq(&key, &i->key)) |
| 327 | continue; |
| 328 | |
| 329 | empty += dma_transfer(lg, udma, i); |
| 330 | break; |
| 331 | } |
| 332 | if (empty == 1) { |
| 333 | /* Give any recipients one chance to restock. */ |
| 334 | up_read(¤t->mm->mmap_sem); |
| 335 | mutex_unlock(&lguest_lock); |
| 336 | empty++; |
| 337 | goto again; |
| 338 | } |
| 339 | } else { |
| 340 | /* Private mapping: tell our userspace. */ |
| 341 | lg->dma_is_pending = 1; |
| 342 | lg->pending_dma = udma; |
| 343 | lg->pending_key = ukey; |
| 344 | } |
| 345 | unlock: |
| 346 | up_read(fshared); |
| 347 | mutex_unlock(&lguest_lock); |
| 348 | } |
| 349 | |
| 350 | void release_all_dma(struct lguest *lg) |
| 351 | { |
| 352 | unsigned int i; |
| 353 | |
| 354 | BUG_ON(!mutex_is_locked(&lguest_lock)); |
| 355 | |
| 356 | down_read(&lg->mm->mmap_sem); |
| 357 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
| 358 | if (lg->dma[i].interrupt) |
| 359 | unlink_dma(&lg->dma[i]); |
| 360 | } |
| 361 | up_read(&lg->mm->mmap_sem); |
| 362 | } |
| 363 | |
| 364 | /* Userspace wants a dma buffer from this guest. */ |
| 365 | unsigned long get_dma_buffer(struct lguest *lg, |
| 366 | unsigned long ukey, unsigned long *interrupt) |
| 367 | { |
| 368 | unsigned long ret = 0; |
| 369 | union futex_key key; |
| 370 | struct lguest_dma_info *i; |
| 371 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; |
| 372 | |
| 373 | mutex_lock(&lguest_lock); |
| 374 | down_read(fshared); |
| 375 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
| 376 | kill_guest(lg, "bad registered DMA buffer"); |
| 377 | goto unlock; |
| 378 | } |
| 379 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { |
| 380 | if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { |
| 381 | unsigned int j; |
| 382 | for (j = 0; j < i->num_dmas; j++) { |
| 383 | struct lguest_dma dma; |
| 384 | |
| 385 | ret = i->dmas + j * sizeof(struct lguest_dma); |
| 386 | lgread(lg, &dma, ret, sizeof(dma)); |
| 387 | if (dma.used_len == 0) |
| 388 | break; |
| 389 | } |
| 390 | *interrupt = i->interrupt; |
| 391 | break; |
| 392 | } |
| 393 | } |
| 394 | unlock: |
| 395 | up_read(fshared); |
| 396 | mutex_unlock(&lguest_lock); |
| 397 | return ret; |
| 398 | } |
| 399 | |