/*
 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
 * Licensed under the GPL
 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
19 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
25};
26
/* Descriptors used to pass requests to not_aio_thread.  Both are -1 until
 * init_aio_24 has set them up, and are reset to -1 if that setup fails.
 */
static int aio_req_fd_r = -1;	/* not_aio_thread reads requests from here */
static int aio_req_fd_w = -1;	/* submit_aio_24 writes requests to here */
29
30#if defined(HAVE_AIO_ABI)
31#include <linux/aio_abi.h>
32
/* If we have the headers, we are going to build with AIO enabled.
 * If we don't have aio in libc, we define the necessary stubs here.
 */
36
37#if !defined(HAVE_AIO_LIBC)
38
/* Stub for the io_setup system call, which is passed n and ctxp unchanged.
 * Returns whatever syscall() returns.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
43
/* Stub for the io_submit system call: hands the nr iocb pointers in iocbpp
 * to the context ctx.  Returns whatever syscall() returns.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
48
/* Stub for the io_getevents system call.  All five arguments are passed
 * through unchanged and the result of syscall() is returned.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events,
			   timeout);

	return ret;
}
54
55#endif
56
/* The AIO_MMAP cases force the mmapped page into memory here
 * rather than in whatever place first touches the data.  I used
 * to do this by touching the page, but that's delicate because
 * gcc is prone to optimizing that away.  So, what's done here
 * is we read from the descriptor from which the page was
 * mapped.  The caller is required to pass an offset which is
 * inside the page that was mapped.  Thus, when the read
 * returns, we know that the page is in the page cache, and
 * that it now backs the mmapped area.
 */
67
Jeff Dike91acb212005-10-10 23:10:32 -040068static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
69 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070070{
71 struct iocb iocb, *iocbp = &iocb;
72 char c;
73 int err;
74
75 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
76 .aio_reqprio = 0,
Jeff Dike91acb212005-10-10 23:10:32 -040077 .aio_fildes = fd,
78 .aio_buf = (unsigned long) buf,
79 .aio_nbytes = len,
80 .aio_offset = offset,
Jeff Dike75e55842005-09-03 15:57:45 -070081 .aio_reserved1 = 0,
82 .aio_reserved2 = 0,
83 .aio_reserved3 = 0 });
84
Jeff Dike91acb212005-10-10 23:10:32 -040085 switch(type){
Jeff Dike75e55842005-09-03 15:57:45 -070086 case AIO_READ:
87 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Dike91acb212005-10-10 23:10:32 -040088 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070089 break;
90 case AIO_WRITE:
91 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Dike91acb212005-10-10 23:10:32 -040092 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070093 break;
94 case AIO_MMAP:
95 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
96 iocb.aio_buf = (unsigned long) &c;
97 iocb.aio_nbytes = sizeof(c);
Jeff Dike91acb212005-10-10 23:10:32 -040098 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070099 break;
100 default:
Jeff Dike91acb212005-10-10 23:10:32 -0400101 printk("Bogus op in do_aio - %d\n", type);
Jeff Dike75e55842005-09-03 15:57:45 -0700102 err = -EINVAL;
Jeff Dike91acb212005-10-10 23:10:32 -0400103 break;
Jeff Dike75e55842005-09-03 15:57:45 -0700104 }
Jeff Dike09ace812005-09-03 15:57:46 -0700105
Jeff Dike75e55842005-09-03 15:57:45 -0700106 if(err > 0)
107 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700108 else
109 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700110
111 return err;
112}
113
/* The host AIO context.  init_aio_26 has io_setup fill it in; submit_aio_26
 * submits requests on it and aio_thread collects completions from it.
 */
static aio_context_t ctx = 0;
115
116static int aio_thread(void *arg)
117{
118 struct aio_thread_reply reply;
119 struct io_event event;
Jeff Dike91acb212005-10-10 23:10:32 -0400120 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700121
122 signal(SIGWINCH, SIG_IGN);
123
124 while(1){
125 n = io_getevents(ctx, 1, 1, &event, NULL);
126 if(n < 0){
127 if(errno == EINTR)
128 continue;
129 printk("aio_thread - io_getevents failed, "
130 "errno = %d\n", errno);
131 }
132 else {
133 reply = ((struct aio_thread_reply)
Jeff Dike91acb212005-10-10 23:10:32 -0400134 { .data = (void *) (long) event.data,
135 .err = event.res });
136 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
137 err = os_write_file(reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700138 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400139 printk("aio_thread - write failed, fd = %d, "
140 "err = %d\n", aio_req_fd_r, -err);
Jeff Dike75e55842005-09-03 15:57:45 -0700141 }
142 }
143 return 0;
144}
145
146#endif
147
Jeff Dike91acb212005-10-10 23:10:32 -0400148static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700149{
150 char c;
151 int err;
152
Jeff Dike91acb212005-10-10 23:10:32 -0400153 switch(req->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700154 case AIO_READ:
Jeff Dike91acb212005-10-10 23:10:32 -0400155 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700156 if(err)
157 goto out;
158
Jeff Dike91acb212005-10-10 23:10:32 -0400159 err = os_read_file(req->io_fd, req->buf, req->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700160 break;
161 case AIO_WRITE:
Jeff Dike91acb212005-10-10 23:10:32 -0400162 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700163 if(err)
164 goto out;
165
Jeff Dike91acb212005-10-10 23:10:32 -0400166 err = os_write_file(req->io_fd, req->buf, req->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700167 break;
168 case AIO_MMAP:
Jeff Dike91acb212005-10-10 23:10:32 -0400169 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700170 if(err)
171 goto out;
172
Jeff Dike91acb212005-10-10 23:10:32 -0400173 err = os_read_file(req->io_fd, &c, sizeof(c));
Jeff Dike75e55842005-09-03 15:57:45 -0700174 break;
175 default:
Jeff Dike91acb212005-10-10 23:10:32 -0400176 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700177 err = -EINVAL;
178 break;
179 }
180
181 out:
182 return err;
183}
184
185static int not_aio_thread(void *arg)
186{
Jeff Dike91acb212005-10-10 23:10:32 -0400187 struct aio_thread_req req;
Jeff Dike75e55842005-09-03 15:57:45 -0700188 struct aio_thread_reply reply;
189 int err;
190
191 signal(SIGWINCH, SIG_IGN);
192 while(1){
Jeff Dike91acb212005-10-10 23:10:32 -0400193 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
194 if(err != sizeof(req)){
Jeff Dike75e55842005-09-03 15:57:45 -0700195 if(err < 0)
196 printk("not_aio_thread - read failed, "
197 "fd = %d, err = %d\n", aio_req_fd_r,
198 -err);
199 else {
200 printk("not_aio_thread - short read, fd = %d, "
201 "length = %d\n", aio_req_fd_r, err);
202 }
203 continue;
204 }
Jeff Dike91acb212005-10-10 23:10:32 -0400205 err = do_not_aio(&req);
206 reply = ((struct aio_thread_reply) { .data = req.aio,
207 .err = err });
208 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700209 if(err != sizeof(reply))
210 printk("not_aio_thread - write failed, fd = %d, "
211 "err = %d\n", aio_req_fd_r, -err);
212 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800213
214 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700215}
216
/* Value returned by run_helper_thread for whichever I/O thread was started,
 * or -1 if there is none.  exit_aio uses it to kill the thread.
 */
static int aio_pid = -1;
218
219static int init_aio_24(void)
220{
221 unsigned long stack;
222 int fds[2], err;
223
224 err = os_pipe(fds, 1, 1);
225 if(err)
226 goto out;
227
228 aio_req_fd_w = fds[0];
229 aio_req_fd_r = fds[1];
230 err = run_helper_thread(not_aio_thread, NULL,
231 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
232 if(err < 0)
233 goto out_close_pipe;
234
235 aio_pid = err;
236 goto out;
237
238 out_close_pipe:
239 os_close_file(fds[0]);
240 os_close_file(fds[1]);
241 aio_req_fd_w = -1;
242 aio_req_fd_r = -1;
243 out:
244#ifndef HAVE_AIO_ABI
245 printk("/usr/include/linux/aio_abi.h not present during build\n");
246#endif
247 printk("2.6 host AIO support not used - falling back to I/O "
248 "thread\n");
249 return 0;
250}
251
252#ifdef HAVE_AIO_ABI
253#define DEFAULT_24_AIO 0
254static int init_aio_26(void)
255{
256 unsigned long stack;
257 int err;
258
259 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700260 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700261 printk("aio_thread failed to initialize context, err = %d\n",
262 errno);
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700263 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700264 }
265
266 err = run_helper_thread(aio_thread, NULL,
267 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
268 if(err < 0)
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700269 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700270
271 aio_pid = err;
272
273 printk("Using 2.6 host AIO\n");
274 return 0;
275}
276
Jeff Dike91acb212005-10-10 23:10:32 -0400277static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
278 unsigned long long offset, struct aio_context *aio)
279{
280 struct aio_thread_reply reply;
281 int err;
282
283 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
284 if(err){
285 reply = ((struct aio_thread_reply) { .data = aio,
286 .err = err });
287 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
288 if(err != sizeof(reply))
289 printk("submit_aio_26 - write failed, "
290 "fd = %d, err = %d\n", aio->reply_fd, -err);
291 else err = 0;
292 }
293
294 return err;
295}
296
Jeff Dike75e55842005-09-03 15:57:45 -0700297#else
298#define DEFAULT_24_AIO 1
/* Built without aio_abi.h - there is no 2.6 AIO to initialize, so this
 * always fails with -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
303
Jeff Dike91acb212005-10-10 23:10:32 -0400304static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
305 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700306{
307 return -ENOSYS;
308}
309#endif
310
311static int aio_24 = DEFAULT_24_AIO;
312
313static int __init set_aio_24(char *name, int *add)
314{
315 aio_24 = 1;
316 return 0;
317}
318
319__uml_setup("aio=2.4", set_aio_24,
320"aio=2.4\n"
321" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
322" available. 2.4 AIO is a single thread that handles one request at a\n"
323" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
324" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
325" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
326" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
327" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
328" your /usr/include/linux in order to build an AIO-capable UML\n\n"
329);
330
331static int init_aio(void)
332{
333 int err;
334
335 CHOOSE_MODE(({
336 if(!aio_24){
337 printk("Disabling 2.6 AIO in tt mode\n");
338 aio_24 = 1;
339 } }), (void) 0);
340
341 if(!aio_24){
342 err = init_aio_26();
343 if(err && (errno == ENOSYS)){
344 printk("2.6 AIO not supported on the host - "
345 "reverting to 2.4 AIO\n");
346 aio_24 = 1;
347 }
348 else return err;
349 }
350
351 if(aio_24)
352 return init_aio_24();
353
354 return 0;
355}
356
/* init_aio is an __initcall while exit_aio is a __uml_exitcall.  The
 * asymmetry is there because init_aio needs to be called when the kernel is
 * running - it calls run_helper, which needs get_free_page - whereas
 * exit_aio can't be an __exitcall: the generic kernel does not run those on
 * shutdown, and can't, because many of them break when called outside of
 * module unloading.
 */
__initcall(init_aio);
364
365static void exit_aio(void)
366{
367 if(aio_pid != -1)
368 os_kill_process(aio_pid, 1);
369}
370
371__uml_exitcall(exit_aio);
372
Jeff Dike91acb212005-10-10 23:10:32 -0400373static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
374 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700375{
Jeff Dike91acb212005-10-10 23:10:32 -0400376 struct aio_thread_req req = { .type = type,
377 .io_fd = io_fd,
378 .offset = offset,
379 .buf = buf,
380 .len = len,
381 .aio = aio,
382 };
383 int err;
384
385 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
386 if(err == sizeof(req))
387 err = 0;
388
389 return err;
390}
391
392int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
393 unsigned long long offset, int reply_fd,
394 struct aio_context *aio)
395{
396 aio->reply_fd = reply_fd;
397 if(aio_24)
398 return submit_aio_24(type, io_fd, buf, len, offset, aio);
399 else {
400 return submit_aio_26(type, io_fd, buf, len, offset, aio);
401 }
Jeff Dike75e55842005-09-03 15:57:45 -0700402}