blob: ffa759addd3c657e7292c07482d49d6dbc24bc38 [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
19 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
25};
26
Jeff Dike75e55842005-09-03 15:57:45 -070027static int aio_req_fd_r = -1;
28static int aio_req_fd_w = -1;
29
30#if defined(HAVE_AIO_ABI)
31#include <linux/aio_abi.h>
32
33/* If we have the headers, we are going to build with AIO enabled.
34 * If we don't have aio in libc, we define the necessary stubs here.
35 */
36
37#if !defined(HAVE_AIO_LIBC)
38
/*
 * Stand-in for the libc io_setup() wrapper: invokes the host system call
 * directly and hands back whatever syscall() returned.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
43
/*
 * Stand-in for the libc io_submit() wrapper: passes nr iocb pointers to the
 * host system call and hands back whatever syscall() returned.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
48
/*
 * Stand-in for the libc io_getevents() wrapper: forwards all arguments to
 * the host system call and hands back whatever syscall() returned.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);

	return ret;
}
54
55#endif
56
57/* The AIO_MMAP cases force the mmapped page into memory here
58 * rather than in whatever place first touches the data. I used
59 * to do this by touching the page, but that's delicate because
60 * gcc is prone to optimizing that away. So, what's done here
61 * is we read from the descriptor from which the page was
62 * mapped. The caller is required to pass an offset which is
63 * inside the page that was mapped. Thus, when the read
64 * returns, we know that the page is in the page cache, and
65 * that it now backs the mmapped area.
66 */
67
Jeff Dike91acb212005-10-10 23:10:32 -040068static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
69 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070070{
71 struct iocb iocb, *iocbp = &iocb;
72 char c;
73 int err;
74
75 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
76 .aio_reqprio = 0,
Jeff Dike91acb212005-10-10 23:10:32 -040077 .aio_fildes = fd,
78 .aio_buf = (unsigned long) buf,
79 .aio_nbytes = len,
80 .aio_offset = offset,
Jeff Dike75e55842005-09-03 15:57:45 -070081 .aio_reserved1 = 0,
82 .aio_reserved2 = 0,
83 .aio_reserved3 = 0 });
84
Jeff Dike91acb212005-10-10 23:10:32 -040085 switch(type){
Jeff Dike75e55842005-09-03 15:57:45 -070086 case AIO_READ:
87 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Dike91acb212005-10-10 23:10:32 -040088 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070089 break;
90 case AIO_WRITE:
91 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Dike91acb212005-10-10 23:10:32 -040092 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070093 break;
94 case AIO_MMAP:
95 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
96 iocb.aio_buf = (unsigned long) &c;
97 iocb.aio_nbytes = sizeof(c);
Jeff Dike91acb212005-10-10 23:10:32 -040098 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070099 break;
100 default:
Jeff Dike91acb212005-10-10 23:10:32 -0400101 printk("Bogus op in do_aio - %d\n", type);
Jeff Dike75e55842005-09-03 15:57:45 -0700102 err = -EINVAL;
Jeff Dike91acb212005-10-10 23:10:32 -0400103 break;
Jeff Dike75e55842005-09-03 15:57:45 -0700104 }
Jeff Dike09ace812005-09-03 15:57:46 -0700105
Jeff Dike75e55842005-09-03 15:57:45 -0700106 if(err > 0)
107 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700108 else
109 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700110
111 return err;
112}
113
114static aio_context_t ctx = 0;
115
116static int aio_thread(void *arg)
117{
118 struct aio_thread_reply reply;
119 struct io_event event;
Jeff Dike91acb212005-10-10 23:10:32 -0400120 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700121
122 signal(SIGWINCH, SIG_IGN);
123
124 while(1){
125 n = io_getevents(ctx, 1, 1, &event, NULL);
126 if(n < 0){
127 if(errno == EINTR)
128 continue;
129 printk("aio_thread - io_getevents failed, "
130 "errno = %d\n", errno);
131 }
132 else {
133 reply = ((struct aio_thread_reply)
Jeff Dike91acb212005-10-10 23:10:32 -0400134 { .data = (void *) (long) event.data,
135 .err = event.res });
136 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
137 err = os_write_file(reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700138 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400139 printk("aio_thread - write failed, fd = %d, "
140 "err = %d\n", aio_req_fd_r, -err);
Jeff Dike75e55842005-09-03 15:57:45 -0700141 }
142 }
143 return 0;
144}
145
146#endif
147
Jeff Dike91acb212005-10-10 23:10:32 -0400148static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700149{
150 char c;
151 int err;
152
Jeff Dike91acb212005-10-10 23:10:32 -0400153 switch(req->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700154 case AIO_READ:
Jeff Dike91acb212005-10-10 23:10:32 -0400155 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700156 if(err)
157 goto out;
158
Jeff Dike91acb212005-10-10 23:10:32 -0400159 err = os_read_file(req->io_fd, req->buf, req->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700160 break;
161 case AIO_WRITE:
Jeff Dike91acb212005-10-10 23:10:32 -0400162 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700163 if(err)
164 goto out;
165
Jeff Dike91acb212005-10-10 23:10:32 -0400166 err = os_write_file(req->io_fd, req->buf, req->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700167 break;
168 case AIO_MMAP:
Jeff Dike91acb212005-10-10 23:10:32 -0400169 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700170 if(err)
171 goto out;
172
Jeff Dike91acb212005-10-10 23:10:32 -0400173 err = os_read_file(req->io_fd, &c, sizeof(c));
Jeff Dike75e55842005-09-03 15:57:45 -0700174 break;
175 default:
Jeff Dike91acb212005-10-10 23:10:32 -0400176 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700177 err = -EINVAL;
178 break;
179 }
180
181 out:
182 return err;
183}
184
185static int not_aio_thread(void *arg)
186{
Jeff Dike91acb212005-10-10 23:10:32 -0400187 struct aio_thread_req req;
Jeff Dike75e55842005-09-03 15:57:45 -0700188 struct aio_thread_reply reply;
189 int err;
190
191 signal(SIGWINCH, SIG_IGN);
192 while(1){
Jeff Dike91acb212005-10-10 23:10:32 -0400193 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
194 if(err != sizeof(req)){
Jeff Dike75e55842005-09-03 15:57:45 -0700195 if(err < 0)
196 printk("not_aio_thread - read failed, "
197 "fd = %d, err = %d\n", aio_req_fd_r,
198 -err);
199 else {
200 printk("not_aio_thread - short read, fd = %d, "
201 "length = %d\n", aio_req_fd_r, err);
202 }
203 continue;
204 }
Jeff Dike91acb212005-10-10 23:10:32 -0400205 err = do_not_aio(&req);
206 reply = ((struct aio_thread_reply) { .data = req.aio,
207 .err = err });
208 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700209 if(err != sizeof(reply))
210 printk("not_aio_thread - write failed, fd = %d, "
211 "err = %d\n", aio_req_fd_r, -err);
212 }
213}
214
215static int aio_pid = -1;
216
217static int init_aio_24(void)
218{
219 unsigned long stack;
220 int fds[2], err;
221
222 err = os_pipe(fds, 1, 1);
223 if(err)
224 goto out;
225
226 aio_req_fd_w = fds[0];
227 aio_req_fd_r = fds[1];
228 err = run_helper_thread(not_aio_thread, NULL,
229 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
230 if(err < 0)
231 goto out_close_pipe;
232
233 aio_pid = err;
234 goto out;
235
236 out_close_pipe:
237 os_close_file(fds[0]);
238 os_close_file(fds[1]);
239 aio_req_fd_w = -1;
240 aio_req_fd_r = -1;
241 out:
242#ifndef HAVE_AIO_ABI
243 printk("/usr/include/linux/aio_abi.h not present during build\n");
244#endif
245 printk("2.6 host AIO support not used - falling back to I/O "
246 "thread\n");
247 return 0;
248}
249
250#ifdef HAVE_AIO_ABI
251#define DEFAULT_24_AIO 0
252static int init_aio_26(void)
253{
254 unsigned long stack;
255 int err;
256
257 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700258 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700259 printk("aio_thread failed to initialize context, err = %d\n",
260 errno);
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700261 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700262 }
263
264 err = run_helper_thread(aio_thread, NULL,
265 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
266 if(err < 0)
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700267 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700268
269 aio_pid = err;
270
271 printk("Using 2.6 host AIO\n");
272 return 0;
273}
274
Jeff Dike91acb212005-10-10 23:10:32 -0400275static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
276 unsigned long long offset, struct aio_context *aio)
277{
278 struct aio_thread_reply reply;
279 int err;
280
281 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
282 if(err){
283 reply = ((struct aio_thread_reply) { .data = aio,
284 .err = err });
285 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
286 if(err != sizeof(reply))
287 printk("submit_aio_26 - write failed, "
288 "fd = %d, err = %d\n", aio->reply_fd, -err);
289 else err = 0;
290 }
291
292 return err;
293}
294
Jeff Dike75e55842005-09-03 15:57:45 -0700295#else
296#define DEFAULT_24_AIO 1
/*
 * Variant used when the build has no aio_abi.h: 2.6 AIO cannot be
 * initialized, so always report -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
301
Jeff Dike91acb212005-10-10 23:10:32 -0400302static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
303 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700304{
305 return -ENOSYS;
306}
307#endif
308
309static int aio_24 = DEFAULT_24_AIO;
310
311static int __init set_aio_24(char *name, int *add)
312{
313 aio_24 = 1;
314 return 0;
315}
316
317__uml_setup("aio=2.4", set_aio_24,
318"aio=2.4\n"
319" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
320" available. 2.4 AIO is a single thread that handles one request at a\n"
321" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
322" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
323" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
324" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
325" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
326" your /usr/include/linux in order to build an AIO-capable UML\n\n"
327);
328
329static int init_aio(void)
330{
331 int err;
332
333 CHOOSE_MODE(({
334 if(!aio_24){
335 printk("Disabling 2.6 AIO in tt mode\n");
336 aio_24 = 1;
337 } }), (void) 0);
338
339 if(!aio_24){
340 err = init_aio_26();
341 if(err && (errno == ENOSYS)){
342 printk("2.6 AIO not supported on the host - "
343 "reverting to 2.4 AIO\n");
344 aio_24 = 1;
345 }
346 else return err;
347 }
348
349 if(aio_24)
350 return init_aio_24();
351
352 return 0;
353}
354
355/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
356 * needs to be called when the kernel is running because it calls run_helper,
357 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
358 * kernel does not run __exitcalls on shutdown, and can't because many of them
359 * break when called outside of module unloading.
360 */
361__initcall(init_aio);
362
363static void exit_aio(void)
364{
365 if(aio_pid != -1)
366 os_kill_process(aio_pid, 1);
367}
368
369__uml_exitcall(exit_aio);
370
Jeff Dike91acb212005-10-10 23:10:32 -0400371static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
372 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700373{
Jeff Dike91acb212005-10-10 23:10:32 -0400374 struct aio_thread_req req = { .type = type,
375 .io_fd = io_fd,
376 .offset = offset,
377 .buf = buf,
378 .len = len,
379 .aio = aio,
380 };
381 int err;
382
383 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
384 if(err == sizeof(req))
385 err = 0;
386
387 return err;
388}
389
390int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
391 unsigned long long offset, int reply_fd,
392 struct aio_context *aio)
393{
394 aio->reply_fd = reply_fd;
395 if(aio_24)
396 return submit_aio_24(type, io_fd, buf, len, offset, aio);
397 else {
398 return submit_aio_26(type, io_fd, buf, len, offset, aio);
399 }
Jeff Dike75e55842005-09-03 15:57:45 -0700400}