blob: 298d5632128b8119d06f90c1639a559b0168d20e [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
Jeff Dike09ace812005-09-03 15:57:46 -07009#include <string.h>
Jeff Dike75e55842005-09-03 15:57:45 -070010#include <errno.h>
11#include <sched.h>
12#include <sys/syscall.h>
13#include "os.h"
14#include "helper.h"
15#include "aio.h"
16#include "init.h"
17#include "user.h"
18#include "mode.h"
19
/* Descriptors for the request channel of the 2.4-style I/O thread.
 * submit_aio_24 writes request pointers to aio_req_fd_w and not_aio_thread
 * reads them from aio_req_fd_r.  Both are -1 until init_aio_24 opens them,
 * and are reset to -1 if it has to close them again.
 */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
22
Jeff Dike09ace812005-09-03 15:57:46 -070023static int update_aio(struct aio_context *aio, int res)
24{
25 if(res < 0)
26 aio->len = res;
27 else if((res == 0) && (aio->type == AIO_READ)){
28 /* This is the EOF case - we have hit the end of the file
29 * and it ends in a partial block, so we fill the end of
30 * the block with zeros and claim success.
31 */
32 memset(aio->data, 0, aio->len);
33 aio->len = 0;
34 }
35 else if(res > 0){
36 aio->len -= res;
37 aio->data += res;
38 aio->offset += res;
39 return aio->len;
40 }
41
42 return 0;
43}
44
Jeff Dike75e55842005-09-03 15:57:45 -070045#if defined(HAVE_AIO_ABI)
46#include <linux/aio_abi.h>
47
48/* If we have the headers, we are going to build with AIO enabled.
49 * If we don't have aio in libc, we define the necessary stubs here.
50 */
51
52#if !defined(HAVE_AIO_LIBC)
53
/* Stand-in for libc's io_setup: invokes the io_setup system call directly */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
58
/* Stand-in for libc's io_submit: invokes the io_submit system call directly */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
63
/* Stand-in for libc's io_getevents: invokes the io_getevents system call
 * directly.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events,
			   timeout);

	return ret;
}
69
70#endif
71
72/* The AIO_MMAP cases force the mmapped page into memory here
73 * rather than in whatever place first touches the data. I used
74 * to do this by touching the page, but that's delicate because
75 * gcc is prone to optimizing that away. So, what's done here
76 * is we read from the descriptor from which the page was
77 * mapped. The caller is required to pass an offset which is
78 * inside the page that was mapped. Thus, when the read
79 * returns, we know that the page is in the page cache, and
80 * that it now backs the mmapped area.
81 */
82
Jeff Dike09ace812005-09-03 15:57:46 -070083static int do_aio(aio_context_t ctx, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070084{
85 struct iocb iocb, *iocbp = &iocb;
86 char c;
87 int err;
88
89 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
90 .aio_reqprio = 0,
Jeff Dike09ace812005-09-03 15:57:46 -070091 .aio_fildes = aio->fd,
92 .aio_buf = (unsigned long) aio->data,
93 .aio_nbytes = aio->len,
94 .aio_offset = aio->offset,
Jeff Dike75e55842005-09-03 15:57:45 -070095 .aio_reserved1 = 0,
96 .aio_reserved2 = 0,
97 .aio_reserved3 = 0 });
98
Jeff Dike09ace812005-09-03 15:57:46 -070099 switch(aio->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700100 case AIO_READ:
101 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Dike75e55842005-09-03 15:57:45 -0700102 break;
103 case AIO_WRITE:
104 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Dike75e55842005-09-03 15:57:45 -0700105 break;
106 case AIO_MMAP:
107 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
108 iocb.aio_buf = (unsigned long) &c;
109 iocb.aio_nbytes = sizeof(c);
Jeff Dike75e55842005-09-03 15:57:45 -0700110 break;
111 default:
Jeff Dike09ace812005-09-03 15:57:46 -0700112 printk("Bogus op in do_aio - %d\n", aio->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700113 err = -EINVAL;
Jeff Dike09ace812005-09-03 15:57:46 -0700114 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700115 }
Jeff Dike09ace812005-09-03 15:57:46 -0700116
117 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -0700118 if(err > 0)
119 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700120 else
121 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700122
Jeff Dike09ace812005-09-03 15:57:46 -0700123 out:
Jeff Dike75e55842005-09-03 15:57:45 -0700124 return err;
125}
126
/* Host AIO context: set up by init_aio_26, submitted to by submit_aio_26
 * and drained by aio_thread.
 */
static aio_context_t ctx = 0;
128
129static int aio_thread(void *arg)
130{
131 struct aio_thread_reply reply;
Jeff Dike09ace812005-09-03 15:57:46 -0700132 struct aio_context *aio;
Jeff Dike75e55842005-09-03 15:57:45 -0700133 struct io_event event;
Jeff Dike09ace812005-09-03 15:57:46 -0700134 int err, n;
Jeff Dike75e55842005-09-03 15:57:45 -0700135
136 signal(SIGWINCH, SIG_IGN);
137
138 while(1){
139 n = io_getevents(ctx, 1, 1, &event, NULL);
140 if(n < 0){
141 if(errno == EINTR)
142 continue;
143 printk("aio_thread - io_getevents failed, "
144 "errno = %d\n", errno);
145 }
146 else {
Jeff Dike64b76732005-09-16 19:27:48 -0700147 aio = (struct aio_context *) (long) event.data;
Jeff Dike09ace812005-09-03 15:57:46 -0700148 if(update_aio(aio, event.res)){
149 do_aio(ctx, aio);
150 continue;
151 }
152
Jeff Dike75e55842005-09-03 15:57:45 -0700153 reply = ((struct aio_thread_reply)
Jeff Dike09ace812005-09-03 15:57:46 -0700154 { .data = aio,
155 .err = aio->len });
156 err = os_write_file(aio->reply_fd, &reply,
157 sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700158 if(err != sizeof(reply))
Jeff Dike09ace812005-09-03 15:57:46 -0700159 printk("aio_thread - write failed, "
160 "fd = %d, err = %d\n", aio->reply_fd,
161 -err);
Jeff Dike75e55842005-09-03 15:57:45 -0700162 }
163 }
164 return 0;
165}
166
167#endif
168
Jeff Dike09ace812005-09-03 15:57:46 -0700169static int do_not_aio(struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700170{
171 char c;
172 int err;
173
Jeff Dike09ace812005-09-03 15:57:46 -0700174 switch(aio->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700175 case AIO_READ:
Jeff Dike09ace812005-09-03 15:57:46 -0700176 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700177 if(err)
178 goto out;
179
Jeff Dike09ace812005-09-03 15:57:46 -0700180 err = os_read_file(aio->fd, aio->data, aio->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700181 break;
182 case AIO_WRITE:
Jeff Dike09ace812005-09-03 15:57:46 -0700183 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700184 if(err)
185 goto out;
186
Jeff Dike09ace812005-09-03 15:57:46 -0700187 err = os_write_file(aio->fd, aio->data, aio->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700188 break;
189 case AIO_MMAP:
Jeff Dike09ace812005-09-03 15:57:46 -0700190 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700191 if(err)
192 goto out;
193
Jeff Dike09ace812005-09-03 15:57:46 -0700194 err = os_read_file(aio->fd, &c, sizeof(c));
Jeff Dike75e55842005-09-03 15:57:45 -0700195 break;
196 default:
Jeff Dike09ace812005-09-03 15:57:46 -0700197 printk("do_not_aio - bad request type : %d\n", aio->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700198 err = -EINVAL;
199 break;
200 }
201
202 out:
203 return err;
204}
205
206static int not_aio_thread(void *arg)
207{
Jeff Dike09ace812005-09-03 15:57:46 -0700208 struct aio_context *aio;
Jeff Dike75e55842005-09-03 15:57:45 -0700209 struct aio_thread_reply reply;
210 int err;
211
212 signal(SIGWINCH, SIG_IGN);
213 while(1){
Jeff Dike09ace812005-09-03 15:57:46 -0700214 err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
215 if(err != sizeof(aio)){
Jeff Dike75e55842005-09-03 15:57:45 -0700216 if(err < 0)
217 printk("not_aio_thread - read failed, "
218 "fd = %d, err = %d\n", aio_req_fd_r,
219 -err);
220 else {
221 printk("not_aio_thread - short read, fd = %d, "
222 "length = %d\n", aio_req_fd_r, err);
223 }
224 continue;
225 }
Jeff Dike09ace812005-09-03 15:57:46 -0700226 again:
227 err = do_not_aio(aio);
228
229 if(update_aio(aio, err))
230 goto again;
231
232 reply = ((struct aio_thread_reply) { .data = aio,
233 .err = aio->len });
234 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700235 if(err != sizeof(reply))
236 printk("not_aio_thread - write failed, fd = %d, "
237 "err = %d\n", aio_req_fd_r, -err);
238 }
239}
240
Jeff Dike09ace812005-09-03 15:57:46 -0700241static int submit_aio_24(struct aio_context *aio)
242{
243 int err;
244
245 err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
246 if(err == sizeof(aio))
247 err = 0;
248
249 return err;
250}
251
/* Pid of the helper thread started by init_aio_24 or init_aio_26, or -1 if
 * none has been started.  exit_aio uses it to kill the thread.
 */
static int aio_pid = -1;
/* Submission routine chosen at init time; submit_aio calls through it */
static int (*submit_proc)(struct aio_context *aio);
Jeff Dike75e55842005-09-03 15:57:45 -0700254
255static int init_aio_24(void)
256{
257 unsigned long stack;
258 int fds[2], err;
259
260 err = os_pipe(fds, 1, 1);
261 if(err)
262 goto out;
263
264 aio_req_fd_w = fds[0];
265 aio_req_fd_r = fds[1];
266 err = run_helper_thread(not_aio_thread, NULL,
267 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
268 if(err < 0)
269 goto out_close_pipe;
270
271 aio_pid = err;
272 goto out;
273
274 out_close_pipe:
275 os_close_file(fds[0]);
276 os_close_file(fds[1]);
277 aio_req_fd_w = -1;
278 aio_req_fd_r = -1;
279 out:
280#ifndef HAVE_AIO_ABI
281 printk("/usr/include/linux/aio_abi.h not present during build\n");
282#endif
283 printk("2.6 host AIO support not used - falling back to I/O "
284 "thread\n");
Jeff Dike09ace812005-09-03 15:57:46 -0700285
286 submit_proc = submit_aio_24;
287
Jeff Dike75e55842005-09-03 15:57:45 -0700288 return 0;
289}
290
291#ifdef HAVE_AIO_ABI
292#define DEFAULT_24_AIO 0
Jeff Dike09ace812005-09-03 15:57:46 -0700293static int submit_aio_26(struct aio_context *aio)
294{
295 struct aio_thread_reply reply;
296 int err;
297
298 err = do_aio(ctx, aio);
299 if(err){
300 reply = ((struct aio_thread_reply) { .data = aio,
301 .err = err });
302 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
303 if(err != sizeof(reply))
304 printk("submit_aio_26 - write failed, "
305 "fd = %d, err = %d\n", aio->reply_fd, -err);
306 else err = 0;
307 }
308
309 return err;
310}
311
Jeff Dike75e55842005-09-03 15:57:45 -0700312static int init_aio_26(void)
313{
314 unsigned long stack;
315 int err;
316
317 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700318 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700319 printk("aio_thread failed to initialize context, err = %d\n",
320 errno);
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700321 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700322 }
323
324 err = run_helper_thread(aio_thread, NULL,
325 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
326 if(err < 0)
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700327 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700328
329 aio_pid = err;
330
331 printk("Using 2.6 host AIO\n");
Jeff Dike09ace812005-09-03 15:57:46 -0700332
333 submit_proc = submit_aio_26;
334
Jeff Dike75e55842005-09-03 15:57:45 -0700335 return 0;
336}
337
Jeff Dike75e55842005-09-03 15:57:45 -0700338#else
339#define DEFAULT_24_AIO 1
/* Variant used when the build has no HAVE_AIO_ABI: 2.6 AIO is not compiled
 * in, so every submission is refused with -ENOSYS.
 */
static int submit_aio_26(struct aio_context *aio)
{
	return -ENOSYS;
}
344
/* Variant used when the build has no HAVE_AIO_ABI: installs the refusing
 * submit_aio_26 and reports -ENOSYS.
 */
static int init_aio_26(void)
{
	submit_proc = submit_aio_26;
	return -ENOSYS;
}
350#endif
351
/* Non-zero when 2.4-style AIO is to be used.  Defaults to DEFAULT_24_AIO,
 * can be forced on with "aio=2.4", and is also set by init_aio when 2.6 AIO
 * is ruled out.
 */
static int aio_24 = DEFAULT_24_AIO;
353
/* Handler for the "aio=2.4" switch: forces 2.4-style AIO.  Neither argument
 * is used.  Always returns 0.
 */
static int __init set_aio_24(char *name, int *add)
{
	aio_24 = 1;
	return 0;
}
359
/* Register set_aio_24 as the handler for "aio=2.4", together with its help
 * text.
 */
__uml_setup("aio=2.4", set_aio_24,
"aio=2.4\n"
" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
" available. 2.4 AIO is a single thread that handles one request at a\n"
" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
" your /usr/include/linux in order to build an AIO-capable UML\n\n"
);
371
372static int init_aio(void)
373{
374 int err;
375
376 CHOOSE_MODE(({
377 if(!aio_24){
378 printk("Disabling 2.6 AIO in tt mode\n");
379 aio_24 = 1;
380 } }), (void) 0);
381
382 if(!aio_24){
383 err = init_aio_26();
384 if(err && (errno == ENOSYS)){
385 printk("2.6 AIO not supported on the host - "
386 "reverting to 2.4 AIO\n");
387 aio_24 = 1;
388 }
389 else return err;
390 }
391
392 if(aio_24)
393 return init_aio_24();
394
395 return 0;
396}
397
/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 * needs to be called when the kernel is running because it calls
 * run_helper_thread, which needs get_free_page.  exit_aio is a
 * __uml_exitcall because the generic kernel does not run __exitcalls on
 * shutdown, and can't because many of them break when called outside of
 * module unloading.
 */
__initcall(init_aio);
405
406static void exit_aio(void)
407{
408 if(aio_pid != -1)
409 os_kill_process(aio_pid, 1);
410}
411
/* Register exit_aio as a UML exit call */
__uml_exitcall(exit_aio);
413
Jeff Dike09ace812005-09-03 15:57:46 -0700414int submit_aio(struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700415{
Jeff Dike09ace812005-09-03 15:57:46 -0700416 return (*submit_proc)(aio);
Jeff Dike75e55842005-09-03 15:57:45 -0700417}