blob: f6e64026f9952b90d1af98625b152ef394c454e9 [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
Jeff Dike09ace812005-09-03 15:57:46 -07009#include <string.h>
Jeff Dike75e55842005-09-03 15:57:45 -070010#include <errno.h>
11#include <sched.h>
12#include <sys/syscall.h>
13#include "os.h"
14#include "helper.h"
15#include "aio.h"
16#include "init.h"
17#include "user.h"
18#include "mode.h"
19
/*
 * Descriptors used to hand requests to the 2.4-style I/O thread.
 * init_aio_24() fills them in from os_pipe(); submit_aio_24() writes
 * request pointers to aio_req_fd_w and not_aio_thread() reads them from
 * aio_req_fd_r.  Both are -1 while no pipe is set up.
 */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
22
Jeff Dike09ace812005-09-03 15:57:46 -070023static int update_aio(struct aio_context *aio, int res)
24{
25 if(res < 0)
26 aio->len = res;
27 else if((res == 0) && (aio->type == AIO_READ)){
28 /* This is the EOF case - we have hit the end of the file
29 * and it ends in a partial block, so we fill the end of
30 * the block with zeros and claim success.
31 */
32 memset(aio->data, 0, aio->len);
33 aio->len = 0;
34 }
35 else if(res > 0){
36 aio->len -= res;
37 aio->data += res;
38 aio->offset += res;
39 return aio->len;
40 }
41
42 return 0;
43}
44
Jeff Dike75e55842005-09-03 15:57:45 -070045#if defined(HAVE_AIO_ABI)
46#include <linux/aio_abi.h>
47
48/* If we have the headers, we are going to build with AIO enabled.
49 * If we don't have aio in libc, we define the necessary stubs here.
50 */
51
52#if !defined(HAVE_AIO_LIBC)
53
/* Raw io_setup(2) wrapper, used when libc does not provide one. */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
58
/* Raw io_submit(2) wrapper, used when libc does not provide one. */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
63
/* Raw io_getevents(2) wrapper, used when libc does not provide one. */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events,
			   timeout);

	return ret;
}
69
70#endif
71
72/* The AIO_MMAP cases force the mmapped page into memory here
73 * rather than in whatever place first touches the data. I used
74 * to do this by touching the page, but that's delicate because
75 * gcc is prone to optimizing that away. So, what's done here
76 * is we read from the descriptor from which the page was
77 * mapped. The caller is required to pass an offset which is
78 * inside the page that was mapped. Thus, when the read
79 * returns, we know that the page is in the page cache, and
80 * that it now backs the mmapped area.
81 */
82
Jeff Dike09ace812005-09-03 15:57:46 -070083static int do_aio(aio_context_t ctx, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070084{
85 struct iocb iocb, *iocbp = &iocb;
86 char c;
87 int err;
88
89 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
90 .aio_reqprio = 0,
Jeff Dike09ace812005-09-03 15:57:46 -070091 .aio_fildes = aio->fd,
92 .aio_buf = (unsigned long) aio->data,
93 .aio_nbytes = aio->len,
94 .aio_offset = aio->offset,
Jeff Dike75e55842005-09-03 15:57:45 -070095 .aio_reserved1 = 0,
96 .aio_reserved2 = 0,
97 .aio_reserved3 = 0 });
98
Jeff Dike09ace812005-09-03 15:57:46 -070099 switch(aio->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700100 case AIO_READ:
101 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Dike75e55842005-09-03 15:57:45 -0700102 break;
103 case AIO_WRITE:
104 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Dike75e55842005-09-03 15:57:45 -0700105 break;
106 case AIO_MMAP:
107 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
108 iocb.aio_buf = (unsigned long) &c;
109 iocb.aio_nbytes = sizeof(c);
Jeff Dike75e55842005-09-03 15:57:45 -0700110 break;
111 default:
Jeff Dike09ace812005-09-03 15:57:46 -0700112 printk("Bogus op in do_aio - %d\n", aio->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700113 err = -EINVAL;
Jeff Dike09ace812005-09-03 15:57:46 -0700114 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700115 }
Jeff Dike09ace812005-09-03 15:57:46 -0700116
117 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -0700118 if(err > 0)
119 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700120 else
121 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700122
Jeff Dike09ace812005-09-03 15:57:46 -0700123 out:
Jeff Dike75e55842005-09-03 15:57:45 -0700124 return err;
125}
126
/* Host AIO context: initialized by io_setup() in init_aio_26() and used
 * for every submission and for event collection in aio_thread().
 */
static aio_context_t ctx = 0;
128
129static int aio_thread(void *arg)
130{
131 struct aio_thread_reply reply;
Jeff Dike09ace812005-09-03 15:57:46 -0700132 struct aio_context *aio;
Jeff Dike75e55842005-09-03 15:57:45 -0700133 struct io_event event;
Jeff Dike09ace812005-09-03 15:57:46 -0700134 int err, n;
Jeff Dike75e55842005-09-03 15:57:45 -0700135
136 signal(SIGWINCH, SIG_IGN);
137
138 while(1){
139 n = io_getevents(ctx, 1, 1, &event, NULL);
140 if(n < 0){
141 if(errno == EINTR)
142 continue;
143 printk("aio_thread - io_getevents failed, "
144 "errno = %d\n", errno);
145 }
146 else {
Paolo 'Blaisorblade' Giarrussoa46c9042005-09-21 18:40:29 +0200147 /* This is safe as we've just a pointer here. */
Jeff Dike64b76732005-09-16 19:27:48 -0700148 aio = (struct aio_context *) (long) event.data;
Jeff Dike09ace812005-09-03 15:57:46 -0700149 if(update_aio(aio, event.res)){
150 do_aio(ctx, aio);
151 continue;
152 }
153
Jeff Dike75e55842005-09-03 15:57:45 -0700154 reply = ((struct aio_thread_reply)
Jeff Dike09ace812005-09-03 15:57:46 -0700155 { .data = aio,
156 .err = aio->len });
157 err = os_write_file(aio->reply_fd, &reply,
158 sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700159 if(err != sizeof(reply))
Jeff Dike09ace812005-09-03 15:57:46 -0700160 printk("aio_thread - write failed, "
161 "fd = %d, err = %d\n", aio->reply_fd,
162 -err);
Jeff Dike75e55842005-09-03 15:57:45 -0700163 }
164 }
165 return 0;
166}
167
168#endif
169
Jeff Dike09ace812005-09-03 15:57:46 -0700170static int do_not_aio(struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700171{
172 char c;
173 int err;
174
Jeff Dike09ace812005-09-03 15:57:46 -0700175 switch(aio->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700176 case AIO_READ:
Jeff Dike09ace812005-09-03 15:57:46 -0700177 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700178 if(err)
179 goto out;
180
Jeff Dike09ace812005-09-03 15:57:46 -0700181 err = os_read_file(aio->fd, aio->data, aio->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700182 break;
183 case AIO_WRITE:
Jeff Dike09ace812005-09-03 15:57:46 -0700184 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700185 if(err)
186 goto out;
187
Jeff Dike09ace812005-09-03 15:57:46 -0700188 err = os_write_file(aio->fd, aio->data, aio->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700189 break;
190 case AIO_MMAP:
Jeff Dike09ace812005-09-03 15:57:46 -0700191 err = os_seek_file(aio->fd, aio->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700192 if(err)
193 goto out;
194
Jeff Dike09ace812005-09-03 15:57:46 -0700195 err = os_read_file(aio->fd, &c, sizeof(c));
Jeff Dike75e55842005-09-03 15:57:45 -0700196 break;
197 default:
Jeff Dike09ace812005-09-03 15:57:46 -0700198 printk("do_not_aio - bad request type : %d\n", aio->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700199 err = -EINVAL;
200 break;
201 }
202
203 out:
204 return err;
205}
206
207static int not_aio_thread(void *arg)
208{
Jeff Dike09ace812005-09-03 15:57:46 -0700209 struct aio_context *aio;
Jeff Dike75e55842005-09-03 15:57:45 -0700210 struct aio_thread_reply reply;
211 int err;
212
213 signal(SIGWINCH, SIG_IGN);
214 while(1){
Jeff Dike09ace812005-09-03 15:57:46 -0700215 err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
216 if(err != sizeof(aio)){
Jeff Dike75e55842005-09-03 15:57:45 -0700217 if(err < 0)
218 printk("not_aio_thread - read failed, "
219 "fd = %d, err = %d\n", aio_req_fd_r,
220 -err);
221 else {
222 printk("not_aio_thread - short read, fd = %d, "
223 "length = %d\n", aio_req_fd_r, err);
224 }
225 continue;
226 }
Jeff Dike09ace812005-09-03 15:57:46 -0700227 again:
228 err = do_not_aio(aio);
229
230 if(update_aio(aio, err))
231 goto again;
232
233 reply = ((struct aio_thread_reply) { .data = aio,
234 .err = aio->len });
235 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700236 if(err != sizeof(reply))
237 printk("not_aio_thread - write failed, fd = %d, "
238 "err = %d\n", aio_req_fd_r, -err);
239 }
240}
241
Jeff Dike09ace812005-09-03 15:57:46 -0700242static int submit_aio_24(struct aio_context *aio)
243{
244 int err;
245
246 err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
247 if(err == sizeof(aio))
248 err = 0;
249
250 return err;
251}
252
/* Value returned by run_helper_thread() for the running AIO helper, or
 * -1 if none was started; exit_aio() uses it to kill the helper.
 */
static int aio_pid = -1;

/* Submission routine chosen at init time; submit_aio() calls through it. */
static int (*submit_proc)(struct aio_context *aio);
Jeff Dike75e55842005-09-03 15:57:45 -0700255
256static int init_aio_24(void)
257{
258 unsigned long stack;
259 int fds[2], err;
260
261 err = os_pipe(fds, 1, 1);
262 if(err)
263 goto out;
264
265 aio_req_fd_w = fds[0];
266 aio_req_fd_r = fds[1];
267 err = run_helper_thread(not_aio_thread, NULL,
268 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
269 if(err < 0)
270 goto out_close_pipe;
271
272 aio_pid = err;
273 goto out;
274
275 out_close_pipe:
276 os_close_file(fds[0]);
277 os_close_file(fds[1]);
278 aio_req_fd_w = -1;
279 aio_req_fd_r = -1;
280 out:
281#ifndef HAVE_AIO_ABI
282 printk("/usr/include/linux/aio_abi.h not present during build\n");
283#endif
284 printk("2.6 host AIO support not used - falling back to I/O "
285 "thread\n");
Jeff Dike09ace812005-09-03 15:57:46 -0700286
287 submit_proc = submit_aio_24;
288
Jeff Dike75e55842005-09-03 15:57:45 -0700289 return 0;
290}
291
292#ifdef HAVE_AIO_ABI
293#define DEFAULT_24_AIO 0
Jeff Dike09ace812005-09-03 15:57:46 -0700294static int submit_aio_26(struct aio_context *aio)
295{
296 struct aio_thread_reply reply;
297 int err;
298
299 err = do_aio(ctx, aio);
300 if(err){
301 reply = ((struct aio_thread_reply) { .data = aio,
302 .err = err });
303 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
304 if(err != sizeof(reply))
305 printk("submit_aio_26 - write failed, "
306 "fd = %d, err = %d\n", aio->reply_fd, -err);
307 else err = 0;
308 }
309
310 return err;
311}
312
Jeff Dike75e55842005-09-03 15:57:45 -0700313static int init_aio_26(void)
314{
315 unsigned long stack;
316 int err;
317
318 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700319 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700320 printk("aio_thread failed to initialize context, err = %d\n",
321 errno);
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700322 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700323 }
324
325 err = run_helper_thread(aio_thread, NULL,
326 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
327 if(err < 0)
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700328 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700329
330 aio_pid = err;
331
332 printk("Using 2.6 host AIO\n");
Jeff Dike09ace812005-09-03 15:57:46 -0700333
334 submit_proc = submit_aio_26;
335
Jeff Dike75e55842005-09-03 15:57:45 -0700336 return 0;
337}
338
Jeff Dike75e55842005-09-03 15:57:45 -0700339#else
340#define DEFAULT_24_AIO 1
/* Built without aio_abi.h: host AIO submission is not available. */
static int submit_aio_26(struct aio_context *aio)
{
	return -ENOSYS;
}
345
Jeff Dike09ace812005-09-03 15:57:46 -0700346static int init_aio_26(void)
Jeff Dike75e55842005-09-03 15:57:45 -0700347{
Jeff Dike09ace812005-09-03 15:57:46 -0700348 submit_proc = submit_aio_26;
Jeff Dike75e55842005-09-03 15:57:45 -0700349 return -ENOSYS;
350}
351#endif
352
353static int aio_24 = DEFAULT_24_AIO;
354
355static int __init set_aio_24(char *name, int *add)
356{
357 aio_24 = 1;
358 return 0;
359}
360
/* Ties the "aio=2.4" option string to set_aio_24(); the string that
 * follows is the option's help text.  (__uml_setup is defined outside
 * this file - presumably it registers a command-line switch; confirm
 * against init.h.)
 */
361__uml_setup("aio=2.4", set_aio_24,
362"aio=2.4\n"
363" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
364" available. 2.4 AIO is a single thread that handles one request at a\n"
365" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
366" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
367" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
368" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
369" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
370" your /usr/include/linux in order to build an AIO-capable UML\n\n"
371);
372
373static int init_aio(void)
374{
375 int err;
376
377 CHOOSE_MODE(({
378 if(!aio_24){
379 printk("Disabling 2.6 AIO in tt mode\n");
380 aio_24 = 1;
381 } }), (void) 0);
382
383 if(!aio_24){
384 err = init_aio_26();
385 if(err && (errno == ENOSYS)){
386 printk("2.6 AIO not supported on the host - "
387 "reverting to 2.4 AIO\n");
388 aio_24 = 1;
389 }
390 else return err;
391 }
392
393 if(aio_24)
394 return init_aio_24();
395
396 return 0;
397}
398
399/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
400 * needs to be called when the kernel is running because it calls run_helper,
401 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
402 * kernel does not run __exitcalls on shutdown, and can't because many of them
403 * break when called outside of module unloading.
404 */
405__initcall(init_aio);
406
407static void exit_aio(void)
408{
409 if(aio_pid != -1)
410 os_kill_process(aio_pid, 1);
411}
412
413__uml_exitcall(exit_aio);
414
/*
 * Public entry point: submit an AIO request through whichever
 * implementation was selected at init time.  Returns that
 * implementation's result.
 */
int submit_aio(struct aio_context *aio)
{
	return submit_proc(aio);
}