# I noticed that the common case in io_submit is an immediate context
# switch to the AIO thread when it returns from io_getevents, followed
# by a switch back.  This patch changes that by having the AIO thread
# wait on a pipe before calling io_getevents.  When the kernel
# finishes submitting a batch of I/O, it writes the number of requests
# down the pipe, and the AIO thread reaps that number of completion
# events and goes back to sleeping on the pipe.
# This probably shouldn't reach mainline, as O_DIRECT I/O should have
# the property of causing switching on every I/O request.  Also, the
# wakeup mechanism should only be used when the other side might be
# sleeping.
#
# Review fixes folded into this version of the patch:
#  - finish_aio_26() returns -errno rather than +errno when the wakeup
#    write fails, matching the negative error codes used by
#    init_aio_26() and the -ENOSYS stubs.
#  - aio_thread() no longer counts an io_getevents() call interrupted by
#    EINTR as a reaped event; previously the "continue" still advanced
#    the loop counter, leaving one completion unreaped.
#  - aio_thread() reports errno when the reply write fails instead of
#    the negated write() return value.

Index: linux-2.6.17/arch/um/drivers/ubd_kern.c
===================================================================
--- linux-2.6.17.orig/arch/um/drivers/ubd_kern.c	2007-11-19 20:25:15.000000000 -0500
+++ linux-2.6.17/arch/um/drivers/ubd_kern.c	2007-11-19 21:18:29.000000000 -0500
@@ -780,6 +780,8 @@ static void ubd_intr(struct aio_context
 		do_ubd_request(ubd->queue);
 		spin_unlock_irqrestore(&ubd->lock, flags);
 	}
+
+	finish_aio();
 }
 
 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
@@ -1271,7 +1273,7 @@ static void do_ubd_request(struct reques
 		if(dev->end_sg == 0){
 			struct request *req = elv_next_request(q);
 			if(req == NULL)
-				return;
+				goto out;
 
 			dev->request = req;
 			blkdev_dequeue_request(req);
@@ -1306,11 +1308,14 @@ static void do_ubd_request(struct reques
 		dev->end_sg = 0;
 		dev->request = NULL;
 	}
+out:
+	finish_aio();
 	return;
 
 out_again:
 	if(list_empty(&dev->restart))
 		list_add(&dev->restart, &restart);
+	goto out;
 }
 
 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
Index: linux-2.6.17/arch/um/include/aio.h
===================================================================
--- linux-2.6.17.orig/arch/um/include/aio.h	2007-11-19 16:42:39.000000000 -0500
+++ linux-2.6.17/arch/um/include/aio.h	2007-11-19 21:18:29.000000000 -0500
@@ -28,6 +28,7 @@ struct aio_context {
 	.next = NULL }
 
 extern int submit_aio(struct aio_context *aio);
+extern int finish_aio(void);
 
 /* Declared here instead of aio-restarts.h because that's a kernel header */
 extern void aio_do_restarts(void);
Index: linux-2.6.17/arch/um/os-Linux/aio.c
===================================================================
--- linux-2.6.17.orig/arch/um/os-Linux/aio.c	2007-11-19 17:16:56.000000000 -0500
+++ linux-2.6.17/arch/um/os-Linux/aio.c	2007-11-19 21:27:41.000000000 -0500
@@ -38,6 +38,9 @@ static int update_aio(struct aio_context
 	return 0;
 }
 
+/* Initialized in an initcall and unchanged thereafter */
+static int aio_reply_w = -1;
+
 #if defined(HAVE_AIO_ABI)
 #include <linux/aio_abi.h>
 
@@ -78,6 +81,8 @@ static long io_getevents(aio_context_t c
  * that it now backs the mmapped area.
  */
 
+static int pending_events[UM_NR_CPUS];
+
 static int do_aio(aio_context_t ctx, struct aio_context *aio)
 {
 	struct iocb *iocbp = & ((struct iocb) {
@@ -110,35 +115,75 @@ static int do_aio(aio_context_t ctx, str
 }
 
 /* Initialized in an initcall and unchanged thereafter */
-static aio_context_t ctx = 0;
+static int aio_wakeup_r_fd;
+static int aio_wakeup_w_fd;
 
-static int aio_reply_w = -1;
+static int finish_aio_26(void)
+{
+	int err = 0, signals, nevents;
+
+	/* The write of pending_events and setting it to zero needs to be
+	 * atomic, otherwise an interrupt can come in between, issue some
+	 * more I/O and send the non-zeroed pending_events to the AIO thread,
+	 * causing it to expect too many events.
+	 */
+	signals = get_signals();
+	block_signals();
+
+	nevents = pending_events[cpu()];
+	if(nevents != 0){
+		err = write(aio_wakeup_w_fd, &nevents, sizeof(nevents));
+		err = (err != sizeof(nevents)) ? -errno : 0;
+	}
+
+	pending_events[cpu()] = 0;
+
+	set_signals(signals);
+
+	return err;
+}
+
+/* Initialized in an initcall and unchanged thereafter */
+static aio_context_t ctx = 0;
 
 static int aio_thread(void *arg)
 {
 	struct aio_context *aio;
 	struct io_event event;
-	int err, n;
+	int err, i, n, nevents;
 
 	signal(SIGWINCH, SIG_IGN);
 
 	while (1) {
-		n = io_getevents(ctx, 1, 1, &event, NULL);
-		if (n < 0) {
-			if (errno == EINTR)
-				continue;
-			printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
-			       "errno = %d\n", errno);
-		}
-		else {
-			/* This is safe as we've just a pointer here. */
-			aio = (struct aio_context *) (long) event.data;
-			update_aio(aio, event.res);
-			err = write(aio_reply_w, &aio, sizeof(aio));
-			if (err != sizeof(aio))
-				printk("aio_thread - write failed, "
-				       "fd = %d, err = %d\n", aio_reply_w,
-				       errno);
+		n = read(aio_wakeup_r_fd, &nevents, sizeof(nevents));
+		if (n != sizeof(nevents)) {
+			printk("aio_thread - reading wakeup fd returned "
+			       "%d, errno = %d\n", n, errno);
+			continue;
+		}
+
+		for (i = 0; i < nevents; i++) {
+			n = io_getevents(ctx, 1, 1, &event, NULL);
+			if (n < 0) {
+				if (errno == EINTR) {
+					printk("io_getevents returns EINTR\n");
+					/* No event was reaped - retry this slot */
+					i--;
+					continue;
+				}
+				printk("aio_thread - io_getevents failed, "
+				       "errno = %d\n", errno);
+			}
+			else {
+				/* This is safe as we've just a pointer here. */
+				aio = (struct aio_context *) (long) event.data;
+				update_aio(aio, event.res);
+				err = write(aio_reply_w, &aio, sizeof(aio));
+				if (err != sizeof(aio))
+					printk("aio_thread - write failed, "
+					       "fd = %d, err = %d\n",
+					       aio_reply_w, errno);
+			}
 		}
 	}
 	return 0;
@@ -183,6 +228,7 @@ static int aio_req_fd_w = -1;
 static int aio_pid = -1;
 static unsigned long aio_stack;
 static int (*submit_proc)(struct aio_context *aio);
+static int (*finish_proc)(void);
 
 static int not_aio_thread(void *arg)
 {
@@ -294,7 +340,7 @@ static int submit_aio_26(struct aio_cont
 
 static int init_aio_26(void)
 {
-	int err;
+	int err, wakeup_pipe[2];
 
 	if (io_setup(256, &ctx)) {
 		err = -errno;
@@ -303,17 +349,32 @@ static int init_aio_26(void)
 		return err;
 	}
 
+	if (pipe(wakeup_pipe) < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	aio_wakeup_r_fd = wakeup_pipe[0];
+	aio_wakeup_w_fd = wakeup_pipe[1];
+
 	err = run_helper_thread(aio_thread, NULL,
 				CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
 	if (err < 0)
-		return err;
+		goto out_close;
 
 	aio_pid = err;
 
 	printk(UM_KERN_INFO "Using 2.6 host AIO\n");
 
 	submit_proc = submit_aio_26;
+	finish_proc = finish_aio_26;
 	return 0;
+
+out_close:
+	close(wakeup_pipe[0]);
+	close(wakeup_pipe[1]);
+out:
+	return err;
 }
 
 #else
@@ -323,9 +384,15 @@ static int submit_aio_26(struct aio_cont
 	return -ENOSYS;
 }
 
+static int finish_aio_26(void)
+{
+	return -ENOSYS;
+}
+
 static int init_aio_26(void)
 {
 	submit_proc = submit_aio_26;
+	finish_proc = finish_aio_26;
 	return -ENOSYS;
 }
 #endif
@@ -399,3 +466,11 @@ int submit_aio(struct aio_context *aio)
 {
 	return (*submit_proc)(aio);
 }
+
+int finish_aio(void)
+{
+	if(finish_proc == NULL)
+		return 0;
+
+	return (*finish_proc)();
+}
Index: linux-2.6.17/fs/externfs/humfs.c
===================================================================
--- linux-2.6.17.orig/fs/externfs/humfs.c	2007-11-19 20:04:44.000000000 -0500
+++ linux-2.6.17/fs/externfs/humfs.c	2007-11-19 21:18:29.000000000 -0500
@@ -258,8 +258,10 @@ static void humfs_interrupt(struct aio_c
 	struct humfs_aio *aio;
 
 	while(context){
-		if(context->len > 0)
+		if(context->len > 0){
 			submit_aio(context);
+			finish_aio();
+		}
 		else {
 			aio = container_of(context, struct humfs_aio, aio);
 			list_add(&aio->list, &humfs_replies);
@@ -311,6 +313,8 @@ retry:
 	if(err)
 		(*finish)(buf, err, arg);
 
+	finish_aio();
+
  out:
 	return err;
 }