# I noticed that the common case in io_submit is an immediate context
# switch to the AIO thread when it returns from io_getevents, followed
# by a switch back.  This patch changes that by having the AIO thread
# wait on a pipe before calling io_getevents.  When the kernel
# finishes submitting a batch of I/O, it writes the number of requests
# down the pipe; the AIO thread then collects that many events with
# io_getevents and goes back to sleeping on the pipe.
# This probably shouldn't reach mainline, as O_DIRECT I/O should
# cause a context switch on every I/O request.  Also, the wakeup
# mechanism should only be used when the other side might be
# sleeping.
Index: linux-2.6.17/arch/um/drivers/ubd_kern.c
===================================================================
--- linux-2.6.17.orig/arch/um/drivers/ubd_kern.c	2007-11-19 20:25:15.000000000 -0500
+++ linux-2.6.17/arch/um/drivers/ubd_kern.c	2007-11-19 21:18:29.000000000 -0500
@@ -780,6 +780,8 @@ static void ubd_intr(struct aio_context 
 		do_ubd_request(ubd->queue);
 		spin_unlock_irqrestore(&ubd->lock, flags);
 	}
+
+	finish_aio();
 }
 
 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
@@ -1271,7 +1273,7 @@ static void do_ubd_request(struct reques
 		if(dev->end_sg == 0){
 			struct request *req = elv_next_request(q);
 			if(req == NULL)
-				return;
+				goto out;
 
 			dev->request = req;
 			blkdev_dequeue_request(req);
@@ -1306,11 +1308,14 @@ static void do_ubd_request(struct reques
 		dev->end_sg = 0;
 		dev->request = NULL;
 	}
+out:
+	finish_aio();
 	return;
 
 out_again:
 	if(list_empty(&dev->restart))
 		list_add(&dev->restart, &restart);
+	goto out;
 }
 
 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
Index: linux-2.6.17/arch/um/include/aio.h
===================================================================
--- linux-2.6.17.orig/arch/um/include/aio.h	2007-11-19 16:42:39.000000000 -0500
+++ linux-2.6.17/arch/um/include/aio.h	2007-11-19 21:18:29.000000000 -0500
@@ -28,6 +28,7 @@ struct aio_context {
 	  .next		= NULL }
 
 extern int submit_aio(struct aio_context *aio);
+extern int finish_aio(void);
 
 /* Declared here instead of aio-restarts.h because that's a kernel header */
 extern void aio_do_restarts(void);
Index: linux-2.6.17/arch/um/os-Linux/aio.c
===================================================================
--- linux-2.6.17.orig/arch/um/os-Linux/aio.c	2007-11-19 17:16:56.000000000 -0500
+++ linux-2.6.17/arch/um/os-Linux/aio.c	2007-11-19 21:27:41.000000000 -0500
@@ -38,6 +38,9 @@ static int update_aio(struct aio_context
 	return 0;
 }
 
+/* Initialized in an initcall and unchanged thereafter */
+static int aio_reply_w = -1;
+
 #if defined(HAVE_AIO_ABI)
 #include <linux/aio_abi.h>
 
@@ -78,6 +81,8 @@ static long io_getevents(aio_context_t c
  * that it now backs the mmapped area.
  */
 
+static int pending_events[UM_NR_CPUS];
+
 static int do_aio(aio_context_t ctx, struct aio_context *aio)
 {
 	struct iocb *iocbp = & ((struct iocb) {
@@ -110,35 +115,73 @@ static int do_aio(aio_context_t ctx, str
 }
 
 /* Initialized in an initcall and unchanged thereafter */
-static aio_context_t ctx = 0;
+static int aio_wakeup_r_fd;
+static int aio_wakeup_w_fd;
 
-static int aio_reply_w = -1;
+static int finish_aio_26(void)
+{
+	int err = 0, signals, nevents;
+
+	/* The write of pending_events and setting it to zero needs to be
+	 * atomic, otherwise an interrupt can come in between, issue some
+	 * more I/O and send the non-zeroed pending_events to the AIO thread,
+	 * causing it to expect too many events.
+	 */
+	signals = get_signals();
+	block_signals();
+
+	nevents = pending_events[cpu()];
+	if(nevents != 0){
+		err = write(aio_wakeup_w_fd, &nevents, sizeof(nevents));
+		err = (err != sizeof(nevents)) ? errno : 0;
+	}
+
+	pending_events[cpu()] = 0;
+
+	set_signals(signals);
+
+	return err;
+}
+
+/* Initialized in an initcall and unchanged thereafter */
+static aio_context_t ctx = 0;
 
 static int aio_thread(void *arg)
 {
 	struct aio_context *aio;
 	struct io_event event;
-	int err, n;
+	int err, i, n, nevents;
 
 	signal(SIGWINCH, SIG_IGN);
 
 	while (1) {
-		n = io_getevents(ctx, 1, 1, &event, NULL);
-		if (n < 0) {
-			if (errno == EINTR)
-				continue;
-			printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
-			       "errno = %d\n", errno);
-		}
-		else {
-			/* This is safe as we've just a pointer here. */
-			aio = (struct aio_context *) (long) event.data;
-			update_aio(aio, event.res);
-			err = write(aio_reply_w, &aio, sizeof(aio));
-			if (err != sizeof(aio))
- 				printk("aio_thread - write failed, "
-				       "fd = %d, err = %d\n", aio_reply_w,
-				       errno);
+		n = read(aio_wakeup_r_fd, &nevents, sizeof(nevents));
+		if (n != sizeof(nevents)) {
+			printk("aio_thread - reading wakeup fd returned "
+			       "%d, errno = %d\n", n, errno);
+			continue;
+  		}
+
+		for (i = 0; i < nevents; i++) {
+			n = io_getevents(ctx, 1, 1, &event, NULL);
+			if (n < 0) {
+				if (errno == EINTR) {
+					printk("io_getevents returns EINTR\n");
+					continue;
+				}
+				printk("aio_thread - io_getevents failed, "
+				       "errno = %d\n", errno);
+			}
+			else {
+				/* This is safe as we've just a pointer here. */
+				aio = (struct aio_context *) (long) event.data;
+				update_aio(aio, event.res);
+				err = write(aio_reply_w, &aio, sizeof(aio));
+				if (err != sizeof(aio))
+					printk("aio_thread - write failed, "
+					       "fd = %d, err = %d\n",
+ 					       aio_reply_w, -err);
+			}
 		}
 	}
 	return 0;
@@ -183,6 +226,7 @@ static int aio_req_fd_w = -1;
 static int aio_pid = -1;
 static unsigned long aio_stack;
 static int (*submit_proc)(struct aio_context *aio);
+static int (*finish_proc)(void);
 
 static int not_aio_thread(void *arg)
 {
@@ -294,7 +338,7 @@ static int submit_aio_26(struct aio_cont
 
 static int init_aio_26(void)
 {
-	int err;
+	int err, wakeup_pipe[2];
 
 	if (io_setup(256, &ctx)) {
 		err = -errno;
@@ -303,17 +347,32 @@ static int init_aio_26(void)
 		return err;
 	}
 
+	if (pipe(wakeup_pipe) < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	aio_wakeup_r_fd = wakeup_pipe[0];
+	aio_wakeup_w_fd = wakeup_pipe[1];
+
 	err = run_helper_thread(aio_thread, NULL,
 				CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
 	if (err < 0)
-		return err;
+		goto out_close;
 
 	aio_pid = err;
 
 	printk(UM_KERN_INFO "Using 2.6 host AIO\n");
 	submit_proc = submit_aio_26;
+	finish_proc = finish_aio_26;
 
 	return 0;
+
+out_close:
+	close(wakeup_pipe[0]);
+	close(wakeup_pipe[1]);
+out:
+	return err;
 }
 
 #else
@@ -323,9 +382,15 @@ static int submit_aio_26(struct aio_cont
 	return -ENOSYS;
 }
 
+static int finish_aio_26(void)
+{
+	return -ENOSYS;
+}
+
 static int init_aio_26(void)
 {
 	submit_proc = submit_aio_26;
+	finish_proc = finish_aio_26;
 	return -ENOSYS;
 }
 #endif
@@ -399,3 +464,11 @@ int submit_aio(struct aio_context *aio)
 {
 	return (*submit_proc)(aio);
 }
+
+int finish_aio(void)
+{
+	if(finish_proc == NULL)
+		return 0;
+
+	return (*finish_proc)();
+}
Index: linux-2.6.17/fs/externfs/humfs.c
===================================================================
--- linux-2.6.17.orig/fs/externfs/humfs.c	2007-11-19 20:04:44.000000000 -0500
+++ linux-2.6.17/fs/externfs/humfs.c	2007-11-19 21:18:29.000000000 -0500
@@ -258,8 +258,10 @@ static void humfs_interrupt(struct aio_c
 	struct humfs_aio *aio;
 
 	while(context){
-		if(context->len > 0)
+		if(context->len > 0){
 			submit_aio(context);
+			finish_aio();
+		}
 		else {
 			aio = container_of(context, struct humfs_aio, aio);
 			list_add(&aio->list, &humfs_replies);
@@ -311,6 +313,8 @@ retry:
 	if(err)
 		(*finish)(buf, err, arg);
 
+	finish_aio();
+
  out:
 	return err;
 }