# This adds AIO support to the ubd driver. Index: linux-2.6.17/arch/um/drivers/Makefile =================================================================== --- linux-2.6.17.orig/arch/um/drivers/Makefile 2007-11-19 10:58:09.000000000 -0500 +++ linux-2.6.17/arch/um/drivers/Makefile 2007-11-19 11:22:17.000000000 -0500 @@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o -ubd-objs := ubd_kern.o ubd_user.o +ubd-objs := ubd_kern.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o x11-objs := x11_kern.o x11_user.o \ Index: linux-2.6.17/arch/um/drivers/ubd_kern.c =================================================================== --- linux-2.6.17.orig/arch/um/drivers/ubd_kern.c 2007-11-19 10:52:01.000000000 -0500 +++ linux-2.6.17/arch/um/drivers/ubd_kern.c 2007-11-19 11:22:48.000000000 -0500 @@ -36,6 +36,7 @@ #include "linux/spinlock.h" #include "linux/platform_device.h" #include "linux/scatterlist.h" +#include "asm/atomic.h" #include "asm/segment.h" #include "asm/uaccess.h" #include "asm/irq.h" @@ -48,26 +49,24 @@ #include "init.h" #include "irq_user.h" #include "irq_kern.h" -#include "ubd_user.h" #include "os.h" #include "mem.h" #include "mem_kern.h" #include "cow.h" - -enum ubd_req { UBD_READ, UBD_WRITE }; +#include "aio.h" +#include "aio-restart.h" struct io_thread_req { - struct request *req; - enum ubd_req op; + enum aio_type op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; unsigned long length; char *buffer; int sectorsize; - unsigned long sector_mask; - unsigned long long cow_offset; - unsigned long bitmap_words[2]; + int bitmap_offset; + long bitmap_start; + long bitmap_end; int error; }; @@ -81,30 +80,32 @@ extern int create_cow_file(char *cow_fil unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req); +static int do_io(struct io_thread_req *req, struct request *r, + unsigned long *bitmap); -static inline int ubd_test_bit(__u64 bit, unsigned char *data) +static inline int ubd_test_bit(__u64 bit, void *data) { + unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(data[0]) * 8; + bits = sizeof(buffer[0]) * 8; n = bit / bits; off = bit % bits; - return (data[n] & (1 << off)) != 0; + return (buffer[n] & (1 << off)) != 0; } -static inline void ubd_set_bit(__u64 bit, unsigned char *data) +static inline void ubd_set_bit(__u64 bit, void *data) { + unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(data[0]) * 8; + bits = sizeof(buffer[0]) * 8; n = bit / bits; off = bit % bits; - data[n] |= (1 << off); + buffer[n] |= (1 << off); } -/*End stuff from ubd_user.h*/ #define DRIVER_NAME "uml-blkdev" @@ -490,7 +491,7 @@ static void ubd_end_request(struct reque /* Callable only from interrupt context - otherwise you need to do * spin_lock_irq()/spin_lock_irqsave() */ -static inline void ubd_finish(struct request *req, int bytes) +static void ubd_finish(struct request *req, int bytes) { if(bytes < 0){ ubd_end_request(req, 0, 0); @@ -499,37 +500,71 @@ static inline void ubd_finish(struct req ubd_end_request(req, bytes, 1); } +struct bitmap_io { + atomic_t count; + struct aio_context aio; +}; + +struct ubd_aio { + struct aio_context aio; + struct request *req; + int len; + struct bitmap_io *bitmap; + void *bitmap_buf; +}; + static LIST_HEAD(restart); -/* XXX - move this inside ubd_intr. */ /* Called without dev->lock held, and only in interrupt context. */ -static void ubd_handler(void) +static void ubd_intr(struct aio_context *context) { - struct io_thread_req *req; - struct request *rq; + struct aio_context *next; + struct request *req; + struct ubd_aio *aio; struct ubd *ubd; struct list_head *list, *next_ele; unsigned long flags; int n; - while(1){ - n = os_read_file(thread_fd, &req, - sizeof(struct io_thread_req *)); - if(n != sizeof(req)){ - if(n == -EAGAIN) - break; - printk(KERN_ERR "spurious interrupt in ubd_handler, " - "err = %d\n", -n); - return; + while(context != NULL){ + aio = container_of(context, struct ubd_aio, aio); + n = context->len; + next = context->next; + req = aio->req; + if(n == 0){ + req->nr_sectors -= aio->len >> 9; + + if((aio->bitmap != NULL) && + (atomic_dec_and_test(&aio->bitmap->count))){ + aio->aio = aio->bitmap->aio; + aio->len = 0; + kfree(aio->bitmap); + aio->bitmap = NULL; + submit_aio(&aio->aio); + } + else { + if((req->nr_sectors == 0) && + (aio->bitmap == NULL)){ + int len = req->hard_nr_sectors << 9; + ubd_finish(req, len); + } + + if(aio->bitmap_buf != NULL) + kfree(aio->bitmap_buf); + kfree(aio); + } + } + else if(n < 0){ + ubd_finish(req, n); + if(aio->bitmap != NULL) + kfree(aio->bitmap); + if(aio->bitmap_buf != NULL) + kfree(aio->bitmap_buf); + kfree(aio); } - rq = req->req; - rq->nr_sectors -= req->length >> 9; - if(rq->nr_sectors == 0) - ubd_finish(rq, rq->hard_nr_sectors << 9); - kfree(req); + context = next; } - reactivate_fd(thread_fd, UBD_IRQ); list_for_each_safe(list, next_ele, &restart){ ubd = container_of(list, struct ubd, restart); @@ -540,23 +575,6 @@ static void ubd_handler(void) } } -static irqreturn_t ubd_intr(int irq, void *dev) -{ - ubd_handler(); - return IRQ_HANDLED; -} - -/* Only changed by ubd_init, which is an initcall. */ -static int io_pid = -1; - -void kill_io_thread(void) -{ - if(io_pid != -1) - os_kill_process(io_pid, 1); -} - -__uml_exitcall(kill_io_thread); - static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) { char *file; @@ -897,11 +915,18 @@ static struct platform_driver ubd_driver }, }; +static struct aio_driver ubd_aio_driver = { + .list = LIST_HEAD_INIT(ubd_aio_driver.list), + .handler = ubd_intr, + .requests = NULL, +}; + static int __init ubd_init(void) { char *error; - int i, err; + int i; + register_aio_driver(&ubd_aio_driver); if (register_blkdev(MAJOR_NR, "ubd")) return -1; @@ -915,8 +940,7 @@ static int __init ubd_init(void) platform_driver_register(&ubd_driver); mutex_lock(&ubd_lock); for (i = 0; i < MAX_DEV; i++){ - err = ubd_add(i, &error); - if(err) + if(ubd_add(i, &error)) printk(KERN_ERR "Failed to initialize ubd device %d :" "%s\n", i, error); } @@ -926,35 +950,6 @@ static int __init ubd_init(void) late_initcall(ubd_init); -static int __init ubd_driver_init(void){ - unsigned long stack; - int err; - - /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ - if(global_openflags.s){ - printk(KERN_INFO "ubd: Synchronous mode\n"); - /* Letting ubd=sync be like using ubd#s= instead of ubd#= is - * enough. So use anyway the io thread. */ - } - stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), - &thread_fd); - if(io_pid < 0){ - printk(KERN_ERR - "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); - io_pid = -1; - return 0; - } - err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, - IRQF_DISABLED, "ubd", ubd_devs); - if(err != 0) - printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); - return 0; -} - -device_initcall(ubd_driver_init); - static int ubd_open(struct inode *inode, struct file *filp) { struct gendisk *disk = inode->i_bdev->bd_disk; @@ -992,64 +987,21 @@ static int ubd_release(struct inode * in return 0; } -static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, - __u64 *cow_offset, unsigned long *bitmap, - __u64 bitmap_offset, unsigned long *bitmap_words, - __u64 bitmap_len) -{ - __u64 sector = io_offset >> 9; - int i, update_bitmap = 0; - - for(i = 0; i < length >> 9; i++){ - if(cow_mask != NULL) - ubd_set_bit(i, (unsigned char *) cow_mask); - if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) - continue; - - update_bitmap = 1; - ubd_set_bit(sector + i, (unsigned char *) bitmap); - } - - if(!update_bitmap) - return; - - *cow_offset = sector / (sizeof(unsigned long) * 8); - - /* This takes care of the case where we're exactly at the end of the - * device, and *cow_offset + 1 is off the end. So, just back it up - * by one word. Thanks to Lynn Kerby for the fix and James McMechan - * for the original diagnosis. - */ - if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / - sizeof(unsigned long) - 1)) - (*cow_offset)--; - - bitmap_words[0] = bitmap[*cow_offset]; - bitmap_words[1] = bitmap[*cow_offset + 1]; - - *cow_offset *= sizeof(unsigned long); - *cow_offset += bitmap_offset; -} - -static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, - __u64 bitmap_offset, __u64 bitmap_len) +static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap) { - __u64 sector = req->offset >> 9; + __u64 sector = req->offset / req->sectorsize; int i; - if(req->length > (sizeof(req->sector_mask) * 8) << 9) - panic("Operation too long"); + for(i = 0; i < req->length / req->sectorsize; i++){ + if(ubd_test_bit(sector + i, bitmap)) + continue; - if(req->op == UBD_READ) { - for(i = 0; i < req->length >> 9; i++){ - if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) - ubd_set_bit(i, (unsigned char *) - &req->sector_mask); - } + if(req->bitmap_start == -1) + req->bitmap_start = sector + i; + req->bitmap_end = sector + i + 1; + + ubd_set_bit(sector + i, bitmap); } - else cowify_bitmap(req->offset, req->length, &req->sector_mask, - &req->cow_offset, bitmap, bitmap_offset, - req->bitmap_words, bitmap_len); } /* Called with dev->lock held */ @@ -1060,25 +1012,24 @@ static void prepare_request(struct reque struct gendisk *disk = req->rq_disk; struct ubd *ubd_dev = disk->private_data; - io_req->req = req; io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : ubd_dev->fd; io_req->fds[1] = ubd_dev->fd; - io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; - io_req->sector_mask = 0; - io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; + io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = ubd_dev->cow.data_offset; io_req->buffer = page_address(page) + page_offset; io_req->sectorsize = 1 << 9; + io_req->bitmap_offset = -1; + io_req->bitmap_start = -1; + io_req->bitmap_end = -1; if(ubd_dev->cow.file != NULL) - cowify_req(io_req, ubd_dev->cow.bitmap, - ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); + cowify_bitmap(io_req, ubd_dev->cow.bitmap); } @@ -1087,7 +1038,7 @@ static void do_ubd_request(struct reques { struct io_thread_req *io_req; struct request *req; - int n, last_sectors; + int last_sectors; while(1){ struct ubd *dev = q->queuedata; @@ -1120,13 +1071,9 @@ static void do_ubd_request(struct reques sg->offset, sg->length, sg_page(sg)); last_sectors = sg->length >> 9; - n = os_write_file(thread_fd, &io_req, - sizeof(struct io_thread_req *)); - if(n != sizeof(struct io_thread_req *)){ - if(n != -EAGAIN) - printk("write to io thread failed, " - "errno = %d\n", -n); - else if(list_empty(&dev->restart)) + + if(do_io(io_req, req, dev->cow.bitmap) == -EAGAIN){ + if(list_empty(&dev->restart)) list_add(&dev->restart, &restart); return; } @@ -1363,124 +1310,100 @@ int create_cow_file(char *cow_file, char return err; } -static int update_bitmap(struct io_thread_req *req) +static int do_io(struct io_thread_req *req, struct request *r, + unsigned long *bitmap) { - int n; + struct ubd_aio *aio; + struct bitmap_io *bitmap_io = NULL; + char *buf; + void *bitmap_buf = NULL; + unsigned long len, sector; + int nsectors, start, end, bit; + int err; + __u64 off; - if(req->cow_offset == -1) - return 0; + if(req->bitmap_start != -1){ + /* Round up to the nearest word */ + int round = sizeof(unsigned long); + len = (req->bitmap_end - req->bitmap_start + + round * 8 - 1) / (round * 8); + len *= round; + + off = req->bitmap_start / (8 * round); + off *= round; + + bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL); + if(bitmap_io == NULL){ + printk("Failed to kmalloc bitmap IO\n"); + req->error = 1; + return -ENOMEM; + } - n = os_seek_file(req->fds[1], req->cow_offset); - if(n < 0){ - printk("do_io - bitmap lseek failed : err = %d\n", -n); - return 1; - } + bitmap_buf = kmalloc(len, GFP_KERNEL); + if(bitmap_buf == NULL){ + printk("do_io : kmalloc of bitmap chunk (len %ld)" + "failed\n", len); + kfree(bitmap_io); + req->error = 1; + return -ENOMEM; + } + memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len); - n = os_write_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); - if(n != sizeof(req->bitmap_words)){ - printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, - req->fds[1]); - return 1; + *bitmap_io = ((struct bitmap_io) + { .count = ATOMIC_INIT(0), + .aio = INIT_AIO(AIO_WRITE, req->fds[1], + bitmap_buf, len, + req->bitmap_offset + off, + &ubd_aio_driver) } ); } - return 0; -} - -void do_io(struct io_thread_req *req) -{ - char *buf; - unsigned long len; - int n, nsectors, start, end, bit; - int err; - __u64 off; - nsectors = req->length / req->sectorsize; start = 0; + end = nsectors; + bit = 0; do { - bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); - end = start; - while((end < nsectors) && - (ubd_test_bit(end, (unsigned char *) - &req->sector_mask) == bit)) - end++; + if(bitmap != NULL){ + sector = req->offset / req->sectorsize; + bit = ubd_test_bit(sector + start, bitmap); + end = start; + while((end < nsectors) && + (ubd_test_bit(sector + end, bitmap) == bit)) + end++; + } - off = req->offset + req->offsets[bit] + + off = req->offsets[bit] + req->offset + start * req->sectorsize; len = (end - start) * req->sectorsize; buf = &req->buffer[start * req->sectorsize]; - - err = os_seek_file(req->fds[bit], off); - if(err < 0){ - printk("do_io - lseek failed : err = %d\n", -err); + aio = kmalloc(sizeof(*aio), GFP_KERNEL); + if(aio == NULL){ req->error = 1; - return; + return -ENOMEM; } - if(req->op == UBD_READ){ - n = 0; - do { - buf = &buf[n]; - len -= n; - n = os_read_file(req->fds[bit], buf, len); - if (n < 0) { - printk("do_io - read failed, err = %d " - "fd = %d\n", -n, req->fds[bit]); - req->error = 1; - return; - } - } while((n < len) && (n != 0)); - if (n < len) memset(&buf[n], 0, len - n); - } else { - n = os_write_file(req->fds[bit], buf, len); - if(n != len){ - printk("do_io - write failed err = %d " - "fd = %d\n", -n, req->fds[bit]); + + *aio = ((struct ubd_aio) + { .aio = INIT_AIO(req->op, req->fds[bit], buf, + len, off, &ubd_aio_driver), + .len = len, + .req = r, + .bitmap = bitmap_io, + .bitmap_buf = bitmap_buf }); + + if(aio->bitmap != NULL) + atomic_inc(&aio->bitmap->count); + + err = submit_aio(&aio->aio); + if(err){ + if(err != -EAGAIN){ + printk("do_io - submit_aio failed, " + "err = %d\n", err); req->error = 1; - return; } + return err; } start = end; } while(start < nsectors); - req->error = update_bitmap(req); -} - -/* Changed in start_io_thread, which is serialized by being called only - * from ubd_init, which is an initcall. - */ -int kernel_fd = -1; - -/* Only changed by the io thread. XXX: currently unused. */ -static int io_count = 0; - -int io_thread(void *arg) -{ - struct io_thread_req *req; - int n; - - ignore_sigwinch_sig(); - while(1){ - n = os_read_file(kernel_fd, &req, - sizeof(struct io_thread_req *)); - if(n != sizeof(struct io_thread_req *)){ - if(n < 0) - printk("io_thread - read failed, fd = %d, " - "err = %d\n", kernel_fd, -n); - else { - printk("io_thread - short read, fd = %d, " - "length = %d\n", kernel_fd, n); - } - continue; - } - io_count++; - do_io(req); - n = os_write_file(kernel_fd, &req, - sizeof(struct io_thread_req *)); - if(n != sizeof(struct io_thread_req *)) - printk("io_thread - write failed, fd = %d, err = %d\n", - kernel_fd, -n); - } - return 0; } Index: linux-2.6.17/arch/um/drivers/ubd_user.c =================================================================== --- linux-2.6.17.orig/arch/um/drivers/ubd_user.c 2007-10-17 12:11:51.000000000 -0400 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "asm/types.h" -#include "kern_util.h" -#include "user.h" -#include "ubd_user.h" -#include "os.h" -#include "cow.h" - -#include -#include - -void ignore_sigwinch_sig(void) -{ - signal(SIGWINCH, SIG_IGN); -} - -int start_io_thread(unsigned long sp, int *fd_out) -{ - int pid, fds[2], err; - - err = os_pipe(fds, 1, 1); - if(err < 0){ - printk("start_io_thread - os_pipe failed, err = %d\n", -err); - goto out; - } - - kernel_fd = fds[0]; - *fd_out = fds[1]; - - err = os_set_fd_block(*fd_out, 0); - if (err) { - printk("start_io_thread - failed to set nonblocking I/O.\n"); - goto out_close; - } - - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, - NULL); - if(pid < 0){ - err = -errno; - printk("start_io_thread - clone failed : errno = %d\n", errno); - goto out_close; - } - - return(pid); - - out_close: - os_close_file(fds[0]); - os_close_file(fds[1]); - kernel_fd = -1; - *fd_out = -1; - out: - return err; -} Index: linux-2.6.17/include/asm-um/irq.h =================================================================== --- linux-2.6.17.orig/include/asm-um/irq.h 2007-11-19 10:56:56.000000000 -0500 +++ linux-2.6.17/include/asm-um/irq.h 2007-11-19 11:22:17.000000000 -0500 @@ -5,18 +5,17 @@ #define UMN_IRQ 1 #define CONSOLE_IRQ 2 #define CONSOLE_WRITE_IRQ 3 -#define UBD_IRQ 4 -#define UM_ETH_IRQ 5 -#define SSL_IRQ 6 -#define SSL_WRITE_IRQ 7 -#define ACCEPT_IRQ 8 -#define MCONSOLE_IRQ 9 -#define WINCH_IRQ 10 -#define SIGIO_WRITE_IRQ 11 -#define TELNETD_IRQ 12 -#define XTERM_IRQ 13 -#define AIO_IRQ 14 -#define X11_IRQ 15 +#define UM_ETH_IRQ 4 +#define SSL_IRQ 5 +#define SSL_WRITE_IRQ 6 +#define ACCEPT_IRQ 7 +#define MCONSOLE_IRQ 8 +#define WINCH_IRQ 9 +#define SIGIO_WRITE_IRQ 10 +#define TELNETD_IRQ 11 +#define XTERM_IRQ 12 +#define AIO_IRQ 13 +#define X11_IRQ 14 #define LAST_IRQ X11_IRQ #define NR_IRQS (LAST_IRQ + 1) Index: linux-2.6.17/arch/um/include/ubd_user.h =================================================================== --- linux-2.6.17.orig/arch/um/include/ubd_user.h 2007-10-17 12:11:51.000000000 -0400 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -/* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) - * Licensed under the GPL - */ - -#ifndef __UM_UBD_USER_H -#define __UM_UBD_USER_H - -extern void ignore_sigwinch_sig(void); -extern int start_io_thread(unsigned long sp, int *fds_out); -extern int io_thread(void *arg); -extern int kernel_fd; - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */