1 /*
2 * linux/drivers/block/ll_rw_blk.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
6 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
7 */
8
9 /*
10 * This handles all read/write requests to block devices
11 */
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/errno.h>
16 #include <linux/string.h>
17 #include <linux/config.h>
18 #include <linux/locks.h>
19 #include <linux/mm.h>
20 #include <linux/init.h>
21 #include <linux/smp_lock.h>
22
23 #include <asm/system.h>
24 #include <asm/io.h>
25 #include <linux/blk.h>
26 #include <linux/highmem.h>
27 #include <linux/raid/md.h>
28
29 #include <linux/module.h>
30
31 #define DEBUG_ELEVATOR
32
33 /*
34 * MAC Floppy IWM hooks
35 */
36
37 #ifdef CONFIG_MAC_FLOPPY_IWM
38 extern int mac_floppy_init(void);
39 #endif
40
41 /*
42 * The request-struct contains all necessary data
43 * to load a number of sectors into memory
44 */
45 static struct request all_requests[NR_REQUEST];
46
47 /*
48 * The "disk" task queue is used to start the actual requests
49 * after a plug
50 */
51 DECLARE_TASK_QUEUE(tq_disk);
52
53 /*
54 * Protect the request list against multiple users..
55 *
56 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
57 * from the IRQ event side, and almost 100% SMP threaded from the syscall
58 * side (we still have to protect against block device array operations, and
59 * the do_request() side is still not completely safe. The kernel lock
60 * currently protects this part.).
61 *
62 * There is a fair chance that things will work just OK if these functions
63 * are called with no global kernel lock held ...
64 */
65 spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
66
67 /*
68 * used to wait on when there are no free requests
69 */
70 DECLARE_WAIT_QUEUE_HEAD(wait_for_request);
71
72 /* This specifies how many sectors to read ahead on the disk. */
73
74 int read_ahead[MAX_BLKDEV];
75
76 /* blk_dev_struct is:
77 * *request_fn
78 * *current_request
79 */
80 struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
81
82 /*
83 * blk_size contains the size of all block devices in units of
84 * 1024-byte blocks (kilobytes):
85 *
86 * blk_size[MAJOR][MINOR]
87 *
88 * if (!blk_size[MAJOR]) then no minor size checking is done.
89 */
90 int * blk_size[MAX_BLKDEV];
91
92 /*
93 * blksize_size contains the size of all block-devices:
94 *
95 * blksize_size[MAJOR][MINOR]
96 *
97 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
98 */
99 int * blksize_size[MAX_BLKDEV];
100
101 /*
102 * hardsect_size contains the size of the hardware sector of a device.
103 *
104 * hardsect_size[MAJOR][MINOR]
105 *
106 * if (!hardsect_size[MAJOR])
107 * then 512 bytes is assumed.
108 * else
109 * sector_size is hardsect_size[MAJOR][MINOR]
110 * This is currently set by some scsi devices and read by the msdos fs driver.
111 * Other uses may appear later.
112 */
113 int * hardsect_size[MAX_BLKDEV];
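/*
 * Illustrative sketch (not part of the original file): how a block driver
 * is typically expected to fill in the per-major arrays above at init time.
 * The major number, unit count and geometry below are made-up values; a
 * real driver uses its own.
 */
#if 0
#define EXAMPLE_MAJOR	42	/* hypothetical major number */
#define EXAMPLE_UNITS	4	/* hypothetical number of minors */

static int example_sizes[EXAMPLE_UNITS];	/* device size in KB */
static int example_blksizes[EXAMPLE_UNITS];	/* soft block size */
static int example_hardsects[EXAMPLE_UNITS];	/* hardware sector size */

static void example_register_sizes(void)
{
	int i;

	for (i = 0; i < EXAMPLE_UNITS; i++) {
		example_sizes[i] = 1024;	/* 1 MB per unit */
		example_blksizes[i] = 1024;	/* 1 KB soft blocks */
		example_hardsects[i] = 512;	/* 512-byte sectors */
	}
	blk_size[EXAMPLE_MAJOR] = example_sizes;
	blksize_size[EXAMPLE_MAJOR] = example_blksizes;
	hardsect_size[EXAMPLE_MAJOR] = example_hardsects;
	read_ahead[EXAMPLE_MAJOR] = 8;		/* read-ahead, in sectors */
}
#endif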
114
115 /*
116 * The following tunes the read-ahead algorithm in mm/filemap.c
117 */
118 int * max_readahead[MAX_BLKDEV];
119
120 /*
121 * Max number of sectors per request
122 */
123 int * max_sectors[MAX_BLKDEV];
124
125 static inline int get_max_sectors(kdev_t dev)
126 {
127 if (!max_sectors[MAJOR(dev)])
128 return MAX_SECTORS;
129 return max_sectors[MAJOR(dev)][MINOR(dev)];
130 }
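/*
 * Illustrative sketch (not part of the original file): the same
 * default-plus-lookup pattern as get_max_sectors(), applied to
 * hardsect_size[] according to the "512 bytes is assumed" rule documented
 * above.  The helper name is hypothetical.
 */
#if 0
static inline int example_get_hardsect_size(kdev_t dev)
{
	if (!hardsect_size[MAJOR(dev)])
		return 512;
	return hardsect_size[MAJOR(dev)][MINOR(dev)];
}
#endif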
131
132 /*
133 * NOTE: the device-specific queue() functions
134 * have to be atomic!
135 */
136 request_queue_t * blk_get_queue (kdev_t dev)
137 {
138 int major = MAJOR(dev);
139 struct blk_dev_struct *bdev = blk_dev + major;
140 unsigned long flags;
141 request_queue_t *ret;
142
143 spin_lock_irqsave(&io_request_lock,flags);
144 if (bdev->queue)
145 ret = bdev->queue(dev);
146 else
147 ret = &blk_dev[major].request_queue;
148 spin_unlock_irqrestore(&io_request_lock,flags);
149
150 return ret;
151 }
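/*
 * Illustrative sketch (not part of the original file): a driver that keeps
 * one request queue per unit can point blk_dev[major].queue at a lookup
 * function, and blk_get_queue() above will then return the per-unit queue
 * instead of the default one.  The names and major number are hypothetical.
 */
#if 0
#define EXAMPLE_MAJOR	42
#define EXAMPLE_UNITS	4

static request_queue_t example_queues[EXAMPLE_UNITS];

static request_queue_t * example_find_queue(kdev_t dev)
{
	return &example_queues[MINOR(dev) % EXAMPLE_UNITS];
}

static void example_install_queues(void)
{
	blk_dev[EXAMPLE_MAJOR].queue = example_find_queue;
}
#endif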
152
153 static inline int get_request_latency(elevator_t * elevator, int rw)
154 {
155 int latency;
156
157 if (rw != READ)
158 latency = elevator->write_latency;
159 else
160 latency = elevator->read_latency;
161
162 return latency;
163 }
164
165 void blk_cleanup_queue(request_queue_t * q)
166 {
167 memset(q, 0, sizeof(*q));
168 }
169
170 void blk_queue_headactive(request_queue_t * q, int active)
171 {
172 q->head_active = active;
173 }
174
175 void blk_queue_pluggable (request_queue_t * q, plug_device_fn *plug)
176 {
177 q->plug_device_fn = plug;
178 }
179
180 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
181 {
182 q->make_request_fn = mfn;
183 }
184
185 static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
186 {
187 if (req->nr_segments < max_segments) {
188 req->nr_segments++;
189 q->nr_segments++;
190 return 1;
191 }
192 return 0;
193 }
194
195 static int ll_back_merge_fn(request_queue_t *q, struct request *req,
196 struct buffer_head *bh, int max_segments)
197 {
198 if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
199 return 1;
200 return ll_new_segment(q, req, max_segments);
201 }
202
203 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
204 struct buffer_head *bh, int max_segments)
205 {
206 if (bh->b_data + bh->b_size == req->bh->b_data)
207 return 1;
208 return ll_new_segment(q, req, max_segments);
209 }
210
211 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
212 struct request *next, int max_segments)
213 {
214 int total_segments = req->nr_segments + next->nr_segments;
215
216 if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
217 total_segments--;
218 q->nr_segments--;
219 }
220
221 if (total_segments > max_segments)
222 return 0;
223
224 req->nr_segments = total_segments;
225 return 1;
226 }
227
228 /*
229 * "plug" the device if there are no outstanding requests: this will
230 * force the transfer to start only after we have put all the requests
231 * on the list.
232 *
233 * This is called with interrupts off and no requests on the queue.
234 * (and with the request spinlock acquired)
235 */
236 static void generic_plug_device (request_queue_t *q, kdev_t dev)
237 {
238 #ifdef CONFIG_BLK_DEV_MD
239 if (MAJOR(dev) == MD_MAJOR) {
240 spin_unlock_irq(&io_request_lock);
241 BUG();
242 }
243 #endif
244 if (!list_empty(&q->queue_head))
245 return;
246
247 q->plugged = 1;
248 queue_task(&q->plug_tq, &tq_disk);
249 }
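/*
 * Illustrative sketch (not part of the original file): the plug/unplug
 * cycle from a submitter's point of view.  Queueing buffers back to back
 * leaves the queue plugged and merely appends requests; running tq_disk
 * fires the plug_tq entry queued above, which calls generic_unplug_device()
 * and lets the driver's request_fn() see all the accumulated requests at
 * once.  The function below is hypothetical.
 */
#if 0
static void example_submit_and_kick(int nr, struct buffer_head *bhs[])
{
	/* Each submission may plug the queue and just add a request. */
	ll_rw_block(READ, nr, bhs);

	/* Unplug every plugged queue, starting the actual transfers. */
	run_task_queue(&tq_disk);
}
#endif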
250
251 void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
252 {
253 INIT_LIST_HEAD(&q->queue_head);
254 q->elevator = ELEVATOR_DEFAULTS;
255 q->request_fn = rfn;
256 q->back_merge_fn = ll_back_merge_fn;
257 q->front_merge_fn = ll_front_merge_fn;
258 q->merge_requests_fn = ll_merge_requests_fn;
259 q->make_request_fn = NULL;
260 q->plug_tq.sync = 0;
261 q->plug_tq.routine = &generic_unplug_device;
262 q->plug_tq.data = q;
263 q->plugged = 0;
264 /*
265 * These booleans describe the queue properties. We set the
266 * default (and most common) values here. Drivers can use
267 * the appropriate functions to alter these properties as
268 * needed.
269 */
270 q->plug_device_fn = generic_plug_device;
271 q->head_active = 1;
272 }
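/*
 * Illustrative sketch (not part of the original file): a typical driver
 * init path.  The driver initializes its queue with its request_fn and
 * then overrides the defaults set above where needed.  All names and the
 * major number are hypothetical.
 */
#if 0
static void example_request_fn(request_queue_t *q)
{
	/* ... take requests off q->queue_head and start the hardware ... */
}

static int __init example_driver_init(void)
{
	request_queue_t *q = &blk_dev[42].request_queue;

	blk_init_queue(q, example_request_fn);
	/*
	 * This driver dequeues requests before servicing them, so the head
	 * of the queue is not "active" and may safely be modified.
	 */
	blk_queue_headactive(q, 0);
	return 0;
}
#endif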
273
274 /*
275 * remove the plug and let it rip..
276 */
277 void generic_unplug_device(void * data)
278 {
279 request_queue_t * q = (request_queue_t *) data;
280 unsigned long flags;
281
282 spin_lock_irqsave(&io_request_lock,flags);
283 if (q->plugged) {
284 q->plugged = 0;
285 if (!list_empty(&q->queue_head))
286 (q->request_fn)(q);
287 }
288 spin_unlock_irqrestore(&io_request_lock,flags);
289 }
290
291 /*
292 * look for a free request in the first N entries.
293 * NOTE: interrupts must be disabled on the way in (on SMP the request queue
294 * spinlock has to be acquired), and will still be disabled on the way out.
295 */
296 static inline struct request * get_request(int n, kdev_t dev)
297 {
298 static struct request *prev_found = NULL, *prev_limit = NULL;
299 register struct request *req, *limit;
300
301 if (n <= 0)
302 panic("get_request(%d): impossible!\n", n);
303
304 limit = all_requests + n;
305 if (limit != prev_limit) {
306 prev_limit = limit;
307 prev_found = all_requests;
308 }
309 req = prev_found;
310 for (;;) {
311 req = ((req > all_requests) ? req : limit) - 1;
312 if (req->rq_status == RQ_INACTIVE)
313 break;
314 if (req == prev_found)
315 return NULL;
316 }
317 prev_found = req;
318 req->rq_status = RQ_ACTIVE;
319 req->rq_dev = dev;
320 req->special = NULL;
321 return req;
322 }
323
324 /*
325 * wait until a free request in the first N entries is available.
326 */
327 static struct request * __get_request_wait(int n, kdev_t dev)
328 {
329 register struct request *req;
330 DECLARE_WAITQUEUE(wait, current);
331 unsigned long flags;
332
333 add_wait_queue(&wait_for_request, &wait);
334 for (;;) {
335 current->state = TASK_UNINTERRUPTIBLE;
336 spin_lock_irqsave(&io_request_lock,flags);
337 req = get_request(n, dev);
338 spin_unlock_irqrestore(&io_request_lock,flags);
339 if (req)
340 break;
341 run_task_queue(&tq_disk);
342 schedule();
343 }
344 remove_wait_queue(&wait_for_request, &wait);
345 current->state = TASK_RUNNING;
346 return req;
347 }
348
349 static inline struct request * get_request_wait(int n, kdev_t dev)
350 {
351 register struct request *req;
352 unsigned long flags;
353
354 spin_lock_irqsave(&io_request_lock,flags);
355 req = get_request(n, dev);
356 spin_unlock_irqrestore(&io_request_lock,flags);
357 if (req)
358 return req;
359 return __get_request_wait(n, dev);
360 }
361
362 /* RO fail safe mechanism */
363
364 static long ro_bits[MAX_BLKDEV][8];
365
366 int is_read_only(kdev_t dev)
367 {
368 int minor,major;
369
370 major = MAJOR(dev);
371 minor = MINOR(dev);
372 if (major < 0 || major >= MAX_BLKDEV) return 0;
373 return ro_bits[major][minor >> 5] & (1 << (minor & 31));
374 }
375
376 void set_device_ro(kdev_t dev,int flag)
377 {
378 int minor,major;
379
380 major = MAJOR(dev);
381 minor = MINOR(dev);
382 if (major < 0 || major >= MAX_BLKDEV) return;
383 if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
384 else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
385 }
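/*
 * Illustrative sketch (not part of the original file): the RO bits above
 * are normally driven from a driver's ioctl handler -- BLKROSET stores the
 * flag via set_device_ro() and BLKROGET reads it back via is_read_only()
 * (both ioctl numbers come from <linux/fs.h>).  A real handler copies the
 * flag to/from user space; this sketch leaves that out.
 */
#if 0
static int example_ro_ioctl(kdev_t dev, unsigned int cmd, int flag)
{
	switch (cmd) {
	case BLKROSET:
		set_device_ro(dev, flag != 0);
		return 0;
	case BLKROGET:
		return is_read_only(dev) != 0;
	}
	return -EINVAL;
}
#endif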
386
387 static inline void drive_stat_acct(struct request *req,
388 unsigned long nr_sectors, int new_io)
389 {
390 int major = MAJOR(req->rq_dev);
391 int minor = MINOR(req->rq_dev);
392 unsigned int disk_index;
393
394 switch (major) {
395 case DAC960_MAJOR+0:
396 disk_index = (minor & 0x00f8) >> 3;
397 break;
398 case SCSI_DISK0_MAJOR:
399 disk_index = (minor & 0x00f0) >> 4;
400 break;
401 case IDE0_MAJOR: /* same as HD_MAJOR */
402 case XT_DISK_MAJOR:
403 disk_index = (minor & 0x0040) >> 6;
404 break;
405 case IDE1_MAJOR:
406 disk_index = ((minor & 0x0040) >> 6) + 2;
407 break;
408 default:
409 return;
410 }
411 if (disk_index >= DK_NDRIVE)
412 return;
413
414 kstat.dk_drive[disk_index] += new_io;
415 if (req->cmd == READ) {
416 kstat.dk_drive_rio[disk_index] += new_io;
417 kstat.dk_drive_rblk[disk_index] += nr_sectors;
418 } else if (req->cmd == WRITE) {
419 kstat.dk_drive_wio[disk_index] += new_io;
420 kstat.dk_drive_wblk[disk_index] += nr_sectors;
421 } else
422 printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
423 }
424
425 /* elevator */
426
427 #define elevator_sequence_after(a,b) ((int)((b)-(a)) < 0)
428 #define elevator_sequence_before(a,b) elevator_sequence_after(b,a)
429 #define elevator_sequence_after_eq(a,b) ((int)((b)-(a)) <= 0)
430 #define elevator_sequence_before_eq(a,b) elevator_sequence_after_eq(b,a)
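/*
 * Illustrative sketch (not part of the original file): the macros above
 * compare sequence numbers through a signed difference, so the ordering
 * stays correct when the elevator sequence counter wraps around.  In the
 * hypothetical demo below b is generated three steps after a, just across
 * the wrap point; (int)(a - b) is -3, so elevator_sequence_after(b, a)
 * still evaluates to 1 even though b is numerically smaller than a.
 */
#if 0
static int example_sequence_demo(void)
{
	unsigned int a = 0x7ffffffeU;
	unsigned int b = a + 3;		/* wraps past the top of the range */

	return elevator_sequence_after(b, a);	/* 1: b comes after a */
}
#endif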
431
432 static inline struct list_head * seek_to_not_starving_chunk(request_queue_t * q,
433 int * lat, int * starving)
434 {
435 int sequence = q->elevator.sequence;
436 struct list_head * entry = q->queue_head.prev;
437 int pos = 0;
438
439 do {
440 struct request * req = blkdev_entry_to_request(entry);
441 if (elevator_sequence_before(req->elevator_sequence, sequence)) {
442 *lat -= q->nr_segments - pos;
443 *starving = 1;
444 return entry;
445 }
446 pos += req->nr_segments;
447 } while ((entry = entry->prev) != &q->queue_head);
448
449 *starving = 0;
450
451 return entry->next;
452 }
453
454 static inline void elevator_merge_requests(elevator_t * e, struct request * req, struct request * next)
455 {
456 if (elevator_sequence_before(next->elevator_sequence, req->elevator_sequence))
457 req->elevator_sequence = next->elevator_sequence;
458 if (req->cmd == READ)
459 e->read_pendings--;
460
461 }
462
463 static inline int elevator_sequence(elevator_t * e, int latency)
464 {
465 return latency + e->sequence;
466 }
467
468 #define elevator_merge_before(q, req, lat) __elevator_merge((q), (req), (lat), 0)
469 #define elevator_merge_after(q, req, lat) __elevator_merge((q), (req), (lat), 1)
470 static inline void __elevator_merge(request_queue_t * q, struct request * req, int latency, int after)
471 {
472 #ifdef DEBUG_ELEVATOR
473 int sequence = elevator_sequence(&q->elevator, latency);
474 if (after)
475 sequence -= req->nr_segments;
476 if (elevator_sequence_before(sequence, req->elevator_sequence)) {
477 static int warned = 0;
478 if (!warned) {
479 printk(KERN_WARNING __FUNCTION__
480 ": req latency %d req latency %d\n",
481 req->elevator_sequence - q->elevator.sequence,
482 sequence - q->elevator.sequence);
483 warned = 1;
484 }
485 req->elevator_sequence = sequence;
486 }
487 #endif
488 }
489
490 static inline void elevator_queue(request_queue_t * q,
491 struct request * req,
492 struct list_head * entry,
493 int latency, int starving)
494 {
495 struct request * tmp, * __tmp;
496 int __latency = latency;
497
498 __tmp = tmp = blkdev_entry_to_request(entry);
499
500 for (;; tmp = blkdev_next_request(tmp))
501 {
502 if ((latency -= tmp->nr_segments) <= 0)
503 {
504 tmp = __tmp;
505 latency = __latency;
506
507 if (starving)
508 break;
509
510 if (q->head_active && !q->plugged)
511 {
512 latency -= tmp->nr_segments;
513 break;
514 }
515
516 list_add(&req->queue, &q->queue_head);
517 goto after_link;
518 }
519
520 if (tmp->queue.next == &q->queue_head)
521 break;
522
523 {
524 const int after_current = IN_ORDER(tmp,req);
525 const int before_next = IN_ORDER(req,blkdev_next_request(tmp));
526
527 if (!IN_ORDER(tmp,blkdev_next_request(tmp))) {
528 if (after_current || before_next)
529 break;
530 } else {
531 if (after_current && before_next)
532 break;
533 }
534 }
535 }
536
537 list_add(&req->queue, &tmp->queue);
538
539 after_link:
540 req->elevator_sequence = elevator_sequence(&q->elevator, latency);
541 }
542
543 /*
544 * add-request adds a request to the linked list.
545 * It disables interrupts (acquires the request spinlock) so that it can muck
546 * with the request-lists in peace. Thus it should be called with no spinlocks
547 * held.
548 *
549 * By this point, req->cmd is always either READ/WRITE, never READA,
550 * which is important for drive_stat_acct() above.
551 */
552
553 static inline void __add_request(request_queue_t * q, struct request * req,
554 int empty, struct list_head * entry,
555 int latency, int starving)
556 {
557 int major;
558
559 drive_stat_acct(req, req->nr_sectors, 1);
560
561 if (empty) {
562 req->elevator_sequence = elevator_sequence(&q->elevator, latency);
563 list_add(&req->queue, &q->queue_head);
564 return;
565 }
566 elevator_queue(q, req, entry, latency, starving);
567
568 /*
569 * FIXME(eric) I don't understand why there is a need for this
570 * special case code. It clearly doesn't fit any more with
571 * the new queueing architecture, and it got added in 2.3.10.
572 * I am leaving this in here until I hear back from the COMPAQ
573 * people.
574 */
575 major = MAJOR(req->rq_dev);
576 if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
577 {
578 (q->request_fn)(q);
579 }
580
581 if (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7)
582 {
583 (q->request_fn)(q);
584 }
585 }
586
587 /*
588 * Has to be called with the request spinlock acquired
589 */
590 static inline void attempt_merge (request_queue_t * q,
591 struct request *req,
592 int max_sectors,
593 int max_segments)
594 {
595 struct request *next;
596
597 if (req->queue.next == &q->queue_head)
598 return;
599 next = blkdev_next_request(req);
600 if (req->sector + req->nr_sectors != next->sector)
601 return;
602 if (next->sem || req->cmd != next->cmd || req->rq_dev != next->rq_dev || req->nr_sectors + next->nr_sectors > max_sectors)
603 return;
604 /*
605 * If we are not allowed to merge these requests, then
606 * return. If we are allowed to merge, then the count
607 * will have been updated to the appropriate number,
608 * and we shouldn't do it here too.
609 */
610 if(!(q->merge_requests_fn)(q, req, next, max_segments))
611 return;
612
613 elevator_merge_requests(&q->elevator, req, next);
614 req->bhtail->b_reqnext = next->bh;
615 req->bhtail = next->bhtail;
616 req->nr_sectors += next->nr_sectors;
617 next->rq_status = RQ_INACTIVE;
618 list_del(&next->queue);
619 wake_up (&wait_for_request);
620 }
621
622 static inline void elevator_debug(request_queue_t * q, kdev_t dev)
623 {
624 #ifdef DEBUG_ELEVATOR
625 int read_pendings = 0, nr_segments = 0;
626 elevator_t * elevator = &q->elevator;
627 struct list_head * entry = &q->queue_head;
628 static int counter;
629
630 if (counter++ % 100)
631 return;
632
633 while ((entry = entry->next) != &q->queue_head)
634 {
635 struct request * req;
636
637 req = blkdev_entry_to_request(entry);
638 if (!req->q)
639 continue;
640 if (req->cmd == READ)
641 read_pendings++;
642 nr_segments += req->nr_segments;
643 }
644
645 if (read_pendings != elevator->read_pendings)
646 {
647 printk(KERN_WARNING
648 "%s: elevator read_pendings %d should be %d\n",
649 kdevname(dev), elevator->read_pendings,
650 read_pendings);
651 elevator->read_pendings = read_pendings;
652 }
653 if (nr_segments != q->nr_segments)
654 {
655 printk(KERN_WARNING
656 "%s: elevator nr_segments %d should be %d\n",
657 kdevname(dev), q->nr_segments,
658 nr_segments);
659 q->nr_segments = nr_segments;
660 }
661 #endif
662 }
663
664 static inline void elevator_account_request(request_queue_t * q, struct request * req)
665 {
666 q->elevator.sequence++;
667 if (req->cmd == READ)
668 q->elevator.read_pendings++;
669 q->nr_segments++;
670 }
671
672 static inline void __make_request(request_queue_t * q, int rw,
673 struct buffer_head * bh)
674 {
675 int major = MAJOR(bh->b_rdev);
676 unsigned int sector, count;
677 int max_segments = MAX_SEGMENTS;
678 struct request * req, * prev;
679 int rw_ahead, max_req, max_sectors;
680 unsigned long flags;
681 int orig_latency, latency, __latency, starving, __starving, empty;
682 struct list_head * entry, * __entry;
683
684 count = bh->b_size >> 9;
685 sector = bh->b_rsector;
686
687 if (blk_size[major]) {
688 unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1;
689
690 if (maxsector < count || maxsector - count < sector) {
691 bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
692 if (!blk_size[major][MINOR(bh->b_rdev)])
693 goto end_io;
694 /* This may well happen - the kernel calls bread()
695 without checking the size of the device, e.g.,
696 when mounting a device. */
697 printk(KERN_INFO
698 "attempt to access beyond end of device\n");
699 printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
700 kdevname(bh->b_rdev), rw,
701 (sector + count)>>1,
702 blk_size[major][MINOR(bh->b_rdev)]);
703 goto end_io;
704 }
705 }
706
707 rw_ahead = 0; /* normal case; gets changed below for READA */
708 switch (rw) {
709 case READA:
710 rw_ahead = 1;
711 rw = READ; /* drop into READ */
712 case READ:
713 if (buffer_uptodate(bh)) /* Hmmph! Already have it */
714 goto end_io;
715 kstat.pgpgin++;
716 max_req = NR_REQUEST; /* reads take precedence */
717 break;
718 case WRITERAW:
719 rw = WRITE;
720 goto do_write; /* Skip the buffer refile */
721 case WRITE:
722 if (!test_and_clear_bit(BH_Dirty, &bh->b_state))
723 goto end_io; /* Hmmph! Nothing to write */
724 refile_buffer(bh);
725 do_write:
726 /*
727 * We don't allow the write-requests to fill up the
728 * queue completely: we want some room for reads,
729 * as they take precedence. The last third of the
730 * requests are only for reads.
731 */
732 kstat.pgpgout++;
733 max_req = (NR_REQUEST * 2) / 3;
734 break;
735 default:
736 BUG();
737 goto end_io;
738 }
739
740 /* We'd better have a real physical mapping!
741 Check this bit only if the buffer was dirty and just locked
742 down by us, so at this point flushpage will block and
743 won't clear the mapped bit under us. */
744 if (!buffer_mapped(bh))
745 BUG();
746
747 /*
748 * Temporary solution - in 2.5 this will be done by the lowlevel
749 * driver. Create a bounce buffer if the buffer data points into
750 * high memory - keep the original buffer otherwise.
751 */
752 #ifdef CONFIG_HIGHMEM
753 bh = create_bounce(rw, bh);
754 #endif
755
756 /* look for a free request. */
757 /*
758 * Loop uses two requests, 1 for loop and 1 for the real device.
759 * Cut max_req in half to avoid running out and deadlocking.
760 */
761 if ((major == LOOP_MAJOR) || (major == NBD_MAJOR))
762 max_req >>= 1;
763
764 /*
765 * Try to coalesce the new request with old requests
766 */
767 max_sectors = get_max_sectors(bh->b_rdev);
768
769 __latency = orig_latency = get_request_latency(&q->elevator, rw);
770
771 /*
772 * Now we acquire the request spinlock, we have to be mega careful
773 * not to schedule or do something nonatomic
774 */
775 spin_lock_irqsave(&io_request_lock,flags);
776 elevator_debug(q, bh->b_rdev);
777
778 empty = 0;
779 if (list_empty(&q->queue_head)) {
780 empty = 1;
781 q->plug_device_fn(q, bh->b_rdev); /* is atomic */
782 goto get_rq;
783 }
784
785 /* avoid write-bombs so as not to hurt the interactiveness of reads */
786 if (rw != READ && q->elevator.read_pendings)
787 max_segments = q->elevator.max_bomb_segments;
788
789 entry = seek_to_not_starving_chunk(q, &__latency, &starving);
790
791 __entry = entry;
792 __starving = starving;
793
794 latency = __latency;
795
796 if (q->head_active && !q->plugged) {
797 /*
798 * The scsi disk and cdrom drivers completely remove the request
799 * from the queue when they start processing an entry. For this
800 * reason it is safe to continue to add links to the top entry
801 * for those devices.
802 *
803 * All other drivers need to jump over the first entry, as that
804 * entry may be busy being processed and we thus can't change
805 * it.
806 */
807 if (entry == q->queue_head.next) {
808 latency -= blkdev_entry_to_request(entry)->nr_segments;
809 if ((entry = entry->next) == &q->queue_head)
810 goto get_rq;
811 starving = 0;
812 }
813 }
814
815 prev = NULL;
816 do {
817 req = blkdev_entry_to_request(entry);
818
819 if (req->sem)
820 continue;
821 if (req->cmd != rw)
822 continue;
823 if (req->nr_sectors + count > max_sectors)
824 continue;
825 if (req->rq_dev != bh->b_rdev)
826 continue;
827 /* Can we add it to the end of this request? */
828 if (req->sector + req->nr_sectors == sector) {
829 if (latency - req->nr_segments < 0)
830 break;
831 /*
832 * The merge_fn is a more advanced way
833 * of accomplishing the same task. Instead
834 * of applying a fixed limit of some sort
835 * we instead define a function which can
836 * determine whether or not it is safe to
837 * merge the request or not.
838 *
839 * See if this queue has rules that
840 * may suggest that we shouldn't merge
841 * this
842 */
843 if(!(q->back_merge_fn)(q, req, bh, max_segments))
844 continue;
845 req->bhtail->b_reqnext = bh;
846 req->bhtail = bh;
847 req->nr_sectors += count;
848 drive_stat_acct(req, count, 0);
849
850 elevator_merge_after(q, req, latency);
851
852 /* Can we now merge this req with the next? */
853 attempt_merge(q, req, max_sectors, max_segments);
854 /* or to the beginning? */
855 } else if (req->sector - count == sector) {
856 if (!prev && starving)
857 continue;
858 /*
859 * The merge_fn is a more advanced way
860 * of accomplishing the same task. Instead
861 * of applying a fixed limit of some sort
862 * we instead define a function which can
863 * determine whether or not it is safe to
864 * merge the request or not.
865 *
866 * See if this queue has rules that
867 * may suggest that we shouldn't merge
868 * this
869 */
870 if(!(q->front_merge_fn)(q, req, bh, max_segments))
871 continue;
872 bh->b_reqnext = req->bh;
873 req->bh = bh;
874 req->buffer = bh->b_data;
875 req->current_nr_sectors = count;
876 req->sector = sector;
877 req->nr_sectors += count;
878 drive_stat_acct(req, count, 0);
879
880 elevator_merge_before(q, req, latency);
881
882 if (prev)
883 attempt_merge(q, prev, max_sectors, max_segments);
884 } else
885 continue;
886
887 q->elevator.sequence++;
888 spin_unlock_irqrestore(&io_request_lock,flags);
889 return;
890
891 } while (prev = req,
892 (latency -= req->nr_segments) >= 0 &&
893 (entry = entry->next) != &q->queue_head);
894
895 /* find an unused request. */
896 get_rq:
897 req = get_request(max_req, bh->b_rdev);
898
899 /*
900 * if no request available: if rw_ahead, forget it,
901 * otherwise try again blocking..
902 */
903 if (!req) {
904 spin_unlock_irqrestore(&io_request_lock,flags);
905 if (rw_ahead)
906 goto end_io;
907 req = __get_request_wait(max_req, bh->b_rdev);
908 spin_lock_irqsave(&io_request_lock,flags);
909
910 /* lock got dropped so revalidate elevator */
911 empty = 1;
912 if (!list_empty(&q->queue_head)) {
913 empty = 0;
914 __latency = orig_latency;
915 __entry = seek_to_not_starving_chunk(q, &__latency, &__starving);
916 }
917 }
918 /*
919 * Don't start the IO if the buffer has been
920 * invalidated meanwhile. (We have to do this
921 * within the io request lock and atomically
922 * before adding the request; see buffer.c's
923 * insert_into_queues_exclusive() function.)
924 */
925 if (!test_bit(BH_Req, &bh->b_state)) {
926 req->rq_status = RQ_INACTIVE;
927 spin_unlock_irqrestore(&io_request_lock,flags);
928 /*
929 * A fake 'everything went ok' completion event.
930 * The bh doesn't matter anymore, but we should not
931 * signal errors to RAID levels.
932 */
933 bh->b_end_io(bh, 1);
934 return;
935 }
936
937 /* fill up the request-info, and add it to the queue */
938 req->cmd = rw;
939 req->errors = 0;
940 req->sector = sector;
941 req->nr_sectors = count;
942 req->current_nr_sectors = count;
943 req->nr_segments = 1; /* Always 1 for a new request. */
944 req->nr_hw_segments = 1; /* Always 1 for a new request. */
945 req->buffer = bh->b_data;
946 req->sem = NULL;
947 req->bh = bh;
948 req->bhtail = bh;
949 req->q = q;
950 __add_request(q, req, empty, __entry, __latency, __starving);
951 elevator_account_request(q, req);
952
953 spin_unlock_irqrestore(&io_request_lock, flags);
954 return;
955
956 end_io:
957 bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
958 }
959
960 void generic_make_request(int rw, struct buffer_head * bh)
961 {
962 request_queue_t * q;
963 unsigned long flags;
964
965 q = blk_get_queue(bh->b_rdev);
966
967 __make_request(q, rw, bh);
968
969 spin_lock_irqsave(&io_request_lock,flags);
970 if (q && !q->plugged)
971 (q->request_fn)(q);
972 spin_unlock_irqrestore(&io_request_lock,flags);
973 }
974
975
976 /* This function can be used to request a number of buffers from a block
977 device. Currently the only restriction is that all buffers must belong to
978 the same device */
979
980 static void __ll_rw_block(int rw, int nr, struct buffer_head * bh[],int haslock)
981 {
982 unsigned int major;
983 int correct_size;
984 request_queue_t *q;
985 int i;
986
987 major = MAJOR(bh[0]->b_dev);
988 q = blk_get_queue(bh[0]->b_dev);
989 if (!q) {
990 printk(KERN_ERR
991 "ll_rw_block: Trying to read nonexistent block-device %s (%ld)\n",
992 kdevname(bh[0]->b_dev), bh[0]->b_blocknr);
993 goto sorry;
994 }
995
996 /* Determine correct block size for this device. */
997 correct_size = BLOCK_SIZE;
998 if (blksize_size[major]) {
999 i = blksize_size[major][MINOR(bh[0]->b_dev)];
1000 if (i)
1001 correct_size = i;
1002 }
1003
1004 /* Verify requested block sizes. */
1005 for (i = 0; i < nr; i++) {
1006 if (bh[i]->b_size != correct_size) {
1007 printk(KERN_NOTICE "ll_rw_block: device %s: "
1008 "only %d-char blocks implemented (%u)\n",
1009 kdevname(bh[0]->b_dev),
1010 correct_size, bh[i]->b_size);
1011 goto sorry;
1012 }
1013 }
1014
1015 if ((rw & WRITE) && is_read_only(bh[0]->b_dev)) {
1016 printk(KERN_NOTICE "Can't write to read-only device %s\n",
1017 kdevname(bh[0]->b_dev));
1018 goto sorry;
1019 }
1020
1021 for (i = 0; i < nr; i++) {
1022 /* Only one thread can actually submit the I/O. */
1023 if (haslock) {
1024 if (!buffer_locked(bh[i]))
1025 BUG();
1026 } else {
1027 if (test_and_set_bit(BH_Lock, &bh[i]->b_state))
1028 continue;
1029 }
1030 set_bit(BH_Req, &bh[i]->b_state);
1031
1032 if (q->make_request_fn)
1033 q->make_request_fn(rw, bh[i]);
1034 else {
1035 bh[i]->b_rdev = bh[i]->b_dev;
1036 bh[i]->b_rsector = bh[i]->b_blocknr*(bh[i]->b_size>>9);
1037
1038 generic_make_request(rw, bh[i]);
1039 }
1040 }
1041
1042 return;
1043
1044 sorry:
1045 for (i = 0; i < nr; i++) {
1046 mark_buffer_clean(bh[i]); /* remember to refile it */
1047 clear_bit(BH_Uptodate, &bh[i]->b_state);
1048 bh[i]->b_end_io(bh[i], 0);
1049 }
1050 return;
1051 }
1052
1053 void ll_rw_block(int rw, int nr, struct buffer_head * bh[])
1054 {
1055 __ll_rw_block(rw, nr, bh, 0);
1056 }
1057
1058 void ll_rw_block_locked(int rw, int nr, struct buffer_head * bh[])
1059 {
1060 __ll_rw_block(rw, nr, bh, 1);
1061 }
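/*
 * Illustrative sketch (not part of the original file): the classic
 * synchronous read built on top of ll_rw_block() -- essentially the
 * bread() pattern from fs/buffer.c.  Submit the buffer, wait for the I/O
 * to complete, then check the uptodate bit.  The function name is
 * hypothetical.
 */
#if 0
static struct buffer_head * example_read_block(kdev_t dev, int block, int size)
{
	struct buffer_head *bh = getblk(dev, block, size);

	if (buffer_uptodate(bh))
		return bh;			/* already valid in the cache */

	ll_rw_block(READ, 1, &bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return bh;

	brelse(bh);				/* I/O error */
	return NULL;
}
#endif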
1062
1063 #ifdef CONFIG_STRAM_SWAP
1064 extern int stram_device_init (void);
1065 #endif
1066
1067 /*
1068 * First step of what used to be end_request
1069 *
1070 * 0 means the request is complete: continue with end_that_request_last,
1071 * 1 means there are buffers left and the caller should carry on with the request
1072 */
1073
1074 int end_that_request_first (struct request *req, int uptodate, char *name)
1075 {
1076 struct buffer_head * bh;
1077 int nsect;
1078
1079 req->errors = 0;
1080 if (!uptodate) {
1081 printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1082 kdevname(req->rq_dev), name, req->sector);
1083 if ((bh = req->bh) != NULL) {
1084 nsect = bh->b_size >> 9;
1085 req->nr_sectors--;
1086 req->nr_sectors &= ~(nsect - 1);
1087 req->sector += nsect;
1088 req->sector &= ~(nsect - 1);
1089 }
1090 }
1091
1092 if ((bh = req->bh) != NULL) {
1093 req->bh = bh->b_reqnext;
1094 bh->b_reqnext = NULL;
1095 bh->b_end_io(bh, uptodate);
1096 if ((bh = req->bh) != NULL) {
1097 req->current_nr_sectors = bh->b_size >> 9;
1098 if (req->nr_sectors < req->current_nr_sectors) {
1099 req->nr_sectors = req->current_nr_sectors;
1100 printk("end_request: buffer-list destroyed\n");
1101 }
1102 req->buffer = bh->b_data;
1103 return 1;
1104 }
1105 }
1106 return 0;
1107 }
1108
1109 void end_that_request_last(struct request *req)
1110 {
1111 if (req->q)
1112 BUG();
1113 if (req->sem != NULL)
1114 up(req->sem);
1115 req->rq_status = RQ_INACTIVE;
1116 wake_up(&wait_for_request);
1117 }
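/*
 * Illustrative sketch (not part of the original file): how a driver's
 * completion path typically combines the two halves above.  If
 * end_that_request_first() returns 1 there are still buffers left in the
 * request and the driver carries on with the next chunk; otherwise the
 * request is taken off the queue and retired with end_that_request_last().
 * This is a simplified sketch of the usual end_request()-style helper, not
 * a copy of any particular driver.
 */
#if 0
static void example_end_request(struct request *req, int uptodate)
{
	if (end_that_request_first(req, uptodate, "example"))
		return;			/* more segments to transfer */

	list_del(&req->queue);		/* take it off the request queue */
	req->q = NULL;			/* end_that_request_last() insists */
	end_that_request_last(req);
}
#endif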
1118
1119 int __init blk_dev_init(void)
1120 {
1121 struct request * req;
1122 struct blk_dev_struct *dev;
1123
1124 for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) {
1125 dev->queue = NULL;
1126 blk_init_queue(&dev->request_queue, NULL);
1127 }
1128
1129 req = all_requests + NR_REQUEST;
1130 while (--req >= all_requests) {
1131 req->rq_status = RQ_INACTIVE;
1132 }
1133 memset(ro_bits,0,sizeof(ro_bits));
1134 memset(max_readahead, 0, sizeof(max_readahead));
1135 memset(max_sectors, 0, sizeof(max_sectors));
1136 #ifdef CONFIG_AMIGA_Z2RAM
1137 z2_init();
1138 #endif
1139 #ifdef CONFIG_STRAM_SWAP
1140 stram_device_init();
1141 #endif
1142 #ifdef CONFIG_BLK_DEV_RAM
1143 rd_init();
1144 #endif
1145 #ifdef CONFIG_BLK_DEV_LOOP
1146 loop_init();
1147 #endif
1148 #ifdef CONFIG_ISP16_CDI
1149 isp16_init();
1150 #endif /* CONFIG_ISP16_CDI */
1151 #ifdef CONFIG_BLK_DEV_IDE
1152 ide_init(); /* this MUST precede hd_init */
1153 #endif
1154 #ifdef CONFIG_BLK_DEV_HD
1155 hd_init();
1156 #endif
1157 #ifdef CONFIG_BLK_DEV_PS2
1158 ps2esdi_init();
1159 #endif
1160 #ifdef CONFIG_BLK_DEV_XD
1161 xd_init();
1162 #endif
1163 #ifdef CONFIG_BLK_DEV_MFM
1164 mfm_init();
1165 #endif
1166 #ifdef CONFIG_PARIDE
1167 { extern void paride_init(void); paride_init(); };
1168 #endif
1169 #ifdef CONFIG_MAC_FLOPPY
1170 swim3_init();
1171 #endif
1172 #ifdef CONFIG_BLK_DEV_SWIM_IOP
1173 swimiop_init();
1174 #endif
1175 #ifdef CONFIG_AMIGA_FLOPPY
1176 amiga_floppy_init();
1177 #endif
1178 #ifdef CONFIG_ATARI_FLOPPY
1179 atari_floppy_init();
1180 #endif
1181 #ifdef CONFIG_BLK_DEV_FD
1182 floppy_init();
1183 #else
1184 #if !defined (__mc68000__) && !defined(CONFIG_PPC) && !defined(__sparc__) \
1185 && !defined(CONFIG_APUS) && !defined(__sh__) && !defined(__arch_um__) \
1186 && !defined(__ia64__) /* XXX do something with the floppy controller?? */
1187 outb_p(0xc, 0x3f2);
1188 #endif
1189 #endif
1190 #ifdef CONFIG_CDU31A
1191 cdu31a_init();
1192 #endif /* CONFIG_CDU31A */
1193 #ifdef CONFIG_ATARI_ACSI
1194 acsi_init();
1195 #endif /* CONFIG_ATARI_ACSI */
1196 #ifdef CONFIG_MCD
1197 mcd_init();
1198 #endif /* CONFIG_MCD */
1199 #ifdef CONFIG_MCDX
1200 mcdx_init();
1201 #endif /* CONFIG_MCDX */
1202 #ifdef CONFIG_SBPCD
1203 sbpcd_init();
1204 #endif /* CONFIG_SBPCD */
1205 #ifdef CONFIG_AZTCD
1206 aztcd_init();
1207 #endif /* CONFIG_AZTCD */
1208 #ifdef CONFIG_CDU535
1209 sony535_init();
1210 #endif /* CONFIG_CDU535 */
1211 #ifdef CONFIG_GSCD
1212 gscd_init();
1213 #endif /* CONFIG_GSCD */
1214 #ifdef CONFIG_CM206
1215 cm206_init();
1216 #endif
1217 #ifdef CONFIG_OPTCD
1218 optcd_init();
1219 #endif /* CONFIG_OPTCD */
1220 #ifdef CONFIG_SJCD
1221 sjcd_init();
1222 #endif /* CONFIG_SJCD */
1223 #ifdef CONFIG_BLK_DEV_MD
1224 md_init();
1225 #endif /* CONFIG_BLK_DEV_MD */
1226 #ifdef CONFIG_APBLOCK
1227 ap_init();
1228 #endif
1229 #ifdef CONFIG_DDV
1230 ddv_init();
1231 #endif
1232 #ifdef CONFIG_BLK_DEV_NBD
1233 nbd_init();
1234 #endif
1235 return 0;
1236 }
1237
1238 EXPORT_SYMBOL(io_request_lock);
1239 EXPORT_SYMBOL(end_that_request_first);
1240 EXPORT_SYMBOL(end_that_request_last);
1241 EXPORT_SYMBOL(blk_init_queue);
1242 EXPORT_SYMBOL(blk_cleanup_queue);
1243 EXPORT_SYMBOL(blk_queue_headactive);
1244 EXPORT_SYMBOL(blk_queue_pluggable);
1245 EXPORT_SYMBOL(generic_make_request);
1246