From: Nick Piggin

This gets rid of the global queue_nr_requests and the direct usage of
BLKDEV_MAX_RQ (the latter is now only used to set the queues' defaults).

It also exports the per-queue value as a sysfs variable; I don't know if
that part is done right, though.

 /dev/null                        |    0 
 drivers/block/cfq-iosched.c      |    4 
 drivers/block/cfq-iosched.c.orig |    0 
 drivers/block/elevator.c         |   14 -
 drivers/block/elevator.c.orig    |  431 +++++++++++++++++++++++++++++++++++++++
 drivers/block/genhd.c            |    4 
 drivers/block/ll_rw_blk.c        |  195 ++++++++++++++---
 drivers/block/ll_rw_blk.c.orig   |  162 ++------------
 include/linux/blkdev.h           |   11 
 include/linux/blkdev.h.orig      |    0 
 include/linux/elevator.h         |    4 
 11 files changed, 645 insertions(+), 180 deletions(-)

diff -puN drivers/block/cfq-iosched.c~per-queue-nr_requests drivers/block/cfq-iosched.c
--- 25/drivers/block/cfq-iosched.c~per-queue-nr_requests	2003-05-25 03:38:15.000000000 -0700
+++ 25-akpm/drivers/block/cfq-iosched.c	2003-05-25 03:38:15.000000000 -0700
@@ -531,8 +531,6 @@ cfq_latter_request(request_queue_t *q, s
 	return NULL;
 }
 
-extern int queue_nr_requests;
-
 static int cfq_may_queue(request_queue_t *q, int rw)
 {
 	struct cfq_data *cfqd = q->elevator.elevator_data;
@@ -549,7 +547,7 @@ static int cfq_may_queue(request_queue_t
 	if (cfqq->queued[rw] < cfq_queued)
 		goto out;
 
-	limit = (queue_nr_requests - cfq_queued) / cfqd->busy_queues;
+	limit = (q->nr_requests - cfq_queued) / cfqd->busy_queues;
 	if (cfqq->queued[rw] > limit)
 		ret = 0;
 
diff -puN drivers/block/cfq-iosched.c.orig~per-queue-nr_requests drivers/block/cfq-iosched.c.orig
diff -puN drivers/block/elevator.c~per-queue-nr_requests drivers/block/elevator.c
--- 25/drivers/block/elevator.c~per-queue-nr_requests	2003-05-25 03:38:15.000000000 -0700
+++ 25-akpm/drivers/block/elevator.c	2003-05-25 03:38:15.000000000 -0700
@@ -379,17 +379,13 @@ void elv_completed_request(request_queue
 		e->elevator_completed_req_fn(q, rq);
 }
 
-int elv_register_queue(struct gendisk *disk)
+int elv_register_queue(struct request_queue *q)
 {
-	request_queue_t *q = disk->queue;
 	elevator_t *e;
 
-	if (!q)
-		return -ENXIO;
-
 	e = &q->elevator;
 
-	e->kobj.parent = kobject_get(&disk->kobj);
+	e->kobj.parent = kobject_get(&q->kobj);
 	if (!e->kobj.parent)
 		return -EBUSY;
 
@@ -399,14 +395,12 @@ int elv_register_queue(struct gendisk *d
 	return kobject_register(&e->kobj);
 }
 
-void elv_unregister_queue(struct gendisk *disk)
+void elv_unregister_queue(struct request_queue *q)
 {
-	request_queue_t *q = disk->queue;
-
 	if (q) {
 		elevator_t * e = &q->elevator;
 		kobject_unregister(&e->kobj);
-		kobject_put(&disk->kobj);
+		kobject_put(&q->kobj);
 	}
 }
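(To make the cfq change above concrete: the limit is now derived from the
queue's own nr_requests instead of the old global.  A tiny stand-alone sketch
of that arithmetic; the helper name and the values for cfq_queued and
busy_queues are made up for illustration, they are not taken from the patch.)

#include <stdio.h>

/* mirrors the limit calculation in cfq_may_queue(), with the per-queue
 * nr_requests in place of the old global queue_nr_requests */
static unsigned long cfq_limit(unsigned long nr_requests,
			       unsigned long cfq_queued,
			       unsigned long busy_queues)
{
	return (nr_requests - cfq_queued) / busy_queues;
}

int main(void)
{
	/* e.g. a queue left at the 128-request default, a cfq_queued of 8
	 * and four busy cfq queues: each queue may queue up to 30 */
	printf("%lu\n", cfq_limit(128, 8, 4));
	return 0;
}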
diff -puN drivers/block/genhd.c~per-queue-nr_requests drivers/block/genhd.c
--- 25/drivers/block/genhd.c~per-queue-nr_requests	2003-05-25 03:38:15.000000000 -0700
+++ 25-akpm/drivers/block/genhd.c	2003-05-25 03:38:15.000000000 -0700
@@ -233,7 +233,7 @@ void add_disk(struct gendisk *disk)
 	blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors,
 			NULL, exact_match, exact_lock, disk);
 	register_disk(disk);
-	elv_register_queue(disk);
+	blk_register_queue(disk);
 }
 
 EXPORT_SYMBOL(add_disk);
@@ -241,7 +241,7 @@ EXPORT_SYMBOL(del_gendisk);	/* in partit
 
 void unlink_gendisk(struct gendisk *disk)
 {
-	elv_unregister_queue(disk);
+	blk_unregister_queue(disk);
 	blk_unregister_region(MKDEV(disk->major, disk->first_minor),
 			disk->minors);
 }
diff -puN -L drivers/block/genhd.c.orig /dev/null /dev/null
diff -puN drivers/block/ll_rw_blk.c~per-queue-nr_requests drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~per-queue-nr_requests	2003-05-25 03:38:15.000000000 -0700
+++ 25-akpm/drivers/block/ll_rw_blk.c	2003-05-25 03:39:59.000000000 -0700
@@ -42,12 +42,6 @@ static kmem_cache_t *request_cachep;
 static LIST_HEAD(blk_plug_list);
 static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
-/*
- * Number of requests per queue.  This many for reads and for writes (twice
- * this number, total).
- */
-int queue_nr_requests;
-
 unsigned long blk_max_low_pfn, blk_max_pfn;
 int blk_nohighio = 0;
 
@@ -58,9 +52,9 @@ static struct workqueue_struct *kblockd_
 
 static wait_queue_head_t congestion_wqh[2];
 
-static inline int batch_requests(void)
+static inline int batch_requests(struct request_queue *q)
 {
-	return min(BLKDEV_MAX_RQ / 8, 8);
+	return min(q->nr_requests / 8, 8UL);
 }
 
 /*
@@ -68,11 +62,11 @@ static inline int batch_requests(void)
  * considered to be congested.  It include a little hysteresis to keep the
  * context switch rate down.
  */
-static inline int queue_congestion_on_threshold(void)
+static inline int queue_congestion_on_threshold(struct request_queue *q)
 {
 	int ret;
 
-	ret = queue_nr_requests / 8 - 1;
+	ret = q->nr_requests / 8 - 1;
 	if (ret < 0)
 		ret = 1;
 	return ret;
@@ -81,13 +75,13 @@ static inline int queue_congestion_on_th
 /*
  * The threshold at which a queue is considered to be uncongested
  */
-static inline int queue_congestion_off_threshold(void)
+static inline int queue_congestion_off_threshold(struct request_queue *q)
 {
 	int ret;
 
-	ret = queue_nr_requests / 8 + 1;
-	if (ret > queue_nr_requests)
-		ret = queue_nr_requests;
+	ret = q->nr_requests / 8 + 1;
+	if (ret > q->nr_requests)
+		ret = q->nr_requests;
 	return ret;
 }
 
@@ -200,6 +194,7 @@ void blk_queue_make_request(request_queu
 	/*
 	 * set defaults
 	 */
+	q->nr_requests = BLKDEV_MAX_RQ;
 	q->max_phys_segments = MAX_PHYS_SEGMENTS;
 	q->max_hw_segments = MAX_HW_SEGMENTS;
 	q->make_request_fn = mfn;
@@ -468,11 +463,6 @@ int blk_queue_init_tags(request_queue_t
 	struct blk_queue_tag *tags;
 	int bits, i;
 
-	if (depth > (queue_nr_requests*2)) {
-		depth = (queue_nr_requests*2);
-		printk("blk_queue_init_tags: adjusted depth to %d\n", depth);
-	}
-
 	tags = kmalloc(sizeof(struct blk_queue_tag),GFP_ATOMIC);
 	if (!tags)
 		goto fail;
@@ -1286,12 +1276,12 @@ static struct request *get_request(reque
 	struct request_list *rl = &q->rq;
 
 	spin_lock_irq(q->queue_lock);
-	if (rl->count[rw] == BLKDEV_MAX_RQ || !elv_may_queue(q, rw)) {
+	if (rl->count[rw] >= q->nr_requests || !elv_may_queue(q, rw)) {
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
 	rl->count[rw]++;
-	if ((BLKDEV_MAX_RQ - rl->count[rw]) < queue_congestion_on_threshold())
+	if ((q->nr_requests - rl->count[rw]) < queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
 
@@ -1299,7 +1289,7 @@ static struct request *get_request(reque
 	if (!rq) {
 		spin_lock_irq(q->queue_lock);
 		rl->count[rw]--;
-		if ((BLKDEV_MAX_RQ - rl->count[rw]) >= queue_congestion_off_threshold())
+		if ((q->nr_requests - rl->count[rw]) >= queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
 		spin_unlock_irq(q->queue_lock);
 		goto out;
@@ -1520,10 +1510,10 @@ void __blk_put_request(request_queue_t *
 		blk_free_request(q, req);
 
 		rl->count[rw]--;
-		if ((BLKDEV_MAX_RQ - rl->count[rw]) >=
-				queue_congestion_off_threshold())
+		if ((q->nr_requests - rl->count[rw]) >=
+				queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
-		if ((BLKDEV_MAX_RQ - rl->count[rw]) >= batch_requests() &&
+		if ((q->nr_requests - rl->count[rw]) >= batch_requests(q) &&
 				waitqueue_active(&rl->wait[rw]))
 			wake_up(&rl->wait[rw]);
 	}
@@ -2331,14 +2321,6 @@ int __init blk_dev_init(void)
 	if (!request_cachep)
 		panic("Can't create request pool slab cache\n");
 
-	queue_nr_requests = BLKDEV_MAX_RQ;
-
-	printk("block request queues:\n");
-	printk(" %d/%d requests per read queue\n", BLKDEV_MIN_RQ, queue_nr_requests);
-	printk(" %d/%d requests per write queue\n", BLKDEV_MIN_RQ, queue_nr_requests);
-	printk(" enter congestion at %d\n", queue_congestion_on_threshold());
-	printk(" exit congestion at %d\n", queue_congestion_off_threshold());
-
 	blk_max_low_pfn = max_low_pfn;
 	blk_max_pfn = max_pfn;
 
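(For reference, the hysteresis the two thresholds give can be worked out
directly from q->nr_requests.  A small stand-alone sketch using the
128-request default; it only re-derives the numbers, it is not the kernel
code itself.)

#include <stdio.h>

int main(void)
{
	unsigned long nr_requests = 128;	/* the BLKDEV_MAX_RQ default */
	long on  = nr_requests / 8 - 1;		/* congested below this many free */
	long off = nr_requests / 8 + 1;		/* uncongested again at this many free */
	unsigned long batch = nr_requests / 8 < 8 ? nr_requests / 8 : 8;

	if (on < 0)
		on = 1;
	if (off > (long)nr_requests)
		off = nr_requests;

	/* with 128 requests: congested when fewer than 15 are free, cleared
	 * again once 17 are free - the two-request gap is the hysteresis
	 * that keeps the queue from flapping between states */
	printf("on=%ld off=%ld batch=%lu\n", on, off, batch);
	return 0;
}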
@@ -2347,6 +2329,153 @@ int __init blk_dev_init(void)
 	return 0;
 }
 
+/*
+ * sysfs parts below
+ */
+struct queue_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct request_queue *, char *);
+	ssize_t (*store)(struct request_queue *, const char *, size_t);
+};
+
+static ssize_t
+queue_var_show(unsigned int var, char *page)
+{
+	return sprintf(page, "%d\n", var);
+}
+
+static ssize_t
+queue_var_store(unsigned long *var, const char *page, size_t count)
+{
+	char *p = (char *) page;
+
+	*var = simple_strtoul(p, &p, 10);
+	return count;
+}
+
+static ssize_t queue_requests_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->nr_requests, (page));
+}
+
+static ssize_t
+queue_requests_store(struct request_queue *q, const char *page, size_t count)
+{
+	struct request_list *rl = &q->rq;
+
+	int ret = queue_var_store(&q->nr_requests, page, count);
+	if (q->nr_requests < BLKDEV_MIN_RQ)
+		q->nr_requests = BLKDEV_MIN_RQ;
+
+	if ((q->nr_requests - rl->count[READ]) <
+			queue_congestion_on_threshold(q))
+		set_queue_congested(q, READ);
+	else if ((q->nr_requests - rl->count[READ]) >=
+			queue_congestion_off_threshold(q))
+		clear_queue_congested(q, READ);
+
+	if ((q->nr_requests - rl->count[WRITE]) <
+			queue_congestion_on_threshold(q))
+		set_queue_congested(q, WRITE);
+	else if ((q->nr_requests - rl->count[WRITE]) >=
+			queue_congestion_off_threshold(q))
+		clear_queue_congested(q, WRITE);
+
+	return ret;
+}
+
+static struct queue_sysfs_entry queue_requests_entry = {
+	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_requests_show,
+	.store = queue_requests_store,
+};
+
+static struct attribute *default_attrs[] = {
+	&queue_requests_entry.attr,
+	NULL,
+};
+
+#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
+
+static ssize_t
+queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+	struct queue_sysfs_entry *entry = to_queue(attr);
+	struct request_queue *q;
+
+	q = container_of(kobj, struct request_queue, kobj);
+	if (!entry->show)
+		return 0;
+
+	return entry->show(q, page);
+}
+
+static ssize_t
+queue_attr_store(struct kobject *kobj, struct attribute *attr,
+		 const char *page, size_t length)
+{
+	struct queue_sysfs_entry *entry = to_queue(attr);
+	struct request_queue *q;
+
+	q = container_of(kobj, struct request_queue, kobj);
+	if (!entry->store)
+		return -EINVAL;
+
+	return entry->store(q, page, length);
+}
+
+static struct sysfs_ops queue_sysfs_ops = {
+	.show	= queue_attr_show,
+	.store	= queue_attr_store,
+};
+
+struct kobj_type queue_ktype = {
+	.sysfs_ops	= &queue_sysfs_ops,
+	.default_attrs	= default_attrs,
+};
+
+int blk_register_queue(struct gendisk *disk)
+{
+	int ret;
+
+	request_queue_t *q = disk->queue;
+
+	if (!q)
+		return -ENXIO;
+
+	q->kobj.parent = kobject_get(&disk->kobj);
+	if (!q->kobj.parent)
+		return -EBUSY;
+
+	snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
+	q->kobj.ktype = &queue_ktype;
+
+	ret = kobject_register(&q->kobj);
+	if (ret < 0)
+		return ret;
+
+	ret = elv_register_queue(q);
+	if (ret) {
+		kobject_unregister(&q->kobj);
+		return ret;
+	}
+
+	return 0;
+}
+
+void blk_unregister_queue(struct gendisk *disk)
+{
+	request_queue_t *q = disk->queue;
+
+	if (q) {
+		elv_unregister_queue(q);
+
+		kobject_unregister(&q->kobj);
+		kobject_put(&disk->kobj);
+	}
+}
+
+
 EXPORT_SYMBOL(process_that_request_first);
 EXPORT_SYMBOL(end_that_request_first);
 EXPORT_SYMBOL(end_that_request_chunk);
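(With the queue kobject parented on the gendisk and named "queue", the new
attribute should show up as /sys/block/<disk>/queue/nr_requests.  A minimal
user-space sketch of reading and resizing it, assuming sysfs is mounted on
/sys; the device name "hda" and the value 256 are made-up examples.)

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/hda/queue/nr_requests";
	unsigned long nr;
	FILE *f;

	/* read the current per-queue request limit */
	f = fopen(path, "r");
	if (!f || fscanf(f, "%lu", &nr) != 1)
		return 1;
	fclose(f);
	printf("nr_requests = %lu\n", nr);

	/* raise it; the store method clamps anything below BLKDEV_MIN_RQ */
	f = fopen(path, "w");
	if (!f)
		return 1;
	fprintf(f, "%d\n", 256);
	fclose(f);
	return 0;
}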
diff -puN include/linux/blkdev.h~per-queue-nr_requests include/linux/blkdev.h
--- 25/include/linux/blkdev.h~per-queue-nr_requests	2003-05-25 03:38:15.000000000 -0700
+++ 25-akpm/include/linux/blkdev.h	2003-05-25 03:38:15.000000000 -0700
@@ -21,7 +21,7 @@ struct elevator_s;
 typedef struct elevator_s elevator_t;
 
 #define BLKDEV_MIN_RQ	4
-#define BLKDEV_MAX_RQ	128
+#define BLKDEV_MAX_RQ	128	/* Default maximum */
 
 struct request_list {
 	int count[2];
@@ -241,8 +241,15 @@ struct request_queue
 	spinlock_t		*queue_lock;
 
 	/*
+	 * queue kobject
+	 */
+	struct kobject kobj;
+
+	/*
 	 * queue settings
 	 */
+	unsigned long		nr_requests;	/* Max # of requests */
+
 	unsigned short		max_sectors;
 	unsigned short		max_phys_segments;
 	unsigned short		max_hw_segments;
@@ -364,6 +371,8 @@ struct sec_size {
 	unsigned block_size_bits;
 };
 
+extern int blk_register_queue(struct gendisk *disk);
+extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
 extern void generic_make_request(struct bio *bio);
 extern void blk_put_request(struct request *);
diff -puN include/linux/blkdev.h.orig~per-queue-nr_requests include/linux/blkdev.h.orig
diff -puN include/linux/elevator.h~per-queue-nr_requests include/linux/elevator.h
--- 25/include/linux/elevator.h~per-queue-nr_requests	2003-05-25 03:38:15.000000000 -0700
+++ 25-akpm/include/linux/elevator.h	2003-05-25 03:38:15.000000000 -0700
@@ -68,8 +68,8 @@ extern int elv_queue_empty(request_queue
 extern struct request *elv_next_request(struct request_queue *q);
 extern struct request *elv_former_request(request_queue_t *, struct request *);
 extern struct request *elv_latter_request(request_queue_t *, struct request *);
-extern int elv_register_queue(struct gendisk *);
-extern void elv_unregister_queue(struct gendisk *);
+extern int elv_register_queue(request_queue_t *q);
+extern void elv_unregister_queue(request_queue_t *q);
 extern int elv_may_queue(request_queue_t *, int);
 extern void elv_completed_request(request_queue_t *, struct request *);
 extern int elv_set_request(request_queue_t *, struct request *, int);
diff -puN -L include/linux/elevator.h.orig /dev/null /dev/null
_
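(Since nr_requests now lives in struct request_queue and elv_register_queue()
takes the queue directly, an io scheduler other than cfq could scale its own
admission limit off the per-queue value as well.  A hypothetical sketch of
such a may_queue hook; example_may_queue() and the 3/4 ratio are invented for
illustration and are not part of this patch.)

static int example_may_queue(request_queue_t *q, int rw)
{
	struct request_list *rl = &q->rq;

	/* let a single direction take at most 3/4 of this queue's
	 * per-queue request allocation */
	if (rl->count[rw] > (3 * q->nr_requests) / 4)
		return 0;

	return 1;
}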