From: Nick Piggin One of the problems with the multilevel balance-on-fork/exec is that it needs to jump through hoops to satisfy sched-domain's locking semantics (that is, you may traverse your own domain when not preemptable, and you may traverse others' domains when holding their runqueue lock). balance-on-exec had to potentially migrate between more than one CPU before finding a final CPU to migrate to, and balance-on-fork needed to potentially take multiple runqueue locks. So bite the bullet and make sched-domains go completely RCU. This actually simplifies the code quite a bit. From: Ingo Molnar schedstats RCU fix, and a nice comment on for_each_domain, from Ingo. Signed-off-by: Ingo Molnar Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- kernel/sched.c | 58 +++++++++++++-------------------------------------------- 1 files changed, 14 insertions(+), 44 deletions(-) diff -puN kernel/sched.c~sched-rcu-domains kernel/sched.c --- 25/kernel/sched.c~sched-rcu-domains 2005-06-18 02:55:38.000000000 -0700 +++ 25-akpm/kernel/sched.c 2005-06-18 02:55:38.000000000 -0700 @@ -262,6 +262,13 @@ struct runqueue { static DEFINE_PER_CPU(struct runqueue, runqueues); +/* + * The domain tree (rq->sd) is protected by RCU's quiescent state transition. + * See update_sched_domains: synchronize_kernel for details. + * + * The domain tree of any CPU may only be accessed from within + * preempt-disabled sections. + */ #define for_each_domain(cpu, domain) \ for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) @@ -397,6 +404,7 @@ static int show_schedstat(struct seq_fil #ifdef CONFIG_SMP /* domain-specific stats */ + preempt_disable(); for_each_domain(cpu, sd) { enum idle_type itype; char mask_str[NR_CPUS]; @@ -421,6 +429,7 @@ static int show_schedstat(struct seq_fil sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); } + preempt_enable(); #endif } return 0; @@ -826,22 +835,12 @@ inline int task_curr(const task_t *p) } #ifdef CONFIG_SMP -enum request_type { - REQ_MOVE_TASK, - REQ_SET_DOMAIN, -}; - typedef struct { struct list_head list; - enum request_type type; - /* For REQ_MOVE_TASK */ task_t *task; int dest_cpu; - /* For REQ_SET_DOMAIN */ - struct sched_domain *sd; - struct completion done; } migration_req_t; @@ -863,7 +862,6 @@ static int migrate_task(task_t *p, int d } init_completion(&req->done); - req->type = REQ_MOVE_TASK; req->task = p; req->dest_cpu = dest_cpu; list_add(&req->list, &rq->migration_queue); @@ -4381,17 +4379,9 @@ static int migration_thread(void * data) req = list_entry(head->next, migration_req_t, list); list_del_init(head->next); - if (req->type == REQ_MOVE_TASK) { - spin_unlock(&rq->lock); - __migrate_task(req->task, cpu, req->dest_cpu); - local_irq_enable(); - } else if (req->type == REQ_SET_DOMAIN) { - rq->sd = req->sd; - spin_unlock_irq(&rq->lock); - } else { - spin_unlock_irq(&rq->lock); - WARN_ON(1); - } + spin_unlock(&rq->lock); + __migrate_task(req->task, cpu, req->dest_cpu); + local_irq_enable(); complete(&req->done); } @@ -4622,7 +4612,6 @@ static int migration_call(struct notifie migration_req_t *req; req = list_entry(rq->migration_queue.next, migration_req_t, list); - BUG_ON(req->type != REQ_MOVE_TASK); list_del_init(&req->list); complete(&req->done); } @@ -4926,10 +4915,7 @@ static int __devinit sd_parent_degenerat */ void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) { - migration_req_t req; - unsigned long flags; runqueue_t *rq = cpu_rq(cpu); - int local = 1; struct sched_domain *tmp; /* Remove the sched domains which do not contribute to scheduling. */ @@ -4946,24 +4932,7 @@ void __devinit cpu_attach_domain(struct sched_domain_debug(sd, cpu); - spin_lock_irqsave(&rq->lock, flags); - - if (cpu == smp_processor_id() || !cpu_online(cpu)) { - rq->sd = sd; - } else { - init_completion(&req.done); - req.type = REQ_SET_DOMAIN; - req.sd = sd; - list_add(&req.list, &rq->migration_queue); - local = 0; - } - - spin_unlock_irqrestore(&rq->lock, flags); - - if (!local) { - wake_up_process(rq->migration_thread); - wait_for_completion(&req.done); - } + rq->sd = sd; } /* cpus with isolated domains */ @@ -5238,6 +5207,7 @@ static int update_sched_domains(struct n case CPU_DOWN_PREPARE: for_each_online_cpu(i) cpu_attach_domain(NULL, i); + synchronize_kernel(); arch_destroy_sched_domains(); return NOTIFY_OK; _