From: Rusty Russell

[ These patches are the version of the hotplug code which atomically
  takes down CPUs, as suggested by Ingo Molnar.  They have been
  test-compiled as a group with CONFIG_SMP on and off, CONFIG_HOTPLUG_CPU
  on and off, and CONFIG_MODULE_UNLOAD on and off. ]

The "bogolock" code was introduced in module.c as a way of freezing the
machine when we wanted to remove a module.  This patch moves it out to
stop_machine.c and stop_machine.h.  Since the code changes affinity and
priority, it's impolite to hijack the current context, so we use a
kthread.  This means we have to pass in the function to run, rather than
implementing "stop_machine()" and "restart_machine()" directly.  (Two
illustrative usage sketches follow the diff.)

---

 include/linux/stop_machine.h |   52 +++++++++++
 init/Kconfig                 |    6 +
 kernel/Makefile              |    1 
 kernel/stop_machine.c        |  199 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 258 insertions(+)

diff -puN /dev/null include/linux/stop_machine.h
--- /dev/null	2002-08-30 16:31:37.000000000 -0700
+++ 25-akpm/include/linux/stop_machine.h	2004-02-14 23:19:05.000000000 -0800
@@ -0,0 +1,52 @@
+#ifndef _LINUX_STOP_MACHINE
+#define _LINUX_STOP_MACHINE
+/* "Bogolock": stop the entire machine, disable interrupts.  This is a
+   very heavy lock, which is equivalent to grabbing every spinlock
+   (and more).  So the "read" side of such a lock is anything which
+   disables preempt. */
+#include <linux/cpu.h>
+#include <linux/threads.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_SMP
+/**
+ * stop_machine_run: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
+ *
+ * Description: This causes a thread to be scheduled on every other cpu,
+ * each of which disables interrupts, and finally interrupts are disabled
+ * on the current CPU.  The result is that no one is holding a spinlock
+ * or inside any other preempt-disabled region when @fn() runs.
+ *
+ * This can be thought of as a very heavy write lock, equivalent to
+ * grabbing every spinlock in the kernel. */
+int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+
+/**
+ * __stop_machine_run: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for @fn
+ * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS).
+ *
+ * Description: This is a special version of the above, which returns the
+ * thread which has run @fn(): kthread_stop will return the return value
+ * of @fn().  Used by hotplug cpu.
+ */
+struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
+				       unsigned int cpu);
+
+#else
+
+static inline int stop_machine_run(int (*fn)(void *), void *data,
+				   unsigned int cpu)
+{
+	int ret;
+	local_irq_disable();
+	ret = fn(data);
+	local_irq_enable();
+	return ret;
+}
+#endif /* CONFIG_SMP */
+#endif /* _LINUX_STOP_MACHINE */
diff -puN init/Kconfig~hotplugcpu-generalise-bogolock init/Kconfig
--- 25/init/Kconfig~hotplugcpu-generalise-bogolock	2004-02-14 23:19:05.000000000 -0800
+++ 25-akpm/init/Kconfig	2004-02-14 23:19:05.000000000 -0800
@@ -285,4 +285,10 @@ config KMOD
 	  runs modprobe with the appropriate arguments, thereby loading
 	  the module if it is available.  If unsure, say Y.
 
+config STOP_MACHINE
+	bool
+	default y
+	depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
+	help
+	  Need stop_machine() primitive.
 endmenu
diff -puN kernel/Makefile~hotplugcpu-generalise-bogolock kernel/Makefile
--- 25/kernel/Makefile~hotplugcpu-generalise-bogolock	2004-02-14 23:19:05.000000000 -0800
+++ 25-akpm/kernel/Makefile	2004-02-14 23:19:05.000000000 -0800
@@ -20,6 +20,7 @@ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_IKCONFIG_PROC) += configs.o
+obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra, the -fno-omit-frame-pointer is
diff -puN /dev/null kernel/stop_machine.c
--- /dev/null	2002-08-30 16:31:37.000000000 -0700
+++ 25-akpm/kernel/stop_machine.c	2004-02-14 23:19:05.000000000 -0800
@@ -0,0 +1,199 @@
+#include <linux/stop_machine.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+/* Since we affect priority and affinity (both of which are visible
+ * to, and settable by, outside processes) we do indirection via a
+ * kthread. */
+
+/* Thread to stop each CPU in user context. */
+enum stopmachine_state {
+	STOPMACHINE_WAIT,
+	STOPMACHINE_PREPARE,
+	STOPMACHINE_DISABLE_IRQ,
+	STOPMACHINE_EXIT,
+};
+
+static enum stopmachine_state stopmachine_state;
+static unsigned int stopmachine_num_threads;
+static atomic_t stopmachine_thread_ack;
+static DECLARE_MUTEX(stopmachine_mutex);
+
+static int stopmachine(void *cpu)
+{
+	int irqs_disabled = 0;
+	int prepared = 0;
+
+	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
+
+	/* Ack: we are alive */
+	mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
+	atomic_inc(&stopmachine_thread_ack);
+
+	/* Simple state machine */
+	while (stopmachine_state != STOPMACHINE_EXIT) {
+		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
+		    && !irqs_disabled) {
+			local_irq_disable();
+			irqs_disabled = 1;
+			/* Ack: irqs disabled. */
+			mb(); /* Must read state first. */
+			atomic_inc(&stopmachine_thread_ack);
+		} else if (stopmachine_state == STOPMACHINE_PREPARE
+			   && !prepared) {
+			/* Everyone is in place, hold CPU. */
+			preempt_disable();
+			prepared = 1;
+			mb(); /* Must read state first. */
+			atomic_inc(&stopmachine_thread_ack);
+		}
+		cpu_relax();
+	}
+
+	/* Ack: we are exiting. */
+	mb(); /* Must read state first. */
+	atomic_inc(&stopmachine_thread_ack);
+
+	if (irqs_disabled)
+		local_irq_enable();
+	if (prepared)
+		preempt_enable();
+
+	return 0;
+}
+
+/* Change the thread state */
+static void stopmachine_set_state(enum stopmachine_state state)
+{
+	atomic_set(&stopmachine_thread_ack, 0);
+	wmb();
+	stopmachine_state = state;
+	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
+		cpu_relax();
+}
+
+static int stop_machine(void)
+{
+	int i, ret = 0;
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+	/* One high-prio thread per cpu.  We'll do this one. */
+	sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
+
+	atomic_set(&stopmachine_thread_ack, 0);
+	stopmachine_num_threads = 0;
+	stopmachine_state = STOPMACHINE_WAIT;
+
+	for_each_online_cpu(i) {
+		if (i == smp_processor_id())
+			continue;
+		ret = kernel_thread(stopmachine, (void *)(long)i, CLONE_KERNEL);
+		if (ret < 0)
+			break;
+		stopmachine_num_threads++;
+	}
+
+	/* Wait for them all to come to life. */
+	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
+		yield();
+
+	/* If some failed, kill them all. */
+	if (ret < 0) {
+		stopmachine_set_state(STOPMACHINE_EXIT);
+		up(&stopmachine_mutex);
+		return ret;
+	}
+
+	/* Don't schedule us away at this point, please. */
+	local_irq_disable();
+
+	/* Now they are all started, make them hold the CPUs, ready. */
+	stopmachine_set_state(STOPMACHINE_PREPARE);
+
+	/* Make them disable irqs. */
+	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
+
+	return 0;
+}
+
+static void restart_machine(void)
+{
+	stopmachine_set_state(STOPMACHINE_EXIT);
+	local_irq_enable();
+}
+
+struct stop_machine_data
+{
+	int (*fn)(void *);
+	void *data;
+	struct completion done;
+};
+
+static int do_stop(void *_smdata)
+{
+	struct stop_machine_data *smdata = _smdata;
+	int ret;
+
+	ret = stop_machine();
+	if (ret == 0) {
+		ret = smdata->fn(smdata->data);
+		restart_machine();
+	}
+
+	/* We're done: you can kthread_stop us now */
+	complete(&smdata->done);
+
+	/* Wait for kthread_stop */
+	while (!signal_pending(current)) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	return ret;
+}
+
+struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
+				       unsigned int cpu)
+{
+	struct stop_machine_data smdata;
+	struct task_struct *p;
+
+	smdata.fn = fn;
+	smdata.data = data;
+	init_completion(&smdata.done);
+
+	down(&stopmachine_mutex);
+
+	/* If they don't care which CPU @fn runs on, bind to any online one. */
+	if (cpu == NR_CPUS)
+		cpu = smp_processor_id();
+
+	p = kthread_create(do_stop, &smdata, "kstopmachine");
+	if (!IS_ERR(p)) {
+		kthread_bind(p, cpu);
+		wake_up_process(p);
+		wait_for_completion(&smdata.done);
+	}
+	up(&stopmachine_mutex);
+	return p;
+}
+
+int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+{
+	struct task_struct *p;
+	int ret;
+
+	/* No CPUs can come up or down during this. */
+	lock_cpu_hotplug();
+	p = __stop_machine_run(fn, data, cpu);
+	if (!IS_ERR(p))
+		ret = kthread_stop(p);
+	else
+		ret = PTR_ERR(p);
+	unlock_cpu_hotplug();
+
+	return ret;
+}
_
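
Usage sketch 1.  This is not part of the patch; swap_args, do_swap and
swap_counters are invented names.  A caller packs its arguments into a
struct and hands a callback to stop_machine_run(), much as module.c's
unload path does:

	#include <linux/stop_machine.h>

	/* Hypothetical: exchange two counters while no other CPU is
	 * inside a preempt-disabled region. */
	struct swap_args {
		int *a;
		int *b;
	};

	/* Runs with all other CPUs spinning, interrupts off everywhere. */
	static int do_swap(void *_args)
	{
		struct swap_args *args = _args;
		int tmp = *args->a;

		*args->a = *args->b;
		*args->b = tmp;
		return 0;
	}

	static int swap_counters(int *a, int *b)
	{
		struct swap_args args = { a, b };

		/* NR_CPUS means "any online CPU will do". */
		return stop_machine_run(do_swap, &args, NR_CPUS);
	}

stop_machine_run() hands the callback's return value back through
kthread_stop(), so swap_counters() sees do_swap()'s result directly.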
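
Usage sketch 2, for the two-step form.  Again not part of the patch:
noop_fn and example_atomic_phase are invented, and the caller is assumed
to already hold lock_cpu_hotplug(), as stop_machine_run() itself does:

	#include <linux/kthread.h>
	#include <linux/err.h>
	#include <linux/stop_machine.h>

	static int noop_fn(void *unused)
	{
		return 0;
	}

	static int example_atomic_phase(unsigned int cpu)
	{
		struct task_struct *p;

		/* Freeze the machine, run noop_fn() on @cpu, thaw. */
		p = __stop_machine_run(noop_fn, NULL, cpu);
		if (IS_ERR(p))
			return PTR_ERR(p);

		/* The machine is live again here; the kstopmachine
		 * thread merely parks until we reap @fn's return value. */
		return kthread_stop(p);
	}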