diff -urN linux-2.5.4-pre5/arch/i386/Config.help linux/arch/i386/Config.help --- linux-2.5.4-pre5/arch/i386/Config.help Fri Feb 8 22:26:29 2002 +++ linux/arch/i386/Config.help Sat Feb 9 00:26:16 2002 @@ -25,6 +25,16 @@ If you don't know what to do here, say N. +CONFIG_PREEMPT + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. + + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + CONFIG_X86 This is Linux's home port. Linux was originally native to the Intel 386, and runs on all the later x86 processors including the Intel diff -urN linux-2.5.4-pre5/arch/i386/config.in linux/arch/i386/config.in --- linux-2.5.4-pre5/arch/i386/config.in Fri Feb 8 22:26:29 2002 +++ linux/arch/i386/config.in Sat Feb 9 00:26:16 2002 @@ -167,6 +167,7 @@ bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP +bool 'Preemptible Kernel' CONFIG_PREEMPT if [ "$CONFIG_SMP" != "y" ]; then bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC @@ -180,9 +181,12 @@ bool 'Multiquad NUMA system' CONFIG_MULTIQUAD fi -if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y +if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then + if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi fi + endmenu mainmenu_option next_comment diff -urN linux-2.5.4-pre5/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- linux-2.5.4-pre5/arch/i386/kernel/entry.S Fri Feb 8 22:26:29 2002 +++ linux/arch/i386/kernel/entry.S Sat Feb 9 00:26:16 2002 @@ -69,6 +69,37 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 +/* These are offsets into the irq_stat structure + * There is one per cpu and it is aligned to 32 + * byte boundry (we put that here as a shift count) + */ +irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT +irq_stat_local_irq_count = 4 +irq_stat_local_bh_count = 8 + +#ifdef CONFIG_SMP +#define GET_CPU_INDX movl TI_CPU(%ebx),%eax; \ + shll $irq_array_shift,%eax +#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx); \ + GET_CPU_INDX +#define CPU_INDX (,%eax) +#else +#define GET_CPU_INDX +#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx) +#define CPU_INDX +#endif + +#ifdef CONFIG_PREEMPT +#define preempt_stop cli +#define init_ret_intr \ + cli; \ + decl TI_PRE_COUNT(%ebx); +#else +#define preempt_stop +#define init_ret_intr +#define resume_kernel restore_all +#endif + #define SAVE_ALL \ cld; \ pushl %es; \ @@ -176,11 +207,12 @@ ALIGN ENTRY(ret_from_intr) GET_THREAD_INFO(%ebx) + init_ret_intr ret_from_exception: movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax - jz restore_all # returning to kernel-space or vm86-space + jz resume_kernel # returning to kernel or vm86-space ENTRY(resume_userspace) cli # make sure we don't miss an interrupt setting need_resched # or sigpending between sampling and the iret @@ -189,6 +221,22 @@ jne work_pending jmp restore_all +#ifdef CONFIG_PREEMPT +ENTRY(resume_kernel) + cmpl $0,TI_PRE_COUNT(%ebx) + jnz restore_all + movl TI_FLAGS(%ebx),%ecx + testb $_TIF_NEED_RESCHED,%cl + jz restore_all + movl 
SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx + addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx + jnz restore_all + incl TI_PRE_COUNT(%ebx) + sti + call SYMBOL_NAME(preempt_schedule) + jmp ret_from_intr +#endif + # system call handler stub ALIGN ENTRY(system_call) @@ -302,6 +350,7 @@ GET_THREAD_INFO(%ebx) call *%edi addl $8,%esp + preempt_stop jmp ret_from_exception ENTRY(coprocessor_error) @@ -321,12 +370,14 @@ movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) jne device_not_available_emulate + preempt_stop call SYMBOL_NAME(math_state_restore) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP call SYMBOL_NAME(math_emulate) addl $4,%esp + preempt_stop jmp ret_from_exception ENTRY(debug) diff -urN linux-2.5.4-pre5/arch/i386/kernel/i387.c linux/arch/i386/kernel/i387.c --- linux-2.5.4-pre5/arch/i386/kernel/i387.c Fri Feb 8 22:26:29 2002 +++ linux/arch/i386/kernel/i387.c Sat Feb 9 00:26:16 2002 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -63,6 +64,7 @@ void kernel_fpu_begin(void) { + preempt_disable(); if (test_thread_flag(TIF_USEDFPU)) { __save_init_fpu(current); return; diff -urN linux-2.5.4-pre5/arch/i386/kernel/smp.c linux/arch/i386/kernel/smp.c --- linux-2.5.4-pre5/arch/i386/kernel/smp.c Fri Feb 8 22:26:29 2002 +++ linux/arch/i386/kernel/smp.c Sat Feb 9 00:26:16 2002 @@ -497,7 +497,7 @@ /* * The target CPU will unlock the migration spinlock: */ - spin_lock(&migration_lock); + _raw_spin_lock(&migration_lock); new_task = p; send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR); } @@ -511,7 +511,7 @@ ack_APIC_irq(); p = new_task; - spin_unlock(&migration_lock); + _raw_spin_unlock(&migration_lock); sched_task_migrated(p); } /* diff -urN linux-2.5.4-pre5/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- linux-2.5.4-pre5/arch/i386/kernel/traps.c Fri Feb 8 22:26:29 2002 +++ linux/arch/i386/kernel/traps.c Sat Feb 9 00:26:16 2002 @@ -710,6 +710,8 @@ * * Careful.. There are problems with IBM-designed IRQ13 behaviour. * Don't touch unless you *really* know how it works. + * + * Must be called with kernel preemption disabled. 
*/ asmlinkage void math_state_restore(struct pt_regs regs) { diff -urN linux-2.5.4-pre5/fs/exec.c linux/fs/exec.c --- linux-2.5.4-pre5/fs/exec.c Fri Feb 8 22:26:18 2002 +++ linux/fs/exec.c Sat Feb 9 00:26:16 2002 @@ -420,8 +420,8 @@ active_mm = current->active_mm; current->mm = mm; current->active_mm = mm; - task_unlock(current); activate_mm(active_mm, mm); + task_unlock(current); mm_release(); if (old_mm) { if (active_mm != old_mm) BUG(); diff -urN linux-2.5.4-pre5/include/asm-i386/hardirq.h linux/include/asm-i386/hardirq.h --- linux-2.5.4-pre5/include/asm-i386/hardirq.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/hardirq.h Sat Feb 9 00:30:43 2002 @@ -36,6 +36,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #include diff -urN linux-2.5.4-pre5/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- linux-2.5.4-pre5/include/asm-i386/highmem.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/highmem.h Sat Feb 9 00:30:43 2002 @@ -88,6 +88,7 @@ enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -109,8 +110,10 @@ unsigned long vaddr = (unsigned long) kvaddr; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) // FIXME + if (vaddr < FIXADDR_START) { // FIXME + preempt_enable(); return; + } if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) BUG(); @@ -122,6 +125,8 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif + + preempt_enable(); } #endif /* __KERNEL__ */ diff -urN linux-2.5.4-pre5/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h --- linux-2.5.4-pre5/include/asm-i386/hw_irq.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/hw_irq.h Sat Feb 9 00:26:44 2002 @@ -96,6 +96,18 @@ #define __STR(x) #x #define STR(x) __STR(x) +#define GET_THREAD_INFO \ + "movl $-8192, %ebx\n\t" \ + "andl %esp, %ebx\n\t" + +#ifdef CONFIG_PREEMPT +#define BUMP_LOCK_COUNT \ + GET_THREAD_INFO \ + "incl 16(%ebx)\n\t" +#else +#define BUMP_LOCK_COUNT +#endif + #define SAVE_ALL \ "cld\n\t" \ "pushl %es\n\t" \ @@ -109,7 +121,8 @@ "pushl %ebx\n\t" \ "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" + "movl %edx,%es\n\t" \ + BUMP_LOCK_COUNT #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) diff -urN linux-2.5.4-pre5/include/asm-i386/i387.h linux/include/asm-i386/i387.h --- linux-2.5.4-pre5/include/asm-i386/i387.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/i387.h Sat Feb 9 00:30:43 2002 @@ -12,6 +12,7 @@ #define __ASM_I386_I387_H #include +#include #include #include #include @@ -24,7 +25,7 @@ extern void restore_fpu( struct task_struct *tsk ); extern void kernel_fpu_begin(void); -#define kernel_fpu_end() stts() +#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) #define unlazy_fpu( tsk ) do { \ diff -urN linux-2.5.4-pre5/include/asm-i386/pgalloc.h linux/include/asm-i386/pgalloc.h --- linux-2.5.4-pre5/include/asm-i386/pgalloc.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/pgalloc.h Sat Feb 9 00:26:44 2002 @@ -75,20 +75,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } static inline void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist 
= (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static inline void free_pgd_slow(pgd_t *pgd) @@ -119,19 +125,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = (unsigned long *)pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = ret[1]; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static inline void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } static __inline__ void pte_free_slow(pte_t *pte) diff -urN linux-2.5.4-pre5/include/asm-i386/smplock.h linux/include/asm-i386/smplock.h --- linux-2.5.4-pre5/include/asm-i386/smplock.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/smplock.h Sat Feb 9 00:30:43 2002 @@ -10,7 +10,15 @@ extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_get_count() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -43,6 +51,11 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else #if 1 if (!++current->lock_depth) spin_lock(&kernel_flag); @@ -55,6 +68,7 @@ :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); #endif +#endif } static __inline__ void unlock_kernel(void) diff -urN linux-2.5.4-pre5/include/asm-i386/softirq.h linux/include/asm-i386/softirq.h --- linux-2.5.4-pre5/include/asm-i386/softirq.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/softirq.h Sat Feb 9 00:30:43 2002 @@ -5,9 +5,9 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) @@ -22,7 +22,7 @@ * If you change the offsets in irq_stat then you have to * update this code as well. 
*/ -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ \ @@ -45,4 +45,6 @@ /* no registers clobbered */ ); \ } while (0) +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) + #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.5.4-pre5/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h --- linux-2.5.4-pre5/include/asm-i386/spinlock.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/spinlock.h Sat Feb 9 00:26:43 2002 @@ -77,7 +77,7 @@ :"=m" (lock->lock) : : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -97,7 +97,7 @@ :"=q" (oldval), "=m" (lock->lock) \ :"0" (oldval) : "memory" -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { char oldval = 1; #if SPINLOCK_DEBUG @@ -113,7 +113,7 @@ #endif -static inline int spin_trylock(spinlock_t *lock) +static inline int _raw_spin_trylock(spinlock_t *lock) { char oldval; __asm__ __volatile__( @@ -123,7 +123,7 @@ return oldval > 0; } -static inline void spin_lock(spinlock_t *lock) +static inline void _raw_spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -179,7 +179,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.c */ -static inline void read_lock(rwlock_t *rw) +static inline void _raw_read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -188,7 +188,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -static inline void write_lock(rwlock_t *rw) +static inline void _raw_write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -197,10 +197,10 @@ __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -static inline int write_trylock(rwlock_t *lock) +static inline int _raw_write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -urN linux-2.5.4-pre5/include/asm-i386/thread_info.h linux/include/asm-i386/thread_info.h --- linux-2.5.4-pre5/include/asm-i386/thread_info.h Fri Feb 8 22:26:19 2002 +++ linux/include/asm-i386/thread_info.h Sat Feb 9 00:26:43 2002 @@ -25,6 +25,7 @@ struct exec_domain *exec_domain; /* execution domain */ __u32 flags; /* low level flags */ __u32 cpu; /* current CPU */ + __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -41,7 +42,8 @@ #define TI_EXEC_DOMAIN 0x00000004 #define TI_FLAGS 0x00000008 #define TI_CPU 0x0000000C -#define TI_ADDR_LIMIT 0x00000010 +#define TI_PRE_COUNT 0x00000010 +#define TI_ADDR_LIMIT 0x00000014 #endif diff -urN linux-2.5.4-pre5/include/linux/brlock.h linux/include/linux/brlock.h --- linux-2.5.4-pre5/include/linux/brlock.h Fri Feb 8 22:26:19 2002 +++ linux/include/linux/brlock.h Sat Feb 9 00:30:57 2002 @@ -171,11 +171,11 @@ } #else -# define br_read_lock(idx) ((void)(idx)) -# define br_read_unlock(idx) ((void)(idx)) -# define br_write_lock(idx) ((void)(idx)) -# define br_write_unlock(idx) ((void)(idx)) 
-#endif +# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); }) +# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); }) +#endif /* CONFIG_SMP */ /* * Now enumerate all of the possible sw/hw IRQ protected diff -urN linux-2.5.4-pre5/include/linux/sched.h linux/include/linux/sched.h --- linux-2.5.4-pre5/include/linux/sched.h Fri Feb 8 22:26:19 2002 +++ linux/include/linux/sched.h Sat Feb 9 00:30:43 2002 @@ -91,6 +91,7 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#define PREEMPT_ACTIVE 0x4000000 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) diff -urN linux-2.5.4-pre5/include/linux/smp.h linux/include/linux/smp.h --- linux-2.5.4-pre5/include/linux/smp.h Fri Feb 8 22:26:19 2002 +++ linux/include/linux/smp.h Sat Feb 9 00:26:44 2002 @@ -81,7 +81,9 @@ #define smp_processor_id() 0 #define hard_smp_processor_id() 0 #define smp_threads_ready 1 +#ifndef CONFIG_PREEMPT #define kernel_lock() +#endif #define cpu_logical_map(cpu) 0 #define cpu_number_map(cpu) 0 #define smp_call_function(func,info,retry,wait) ({ 0; }) diff -urN linux-2.5.4-pre5/include/linux/smp_lock.h linux/include/linux/smp_lock.h --- linux-2.5.4-pre5/include/linux/smp_lock.h Fri Feb 8 22:26:19 2002 +++ linux/include/linux/smp_lock.h Sat Feb 9 00:30:43 2002 @@ -3,7 +3,7 @@ #include -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) diff -urN linux-2.5.4-pre5/include/linux/spinlock.h linux/include/linux/spinlock.h --- linux-2.5.4-pre5/include/linux/spinlock.h Fri Feb 8 22:26:19 2002 +++ linux/include/linux/spinlock.h Sat Feb 9 00:30:41 2002 @@ -2,6 +2,10 @@ #define __LINUX_SPINLOCK_H #include +#include +#include +#include +#include /* * These are the generic versions of the spinlocks and read-write @@ -62,8 +66,10 @@ #if (DEBUG_SPINLOCKS < 1) +#ifndef CONFIG_PREEMPT #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) #define ATOMIC_DEC_AND_LOCK +#endif /* * Your basic spinlocks, allowing only a single CPU anywhere @@ -79,11 +85,11 @@ #endif #define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". */ #define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) +#define _raw_spin_trylock(lock) ({1; }) #define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) +#define _raw_spin_unlock(lock) do { } while(0) #elif (DEBUG_SPINLOCKS < 2) @@ -142,13 +148,79 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_read_unlock(lock) do { } while(0) +#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". 
*/ +#define _raw_write_unlock(lock) do { } while(0) #endif /* !SMP */ +#ifdef CONFIG_PREEMPT + +asmlinkage void preempt_schedule(void); + +#define preempt_get_count() (current_thread_info()->preempt_count) + +#define preempt_disable() \ +do { \ + ++current_thread_info()->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched() \ +do { \ + --current_thread_info()->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable() \ +do { \ + --current_thread_info()->preempt_count; \ + barrier(); \ + if (unlikely(!(current_thread_info()->preempt_count) && \ + test_thread_flag(TIF_NEED_RESCHED))) \ + preempt_schedule(); \ +} while (0) + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _raw_spin_lock(lock); \ +} while(0) + +#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);}) +#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);}) +#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable();_raw_write_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) + +#else + +#define preempt_get_count() do { } while (0) +#define preempt_disable() do { } while (0) +#define preempt_enable_no_resched() do {} while(0) +#define preempt_enable() do { } while (0) + +#define spin_lock(lock) _raw_spin_lock(lock) +#define spin_trylock(lock) _raw_spin_trylock(lock) +#define spin_unlock(lock) _raw_spin_unlock(lock) + +#define read_lock(lock) _raw_read_lock(lock) +#define read_unlock(lock) _raw_read_unlock(lock) +#define write_lock(lock) _raw_write_lock(lock) +#define write_unlock(lock) _raw_write_unlock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) +#endif + /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK #include diff -urN linux-2.5.4-pre5/kernel/exit.c linux/kernel/exit.c --- linux-2.5.4-pre5/kernel/exit.c Fri Feb 8 22:26:19 2002 +++ linux/kernel/exit.c Sat Feb 9 00:26:16 2002 @@ -390,8 +390,8 @@ /* more a memory barrier than a real lock */ task_lock(tsk); tsk->mm = NULL; - task_unlock(tsk); enter_lazy_tlb(mm, current, smp_processor_id()); + task_unlock(tsk); mmput(mm); } } diff -urN linux-2.5.4-pre5/kernel/fork.c linux/kernel/fork.c --- linux-2.5.4-pre5/kernel/fork.c Fri Feb 8 22:26:19 2002 +++ linux/kernel/fork.c Sat Feb 9 00:26:16 2002 @@ -650,6 +650,13 @@ if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); +#ifdef CONFIG_PREEMPT + /* + * schedule_tail drops this_rq()->lock so we compensate with a count + * of 1. Also, we want to start with kernel preemption disabled. 
+ */ + p->thread_info->preempt_count = 1; +#endif p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; diff -urN linux-2.5.4-pre5/kernel/ksyms.c linux/kernel/ksyms.c --- linux-2.5.4-pre5/kernel/ksyms.c Fri Feb 8 22:26:19 2002 +++ linux/kernel/ksyms.c Sat Feb 9 00:26:16 2002 @@ -445,6 +445,9 @@ EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(preempt_schedule); +#endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(sys_sched_yield); EXPORT_SYMBOL(set_user_nice); diff -urN linux-2.5.4-pre5/kernel/sched.c linux/kernel/sched.c --- linux-2.5.4-pre5/kernel/sched.c Fri Feb 8 22:26:19 2002 +++ linux/kernel/sched.c Sat Feb 9 00:26:16 2002 @@ -61,10 +61,12 @@ struct runqueue *__rq; repeat_lock_task: + preempt_disable(); __rq = task_rq(p); spin_lock_irqsave(&__rq->lock, *flags); if (unlikely(__rq != task_rq(p))) { spin_unlock_irqrestore(&__rq->lock, *flags); + preempt_enable(); goto repeat_lock_task; } return __rq; @@ -73,6 +75,7 @@ static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags) { spin_unlock_irqrestore(&rq->lock, *flags); + preempt_enable(); } /* * Adding/removing a task to/from a priority array: @@ -195,6 +198,7 @@ #ifdef CONFIG_SMP int need_resched, nrpolling; + preempt_disable(); /* minimise the chance of sending an interrupt to poll_idle() */ nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); @@ -202,6 +206,7 @@ if (!need_resched && !nrpolling && (p->thread_info->cpu != smp_processor_id())) smp_send_reschedule(p->thread_info->cpu); + preempt_enable(); #else set_tsk_need_resched(p); #endif @@ -219,6 +224,7 @@ runqueue_t *rq; repeat: + preempt_disable(); rq = task_rq(p); while (unlikely(rq->curr == p)) { cpu_relax(); @@ -227,9 +233,11 @@ rq = lock_task_rq(p, &flags); if (unlikely(rq->curr == p)) { unlock_task_rq(rq, &flags); + preempt_enable(); goto repeat; } unlock_task_rq(rq, &flags); + preempt_enable(); } /* @@ -295,7 +303,10 @@ void wake_up_forked_process(task_t * p) { - runqueue_t *rq = this_rq(); + runqueue_t *rq; + + preempt_disable(); + rq = this_rq(); p->state = TASK_RUNNING; if (!rt_task(p)) { @@ -308,6 +319,7 @@ p->thread_info->cpu = smp_processor_id(); activate_task(p, rq); spin_unlock_irq(&rq->lock); + preempt_enable(); } asmlinkage void schedule_tail(task_t *prev) @@ -635,17 +647,31 @@ */ asmlinkage void schedule(void) { - task_t *prev = current, *next; - runqueue_t *rq = this_rq(); + task_t *prev, *next; + runqueue_t *rq; prio_array_t *array; list_t *queue; int idx; if (unlikely(in_interrupt())) BUG(); + + preempt_disable(); + prev = current; + rq = this_rq(); + release_kernel_lock(prev, smp_processor_id()); spin_lock_irq(&rq->lock); +#ifdef CONFIG_PREEMPT + /* + * if entering from preempt_schedule, off a kernel preemption, + * go straight to picking the next task. + */ + if (unlikely(preempt_get_count() & PREEMPT_ACTIVE)) + goto pick_next_task; +#endif + switch (prev->state) { case TASK_RUNNING: prev->sleep_timestamp = jiffies; @@ -659,7 +685,7 @@ default: deactivate_task(prev, rq); } -#if CONFIG_SMP +#if CONFIG_SMP || CONFIG_PREEMPT pick_next_task: #endif if (unlikely(!rq->nr_running)) { @@ -707,9 +733,25 @@ spin_unlock_irq(&rq->lock); reacquire_kernel_lock(current); + preempt_enable_no_resched(); return; } +#ifdef CONFIG_PREEMPT +/* + * this is is the entry point to schedule() from in-kernel preemption. 
+ */ +asmlinkage void preempt_schedule(void) +{ + do { + current_thread_info()->preempt_count += PREEMPT_ACTIVE; + schedule(); + current_thread_info()->preempt_count -= PREEMPT_ACTIVE; + barrier(); + } while (test_thread_flag(TIF_NEED_RESCHED)); +} +#endif /* CONFIG_PREEMPT */ + /* * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve @@ -1105,9 +1147,12 @@ asmlinkage long sys_sched_yield(void) { - runqueue_t *rq = this_rq(); + runqueue_t *rq; prio_array_t *array; + preempt_disable(); + rq = this_rq(); + /* * Decrease the yielding task's priority by one, to avoid * livelocks. This priority loss is temporary, it's recovered @@ -1134,6 +1179,7 @@ __set_bit(current->prio, array->bitmap); } spin_unlock(&rq->lock); + preempt_enable_no_resched(); schedule(); diff -urN linux-2.5.4-pre5/net/socket.c linux/net/socket.c --- linux-2.5.4-pre5/net/socket.c Fri Feb 8 22:26:19 2002 +++ linux/net/socket.c Sat Feb 9 00:26:16 2002 @@ -132,7 +132,7 @@ static struct net_proto_family *net_families[NPROTO]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t net_family_lockct = ATOMIC_INIT(0); static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
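
The locking changes above all follow one rule: any window in which code depends on staying on the current CPU — the per-CPU quicklists in pgalloc.h, the atomic kmap slots in highmem.h, the FPU state in kernel_fpu_begin()/kernel_fpu_end() — must now be bracketed by the new preempt_disable()/preempt_enable() pair, because with CONFIG_PREEMPT the task can otherwise be preempted (and, on SMP, migrated to another CPU) in the middle of that window. The fragment below is an illustrative sketch only, not part of the patch; the example_events array and example_count_event() function are hypothetical, and it assumes nothing beyond the preempt_disable()/preempt_enable() macros added to include/linux/spinlock.h above.

/*
 * Hypothetical example (not part of the patch): bumping a per-CPU
 * counter safely under CONFIG_PREEMPT.  Without the bracket, the task
 * could be preempted right after smp_processor_id() returns, so "cpu"
 * might no longer name the CPU we are running on by the time the
 * increment happens.
 */
#include <linux/config.h>
#include <linux/threads.h>	/* NR_CPUS */
#include <linux/smp.h>		/* smp_processor_id() */
#include <linux/spinlock.h>	/* preempt_disable()/preempt_enable() from this patch */

static unsigned long example_events[NR_CPUS];

void example_count_event(void)
{
	int cpu;

	preempt_disable();	/* no involuntary preemption from here on */
	cpu = smp_processor_id();
	example_events[cpu]++;
	preempt_enable();	/* may enter preempt_schedule() if a reschedule became pending */
}

Code that already guards its per-CPU or shared data with a spinlock needs no such change: spin_lock() now implies preempt_disable() and spin_unlock() implies preempt_enable(), which is exactly what the _raw_* renaming and the wrapper macros in include/linux/spinlock.h provide.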