From: Hans Reiser This patch adds an API to reserve some number of pages for exclusive use of calling thread. This is necessary to support operations that cannot tolerate memory allocation failures in the middle. Implementation is very simplistic and would only work efficiently for small reservations (no more than few dozens of pages). Reserved pages are stored in a list hanging off calling task_struct. This list is only accessed by the current thread, hence, no locking is required. Note that this makes use of reservation from interrupt handlers impossible without some external synchronization. Caller is responsible for manually releasing unused pages by calling perthread_pages_release(int nrpages) with proper argument. Signed-off-by: Andrew Morton --- 25-akpm/include/linux/gfp.h | 6 +- 25-akpm/include/linux/init_task.h | 2 25-akpm/include/linux/sched.h | 5 + 25-akpm/kernel/fork.c | 4 + 25-akpm/mm/page_alloc.c | 97 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 1 deletion(-) diff -puN include/linux/gfp.h~reiser4-perthread-pages include/linux/gfp.h --- 25/include/linux/gfp.h~reiser4-perthread-pages 2005-03-16 21:42:53.000000000 -0800 +++ 25-akpm/include/linux/gfp.h 2005-03-16 21:42:53.000000000 -0800 @@ -130,6 +130,10 @@ extern void FASTCALL(free_cold_page(stru #define free_page(addr) free_pages((addr),0) void page_alloc_init(void); - void prep_zero_page(struct page *, unsigned int order, unsigned int gfp_flags); + +int perthread_pages_reserve(int nrpages, int gfp); +void perthread_pages_release(int nrpages); +int perthread_pages_count(void); + #endif /* __LINUX_GFP_H */ diff -puN include/linux/init_task.h~reiser4-perthread-pages include/linux/init_task.h --- 25/include/linux/init_task.h~reiser4-perthread-pages 2005-03-16 21:42:53.000000000 -0800 +++ 25-akpm/include/linux/init_task.h 2005-03-16 21:42:53.000000000 -0800 @@ -113,6 +113,8 @@ extern struct group_info init_groups; .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ + .private_pages = LIST_HEAD_INIT(tsk.private_pages), \ + .private_pages_count = 0, \ } diff -puN include/linux/sched.h~reiser4-perthread-pages include/linux/sched.h --- 25/include/linux/sched.h~reiser4-perthread-pages 2005-03-16 21:42:53.000000000 -0800 +++ 25-akpm/include/linux/sched.h 2005-03-16 21:42:53.000000000 -0800 @@ -552,6 +552,9 @@ struct sched_domain { unsigned long ttwu_move_affine; unsigned long ttwu_move_balance; #endif + + struct list_head private_pages; /* per-process private pages */ + int private_pages_count; }; #ifdef ARCH_HAS_SCHED_DOMAIN @@ -773,6 +776,8 @@ struct task_struct { int cpuset_mems_generation; #endif atomic_t fs_excl; /* holding fs exclusive resources */ + struct list_head private_pages; /* per-process private pages */ + int private_pages_count; }; static inline pid_t process_group(struct task_struct *tsk) diff -puN kernel/fork.c~reiser4-perthread-pages kernel/fork.c --- 25/kernel/fork.c~reiser4-perthread-pages 2005-03-16 21:42:53.000000000 -0800 +++ 25-akpm/kernel/fork.c 2005-03-16 21:42:53.000000000 -0800 @@ -174,6 +174,10 @@ static struct task_struct *dup_task_stru tsk->thread_info = ti; ti->task = tsk; + /* initialize list of pages privately reserved by process */ + INIT_LIST_HEAD(&tsk->private_pages); + tsk->private_pages_count = 0; + /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); return tsk; diff -puN mm/page_alloc.c~reiser4-perthread-pages mm/page_alloc.c --- 25/mm/page_alloc.c~reiser4-perthread-pages 2005-03-16 21:42:53.000000000 -0800 +++ 25-akpm/mm/page_alloc.c 2005-03-16 21:42:53.000000000 -0800 @@ -650,6 +650,97 @@ void prep_zero_page(struct page *page, u clear_highpage(page + i); } +static inline struct list_head *get_per_thread_pages(void) +{ + return ¤t->private_pages; +} + +int perthread_pages_reserve(int nrpages, int gfp) +{ + int i; + struct list_head accumulator; + struct list_head *per_thread; + + per_thread = get_per_thread_pages(); + INIT_LIST_HEAD(&accumulator); + list_splice_init(per_thread, &accumulator); + for (i = 0; i < nrpages; ++i) { + struct page *page; + + page = alloc_page(gfp); + if (page != NULL) + list_add(&page->lru, &accumulator); + else { + for (; i > 0; --i) { + page = list_entry(accumulator.next, struct page, lru); + list_del(&page->lru); + page_cache_release(page); + } + return -ENOMEM; + } + } + /* + * Q: why @accumulator is used, instead of directly adding pages to + * the get_per_thread_pages()? + * + * A: because after first page is added to the get_per_thread_pages(), + * next call to the alloc_page() (on the next loop iteration), will + * re-use it. + */ + list_splice(&accumulator, per_thread); + current->private_pages_count += nrpages; + return 0; +} +EXPORT_SYMBOL(perthread_pages_reserve); + +void perthread_pages_release(int nrpages) +{ + struct list_head *per_thread; + + current->private_pages_count -= nrpages; + per_thread = get_per_thread_pages(); + for (; nrpages != 0; --nrpages) { + struct page *page; + + BUG_ON(list_empty(per_thread)); + page = list_entry(per_thread->next, struct page, lru); + list_del(&page->lru); + page_cache_release(page); + } +} +EXPORT_SYMBOL(perthread_pages_release); + +int perthread_pages_count(void) +{ + return current->private_pages_count; +} +EXPORT_SYMBOL(perthread_pages_count); + +static inline struct page * +perthread_pages_alloc(void) +{ + struct list_head *perthread_pages; + + /* + * try to allocate pages from the per-thread private_pages pool. No + * locking is needed: this list can only be modified by the thread + * itself, and not by interrupts or other threads. + */ + perthread_pages = get_per_thread_pages(); + if (!in_interrupt() && !list_empty(perthread_pages)) { + struct page *page; + + page = list_entry(perthread_pages->next, struct page, lru); + list_del(&page->lru); + current->private_pages_count--; + /* + * per-thread page is already initialized, just return it. + */ + return page; + } else + return NULL; +} + /* * Really, prep_compound_page() should be called from __rmqueue_bulk(). But * we cheat by calling it from here, in the order > 0 path. Saves a branch @@ -794,6 +885,12 @@ __alloc_pages(unsigned int gfp_mask, uns */ can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait; + if (order == 0) { + page = perthread_pages_alloc(); + if (page != NULL) + return page; + } + zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ if (unlikely(zones[0] == NULL)) { _