include/linux/writeback.h |    2 +
 mm/filemap.c              |   61 ++++++++++++++++++++++++++++++++++++++++++----
 mm/page-writeback.c       |   24 ++++++++++++++++++
 3 files changed, 83 insertions(+), 4 deletions(-)

Summary of the hunks below:

  - The existing generic_file_aio_write_nolock() body is renamed to
    __generic_file_aio_write_nolock().
  - A new generic_file_aio_write_nolock() wrapper calls the renamed
    function and, for synchronous kiocbs on O_SYNC files or IS_SYNC
    inodes, follows up with sync_page_range_nolock().
  - __generic_file_write_nolock() is the synchronous counterpart that
    performs the write without the O_SYNC follow-up.
  - generic_file_aio_write(), generic_file_write() and
    generic_file_writev() are switched to the "__" variants; the context
    lines show they already do their own O_SYNC handling after up().
  - sync_page_range_nolock() is added to mm/page-writeback.c and declared
    in include/linux/writeback.h.

Review notes:

  - The wrapper forwards the caller's nr_segs to
    __generic_file_aio_write_nolock() instead of a hard-coded 1, so a
    multi-segment iovec is not cut down to its first segment.  This
    matches what __generic_file_write_nolock() does.
  - NOTE(review): after a successful sync the wrapper stores
    "pos + err" into *ppos.  sync_page_range_nolock() returns 0 from both
    of its early exits, which would put *ppos back at the starting
    offset.  Confirm the intended success value of wait_on_page_range().

diff -puN include/linux/writeback.h~O_SYNC-speedup-nolock-fix include/linux/writeback.h
--- 25/include/linux/writeback.h~O_SYNC-speedup-nolock-fix	2003-08-30 15:42:33.000000000 -0700
+++ 25-akpm/include/linux/writeback.h	2003-08-30 15:42:33.000000000 -0700
@@ -89,6 +89,8 @@ int pdflush_operation(void (*fn)(unsigne
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 ssize_t sync_page_range(struct inode *inode, struct address_space *mapping,
 			loff_t pos, size_t count);
+ssize_t sync_page_range_nolock(struct inode *inode, struct address_space
+		*mapping, loff_t pos, size_t count);
 
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
diff -puN mm/filemap.c~O_SYNC-speedup-nolock-fix mm/filemap.c
--- 25/mm/filemap.c~O_SYNC-speedup-nolock-fix	2003-08-30 15:42:33.000000000 -0700
+++ 25-akpm/mm/filemap.c	2003-08-30 15:42:33.000000000 -0700
@@ -1711,7 +1711,7 @@ EXPORT_SYMBOL(generic_write_checks);
  *							okir@monad.swb.de
  */
 ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
+__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t *ppos)
 {
 	struct file *file = iocb->ki_filp;
@@ -1903,6 +1903,59 @@ out:
 }
 
 ssize_t
+generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
+				unsigned long nr_segs, loff_t *ppos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	loff_t pos = *ppos;
+
+	if (!iov->iov_base && !is_sync_kiocb(iocb)) {
+		/* nothing to transfer, may just need to sync data */
+		ret = iov->iov_len; /* vector AIO not supported yet */
+		goto osync;
+	}
+
+	ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+
+	/*
+	 * Avoid doing a sync in parts for aio - it's more efficient to
+	 * call in again after all the data has been copied
+	 */
+	if (!is_sync_kiocb(iocb))
+		return ret;
+
+osync:
+	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+		ssize_t err;
+
+		err = sync_page_range_nolock(inode, mapping, pos, ret);
+		if (err < 0)
+			ret = err;
+		else
+			*ppos = pos + err;
+	}
+	return ret;
+}
+
+
+ssize_t
+__generic_file_write_nolock(struct file *file, const struct iovec *iov,
+				unsigned long nr_segs, loff_t *ppos)
+{
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, file);
+	ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	return ret;
+}
+
+ssize_t
 generic_file_write_nolock(struct file *file, const struct iovec *iov,
 				unsigned long nr_segs, loff_t *ppos)
 {
@@ -1935,7 +1988,7 @@ ssize_t generic_file_aio_write(struct ki
 	}
 
 	down(&inode->i_sem);
-	ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
+	ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
 						&iocb->ki_pos);
 	up(&inode->i_sem);
 
@@ -1971,7 +2024,7 @@ ssize_t generic_file_write(struct file *
 					.iov_len = count };
 
 	down(&inode->i_sem);
-	ret = generic_file_write_nolock(file, &local_iov, 1, ppos);
+	ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
 	up(&inode->i_sem);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
@@ -2004,7 +2057,7 @@ ssize_t generic_file_writev(struct file
 	ssize_t ret;
 
 	down(&inode->i_sem);
-	ret = generic_file_write_nolock(file, iov, nr_segs, ppos);
+	ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
 	up(&inode->i_sem);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
diff -puN mm/page-writeback.c~O_SYNC-speedup-nolock-fix mm/page-writeback.c
--- 25/mm/page-writeback.c~O_SYNC-speedup-nolock-fix	2003-08-30 15:42:33.000000000 -0700
+++ 25-akpm/mm/page-writeback.c	2003-08-30 15:42:33.000000000 -0700
@@ -678,3 +678,27 @@ ssize_t sync_page_range(struct inode *in
 		ret = wait_on_page_range(mapping, pos, count);
 	return ret;
 }
+
+/*
+ * It is really better to use sync_page_range, rather than call
+ * sync_page_range_nolock while holding i_sem, if you don't
+ * want to block parallel O_SYNC writes until the pages in this
+ * range are written out.
+ */
+ssize_t sync_page_range_nolock(struct inode *inode, struct address_space
+	*mapping, loff_t pos, size_t count)
+{
+	int ret;
+
+	if (!mapping->a_ops->writepage)
+		return 0;
+	if (mapping->backing_dev_info->memory_backed)
+		return 0;
+	ret = write_out_page_range(mapping, pos, count);
+	if (ret >= 0) {
+		ret = generic_osync_inode(inode, OSYNC_METADATA);
+	}
+	if (ret >= 0)
+		ret = wait_on_page_range(mapping, pos, count);
+	return ret;
+}

_