Commit MetaInfo

Revision: d3f71ae711cebdeaff12989761f48bd4230e83d5 (tree)
Time: 2016-01-30 08:46:49
Author: Linus Torvalds <torvalds@linu...>
Committer: Linus Torvalds

Log Message

Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:

"Dave had a small collection of fixes to the new free space tree code,
one of which was keeping our sysfs files more up to date with feature
bits as different things get enabled (lzo, raid5/6, etc).
I should have kept the sysfs stuff for rc3, since we always manage to
trip over something. This time it was GFP_KERNEL from somewhere that
is NOFS only. Instead of rebasing it out I've put a revert in, and
we'll fix it properly for rc3.
Otherwise, Filipe fixed a btrfs DIO race and Qu Wenruo fixed up a
use-after-free in our tracepoints that Dave Jones reported"

* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:

Revert "btrfs: synchronize incompat feature bits with sysfs files"
btrfs: don't use GFP_HIGHMEM for free-space-tree bitmap kzalloc
btrfs: sysfs: check initialization state before updating features
Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()"
btrfs: async-thread: Fix a use-after-free error for trace
Btrfs: fix race between fsync and lockless direct IO writes
btrfs: add free space tree to the cow-only list
btrfs: add free space tree to lockdep classes
btrfs: tweak free space tree bitmap allocation
btrfs: tests: switch to GFP_KERNEL
btrfs: synchronize incompat feature bits with sysfs files
btrfs: sysfs: introduce helper for syncing bits with sysfs files
btrfs: sysfs: add free-space-tree bit attribute
btrfs: sysfs: fix typo in compat_ro attribute definition
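
The GFP_KERNEL-vs-NOFS problem mentioned in the pull message above comes down to reclaim recursion: an allocation made while the filesystem holds transaction state must use GFP_NOFS so that direct reclaim cannot re-enter filesystem writeback and deadlock, while code that runs in plain process context (such as the self-tests switched over in the diff below) is free to use GFP_KERNEL. A minimal sketch of the distinction, with hypothetical helper names that are not part of this commit:

    #include <linux/slab.h>
    #include <linux/gfp.h>

    /* Hypothetical helpers, for illustration only. */

    static void *alloc_inside_transaction(size_t len)
    {
        /*
         * Called with filesystem transaction state held: reclaim
         * triggered by this allocation must not re-enter filesystem
         * writeback, so GFP_NOFS is required.
         */
        return kzalloc(len, GFP_NOFS);
    }

    static void *alloc_in_selftest(size_t len)
    {
        /*
         * Self-tests run in plain process context with no fs locks
         * held, so GFP_KERNEL is safe and gives reclaim more freedom.
         */
        return kzalloc(len, GFP_KERNEL);
    }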

Incremental Difference

--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
 		list_add_tail(&work->ordered_list, &wq->ordered_list);
 		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	queue_work(wq->normal_wq, &work->normal_work);
 	trace_btrfs_work_queued(work);
+	queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
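
The async-thread change above is the "Fix a use-after-free error for trace" commit from the shortlog: once queue_work() has been called, the work item may run and free itself on another CPU, so the tracepoint must fire while the submitter still owns the item. A minimal sketch of the hazard with a generic self-freeing work item (hypothetical names, not btrfs code):

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct my_item {
        struct work_struct work;   /* INIT_WORK(&work, my_item_fn) done at alloc time */
    };

    static void my_item_fn(struct work_struct *work)
    {
        /* The handler owns the item and frees it when finished. */
        kfree(container_of(work, struct my_item, work));
    }

    static void my_item_submit(struct my_item *item)
    {
        /* Any use of 'item' must happen before the hand-off... */
        pr_debug("queueing item %p\n", item);
        queue_work(system_wq, &item->work);
        /*
         * ...because from here the handler may already have run and
         * freed 'item' on another CPU. Dereferencing it now would be
         * a use-after-free, which is why trace_btrfs_work_queued() is
         * moved before queue_work() in the hunk above.
         */
    }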
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
 	{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
 	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
 	{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
+	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
 	{ .id = 0, .name_stem = "tree" },
 };
 
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
 	int again;
 	struct btrfs_trans_handle *trans;
 
-	set_freezable();
 	do {
 		again = 0;
 
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+	void *mem;
+
+	/*
+	 * The allocation size varies, observed numbers were < 4K up to 16K.
+	 * Using vmalloc unconditionally would be too heavy, we'll try
+	 * contiguous allocations first.
+	 */
+	if (bitmap_size <= PAGE_SIZE)
+		return kzalloc(bitmap_size, GFP_NOFS);
+
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+	if (mem)
+		return mem;
+
 	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 			 PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
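
The new alloc_bitmap() above follows the common "try a contiguous kzalloc() first, fall back to vmalloc for larger or fragmented cases" pattern, which is why the two convert_free_space_to_* callers switch from vfree() to kvfree(): kvfree() frees either kind of pointer. A stripped-down sketch of the same pattern with a hypothetical helper name (using the three-argument __vmalloc() of this kernel era):

    #include <linux/slab.h>
    #include <linux/vmalloc.h>
    #include <linux/mm.h>

    /* Hypothetical helper: zeroed buffer, physically contiguous when cheap. */
    static void *my_alloc_buf(size_t size)
    {
        void *mem;

        if (size <= PAGE_SIZE)
            return kzalloc(size, GFP_NOFS);

        /* Try a contiguous allocation first, without warning on failure. */
        mem = kzalloc(size, GFP_NOFS | __GFP_NOWARN);
        if (mem)
            return mem;

        /* Fall back to virtually contiguous memory. */
        return __vmalloc(size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL);
    }

    static void my_free_buf(void *mem)
    {
        kvfree(mem); /* handles both kmalloc'ed and vmalloc'ed pointers */
    }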
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7116,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	if (ret)
 		return ERR_PTR(ret);
 
-	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, ins.offset, 0);
-	if (IS_ERR(em)) {
-		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		return em;
-	}
-
+	/*
+	 * Create the ordered extent before the extent map. This is to avoid
+	 * races with the fast fsync path that would lead to it logging file
+	 * extent items that point to disk extents that were not yet written to.
+	 * The fast fsync path collects ordered extents into a local list and
+	 * then collects all the new extent maps, so we must create the ordered
+	 * extent first and make sure the fast fsync path collects any new
+	 * ordered extents after collecting new extent maps as well.
+	 * The fsync path simply can not rely on inode_dio_wait() because it
+	 * causes deadlock with AIO.
+	 */
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
 	if (ret) {
 		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		free_extent_map(em);
 		return ERR_PTR(ret);
 	}
 
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, ins.offset, 0);
+	if (IS_ERR(em)) {
+		struct btrfs_ordered_extent *oe;
+
+		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+		oe = btrfs_lookup_ordered_extent(inode, start);
+		ASSERT(oe);
+		if (WARN_ON(!oe))
+			return em;
+		set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+		set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+		btrfs_remove_ordered_extent(inode, oe);
+		/* Once for our lookup and once for the ordered extents tree. */
+		btrfs_put_ordered_extent(oe);
+		btrfs_put_ordered_extent(oe);
+	}
 	return em;
 }
 
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
 	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+	    root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
 		return 1;
 	return 0;
 }
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid56),
 	BTRFS_FEAT_ATTR_PTR(skinny_metadata),
 	BTRFS_FEAT_ATTR_PTR(no_holes),
+	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	NULL
 };
 
@@ -780,6 +782,39 @@ failure:
 	return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	if (!fsid_kobj->state_initialized)
+		return;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
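
The btrfs_sysfs_feature_update() helper added above is what the (reverted) "synchronize incompat feature bits with sysfs files" commit calls from the feature-setting paths. A hypothetical call site might look roughly like this; apart from btrfs_sysfs_feature_update() itself and the standard superblock accessors, the surrounding function is illustrative and not taken from this commit:

    /* Illustration only: publish a newly enabled compat_ro bit to sysfs. */
    static void example_enable_free_space_tree(struct btrfs_fs_info *fs_info)
    {
        struct btrfs_super_block *disk_super = fs_info->super_copy;
        u64 features = btrfs_super_compat_ro_flags(disk_super);

        if (features & BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
            return;

        features |= BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE;
        btrfs_set_super_compat_ro_flags(disk_super, features);

        /* Mirror the new bit into /sys/fs/btrfs/UUID/features. */
        btrfs_sysfs_feature_update(fs_info,
                BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE,
                FEAT_COMPAT_RO);
    }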
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
 				struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
+						GFP_KERNEL);
 
 	if (!fs_info)
 		return fs_info;
 	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->fs_devices) {
 		kfree(fs_info);
 		return NULL;
 	}
 	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->super_copy) {
 		kfree(fs_info->fs_devices);
 		kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
 	struct btrfs_block_group_cache *cache;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
+					GFP_KERNEL);
 	if (!cache->free_space_ctl) {
 		kfree(cache);
 		return NULL;
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
 	 * test.
 	 */
 	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
 			ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
 	 * |--- delalloc ---|
 	 * |--- search ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
 	 *
 	 * We are re-using our test_start from above since it works out well.
 	 */
-	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
 	}
 	ret = 0;
 out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
 		page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
 	test_msg("Running extent buffer bitmap tests\n");
 
-	bitmap = kmalloc(len, GFP_NOFS);
+	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
 		return -ENOMEM;
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
 			       BTRFS_MAX_EXTENT_SIZE+8191,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1096,7 +1096,7 @@ out:
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			 EXTENT_DIRTY | EXTENT_DELALLOC |
 			 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			 NULL, GFP_NOFS);
+			 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);
 	return ret;
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     struct btrfs_path *path,
 				     struct list_head *logged_list,
-				     struct btrfs_log_ctx *ctx)
+				     struct btrfs_log_ctx *ctx,
+				     const u64 start,
+				     const u64 end)
 {
 	struct extent_map *em, *n;
 	struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-
+	/*
+	 * Collect any new ordered extents within the range. This is to
+	 * prevent logging file extent items without waiting for the disk
+	 * location they point to being written. We do this only to deal
+	 * with races against concurrent lockless direct IO writes.
+	 */
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
 		goto out_unlock;
 	}
 	ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-					&logged_list, ctx);
+					&logged_list, ctx, start, end);
 	if (ret) {
 		err = ret;
 		goto out_unlock;