Compare commits
No commits in common. "529c6e3575af3e0f48a5bcb69d3ddcd172d7a6c1" and "a59160f781b588141f5a837ed8388fc6cc4bce94" have entirely different histories.
529c6e3575
...
a59160f781
@ -1,219 +0,0 @@
|
||||
From 342b52596ce27f30b59622b67da188c4a3adbeee Mon Sep 17 00:00:00 2001
|
||||
From: Wu Guanghao <wuguanghao3@huawei.com>
|
||||
Date: Wed, 21 Jun 2023 09:25:27 +0000
|
||||
Subject: [PATCH] mkfs.xfs: fix segmentation fault caused by accessing a null
|
||||
pointer
|
||||
|
||||
We encountered a segfault while testing mkfs.xfs with iSCSI.
|
||||
|
||||
(gdb) bt
|
||||
#0 libxfs_log_sb (tp=0xaaaafaea0630) at xfs_sb.c:810
|
||||
#1 0x0000aaaaca991468 in __xfs_trans_commit (tp=<optimized out>, tp@entry=0xaaaafaea0630, regrant=regrant@entry=true) at trans.c:995
|
||||
#2 0x0000aaaaca991790 in libxfs_trans_roll (tpp=tpp@entry=0xfffffe1f3018) at trans.c:103
|
||||
#3 0x0000aaaaca9bcde8 in xfs_dialloc_roll (agibp=0xaaaafaea2fa0, tpp=0xfffffe1f31c8) at xfs_ialloc.c:1561
|
||||
#4 xfs_dialloc_try_ag (ok_alloc=true, new_ino=<synthetic pointer>, parent=0, pag=0xaaaafaea0210, tpp=0xfffffe1f31c8) at xfs_ialloc.c:1698
|
||||
#5 xfs_dialloc (tpp=tpp@entry=0xfffffe1f31c8, parent=0, mode=mode@entry=16877, new_ino=new_ino@entry=0xfffffe1f3128) at xfs_ialloc.c:1776
|
||||
#6 0x0000aaaaca9925b0 in libxfs_dir_ialloc (tpp=tpp@entry=0xfffffe1f31c8, dp=dp@entry=0x0, mode=mode@entry=16877, nlink=nlink@entry=1, rdev=rdev@entry=0, cr=cr@entry=0xfffffe1f31d0,
|
||||
fsx=fsx@entry=0xfffffe1f36a4, ipp=ipp@entry=0xfffffe1f31c0) at util.c:525
|
||||
#7 0x0000aaaaca988fac in parseproto (mp=0xfffffe1f36c8, pip=0x0, fsxp=0xfffffe1f36a4, pp=0xfffffe1f3370, name=0x0) at proto.c:552
|
||||
#8 0x0000aaaaca9867a4 in main (argc=<optimized out>, argv=<optimized out>) at xfs_mkfs.c:4217
|
||||
|
||||
(gdb) p bp
|
||||
$1 = 0x0
|
||||
|
||||
```
|
||||
void
|
||||
xfs_log_sb(
|
||||
struct xfs_trans *tp)
|
||||
{
|
||||
// iscsi offline
|
||||
...
|
||||
// failed to read sb, bp = NULL
|
||||
struct xfs_buf *bp = xfs_trans_getsb(tp);
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
When writing data to sb, if the device is abnormal at this time,
|
||||
the bp may be NULL. Using it without checking will result in
|
||||
a segfault.
|
||||
|
||||
So before using it, we need to check if the bp is NULL and return
|
||||
the error.
|
||||
|
||||
Signed-off-by: Wu Guanghao <wuguanghao3@huawei.com>
|
||||
---
|
||||
libxfs/trans.c | 4 +++-
|
||||
libxfs/xfs_attr_leaf.c | 2 +-
|
||||
libxfs/xfs_bmap.c | 8 ++++++--
|
||||
libxfs/xfs_sb.c | 25 ++++++++++++++++++++++---
|
||||
libxfs/xfs_sb.h | 2 +-
|
||||
mkfs/proto.c | 15 ++++++++++++---
|
||||
6 files changed, 45 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/libxfs/trans.c b/libxfs/trans.c
|
||||
index fd2e6f9..a43472f 100644
|
||||
--- a/libxfs/trans.c
|
||||
+++ b/libxfs/trans.c
|
||||
@@ -975,7 +975,9 @@ __xfs_trans_commit(
|
||||
sbp->sb_fdblocks += tp->t_fdblocks_delta;
|
||||
if (tp->t_frextents_delta)
|
||||
sbp->sb_frextents += tp->t_frextents_delta;
|
||||
- xfs_log_sb(tp);
|
||||
+ error = xfs_log_sb(tp);
|
||||
+ if (error)
|
||||
+ goto out_unreserve;
|
||||
}
|
||||
|
||||
trans_committed(tp);
|
||||
diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c
|
||||
index cfb6bf1..9522155 100644
|
||||
--- a/libxfs/xfs_attr_leaf.c
|
||||
+++ b/libxfs/xfs_attr_leaf.c
|
||||
@@ -629,7 +629,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
|
||||
if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
|
||||
xfs_sb_version_addattr2(&mp->m_sb);
|
||||
spin_unlock(&mp->m_sb_lock);
|
||||
- xfs_log_sb(tp);
|
||||
+ ASSERT(!xfs_log_sb(tp));
|
||||
} else
|
||||
spin_unlock(&mp->m_sb_lock);
|
||||
}
|
||||
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
|
||||
index a548507..16dbe71 100644
|
||||
--- a/libxfs/xfs_bmap.c
|
||||
+++ b/libxfs/xfs_bmap.c
|
||||
@@ -1122,8 +1122,12 @@ xfs_bmap_add_attrfork(
|
||||
log_sb = true;
|
||||
}
|
||||
spin_unlock(&mp->m_sb_lock);
|
||||
- if (log_sb)
|
||||
- xfs_log_sb(tp);
|
||||
+ if (log_sb) {
|
||||
+ error = xfs_log_sb(tp);
|
||||
+ if (error)
|
||||
+ goto trans_cancel;
|
||||
+ }
|
||||
+
|
||||
}
|
||||
|
||||
error = xfs_trans_commit(tp);
|
||||
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
|
||||
index b2e214e..df7aec5 100644
|
||||
--- a/libxfs/xfs_sb.c
|
||||
+++ b/libxfs/xfs_sb.c
|
||||
@@ -784,13 +784,16 @@ xfs_sb_mount_common(
|
||||
* level of locking that is needed to protect the in-core superblock from
|
||||
* concurrent access.
|
||||
*/
|
||||
-void
|
||||
+int
|
||||
xfs_log_sb(
|
||||
struct xfs_trans *tp)
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_buf *bp = xfs_trans_getsb(tp);
|
||||
|
||||
+ if (!bp)
|
||||
+ return -EIO;
|
||||
+
|
||||
/*
|
||||
* Lazy sb counters don't update the in-core superblock so do that now.
|
||||
* If this is at unmount, the counters will be exactly correct, but at
|
||||
@@ -808,6 +811,8 @@ xfs_log_sb(
|
||||
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
|
||||
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
|
||||
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -834,7 +839,13 @@ xfs_sync_sb(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
- xfs_log_sb(tp);
|
||||
+ error = xfs_log_sb(tp);
|
||||
+ if (error) {
|
||||
+ xfs_trans_cancel(tp);
|
||||
+ return error;
|
||||
+ }
|
||||
+
|
||||
+
|
||||
if (wait)
|
||||
xfs_trans_set_sync(tp);
|
||||
return xfs_trans_commit(tp);
|
||||
@@ -931,7 +942,15 @@ xfs_sync_sb_buf(
|
||||
return error;
|
||||
|
||||
bp = xfs_trans_getsb(tp);
|
||||
- xfs_log_sb(tp);
|
||||
+ if (!bp) {
|
||||
+ xfs_trans_cancel(tp);
|
||||
+ return -EIO;
|
||||
+ }
|
||||
+ error = xfs_log_sb(tp);
|
||||
+ if (error) {
|
||||
+ xfs_trans_cancel(tp);
|
||||
+ return error;
|
||||
+ }
|
||||
xfs_trans_bhold(tp, bp);
|
||||
xfs_trans_set_sync(tp);
|
||||
error = xfs_trans_commit(tp);
|
||||
diff --git a/libxfs/xfs_sb.h b/libxfs/xfs_sb.h
|
||||
index 0c1602d..33ca6bb 100644
|
||||
--- a/libxfs/xfs_sb.h
|
||||
+++ b/libxfs/xfs_sb.h
|
||||
@@ -13,7 +13,7 @@ struct xfs_trans;
|
||||
struct xfs_fsop_geom;
|
||||
struct xfs_perag;
|
||||
|
||||
-extern void xfs_log_sb(struct xfs_trans *tp);
|
||||
+extern int xfs_log_sb(struct xfs_trans *tp);
|
||||
extern int xfs_sync_sb(struct xfs_mount *mp, bool wait);
|
||||
extern int xfs_sync_sb_buf(struct xfs_mount *mp);
|
||||
extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
|
||||
diff --git a/mkfs/proto.c b/mkfs/proto.c
|
||||
index ef130ed..005b6e3 100644
|
||||
--- a/mkfs/proto.c
|
||||
+++ b/mkfs/proto.c
|
||||
@@ -557,7 +557,10 @@ parseproto(
|
||||
if (!pip) {
|
||||
pip = ip;
|
||||
mp->m_sb.sb_rootino = ip->i_ino;
|
||||
- libxfs_log_sb(tp);
|
||||
+ error = -libxfs_log_sb(tp);
|
||||
+ if (error) {
|
||||
+ fail(_("Log sb failed"), error);
|
||||
+ }
|
||||
isroot = 1;
|
||||
} else {
|
||||
libxfs_trans_ijoin(tp, pip, 0);
|
||||
@@ -656,7 +659,10 @@ rtinit(
|
||||
rbmip->i_diflags = XFS_DIFLAG_NEWRTBM;
|
||||
*(uint64_t *)&VFS_I(rbmip)->i_atime = 0;
|
||||
libxfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
|
||||
- libxfs_log_sb(tp);
|
||||
+ error = -libxfs_log_sb(tp);
|
||||
+ if (error) {
|
||||
+ fail(_("Log sb failed"), error);
|
||||
+ }
|
||||
mp->m_rbmip = rbmip;
|
||||
error = -libxfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
|
||||
&creds, &fsxattrs, &rsumip);
|
||||
@@ -666,7 +672,10 @@ rtinit(
|
||||
mp->m_sb.sb_rsumino = rsumip->i_ino;
|
||||
rsumip->i_disk_size = mp->m_rsumsize;
|
||||
libxfs_trans_log_inode(tp, rsumip, XFS_ILOG_CORE);
|
||||
- libxfs_log_sb(tp);
|
||||
+ error = -libxfs_log_sb(tp);
|
||||
+ if (error) {
|
||||
+ fail(_("Log sb failed"), error);
|
||||
+ }
|
||||
error = -libxfs_trans_commit(tp);
|
||||
if (error)
|
||||
fail(_("Completion of the realtime summary inode failed"),
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,93 +0,0 @@
|
||||
From d1b7a268a4d5f47e6e8d1ed9f2d44c61ba9103a2 Mon Sep 17 00:00:00 2001
|
||||
From: Long Li <leo.lilong@huawei.com>
|
||||
Date: Fri, 7 Jul 2023 17:01:52 +0800
|
||||
Subject: [PATCH] xfs_repair: fix warn in xfs_buf_find when growfs fails
|
||||
|
||||
The kernel does not update secondary superblock through transactions, so
|
||||
the atomicity of secondary sb cannot be guaranteed. If IO failures are
|
||||
encountered during the growfs, secondary superblocks and primary superblock
|
||||
will be inconsistent, and xfs_repair may copy the stale data from secondary
|
||||
superblocks to primary superblock. This will cause sb_dblocks to be less
|
||||
than the actual value and trigger the following warning.
|
||||
|
||||
When checking whether secondary superblocks and primary superblock are
|
||||
consistent, do not compare the fields that will change during growfs in
|
||||
superblock.
|
||||
|
||||
XFS (sdb): Starting recovery (logdev: internal)
|
||||
XFS (sdb): xfs_buf_find: daddr 0xf00001 out of range, EOFS 0xeec208
|
||||
------------[ cut here ]------------
|
||||
WARNING: CPU: 2 PID: 54142 at fs/xfs/xfs_buf.c:615 xfs_buf_find+0x268a/0x2f90 fs/xfs/xfs_buf.c:612
|
||||
Modules linked in:
|
||||
CPU: 2 PID: 54142 Comm: mount Not tainted 5.10.0-02690-g9edb619c0692 #24
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
|
||||
RIP: 0010:xfs_buf_find+0x268a/0x2f90 fs/xfs/xfs_buf.c:615
|
||||
Code: 01 e8 aa 1e 16 ff 49 89 d8 4c 89 e1 4c 89 ef 48 c7 c2 a0 c2 02 af 48 c7 c6 e0 b8 02 af e8 16 dc a2 02 48 83 05 3e 56 00 09 01 <0f> 0b 48 83 05 3c 56 00 09 01 48 83 05 3c 56 00 09 01 41 bc 8b ff
|
||||
RSP: 0018:ffff88811c6b7138 EFLAGS: 00010202
|
||||
RAX: 0000000000000000 RBX: 0000000000eec208 RCX: 0000000000000000
|
||||
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffed10238d6e17
|
||||
RBP: ffff88811c6b72b8 R08: ffff88810168c000 R09: 0000000000000001
|
||||
R10: ffff8881f6738427 R11: ffffed103ece7084 R12: 0000000000f00001
|
||||
R13: ffff88812fe44000 R14: ffff88810da77418 R15: 0000000000000001
|
||||
FS: 00007f9ef08e0080(0000) GS:ffff8881f6700000(0000) knlGS:0000000000000000
|
||||
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
|
||||
CR2: 0000564ec77273d0 CR3: 000000011ba20005 CR4: 0000000000370ee0
|
||||
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
|
||||
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
|
||||
Call Trace:
|
||||
xfs_buf_get_map+0xc3/0xa20 fs/xfs/xfs_buf.c:717
|
||||
xfs_buf_read_map+0xd1/0xbe0 fs/xfs/xfs_buf.c:832
|
||||
xfs_buf_readahead_map+0x185/0x1c0 fs/xfs/xfs_buf.c:913
|
||||
xfs_buf_readahead fs/xfs/xfs_buf.h:249 [inline]
|
||||
xlog_buf_readahead+0x13d/0x160 fs/xfs/xfs_log_recover.c:1921
|
||||
xlog_recover_buf_ra_pass2+0xad/0xf0 fs/xfs/xfs_buf_item_recover.c:177
|
||||
xlog_recover_commit_trans+0x3bb/0xec0 fs/xfs/xfs_log_recover.c:2003
|
||||
xlog_recovery_process_trans+0x1de/0x290 fs/xfs/xfs_log_recover.c:2256
|
||||
xlog_recover_process_ophdr+0x205/0x4c0 fs/xfs/xfs_log_recover.c:2402
|
||||
xlog_recover_process_data+0x1dc/0x620 fs/xfs/xfs_log_recover.c:2444
|
||||
xlog_recover_process+0x296/0x390 fs/xfs/xfs_log_recover.c:2892
|
||||
xlog_do_recovery_pass+0x7fb/0x13e0 fs/xfs/xfs_log_recover.c:3178
|
||||
xlog_do_log_recovery+0xf8/0x170 fs/xfs/xfs_log_recover.c:3258
|
||||
xlog_do_recover+0x118/0x810 fs/xfs/xfs_log_recover.c:3286
|
||||
xlog_recover+0x2d1/0x5d0 fs/xfs/xfs_log_recover.c:3420
|
||||
xfs_log_mount+0x40f/0xa00 fs/xfs/xfs_log.c:713
|
||||
xfs_mountfs+0x12f5/0x2390 fs/xfs/xfs_mount.c:905
|
||||
xfs_fc_fill_super+0x1482/0x1f80 fs/xfs/xfs_super.c:1684
|
||||
get_tree_bdev+0x4ba/0x890 fs/super.c:1345
|
||||
xfs_fc_get_tree+0x20/0x30 fs/xfs/xfs_super.c:1731
|
||||
vfs_get_tree+0x96/0x390 fs/super.c:1550
|
||||
do_new_mount fs/namespace.c:2900 [inline]
|
||||
path_mount+0x7b0/0x2430 fs/namespace.c:3230
|
||||
do_mount+0x10f/0x140 fs/namespace.c:3243
|
||||
__do_sys_mount fs/namespace.c:3451 [inline]
|
||||
__se_sys_mount fs/namespace.c:3428 [inline]
|
||||
__x64_sys_mount+0x1bb/0x2b0 fs/namespace.c:3428
|
||||
do_syscall_64+0x4b/0x80 arch/x86/entry/common.c:46
|
||||
entry_SYSCALL_64_after_hwframe+0x61/0xc6
|
||||
|
||||
Signed-off-by: Long Li <leo.lilong@huawei.com>
|
||||
Signed-off-by: Wu Guanghao <wuguanghao3@huawei.com>
|
||||
---
|
||||
repair/sb.c | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
diff --git a/repair/sb.c b/repair/sb.c
|
||||
index 7391cf04..fe63bff3 100644
|
||||
--- a/repair/sb.c
|
||||
+++ b/repair/sb.c
|
||||
@@ -663,13 +663,11 @@ get_sb_geometry(fs_geometry_t *geo, xfs_sb_t *sbp)
|
||||
* blindly set fields that we know are always good
|
||||
*/
|
||||
geo->sb_blocksize = sbp->sb_blocksize;
|
||||
- geo->sb_dblocks = sbp->sb_dblocks;
|
||||
geo->sb_rblocks = sbp->sb_rblocks;
|
||||
geo->sb_rextents = sbp->sb_rextents;
|
||||
geo->sb_logstart = sbp->sb_logstart;
|
||||
geo->sb_rextsize = sbp->sb_rextsize;
|
||||
geo->sb_agblocks = sbp->sb_agblocks;
|
||||
- geo->sb_agcount = sbp->sb_agcount;
|
||||
geo->sb_rbmblocks = sbp->sb_rbmblocks;
|
||||
geo->sb_logblocks = sbp->sb_logblocks;
|
||||
geo->sb_sectsize = sbp->sb_sectsize;
|
||||
--
|
||||
2.31.1
|
||||
@ -1,61 +0,0 @@
|
||||
From baf8a5df8a0c6539818a3f78a68d22648c022e50 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 12 Jul 2022 13:25:33 -0500
|
||||
Subject: [PATCH] xfs_copy: don't use cached buffer reads until after
|
||||
libxfs_mount
|
||||
|
||||
I accidentally tried to xfs_copy an ext4 filesystem, but instead of
|
||||
rejecting the filesystem, the program instead crashed. I figured out
|
||||
that zeroing the superblock was enough to trigger this:
|
||||
|
||||
# dd if=/dev/zero of=/dev/sda bs=1024k count=1
|
||||
# xfs_copy /dev/sda /dev/sdb
|
||||
Floating point exception
|
||||
|
||||
The exact crash happens in this line from libxfs_getbuf_flags, which is
|
||||
called from the main() routine of xfs_copy:
|
||||
|
||||
if (btp == btp->bt_mount->m_ddev_targp) {
|
||||
(*bpp)->b_pag = xfs_perag_get(btp->bt_mount,
|
||||
xfs_daddr_to_agno(btp->bt_mount, blkno));
|
||||
|
||||
The problem here is that the uncached read filled the incore superblock
|
||||
with zeroes, which means mbuf.sb_agblocks is zero. This causes a
|
||||
division by zero in xfs_daddr_to_agno, thereby crashing the program.
|
||||
|
||||
In commit f8b581d6, we made it so that xfs_buf structures contain a
|
||||
passive reference to the associated perag structure. That commit
|
||||
assumes that no program would try a cached buffer read until the buffer
|
||||
cache is fully set up, which is true throughout xfsprogs... except for
|
||||
the beginning of xfs_copy. For whatever reason, it attempts an uncached
|
||||
read of the superblock to figure out the real superblock size, then
|
||||
performs a *cached* read with the proper buffer length and verifier.
|
||||
The cached read crashes the program.
|
||||
|
||||
Fix the problem by changing the (second) cached read into an uncached read.
|
||||
|
||||
Fixes: f8b581d6 ("libxfs: actually make buffers track the per-ag structures")
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
copy/xfs_copy.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c
|
||||
index 41f594bd..79f65946 100644
|
||||
--- a/copy/xfs_copy.c
|
||||
+++ b/copy/xfs_copy.c
|
||||
@@ -748,7 +748,7 @@ main(int argc, char **argv)
|
||||
/* Do it again, now with proper length and verifier */
|
||||
libxfs_buf_relse(sbp);
|
||||
|
||||
- error = -libxfs_buf_read(mbuf.m_ddev_targp, XFS_SB_DADDR,
|
||||
+ error = -libxfs_buf_read_uncached(mbuf.m_ddev_targp, XFS_SB_DADDR,
|
||||
1 << (sb->sb_sectlog - BBSHIFT), 0, &sbp,
|
||||
&xfs_sb_buf_ops);
|
||||
if (error) {
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
From b3749469112306a80925420b48a6e756b2beeed9 Mon Sep 17 00:00:00 2001
|
||||
From: Dave Chinner <dchinner@redhat.com>
|
||||
Date: Mon, 31 Jan 2022 15:25:48 -0500
|
||||
Subject: [PATCH] xfs: sb verifier doesn't handle uncached sb buffer
|
||||
|
||||
Source kernel commit: 8cf07f3dd56195316be97758cb8b4e1d7183ea84
|
||||
|
||||
The verifier checks explicitly for bp->b_bn == XFS_SB_DADDR to match
|
||||
the primary superblock buffer, but the primary superblock is an
|
||||
uncached buffer and so bp->b_bn is always -1ULL. Hence this never
|
||||
matches and the CRC error reporting is wholly dependent on the
|
||||
mount superblock already being populated so CRC feature checks pass
|
||||
and allow CRC errors to be reported.
|
||||
|
||||
Fix this so that the primary superblock CRC error reporting is not
|
||||
dependent on already having read the superblock into memory.
|
||||
|
||||
Signed-off-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_sb.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
|
||||
index b2e214e..f29a59a 100644
|
||||
--- a/libxfs/xfs_sb.c
|
||||
+++ b/libxfs/xfs_sb.c
|
||||
@@ -634,7 +634,7 @@ xfs_sb_read_verify(
|
||||
|
||||
if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
|
||||
/* Only fail bad secondaries on a known V5 filesystem */
|
||||
- if (bp->b_bn == XFS_SB_DADDR ||
|
||||
+ if (bp->b_maps[0].bm_bn == XFS_SB_DADDR ||
|
||||
xfs_sb_version_hascrc(&mp->m_sb)) {
|
||||
error = -EFSBADCRC;
|
||||
goto out_error;
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,49 +0,0 @@
|
||||
From f043c63e38c9582deac85053a6c8a737482983b1 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Mon, 31 Jan 2022 17:46:05 -0500
|
||||
Subject: [PATCH] libxfs: always initialize internal buffer map
|
||||
|
||||
The __initbuf function is responsible for initializing the fields of an
|
||||
xfs_buf. Buffers are always required to have a mapping, though in the
|
||||
typical case there's only one mapping, so we can use the internal one.
|
||||
|
||||
The single-mapping b_maps init code at the end of the function doesn't
|
||||
quite get this right though -- if a single-mapping buffer in the cache
|
||||
was allowed to expire and now is being repurposed, it'll come out with
|
||||
b_maps == &__b_map, in which case we incorrectly skip initializing the
|
||||
map. This has gone unnoticed until now because (AFAICT) the code paths
|
||||
that use b_maps are the same ones that are called with multi-mapping
|
||||
buffers, which are initialized correctly.
|
||||
|
||||
Anyway, the improperly initialized single-mappings will cause problems
|
||||
in upcoming patches where we turn b_bn into the cache key and require
|
||||
the use of b_maps[0].bm_bn for the buffer LBA. Fix this.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/rdwr.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
|
||||
index 5086bdb..a55e3a7 100644
|
||||
--- a/libxfs/rdwr.c
|
||||
+++ b/libxfs/rdwr.c
|
||||
@@ -251,9 +251,11 @@ __initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
|
||||
bp->b_ops = NULL;
|
||||
INIT_LIST_HEAD(&bp->b_li_list);
|
||||
|
||||
- if (!bp->b_maps) {
|
||||
- bp->b_nmaps = 1;
|
||||
+ if (!bp->b_maps)
|
||||
bp->b_maps = &bp->__b_map;
|
||||
+
|
||||
+ if (bp->b_maps == &bp->__b_map) {
|
||||
+ bp->b_nmaps = 1;
|
||||
bp->b_maps[0].bm_bn = bp->b_bn;
|
||||
bp->b_maps[0].bm_len = bp->b_length;
|
||||
}
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
From f98b7a261130726c33accd295ec0d2a22f270cde Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 25 Feb 2022 17:32:48 -0500
|
||||
Subject: [PATCH] libxfs: shut down filesystem if we xfs_trans_cancel with
|
||||
deferred work items
|
||||
|
||||
While debugging some very strange rmap corruption reports in connection
|
||||
with the online directory repair code, I root-caused the error to the
|
||||
following incorrect sequence:
|
||||
|
||||
<start repair transaction>
|
||||
<expand directory, causing a deferred rmap to be queued>
|
||||
<roll transaction>
|
||||
<cancel transaction>
|
||||
|
||||
Obviously, we should have committed the transaction instead of
|
||||
cancelling it. Thinking more broadly, however, xfs_trans_cancel should
|
||||
have warned us that we were throwing away work items that we already
|
||||
committed to performing. This is not correct, and we need to shut down
|
||||
the filesystem.
|
||||
|
||||
Change xfs_trans_cancel to complain in the loudest manner if we're
|
||||
cancelling any transaction with deferred work items attached.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/trans.c | 19 ++++++++++++++++++-
|
||||
1 file changed, 18 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libxfs/trans.c b/libxfs/trans.c
|
||||
index fd2e6f9..8c16cb8 100644
|
||||
--- a/libxfs/trans.c
|
||||
+++ b/libxfs/trans.c
|
||||
@@ -318,13 +318,30 @@ void
|
||||
libxfs_trans_cancel(
|
||||
struct xfs_trans *tp)
|
||||
{
|
||||
+ bool dirty;
|
||||
+
|
||||
trace_xfs_trans_cancel(tp, _RET_IP_);
|
||||
|
||||
if (tp == NULL)
|
||||
return;
|
||||
+ dirty = (tp->t_flags & XFS_TRANS_DIRTY);
|
||||
|
||||
- if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
|
||||
+ /*
|
||||
+ * It's never valid to cancel a transaction with deferred ops attached,
|
||||
+ * because the transaction is effectively dirty. Complain about this
|
||||
+ * loudly before freeing the in-memory defer items.
|
||||
+ */
|
||||
+ if (!list_empty(&tp->t_dfops)) {
|
||||
+ ASSERT(list_empty(&tp->t_dfops));
|
||||
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
|
||||
+ dirty = true;
|
||||
xfs_defer_cancel(tp);
|
||||
+ }
|
||||
+
|
||||
+ if (dirty) {
|
||||
+ fprintf(stderr, _("Cancelling dirty transaction!\n"));
|
||||
+ abort();
|
||||
+ }
|
||||
|
||||
xfs_trans_free_items(tp);
|
||||
xfs_trans_free(tp);
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
From e9ff33f6e604ece202373be3ac176064083d913e Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 25 Feb 2022 17:42:16 -0500
|
||||
Subject: [PATCH] xfs_db: fix nbits parameter in fa_ino[48] functions
|
||||
|
||||
Use the proper macro to convert ino4 and ino8 field byte sizes to a bit
|
||||
count in the functions that navigate shortform directories. This just
|
||||
happens to work correctly for ino4 entries, but omits the upper 4 bytes
|
||||
of an ino8 entry. Note that the entries display correctly; it's just
|
||||
the command "addr u3.sfdir3.list[X].inumber.i8" that won't.
|
||||
|
||||
Found by running smatch.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
db/faddr.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/db/faddr.c b/db/faddr.c
|
||||
index 81d69c9..0127c5d 100644
|
||||
--- a/db/faddr.c
|
||||
+++ b/db/faddr.c
|
||||
@@ -353,7 +353,8 @@ fa_ino4(
|
||||
xfs_ino_t ino;
|
||||
|
||||
ASSERT(next == TYP_INODE);
|
||||
- ino = (xfs_ino_t)getbitval(obj, bit, bitsz(XFS_INO32_SIZE), BVUNSIGNED);
|
||||
+ ino = (xfs_ino_t)getbitval(obj, bit, bitize(XFS_INO32_SIZE),
|
||||
+ BVUNSIGNED);
|
||||
if (ino == NULLFSINO) {
|
||||
dbprintf(_("null inode number, cannot set new addr\n"));
|
||||
return;
|
||||
@@ -370,7 +371,8 @@ fa_ino8(
|
||||
xfs_ino_t ino;
|
||||
|
||||
ASSERT(next == TYP_INODE);
|
||||
- ino = (xfs_ino_t)getbitval(obj, bit, bitsz(XFS_INO64_SIZE), BVUNSIGNED);
|
||||
+ ino = (xfs_ino_t)getbitval(obj, bit, bitize(XFS_INO64_SIZE),
|
||||
+ BVUNSIGNED);
|
||||
if (ino == NULLFSINO) {
|
||||
dbprintf(_("null inode number, cannot set new addr\n"));
|
||||
return;
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,100 +0,0 @@
|
||||
From 918e82a4879dccaf3673871be925a87efc2fbabc Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 25 Feb 2022 17:42:16 -0500
|
||||
Subject: [PATCH] xfs_repair: update secondary superblocks after changing
|
||||
features
|
||||
|
||||
When we add features to an existing filesystem, make sure we update the
|
||||
secondary superblocks to reflect the new geometry so that if we lose the
|
||||
primary super in the future, repair will recover correctly.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/libxfs_api_defs.h | 2 ++
|
||||
repair/globals.c | 1 +
|
||||
repair/globals.h | 1 +
|
||||
repair/phase2.c | 2 ++
|
||||
repair/xfs_repair.c | 15 +++++++++++++++
|
||||
5 files changed, 21 insertions(+)
|
||||
|
||||
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
|
||||
index b76e638..63628ae 100644
|
||||
--- a/libxfs/libxfs_api_defs.h
|
||||
+++ b/libxfs/libxfs_api_defs.h
|
||||
@@ -195,6 +195,8 @@
|
||||
#define xfs_trans_roll libxfs_trans_roll
|
||||
#define xfs_trim_extent libxfs_trim_extent
|
||||
|
||||
+#define xfs_update_secondary_sbs libxfs_update_secondary_sbs
|
||||
+
|
||||
#define xfs_validate_stripe_geometry libxfs_validate_stripe_geometry
|
||||
#define xfs_verify_agbno libxfs_verify_agbno
|
||||
#define xfs_verify_agino libxfs_verify_agino
|
||||
diff --git a/repair/globals.c b/repair/globals.c
|
||||
index 506a4e7..f8d4f1e 100644
|
||||
--- a/repair/globals.c
|
||||
+++ b/repair/globals.c
|
||||
@@ -48,6 +48,7 @@ char *rt_name; /* Name of realtime device */
|
||||
int rt_spec; /* Realtime dev specified as option */
|
||||
int convert_lazy_count; /* Convert lazy-count mode on/off */
|
||||
int lazy_count; /* What to set if to if converting */
|
||||
+bool features_changed; /* did we change superblock feature bits? */
|
||||
bool add_inobtcount; /* add inode btree counts to AGI */
|
||||
bool add_bigtime; /* add support for timestamps up to 2486 */
|
||||
|
||||
diff --git a/repair/globals.h b/repair/globals.h
|
||||
index 929b82b..0f98bd2 100644
|
||||
--- a/repair/globals.h
|
||||
+++ b/repair/globals.h
|
||||
@@ -89,6 +89,7 @@ extern char *rt_name; /* Name of realtime device */
|
||||
extern int rt_spec; /* Realtime dev specified as option */
|
||||
extern int convert_lazy_count; /* Convert lazy-count mode on/off */
|
||||
extern int lazy_count; /* What to set if to if converting */
|
||||
+extern bool features_changed; /* did we change superblock feature bits? */
|
||||
extern bool add_inobtcount; /* add inode btree counts to AGI */
|
||||
extern bool add_bigtime; /* add support for timestamps up to 2486 */
|
||||
|
||||
diff --git a/repair/phase2.c b/repair/phase2.c
|
||||
index 32ffe18..ab53ee0 100644
|
||||
--- a/repair/phase2.c
|
||||
+++ b/repair/phase2.c
|
||||
@@ -216,6 +216,8 @@ upgrade_filesystem(
|
||||
}
|
||||
if (bp)
|
||||
libxfs_buf_relse(bp);
|
||||
+
|
||||
+ features_changed = true;
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
|
||||
index 38406ee..e44aa40 100644
|
||||
--- a/repair/xfs_repair.c
|
||||
+++ b/repair/xfs_repair.c
|
||||
@@ -1298,6 +1298,21 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
|
||||
libxfs_buf_relse(sbp);
|
||||
|
||||
/*
|
||||
+ * If we upgraded V5 filesystem features, we need to update the
|
||||
+ * secondary superblocks to include the new feature bits. Don't set
|
||||
+ * NEEDSREPAIR on the secondaries.
|
||||
+ */
|
||||
+ if (features_changed) {
|
||||
+ mp->m_sb.sb_features_incompat &=
|
||||
+ ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
|
||||
+ error = -libxfs_update_secondary_sbs(mp);
|
||||
+ if (error)
|
||||
+ do_error(_("upgrading features of secondary supers"));
|
||||
+ mp->m_sb.sb_features_incompat |=
|
||||
+ XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
* Done. Flush all cached buffers and inodes first to ensure all
|
||||
* verifiers are run (where we discover the max metadata LSN), reformat
|
||||
* the log if necessary and unmount.
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,41 +0,0 @@
|
||||
From 2e9720d51a1e9efa6535b540f3c9ff88e95aabe9 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Wed, 27 Apr 2022 23:11:09 -0400
|
||||
Subject: [PATCH] xfs_repair: fix AG header btree level comparisons
|
||||
|
||||
It's not an error if repair encounters a btree with the maximal
|
||||
height, so don't print warnings. Also, we don't allow zero-height
|
||||
btrees.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
repair/scan.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/repair/scan.c b/repair/scan.c
|
||||
index 909c449..e2d281a 100644
|
||||
--- a/repair/scan.c
|
||||
+++ b/repair/scan.c
|
||||
@@ -2297,7 +2297,7 @@ validate_agf(
|
||||
priv.nr_blocks = 0;
|
||||
|
||||
levels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
|
||||
- if (levels >= XFS_BTREE_MAXLEVELS) {
|
||||
+ if (levels == 0 || levels > XFS_BTREE_MAXLEVELS) {
|
||||
do_warn(_("bad levels %u for rmapbt root, agno %d\n"),
|
||||
levels, agno);
|
||||
rmap_avoid_check();
|
||||
@@ -2323,7 +2323,7 @@ validate_agf(
|
||||
unsigned int levels;
|
||||
|
||||
levels = be32_to_cpu(agf->agf_refcount_level);
|
||||
- if (levels >= XFS_BTREE_MAXLEVELS) {
|
||||
+ if (levels == 0 || levels > XFS_BTREE_MAXLEVELS) {
|
||||
do_warn(_("bad levels %u for refcountbt root, agno %d\n"),
|
||||
levels, agno);
|
||||
refcount_avoid_check();
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
From 0571c857fe326141e35162f5a05e6b89789840bf Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Thu, 28 Apr 2022 15:39:02 -0400
|
||||
Subject: [PATCH] xfs: fix maxlevels comparisons in the btree staging code
|
||||
|
||||
Source kernel commit: 78e8ec83a404d63dcc86b251f42e4ee8aff27465
|
||||
|
||||
The btree geometry computation function has an off-by-one error in that
|
||||
it does not allow maximally tall btrees (nlevels == XFS_BTREE_MAXLEVELS).
|
||||
This can result in repairs failing unnecessarily on very fragmented
|
||||
filesystems. Subsequent patches to remove MAXLEVELS usage in favor of
|
||||
the per-btree type computations will make this a much more likely
|
||||
occurrence.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_btree_staging.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_btree_staging.c b/libxfs/xfs_btree_staging.c
|
||||
index 146d247..daf9979 100644
|
||||
--- a/libxfs/xfs_btree_staging.c
|
||||
+++ b/libxfs/xfs_btree_staging.c
|
||||
@@ -662,7 +662,7 @@ xfs_btree_bload_compute_geometry(
|
||||
xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1);
|
||||
|
||||
bbl->nr_records = nr_this_level = nr_records;
|
||||
- for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) {
|
||||
+ for (cur->bc_nlevels = 1; cur->bc_nlevels <= XFS_BTREE_MAXLEVELS;) {
|
||||
uint64_t level_blocks;
|
||||
uint64_t dontcare64;
|
||||
unsigned int level = cur->bc_nlevels - 1;
|
||||
@@ -724,7 +724,7 @@ xfs_btree_bload_compute_geometry(
|
||||
nr_this_level = level_blocks;
|
||||
}
|
||||
|
||||
- if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS)
|
||||
+ if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS)
|
||||
return -EOVERFLOW;
|
||||
|
||||
bbl->btree_height = cur->bc_nlevels;
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,52 +0,0 @@
|
||||
From b79218242b786a2c02bcac9f53fdae45e2e61e90 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Foster <bfoster@redhat.com>
|
||||
Date: Thu, 28 Apr 2022 15:39:02 -0400
|
||||
Subject: [PATCH] xfs: fold perag loop iteration logic into helper function
|
||||
|
||||
Source kernel commit: bf2307b195135ed9c95eebb38920d8bd41843092
|
||||
|
||||
Fold the loop iteration logic into a helper in preparation for
|
||||
further fixups. No functional change in this patch.
|
||||
|
||||
Signed-off-by: Brian Foster <bfoster@redhat.com>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_ag.h | 16 +++++++++++++---
|
||||
1 file changed, 13 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_ag.h b/libxfs/xfs_ag.h
|
||||
index 2522f76..95570df 100644
|
||||
--- a/libxfs/xfs_ag.h
|
||||
+++ b/libxfs/xfs_ag.h
|
||||
@@ -126,12 +126,22 @@ void xfs_perag_put(struct xfs_perag *pag);
|
||||
* for_each_perag_from() because they terminate at sb_agcount where there are
|
||||
* no perag structures in tree beyond end_agno.
|
||||
*/
|
||||
+static inline struct xfs_perag *
|
||||
+xfs_perag_next(
|
||||
+ struct xfs_perag *pag,
|
||||
+ xfs_agnumber_t *next_agno)
|
||||
+{
|
||||
+ struct xfs_mount *mp = pag->pag_mount;
|
||||
+
|
||||
+ *next_agno = pag->pag_agno + 1;
|
||||
+ xfs_perag_put(pag);
|
||||
+ return xfs_perag_get(mp, *next_agno);
|
||||
+}
|
||||
+
|
||||
#define for_each_perag_range(mp, next_agno, end_agno, pag) \
|
||||
for ((pag) = xfs_perag_get((mp), (next_agno)); \
|
||||
(pag) != NULL && (next_agno) <= (end_agno); \
|
||||
- (next_agno) = (pag)->pag_agno + 1, \
|
||||
- xfs_perag_put(pag), \
|
||||
- (pag) = xfs_perag_get((mp), (next_agno)))
|
||||
+ (pag) = xfs_perag_next((pag), &(next_agno)))
|
||||
|
||||
#define for_each_perag_from(mp, next_agno, pag) \
|
||||
for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag))
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
From 02ff0b2b4c117f33f79500815a9322fe987a4bf5 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Foster <bfoster@redhat.com>
|
||||
Date: Thu, 28 Apr 2022 15:39:02 -0400
|
||||
Subject: [PATCH] xfs: rename the next_agno perag iteration variable
|
||||
|
||||
Source kernel commit: f1788b5e5ee25bedf00bb4d25f82b93820d61189
|
||||
|
||||
Rename the next_agno variable to be consistent across the several
|
||||
iteration macros and shorten line length.
|
||||
|
||||
Signed-off-by: Brian Foster <bfoster@redhat.com>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_ag.h | 18 +++++++++---------
|
||||
1 file changed, 9 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_ag.h b/libxfs/xfs_ag.h
|
||||
index 95570df..9cd0669 100644
|
||||
--- a/libxfs/xfs_ag.h
|
||||
+++ b/libxfs/xfs_ag.h
|
||||
@@ -129,22 +129,22 @@ void xfs_perag_put(struct xfs_perag *pag);
|
||||
static inline struct xfs_perag *
|
||||
xfs_perag_next(
|
||||
struct xfs_perag *pag,
|
||||
- xfs_agnumber_t *next_agno)
|
||||
+ xfs_agnumber_t *agno)
|
||||
{
|
||||
struct xfs_mount *mp = pag->pag_mount;
|
||||
|
||||
- *next_agno = pag->pag_agno + 1;
|
||||
+ *agno = pag->pag_agno + 1;
|
||||
xfs_perag_put(pag);
|
||||
- return xfs_perag_get(mp, *next_agno);
|
||||
+ return xfs_perag_get(mp, *agno);
|
||||
}
|
||||
|
||||
-#define for_each_perag_range(mp, next_agno, end_agno, pag) \
|
||||
- for ((pag) = xfs_perag_get((mp), (next_agno)); \
|
||||
- (pag) != NULL && (next_agno) <= (end_agno); \
|
||||
- (pag) = xfs_perag_next((pag), &(next_agno)))
|
||||
+#define for_each_perag_range(mp, agno, end_agno, pag) \
|
||||
+ for ((pag) = xfs_perag_get((mp), (agno)); \
|
||||
+ (pag) != NULL && (agno) <= (end_agno); \
|
||||
+ (pag) = xfs_perag_next((pag), &(agno)))
|
||||
|
||||
-#define for_each_perag_from(mp, next_agno, pag) \
|
||||
- for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag))
|
||||
+#define for_each_perag_from(mp, agno, pag) \
|
||||
+ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount, (pag))
|
||||
|
||||
|
||||
#define for_each_perag(mp, agno, pag) \
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
From 6c18fde82cd02e550fb0c095bd6c6908dcc77747 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Foster <bfoster@redhat.com>
|
||||
Date: Thu, 28 Apr 2022 15:39:03 -0400
|
||||
Subject: [PATCH] xfs: terminate perag iteration reliably on agcount
|
||||
|
||||
Source kernel commit: 8ed004eb9d07a5d6114db3e97a166707c186262d
|
||||
|
||||
The for_each_perag_from() iteration macro relies on sb_agcount to
|
||||
process every perag currently within EOFS from a given starting
|
||||
point. It's perfectly valid to have perag structures beyond
|
||||
sb_agcount, however, such as if a growfs is in progress. If a perag
|
||||
loop happens to race with growfs in this manner, it will actually
|
||||
attempt to process the post-EOFS perag where ->pag_agno ==
|
||||
sb_agcount. This is reproduced by xfs/104 and manifests as the
|
||||
following assert failure in superblock write verifier context:
|
||||
|
||||
XFS: Assertion failed: agno < mp->m_sb.sb_agcount, file: fs/xfs/libxfs/xfs_types.c, line: 22
|
||||
|
||||
Update the corresponding macro to only process perags that are
|
||||
within the current sb_agcount.
|
||||
|
||||
Fixes: 58d43a7e3263 ("xfs: pass perags around in fsmap data dev functions")
|
||||
Signed-off-by: Brian Foster <bfoster@redhat.com>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_ag.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libxfs/xfs_ag.h b/libxfs/xfs_ag.h
|
||||
index 9cd0669..fae2a38 100644
|
||||
--- a/libxfs/xfs_ag.h
|
||||
+++ b/libxfs/xfs_ag.h
|
||||
@@ -144,7 +144,7 @@ xfs_perag_next(
|
||||
(pag) = xfs_perag_next((pag), &(agno)))
|
||||
|
||||
#define for_each_perag_from(mp, agno, pag) \
|
||||
- for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount, (pag))
|
||||
+ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
|
||||
|
||||
|
||||
#define for_each_perag(mp, agno, pag) \
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,88 +0,0 @@
|
||||
From 9619d9e715b2eba7c39683bcbc721d3954275eb4 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Foster <bfoster@redhat.com>
|
||||
Date: Thu, 28 Apr 2022 15:39:03 -0400
|
||||
Subject: [PATCH] xfs: fix perag reference leak on iteration race with growfs
|
||||
|
||||
Source kernel commit: 892a666fafa19ab04b5e948f6c92f98f1dafb489
|
||||
|
||||
The for_each_perag*() set of macros are hacky in that some (i.e.
|
||||
those based on sb_agcount) rely on the assumption that perag
|
||||
iteration terminates naturally with a NULL perag at the specified
|
||||
end_agno. Others allow for the final AG to have a valid perag and
|
||||
require the calling function to clean up any potential leftover
|
||||
xfs_perag reference on termination of the loop.
|
||||
|
||||
Aside from providing a subtly inconsistent interface, the former
|
||||
variant is racy with growfs because growfs can create discoverable
|
||||
post-eofs perags before the final superblock update that completes
|
||||
the grow operation and increases sb_agcount. This leads to the
|
||||
following assert failure (reproduced by xfs/104) in the perag free
|
||||
path during unmount:
|
||||
|
||||
XFS: Assertion failed: atomic_read(&pag->pag_ref) == 0, file: fs/xfs/libxfs/xfs_ag.c, line: 195
|
||||
|
||||
This occurs because one of the many for_each_perag() loops in the
|
||||
code that is expected to terminate with a NULL pag (and thus has no
|
||||
post-loop xfs_perag_put() check) raced with a growfs and found a
|
||||
non-NULL post-EOFS perag, but terminated naturally based on the
|
||||
end_agno check without releasing the post-EOFS perag.
|
||||
|
||||
Rework the iteration logic to lift the agno check from the main for
|
||||
loop conditional to the iteration helper function. The for loop now
|
||||
purely terminates on a NULL pag and xfs_perag_next() avoids taking a
|
||||
reference to any perag beyond end_agno in the first place.
|
||||
|
||||
Fixes: f250eedcf762 ("xfs: make for_each_perag... a first class citizen")
|
||||
Signed-off-by: Brian Foster <bfoster@redhat.com>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_ag.h | 16 ++++++----------
|
||||
1 file changed, 6 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_ag.h b/libxfs/xfs_ag.h
|
||||
index fae2a38..e411d51 100644
|
||||
--- a/libxfs/xfs_ag.h
|
||||
+++ b/libxfs/xfs_ag.h
|
||||
@@ -118,30 +118,26 @@ void xfs_perag_put(struct xfs_perag *pag);
|
||||
|
||||
/*
|
||||
* Perag iteration APIs
|
||||
- *
|
||||
- * XXX: for_each_perag_range() usage really needs an iterator to clean up when
|
||||
- * we terminate at end_agno because we may have taken a reference to the perag
|
||||
- * beyond end_agno. Right now callers have to be careful to catch and clean that
|
||||
- * up themselves. This is not necessary for the callers of for_each_perag() and
|
||||
- * for_each_perag_from() because they terminate at sb_agcount where there are
|
||||
- * no perag structures in tree beyond end_agno.
|
||||
*/
|
||||
static inline struct xfs_perag *
|
||||
xfs_perag_next(
|
||||
struct xfs_perag *pag,
|
||||
- xfs_agnumber_t *agno)
|
||||
+ xfs_agnumber_t *agno,
|
||||
+ xfs_agnumber_t end_agno)
|
||||
{
|
||||
struct xfs_mount *mp = pag->pag_mount;
|
||||
|
||||
*agno = pag->pag_agno + 1;
|
||||
xfs_perag_put(pag);
|
||||
+ if (*agno > end_agno)
|
||||
+ return NULL;
|
||||
return xfs_perag_get(mp, *agno);
|
||||
}
|
||||
|
||||
#define for_each_perag_range(mp, agno, end_agno, pag) \
|
||||
for ((pag) = xfs_perag_get((mp), (agno)); \
|
||||
- (pag) != NULL && (agno) <= (end_agno); \
|
||||
- (pag) = xfs_perag_next((pag), &(agno)))
|
||||
+ (pag) != NULL; \
|
||||
+ (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
|
||||
|
||||
#define for_each_perag_from(mp, agno, pag) \
|
||||
for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,91 +0,0 @@
|
||||
From b6a7b627b1211f87e3bac3dc0111d056e70aa773 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 17 May 2022 22:48:12 -0400
|
||||
Subject: [PATCH] mkfs: fix missing validation of -l size against maximum
|
||||
internal log size
|
||||
|
||||
If a sysadmin specifies a log size explicitly, we don't actually check
|
||||
that against the maximum internal log size that we compute for the
|
||||
default log size computation. We're going to add more validation soon,
|
||||
so refactor the max internal log blocks into a common variable and
|
||||
add a check.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 36 ++++++++++++++++++++++--------------
|
||||
1 file changed, 22 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index b7e335f..f7006af 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -3268,6 +3268,7 @@ calculate_log_size(
|
||||
{
|
||||
struct xfs_sb *sbp = &mp->m_sb;
|
||||
int min_logblocks;
|
||||
+ int max_logblocks; /* absolute max for this AG */
|
||||
struct xfs_mount mount;
|
||||
|
||||
/* we need a temporary mount to calculate the minimum log size. */
|
||||
@@ -3307,6 +3308,18 @@ _("external log device size %lld blocks too small, must be at least %lld blocks\
|
||||
return;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Make sure the log fits wholly within an AG
|
||||
+ *
|
||||
+ * XXX: If agf->freeblks ends up as 0 because the log uses all
|
||||
+ * the free space, it causes the kernel all sorts of problems
|
||||
+ * with per-ag reservations. Right now just back it off one
|
||||
+ * block, but there's a whole can of worms here that needs to be
|
||||
+ * opened to decide what is the valid maximum size of a log in
|
||||
+ * an AG.
|
||||
+ */
|
||||
+ max_logblocks = libxfs_alloc_ag_max_usable(mp) - 1;
|
||||
+
|
||||
/* internal log - if no size specified, calculate automatically */
|
||||
if (!cfg->logblocks) {
|
||||
if (cfg->dblocks < GIGABYTES(1, cfg->blocklog)) {
|
||||
@@ -3332,21 +3345,9 @@ _("external log device size %lld blocks too small, must be at least %lld blocks\
|
||||
cfg->logblocks = cfg->logblocks >> cfg->blocklog;
|
||||
}
|
||||
|
||||
- /* Ensure the chosen size meets minimum log size requirements */
|
||||
+ /* Ensure the chosen size fits within log size requirements */
|
||||
cfg->logblocks = max(min_logblocks, cfg->logblocks);
|
||||
-
|
||||
- /*
|
||||
- * Make sure the log fits wholly within an AG
|
||||
- *
|
||||
- * XXX: If agf->freeblks ends up as 0 because the log uses all
|
||||
- * the free space, it causes the kernel all sorts of problems
|
||||
- * with per-ag reservations. Right now just back it off one
|
||||
- * block, but there's a whole can of worms here that needs to be
|
||||
- * opened to decide what is the valid maximum size of a log in
|
||||
- * an AG.
|
||||
- */
|
||||
- cfg->logblocks = min(cfg->logblocks,
|
||||
- libxfs_alloc_ag_max_usable(mp) - 1);
|
||||
+ cfg->logblocks = min(cfg->logblocks, max_logblocks);
|
||||
|
||||
/* and now clamp the size to the maximum supported size */
|
||||
cfg->logblocks = min(cfg->logblocks, XFS_MAX_LOG_BLOCKS);
|
||||
@@ -3354,6 +3355,13 @@ _("external log device size %lld blocks too small, must be at least %lld blocks\
|
||||
cfg->logblocks = XFS_MAX_LOG_BYTES >> cfg->blocklog;
|
||||
|
||||
validate_log_size(cfg->logblocks, cfg->blocklog, min_logblocks);
|
||||
+ } else if (cfg->logblocks > max_logblocks) {
|
||||
+ /* check specified log size */
|
||||
+ fprintf(stderr,
|
||||
+_("internal log size %lld too large, must be less than %d\n"),
|
||||
+ (long long)cfg->logblocks,
|
||||
+ max_logblocks);
|
||||
+ usage();
|
||||
}
|
||||
|
||||
if (cfg->logblocks > sbp->sb_agblocks - libxfs_prealloc_blocks(mp)) {
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,108 +0,0 @@
|
||||
From 8d1bff2be3360572fbee9ed83e0d1c86af1614c5 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 17 May 2022 22:48:12 -0400
|
||||
Subject: [PATCH] mkfs: reduce internal log size when log stripe units are in
|
||||
play
|
||||
|
||||
Currently, one can feed mkfs a combination of options like this:
|
||||
|
||||
$ truncate -s 6366g /tmp/a ; mkfs.xfs -f /tmp/a -d agcount=3200 -d su=256k,sw=4
|
||||
meta-data=/tmp/a isize=512 agcount=3200, agsize=521536 blks
|
||||
= sectsz=512 attr=2, projid32bit=1
|
||||
= crc=1 finobt=1, sparse=1, rmapbt=0
|
||||
= reflink=1 bigtime=0 inobtcount=0
|
||||
data = bsize=4096 blocks=1668808704, imaxpct=5
|
||||
= sunit=64 swidth=256 blks
|
||||
naming =version 2 bsize=4096 ascii-ci=0, ftype=1
|
||||
log =internal log bsize=4096 blocks=521536, version=2
|
||||
= sectsz=512 sunit=64 blks, lazy-count=1
|
||||
realtime =none extsz=4096 blocks=0, rtextents=0
|
||||
Metadata corruption detected at 0x55e88052c6b6, xfs_agf block 0x1/0x200
|
||||
libxfs_bwrite: write verifier failed on xfs_agf bno 0x1/0x1
|
||||
mkfs.xfs: writing AG headers failed, err=117
|
||||
|
||||
The format fails because the internal log size sizing algorithm
|
||||
specifies a log size of 521492 blocks to avoid taking all the space in
|
||||
the AG, but align_log_size sees the stripe unit and rounds that up to
|
||||
the next stripe unit, which is 521536 blocks.
|
||||
|
||||
Fix this problem by rounding the log size down if rounding up would
|
||||
result in a log that consumes more space in the AG than we allow.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 19 +++++++++++--------
|
||||
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index e11b39d..eb4d7fa 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -3180,9 +3180,10 @@ sb_set_features(
|
||||
static void
|
||||
align_log_size(
|
||||
struct mkfs_params *cfg,
|
||||
- int sunit)
|
||||
+ int sunit,
|
||||
+ int max_logblocks)
|
||||
{
|
||||
- uint64_t tmp_logblocks;
|
||||
+ uint64_t tmp_logblocks;
|
||||
|
||||
/* nothing to do if it's already aligned. */
|
||||
if ((cfg->logblocks % sunit) == 0)
|
||||
@@ -3199,7 +3200,8 @@ _("log size %lld is not a multiple of the log stripe unit %d\n"),
|
||||
|
||||
/* If the log is too large, round down instead of round up */
|
||||
if ((tmp_logblocks > XFS_MAX_LOG_BLOCKS) ||
|
||||
- ((tmp_logblocks << cfg->blocklog) > XFS_MAX_LOG_BYTES)) {
|
||||
+ ((tmp_logblocks << cfg->blocklog) > XFS_MAX_LOG_BYTES) ||
|
||||
+ tmp_logblocks > max_logblocks) {
|
||||
tmp_logblocks = (cfg->logblocks / sunit) * sunit;
|
||||
}
|
||||
cfg->logblocks = tmp_logblocks;
|
||||
@@ -3213,7 +3215,8 @@ static void
|
||||
align_internal_log(
|
||||
struct mkfs_params *cfg,
|
||||
struct xfs_mount *mp,
|
||||
- int sunit)
|
||||
+ int sunit,
|
||||
+ int max_logblocks)
|
||||
{
|
||||
uint64_t logend;
|
||||
|
||||
@@ -3231,7 +3234,7 @@ _("Due to stripe alignment, the internal log start (%lld) cannot be aligned\n"
|
||||
}
|
||||
|
||||
/* round up/down the log size now */
|
||||
- align_log_size(cfg, sunit);
|
||||
+ align_log_size(cfg, sunit, max_logblocks);
|
||||
|
||||
/* check the aligned log still starts and ends in the same AG. */
|
||||
logend = cfg->logstart + cfg->logblocks - 1;
|
||||
@@ -3309,7 +3312,7 @@ _("external log device size %lld blocks too small, must be at least %lld blocks\
|
||||
cfg->logstart = 0;
|
||||
cfg->logagno = 0;
|
||||
if (cfg->lsunit)
|
||||
- align_log_size(cfg, cfg->lsunit);
|
||||
+ align_log_size(cfg, cfg->lsunit, XFS_MAX_LOG_BLOCKS);
|
||||
|
||||
validate_log_size(cfg->logblocks, cfg->blocklog, min_logblocks);
|
||||
return;
|
||||
@@ -3386,9 +3389,9 @@ _("log ag number %lld too large, must be less than %lld\n"),
|
||||
* Align the logstart at stripe unit boundary.
|
||||
*/
|
||||
if (cfg->lsunit) {
|
||||
- align_internal_log(cfg, mp, cfg->lsunit);
|
||||
+ align_internal_log(cfg, mp, cfg->lsunit, max_logblocks);
|
||||
} else if (cfg->dsunit) {
|
||||
- align_internal_log(cfg, mp, cfg->dsunit);
|
||||
+ align_internal_log(cfg, mp, cfg->dsunit, max_logblocks);
|
||||
}
|
||||
validate_log_size(cfg->logblocks, cfg->blocklog, min_logblocks);
|
||||
}
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,105 +0,0 @@
|
||||
From 1b580a773a65eb9b2fe7f777dd6900c0d6e9a7b3 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 17 May 2022 22:48:13 -0400
|
||||
Subject: [PATCH] mkfs: don't let internal logs bump the root dir inode chunk
|
||||
to AG 1
|
||||
|
||||
Currently, we don't let an internal log consume every last block in an
|
||||
AG. According to the comment, we're doing this to avoid tripping AGF
|
||||
verifiers if freeblks==0, but on a modern filesystem this isn't
|
||||
sufficient to avoid problems because we need to have enough space in the
|
||||
AG to allocate an aligned root inode chunk, if it should be the case
|
||||
that the log also ends up in AG 0:
|
||||
|
||||
$ truncate -s 6366g /tmp/a ; mkfs.xfs -f /tmp/a -d agcount=3200 -l agnum=0
|
||||
meta-data=/tmp/a isize=512 agcount=3200, agsize=521503 blks
|
||||
= sectsz=512 attr=2, projid32bit=1
|
||||
= crc=1 finobt=1, sparse=1, rmapbt=0
|
||||
= reflink=1 bigtime=0 inobtcount=0
|
||||
data = bsize=4096 blocks=1668808704, imaxpct=5
|
||||
= sunit=0 swidth=0 blks
|
||||
naming =version 2 bsize=4096 ascii-ci=0, ftype=1
|
||||
log =internal log bsize=4096 blocks=521492, version=2
|
||||
= sectsz=512 sunit=0 blks, lazy-count=1
|
||||
realtime =none extsz=4096 blocks=0, rtextents=0
|
||||
mkfs.xfs: root inode created in AG 1, not AG 0
|
||||
|
||||
Therefore, modify the maximum internal log size calculation to constrain
|
||||
the maximum internal log size so that the aligned inode chunk allocation
|
||||
will always succeed.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 47 insertions(+)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index eb4d7fa..0b1fb74 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -3271,6 +3271,49 @@ validate_log_size(uint64_t logblocks, int blocklog, int min_logblocks)
|
||||
}
|
||||
|
||||
static void
|
||||
+adjust_ag0_internal_logblocks(
|
||||
+ struct mkfs_params *cfg,
|
||||
+ struct xfs_mount *mp,
|
||||
+ int min_logblocks,
|
||||
+ int *max_logblocks)
|
||||
+{
|
||||
+ int backoff = 0;
|
||||
+ int ichunk_blocks;
|
||||
+
|
||||
+ /*
|
||||
+ * mkfs will trip over the write verifiers if the log is allocated in
|
||||
+ * AG 0 and consumes enough space that we cannot allocate a non-sparse
|
||||
+ * inode chunk for the root directory. The inode allocator requires
|
||||
+ * that the AG have enough free space for the chunk itself plus enough
|
||||
+ * to fix up the freelist with aligned blocks if we need to fill the
|
||||
+ * allocation from the AGFL.
|
||||
+ */
|
||||
+ ichunk_blocks = XFS_INODES_PER_CHUNK * cfg->inodesize >> cfg->blocklog;
|
||||
+ backoff = ichunk_blocks * 4;
|
||||
+
|
||||
+ /*
|
||||
+ * We try to align inode allocations to the data device stripe unit,
|
||||
+ * so ensure there's enough space to perform an aligned allocation.
|
||||
+ * The inode geometry structure isn't set up yet, so compute this by
|
||||
+ * hand.
|
||||
+ */
|
||||
+ backoff = max(backoff, cfg->dsunit * 2);
|
||||
+
|
||||
+ *max_logblocks -= backoff;
|
||||
+
|
||||
+ /* If the specified log size is too big, complain. */
|
||||
+ if (cli_opt_set(&lopts, L_SIZE) && cfg->logblocks > *max_logblocks) {
|
||||
+ fprintf(stderr,
|
||||
+_("internal log size %lld too large, must be less than %d\n"),
|
||||
+ (long long)cfg->logblocks,
|
||||
+ *max_logblocks);
|
||||
+ usage();
|
||||
+ }
|
||||
+
|
||||
+ cfg->logblocks = min(cfg->logblocks, *max_logblocks);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
calculate_log_size(
|
||||
struct mkfs_params *cfg,
|
||||
struct cli_params *cli,
|
||||
@@ -3382,6 +3425,10 @@ _("log ag number %lld too large, must be less than %lld\n"),
|
||||
} else
|
||||
cfg->logagno = (xfs_agnumber_t)(sbp->sb_agcount / 2);
|
||||
|
||||
+ if (cfg->logagno == 0)
|
||||
+ adjust_ag0_internal_logblocks(cfg, mp, min_logblocks,
|
||||
+ &max_logblocks);
|
||||
+
|
||||
cfg->logstart = XFS_AGB_TO_FSB(mp, cfg->logagno,
|
||||
libxfs_prealloc_blocks(mp));
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,73 +0,0 @@
|
||||
From 93a199f21dd12fdef4cbcb6821e58e2c301727e2 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 17 May 2022 22:48:13 -0400
|
||||
Subject: [PATCH] mkfs: improve log extent validation
|
||||
|
||||
Use the standard libxfs fsblock verifiers to check the start and end of
|
||||
the internal log. The current code does not catch the case of a
|
||||
(segmented) fsblock that is beyond agf_blocks but not so large as to change
|
||||
the agno part of the segmented fsblock.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/libxfs_api_defs.h | 1 +
|
||||
mkfs/xfs_mkfs.c | 10 ++++------
|
||||
2 files changed, 5 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
|
||||
index 8abbd23..370ad8b 100644
|
||||
--- a/libxfs/libxfs_api_defs.h
|
||||
+++ b/libxfs/libxfs_api_defs.h
|
||||
@@ -208,6 +208,7 @@
|
||||
#define xfs_verify_agino libxfs_verify_agino
|
||||
#define xfs_verify_cksum libxfs_verify_cksum
|
||||
#define xfs_verify_dir_ino libxfs_verify_dir_ino
|
||||
+#define xfs_verify_fsbext libxfs_verify_fsbext
|
||||
#define xfs_verify_fsbno libxfs_verify_fsbno
|
||||
#define xfs_verify_ino libxfs_verify_ino
|
||||
#define xfs_verify_rtbno libxfs_verify_rtbno
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index 0b1fb74..b932aca 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -3218,15 +3218,13 @@ align_internal_log(
|
||||
int sunit,
|
||||
int max_logblocks)
|
||||
{
|
||||
- uint64_t logend;
|
||||
-
|
||||
/* round up log start if necessary */
|
||||
if ((cfg->logstart % sunit) != 0)
|
||||
cfg->logstart = ((cfg->logstart + (sunit - 1)) / sunit) * sunit;
|
||||
|
||||
/* If our log start overlaps the next AG's metadata, fail. */
|
||||
- if (XFS_FSB_TO_AGBNO(mp, cfg->logstart) <= XFS_AGFL_BLOCK(mp)) {
|
||||
- fprintf(stderr,
|
||||
+ if (!libxfs_verify_fsbno(mp, cfg->logstart)) {
|
||||
+ fprintf(stderr,
|
||||
_("Due to stripe alignment, the internal log start (%lld) cannot be aligned\n"
|
||||
"within an allocation group.\n"),
|
||||
(long long) cfg->logstart);
|
||||
@@ -3237,8 +3235,7 @@ _("Due to stripe alignment, the internal log start (%lld) cannot be aligned\n"
|
||||
align_log_size(cfg, sunit, max_logblocks);
|
||||
|
||||
/* check the aligned log still starts and ends in the same AG. */
|
||||
- logend = cfg->logstart + cfg->logblocks - 1;
|
||||
- if (XFS_FSB_TO_AGNO(mp, cfg->logstart) != XFS_FSB_TO_AGNO(mp, logend)) {
|
||||
+ if (!libxfs_verify_fsbext(mp, cfg->logstart, cfg->logblocks)) {
|
||||
fprintf(stderr,
|
||||
_("Due to stripe alignment, the internal log size (%lld) is too large.\n"
|
||||
"Must fit within an allocation group.\n"),
|
||||
@@ -3465,6 +3462,7 @@ start_superblock_setup(
|
||||
sbp->sb_agblocks = (xfs_agblock_t)cfg->agsize;
|
||||
sbp->sb_agblklog = (uint8_t)log2_roundup(cfg->agsize);
|
||||
sbp->sb_agcount = (xfs_agnumber_t)cfg->agcount;
|
||||
+ sbp->sb_dblocks = (xfs_rfsblock_t)cfg->dblocks;
|
||||
|
||||
sbp->sb_inodesize = (uint16_t)cfg->inodesize;
|
||||
sbp->sb_inodelog = (uint8_t)cfg->inodelog;
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,130 +0,0 @@
|
||||
From 2b7301269e82e86d9601392d289e38f3f66b1467 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 17 May 2022 22:48:13 -0400
|
||||
Subject: [PATCH] xfs_repair: detect v5 featureset mismatches in secondary
|
||||
supers
|
||||
|
||||
Make sure we detect and correct mismatches between the V5 features
|
||||
described in the primary and the secondary superblocks.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
[sandeen: add comment about XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR]
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
repair/agheader.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 92 insertions(+)
|
||||
|
||||
diff --git a/repair/agheader.c b/repair/agheader.c
|
||||
index 2af2410..90adc1f 100644
|
||||
--- a/repair/agheader.c
|
||||
+++ b/repair/agheader.c
|
||||
@@ -221,6 +221,96 @@ compare_sb(xfs_mount_t *mp, xfs_sb_t *sb)
|
||||
}
|
||||
|
||||
/*
|
||||
+ * If the fs feature bits on a secondary superblock don't match the
|
||||
+ * primary, we need to update them.
|
||||
+ */
|
||||
+static inline int
|
||||
+check_v5_feature_mismatch(
|
||||
+ struct xfs_mount *mp,
|
||||
+ xfs_agnumber_t agno,
|
||||
+ struct xfs_sb *sb)
|
||||
+{
|
||||
+ bool dirty = false;
|
||||
+
|
||||
+ if (!xfs_sb_version_hascrc(&mp->m_sb) || agno == 0)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (mp->m_sb.sb_features_compat != sb->sb_features_compat) {
|
||||
+ if (no_modify) {
|
||||
+ do_warn(
|
||||
+ _("would fix compat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_compat,
|
||||
+ sb->sb_features_compat);
|
||||
+ } else {
|
||||
+ do_warn(
|
||||
+ _("will fix compat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_compat,
|
||||
+ sb->sb_features_compat);
|
||||
+ dirty = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Ignore XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR because the repair upgrade
|
||||
+ * path sets it only on the primary while upgrading.
|
||||
+ */
|
||||
+ if ((mp->m_sb.sb_features_incompat ^ sb->sb_features_incompat) &
|
||||
+ ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) {
|
||||
+ if (no_modify) {
|
||||
+ do_warn(
|
||||
+ _("would fix incompat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_incompat,
|
||||
+ sb->sb_features_incompat);
|
||||
+ } else {
|
||||
+ do_warn(
|
||||
+ _("will fix incompat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_incompat,
|
||||
+ sb->sb_features_incompat);
|
||||
+ dirty = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (mp->m_sb.sb_features_ro_compat != sb->sb_features_ro_compat) {
|
||||
+ if (no_modify) {
|
||||
+ do_warn(
|
||||
+ _("would fix ro compat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_ro_compat,
|
||||
+ sb->sb_features_ro_compat);
|
||||
+ } else {
|
||||
+ do_warn(
|
||||
+ _("will fix ro compat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_ro_compat,
|
||||
+ sb->sb_features_ro_compat);
|
||||
+ dirty = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (mp->m_sb.sb_features_log_incompat != sb->sb_features_log_incompat) {
|
||||
+ if (no_modify) {
|
||||
+ do_warn(
|
||||
+ _("would fix log incompat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_log_incompat,
|
||||
+ sb->sb_features_log_incompat);
|
||||
+ } else {
|
||||
+ do_warn(
|
||||
+ _("will fix log incompat feature mismatch in AG %u super, 0x%x != 0x%x\n"),
|
||||
+ agno, mp->m_sb.sb_features_log_incompat,
|
||||
+ sb->sb_features_log_incompat);
|
||||
+ dirty = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!dirty)
|
||||
+ return 0;
|
||||
+
|
||||
+ sb->sb_features_compat = mp->m_sb.sb_features_compat;
|
||||
+ sb->sb_features_ro_compat = mp->m_sb.sb_features_ro_compat;
|
||||
+ sb->sb_features_incompat = mp->m_sb.sb_features_incompat;
|
||||
+ sb->sb_features_log_incompat = mp->m_sb.sb_features_log_incompat;
|
||||
+ return XR_AG_SB_SEC;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
* Possible fields that may have been set at mkfs time,
|
||||
* sb_inoalignmt, sb_unit, sb_width and sb_dirblklog.
|
||||
* The quota inode fields in the secondaries should be zero.
|
||||
@@ -452,6 +542,8 @@ secondary_sb_whack(
|
||||
rval |= XR_AG_SB_SEC;
|
||||
}
|
||||
|
||||
+ rval |= check_v5_feature_mismatch(mp, i, sb);
|
||||
+
|
||||
if (xfs_sb_version_needsrepair(sb)) {
|
||||
if (i == 0) {
|
||||
if (!no_modify)
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,134 +0,0 @@
|
||||
From 5008cbb4b0eaef22e5a0e13a5a2c17457671e34a Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 17 May 2022 22:48:13 -0400
|
||||
Subject: [PATCH] xfs_repair: check the ftype of dot and dotdot directory
|
||||
entries
|
||||
|
||||
The long-format directory block checking code skips the filetype check
|
||||
for the '.' and '..' entries, even though they're part of the ondisk
|
||||
format. This leads to repair failing to catch subtle corruption at the
|
||||
start of a directory.
|
||||
|
||||
Found by fuzzing bu[0].filetype = zeroes in xfs/386.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
repair/phase6.c | 79 +++++++++++++++++++++++++++++++++++++++------------------
|
||||
1 file changed, 54 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/repair/phase6.c b/repair/phase6.c
|
||||
index 696a642..06232fb 100644
|
||||
--- a/repair/phase6.c
|
||||
+++ b/repair/phase6.c
|
||||
@@ -1412,6 +1412,48 @@ dir2_kill_block(
|
||||
_("directory shrink failed (%d)\n"), error);
|
||||
}
|
||||
|
||||
+static inline void
|
||||
+check_longform_ftype(
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct xfs_inode *ip,
|
||||
+ xfs_dir2_data_entry_t *dep,
|
||||
+ ino_tree_node_t *irec,
|
||||
+ int ino_offset,
|
||||
+ struct dir_hash_tab *hashtab,
|
||||
+ xfs_dir2_dataptr_t addr,
|
||||
+ struct xfs_da_args *da,
|
||||
+ struct xfs_buf *bp)
|
||||
+{
|
||||
+ xfs_ino_t inum = be64_to_cpu(dep->inumber);
|
||||
+ uint8_t dir_ftype;
|
||||
+ uint8_t ino_ftype;
|
||||
+
|
||||
+ if (!xfs_sb_version_hasftype(&mp->m_sb))
|
||||
+ return;
|
||||
+
|
||||
+ dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
|
||||
+ ino_ftype = get_inode_ftype(irec, ino_offset);
|
||||
+
|
||||
+ if (dir_ftype == ino_ftype)
|
||||
+ return;
|
||||
+
|
||||
+ if (no_modify) {
|
||||
+ do_warn(
|
||||
+_("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
|
||||
+ dir_ftype, ino_ftype,
|
||||
+ ip->i_ino, inum);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ do_warn(
|
||||
+_("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
|
||||
+ dir_ftype, ino_ftype,
|
||||
+ ip->i_ino, inum);
|
||||
+ libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
|
||||
+ libxfs_dir2_data_log_entry(da, bp, dep);
|
||||
+ dir_hash_update_ftype(hashtab, addr, ino_ftype);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* process a data block, also checks for .. entry
|
||||
* and corrects it to match what we think .. should be
|
||||
@@ -1749,6 +1791,11 @@ longform_dir2_entry_check_data(
|
||||
libxfs_dir2_data_log_entry(&da, bp, dep);
|
||||
}
|
||||
}
|
||||
+
|
||||
+ if (!nbad)
|
||||
+ check_longform_ftype(mp, ip, dep, irec,
|
||||
+ ino_offset, hashtab, addr, &da,
|
||||
+ bp);
|
||||
continue;
|
||||
}
|
||||
ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
|
||||
@@ -1777,6 +1824,11 @@ longform_dir2_entry_check_data(
|
||||
libxfs_dir2_data_log_entry(&da, bp, dep);
|
||||
}
|
||||
}
|
||||
+
|
||||
+ if (!nbad)
|
||||
+ check_longform_ftype(mp, ip, dep, irec,
|
||||
+ ino_offset, hashtab, addr, &da,
|
||||
+ bp);
|
||||
*need_dot = 0;
|
||||
continue;
|
||||
}
|
||||
@@ -1787,31 +1839,8 @@ longform_dir2_entry_check_data(
|
||||
continue;
|
||||
|
||||
/* validate ftype field if supported */
|
||||
- if (xfs_sb_version_hasftype(&mp->m_sb)) {
|
||||
- uint8_t dir_ftype;
|
||||
- uint8_t ino_ftype;
|
||||
-
|
||||
- dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
|
||||
- ino_ftype = get_inode_ftype(irec, ino_offset);
|
||||
-
|
||||
- if (dir_ftype != ino_ftype) {
|
||||
- if (no_modify) {
|
||||
- do_warn(
|
||||
- _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
|
||||
- dir_ftype, ino_ftype,
|
||||
- ip->i_ino, inum);
|
||||
- } else {
|
||||
- do_warn(
|
||||
- _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
|
||||
- dir_ftype, ino_ftype,
|
||||
- ip->i_ino, inum);
|
||||
- libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
|
||||
- libxfs_dir2_data_log_entry(&da, bp, dep);
|
||||
- dir_hash_update_ftype(hashtab, addr,
|
||||
- ino_ftype);
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
+ check_longform_ftype(mp, ip, dep, irec, ino_offset, hashtab,
|
||||
+ addr, &da, bp);
|
||||
|
||||
/*
|
||||
* check easy case first, regular inode, just bump
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,32 +0,0 @@
|
||||
From 8b4002e0cd0072dd69d478ed662f7cf546bae33b Mon Sep 17 00:00:00 2001
|
||||
From: Pavel Reichl <preichl@redhat.com>
|
||||
Date: Fri, 27 May 2022 16:36:21 -0400
|
||||
Subject: [PATCH] mkfs: Fix memory leak
|
||||
|
||||
'value' is allocated by strdup() in getstr(). It
|
||||
needs to be freed as we do not keep any permanent
|
||||
reference to it.
|
||||
|
||||
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index 01d2e8c..a37d684 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -1714,6 +1714,7 @@ naming_opts_parser(
|
||||
} else {
|
||||
cli->sb_feat.dir_version = getnum(value, opts, subopt);
|
||||
}
|
||||
+ free((char *)value);
|
||||
break;
|
||||
case N_FTYPE:
|
||||
cli->sb_feat.dirftype = getnum(value, opts, subopt);
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
From 5a282e43fd719e37b866f797c9aacac199d08a19 Mon Sep 17 00:00:00 2001
|
||||
From: Dave Chinner <dchinner@redhat.com>
|
||||
Date: Wed, 22 Jun 2022 14:28:52 -0500
|
||||
Subject: [PATCH] xfs: zero inode fork buffer at allocation
|
||||
|
||||
Source kernel commit: cb512c921639613ce03f87e62c5e93ed9fe8c84d
|
||||
|
||||
When we first allocate or resize an inline inode fork, we round up
|
||||
the allocation to 4 byte alignment to meet journal alignment
|
||||
constraints. We don't clear the unused bytes, so we can copy up to
|
||||
three uninitialised bytes into the journal. Zero those bytes so we
|
||||
only ever copy zeros into the journal.
|
||||
|
||||
Signed-off-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
|
||||
Signed-off-by: Dave Chinner <david@fromorbit.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_inode_fork.c | 12 +++++++++---
|
||||
1 file changed, 9 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_inode_fork.c b/libxfs/xfs_inode_fork.c
|
||||
index da59232..ac3692b 100644
|
||||
--- a/libxfs/xfs_inode_fork.c
|
||||
+++ b/libxfs/xfs_inode_fork.c
|
||||
@@ -48,8 +48,13 @@ xfs_init_local_fork(
|
||||
mem_size++;
|
||||
|
||||
if (size) {
|
||||
+ /*
|
||||
+ * As we round up the allocation here, we need to ensure the
|
||||
+ * bytes we don't copy data into are zeroed because the log
|
||||
+ * vectors still copy them into the journal.
|
||||
+ */
|
||||
real_size = roundup(mem_size, 4);
|
||||
- ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
|
||||
+ ifp->if_u1.if_data = kmem_zalloc(real_size, KM_NOFS);
|
||||
memcpy(ifp->if_u1.if_data, data, size);
|
||||
if (zero_terminate)
|
||||
ifp->if_u1.if_data[size] = '\0';
|
||||
@@ -498,10 +503,11 @@ xfs_idata_realloc(
|
||||
/*
|
||||
* For inline data, the underlying buffer must be a multiple of 4 bytes
|
||||
* in size so that it can be logged and stay on word boundaries.
|
||||
- * We enforce that here.
|
||||
+ * We enforce that here, and use __GFP_ZERO to ensure that size
|
||||
+ * extensions always zero the unused roundup area.
|
||||
*/
|
||||
ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
|
||||
- GFP_NOFS | __GFP_NOFAIL);
|
||||
+ GFP_NOFS | __GFP_NOFAIL | __GFP_ZERO);
|
||||
ifp->if_bytes = new_size;
|
||||
}
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,237 +0,0 @@
|
||||
From 393859c7a197c8187ffec131ec80cca697f8bf79 Mon Sep 17 00:00:00 2001
|
||||
From: Dave Chinner <dchinner@redhat.com>
|
||||
Date: Wed, 22 Jun 2022 14:28:52 -0500
|
||||
Subject: [PATCH] xfs: detect self referencing btree sibling pointers
|
||||
|
||||
Source kernel commit: dc04db2aa7c9307e740d6d0e173085301c173b1a
|
||||
|
||||
To catch the obvious graph cycle problem and hence potential endless
|
||||
looping.
|
||||
|
||||
Signed-off-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Dave Chinner <david@fromorbit.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_btree.c | 140 +++++++++++++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 105 insertions(+), 35 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
|
||||
index 8455f26..d9a82e7 100644
|
||||
--- a/libxfs/xfs_btree.c
|
||||
+++ b/libxfs/xfs_btree.c
|
||||
@@ -48,6 +48,52 @@ xfs_btree_magic(
|
||||
return magic;
|
||||
}
|
||||
|
||||
+static xfs_failaddr_t
|
||||
+xfs_btree_check_lblock_siblings(
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct xfs_btree_cur *cur,
|
||||
+ int level,
|
||||
+ xfs_fsblock_t fsb,
|
||||
+ xfs_fsblock_t sibling)
|
||||
+{
|
||||
+ if (sibling == NULLFSBLOCK)
|
||||
+ return NULL;
|
||||
+ if (sibling == fsb)
|
||||
+ return __this_address;
|
||||
+ if (level >= 0) {
|
||||
+ if (!xfs_btree_check_lptr(cur, sibling, level + 1))
|
||||
+ return __this_address;
|
||||
+ } else {
|
||||
+ if (!xfs_verify_fsbno(mp, sibling))
|
||||
+ return __this_address;
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static xfs_failaddr_t
|
||||
+xfs_btree_check_sblock_siblings(
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct xfs_btree_cur *cur,
|
||||
+ int level,
|
||||
+ xfs_agnumber_t agno,
|
||||
+ xfs_agblock_t agbno,
|
||||
+ xfs_agblock_t sibling)
|
||||
+{
|
||||
+ if (sibling == NULLAGBLOCK)
|
||||
+ return NULL;
|
||||
+ if (sibling == agbno)
|
||||
+ return __this_address;
|
||||
+ if (level >= 0) {
|
||||
+ if (!xfs_btree_check_sptr(cur, sibling, level + 1))
|
||||
+ return __this_address;
|
||||
+ } else {
|
||||
+ if (!xfs_verify_agbno(mp, agno, sibling))
|
||||
+ return __this_address;
|
||||
+ }
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Check a long btree block header. Return the address of the failing check,
|
||||
* or NULL if everything is ok.
|
||||
@@ -62,6 +108,8 @@ __xfs_btree_check_lblock(
|
||||
struct xfs_mount *mp = cur->bc_mp;
|
||||
xfs_btnum_t btnum = cur->bc_btnum;
|
||||
int crc = xfs_sb_version_hascrc(&mp->m_sb);
|
||||
+ xfs_failaddr_t fa;
|
||||
+ xfs_fsblock_t fsb = NULLFSBLOCK;
|
||||
|
||||
if (crc) {
|
||||
if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
|
||||
@@ -80,16 +128,16 @@ __xfs_btree_check_lblock(
|
||||
if (be16_to_cpu(block->bb_numrecs) >
|
||||
cur->bc_ops->get_maxrecs(cur, level))
|
||||
return __this_address;
|
||||
- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
|
||||
- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
|
||||
- level + 1))
|
||||
- return __this_address;
|
||||
- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
|
||||
- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
|
||||
- level + 1))
|
||||
- return __this_address;
|
||||
|
||||
- return NULL;
|
||||
+ if (bp)
|
||||
+ fsb = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
|
||||
+
|
||||
+ fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
|
||||
+ be64_to_cpu(block->bb_u.l.bb_leftsib));
|
||||
+ if (!fa)
|
||||
+ fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
|
||||
+ be64_to_cpu(block->bb_u.l.bb_rightsib));
|
||||
+ return fa;
|
||||
}
|
||||
|
||||
/* Check a long btree block header. */
|
||||
@@ -127,6 +175,9 @@ __xfs_btree_check_sblock(
|
||||
struct xfs_mount *mp = cur->bc_mp;
|
||||
xfs_btnum_t btnum = cur->bc_btnum;
|
||||
int crc = xfs_sb_version_hascrc(&mp->m_sb);
|
||||
+ xfs_failaddr_t fa;
|
||||
+ xfs_agblock_t agbno = NULLAGBLOCK;
|
||||
+ xfs_agnumber_t agno = NULLAGNUMBER;
|
||||
|
||||
if (crc) {
|
||||
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
|
||||
@@ -143,16 +194,18 @@ __xfs_btree_check_sblock(
|
||||
if (be16_to_cpu(block->bb_numrecs) >
|
||||
cur->bc_ops->get_maxrecs(cur, level))
|
||||
return __this_address;
|
||||
- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
|
||||
- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
|
||||
- level + 1))
|
||||
- return __this_address;
|
||||
- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
|
||||
- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
|
||||
- level + 1))
|
||||
- return __this_address;
|
||||
|
||||
- return NULL;
|
||||
+ if (bp) {
|
||||
+ agbno = xfs_daddr_to_agbno(mp, XFS_BUF_ADDR(bp));
|
||||
+ agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
|
||||
+ }
|
||||
+
|
||||
+ fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno,
|
||||
+ be32_to_cpu(block->bb_u.s.bb_leftsib));
|
||||
+ if (!fa)
|
||||
+ fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno,
|
||||
+ agbno, be32_to_cpu(block->bb_u.s.bb_rightsib));
|
||||
+ return fa;
|
||||
}
|
||||
|
||||
/* Check a short btree block header. */
|
||||
@@ -4265,6 +4318,21 @@ xfs_btree_visit_block(
|
||||
if (xfs_btree_ptr_is_null(cur, &rptr))
|
||||
return -ENOENT;
|
||||
|
||||
+ /*
|
||||
+ * We only visit blocks once in this walk, so we have to avoid the
|
||||
+ * internal xfs_btree_lookup_get_block() optimisation where it will
|
||||
+ * return the same block without checking if the right sibling points
|
||||
+ * back to us and creates a cyclic reference in the btree.
|
||||
+ */
|
||||
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
|
||||
+ if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
|
||||
+ XFS_BUF_ADDR(bp)))
|
||||
+ return -EFSCORRUPTED;
|
||||
+ } else {
|
||||
+ if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
|
||||
+ XFS_BUF_ADDR(bp)))
|
||||
+ return -EFSCORRUPTED;
|
||||
+ }
|
||||
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
|
||||
}
|
||||
|
||||
@@ -4439,20 +4507,21 @@ xfs_btree_lblock_verify(
|
||||
{
|
||||
struct xfs_mount *mp = bp->b_mount;
|
||||
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
|
||||
+ xfs_fsblock_t fsb;
|
||||
+ xfs_failaddr_t fa;
|
||||
|
||||
/* numrecs verification */
|
||||
if (be16_to_cpu(block->bb_numrecs) > max_recs)
|
||||
return __this_address;
|
||||
|
||||
/* sibling pointer verification */
|
||||
- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
|
||||
- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
|
||||
- return __this_address;
|
||||
- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
|
||||
- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
|
||||
- return __this_address;
|
||||
-
|
||||
- return NULL;
|
||||
+ fsb = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
|
||||
+ fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
|
||||
+ be64_to_cpu(block->bb_u.l.bb_leftsib));
|
||||
+ if (!fa)
|
||||
+ fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
|
||||
+ be64_to_cpu(block->bb_u.l.bb_rightsib));
|
||||
+ return fa;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -4493,7 +4562,9 @@ xfs_btree_sblock_verify(
|
||||
{
|
||||
struct xfs_mount *mp = bp->b_mount;
|
||||
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
|
||||
- xfs_agblock_t agno;
|
||||
+ xfs_agnumber_t agno;
|
||||
+ xfs_agblock_t agbno;
|
||||
+ xfs_failaddr_t fa;
|
||||
|
||||
/* numrecs verification */
|
||||
if (be16_to_cpu(block->bb_numrecs) > max_recs)
|
||||
@@ -4501,14 +4572,13 @@ xfs_btree_sblock_verify(
|
||||
|
||||
/* sibling pointer verification */
|
||||
agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
|
||||
- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
|
||||
- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
|
||||
- return __this_address;
|
||||
- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
|
||||
- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
|
||||
- return __this_address;
|
||||
-
|
||||
- return NULL;
|
||||
+ agbno = xfs_daddr_to_agbno(mp, XFS_BUF_ADDR(bp));
|
||||
+ fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
|
||||
+ be32_to_cpu(block->bb_u.s.bb_leftsib));
|
||||
+ if (!fa)
|
||||
+ fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
|
||||
+ be32_to_cpu(block->bb_u.s.bb_rightsib));
|
||||
+ return fa;
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,84 +0,0 @@
|
||||
From ff6aea290450f00e084cafe5b34901d26abdbc4a Mon Sep 17 00:00:00 2001
|
||||
From: Dave Chinner <dchinner@redhat.com>
|
||||
Date: Wed, 22 Jun 2022 14:28:52 -0500
|
||||
Subject: [PATCH] xfs: validate inode fork size against fork format
|
||||
|
||||
Source kernel commit: 1eb70f54c445fcbb25817841e774adb3d912f3e8
|
||||
|
||||
xfs_repair catches fork size/format mismatches, but the in-kernel
|
||||
verifier doesn't, leading to null pointer failures when attempting
|
||||
to perform operations on the fork. This can occur in the
|
||||
xfs_dir_is_empty() where the in-memory fork format does not match
|
||||
the size and so the fork data pointer is accessed incorrectly.
|
||||
|
||||
Note: this causes new failures in xfs/348 which is testing mode vs
|
||||
ftype mismatches. We now detect a regular file that has been changed
|
||||
to a directory or symlink mode as being corrupt because the data
|
||||
fork for a symlink or directory should be in local form when
|
||||
there are only 3 bytes of data in the data fork. Hence the inode
|
||||
verify for the regular file now fires w/ -EFSCORRUPTED because
|
||||
the inode fork format does not match the format the corrupted mode
|
||||
says it should be in.
|
||||
|
||||
Signed-off-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Dave Chinner <david@fromorbit.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
libxfs/xfs_inode_buf.c | 35 ++++++++++++++++++++++++++---------
|
||||
1 file changed, 26 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_inode_buf.c b/libxfs/xfs_inode_buf.c
|
||||
index f98f5c4..7ecbfad 100644
|
||||
--- a/libxfs/xfs_inode_buf.c
|
||||
+++ b/libxfs/xfs_inode_buf.c
|
||||
@@ -334,19 +334,36 @@ xfs_dinode_verify_fork(
|
||||
int whichfork)
|
||||
{
|
||||
uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
|
||||
+ mode_t mode = be16_to_cpu(dip->di_mode);
|
||||
+ uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
|
||||
+ uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork);
|
||||
|
||||
- switch (XFS_DFORK_FORMAT(dip, whichfork)) {
|
||||
+ /*
|
||||
+ * For fork types that can contain local data, check that the fork
|
||||
+ * format matches the size of local data contained within the fork.
|
||||
+ *
|
||||
+ * For all types, check that when the size says the should be in extent
|
||||
+ * or btree format, the inode isn't claiming it is in local format.
|
||||
+ */
|
||||
+ if (whichfork == XFS_DATA_FORK) {
|
||||
+ if (S_ISDIR(mode) || S_ISLNK(mode)) {
|
||||
+ if (be64_to_cpu(dip->di_size) <= fork_size &&
|
||||
+ fork_format != XFS_DINODE_FMT_LOCAL)
|
||||
+ return __this_address;
|
||||
+ }
|
||||
+
|
||||
+ if (be64_to_cpu(dip->di_size) > fork_size &&
|
||||
+ fork_format == XFS_DINODE_FMT_LOCAL)
|
||||
+ return __this_address;
|
||||
+ }
|
||||
+
|
||||
+ switch (fork_format) {
|
||||
case XFS_DINODE_FMT_LOCAL:
|
||||
/*
|
||||
- * no local regular files yet
|
||||
+ * No local regular files yet
|
||||
*/
|
||||
- if (whichfork == XFS_DATA_FORK) {
|
||||
- if (S_ISREG(be16_to_cpu(dip->di_mode)))
|
||||
- return __this_address;
|
||||
- if (be64_to_cpu(dip->di_size) >
|
||||
- XFS_DFORK_SIZE(dip, mp, whichfork))
|
||||
- return __this_address;
|
||||
- }
|
||||
+ if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
|
||||
+ return __this_address;
|
||||
if (di_nextents)
|
||||
return __this_address;
|
||||
break;
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,110 +0,0 @@
|
||||
From fa0f9232bd89e2955ee54e0be4adb6713a00d8b4 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 12 Jul 2022 13:22:33 -0500
|
||||
Subject: [PATCH] xfs_repair: always rewrite secondary supers when needsrepair
|
||||
is set
|
||||
|
||||
Dave Chinner complained about xfs_scrub failures coming from xfs/158.
|
||||
That test induces xfs_repair to fail while upgrading a filesystem to
|
||||
have the inobtcount feature, and then restarts xfs_repair to finish the
|
||||
upgrade. When the second xfs_repair run starts, it will find that the
|
||||
primary super has NEEDSREPAIR set, along with whatever new feature that
|
||||
we were trying to add to the filesystem.
|
||||
|
||||
From there, repair completes the upgrade in much the same manner as the
|
||||
first repair run would have, with one big exception -- it forgets to set
|
||||
features_changed to trigger rewriting of the secondary supers at the end
|
||||
of repair. This results in discrepancies between the supers:
|
||||
|
||||
# XFS_REPAIR_FAIL_AFTER_PHASE=2 xfs_repair -c inobtcount=1 /dev/sdf
|
||||
Phase 1 - find and verify superblock...
|
||||
Phase 2 - using internal log
|
||||
- zero log...
|
||||
- scan filesystem freespace and inode maps...
|
||||
- found root inode chunk
|
||||
Adding inode btree counts to filesystem.
|
||||
Killed
|
||||
# xfs_repair /dev/sdf
|
||||
Phase 1 - find and verify superblock...
|
||||
Phase 2 - using internal log
|
||||
- zero log...
|
||||
- scan filesystem freespace and inode maps...
|
||||
clearing needsrepair flag and regenerating metadata
|
||||
bad inobt block count 0, saw 1
|
||||
bad finobt block count 0, saw 1
|
||||
bad inobt block count 0, saw 1
|
||||
bad finobt block count 0, saw 1
|
||||
bad inobt block count 0, saw 1
|
||||
bad finobt block count 0, saw 1
|
||||
bad inobt block count 0, saw 1
|
||||
bad finobt block count 0, saw 1
|
||||
- found root inode chunk
|
||||
Phase 3 - for each AG...
|
||||
- scan and clear agi unlinked lists...
|
||||
- process known inodes and perform inode discovery...
|
||||
- agno = 0
|
||||
- agno = 1
|
||||
- agno = 2
|
||||
- agno = 3
|
||||
- process newly discovered inodes...
|
||||
Phase 4 - check for duplicate blocks...
|
||||
- setting up duplicate extent list...
|
||||
- check for inodes claiming duplicate blocks...
|
||||
- agno = 1
|
||||
- agno = 2
|
||||
- agno = 0
|
||||
- agno = 3
|
||||
Phase 5 - rebuild AG headers and trees...
|
||||
- reset superblock...
|
||||
Phase 6 - check inode connectivity...
|
||||
- resetting contents of realtime bitmap and summary inodes
|
||||
- traversing filesystem ...
|
||||
- traversal finished ...
|
||||
- moving disconnected inodes to lost+found ...
|
||||
Phase 7 - verify and correct link counts...
|
||||
done
|
||||
# xfs_db -c 'sb 0' -c 'print' -c 'sb 1' -c 'print' /dev/sdf | \
|
||||
egrep '(features_ro_compat|features_incompat)'
|
||||
features_ro_compat = 0xd
|
||||
features_incompat = 0xb
|
||||
features_ro_compat = 0x5
|
||||
features_incompat = 0xb
|
||||
|
||||
Curiously, re-running xfs_repair will not trigger any warnings about the
|
||||
featureset mismatch between the primary and secondary supers. xfs_scrub
|
||||
immediately notices, which is what causes xfs/158 to fail.
|
||||
|
||||
This discrepancy doesn't happen when the upgrade completes successfully
|
||||
in a single repair run, so we need to teach repair to rewrite the
|
||||
secondaries at the end of repair any time needsrepair was set.
|
||||
|
||||
Reported-by: Dave Chinner <david@fromorbit.com>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
repair/agheader.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/repair/agheader.c b/repair/agheader.c
|
||||
index 36da139..e91509d 100644
|
||||
--- a/repair/agheader.c
|
||||
+++ b/repair/agheader.c
|
||||
@@ -552,6 +552,14 @@ secondary_sb_whack(
|
||||
else
|
||||
do_warn(
|
||||
_("would clear needsrepair flag and regenerate metadata\n"));
|
||||
+ /*
|
||||
+ * If needsrepair is set on the primary super, there's
|
||||
+ * a possibility that repair crashed during an upgrade.
|
||||
+ * Set features_changed to ensure that the secondary
|
||||
+ * supers are rewritten with the new feature bits once
|
||||
+ * we've finished the upgrade.
|
||||
+ */
|
||||
+ features_changed = true;
|
||||
} else {
|
||||
/*
|
||||
* Quietly clear needsrepair on the secondary supers as
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,55 +0,0 @@
|
||||
From f50d3462c654acc484ab3ea68e75e8252b77e262 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Wed, 13 Jul 2022 20:58:25 -0500
|
||||
Subject: [PATCH] xfs_repair: ignore empty xattr leaf blocks
|
||||
|
||||
As detailed in the commit:
|
||||
|
||||
5e572d1a xfs: empty xattr leaf header blocks are not corruption
|
||||
|
||||
empty xattr leaf blocks can be the benign byproduct of the system
|
||||
going down during the multi-step process of adding a large xattr
|
||||
to a file that has no xattrs. If we find one at attr fork offset 0,
|
||||
we should clear it, but this isn't a corruption.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
repair/attr_repair.c | 20 ++++++++++++++++++++
|
||||
1 file changed, 20 insertions(+)
|
||||
|
||||
diff --git a/repair/attr_repair.c b/repair/attr_repair.c
|
||||
index 2055d96..c3a6d50 100644
|
||||
--- a/repair/attr_repair.c
|
||||
+++ b/repair/attr_repair.c
|
||||
@@ -579,6 +579,26 @@ process_leaf_attr_block(
|
||||
firstb = mp->m_sb.sb_blocksize;
|
||||
stop = xfs_attr3_leaf_hdr_size(leaf);
|
||||
|
||||
+ /*
|
||||
+ * Empty leaf blocks at offset zero can occur as a race between
|
||||
+ * setxattr and the system going down, so we only take action if we're
|
||||
+ * running in modify mode. See xfs_attr3_leaf_verify for details of
|
||||
+ * how we've screwed this up many times.
|
||||
+ */
|
||||
+ if (!leafhdr.count && da_bno == 0) {
|
||||
+ if (no_modify) {
|
||||
+ do_log(
|
||||
+ _("would clear empty leaf attr block 0, inode %" PRIu64 "\n"),
|
||||
+ ino);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ do_warn(
|
||||
+ _("will clear empty leaf attr block 0, inode %" PRIu64 "\n"),
|
||||
+ ino);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
/* does the count look sorta valid? */
|
||||
if (!leafhdr.count ||
|
||||
leafhdr.count * sizeof(xfs_attr_leaf_entry_t) + stop >
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
From 91c1d0836aa4a228e76c0b8c5d83903f1f6bfdbb Mon Sep 17 00:00:00 2001
|
||||
From: Chandan Babu R <chandan.babu@oracle.com>
|
||||
Date: Wed, 13 Jul 2022 20:58:27 -0500
|
||||
Subject: [PATCH] xfs_repair: Search for conflicts in inode_tree_ptrs[] when
|
||||
processing uncertain inodes
|
||||
|
||||
When processing an uncertain inode chunk record, if we lose 2 blocks worth of
|
||||
inodes or 25% of the chunk, xfs_repair decides to ignore the chunk. Otherwise,
|
||||
xfs_repair adds a new chunk record to inode_tree_ptrs[agno], marking each
|
||||
inode as either free or used. However, before adding the new chunk record,
|
||||
xfs_repair has to check for the existence of a conflicting record.
|
||||
|
||||
The existing code incorrectly checks for the conflicting record in
|
||||
inode_uncertain_tree_ptrs[agno]. This check will succeed since the inode chunk
|
||||
record being processed was originally obtained from
|
||||
inode_uncertain_tree_ptrs[agno].
|
||||
|
||||
This commit fixes the bug by changing xfs_repair to search
|
||||
inode_tree_ptrs[agno] for conflicts.
|
||||
|
||||
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
repair/dino_chunks.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c
|
||||
index 11b0eb5..80c52a4 100644
|
||||
--- a/repair/dino_chunks.c
|
||||
+++ b/repair/dino_chunks.c
|
||||
@@ -229,8 +229,7 @@ verify_inode_chunk(xfs_mount_t *mp,
|
||||
/*
|
||||
* ok, put the record into the tree, if no conflict.
|
||||
*/
|
||||
- if (find_uncertain_inode_rec(agno,
|
||||
- XFS_AGB_TO_AGINO(mp, start_agbno)))
|
||||
+ if (find_inode_rec(mp, agno, XFS_AGB_TO_AGINO(mp, start_agbno)))
|
||||
return(0);
|
||||
|
||||
start_agino = XFS_AGB_TO_AGINO(mp, start_agbno);
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,119 +0,0 @@
|
||||
From cdf5cfe93ee14942665f3c6ae78a8bf1198e1798 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Wed, 13 Jul 2022 20:58:28 -0500
|
||||
Subject: [PATCH] mkfs: terminate getsubopt arrays properly
|
||||
|
||||
Having not drunk any (or maybe too much) coffee this morning, I typed:
|
||||
|
||||
$ mkfs.xfs -d agcount=3 -d nrext64=0
|
||||
Segmentation fault
|
||||
|
||||
I traced this down to getsubopt walking off the end of the dopts.subopts
|
||||
array. The manpage says you're supposed to terminate the suboptions
|
||||
string array with a NULL entry, but the structure definition uses
|
||||
MAX_SUBOPTS/D_MAX_OPTS directly, which means there is no terminator.
|
||||
|
||||
Explicitly terminate each suboption array with a NULL entry after
|
||||
making room for it.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
[sandeen: explicitly add NULL terminators & clarify comment]
|
||||
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 16 ++++++++++++++--
|
||||
1 file changed, 14 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index fdf6d4a..5cd2f81 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -132,8 +132,11 @@ enum {
|
||||
M_MAX_OPTS,
|
||||
};
|
||||
|
||||
-/* Just define the max options array size manually right now */
|
||||
-#define MAX_SUBOPTS D_MAX_OPTS
|
||||
+/*
|
||||
+ * Just define the max options array size manually to the largest
|
||||
+ * enum right now, leaving room for a NULL terminator at the end
|
||||
+ */
|
||||
+#define MAX_SUBOPTS (D_MAX_OPTS + 1)
|
||||
|
||||
#define SUBOPT_NEEDS_VAL (-1LL)
|
||||
#define MAX_CONFLICTS 8
|
||||
@@ -243,6 +246,7 @@ static struct opt_params bopts = {
|
||||
.ini_section = "block",
|
||||
.subopts = {
|
||||
[B_SIZE] = "size",
|
||||
+ [B_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = B_SIZE,
|
||||
@@ -269,6 +273,7 @@ static struct opt_params copts = {
|
||||
.name = 'c',
|
||||
.subopts = {
|
||||
[C_OPTFILE] = "options",
|
||||
+ [C_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = C_OPTFILE,
|
||||
@@ -298,6 +303,7 @@ static struct opt_params dopts = {
|
||||
[D_EXTSZINHERIT] = "extszinherit",
|
||||
[D_COWEXTSIZE] = "cowextsize",
|
||||
[D_DAXINHERIT] = "daxinherit",
|
||||
+ [D_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = D_AGCOUNT,
|
||||
@@ -434,6 +440,7 @@ static struct opt_params iopts = {
|
||||
[I_ATTR] = "attr",
|
||||
[I_PROJID32BIT] = "projid32bit",
|
||||
[I_SPINODES] = "sparse",
|
||||
+ [I_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = I_ALIGN,
|
||||
@@ -500,6 +507,7 @@ static struct opt_params lopts = {
|
||||
[L_FILE] = "file",
|
||||
[L_NAME] = "name",
|
||||
[L_LAZYSBCNTR] = "lazy-count",
|
||||
+ [L_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = L_AGNUM,
|
||||
@@ -592,6 +600,7 @@ static struct opt_params nopts = {
|
||||
[N_SIZE] = "size",
|
||||
[N_VERSION] = "version",
|
||||
[N_FTYPE] = "ftype",
|
||||
+ [N_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = N_SIZE,
|
||||
@@ -627,6 +636,7 @@ static struct opt_params ropts = {
|
||||
[R_FILE] = "file",
|
||||
[R_NAME] = "name",
|
||||
[R_NOALIGN] = "noalign",
|
||||
+ [R_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = R_EXTSIZE,
|
||||
@@ -674,6 +684,7 @@ static struct opt_params sopts = {
|
||||
.subopts = {
|
||||
[S_SIZE] = "size",
|
||||
[S_SECTSIZE] = "sectsize",
|
||||
+ [S_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = S_SIZE,
|
||||
@@ -710,6 +721,7 @@ static struct opt_params mopts = {
|
||||
[M_REFLINK] = "reflink",
|
||||
[M_INOBTCNT] = "inobtcount",
|
||||
[M_BIGTIME] = "bigtime",
|
||||
+ [M_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
{ .index = M_CRC,
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,40 +0,0 @@
|
||||
From db5b866537e78669f7b84590345b0c37f841f701 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Thu, 4 Aug 2022 21:28:23 -0500
|
||||
Subject: [PATCH] mkfs: complain about impossible log size constraints
|
||||
|
||||
xfs/042 trips over an impossible fs geometry when nrext64 is enabled.
|
||||
The minimum log size calculation comes out to 4287 blocks, but the mkfs
|
||||
parameters specify an AG size of 4096 blocks. This eventually causes
|
||||
mkfs to complain that the autoselected log size doesn't meet the minimum
|
||||
size, but we could be a little more explicit in pointing out that the
|
||||
two size constraints make for an impossible geometry.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
|
||||
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index 12994ed..9dd0e79 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -3490,6 +3490,13 @@ _("external log device size %lld blocks too small, must be at least %lld blocks\
|
||||
* an AG.
|
||||
*/
|
||||
max_logblocks = libxfs_alloc_ag_max_usable(mp) - 1;
|
||||
+ if (max_logblocks < min_logblocks) {
|
||||
+ fprintf(stderr,
|
||||
+_("max log size %d smaller than min log size %d, filesystem is too small\n"),
|
||||
+ max_logblocks,
|
||||
+ min_logblocks);
|
||||
+ usage();
|
||||
+ }
|
||||
|
||||
/* internal log - if no size specified, calculate automatically */
|
||||
if (!cfg->logblocks) {
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
From 04d4c27afa3f2c0088e381102e68cfb6a96b3306 Mon Sep 17 00:00:00 2001
|
||||
From: Shida Zhang <zhangshida@kylinos.cn>
|
||||
Date: Fri, 18 Nov 2022 10:46:33 +0100
|
||||
Subject: [PATCH] xfs: trim the mapp array accordingly in xfs_da_grow_inode_int
|
||||
|
||||
Source kernel commit: 44159659df8ca381b84261e11058b2176fa03ba0
|
||||
|
||||
Take a look at the for-loop in xfs_da_grow_inode_int:
|
||||
======
|
||||
for(){
|
||||
nmap = min(XFS_BMAP_MAX_NMAP, count);
|
||||
...
|
||||
error = xfs_bmapi_write(...,&mapp[mapi], &nmap);//(..., $1, $2)
|
||||
...
|
||||
mapi += nmap;
|
||||
}
|
||||
=====
|
||||
where $1 stands for the start address of the array,
|
||||
while $2 is used to indicate the size of the array.
|
||||
|
||||
The array $1 will advance by $nmap in each iteration after
|
||||
the allocation of extents.
|
||||
But the size $2 still remains unchanged, which is determined by
|
||||
min(XFS_BMAP_MAX_NMAP, count).
|
||||
|
||||
It seems that it has forgotten to trim the mapp array after each
|
||||
iteration, so change it.
|
||||
|
||||
Signed-off-by: Shida Zhang <zhangshida@kylinos.cn>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Dave Chinner <david@fromorbit.com>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
libxfs/xfs_da_btree.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c
|
||||
index 9dc22f2..a068a01 100644
|
||||
--- a/libxfs/xfs_da_btree.c
|
||||
+++ b/libxfs/xfs_da_btree.c
|
||||
@@ -2188,8 +2188,8 @@ xfs_da_grow_inode_int(
|
||||
*/
|
||||
mapp = kmem_alloc(sizeof(*mapp) * count, 0);
|
||||
for (b = *bno, mapi = 0; b < *bno + count; ) {
|
||||
- nmap = min(XFS_BMAP_MAX_NMAP, count);
|
||||
c = (int)(*bno + count - b);
|
||||
+ nmap = min(XFS_BMAP_MAX_NMAP, c);
|
||||
error = xfs_bmapi_write(tp, dp, b, c,
|
||||
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
|
||||
args->total, &mapp[mapi], &nmap);
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,156 +0,0 @@
|
||||
From 20798cc06315ec1581b87b3da7f868dff62a6efd Mon Sep 17 00:00:00 2001
|
||||
From: Guo Xuenan <guoxuenan@huawei.com>
|
||||
Date: Fri, 18 Nov 2022 10:48:09 +0100
|
||||
Subject: [PATCH] xfs: fix exception caused by unexpected illegal bestcount in
|
||||
leaf dir
|
||||
|
||||
Source kernel commit: 13cf24e00665c9751951a422756d975812b71173
|
||||
|
||||
For a leaf dir, in most cases there should be as many bestfree slots
|
||||
as the dir data blocks that can fit under i_size (except for [1]).
|
||||
|
||||
The root cause is that we don't examine the number of bestfree slots; when the slots
|
||||
number less than dir data blocks, if we need to allocate new dir data
|
||||
block and update the bestfree array, we will use the dir block number as
|
||||
index to assign bestfree array, while we did not check the leaf buf
|
||||
boundary which may cause UAF or other memory access problem. This issue
|
||||
can also be triggered with test case xfs/473 from fstests.
|
||||
|
||||
According to Dave Chinner & Darrick's suggestion, adding buffer verifier
|
||||
to detect this abnormal situation in time.
|
||||
Simplify the testcase for fstest xfs/554 [1]
|
||||
|
||||
The error log is shown as follows:
|
||||
==================================================================
|
||||
BUG: KASAN: use-after-free in xfs_dir2_leaf_addname+0x1995/0x1ac0
|
||||
Write of size 2 at addr ffff88810168b000 by task touch/1552
|
||||
CPU: 5 PID: 1552 Comm: touch Not tainted 6.0.0-rc3+ #101
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
|
||||
1.13.0-1ubuntu1.1 04/01/2014
|
||||
Call Trace:
|
||||
<TASK>
|
||||
dump_stack_lvl+0x4d/0x66
|
||||
print_report.cold+0xf6/0x691
|
||||
kasan_report+0xa8/0x120
|
||||
xfs_dir2_leaf_addname+0x1995/0x1ac0
|
||||
xfs_dir_createname+0x58c/0x7f0
|
||||
xfs_create+0x7af/0x1010
|
||||
xfs_generic_create+0x270/0x5e0
|
||||
path_openat+0x270b/0x3450
|
||||
do_filp_open+0x1cf/0x2b0
|
||||
do_sys_openat2+0x46b/0x7a0
|
||||
do_sys_open+0xb7/0x130
|
||||
do_syscall_64+0x35/0x80
|
||||
entry_SYSCALL_64_after_hwframe+0x63/0xcd
|
||||
RIP: 0033:0x7fe4d9e9312b
|
||||
Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0
|
||||
75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00
|
||||
f0 ff ff 0f 87 91 00 00 00 48 8b 4c 24 28 64 48 33 0c 25
|
||||
RSP: 002b:00007ffda4c16c20 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
|
||||
RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe4d9e9312b
|
||||
RDX: 0000000000000941 RSI: 00007ffda4c17f33 RDI: 00000000ffffff9c
|
||||
RBP: 00007ffda4c17f33 R08: 0000000000000000 R09: 0000000000000000
|
||||
R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000941
|
||||
R13: 00007fe4d9f631a4 R14: 00007ffda4c17f33 R15: 0000000000000000
|
||||
</TASK>
|
||||
|
||||
The buggy address belongs to the physical page:
|
||||
page:ffffea000405a2c0 refcount:0 mapcount:0 mapping:0000000000000000
|
||||
index:0x0 pfn:0x10168b
|
||||
flags: 0x2fffff80000000(node=0|zone=2|lastcpupid=0x1fffff)
|
||||
raw: 002fffff80000000 ffffea0004057788 ffffea000402dbc8 0000000000000000
|
||||
raw: 0000000000000000 0000000000170000 00000000ffffffff 0000000000000000
|
||||
page dumped because: kasan: bad access detected
|
||||
|
||||
Memory state around the buggy address:
|
||||
ffff88810168af00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
ffff88810168af80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
>ffff88810168b000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|
||||
^
|
||||
ffff88810168b080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|
||||
ffff88810168b100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|
||||
==================================================================
|
||||
Disabling lock debugging due to kernel taint
|
||||
00000000: 58 44 44 33 5b 53 35 c2 00 00 00 00 00 00 00 78
|
||||
XDD3[S5........x
|
||||
XFS (sdb): Internal error xfs_dir2_data_use_free at line 1200 of file
|
||||
fs/xfs/libxfs/xfs_dir2_data.c. Caller
|
||||
xfs_dir2_data_use_free+0x28a/0xeb0
|
||||
CPU: 5 PID: 1552 Comm: touch Tainted: G B 6.0.0-rc3+
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
|
||||
1.13.0-1ubuntu1.1 04/01/2014
|
||||
Call Trace:
|
||||
<TASK>
|
||||
dump_stack_lvl+0x4d/0x66
|
||||
xfs_corruption_error+0x132/0x150
|
||||
xfs_dir2_data_use_free+0x198/0xeb0
|
||||
xfs_dir2_leaf_addname+0xa59/0x1ac0
|
||||
xfs_dir_createname+0x58c/0x7f0
|
||||
xfs_create+0x7af/0x1010
|
||||
xfs_generic_create+0x270/0x5e0
|
||||
path_openat+0x270b/0x3450
|
||||
do_filp_open+0x1cf/0x2b0
|
||||
do_sys_openat2+0x46b/0x7a0
|
||||
do_sys_open+0xb7/0x130
|
||||
do_syscall_64+0x35/0x80
|
||||
entry_SYSCALL_64_after_hwframe+0x63/0xcd
|
||||
RIP: 0033:0x7fe4d9e9312b
|
||||
Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0
|
||||
75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00
|
||||
f0 ff ff 0f 87 91 00 00 00 48 8b 4c 24 28 64 48 33 0c 25
|
||||
RSP: 002b:00007ffda4c16c20 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
|
||||
RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe4d9e9312b
|
||||
RDX: 0000000000000941 RSI: 00007ffda4c17f46 RDI: 00000000ffffff9c
|
||||
RBP: 00007ffda4c17f46 R08: 0000000000000000 R09: 0000000000000001
|
||||
R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000941
|
||||
R13: 00007fe4d9f631a4 R14: 00007ffda4c17f46 R15: 0000000000000000
|
||||
</TASK>
|
||||
XFS (sdb): Corruption detected. Unmount and run xfs_repair
|
||||
|
||||
[1] https://lore.kernel.org/all/20220928095355.2074025-1-guoxuenan@huawei.com/
|
||||
Reviewed-by: Hou Tao <houtao1@huawei.com>
|
||||
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
libxfs/xfs_dir2_leaf.c | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c
|
||||
index 8827c96..5da6600 100644
|
||||
--- a/libxfs/xfs_dir2_leaf.c
|
||||
+++ b/libxfs/xfs_dir2_leaf.c
|
||||
@@ -144,6 +144,8 @@ xfs_dir3_leaf_check_int(
|
||||
xfs_dir2_leaf_tail_t *ltp;
|
||||
int stale;
|
||||
int i;
|
||||
+ bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
|
||||
+ hdr->magic == XFS_DIR3_LEAF1_MAGIC);
|
||||
|
||||
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
|
||||
|
||||
@@ -156,8 +158,7 @@ xfs_dir3_leaf_check_int(
|
||||
return __this_address;
|
||||
|
||||
/* Leaves and bests don't overlap in leaf format. */
|
||||
- if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
|
||||
- hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
|
||||
+ if (isleaf1 &&
|
||||
(char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
|
||||
return __this_address;
|
||||
|
||||
@@ -173,6 +174,10 @@ xfs_dir3_leaf_check_int(
|
||||
}
|
||||
if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
|
||||
stale++;
|
||||
+ if (isleaf1 && xfs_dir2_dataptr_to_db(geo,
|
||||
+ be32_to_cpu(hdr->ents[i].address)) >=
|
||||
+ be32_to_cpu(ltp->bestcount))
|
||||
+ return __this_address;
|
||||
}
|
||||
if (hdr->stale != stale)
|
||||
return __this_address;
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
From 227bc97f12f2df902ab776fe038dc6d065f03c58 Mon Sep 17 00:00:00 2001
|
||||
From: Allison Henderson <allison.henderson@oracle.com>
|
||||
Date: Fri, 18 Nov 2022 10:48:26 +0100
|
||||
Subject: [PATCH] xfs: increase rename inode reservation
|
||||
|
||||
Source kernel commit: e07ee6fe21f47cfd72ae566395c67a80e7c66163
|
||||
|
||||
xfs_rename can update up to 5 inodes: src_dp, target_dp, src_ip, target_ip
|
||||
and wip. So we need to increase the inode reservation to match.
|
||||
|
||||
Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
libxfs/xfs_trans_resv.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libxfs/xfs_trans_resv.c b/libxfs/xfs_trans_resv.c
|
||||
index 797176d..04c4448 100644
|
||||
--- a/libxfs/xfs_trans_resv.c
|
||||
+++ b/libxfs/xfs_trans_resv.c
|
||||
@@ -421,7 +421,7 @@ xfs_calc_itruncate_reservation_minlogsize(
|
||||
|
||||
/*
|
||||
* In renaming a files we can modify:
|
||||
- * the four inodes involved: 4 * inode size
|
||||
+ * the five inodes involved: 5 * inode size
|
||||
* the two directory btrees: 2 * (max depth + v2) * dir block size
|
||||
* the two directory bmap btrees: 2 * max depth * block size
|
||||
* And the bmap_finish transaction can free dir and bmap blocks (two sets
|
||||
@@ -436,7 +436,7 @@ xfs_calc_rename_reservation(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
return XFS_DQUOT_LOGRES(mp) +
|
||||
- max((xfs_calc_inode_res(mp, 4) +
|
||||
+ max((xfs_calc_inode_res(mp, 5) +
|
||||
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
|
||||
XFS_FSB_TO_B(mp, 1))),
|
||||
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,118 +0,0 @@
|
||||
From 4b593a7b25ce7cd155614006a943ddd53ca47669 Mon Sep 17 00:00:00 2001
|
||||
From: Long Li <leo.lilong@huawei.com>
|
||||
Date: Fri, 18 Nov 2022 12:23:57 +0100
|
||||
Subject: [PATCH] xfs: fix sb write verify for lazysbcount
|
||||
|
||||
Source kernel commit: 7cecd500d90164419add650e26cc1de03a7a66cb
|
||||
|
||||
When lazysbcount is enabled, fsstress and loop mount/unmount test report
|
||||
the following problems:
|
||||
|
||||
XFS (loop0): SB summary counter sanity check failed
|
||||
XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460,
|
||||
xfs_sb block 0x0
|
||||
XFS (loop0): Unmount and run xfs_repair
|
||||
XFS (loop0): First 128 bytes of corrupted metadata buffer:
|
||||
00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(..
|
||||
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||||
00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z
|
||||
00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... ..........
|
||||
00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................
|
||||
00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................
|
||||
00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................
|
||||
00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................
|
||||
XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply
|
||||
+0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem.
|
||||
XFS (loop0): Please unmount the filesystem and rectify the problem(s)
|
||||
XFS (loop0): log mount/recovery failed: error -117
|
||||
XFS (loop0): log mount failed
|
||||
|
||||
This corruption will shut down the file system and the file system will
|
||||
no longer be mountable. The following script can reproduce the problem,
|
||||
but it may take a long time.
|
||||
|
||||
#!/bin/bash
|
||||
|
||||
device=/dev/sda
|
||||
testdir=/mnt/test
|
||||
round=0
|
||||
|
||||
function fail()
|
||||
{
|
||||
echo "$*"
|
||||
exit 1
|
||||
}
|
||||
|
||||
mkdir -p $testdir
|
||||
while [ $round -lt 10000 ]
|
||||
do
|
||||
echo "******* round $round ********"
|
||||
mkfs.xfs -f $device
|
||||
mount $device $testdir || fail "mount failed!"
|
||||
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null &
|
||||
sleep 4
|
||||
killall -w fsstress
|
||||
umount $testdir
|
||||
xfs_repair -e $device > /dev/null
|
||||
if [ $? -eq 2 ];then
|
||||
echo "ERR CODE 2: Dirty log exception during repair."
|
||||
exit 1
|
||||
fi
|
||||
round=$(($round+1))
|
||||
done
|
||||
|
||||
When lazysbcount is enabled, there is no additional lock protection for
|
||||
reading m_ifree and m_icount in xfs_log_sb(), if other cpu modifies the
|
||||
m_ifree, this will make the m_ifree greater than m_icount. For example,
|
||||
consider the following sequence, where ifreedelta is positive:
|
||||
|
||||
CPU0 CPU1
|
||||
xfs_log_sb xfs_trans_unreserve_and_mod_sb
|
||||
---------- ------------------------------
|
||||
percpu_counter_sum(&mp->m_icount)
|
||||
percpu_counter_add_batch(&mp->m_icount,
|
||||
idelta, XFS_ICOUNT_BATCH)
|
||||
percpu_counter_add(&mp->m_ifree, ifreedelta);
|
||||
percpu_counter_sum(&mp->m_ifree)
|
||||
|
||||
After this, an incorrect inode count (sb_ifree > sb_icount) will be written to
|
||||
the log. In the subsequent writing of sb, incorrect inode count (sb_ifree >
|
||||
sb_icount) will fail to pass the boundary check in xfs_validate_sb_write()
|
||||
which causes the file system to shut down.
|
||||
|
||||
When lazysbcount is enabled, we don't need to guarantee that Lazy sb
|
||||
counters are completely correct, but we do need to guarantee that sb_ifree
|
||||
<= sb_icount. On the other hand, the constraint that m_ifree <= m_icount
|
||||
must be satisfied any time that there /cannot/ be other threads allocating
|
||||
or freeing inode chunks. If the constraint is violated under these
|
||||
circumstances, sb_i{count,free} (the ondisk superblock inode counters)
|
||||
may be incorrect and need to be marked sick at unmount; the count will
|
||||
be rebuilt on the next mount.
|
||||
|
||||
Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks")
|
||||
Signed-off-by: Long Li <leo.lilong@huawei.com>
|
||||
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
libxfs/xfs_sb.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
|
||||
index cfa44eb..624bfbf 100644
|
||||
--- a/libxfs/xfs_sb.c
|
||||
+++ b/libxfs/xfs_sb.c
|
||||
@@ -804,7 +804,9 @@ xfs_log_sb(
|
||||
*/
|
||||
if (xfs_sb_version_haslazysbcount(&mp->m_sb)) {
|
||||
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
|
||||
- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
|
||||
+ mp->m_sb.sb_ifree = min_t(uint64_t,
|
||||
+ percpu_counter_sum(&mp->m_ifree),
|
||||
+ mp->m_sb.sb_icount);
|
||||
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
|
||||
}
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
From 978c3087b6afa56986ac3e5a52131d73d28253ca Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Wed, 23 Nov 2022 09:09:28 -0800
|
||||
Subject: [PATCH] xfs_repair: don't crash on unknown inode parents in dry run
|
||||
mode
|
||||
|
||||
Fuzz testing of directory block headers exposed a debug assertion vector
|
||||
in xfs_repair. In normal (aka fixit) mode, if a single-block directory
|
||||
has a totally trashed block, repair will zap the entire directory.
|
||||
Phase 4 ignores any dirents pointing to the zapped directory, phase 6
|
||||
ignores the freed directory, and everything is good.
|
||||
|
||||
However, in dry run mode, we don't actually free the inode. Phase 4
|
||||
still ignores any dirents pointing to the zapped directory, but phase 6
|
||||
thinks the inode is still live and tries to walk it. xfs_repair doesn't
|
||||
know of any parents for the zapped directory and so trips the assertion.
|
||||
|
||||
The assertion is critical for fixit mode because we need all the parent
|
||||
information to ensure consistency of the directory tree. In dry run
|
||||
mode we don't care, because we only have to print inconsistencies and
|
||||
return 1. Worse yet, (our) customers file bugs when xfs_repair crashes
|
||||
during a -n scan, so this will generate support calls.
|
||||
|
||||
Make everyone's life easier by downgrading the assertion to a warning if
|
||||
we're running in dry run mode.
|
||||
|
||||
Found by fuzzing bhdr.hdr.bno = zeroes in xfs/471.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
repair/phase6.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/repair/phase6.c b/repair/phase6.c
|
||||
index 1f9f8de..0be2c9c 100644
|
||||
--- a/repair/phase6.c
|
||||
+++ b/repair/phase6.c
|
||||
@@ -1836,7 +1836,14 @@ longform_dir2_entry_check_data(
|
||||
continue;
|
||||
}
|
||||
parent = get_inode_parent(irec, ino_offset);
|
||||
- ASSERT(parent != 0);
|
||||
+ if (parent == 0) {
|
||||
+ if (no_modify)
|
||||
+ do_warn(
|
||||
+ _("unknown parent for inode %" PRIu64 "\n"),
|
||||
+ inum);
|
||||
+ else
|
||||
+ ASSERT(parent != 0);
|
||||
+ }
|
||||
junkit = 0;
|
||||
/*
|
||||
* bump up the link counts in parent and child
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,275 +0,0 @@
|
||||
From a5915eb4be5c2070adce092e6fff1fd9c906dc7e Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Wed, 23 Nov 2022 09:09:33 -0800
|
||||
Subject: [PATCH] xfs_repair: retain superblock buffer to avoid write hook
|
||||
deadlock
|
||||
|
||||
Every now and then I experience the following deadlock in xfs_repair
|
||||
when I'm running the offline repair fuzz tests:
|
||||
|
||||
#0 futex_wait (private=0, expected=2, futex_word=0x55555566df70) at ../sysdeps/nptl/futex-internal.h:146
|
||||
#1 __GI___lll_lock_wait (futex=futex@entry=0x55555566df70, private=0) at ./nptl/lowlevellock.c:49
|
||||
#2 lll_mutex_lock_optimized (mutex=0x55555566df70) at ./nptl/pthread_mutex_lock.c:48
|
||||
#3 ___pthread_mutex_lock (mutex=mutex@entry=0x55555566df70) at ./nptl/pthread_mutex_lock.c:93
|
||||
#4 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:231
|
||||
#5 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e01b0, nodep=nodep@entry=0x7fffe55e0168) at cache.c:452
|
||||
#6 __cache_lookup (key=key@entry=0x7fffe55e01b0, flags=0, bpp=bpp@entry=0x7fffe55e0228) at rdwr.c:405
|
||||
#7 libxfs_getbuf_flags (btp=0x55555566de00, blkno=0, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0228) at rdwr.c:457
|
||||
#8 libxfs_buf_read_map (btp=0x55555566de00, map=map@entry=0x7fffe55e0280, nmaps=nmaps@entry=1, flags=flags@entry=0, bpp=bpp@entry=0x7fffe55e0278, ops=0x5555556233e0 <xfs_sb_buf_ops>)
|
||||
at rdwr.c:704
|
||||
#9 libxfs_buf_read (ops=<optimized out>, bpp=0x7fffe55e0278, flags=0, numblks=<optimized out>, blkno=0, target=<optimized out>)
|
||||
at /storage/home/djwong/cdev/work/xfsprogs/build-x86_64/libxfs/libxfs_io.h:195
|
||||
#10 libxfs_getsb (mp=mp@entry=0x7fffffffd690) at rdwr.c:162
|
||||
#11 force_needsrepair (mp=0x7fffffffd690) at xfs_repair.c:924
|
||||
#12 repair_capture_writeback (bp=<optimized out>) at xfs_repair.c:1000
|
||||
#13 libxfs_bwrite (bp=0x7fffe011e530) at rdwr.c:869
|
||||
#14 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:240
|
||||
#15 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e0470, nodep=nodep@entry=0x7fffe55e0428) at cache.c:452
|
||||
#16 __cache_lookup (key=key@entry=0x7fffe55e0470, flags=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:405
|
||||
#17 libxfs_getbuf_flags (btp=0x55555566de00, blkno=12736, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0538) at rdwr.c:457
|
||||
#18 __libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:501
|
||||
#19 libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:525
|
||||
#20 pf_queue_io (args=args@entry=0x5555556722c0, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flag=flag@entry=11) at prefetch.c:124
|
||||
#21 pf_read_bmbt_reclist (args=0x5555556722c0, rp=<optimized out>, numrecs=78) at prefetch.c:220
|
||||
#22 pf_scan_lbtree (dbno=dbno@entry=1211, level=level@entry=1, isadir=isadir@entry=1, args=args@entry=0x5555556722c0, func=0x55555557f240 <pf_scanfunc_bmap>) at prefetch.c:298
|
||||
#23 pf_read_btinode (isadir=1, dino=<optimized out>, args=0x5555556722c0) at prefetch.c:385
|
||||
#24 pf_read_inode_dirs (args=args@entry=0x5555556722c0, bp=bp@entry=0x7fffdc023790) at prefetch.c:459
|
||||
#25 pf_read_inode_dirs (bp=<optimized out>, args=0x5555556722c0) at prefetch.c:411
|
||||
#26 pf_batch_read (args=args@entry=0x5555556722c0, which=which@entry=PF_PRIMARY, buf=buf@entry=0x7fffd001d000) at prefetch.c:609
|
||||
#27 pf_io_worker (param=0x5555556722c0) at prefetch.c:673
|
||||
#28 start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
|
||||
#29 clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
|
||||
|
||||
>From this stack trace, we see that xfs_repair's prefetch module is
|
||||
getting some xfs_buf objects ahead of initiating a read (#19). The
|
||||
buffer cache has hit its limit, so it calls cache_shake (#14) to free
|
||||
some unused xfs_bufs. The buffer it finds is a dirty buffer, so it
|
||||
calls libxfs_bwrite to flush it out to disk, which in turn invokes the
|
||||
buffer write hook that xfs_repair set up in 3b7667cb to mark the ondisk
|
||||
filesystem's superblock as NEEDSREPAIR until repair actually completes.
|
||||
|
||||
Unfortunately, the NEEDSREPAIR handler itself needs to grab the
|
||||
superblock buffer, so it makes another call into the buffer cache (#9),
|
||||
which sees that the cache is full and tries to shake it (#4). Hence we
|
||||
deadlock on cm_mutex because shaking is not reentrant.
|
||||
|
||||
Fix this by retaining a reference to the superblock buffer when possible
|
||||
so that the writeback hook doesn't have to access the buffer cache to
|
||||
set NEEDSREPAIR.
|
||||
|
||||
Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem")
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
libxfs/libxfs_api_defs.h | 2 ++
|
||||
libxfs/libxfs_io.h | 3 ++
|
||||
libxfs/rdwr.c | 16 +++++++++++
|
||||
repair/phase2.c | 8 ++++++
|
||||
repair/protos.h | 1 +
|
||||
repair/xfs_repair.c | 75 ++++++++++++++++++++++++++++++++++++++++++------
|
||||
6 files changed, 96 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
|
||||
index 9aea8c7..4fe4e75 100644
|
||||
--- a/libxfs/libxfs_api_defs.h
|
||||
+++ b/libxfs/libxfs_api_defs.h
|
||||
@@ -49,9 +49,11 @@
|
||||
#define xfs_buf_delwri_submit libxfs_buf_delwri_submit
|
||||
#define xfs_buf_get libxfs_buf_get
|
||||
#define xfs_buf_get_uncached libxfs_buf_get_uncached
|
||||
+#define xfs_buf_lock libxfs_buf_lock
|
||||
#define xfs_buf_read libxfs_buf_read
|
||||
#define xfs_buf_read_uncached libxfs_buf_read_uncached
|
||||
#define xfs_buf_relse libxfs_buf_relse
|
||||
+#define xfs_buf_unlock libxfs_buf_unlock
|
||||
#define xfs_bunmapi libxfs_bunmapi
|
||||
#define xfs_bwrite libxfs_bwrite
|
||||
#define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk
|
||||
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
|
||||
index 3cc4f4e..0e444e2 100644
|
||||
--- a/libxfs/libxfs_io.h
|
||||
+++ b/libxfs/libxfs_io.h
|
||||
@@ -217,6 +217,9 @@ xfs_buf_hold(struct xfs_buf *bp)
|
||||
bp->b_node.cn_count++;
|
||||
}
|
||||
|
||||
+void xfs_buf_lock(struct xfs_buf *bp);
|
||||
+void xfs_buf_unlock(struct xfs_buf *bp);
|
||||
+
|
||||
int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags,
|
||||
struct xfs_buf **bpp);
|
||||
int libxfs_buf_read_uncached(struct xfs_buftarg *targ, xfs_daddr_t daddr,
|
||||
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
|
||||
index 128367e..2c1162b 100644
|
||||
--- a/libxfs/rdwr.c
|
||||
+++ b/libxfs/rdwr.c
|
||||
@@ -376,6 +376,22 @@ libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
|
||||
return bp;
|
||||
}
|
||||
|
||||
+void
|
||||
+xfs_buf_lock(
|
||||
+ struct xfs_buf *bp)
|
||||
+{
|
||||
+ if (use_xfs_buf_lock)
|
||||
+ pthread_mutex_lock(&bp->b_lock);
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+xfs_buf_unlock(
|
||||
+ struct xfs_buf *bp)
|
||||
+{
|
||||
+ if (use_xfs_buf_lock)
|
||||
+ pthread_mutex_unlock(&bp->b_lock);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
__cache_lookup(
|
||||
struct xfs_bufkey *key,
|
||||
diff --git a/repair/phase2.c b/repair/phase2.c
|
||||
index ab53ee0..7441451 100644
|
||||
--- a/repair/phase2.c
|
||||
+++ b/repair/phase2.c
|
||||
@@ -250,6 +250,14 @@ phase2(
|
||||
} else
|
||||
do_log(_("Phase 2 - using internal log\n"));
|
||||
|
||||
+ /*
|
||||
+ * Now that we've set up the buffer cache the way we want it, try to
|
||||
+ * grab our own reference to the primary sb so that the hooks will not
|
||||
+ * have to call out to the buffer cache.
|
||||
+ */
|
||||
+ if (mp->m_buf_writeback_fn)
|
||||
+ retain_primary_sb(mp);
|
||||
+
|
||||
/* Zero log if applicable */
|
||||
do_log(_(" - zero log...\n"));
|
||||
|
||||
diff --git a/repair/protos.h b/repair/protos.h
|
||||
index 83734e8..7cdc3a1 100644
|
||||
--- a/repair/protos.h
|
||||
+++ b/repair/protos.h
|
||||
@@ -16,6 +16,7 @@ int get_sb(xfs_sb_t *sbp,
|
||||
xfs_off_t off,
|
||||
int size,
|
||||
xfs_agnumber_t agno);
|
||||
+int retain_primary_sb(struct xfs_mount *mp);
|
||||
void write_primary_sb(xfs_sb_t *sbp,
|
||||
int size);
|
||||
|
||||
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
|
||||
index e44aa40..d043643 100644
|
||||
--- a/repair/xfs_repair.c
|
||||
+++ b/repair/xfs_repair.c
|
||||
@@ -738,6 +738,63 @@ check_fs_vs_host_sectsize(
|
||||
}
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * If we set up a writeback function to set NEEDSREPAIR while the filesystem is
|
||||
+ * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer
|
||||
+ * cache while trying to get the primary sb buffer if the first non-sb write to
|
||||
+ * the filesystem is the result of a cache shake. Retain a reference to the
|
||||
+ * primary sb buffer to avoid all that.
|
||||
+ */
|
||||
+static struct xfs_buf *primary_sb_bp; /* buffer for superblock */
|
||||
+
|
||||
+int
|
||||
+retain_primary_sb(
|
||||
+ struct xfs_mount *mp)
|
||||
+{
|
||||
+ int error;
|
||||
+
|
||||
+ error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
|
||||
+ XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
|
||||
+ &xfs_sb_buf_ops);
|
||||
+ if (error)
|
||||
+ return error;
|
||||
+
|
||||
+ libxfs_buf_unlock(primary_sb_bp);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+drop_primary_sb(void)
|
||||
+{
|
||||
+ if (!primary_sb_bp)
|
||||
+ return;
|
||||
+
|
||||
+ libxfs_buf_lock(primary_sb_bp);
|
||||
+ libxfs_buf_relse(primary_sb_bp);
|
||||
+ primary_sb_bp = NULL;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+get_primary_sb(
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct xfs_buf **bpp)
|
||||
+{
|
||||
+ int error;
|
||||
+
|
||||
+ *bpp = NULL;
|
||||
+
|
||||
+ if (!primary_sb_bp) {
|
||||
+ error = retain_primary_sb(mp);
|
||||
+ if (error)
|
||||
+ return error;
|
||||
+ }
|
||||
+
|
||||
+ libxfs_buf_lock(primary_sb_bp);
|
||||
+ xfs_buf_hold(primary_sb_bp);
|
||||
+ *bpp = primary_sb_bp;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/* Clear needsrepair after a successful repair run. */
|
||||
void
|
||||
clear_needsrepair(
|
||||
@@ -758,15 +815,14 @@ clear_needsrepair(
|
||||
do_warn(
|
||||
_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
|
||||
error);
|
||||
- return;
|
||||
+ goto drop;
|
||||
}
|
||||
|
||||
/* Clear needsrepair from the superblock. */
|
||||
- bp = libxfs_getsb(mp);
|
||||
- if (!bp || bp->b_error) {
|
||||
+ error = get_primary_sb(mp, &bp);
|
||||
+ if (error) {
|
||||
do_warn(
|
||||
- _("Cannot clear needsrepair from primary super, err=%d.\n"),
|
||||
- bp ? bp->b_error : ENOMEM);
|
||||
+ _("Cannot clear needsrepair from primary super, err=%d.\n"), error);
|
||||
} else {
|
||||
mp->m_sb.sb_features_incompat &=
|
||||
~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
|
||||
@@ -775,6 +831,8 @@ clear_needsrepair(
|
||||
}
|
||||
if (bp)
|
||||
libxfs_buf_relse(bp);
|
||||
+drop:
|
||||
+ drop_primary_sb();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -797,11 +855,10 @@ force_needsrepair(
|
||||
xfs_sb_version_needsrepair(&mp->m_sb))
|
||||
return;
|
||||
|
||||
- bp = libxfs_getsb(mp);
|
||||
- if (!bp || bp->b_error) {
|
||||
+ error = get_primary_sb(mp, &bp);
|
||||
+ if (error) {
|
||||
do_log(
|
||||
- _("couldn't get superblock to set needsrepair, err=%d\n"),
|
||||
- bp ? bp->b_error : ENOMEM);
|
||||
+ _("couldn't get superblock to set needsrepair, err=%d\n"), error);
|
||||
} else {
|
||||
/*
|
||||
* It's possible that we need to set NEEDSREPAIR before we've
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,90 +0,0 @@
|
||||
From 79ba1e15d80eba3aff4396f44629eb8960722d36 Mon Sep 17 00:00:00 2001
|
||||
From: Srikanth C S <srikanth.c.s@oracle.com>
|
||||
Date: Tue, 13 Dec 2022 22:45:43 +0530
|
||||
Subject: [PATCH] fsck.xfs: mount/umount xfs fs to replay log before running
|
||||
xfs_repair
|
||||
|
||||
After a recent data center crash, we had to recover root filesystems
|
||||
on several thousands of VMs via a boot time fsck. Since these
|
||||
machines are remotely manageable, support can inject the kernel
|
||||
command line with 'fsck.mode=force fsck.repair=yes' to kick off
|
||||
xfs_repair if the machine won't come up or if they suspect there
|
||||
might be deeper issues with latent errors in the fs metadata, which
|
||||
is what they did to try to get everyone running ASAP while
|
||||
anticipating any future problems. But, fsck.xfs does not address the
|
||||
journal replay in case of a crash.
|
||||
|
||||
fsck.xfs does xfs_repair -e if fsck.mode=force is set. It is
|
||||
possible that when the machine crashes, the fs is in an inconsistent
|
||||
state with the journal log not yet replayed. This can drop the machine
|
||||
into the rescue shell because xfs_fsck.sh does not know how to clean the
|
||||
log. Since the administrator told us to force repairs, address the
|
||||
deficiency by cleaning the log and rerunning xfs_repair.
|
||||
|
||||
Run xfs_repair -e when fsck.mode=force and repair=auto or yes.
|
||||
Replay the logs only if fsck.mode=force and fsck.repair=yes. For
|
||||
other options (-fa and -f), drop to the rescue shell if repair detects
|
||||
any corruptions.
|
||||
|
||||
Signed-off-by: Srikanth C S <srikanth.c.s@oracle.com>
|
||||
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
fsck/xfs_fsck.sh | 31 +++++++++++++++++++++++++++++--
|
||||
1 file changed, 29 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/fsck/xfs_fsck.sh b/fsck/xfs_fsck.sh
|
||||
index 6af0f22..62a1e0b 100755
|
||||
--- a/fsck/xfs_fsck.sh
|
||||
+++ b/fsck/xfs_fsck.sh
|
||||
@@ -31,10 +31,12 @@ repair2fsck_code() {
|
||||
|
||||
AUTO=false
|
||||
FORCE=false
|
||||
+REPAIR=false
|
||||
while getopts ":aApyf" c
|
||||
do
|
||||
case $c in
|
||||
- a|A|p|y) AUTO=true;;
|
||||
+ a|A|p) AUTO=true;;
|
||||
+ y) REPAIR=true;;
|
||||
f) FORCE=true;;
|
||||
esac
|
||||
done
|
||||
@@ -64,7 +66,32 @@ fi
|
||||
|
||||
if $FORCE; then
|
||||
xfs_repair -e $DEV
|
||||
- repair2fsck_code $?
|
||||
+ error=$?
|
||||
+ if [ $error -eq 2 ] && [ $REPAIR = true ]; then
|
||||
+ echo "Replaying log for $DEV"
|
||||
+ mkdir -p /tmp/repair_mnt || exit 1
|
||||
+ for x in $(cat /proc/cmdline); do
|
||||
+ case $x in
|
||||
+ root=*)
|
||||
+ ROOT="${x#root=}"
|
||||
+ ;;
|
||||
+ rootflags=*)
|
||||
+ ROOTFLAGS="-o ${x#rootflags=}"
|
||||
+ ;;
|
||||
+ esac
|
||||
+ done
|
||||
+ test -b "$ROOT" || ROOT=$(blkid -t "$ROOT" -o device)
|
||||
+ if [ $(basename $DEV) = $(basename $ROOT) ]; then
|
||||
+ mount $DEV /tmp/repair_mnt $ROOTFLAGS || exit 1
|
||||
+ else
|
||||
+ mount $DEV /tmp/repair_mnt || exit 1
|
||||
+ fi
|
||||
+ umount /tmp/repair_mnt
|
||||
+ xfs_repair -e $DEV
|
||||
+ error=$?
|
||||
+ rm -d /tmp/repair_mnt
|
||||
+ fi
|
||||
+ repair2fsck_code $error
|
||||
exit $?
|
||||
fi
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From 7374f58bfeb38467bab6552a47a5cd6bbe3c2e2e Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 20 Dec 2022 16:53:34 -0800
|
||||
Subject: [PATCH] xfs_db: fix dir3 block magic check
|
||||
|
||||
Fix this broken check, which (amazingly) went unnoticed until I cranked
|
||||
up the warning level /and/ built the system for s390x.
|
||||
|
||||
Fixes: e96864ff4d4 ("xfs_db: enable blockget for v5 filesystems")
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
db/check.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/db/check.c b/db/check.c
|
||||
index bb27ce5..964756d 100644
|
||||
--- a/db/check.c
|
||||
+++ b/db/check.c
|
||||
@@ -2578,7 +2578,7 @@ process_data_dir_v2(
|
||||
error++;
|
||||
}
|
||||
if ((be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC ||
|
||||
- be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC) &&
|
||||
+ be32_to_cpu(data->magic) == XFS_DIR3_BLOCK_MAGIC) &&
|
||||
stale != be32_to_cpu(btp->stale)) {
|
||||
if (!sflag || v)
|
||||
dbprintf(_("dir %lld block %d bad stale tail count %d\n"),
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,36 +0,0 @@
|
||||
From b7b81f336ac02f4e4f24e0844a7fb3023c489667 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Tue, 14 Mar 2023 18:01:55 -0700
|
||||
Subject: [PATCH] xfs_repair: fix incorrect dabtree hashval comparison
|
||||
|
||||
If an xattr structure contains enough names with the same hash value to
|
||||
fill multiple xattr leaf blocks with names all hashing to the same
|
||||
value, then the dabtree nodes will contain consecutive entries with the
|
||||
same hash value.
|
||||
|
||||
This causes false corruption reports in xfs_repair because it's not
|
||||
expecting such a huge same-hashing structure. Fix that.
|
||||
|
||||
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||
---
|
||||
repair/da_util.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/repair/da_util.c b/repair/da_util.c
|
||||
index 7239c2e..b229422 100644
|
||||
--- a/repair/da_util.c
|
||||
+++ b/repair/da_util.c
|
||||
@@ -330,7 +330,7 @@ _("%s block used/count inconsistency - %d/%hu\n"),
|
||||
/*
|
||||
* hash values monotonically increasing ???
|
||||
*/
|
||||
- if (cursor->level[this_level].hashval >=
|
||||
+ if (cursor->level[this_level].hashval >
|
||||
be32_to_cpu(nodehdr.btree[entry].hashval)) {
|
||||
do_warn(
|
||||
_("%s block hashvalue inconsistency, expected > %u / saw %u\n"),
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -1,198 +0,0 @@
|
||||
From 983997ce4b8de736c5c10c33c6cf4e40076dcbdf Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:36 +0000
|
||||
Subject: [PATCH 01/11] xfs: create a new inode flag to require extsize
|
||||
alignment of file data space
|
||||
|
||||
Conflict: m_feature is not supported in struct xfs_mount, some feature related
|
||||
functions are deleted or renamed
|
||||
|
||||
Add a new inode flag to require that all file data extent mappings must
|
||||
be aligned (both the file offset range and the allocated space itself)
|
||||
to the extent size hint. Having a separate COW extent size hint is no
|
||||
longer allowed.
|
||||
|
||||
The goal here is to enable sysadmins and users to mandate that all space
|
||||
mappings in a file must have a startoff/blockcount that are aligned to
|
||||
(say) a 2MB alignment and that the startblock/blockcount will follow the
|
||||
same alignment.
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Co-developed-by: John Garry <john.g.garry@oracle.com>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
include/linux.h | 5 +++++
|
||||
include/xfs_inode.h | 5 +++++
|
||||
libxfs/util.c | 2 ++
|
||||
libxfs/xfs_format.h | 13 ++++++++++++-
|
||||
libxfs/xfs_inode_buf.c | 40 ++++++++++++++++++++++++++++++++++++++++
|
||||
libxfs/xfs_inode_buf.h | 3 +++
|
||||
6 files changed, 67 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux.h b/include/linux.h
|
||||
index 2465022..d95365b 100644
|
||||
--- a/include/linux.h
|
||||
+++ b/include/linux.h
|
||||
@@ -246,6 +246,11 @@ struct fsxattr {
|
||||
#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
|
||||
#endif
|
||||
|
||||
+/* data extent mappings for regular files must be aligned to extent size hint */
|
||||
+#ifndef FS_XFLAG_FORCEALIGN
|
||||
+#define FS_XFLAG_FORCEALIGN 0x00020000
|
||||
+#endif
|
||||
+
|
||||
#ifdef HAVE_GETFSMAP
|
||||
# include <linux/fsmap.h>
|
||||
#else
|
||||
diff --git a/include/xfs_inode.h b/include/xfs_inode.h
|
||||
index 08a62d8..6d52a1d 100644
|
||||
--- a/include/xfs_inode.h
|
||||
+++ b/include/xfs_inode.h
|
||||
@@ -164,6 +164,11 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
|
||||
return ip->i_diflags2 & XFS_DIFLAG2_BIGTIME;
|
||||
}
|
||||
|
||||
+static inline bool xfs_inode_forcealign(struct xfs_inode *ip)
|
||||
+{
|
||||
+ return ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN;
|
||||
+}
|
||||
+
|
||||
typedef struct cred {
|
||||
uid_t cr_uid;
|
||||
gid_t cr_gid;
|
||||
diff --git a/libxfs/util.c b/libxfs/util.c
|
||||
index 905f178..5750b3f 100644
|
||||
--- a/libxfs/util.c
|
||||
+++ b/libxfs/util.c
|
||||
@@ -204,6 +204,8 @@ xfs_flags2diflags2(
|
||||
di_flags2 |= XFS_DIFLAG2_DAX;
|
||||
if (xflags & FS_XFLAG_COWEXTSIZE)
|
||||
di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
|
||||
+ if (xflags & FS_XFLAG_FORCEALIGN)
|
||||
+ di_flags2 |= XFS_DIFLAG2_FORCEALIGN;
|
||||
|
||||
return di_flags2;
|
||||
}
|
||||
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
|
||||
index 37570cf..c2db380 100644
|
||||
--- a/libxfs/xfs_format.h
|
||||
+++ b/libxfs/xfs_format.h
|
||||
@@ -450,10 +450,12 @@ xfs_sb_has_compat_feature(
|
||||
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
|
||||
+#define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30) /* aligned file data extents */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_ALL \
|
||||
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
|
||||
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
|
||||
XFS_SB_FEAT_RO_COMPAT_REFLINK| \
|
||||
+ XFS_SB_FEAT_RO_COMPAT_FORCEALIGN| \
|
||||
XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
|
||||
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
|
||||
static inline bool
|
||||
@@ -586,6 +588,12 @@ static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp)
|
||||
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT);
|
||||
}
|
||||
|
||||
+static inline bool xfs_sb_version_hasforcealign(struct xfs_sb *sbp)
|
||||
+{
|
||||
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
|
||||
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN);
|
||||
+}
|
||||
+
|
||||
static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp)
|
||||
{
|
||||
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
|
||||
@@ -1170,15 +1178,18 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
|
||||
#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */
|
||||
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
|
||||
#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
|
||||
+/* data extent mappings for regular files must be aligned to extent size hint */
|
||||
+#define XFS_DIFLAG2_FORCEALIGN_BIT 5
|
||||
|
||||
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
|
||||
#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
|
||||
#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
|
||||
#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
|
||||
+#define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT)
|
||||
|
||||
#define XFS_DIFLAG2_ANY \
|
||||
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
|
||||
- XFS_DIFLAG2_BIGTIME)
|
||||
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_FORCEALIGN)
|
||||
|
||||
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
|
||||
{
|
||||
diff --git a/libxfs/xfs_inode_buf.c b/libxfs/xfs_inode_buf.c
|
||||
index 7ecbfad..8a89685 100644
|
||||
--- a/libxfs/xfs_inode_buf.c
|
||||
+++ b/libxfs/xfs_inode_buf.c
|
||||
@@ -551,6 +551,14 @@ xfs_dinode_verify(
|
||||
!xfs_sb_version_hasbigtime(&mp->m_sb))
|
||||
return __this_address;
|
||||
|
||||
+ if (flags2 & XFS_DIFLAG2_FORCEALIGN) {
|
||||
+ fa = xfs_inode_validate_forcealign(mp, mode, flags,
|
||||
+ be32_to_cpu(dip->di_extsize),
|
||||
+ be32_to_cpu(dip->di_cowextsize));
|
||||
+ if (fa)
|
||||
+ return fa;
|
||||
+ }
|
||||
+
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -718,3 +726,35 @@ xfs_inode_validate_cowextsize(
|
||||
|
||||
return NULL;
|
||||
}
|
||||
+
|
||||
+/* Validate the forcealign inode flag */
|
||||
+xfs_failaddr_t
|
||||
+xfs_inode_validate_forcealign(
|
||||
+ struct xfs_mount *mp,
|
||||
+ uint16_t mode,
|
||||
+ uint16_t flags,
|
||||
+ uint32_t extsize,
|
||||
+ uint32_t cowextsize)
|
||||
+{
|
||||
+ /* superblock rocompat feature flag */
|
||||
+ if (!xfs_sb_version_hasforcealign(&mp->m_sb))
|
||||
+ return __this_address;
|
||||
+
|
||||
+ /* Only regular files and directories */
|
||||
+ if (!S_ISDIR(mode) && !S_ISREG(mode))
|
||||
+ return __this_address;
|
||||
+
|
||||
+ /* Doesn't apply to realtime files */
|
||||
+ if (flags & XFS_DIFLAG_REALTIME)
|
||||
+ return __this_address;
|
||||
+
|
||||
+ /* Requires a nonzero extent size hint */
|
||||
+ if (extsize == 0)
|
||||
+ return __this_address;
|
||||
+
|
||||
+ /* Requires no cow extent size hint */
|
||||
+ if (cowextsize != 0)
|
||||
+ return __this_address;
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
diff --git a/libxfs/xfs_inode_buf.h b/libxfs/xfs_inode_buf.h
|
||||
index 7f865bb..56ed6d7 100644
|
||||
--- a/libxfs/xfs_inode_buf.h
|
||||
+++ b/libxfs/xfs_inode_buf.h
|
||||
@@ -33,6 +33,9 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
|
||||
xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
|
||||
uint32_t cowextsize, uint16_t mode, uint16_t flags,
|
||||
uint64_t flags2);
|
||||
+xfs_failaddr_t xfs_inode_validate_forcealign(struct xfs_mount *mp,
|
||||
+ uint16_t mode, uint16_t flags, uint32_t extsize,
|
||||
+ uint32_t cowextsize);
|
||||
|
||||
static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
@ -1,49 +0,0 @@
|
||||
From 066996554fe69ee6e8b2acfc59013414adc9669c Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:37 +0000
|
||||
Subject: [PATCH 02/11] xfs: allow files to require data mappings to be aligned
|
||||
to extszhint
|
||||
|
||||
Conflict: xfs_bmapi_allocate is refactored in v6.4, so delete some code related
|
||||
to alignment.
|
||||
|
||||
Add a new inode flag to require that all file data extent mappings must
|
||||
be aligned (both the file offset range and the allocated space itself)
|
||||
to the extent size hint. Having a separate COW extent size hint is no
|
||||
longer allowed.
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Co-developed-by: John Garry <john.g.garry@oracle.com>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
libxfs/xfs_bmap.c | 13 +++++++++++++
|
||||
1 file changed, 13 insertions(+)
|
||||
|
||||
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
|
||||
index 16dbe71..303370e 100644
|
||||
--- a/libxfs/xfs_bmap.c
|
||||
+++ b/libxfs/xfs_bmap.c
|
||||
@@ -3428,6 +3428,19 @@ xfs_bmap_compute_alignments(
|
||||
align = xfs_get_cowextsz_hint(ap->ip);
|
||||
else if (ap->datatype & XFS_ALLOC_USERDATA)
|
||||
align = xfs_get_extsz_hint(ap->ip);
|
||||
+
|
||||
+ /*
|
||||
+ * xfs_get_cowextsz_hint() returns extsz_hint for when forcealign is
|
||||
+ * set as forcealign and cowextsz_hint are mutually exclusive
|
||||
+ */
|
||||
+ if (xfs_inode_forcealign(ap->ip) && align) {
|
||||
+ args->alignment = align;
|
||||
+ if (stripe_align % align)
|
||||
+ stripe_align = align;
|
||||
+ } else {
|
||||
+ args->alignment = 1;
|
||||
+ }
|
||||
+
|
||||
if (align) {
|
||||
if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
|
||||
ap->eof, 0, ap->conv, &ap->offset,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
From a26e452c3e0cc84df8abd7cc8aabe4bc6d2f504d Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:38 +0000
|
||||
Subject: [PATCH 03/11] xfs_db: expose force_align feature and flags
|
||||
|
||||
Expose the superblock feature and inode flags.
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
db/inode.c | 3 +++
|
||||
db/sb.c | 2 ++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/db/inode.c b/db/inode.c
|
||||
index 3453c08..a859196 100644
|
||||
--- a/db/inode.c
|
||||
+++ b/db/inode.c
|
||||
@@ -178,6 +178,9 @@ const field_t inode_v3_flds[] = {
|
||||
{ "bigtime", FLDT_UINT1,
|
||||
OI(COFF(flags2) + bitsz(uint64_t) - XFS_DIFLAG2_BIGTIME_BIT - 1), C1,
|
||||
0, TYP_NONE },
|
||||
+ { "forcealign", FLDT_UINT1,
|
||||
+ OI(COFF(flags2) + bitsz(uint64_t) - XFS_DIFLAG2_FORCEALIGN_BIT-1), C1,
|
||||
+ 0, TYP_NONE },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
diff --git a/db/sb.c b/db/sb.c
|
||||
index cec7dce..200deac 100644
|
||||
--- a/db/sb.c
|
||||
+++ b/db/sb.c
|
||||
@@ -704,6 +704,8 @@ version_string(
|
||||
strcat(s, ",BIGTIME");
|
||||
if (xfs_sb_version_needsrepair(sbp))
|
||||
strcat(s, ",NEEDSREPAIR");
|
||||
+ if (xfs_sb_version_hasforcealign(sbp))
|
||||
+ strcat(s, ",FORCEALIGN");
|
||||
return s;
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,68 +0,0 @@
|
||||
From 0825de7d2255ae25d7571a95244cc29095b044fe Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:39 +0000
|
||||
Subject: [PATCH 04/11] xfs_io: implement lsattr and chattr support for
|
||||
forcealign
|
||||
|
||||
Make it so that we can adjust the forcealign flag at runtime.
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Co-developed-by: John Garry <john.g.garry@oracle.com>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
---
|
||||
io/attr.c | 5 ++++-
|
||||
man/man2/ioctl_xfs_fsgetxattr.2 | 6 ++++++
|
||||
2 files changed, 10 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/io/attr.c b/io/attr.c
|
||||
index fd82a2e..248a9c2 100644
|
||||
--- a/io/attr.c
|
||||
+++ b/io/attr.c
|
||||
@@ -38,9 +38,10 @@ static struct xflags {
|
||||
{ FS_XFLAG_DAX, "x", "dax" },
|
||||
{ FS_XFLAG_COWEXTSIZE, "C", "cowextsize" },
|
||||
{ FS_XFLAG_HASATTR, "X", "has-xattr" },
|
||||
+ { FS_XFLAG_FORCEALIGN, "F", "force-align" },
|
||||
{ 0, NULL, NULL }
|
||||
};
|
||||
-#define CHATTR_XFLAG_LIST "r"/*p*/"iasAdtPneEfSxC"/*X*/
|
||||
+#define CHATTR_XFLAG_LIST "r"/*p*/"iasAdtPneEfSxC"/*X*/"F"
|
||||
|
||||
static void
|
||||
lsattr_help(void)
|
||||
@@ -67,6 +68,7 @@ lsattr_help(void)
|
||||
" x -- Use direct access (DAX) for data in this file\n"
|
||||
" C -- for files with shared blocks, observe the inode CoW extent size value\n"
|
||||
" X -- file has extended attributes (cannot be changed using chattr)\n"
|
||||
+" F -- data extent mappings must be aligned to extent size hint\n"
|
||||
"\n"
|
||||
" Options:\n"
|
||||
" -R -- recursively descend (useful when current file is a directory)\n"
|
||||
@@ -104,6 +106,7 @@ chattr_help(void)
|
||||
" +/-S -- set/clear the filestreams allocator flag\n"
|
||||
" +/-x -- set/clear the direct access (DAX) flag\n"
|
||||
" +/-C -- set/clear the CoW extent-size flag\n"
|
||||
+" +/-F -- set/clear the forcealign flag\n"
|
||||
" Note1: user must have certain capabilities to modify immutable/append-only.\n"
|
||||
" Note2: immutable/append-only files cannot be deleted; removing these files\n"
|
||||
" requires the immutable/append-only flag to be cleared first.\n"
|
||||
diff --git a/man/man2/ioctl_xfs_fsgetxattr.2 b/man/man2/ioctl_xfs_fsgetxattr.2
|
||||
index 2c626a7..d97fb1b 100644
|
||||
--- a/man/man2/ioctl_xfs_fsgetxattr.2
|
||||
+++ b/man/man2/ioctl_xfs_fsgetxattr.2
|
||||
@@ -200,6 +200,12 @@ below).
|
||||
If set on a directory, new files and subdirectories created in the directory
|
||||
will have both the flag and the CoW extent size value set.
|
||||
.TP
|
||||
+.B XFS_XFLAG_FORCEALIGN
|
||||
+Force Alignment bit - requires that all file data extents be aligned
|
||||
+to the extent size hint value.
|
||||
+If set on a directory, new files and subdirectories created in the directory
|
||||
+will have the flag set.
|
||||
+.TP
|
||||
.B XFS_XFLAG_HASATTR
|
||||
The file has extended attributes associated with it.
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,101 +0,0 @@
|
||||
From 2a14aafc618a9878d45240c4f0c1f0c7dc8f3d6b Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:40 +0000
|
||||
Subject: [PATCH 05/11] xfs_repair: check the force-align flag
|
||||
|
||||
Make sure the flag isn't set incorrectly.
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
repair/dinode.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 66 insertions(+)
|
||||
|
||||
diff --git a/repair/dinode.c b/repair/dinode.c
|
||||
index f39ab2d..7747070 100644
|
||||
--- a/repair/dinode.c
|
||||
+++ b/repair/dinode.c
|
||||
@@ -2229,6 +2229,69 @@ _("Bad extent size hint %u on inode %" PRIu64 ", "),
|
||||
}
|
||||
}
|
||||
|
||||
+static void
|
||||
+validate_forcealign(
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct xfs_dinode *dino,
|
||||
+ xfs_ino_t lino,
|
||||
+ int *dirty)
|
||||
+{
|
||||
+ uint16_t mode;
|
||||
+ uint16_t flags;
|
||||
+ uint64_t flags2;
|
||||
+
|
||||
+ mode = be16_to_cpu(dino->di_mode);
|
||||
+ flags = be16_to_cpu(dino->di_flags);
|
||||
+ flags2 = be64_to_cpu(dino->di_flags2);
|
||||
+
|
||||
+ if (!(flags2 & XFS_DIFLAG2_FORCEALIGN))
|
||||
+ return;
|
||||
+
|
||||
+ if (!xfs_sb_version_hasforcealign(&mp->m_sb)) {
|
||||
+ do_warn(
|
||||
+ _("Filesystem does not support forcealign flag set on inode %" PRIu64 ", "),
|
||||
+ lino);
|
||||
+ goto zap;
|
||||
+ }
|
||||
+
|
||||
+ if (!S_ISDIR(mode) && !S_ISREG(mode)) {
|
||||
+ do_warn(
|
||||
+ _("Cannot have forcealign inode flag set on non-dir non-regular file inode %" PRIu64 "\n"),
|
||||
+ lino);
|
||||
+ goto zap;
|
||||
+ }
|
||||
+
|
||||
+ if (flags & XFS_DIFLAG_REALTIME) {
|
||||
+ do_warn(
|
||||
+ _("Cannot have forcealign inode flag set on realtime inode %" PRIu64 "\n"),
|
||||
+ lino);
|
||||
+ goto zap;
|
||||
+ }
|
||||
+
|
||||
+ if (dino->di_extsize == 0) {
|
||||
+ do_warn(
|
||||
+ _("Cannot have forcealign inode flag set without an extent size hint on inode %" PRIu64 "\n"),
|
||||
+ lino);
|
||||
+ goto zap;
|
||||
+ }
|
||||
+
|
||||
+ if (dino->di_cowextsize != 0) {
|
||||
+ do_warn(
|
||||
+ _("Cannot have forcealign inode flag set with nonzero CoW extent size hint on inode %" PRIu64 "\n"),
|
||||
+ lino);
|
||||
+ goto zap;
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+zap:
|
||||
+ if (!no_modify) {
|
||||
+ do_warn(_("clearing flag\n"));
|
||||
+ dino->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_FORCEALIGN);
|
||||
+ *dirty = 1;
|
||||
+ } else
|
||||
+ do_warn(_("would clear flag\n"));
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* returns 0 if the inode is ok, 1 if the inode is corrupt
|
||||
* check_dups can be set to 1 *only* when called by the
|
||||
@@ -2764,6 +2827,9 @@ _("Bad CoW extent size %u on inode %" PRIu64 ", "),
|
||||
do_warn(_("would reset to zero\n"));
|
||||
}
|
||||
|
||||
+ if (dino->di_version >= 3)
|
||||
+ validate_forcealign(mp, dino, lino, dirty);
|
||||
+
|
||||
/* nsec fields cannot be larger than 1 billion */
|
||||
check_nsec("atime", lino, dino, &dino->di_atime, dirty);
|
||||
check_nsec("mtime", lino, dino, &dino->di_mtime, dirty);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,165 +0,0 @@
|
||||
From 53e0ead4c1a522f42486e47925b4c224945c6cda Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:41 +0000
|
||||
Subject: [PATCH 06/11] mkfs: add an extsize= option that allows units
|
||||
|
||||
Add a new mkfs option that allows the user to specify an extent size
|
||||
hint with units. This removes the need to specify the option in
|
||||
filesystem block size, which eases the computation requirements in
|
||||
deployment scripts.
|
||||
|
||||
# mkfs.xfs -d extsize=2m /dev/sda
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
man/man8/mkfs.xfs.8 | 15 ++++++++++++++
|
||||
mkfs/xfs_mkfs.c | 48 +++++++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 61 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
|
||||
index b2ffb3d..5cd69fa 100644
|
||||
--- a/man/man8/mkfs.xfs.8
|
||||
+++ b/man/man8/mkfs.xfs.8
|
||||
@@ -479,6 +479,18 @@ will be assigned the project quota id provided in
|
||||
Directories will pass on the project id to newly created regular files and
|
||||
directories.
|
||||
.TP
|
||||
+.BI extsize= num
|
||||
+All inodes created by
|
||||
+.B mkfs.xfs
|
||||
+will have this
|
||||
+.I num
|
||||
+extent size hint applied.
|
||||
+Directories will pass on this hint to newly created regular files and
|
||||
+directories.
|
||||
+This option cannot be combined with the
|
||||
+.B extszinherit
|
||||
+option.
|
||||
+.TP
|
||||
.BI extszinherit= value
|
||||
All inodes created by
|
||||
.B mkfs.xfs
|
||||
@@ -488,6 +500,9 @@ extent size hint applied.
|
||||
The value must be provided in units of filesystem blocks.
|
||||
Directories will pass on this hint to newly created regular files and
|
||||
directories.
|
||||
+This option cannot be combined with the
|
||||
+.B extsize
|
||||
+option.
|
||||
.TP
|
||||
.BI daxinherit= value
|
||||
If
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index 2eb3a0a..fb0c53f 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -66,6 +66,7 @@ enum {
|
||||
D_NOALIGN,
|
||||
D_RTINHERIT,
|
||||
D_PROJINHERIT,
|
||||
+ D_EXTSIZE,
|
||||
D_EXTSZINHERIT,
|
||||
D_COWEXTSIZE,
|
||||
D_DAXINHERIT,
|
||||
@@ -300,6 +301,7 @@ static struct opt_params dopts = {
|
||||
[D_NOALIGN] = "noalign",
|
||||
[D_RTINHERIT] = "rtinherit",
|
||||
[D_PROJINHERIT] = "projinherit",
|
||||
+ [D_EXTSIZE] = "extsize",
|
||||
[D_EXTSZINHERIT] = "extszinherit",
|
||||
[D_COWEXTSIZE] = "cowextsize",
|
||||
[D_DAXINHERIT] = "daxinherit",
|
||||
@@ -407,8 +409,17 @@ static struct opt_params dopts = {
|
||||
.maxval = UINT_MAX,
|
||||
.defaultval = SUBOPT_NEEDS_VAL,
|
||||
},
|
||||
+ { .index = D_EXTSIZE,
|
||||
+ .conflicts = { { &dopts, D_EXTSZINHERIT },
|
||||
+ { NULL, LAST_CONFLICT } },
|
||||
+ .convert = true,
|
||||
+ .minval = 0,
|
||||
+ .maxval = XFS_AG_MAX_BYTES,
|
||||
+ .defaultval = SUBOPT_NEEDS_VAL,
|
||||
+ },
|
||||
{ .index = D_EXTSZINHERIT,
|
||||
- .conflicts = { { NULL, LAST_CONFLICT } },
|
||||
+ .conflicts = { { &dopts, D_EXTSIZE },
|
||||
+ { NULL, LAST_CONFLICT } },
|
||||
.minval = 0,
|
||||
.maxval = UINT_MAX,
|
||||
.defaultval = SUBOPT_NEEDS_VAL,
|
||||
@@ -835,6 +846,7 @@ struct cli_params {
|
||||
char *lsu;
|
||||
char *rtextsize;
|
||||
char *rtsize;
|
||||
+ char *extsize;
|
||||
|
||||
/* parameters where 0 is a valid CLI value */
|
||||
int dsunit;
|
||||
@@ -945,7 +957,7 @@ usage( void )
|
||||
inobtcount=0|1,bigtime=0|1]\n\
|
||||
/* data subvol */ [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
|
||||
(sunit=value,swidth=value|su=num,sw=num|noalign),\n\
|
||||
- sectsize=num\n\
|
||||
+ sectsize=num,extsize=num\n\
|
||||
/* force overwrite */ [-f]\n\
|
||||
/* inode size */ [-i perblock=n|size=num,maxpct=n,attr=0|1|2,\n\
|
||||
projid32bit=0|1,sparse=0|1]\n\
|
||||
@@ -1553,6 +1565,9 @@ data_opts_parser(
|
||||
cli->fsx.fsx_projid = getnum(value, opts, subopt);
|
||||
cli->fsx.fsx_xflags |= FS_XFLAG_PROJINHERIT;
|
||||
break;
|
||||
+ case D_EXTSIZE:
|
||||
+ cli->extsize = getstr(value, opts, subopt);
|
||||
+ break;
|
||||
case D_EXTSZINHERIT:
|
||||
cli->fsx.fsx_extsize = getnum(value, opts, subopt);
|
||||
if (cli->fsx.fsx_extsize)
|
||||
@@ -2002,6 +2017,33 @@ _("Minimum block size for CRC enabled filesystems is %d bytes.\n"),
|
||||
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Convert the -d extsize= option to a number, then set the extent size hint
|
||||
+ * to that number.
|
||||
+ */
|
||||
+static void
|
||||
+set_extsize(
|
||||
+ struct cli_params *cli,
|
||||
+ char *extsize,
|
||||
+ struct opt_params *opts,
|
||||
+ int subopt)
|
||||
+{
|
||||
+ uint64_t extsz_bytes;
|
||||
+ if (!extsize)
|
||||
+ return;
|
||||
+
|
||||
+ extsz_bytes = getnum(extsize, opts, subopt);
|
||||
+ if (extsz_bytes % blocksize)
|
||||
+ illegal_option(extsize, opts, subopt,
|
||||
+ _("Value must be a multiple of block size."));
|
||||
+
|
||||
+ cli->fsx.fsx_extsize = extsz_bytes / blocksize;
|
||||
+ if (cli->fsx.fsx_extsize)
|
||||
+ cli->fsx.fsx_xflags |= FS_XFLAG_EXTSZINHERIT;
|
||||
+ else
|
||||
+ cli->fsx.fsx_xflags &= ~FS_XFLAG_EXTSZINHERIT;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Grab log sector size and validate.
|
||||
*
|
||||
@@ -4081,6 +4123,8 @@ main(
|
||||
blocksize = cfg.blocksize;
|
||||
sectorsize = cfg.sectorsize;
|
||||
|
||||
+ set_extsize(&cli, cli.extsize, &dopts, D_EXTSIZE);
|
||||
+
|
||||
validate_log_sectorsize(&cfg, &cli, &dft);
|
||||
validate_sb_features(&cfg, &cli);
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,314 +0,0 @@
|
||||
From 2c0500e6036dc996ea9553c9d56b26f54d815e45 Mon Sep 17 00:00:00 2001
|
||||
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Date: Fri, 29 Sep 2023 09:53:42 +0000
|
||||
Subject: [PATCH 07/11] mkfs: enable the new force-align feature
|
||||
|
||||
Make it so that we can create filesystems with the forcealign feature
|
||||
turned on.
|
||||
|
||||
jpg: enforce extsize must be a power-of-2 for forcealign, relocate
|
||||
is_power_of_2() to be accessible for mkfs
|
||||
|
||||
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
include/libxfs.h | 6 +++
|
||||
libxfs/libxfs_priv.h | 6 ---
|
||||
man/man8/mkfs.xfs.8 | 15 ++++++
|
||||
mkfs/xfs_mkfs.c | 124 ++++++++++++++++++++++++++++++++++++++++++-
|
||||
4 files changed, 143 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/include/libxfs.h b/include/libxfs.h
|
||||
index 24424d0..cf4baff 100644
|
||||
--- a/include/libxfs.h
|
||||
+++ b/include/libxfs.h
|
||||
@@ -43,6 +43,12 @@ struct iomap;
|
||||
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
|
||||
#define unlikely(x) (x)
|
||||
|
||||
+static inline __attribute__((const))
|
||||
+int is_power_of_2(unsigned long n)
|
||||
+{
|
||||
+ return (n != 0 && ((n & (n - 1)) == 0));
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This mirrors the kernel include for xfs_buf.h - it's implicitly included in
|
||||
* every files via a similar include in the kernel xfs_linux.h.
|
||||
diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h
|
||||
index 15bae1f..0dc6627 100644
|
||||
--- a/libxfs/libxfs_priv.h
|
||||
+++ b/libxfs/libxfs_priv.h
|
||||
@@ -369,12 +369,6 @@ find_next_zero_bit(const unsigned long *addr, unsigned long size,
|
||||
}
|
||||
#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
|
||||
|
||||
-static inline __attribute__((const))
|
||||
-int is_power_of_2(unsigned long n)
|
||||
-{
|
||||
- return (n != 0 && ((n & (n - 1)) == 0));
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* xfs_iroundup: round up argument to next power of two
|
||||
*/
|
||||
diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
|
||||
index 5cd69fa..8f159a4 100644
|
||||
--- a/man/man8/mkfs.xfs.8
|
||||
+++ b/man/man8/mkfs.xfs.8
|
||||
@@ -654,6 +654,21 @@ space over time such that no free extents are large enough to
|
||||
accommodate a chunk of 64 inodes. Without this feature enabled, inode
|
||||
allocations can fail with out of space errors under severe fragmented
|
||||
free space conditions.
|
||||
+.TP
|
||||
+.BI forcealign[= value]
|
||||
+If
|
||||
+.B value
|
||||
+is 1, mark the root directory so that all file data extent allocations will be
|
||||
+aligned to the extent size hint.
|
||||
+These allocations will be mapped into the file range at offsets that are
|
||||
+aligned to the extent size hint.
|
||||
+The
|
||||
+.B extszinherit
|
||||
+option must be specified.
|
||||
+The
|
||||
+.B cowextsize
|
||||
+option must not be specified.
|
||||
+This feature is only available for filesystems formatted with -m crc=1.
|
||||
.RE
|
||||
.PP
|
||||
.PD 0
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index fb0c53f..1253ece 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -81,6 +81,7 @@ enum {
|
||||
I_ATTR,
|
||||
I_PROJID32BIT,
|
||||
I_SPINODES,
|
||||
+ I_FORCEALIGN,
|
||||
I_MAX_OPTS,
|
||||
};
|
||||
|
||||
@@ -451,6 +452,7 @@ static struct opt_params iopts = {
|
||||
[I_ATTR] = "attr",
|
||||
[I_PROJID32BIT] = "projid32bit",
|
||||
[I_SPINODES] = "sparse",
|
||||
+ [I_FORCEALIGN] = "forcealign",
|
||||
[I_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
@@ -500,6 +502,12 @@ static struct opt_params iopts = {
|
||||
.maxval = 1,
|
||||
.defaultval = 1,
|
||||
},
|
||||
+ { .index = I_FORCEALIGN,
|
||||
+ .conflicts = { { NULL, LAST_CONFLICT } },
|
||||
+ .minval = 0,
|
||||
+ .maxval = 1,
|
||||
+ .defaultval = 1,
|
||||
+ },
|
||||
},
|
||||
};
|
||||
|
||||
@@ -829,6 +837,7 @@ struct sb_feat_args {
|
||||
bool bigtime; /* XFS_SB_FEAT_INCOMPAT_BIGTIME */
|
||||
bool nodalign;
|
||||
bool nortalign;
|
||||
+ bool forcealign; /* XFS_SB_FEAT_RO_COMPAT_FORCEALIGN */
|
||||
};
|
||||
|
||||
struct cli_params {
|
||||
@@ -961,6 +970,7 @@ usage( void )
|
||||
/* force overwrite */ [-f]\n\
|
||||
/* inode size */ [-i perblock=n|size=num,maxpct=n,attr=0|1|2,\n\
|
||||
projid32bit=0|1,sparse=0|1]\n\
|
||||
+ forcealign=0|1\n\
|
||||
/* no discard */ [-K]\n\
|
||||
/* log subvol */ [-l agnum=n,internal,size=num,logdev=xxx,version=n\n\
|
||||
sunit=value|su=num,sectsize=num,lazy-count=0|1]\n\
|
||||
@@ -1601,6 +1611,8 @@ inode_opts_parser(
|
||||
const char *value,
|
||||
struct cli_params *cli)
|
||||
{
|
||||
+ long long val;
|
||||
+
|
||||
switch (subopt) {
|
||||
case I_ALIGN:
|
||||
cli->sb_feat.inode_align = getnum(value, opts, subopt);
|
||||
@@ -1623,6 +1635,17 @@ inode_opts_parser(
|
||||
case I_SPINODES:
|
||||
cli->sb_feat.spinodes = getnum(value, opts, subopt);
|
||||
break;
|
||||
+ case I_FORCEALIGN:
|
||||
+ val = getnum(value, opts, subopt);
|
||||
+
|
||||
+ if (val == 1) {
|
||||
+ cli->sb_feat.forcealign = true;
|
||||
+ cli->fsx.fsx_xflags |= FS_XFLAG_FORCEALIGN;
|
||||
+ } else {
|
||||
+ cli->sb_feat.forcealign = false;
|
||||
+ cli->fsx.fsx_xflags &= ~FS_XFLAG_FORCEALIGN;
|
||||
+ }
|
||||
+ break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -2228,6 +2251,13 @@ _("timestamps later than 2038 not supported without CRC support\n"));
|
||||
usage();
|
||||
}
|
||||
cli->sb_feat.bigtime = false;
|
||||
+
|
||||
+ if (cli->sb_feat.forcealign) {
|
||||
+ fprintf(stderr,
|
||||
+_("forced file data alignment not supported without CRC support\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+ cli->sb_feat.forcealign = false;
|
||||
}
|
||||
|
||||
if (!cli->sb_feat.finobt) {
|
||||
@@ -2262,6 +2292,13 @@ _("cowextsize not supported without reflink support\n"));
|
||||
usage();
|
||||
}
|
||||
|
||||
+ if ((cli->fsx.fsx_xflags & FS_XFLAG_FORCEALIGN) &&
|
||||
+ (cli->fsx.fsx_cowextsize > 0 || cli->fsx.fsx_extsize == 0 || !is_power_of_2(cli->fsx.fsx_extsize))) {
|
||||
+ fprintf(stderr,
|
||||
+_("forcealign requires a non-zero power-of-2 extent size hint and no cow extent size hint\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Copy features across to config structure now.
|
||||
*/
|
||||
@@ -2511,6 +2548,34 @@ _("illegal CoW extent size hint %lld, must be less than %u.\n"),
|
||||
}
|
||||
}
|
||||
|
||||
+/* Validate the incoming forcealign flag. */
|
||||
+static void
|
||||
+validate_forcealign(
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct cli_params *cli)
|
||||
+{
|
||||
+ if (!(cli->fsx.fsx_xflags & FS_XFLAG_FORCEALIGN))
|
||||
+ return;
|
||||
+
|
||||
+ if (cli->fsx.fsx_cowextsize != 0) {
|
||||
+ fprintf(stderr,
|
||||
+_("cannot set CoW extent size hint when forcealign is set.\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+
|
||||
+ if (cli->fsx.fsx_extsize == 0) {
|
||||
+ fprintf(stderr,
|
||||
+_("cannot set forcealign without an extent size hint.\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+
|
||||
+ if (cli->fsx.fsx_xflags & (FS_XFLAG_REALTIME | FS_XFLAG_RTINHERIT)) {
|
||||
+ fprintf(stderr,
|
||||
+_("cannot set forcealign and realtime flags.\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Validate the configured stripe geometry, or is none is specified, pull
|
||||
* the configuration from the underlying device.
|
||||
@@ -2978,11 +3043,63 @@ _("agsize (%s) not a multiple of fs blk size (%d)\n"),
|
||||
*/
|
||||
static void
|
||||
align_ag_geometry(
|
||||
- struct mkfs_params *cfg)
|
||||
+ struct mkfs_params *cfg,
|
||||
+ struct cli_params *cli)
|
||||
{
|
||||
uint64_t tmp_agsize;
|
||||
int dsunit = cfg->dsunit;
|
||||
|
||||
+ /*
|
||||
+ * If the sysadmin wants to force all file data space mappings to be
|
||||
+ * aligned to the extszinherit value, then we need the AGs to be
|
||||
+ * aligned to the same value. Skip these checks if the extent size
|
||||
+ * hint is zero; the extszinherit validation will fail the format
|
||||
+ * later.
|
||||
+ */
|
||||
+ if (cli->sb_feat.forcealign && cli->fsx.fsx_extsize != 0) {
|
||||
+ /* Perfect alignment; we're done. */
|
||||
+ if (cfg->agsize % cli->fsx.fsx_extsize == 0)
|
||||
+ goto validate;
|
||||
+
|
||||
+ /*
|
||||
+ * Round up to file extent size boundary. Make sure that
|
||||
+ * agsize is still larger than XFS_AG_MIN_BLOCKS(blocklog).
|
||||
+ */
|
||||
+ tmp_agsize = ((cfg->agsize + cli->fsx.fsx_extsize - 1) /
|
||||
+ cli->fsx.fsx_extsize) * cli->fsx.fsx_extsize;
|
||||
+
|
||||
+ /*
|
||||
+ * Round down to file extent size boundary if rounding up
|
||||
+ * created an AG size that is larger than the AG max.
|
||||
+ */
|
||||
+ if (tmp_agsize > XFS_AG_MAX_BLOCKS(cfg->blocklog))
|
||||
+ tmp_agsize = (cfg->agsize / cli->fsx.fsx_extsize) *
|
||||
+ cli->fsx.fsx_extsize;
|
||||
+
|
||||
+ if (tmp_agsize < XFS_AG_MIN_BLOCKS(cfg->blocklog) &&
|
||||
+ tmp_agsize > XFS_AG_MAX_BLOCKS(cfg->blocklog)) {
|
||||
+ /*
|
||||
+ * Set the agsize to the invalid value so the following
|
||||
+ * validation of the ag will fail and print a nice error
|
||||
+ * and exit.
|
||||
+ */
|
||||
+ cfg->agsize = tmp_agsize;
|
||||
+ goto validate;
|
||||
+ }
|
||||
+
|
||||
+ /* Update geometry to be file extent size aligned */
|
||||
+ cfg->agsize = tmp_agsize;
|
||||
+ if (!cli_opt_set(&dopts, D_AGCOUNT))
|
||||
+ cfg->agcount = cfg->dblocks / cfg->agsize +
|
||||
+ (cfg->dblocks % cfg->agsize != 0);
|
||||
+
|
||||
+ if (cli_opt_set(&dopts, D_AGSIZE))
|
||||
+ fprintf(stderr,
|
||||
+_("agsize rounded to %lld, extszhint = %d\n"),
|
||||
+ (long long)cfg->agsize, cli->fsx.fsx_extsize);
|
||||
+ goto validate;
|
||||
+ }
|
||||
+
|
||||
if (!dsunit)
|
||||
goto validate;
|
||||
|
||||
@@ -3202,6 +3319,8 @@ sb_set_features(
|
||||
sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_REFLINK;
|
||||
if (fp->inobtcnt)
|
||||
sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
|
||||
+ if (fp->forcealign)
|
||||
+ sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_FORCEALIGN;
|
||||
if (fp->bigtime)
|
||||
sbp->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_BIGTIME;
|
||||
|
||||
@@ -4164,7 +4283,7 @@ main(
|
||||
* aligns to device geometry correctly.
|
||||
*/
|
||||
calculate_initial_ag_geometry(&cfg, &cli);
|
||||
- align_ag_geometry(&cfg);
|
||||
+ align_ag_geometry(&cfg, &cli);
|
||||
|
||||
calculate_imaxpct(&cfg, &cli);
|
||||
|
||||
@@ -4187,6 +4306,7 @@ main(
|
||||
/* Validate the extent size hints now that @mp is fully set up. */
|
||||
validate_extsize_hint(mp, &cli);
|
||||
validate_cowextsize_hint(mp, &cli);
|
||||
+ validate_forcealign(mp, &cli);
|
||||
|
||||
/* Print the intended geometry of the fs. */
|
||||
if (!quiet || dry_run) {
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,193 +0,0 @@
|
||||
From c85d383b0bb1d86c676ccf817dd22ad43b589b61 Mon Sep 17 00:00:00 2001
|
||||
From: John Garry <john.g.garry@oracle.com>
|
||||
Date: Fri, 1 Mar 2024 17:40:12 +0000
|
||||
Subject: [PATCH 08/11] mkfs: Add atomic writes support
|
||||
|
||||
Use a command like the following to enable:
|
||||
/mkfs.xfs -f -i forcealign=1 -d extsize=4096 -d atomic-writes=1 /dev/sda
|
||||
|
||||
Forcealign enablement is required, and with that a specific extent size
|
||||
needs to be set. An extent size of 4096B (for 4K FS block size) is
|
||||
acceptable.
|
||||
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
---
|
||||
include/linux.h | 4 ++++
|
||||
libxfs/xfs_format.h | 8 ++++++--
|
||||
mkfs/xfs_mkfs.c | 49 +++++++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 59 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/include/linux.h b/include/linux.h
|
||||
index d95365b..7f295f0 100644
|
||||
--- a/include/linux.h
|
||||
+++ b/include/linux.h
|
||||
@@ -251,6 +251,10 @@ struct fsxattr {
|
||||
#define FS_XFLAG_FORCEALIGN 0x00020000
|
||||
#endif
|
||||
|
||||
+#ifndef FS_XFLAG_ATOMICWRITES
|
||||
+#define FS_XFLAG_ATOMICWRITES 0x00040000
|
||||
+#endif
|
||||
+
|
||||
#ifdef HAVE_GETFSMAP
|
||||
# include <linux/fsmap.h>
|
||||
#else
|
||||
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
|
||||
index c2db380..1dfb11c 100644
|
||||
--- a/libxfs/xfs_format.h
|
||||
+++ b/libxfs/xfs_format.h
|
||||
@@ -451,12 +451,14 @@ xfs_sb_has_compat_feature(
|
||||
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30) /* aligned file data extents */
|
||||
+#define XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES (1 << 31) /* atomic writes */
|
||||
#define XFS_SB_FEAT_RO_COMPAT_ALL \
|
||||
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
|
||||
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
|
||||
XFS_SB_FEAT_RO_COMPAT_REFLINK| \
|
||||
XFS_SB_FEAT_RO_COMPAT_FORCEALIGN| \
|
||||
- XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
|
||||
+ XFS_SB_FEAT_RO_COMPAT_INOBTCNT| \
|
||||
+ XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES)
|
||||
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
|
||||
static inline bool
|
||||
xfs_sb_has_ro_compat_feature(
|
||||
@@ -1180,16 +1182,18 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
|
||||
#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
|
||||
/* data extent mappings for regular files must be aligned to extent size hint */
|
||||
#define XFS_DIFLAG2_FORCEALIGN_BIT 5
|
||||
+#define XFS_DIFLAG2_ATOMICWRITES_BIT 6
|
||||
|
||||
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
|
||||
#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
|
||||
#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
|
||||
#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
|
||||
#define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT)
|
||||
+#define XFS_DIFLAG2_ATOMICWRITES (1 << XFS_DIFLAG2_ATOMICWRITES_BIT)
|
||||
|
||||
#define XFS_DIFLAG2_ANY \
|
||||
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
|
||||
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_FORCEALIGN)
|
||||
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_FORCEALIGN | XFS_DIFLAG2_ATOMICWRITES)
|
||||
|
||||
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
|
||||
{
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index 1253ece..5169255 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -70,6 +70,7 @@ enum {
|
||||
D_EXTSZINHERIT,
|
||||
D_COWEXTSIZE,
|
||||
D_DAXINHERIT,
|
||||
+ D_ATOMICWRITES,
|
||||
D_MAX_OPTS,
|
||||
};
|
||||
|
||||
@@ -306,6 +307,7 @@ static struct opt_params dopts = {
|
||||
[D_EXTSZINHERIT] = "extszinherit",
|
||||
[D_COWEXTSIZE] = "cowextsize",
|
||||
[D_DAXINHERIT] = "daxinherit",
|
||||
+ [D_ATOMICWRITES] = "atomic-writes",
|
||||
[D_MAX_OPTS] = NULL,
|
||||
},
|
||||
.subopt_params = {
|
||||
@@ -437,6 +439,12 @@ static struct opt_params dopts = {
|
||||
.maxval = 1,
|
||||
.defaultval = 1,
|
||||
},
|
||||
+ { .index = D_ATOMICWRITES,
|
||||
+ .conflicts = { { NULL, LAST_CONFLICT } },
|
||||
+ .minval = 0,
|
||||
+ .maxval = 1,
|
||||
+ .defaultval = 1,
|
||||
+ },
|
||||
},
|
||||
};
|
||||
|
||||
@@ -838,6 +846,7 @@ struct sb_feat_args {
|
||||
bool nodalign;
|
||||
bool nortalign;
|
||||
bool forcealign; /* XFS_SB_FEAT_RO_COMPAT_FORCEALIGN */
|
||||
+ bool atomicwrites; /* XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES */
|
||||
};
|
||||
|
||||
struct cli_params {
|
||||
@@ -1598,6 +1607,13 @@ data_opts_parser(
|
||||
else
|
||||
cli->fsx.fsx_xflags &= ~FS_XFLAG_DAX;
|
||||
break;
|
||||
+ case D_ATOMICWRITES:
|
||||
+ if (getnum(value, opts, subopt) == 1) {
|
||||
+ cli->sb_feat.atomicwrites = true;
|
||||
+ } else {
|
||||
+ cli->sb_feat.atomicwrites = false;
|
||||
+ }
|
||||
+ break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -2557,6 +2573,12 @@ validate_forcealign(
|
||||
if (!(cli->fsx.fsx_xflags & FS_XFLAG_FORCEALIGN))
|
||||
return;
|
||||
|
||||
+ if (cli->fsx.fsx_xflags & (FS_XFLAG_REALTIME | FS_XFLAG_RTINHERIT)) {
|
||||
+ fprintf(stderr,
|
||||
+_("cannot set forcealign and realtime flags.\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+
|
||||
if (cli->fsx.fsx_cowextsize != 0) {
|
||||
fprintf(stderr,
|
||||
_("cannot set CoW extent size hint when forcealign is set.\n"));
|
||||
@@ -2576,6 +2598,30 @@ _("cannot set forcealign and realtime flags.\n"));
|
||||
}
|
||||
}
|
||||
|
||||
+/* Validate the incoming atomicwrites flag. */
|
||||
+static void
|
||||
+validate_atomicwrites(
|
||||
+ struct mkfs_params *cfg,
|
||||
+ struct xfs_mount *mp,
|
||||
+ struct cli_params *cli,
|
||||
+ char *dfile
|
||||
+ )
|
||||
+{
|
||||
+ if (!cli->sb_feat.atomicwrites)
|
||||
+ return;
|
||||
+
|
||||
+ if (!(cli->fsx.fsx_xflags & FS_XFLAG_FORCEALIGN)) {
|
||||
+ fprintf(stderr,
|
||||
+_("cannot set atomicwrites without forcealign.\n"));
|
||||
+ usage();
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * TODO: Add a check to see if the dfile can support atomic writes of
|
||||
+ * extsize.
|
||||
+ */
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Validate the configured stripe geometry, or is none is specified, pull
|
||||
* the configuration from the underlying device.
|
||||
@@ -3321,6 +3367,8 @@ sb_set_features(
|
||||
sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
|
||||
if (fp->forcealign)
|
||||
sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_FORCEALIGN;
|
||||
+ if (fp->atomicwrites)
|
||||
+ sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES;
|
||||
if (fp->bigtime)
|
||||
sbp->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_BIGTIME;
|
||||
|
||||
@@ -4307,6 +4355,7 @@ main(
|
||||
validate_extsize_hint(mp, &cli);
|
||||
validate_cowextsize_hint(mp, &cli);
|
||||
validate_forcealign(mp, &cli);
|
||||
+ validate_atomicwrites(&cfg, mp, &cli, dfile);
|
||||
|
||||
/* Print the intended geometry of the fs. */
|
||||
if (!quiet || dry_run) {
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,105 +0,0 @@
|
||||
From eb775dfcd8c48d4a60f3989f5b5707c96a9612f4 Mon Sep 17 00:00:00 2001
|
||||
From: John Garry <john.g.garry@oracle.com>
|
||||
Date: Mon, 4 Mar 2024 10:27:27 +0000
|
||||
Subject: [PATCH 10/11] xfs_io: Support statx for atomic writes
|
||||
|
||||
This should be done properly by installing the kernel headers with
|
||||
atomic write statx support.
|
||||
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
|
||||
---
|
||||
io/stat.c | 3 +++
|
||||
io/statx.h | 20 +++++++++++++++++++-
|
||||
2 files changed, 22 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/io/stat.c b/io/stat.c
|
||||
index b57f9ee..d7f4047 100644
|
||||
--- a/io/stat.c
|
||||
+++ b/io/stat.c
|
||||
@@ -434,6 +434,9 @@ statx_f(
|
||||
printf(_("stat.btime = %s"),
|
||||
ctime((time_t *)&stx.stx_btime.tv_sec));
|
||||
}
|
||||
+ printf(_("stat.stx_atomic_write_unit_min = %lld\n"), (long long)stx.stx_atomic_write_unit_min);
|
||||
+ printf(_("stat.stx_atomic_write_unit_max = %lld\n"), (long long)stx.stx_atomic_write_unit_max);
|
||||
+ printf(_("stat.stx_atomic_write_segments_max = %lld\n"), (long long)stx.stx_atomic_write_segments_max);
|
||||
|
||||
if (file->flags & IO_FOREIGN)
|
||||
return 0;
|
||||
diff --git a/io/statx.h b/io/statx.h
|
||||
index c6625ac..80745e8 100644
|
||||
--- a/io/statx.h
|
||||
+++ b/io/statx.h
|
||||
@@ -34,6 +34,7 @@
|
||||
#endif
|
||||
|
||||
|
||||
+#undef STATX_TYPE
|
||||
#ifndef STATX_TYPE
|
||||
/* Pick up kernel definitions if glibc didn't already provide them */
|
||||
#include <linux/stat.h>
|
||||
@@ -56,6 +57,9 @@
|
||||
*
|
||||
* __reserved is held in case we need a yet finer resolution.
|
||||
*/
|
||||
+#define statx_timestamp _statx_timestamp
|
||||
+#undef _statx_timestamp
|
||||
+
|
||||
struct statx_timestamp {
|
||||
__s64 tv_sec;
|
||||
__s32 tv_nsec;
|
||||
@@ -99,6 +103,8 @@ struct statx_timestamp {
|
||||
* will have values installed for compatibility purposes so that stat() and
|
||||
* co. can be emulated in userspace.
|
||||
*/
|
||||
+#define statx _statx
|
||||
+#undef _statx
|
||||
struct statx {
|
||||
/* 0x00 */
|
||||
__u32 stx_mask; /* What results were written [uncond] */
|
||||
@@ -126,7 +132,16 @@ struct statx {
|
||||
__u32 stx_dev_major; /* ID of device containing file [uncond] */
|
||||
__u32 stx_dev_minor;
|
||||
/* 0x90 */
|
||||
- __u64 __spare2[14]; /* Spare space for future expansion */
|
||||
+ __u64 stx_mnt_id;
|
||||
+ __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
|
||||
+ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
|
||||
+ /* 0xa0 */
|
||||
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
|
||||
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
|
||||
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
|
||||
+ __u32 __spare1;
|
||||
+ /* 0xb0 */
|
||||
+ __u64 __spare3[10]; /* Spare space for future expansion */
|
||||
/* 0x100 */
|
||||
};
|
||||
|
||||
@@ -138,6 +153,7 @@ struct statx {
|
||||
* These bits should be set in the mask argument of statx() to request
|
||||
* particular items when calling statx().
|
||||
*/
|
||||
+#undef STATX_ALL
|
||||
#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
|
||||
#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
|
||||
#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
|
||||
@@ -151,6 +167,7 @@ struct statx {
|
||||
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
|
||||
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
|
||||
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
|
||||
+#define STATX_WRITE_ATOMIC 0x00008000U /* Want/got atomic_write_* fields */
|
||||
#define STATX_ALL 0x00000fffU /* All currently supported flags */
|
||||
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
|
||||
|
||||
@@ -172,6 +189,7 @@ struct statx {
|
||||
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
|
||||
|
||||
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
|
||||
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
|
||||
|
||||
#endif /* STATX_TYPE */
|
||||
#endif /* XFS_IO_STATX_H */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,35 +0,0 @@
|
||||
From d95dc0f4b90de16168c533e495c5ee1e012e41fd Mon Sep 17 00:00:00 2001
|
||||
From: John Garry <john.g.garry@oracle.com>
|
||||
Date: Thu, 21 Mar 2024 12:20:37 +0000
|
||||
Subject: [PATCH 11/11] mkfs: Ensure extsize aligned to stripe unit for
|
||||
forcealign
|
||||
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
---
|
||||
mkfs/xfs_mkfs.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||
index 5169255..cf0852f 100644
|
||||
--- a/mkfs/xfs_mkfs.c
|
||||
+++ b/mkfs/xfs_mkfs.c
|
||||
@@ -2754,6 +2754,16 @@ _("%s: Stripe unit(%d) or stripe width(%d) is not a multiple of the block size(%
|
||||
cfg->dsunit = DTOBT(dsunit, cfg->blocklog);
|
||||
cfg->dswidth = DTOBT(dswidth, cfg->blocklog);
|
||||
|
||||
+ if (cli->fsx.fsx_xflags & FS_XFLAG_FORCEALIGN) {
|
||||
+ if ((cfg->dsunit % cli->fsx.fsx_extsize) ||
|
||||
+ (cfg->dswidth % cli->fsx.fsx_extsize)) {
|
||||
+ fprintf(stderr,
|
||||
+ _("Stripe unit(%d) or stripe width(%d) is not a multiple of the extsize (%d) for forcealign\n"),
|
||||
+ cfg->dsunit, cfg->dswidth, cli->fsx.fsx_extsize);
|
||||
+ usage();
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
check_lsunit:
|
||||
/* log sunit options */
|
||||
if (cli_opt_set(&lopts, L_SUNIT))
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,51 +0,0 @@
|
||||
From 1e02682bf6204656f37e7c0d30aef29c6a246cb2 Mon Sep 17 00:00:00 2001
|
||||
From: John Garry <john.g.garry@oracle.com>
|
||||
Date: Fri, 1 Mar 2024 17:15:22 +0000
|
||||
Subject: [PATCH 09/11] xfs_io: Implement lsattr and chattr support for atomic
|
||||
writes
|
||||
|
||||
Use something like the following:
|
||||
xfs_io -c "chattr +W" mnt/file
|
||||
|
||||
forcealign must be enabled for the file.
|
||||
|
||||
Signed-off-by: John Garry <john.g.garry@oracle.com>
|
||||
---
|
||||
io/attr.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/io/attr.c b/io/attr.c
|
||||
index 248a9c2..bc27a1a 100644
|
||||
--- a/io/attr.c
|
||||
+++ b/io/attr.c
|
||||
@@ -39,9 +39,10 @@ static struct xflags {
|
||||
{ FS_XFLAG_COWEXTSIZE, "C", "cowextsize" },
|
||||
{ FS_XFLAG_HASATTR, "X", "has-xattr" },
|
||||
{ FS_XFLAG_FORCEALIGN, "F", "force-align" },
|
||||
+ { FS_XFLAG_ATOMICWRITES, "W", "atomic-writes" },
|
||||
{ 0, NULL, NULL }
|
||||
};
|
||||
-#define CHATTR_XFLAG_LIST "r"/*p*/"iasAdtPneEfSxC"/*X*/"F"
|
||||
+#define CHATTR_XFLAG_LIST "r"/*p*/"iasAdtPneEfSxC"/*X*/"FW"
|
||||
|
||||
static void
|
||||
lsattr_help(void)
|
||||
@@ -69,6 +70,7 @@ lsattr_help(void)
|
||||
" C -- for files with shared blocks, observe the inode CoW extent size value\n"
|
||||
" X -- file has extended attributes (cannot be changed using chattr)\n"
|
||||
" F -- data extent mappings must be aligned to extent size hint\n"
|
||||
+" W -- atomic writes enabled for a file\n"
|
||||
"\n"
|
||||
" Options:\n"
|
||||
" -R -- recursively descend (useful when current file is a directory)\n"
|
||||
@@ -107,6 +109,7 @@ chattr_help(void)
|
||||
" +/-x -- set/clear the direct access (DAX) flag\n"
|
||||
" +/-C -- set/clear the CoW extent-size flag\n"
|
||||
" +/-F -- set/clear the forcealign flag\n"
|
||||
+" +/-W -- set/clear the atomic writes flag\n"
|
||||
" Note1: user must have certain capabilities to modify immutable/append-only.\n"
|
||||
" Note2: immutable/append-only files cannot be deleted; removing these files\n"
|
||||
" requires the immutable/append-only flag to be cleared first.\n"
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: xfsprogs
|
||||
Version: 5.14.1
|
||||
Release: 16
|
||||
Release: 11
|
||||
Summary: Administration and debugging tools for the XFS file system
|
||||
License: GPL+ and LGPLv2+
|
||||
URL: https://xfs.wiki.kernel.org
|
||||
@ -27,55 +27,6 @@ Patch5: 0005-xfs_db-use-preferable-macro-to-seek-offset-for-local.patch
|
||||
Patch6: 0006-mkfs.xfs-disable-inobtcount-feature.patch
|
||||
Patch7: 0007-libxcmd-add-return-value-check-for-dynamic-memory-fu.patch
|
||||
Patch8: 0008-xfs_repair-fix-the-problem-of-repair-failure-caused-.patch
|
||||
Patch9: 0009-mkfs.xfs-fix-segmentation-fault-caused-by-accessing-.patch
|
||||
Patch10: 0010-xfs_repair-fix-warn-in-xfs_buf_find-when-growfs-fails.patch
|
||||
Patch11: 0011-xfs_copy-don-t-use-cached-buffer-reads-until-after-l.patch
|
||||
Patch12: 0012-xfs-sb-verifier-doesn-t-handle-uncached-sb-buffer.patch
|
||||
Patch13: 0013-libxfs-always-initialize-internal-buffer-map.patch
|
||||
Patch14: 0014-libxfs-shut-down-filesystem-if-we-xfs_trans_cancel-w.patch
|
||||
Patch15: 0015-xfs_db-fix-nbits-parameter-in-fa_ino-48-functions.patch
|
||||
Patch16: 0016-xfs_repair-update-secondary-superblocks-after-changi.patch
|
||||
Patch17: 0017-xfs_repair-fix-AG-header-btree-level-comparisons.patch
|
||||
Patch18: 0018-xfs-fix-maxlevels-comparisons-in-the-btree-staging-c.patch
|
||||
Patch19: 0019-xfs-fold-perag-loop-iteration-logic-into-helper-func.patch
|
||||
Patch20: 0020-xfs-rename-the-next_agno-perag-iteration-variable.patch
|
||||
Patch21: 0021-xfs-terminate-perag-iteration-reliably-on-agcount.patch
|
||||
Patch22: 0022-xfs-fix-perag-reference-leak-on-iteration-race-with-.patch
|
||||
Patch23: 0023-mkfs-fix-missing-validation-of-l-size-against-maximu.patch
|
||||
Patch24: 0024-mkfs-reduce-internal-log-size-when-log-stripe-units-.patch
|
||||
Patch25: 0025-mkfs-don-t-let-internal-logs-bump-the-root-dir-inode.patch
|
||||
Patch26: 0026-mkfs-improve-log-extent-validation.patch
|
||||
Patch27: 0027-xfs_repair-detect-v5-featureset-mismatches-in-second.patch
|
||||
Patch28: 0028-xfs_repair-check-the-ftype-of-dot-and-dotdot-directo.patch
|
||||
Patch29: 0029-mkfs-Fix-memory-leak.patch
|
||||
Patch30: 0030-xfs-zero-inode-fork-buffer-at-allocation.patch
|
||||
Patch31: 0031-xfs-detect-self-referencing-btree-sibling-pointers.patch
|
||||
Patch32: 0032-xfs-validate-inode-fork-size-against-fork-format.patch
|
||||
Patch33: 0033-xfs_repair-always-rewrite-secondary-supers-when-need.patch
|
||||
Patch34: 0034-xfs_repair-ignore-empty-xattr-leaf-blocks.patch
|
||||
Patch35: 0035-xfs_repair-Search-for-conflicts-in-inode_tree_ptrs-w.patch
|
||||
Patch36: 0036-mkfs-terminate-getsubopt-arrays-properly.patch
|
||||
Patch37: 0037-mkfs-complain-about-impossible-log-size-constraints.patch
|
||||
Patch38: 0038-xfs-trim-the-mapp-array-accordingly-in-xfs_da_grow_i.patch
|
||||
Patch39: 0039-xfs-fix-exception-caused-by-unexpected-illegal-bestc.patch
|
||||
Patch40: 0040-xfs-increase-rename-inode-reservation.patch
|
||||
Patch41: 0041-xfs-fix-sb-write-verify-for-lazysbcount.patch
|
||||
Patch42: 0042-xfs_repair-don-t-crash-on-unknown-inode-parents-in-d.patch
|
||||
Patch43: 0043-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch
|
||||
Patch44: 0044-fsck.xfs-mount-umount-xfs-fs-to-replay-log-before-ru.patch
|
||||
Patch45: 0045-xfs_db-fix-dir3-block-magic-check.patch
|
||||
Patch46: 0046-xfs_repair-fix-incorrect-dabtree-hashval-comparison.patch
|
||||
Patch47: 0047-xfs-create-a-new-inode-flag-to-require-extsize-align.patch
|
||||
Patch48: 0048-xfs-allow-files-to-require-data-mappings-to-be-align.patch
|
||||
Patch49: 0049-xfs_db-expose-force_align-feature-and-flags.patch
|
||||
Patch50: 0050-xfs_io-implement-lsattr-and-chattr-support-for-force.patch
|
||||
Patch51: 0051-xfs_repair-check-the-force-align-flag.patch
|
||||
Patch52: 0052-mkfs-add-an-extsize-option-that-allows-units.patch
|
||||
Patch53: 0053-mkfs-enable-the-new-force-align-feature.patch
|
||||
Patch54: 0054-mkfs-Add-atomic-writes-suppport.patch
|
||||
Patch55: 0055-xfs_io-Support-statx-for-atomic-writes.patch
|
||||
Patch56: 0056-mkfs-Ensure-extsize-aligned-to-stripe-unit-for-force.patch
|
||||
Patch57: 0057-xfs_io-Implement-lsattr-and-chattr-support-for-atomi.patch
|
||||
|
||||
%description
|
||||
xfsprogs are the userspace utilities that manage XFS filesystems.
|
||||
@ -159,22 +110,6 @@ rm -rf %{buildroot}%{_datadir}/doc/xfsprogs/
|
||||
|
||||
|
||||
%changelog
|
||||
|
||||
* Mon May 20 2024 zhangjian <zhangjian496@huawei.com> - 5.14.1-16
|
||||
- support atomic write
|
||||
|
||||
* Wed Dec 27 2023 wuguanghao <wuguanghao3@huawei.com> - 5.14.1-15
|
||||
- backport patches from community
|
||||
|
||||
* Fri Nov 3 2023 wuguanghao <wuguanghao3@huawei.com> - 5.14.1-14
|
||||
- xfs_copy: don't use cached buffer reads until after libxfs_mount
|
||||
|
||||
* Tue Sep 12 2023 wuguanghao <wuguanghao3@huawei.com> - 5.14.1-13
|
||||
- xfs_repair: fix warn in xfs_buf_find when growfs fails
|
||||
|
||||
* Sun Sep 3 2023 wuguanghao <wuguanghao3@huawei.com> - 5.14.1-12
|
||||
- fix segmentation fault in mkfs.xfs
|
||||
|
||||
* Tue Aug 15 2023 wuguanghao <wuguanghao3@huawei.com> - 5.14.1-11
|
||||
- fix xfs_repair failure
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user