#! /bin/bash
# SPDX-License-Identifier: GPL-2.0+
# Copyright (c) 2019 Oracle, Inc.  All Rights Reserved.
#
# FS QA Test No. 507
#
# Regression test for kernel commit:
#
# 394aafdc15da ("xfs: widen inode delalloc block counter to 64-bits")
#
# Try to overflow i_delayed_blks by setting the largest cowextsize hint
# possible, creating a sparse file with a single byte every cowextsize bytes,
# reflinking it, and retouching every written byte to see if we can create
# enough speculative COW reservations to overflow i_delayed_blks.
#
# Pull in the fstests preamble and declare this test; "auto" and "clone" are
# handed to _begin_fstest as its arguments.
. ./common/preamble
_begin_fstest auto clone

# Register the _cleanup function (defined right below) with BUS as the extra
# argument.
# NOTE(review): presumably this adds SIGBUS to the signals that trigger
# cleanup -- confirm against _register_cleanup in common/preamble.
_register_cleanup "_cleanup" BUS

# Test-specific replacement for the default cleanup function.  Steps out of any
# directory we might be holding busy, unmounts the nested filesystem and tears
# down its loop device (each only if the corresponding variable was set), and
# finally removes this test's temporary files.
_cleanup()
{
	cd /
	if [ -n "$loop_mount" ]; then
		# The mount may never have happened; errors are deliberately muted.
		$UMOUNT_PROG $loop_mount > /dev/null 2>&1
	fi
	if [ -n "$loop_dev" ]; then
		_destroy_loop_device $loop_dev
	fi
	rm -rf $tmp.*
}

# Import common functions.
. ./common/reflink
. ./common/filter

# real QA test starts here
# Preconditions for running this test.  The test reflinks a file with
# _cp_reflink, builds a nested filesystem on a loop device, and inspects the
# COW fork with bmap -c, hence the checks below.
_supported_fs xfs
_require_scratch_reflink
_require_cp_reflink
_require_loop
_require_xfs_debug	# needed for xfs_bmap -c

# Largest cowextsize hint, in filesystem blocks (2^21 - 1).
MAXEXTLEN=2097151	# cowextsize can't be more than MAXEXTLEN

echo "Format and mount"
# This is the first write to $seqres.full and uses '>', so the log starts
# fresh here; everything later appends with '>>'.
_scratch_mkfs > "$seqres.full" 2>&1
_scratch_mount

# Check that an operation length of MAXEXTLEN scratch-fs blocks (in bytes) is
# acceptable on the scratch mount.
# NOTE(review): exact semantics of _require_congruent_file_oplen are defined
# in the fstests common code -- confirm there.
fs_blksz=$(_get_block_size $SCRATCH_MNT)
_require_congruent_file_oplen $SCRATCH_MNT $((MAXEXTLEN * fs_blksz))

# Create a huge sparse filesystem on the scratch device because that's what
# we're going to need to guarantee that we have enough blocks to overflow in
# the first place.  We need to have at least enough free space on that huge fs
# to handle one written block every MAXEXTLEN blocks and to reserve 2^32 blocks
# in the COW fork.  There needs to be sufficient space in the scratch
# filesystem to handle a 256M log, all the per-AG metadata, and all the data
# written to the test file.
#
# Worst case, a 64k-block fs needs to be about 300TB.  Best case, a 1k block
# filesystem needs ~5TB.  For the most common 4k case we only need a ~20TB fs.
#
# In practice, the author observed that the space required on the scratch fs
# never exceeded ~800M even for a 300T 64k-block filesystem, so we'll just ask
# for about 1.2GB.
# Size the nested filesystem: enough MAXEXTLEN-sized COW reservations to pass
# 2^32 blocks (plus 100 spare), one extra written block per reservation, 20%
# headroom on top, and the byte count rounded down to a multiple of 512.
blksz=$(_get_file_block_size "$SCRATCH_MNT")
nr_cows=$(( (2 ** 32) / MAXEXTLEN + 100 ))
blks_needed=$(( nr_cows * (MAXEXTLEN + 1) ))
loop_file_sz=$(( blksz * blks_needed * 12 / 10 / 512 * 512 ))
_require_fs_space $SCRATCH_MNT 1234567

# Back the nested filesystem with a sparse image file on the scratch fs and
# attach it to a loop device.  Both loop_mount and loop_dev are consumed by
# _cleanup.
loop_file=$SCRATCH_MNT/a.img
loop_mount=$SCRATCH_MNT/a
$XFS_IO_PROG -f -c "truncate $loop_file_sz" $loop_file
[ -s $loop_file ] || _notrun "Could not create large sparse file"
loop_dev=$(_create_loop_device $loop_file)

# Now we have to create the source file.  The goal is to overflow a 32-bit
# i_delayed_blks, which means that we have to create at least that many delayed
# allocation block reservations.  Take advantage of the fact that a cowextsize
# hint causes creation of large speculative delalloc reservations in the cow
# fork to reduce the amount of work we have to do.
#
# The maximum cowextsize can only be set to MAXEXTLEN fs blocks on a filesystem
# whose AGs each have more than MAXEXTLEN * 2 blocks.  This we can do easily
# with a multi-terabyte filesystem, so start by setting up the hint.  Note that
# the current fsxattr interface specifies its u32 cowextsize hint in units of
# bytes and therefore can't handle MAXEXTLEN * blksz on most filesystems, so we
# set it via mkfs because mkfs takes units of fs blocks, not bytes.

# Format the loop device with the maximum cowextsize hint and a 256m log, then
# mount it on a directory inside the scratch filesystem.
_mkfs_dev -d cowextsize=$MAXEXTLEN -l size=256m $loop_dev >> $seqres.full
mkdir $loop_mount
mount $loop_dev $loop_mount

echo "Create crazy huge file"
huge_file="$loop_mount/a"
touch "$huge_file"
# From here on blksz refers to the nested filesystem, not the scratch fs.
blksz=$(_get_file_block_size "$loop_mount")
extsize_bytes=$(( MAXEXTLEN * blksz ))

# Verify that the new file really carries a cowextsize hint: log what xfs_io
# reports, pull the leading number out of it, and complain if it is zero.
curr_cowextsize_str=$($XFS_IO_PROG -c 'cowextsize' "$huge_file")
echo "$curr_cowextsize_str" >> $seqres.full
cowextsize_bytes=$(sed -e 's/^.\([0-9]*\).*$/\1/g' <<< "$curr_cowextsize_str")
if [ "$cowextsize_bytes" -eq 0 ]; then
	echo "could not set cowextsize?"
fi

# Now we have to seed the file with sparse contents.  Remember, the goal is to
# create a little more than 2^32 delayed allocation blocks in the COW fork with
# as little effort as possible.  We know that speculative COW preallocation
# will create MAXEXTLEN-length reservations for us, so that means we should
# be able to get away with touching a single byte every extsize_bytes.  We
# do this backwards to avoid having to move EOF.
for (( n = nr_cows; n >= 0; n-- )); do
	# One byte at the start of every extsize_bytes-sized stride.
	$XFS_IO_PROG -c "pwrite $(( n * extsize_bytes )) 1" "$huge_file" > /dev/null
done

echo "Reflink crazy huge file"
_cp_reflink "$huge_file" "$huge_file.b"

# Every block of the file is shared now, so rewriting each seeded byte (again
# from the highest offset down to zero) triggers a speculative COW
# preallocation per stride.
echo "COW crazy huge file"
for (( n = nr_cows; n >= 0; n-- )); do
	$XFS_IO_PROG -c "pwrite $(( n * extsize_bytes )) 1" "$huge_file" > /dev/null
done

# Compare the number of blocks allocated to this file (as reported by stat)
# against the number of blocks that are in the COW fork.  If either one is
# less than 2^32 then we have evidence of an overflow problem.
echo "Check crazy huge file"
# stat reports the allocation as a block count (%b) and the size in bytes of
# each of those blocks (%B); convert the product to filesystem blocks.
read -r allocated_stat_blocks stat_blksz <<< "$(stat -c '%b %B' "$huge_file")"
allocated_fsblocks=$(( allocated_stat_blocks * stat_blksz / blksz ))

# Make sure we got enough COW reservations to overflow a 32-bit counter.

# Sum the delalloc and real (mapped) blocks described by the bmap output of one
# fork of a file.
# Input:   verbose bmap output on stdin.
# Output:  the total on stdout, in units of 512-byte blocks; "0" if nothing
#          was counted (including completely empty input).
# Rows whose third column is "delalloc" carry their length in column 4, rows
# marked "hole" are skipped, and every other row contributes column 6.  Fields
# that are not numbers (such as those on header lines) count as zero.
count_fork_blocks() {
	$AWK_PROG '
$3 == "delalloc" {
	x += $4
	next
}
$3 != "hole" {
	x += $6
}
END {
	# %.0f always yields a plain integer: "0" when no input was seen, and
	# never exponent notation for sums well beyond 2^32.
	printf("%.0f\n", x)
}
'
}

# Count the number of blocks allocated to one fork of a file based on the
# xfs_io bmap output.
# Arguments:
#   $1 - tag naming the fork; only used to label the extent dump that is
#        appended to $seqres.full
#   $2 - file to inspect
#   $3 - extra bmap option selecting the fork (callers pass "-c", "-a", or an
#        empty string)
# Globals: reads $blksz (filesystem block size in bytes), $tmp and $seqres;
#          overwrites $tmp.extents.
# Output is in units of filesystem blocks.
count_file_fork_blocks() {
	local tag="$1"
	local file="$2"
	local args="$3"
	local sectors

	# Map the file that was passed in rather than the global $huge_file.
	$XFS_IO_PROG -c "bmap $args -l -p -v" "$file" > $tmp.extents
	echo "$tag fork map" >> $seqres.full
	cat $tmp.extents >> $seqres.full
	sectors="$(count_fork_blocks < $tmp.extents)"
	# count_fork_blocks reports 512-byte units; scale to filesystem blocks.
	echo "$(( sectors / (blksz / 512) ))"
}

# Tally each fork of the huge file.  The order matters only for the log: every
# call appends its extent dump to $seqres.full.
cowblocks=$(count_file_fork_blocks cow "$huge_file" "-c")
attrblocks=$(count_file_fork_blocks attr "$huge_file" "-a")
datablocks=$(count_file_fork_blocks data "$huge_file" "")

# Record the per-fork totals, then make sure the test really set up the
# overflow condition: the cow fork must hold more than 2^32 blocks.
{
	echo "datablocks is $datablocks"
	echo "attrblocks is $attrblocks"
	echo "cowblocks is $cowblocks"
} >> $seqres.full
if [ "$cowblocks" -lt $((2 ** 32)) ]; then
	echo "cowblocks (${cowblocks}) should be more than 2^32!"
fi

# The block count reported by stat must also exceed 2^32; a smaller value is
# how an overflow of the incore delalloc counter shows up.
echo "stat blocks is $allocated_fsblocks" >> $seqres.full
if [ "$allocated_fsblocks" -lt $((2 ** 32)) ]; then
	echo "stat blocks (${allocated_fsblocks}) should be more than 2^32!"
fi

# Cross-check: st_blocks should agree (within 2%) with the sum of the blocks
# counted in the data, cow and attr forks.
expected_allocated_fsblocks=$(( datablocks + cowblocks + attrblocks ))
echo "expected stat blocks is $expected_allocated_fsblocks" >> $seqres.full

_within_tolerance "st_blocks" $allocated_fsblocks $expected_allocated_fsblocks 2% -v

echo "Test done"
# Quick check the large sparse fs, but skip xfs_db because it doesn't scale
# well on a multi-terabyte filesystem.
# LARGE_SCRATCH_DEV=yes is supplied only for this one invocation; the two
# "none" arguments fill the remaining positional parameters of the checker.
# NOTE(review): presumably those are the log and realtime devices -- confirm
# against _check_xfs_filesystem.
LARGE_SCRATCH_DEV=yes _check_xfs_filesystem $loop_dev none none

# success, all done
# NOTE(review): status is presumably read by the harness exit handler to
# report pass/fail -- confirm in common/preamble.
status=0
exit
