Sunday, November 17, 2019

Linux ZFS and disk configuration: a collection of notes

#check stats on pool
zpool iostat -v 1

#mounting an encrypted filesystem
zfs mount -l -a

#adding slog to a pool
zpool add zfs log /dev/nvme0n1

#adding l2arc to a pool
zpool add zfs cache /dev/nvme0n2

#checking all zfs module parameters
modinfo zfs

#check whether atime is enabled
zfs get all |grep atime

#disable atime
zfs set atime=off zfs
zfs set atime=off zfs_sata

#check serial number of disk
[root@node-2 ~]# sginfo -s /dev/sda
Serial Number 'PDNLH0BRH9F2DJ'
#check physical and logical block size of disks
lsblk -o NAME,PHY-SEC,LOG-SEC,SIZE,TYPE,ROTA

#physical vs. logical block size
  • physical sector size -> the unit in which the drive actually reads and writes on the media
  • logical sector size -> the smallest read or write size the drive supports
#meaning of ashift
ashift is the sector size expressed as an exponent to the base of 2, e.g. a physical sector size of 512 == 2^9 -> ashift=9, and 4k (4096) == 2^12 -> ashift=12

#Example for 4x2TB rotational disks
[root@node-2 ~]# lsblk -o NAME,PHY-SEC,LOG-SEC,SIZE,TYPE,ROTA |grep 'disk     1'
sdk                  4096     512   1.8T disk     1
sdi                  4096     512   1.8T disk     1
sdl                  4096     512   1.8T disk     1
sdj                  4096     512   1.8T disk     1



[root@node-2 ~]# ls -l /dev/disk/by-id/
total 0
lrwxrwxrwx. 1 root root  9 Nov 17 23:14 ata-ST2000LM015-2E8174_WDZ3WZFN -> ../../sdj
lrwxrwxrwx. 1 root root  9 Nov 17 23:15 ata-ST2000LM015-2E8174_WDZAAC2H -> ../../sdi
lrwxrwxrwx. 1 root root  9 Nov 17 23:15 ata-ST2000LX001-1RG174_WDZASXRK -> ../../sdk
lrwxrwxrwx. 1 root root  9 Nov 17 23:15 ata-ST2000LX001-1RG174_ZDZ4TJK2 -> ../../sdl


#creating the pool
zpool create zfs_sata -o ashift=12 mirror ata-ST2000LM015-2E8174_WDZAAC2H ata-ST2000LM015-2E8174_WDZ3WZFN mirror ata-ST2000LX001-1RG174_WDZASXRK ata-ST2000LX001-1RG174_ZDZ4TJK2

#creating two datasets (filesystems) with different record sizes
zfs create -o recordsize=16k zfs_sata/mfdatabase
zfs create -o recordsize=1024k zfs_sata/mfjournal



#some sequential testing
dd if=/dev/zero of=/zfs_sata/mfdatabase/tempfile bs=1M count=1024; sync
dd if=/dev/zero of=/zfs_sata/mfdatabase/tempfile2 bs=16k count=65536; sync

/sbin/sysctl -w vm.drop_caches=3

dd if=/zfs_sata/mfdatabase/tempfile of=/dev/null bs=1M count=1024
dd if=/zfs_sata/mfdatabase/tempfile2 of=/dev/null bs=16k count=65536

rm -rf /zfs_sata/mfdatabase/*

writes
16.5013 s, 65.1 MB/s (expected slower)
7.26762 s, 148 MB/s  (expected faster)

reads
54.6486 s, 19.6 MB/s (expected slower)
59.2402 s, 18.1 MB/s (expected slower)


dd if=/dev/zero of=/zfs_sata/mfjournal/tempfile bs=1M count=1024; sync
dd if=/dev/zero of=/zfs_sata/mfjournal/tempfile2 bs=16k count=65536; sync

/sbin/sysctl -w vm.drop_caches=3

dd if=/zfs_sata/mfjournal/tempfile of=/dev/null bs=1M count=1024
dd if=/zfs_sata/mfjournal/tempfile2 of=/dev/null bs=16k count=65536

rm -rf /zfs_sata/mfjournal/*


writes
12.1631 s, 88.3 MB/s (expected faster)
8.75189 s, 123 MB/s  (expected slower)

reads
43.0267 s, 25.0 MB/s (expected faster)
23.1101 s, 46.5 MB/s (expected faster)


#zfs parameters
yum install sysfsutils -y
systool -vm zfs

#or
ls -l /sys/module/zfs/parameters/

#change zfs options permanently (read when the zfs module is loaded) by adding them to
/etc/modprobe.d/zfs.conf


e.g. options zfs PARAMETER=VALUE

#change a zfs option at runtime (takes effect immediately, not persistent across reboots)
echo NEWVALUE >> /sys/module/zfs/parameters/PARAMETER

#Example: setting zfs_arc_max to 16 GiB
echo 17179869184>> /sys/module/zfs/parameters/zfs_arc_max
[root@node-2d current]# systool -vm zfs |grep zfs_arc_max
    zfs_arc_max         = "17179869184"


#values to be changed
zfs_arc_max
https://forums.freebsd.org/threads/howto-tuning-l2arc-in-zfs.29907/
l2arc_write_max: 8388608    # Maximum number of bytes written to l2arc per feed
l2arc_write_boost: 8388608  # Mostly only relevant at the first few hours after boot
l2arc_headroom: 2           # how far through the ARC lists to scan for L2ARC-cacheable buffers, as a multiple of l2arc_write_max
l2arc_feed_secs: 1          # l2arc feeding period
l2arc_feed_min_ms: 200      # minimum l2arc feeding period
l2arc_noprefetch: 1         # control whether streaming data is cached or not
l2arc_feed_again: 1         # control whether feed_min_ms is used or not
l2arc_norw: 1               # no read and write at the same time
#example contents of /etc/modprobe.d/zfs.conf
#log
options zfs zfs_txg_timeout=30

#cache
options zfs zfs_arc_max=34359738368

options zfs l2arc_noprefetch=0
options zfs l2arc_write_max=1073741824
options zfs l2arc_write_boost=2147483648

1073741824
zil_slog_limit

#for an all-SSD pool, logbias could be changed, e.g.
zfs set logbias=throughput zfs/mydata

#look at performance
zpool iostat -v 1

#resources I read
  • http://open-zfs.org/wiki/Performance_tuning 
  • https://github.com/zfsonlinux/zfs/wiki/ZFS-on-Linux-Module-Parameters
  • https://martin.heiland.io/2018/02/23/zfs-tuning 
  • https://www.svennd.be/tuning-of-zfs-module
  • https://utcc.utoronto.ca/~cks/space/blog/solaris/ZFSWritesAndZIL
  • http://www.nanowolk.nl/ext/2013_02_zfs_sequential_read_write_performance
  • http://www.nanowolk.nl/ext/2013_02_zfs_random_iops_read_write_performance
  • https://sites.google.com/site/ryanbabchishin/home/publications/changing-a-zvol-block-size-while-making-it-sparse-and-compressed 
  • https://utcc.utoronto.ca/~cks/space/blog/tech/AdvancedFormatDrives 
  • https://docs.oracle.com/cd/E23823_01/html/819-5461/gazss.html#indexterm-425
  • https://zfs.datto.com/2017_slides/pinchuk.pdf

Thursday, November 14, 2019