I am submitting some patches to speed up header reading in neomutt. For benchmarking and testing the new code, I have created a small bash script:
#!/bin/bash
#####################################################################
# Copyright © 2019 - 2020 Tino Reichardt
#####################################################################
#
# Benchmarking neomutt header compression, results will be send to
# these two files:
# 1) log_init.md -> times needed for first start
# 2) log_reload.md -> times needed for reload
#
# The output files are markdown and can be used for evaluating the best
# header compression type for your system.
#
# ctime: 2019-11-17 /TR
# mtime: 2020-03-24 /TR
#####################################################################
function error() {
echo "$*"
exit 111
}
NEOMUTT=`which neomutt`
GNUTIME=`which time`
CWD=`pwd`
# testing maildir, just create an symlink
MAILDIR="$CWD/Maildir/"
CACHEDIR="$CWD/cache"
test -z $NEOMUTT && error "Sorry, neomutt not found :/"
test -z $GNUTIME && error "Sorry, I need the GNU time command :/"
test ! -d $MAILDIR/cur && error "Sorry, I can handle only Maildir/ files."
# how much tests should be done, the average is used
TIMES_INIT="3"
TIMES_RELOAD="9"
# backends with built in compression (header_cache_compress=yes)
M1=(qdbm kyotocabinet rocksdb tokyocabinet)
#M1=(tokyocabinet)
# these backends should be tested with header_cache_compress=no
M2=(bdb gdbm kyotocabinet lmdb qdbm tdb rocksdb tokyocabinet)
#M2=(rocksdb)
# generic compression methods, should be used via:
# header_cache_compress=no
# header_cache_compress_method=XYZ
METHODS=("none" "lz4" "zstd" "zlib")
function getcol() {
ROW="$1"
cat "$LOG" | awk "{print \$$ROW}" | xargs
}
function getavg() {
ACCURACY="$1"
shift
echo "$ACCURACY k 0 $* $(printf "%.s+" $(seq 1 $TIMES)) $TIMES / p" | dc
}
function test_neomutt() {
MODE="$1"
TIMES="$2"
LOG="$HCACHE_FILE-$MODE.log"
echo -n > "$LOG"
for i in `seq 1 $TIMES`; do
[ "$MODE" = "init" ] && rm -rf "$HCACHE_FILE.db"
$GNUTIME --format="%e %U %S %M %F %R %O" \
--append --output="$LOG" $NEOMUTT -F "$CWD/muttrc" > /dev/null
done
# 1 %e Elapsed real time (in seconds).
# 2 %S Total number of CPU-seconds that the process spent in kernel mode.
# 3 %U Total number of CPU-seconds that the process spent in user mode.
# 4 %M Maximum resident set size of the process during its lifetime, in Kbytes.
# 5 %F Number of major page faults that occurred while the process was running.
# 6 %R Number of minor, or recoverable, page faults.
# 7 %O Number of filesystem outputs by the process.
e=`getavg 3 "$(getcol 1)"|sed -e 's/^\./0\./g'`
S=`getavg 3 "$(getcol 2)"|sed -e 's/^\./0\./g'`
U=`getavg 3 "$(getcol 3)"|sed -e 's/^\./0\./g'`
M=`getavg 0 "$(getcol 4)"`
F=`getavg 0 "$(getcol 5)"`
R=`getavg 0 "$(getcol 6)"`
O=`getavg 0 "$(getcol 7)"`
# wait some moment, so the stat data are correct
sync
L=`du -s "$HCACHE_FILE.db"| cut -f1`
echo "du -s $HCACHE_FILE.db -> size=$L time=$e"
echo "$e | $S | $U | $M | $L | $HCACHE_METHOD | $HCACHE_COMPRESS | $CM$WD $CL | $F + $R | $O" >> log_$MODE.md
}
function doit() {
HCACHE_METHOD="$1"
HCACHE_COMPRESS="$2"
CM="$3"
CL="$4"
WD=""
if [ ! -z $CM ]; then
HCACHE_COMPRESS_METHOD="set header_cache_compress_method = $CM"
else
HCACHE_COMPRESS_METHOD=""
CM="none"
fi
if [ $CM = "none" ]; then
HCACHE_COMPRESS_METHOD=""
HCACHE_COMPRESS_LEVEL=""
CL=""
fi
if [ ! -z $CL ]; then
HCACHE_COMPRESS_LEVEL="set header_cache_compress_level = $CL"
else
HCACHE_COMPRESS_LEVEL=""
fi
HCACHE_FILE="$CACHEDIR/hcache-$HCACHE_COMPRESS-$HCACHE_METHOD-$CM"
cat > "$CWD/muttrc" << EOF
# set debug_level = 2
# set debug_file = neolog
set read_inc = 0
set write_inc = 0
set folder = $MAILDIR
set spoolfile = $MAILDIR
set maildir_header_cache_verify = no
set header_cache = $HCACHE_FILE.db
set header_cache_backend = $HCACHE_METHOD
# set header_cache_compress = yes | no
set header_cache_compress = $HCACHE_COMPRESS
# set header_cache_compress_method = zstd | lz4 | zlib
$HCACHE_COMPRESS_METHOD
#set header_cache_compress_level = 1
$HCACHE_COMPRESS_LEVEL
folder-hook . exec exit
EOF
test_neomutt "init" "$TIMES_INIT"
test_neomutt "reload" "$TIMES_RELOAD"
}
X="Real | User | Sys | MaxMem | DBSize | Backend | Builtin Compress | Generic Compress | PageFaults | FS Outputs
-|-|-|-|-|-|-|-|-|-"
echo "$X" > log_init.md
echo "$X" > log_reload.md
mkdir -p "$CACHEDIR"
# header_cache_compress = yes
for hm in ${M1[@]}; do
# dbms | yes | method="" | level=""
doit "$hm" "yes" "" ""
done
#exit
# header_cache_compress = no
for hm in ${M2[@]}; do
for cm in ${METHODS[@]}; do
# dbms | no | method | level
doit "$hm" "no" "$cm" "1"
done
done
This is the table with the initial benchmark times, generated by the script:
Real | User | Sys | MaxMem | DBSize | Backend | Builtin Compress | Generic Compress | PageFaults | FS Outputs
-|-|-|-|-|-|-|-|-|-
9.050 | 7.410 | 1.590 | 218244 | 23876 | tokyocabinet | yes (zlib) | none | 0 + 251225 | 47784
24.340 | 22.680 | 1.630 | 219468 | 27688 | tokyocabinet | yes (bzip2) | none | 0 + 193902 | 55408
40.630 | 39.460 | 1.120 | 218932 | 32760 | tokyocabinet | yes (tcbs) | none | 0 + 57703 | 71712
8.540 | 6.690 | 1.920 | 256380 | 105220 | rocksdb | no | none | 0 + 82344 | 337000
8.390 | 6.760 | 1.770 | 256608 | 36232 | rocksdb | yes (lz4) | none | 0 + 82613 | 255976
8.600 | 6.840 | 1.830 | 256692 | 36520 | rocksdb | yes (snappy) | none | 0 + 82650 | 256344
8.480 | 7.130 | 1.760 | 257024 | 28284 | rocksdb | yes (zstd) | none | 0 + 83104 | 246456
8.740 | 8.470 | 1.870 | 260748 | 27168 | rocksdb | yes (zlib) | none | 0 + 83326 | 244992
16.450 | 17.690 | 2.130 | 322652 | 32512 | rocksdb | yes (bzip2) | none | 1 + 147128 | 250992
7.870 | 6.023 | 1.600 | 212206 | 120268 | bdb | no | none | 0 + 55712 | 240752
8.063 | 6.463 | 1.456 | 212456 | 64000 | bdb | no | lz4 1 | 0 + 55753 | 128242
9.263 | 7.606 | 1.520 | 212982 | 54332 | bdb | no | zstd 1 | 0 + 55772 | 108850
8.773 | 7.146 | 1.493 | 213316 | 49704 | bdb | no | zstd+dict 1 | 0 + 55894 | 99672
15.256 | 10.923 | 4.183 | 212634 | 53948 | bdb | no | zlib 1 | 0 + 1818048 | 108101
8.606 | 6.210 | 2.143 | 214234 | 112256 | gdbm | no | none | 0 + 472106 | 224690
8.740 | 6.490 | 2.063 | 213908 | 64880 | gdbm | no | lz4 1 | 0 + 410342 | 129890
9.810 | 7.740 | 1.933 | 214238 | 56304 | gdbm | no | zstd 1 | 0 + 392760 | 112632
9.270 | 7.160 | 1.953 | 214948 | 51904 | gdbm | no | zstd+dict 1 | 0 + 383151 | 103952
15.853 | 11.093 | 4.596 | 214545 | 55968 | gdbm | no | zlib 1 | 0 + 2155627 | 112074
8.250 | 6.653 | 1.556 | 356196 | 110768 | kyotocabinet | no | none | 17 + 92148 | 221554
8.373 | 6.873 | 1.453 | 347173 | 62148 | kyotocabinet | no | lz4 1 | 17 + 89893 | 124293
9.693 | 8.176 | 1.473 | 327474 | 53156 | kyotocabinet | no | zstd 1 | 17 + 84923 | 106245
9.286 | 7.846 | 1.393 | 318489 | 48876 | kyotocabinet | no | zstd+dict 1 | 16 + 82629 | 97760
15.666 | 11.406 | 4.213 | 326200 | 52796 | kyotocabinet | no | zlib 1 | 17 + 1847906 | 105504
7.856 | 5.860 | 1.613 | 413192 | 200404 | lmdb | no | none | 0 + 106004 | 400885
7.773 | 6.080 | 1.503 | 295153 | 82548 | lmdb | no | lz4 1 | 0 + 76508 | 165138
8.983 | 7.476 | 1.340 | 279501 | 67124 | lmdb | no | zstd 1 | 0 + 72545 | 134264
8.510 | 6.916 | 1.426 | 273070 | 60252 | lmdb | no | zstd+dict 1 | 0 + 70978 | 120496
11.193 | 9.623 | 1.380 | 278962 | 66472 | lmdb | no | zlib 1 | 0 + 72469 | 132813
7.313 | 5.813 | 1.456 | 243645 | 131400 | qdbm | no | none | 0 + 63833 | 263133
7.540 | 6.060 | 1.450 | 236957 | 76708 | qdbm | no | lz4 1 | 0 + 62139 | 153762
9.003 | 7.430 | 1.516 | 234634 | 59396 | qdbm | no | zstd 1 | 0 + 61495 | 119029
8.380 | 6.903 | 1.436 | 233474 | 56192 | qdbm | no | zstd+dict 1 | 0 + 61250 | 112640
14.863 | 10.660 | 4.140 | 234045 | 58888 | qdbm | no | zlib 1 | 0 + 1831184 | 118394
8.550 | 6.660 | 1.900 | 256344 | 105236 | rocksdb | no | none | 0 + 82351 | 337144
8.580 | 7.150 | 1.410 | 272816 | 60336 | rocksdb | no | lz4 1 | 0 + 70357 | 120808
9.950 | 8.070 | 1.840 | 264544 | 51752 | rocksdb | no | zstd 1 | 0 + 68239 | 103720
15.940 | 11.540 | 4.330 | 264324 | 51472 | rocksdb | no | zlib 1 | 0 + 1830079 | 103088
7.453 | 5.840 | 1.560 | 342353 | 133928 | tdb | no | none | 0 + 99264 | 268002
7.546 | 6.000 | 1.510 | 284900 | 90316 | tdb | no | lz4 1 | 0 + 81114 | 180669
8.813 | 7.310 | 1.466 | 274777 | 67612 | tdb | no | zstd 1 | 0 + 77095 | 135290
8.333 | 6.893 | 1.400 | 269874 | 64092 | tdb | no | zstd+dict 1 | 0 + 75567 | 128210
13.046 | 10.253 | 2.766 | 274192 | 67612 | tdb | no | zlib 1 | 0 + 932750 | 134293
7.040 | 5.666 | 1.340 | 222597 | 107588 | tokyocabinet | no | none | 0 + 58508 | 215205
8.030 | 6.473 | 1.513 | 222214 | 60404 | tokyocabinet | no | lz4 1 | 0 + 58373 | 120850
9.053 | 7.556 | 1.460 | 222570 | 51720 | tokyocabinet | no | zstd 1 | 0 + 58465 | 103450
8.176 | 6.740 | 1.393 | 222992 | 47612 | tokyocabinet | no | zstd+dict 1 | 0 + 58515 | 95074
14.996 | 10.863 | 4.093 | 222022 | 51440 | tokyocabinet | no | zlib 1 | 0 + 1825323 | 102808
log_init.md
and the reloading benchmark times now:
Real | User | Sys | MaxMem | DBSize | Backend | Builtin Compress | Generic Compress | PageFaults | FS Outputs
-|-|-|-|-|-|-|-|-|-
1.110 | 1.000 | 0.100 | 207364 | 23876 | tokyocabinet | yes (zlib) | none | 0 + 51546 | 5
3.613 | 3.480 | 0.110 | 209029 | 27688 | tokyocabinet | yes (bzip2) | none | 0 + 50370 | 5
10.133 | 10.003 | 0.106 | 207600 | 32760 | tokyocabinet | yes (tcbs) | none | 0 + 50204 | 5
1.331 | 1.197 | 0.108 | 212934 | 104684 | rocksdb | no | none | 0 + 51843 | 9661
1.385 | 1.243 | 0.117 | 213240 | 36232 | rocksdb | yes (lz4) | none | 0 + 52016 | 3464
1.406 | 1.264 | 0.114 | 213464 | 36520 | rocksdb | yes (snappy) | none | 0 + 52099 | 3485
1.672 | 1.528 | 0.125 | 213504 | 28284 | rocksdb | yes (zstd) | none | 0 + 52066 | 2749
2.014 | 1.865 | 0.121 | 213196 | 27168 | rocksdb | yes (zlib) | none | 0 + 51929 | 2651
6.797 | 6.661 | 0.132 | 213860 | 32512 | rocksdb | yes (bzip2) | none | 0 + 61511 | 7415
1.140 | 0.943 | 0.170 | 268580 | 120268 | bdb | no | none | 0 + 64540 | 0
1.220 | 1.126 | 0.073 | 268776 | 64000 | bdb | no | lz4 1 | 0 + 64546 | 0
2.063 | 1.910 | 0.140 | 269472 | 54332 | bdb | no | zstd 1 | 0 + 64593 | 0
1.456 | 1.330 | 0.103 | 269837 | 49704 | bdb | no | zstd+dict 1 | 0 + 64690 | 0
2.230 | 2.080 | 0.130 | 268722 | 53948 | bdb | no | zlib 1 | 0 + 64580 | 0
1.160 | 1.043 | 0.096 | 417358 | 112256 | gdbm | no | none | 0 + 66966 | 0
1.246 | 1.113 | 0.083 | 353922 | 64880 | gdbm | no | lz4 1 | 0 + 65940 | 0
1.976 | 1.783 | 0.143 | 342860 | 56304 | gdbm | no | zstd 1 | 0 + 65810 | 0
1.380 | 1.226 | 0.150 | 337124 | 51904 | gdbm | no | zstd+dict 1 | 0 + 66012 | 0
2.226 | 2.083 | 0.116 | 342094 | 55968 | gdbm | no | zlib 1 | 0 + 65748 | 0
1.620 | 1.413 | 0.193 | 449268 | 110768 | kyotocabinet | no | none | 0 + 89846 | 0
1.606 | 1.453 | 0.126 | 438918 | 62148 | kyotocabinet | no | lz4 1 | 0 + 88265 | 0
2.343 | 2.173 | 0.150 | 414906 | 53156 | kyotocabinet | no | zstd 1 | 0 + 84991 | 0
1.986 | 1.773 | 0.166 | 403305 | 48876 | kyotocabinet | no | zstd+dict 1 | 0 + 83384 | 0
2.576 | 2.383 | 0.176 | 413446 | 52796 | kyotocabinet | no | zlib 1 | 0 + 84830 | 0
0.973 | 0.836 | 0.120 | 531644 | 200404 | lmdb | no | none | 0 + 68525 | 0
1.126 | 0.956 | 0.136 | 375264 | 82548 | lmdb | no | lz4 1 | 0 + 66156 | 0
1.840 | 1.700 | 0.116 | 354758 | 67124 | lmdb | no | zstd 1 | 0 + 65872 | 0
1.350 | 1.196 | 0.120 | 346036 | 60252 | lmdb | no | zstd+dict 1 | 0 + 65963 | 0
2.086 | 1.946 | 0.123 | 353893 | 66472 | lmdb | no | zlib 1 | 0 + 65826 | 0
1.066 | 0.923 | 0.103 | 309512 | 131400 | qdbm | no | none | 0 + 74974 | 0
1.113 | 0.956 | 0.126 | 300257 | 76708 | qdbm | no | lz4 1 | 0 + 72676 | 0
1.803 | 1.636 | 0.143 | 296974 | 59396 | qdbm | no | zstd 1 | 0 + 71806 | 0
1.460 | 1.296 | 0.136 | 295590 | 56192 | qdbm | no | zstd+dict 1 | 0 + 71535 | 0
2.093 | 1.963 | 0.120 | 296188 | 58888 | qdbm | no | zlib 1 | 0 + 71698 | 0
1.393 | 1.180 | 0.166 | 213117 | 104696 | rocksdb | no | none | 0 + 54355 | 28576
1.400 | 1.166 | 0.173 | 213145 | 57536 | rocksdb | no | lz4 1 | 0 + 55688 | 38544
1.940 | 1.763 | 0.126 | 212872 | 48764 | rocksdb | no | zstd 1 | 0 + 54895 | 32656
2.120 | 1.920 | 0.143 | 212900 | 48464 | rocksdb | no | zlib 1 | 0 + 54889 | 32464
0.910 | 0.770 | 0.106 | 442193 | 133928 | tdb | no | none | 0 + 67247 | 0
0.980 | 0.830 | 0.123 | 365726 | 90316 | tdb | no | lz4 1 | 0 + 66034 | 0
1.823 | 1.706 | 0.096 | 351785 | 67612 | tdb | no | zstd 1 | 0 + 65785 | 0
1.366 | 1.190 | 0.126 | 345489 | 64092 | tdb | no | zstd+dict 1 | 0 + 65798 | 0
1.973 | 1.836 | 0.120 | 350894 | 67612 | tdb | no | zlib 1 | 0 + 65797 | 0
1.076 | 0.870 | 0.156 | 282280 | 107588 | tokyocabinet | no | none | 0 + 68186 | 0
1.100 | 0.983 | 0.110 | 282025 | 60404 | tokyocabinet | no | lz4 1 | 0 + 68040 | 0
1.803 | 1.593 | 0.180 | 282465 | 51720 | tokyocabinet | no | zstd 1 | 0 + 68048 | 0
1.426 | 1.296 | 0.116 | 281866 | 47612 | tokyocabinet | no | zstd+dict 1 | 0 + 68014 | 0
2.066 | 1.880 | 0.166 | 281845 | 51440 | tokyocabinet | no | zlib 1 | 0 + 68010 | 0
log_reload.md