malloc: Improve Huge Page support
backport the following seven patches malloc: Add madvise support for Transparent Huge Pages malloc: Add THP/madvise support for sbrk malloc: Move mmap logic to its own function malloc: Add Huge Page support for mmap malloc: Add Huge Page support to arenas malloc: Move MORECORE fallback mmap to sysmalloc_mmap_fallback malloc: Enable huge page support on main arena Signed-off-by: Yang Yanchao <yangyanchao6@huawei.com>
This commit is contained in:
parent
c27e7c7fbc
commit
780caed580
18
glibc.spec
18
glibc.spec
@ -66,7 +66,7 @@
|
|||||||
##############################################################################
|
##############################################################################
|
||||||
Name: glibc
|
Name: glibc
|
||||||
Version: 2.34
|
Version: 2.34
|
||||||
Release: 69
|
Release: 70
|
||||||
Summary: The GNU libc libraries
|
Summary: The GNU libc libraries
|
||||||
License: %{all_license}
|
License: %{all_license}
|
||||||
URL: http://www.gnu.org/software/glibc/
|
URL: http://www.gnu.org/software/glibc/
|
||||||
@ -201,6 +201,13 @@ Patch115: x86-Fallback-str-wcs-cmp-RTM-in-the-ncmp-overflow-ca.patch
|
|||||||
Patch116: x86-Test-wcscmp-RTM-in-the-wcsncmp-overflow-case-BZ-.patch
|
Patch116: x86-Test-wcscmp-RTM-in-the-wcsncmp-overflow-case-BZ-.patch
|
||||||
Patch117: x86-Fix-TEST_NAME-to-make-it-a-string-in-tst-strncmp.patch
|
Patch117: x86-Fix-TEST_NAME-to-make-it-a-string-in-tst-strncmp.patch
|
||||||
Patch118: Add-PTRACE_GET_RSEQ_CONFIGURATION-from-Linux-5.13-to.patch
|
Patch118: Add-PTRACE_GET_RSEQ_CONFIGURATION-from-Linux-5.13-to.patch
|
||||||
|
Patch119: malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch
|
||||||
|
Patch120: malloc-hugepage-0002-malloc-Add-THP-madvise-support-for-sbrk.patch
|
||||||
|
Patch121: malloc-hugepage-0003-malloc-Move-mmap-logic-to-its-own-function.patch
|
||||||
|
Patch122: malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch
|
||||||
|
Patch123: malloc-hugepage-0005-malloc-Add-Huge-Page-support-to-arenas.patch
|
||||||
|
Patch124: malloc-hugepage-0006-malloc-Move-MORECORE-fallback-mmap-to-sysmalloc_mmap.patch
|
||||||
|
Patch125: malloc-hugepage-0007-malloc-Enable-huge-page-support-on-main-arena.patch
|
||||||
|
|
||||||
Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
|
Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
|
||||||
Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch
|
Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch
|
||||||
@ -1293,6 +1300,15 @@ fi
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Mar 15 2022 Yang Yanchao<yangyanchao6@huawei.com> - 2.34-70
|
||||||
|
- malloc: Add madvise support for Transparent Huge Pages
|
||||||
|
- malloc: Add THP/madvise support for sbrk
|
||||||
|
- malloc: Move mmap logic to its own function
|
||||||
|
- malloc: Add Huge Page support for mmap
|
||||||
|
- malloc: Add Huge Page support to arenas
|
||||||
|
- malloc: Move MORECORE fallback mmap to sysmalloc_mmap_fallback
|
||||||
|
- malloc: Enable huge page support on main arena
|
||||||
|
|
||||||
* Sat Mar 12 2022 Yang Yanchao<yangyanchao6@huawei.com> - 2.34-69
|
* Sat Mar 12 2022 Yang Yanchao<yangyanchao6@huawei.com> - 2.34-69
|
||||||
- malloc: use __get_nprocs replace __get_nprocs_sched.
|
- malloc: use __get_nprocs replace __get_nprocs_sched.
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,532 @@
|
|||||||
|
From 5f6d8d97c69748180f0031dfa385aff75062c4d5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Fri, 13 Aug 2021 08:36:29 -0300
|
||||||
|
Subject: [PATCH 1/7] malloc: Add madvise support for Transparent Huge Pages
|
||||||
|
|
||||||
|
Linux Transparent Huge Pages (THP) current supports three different
|
||||||
|
states: 'never', 'madvise', and 'always'. The 'never' is
|
||||||
|
self-explanatory and 'always' will enable THP for all anonymous
|
||||||
|
pages. However, 'madvise' is still the default for some system and
|
||||||
|
for such case THP will be only used if the memory range is explicity
|
||||||
|
advertise by the program through a madvise(MADV_HUGEPAGE) call.
|
||||||
|
|
||||||
|
To enable it a new tunable is provided, 'glibc.malloc.hugetlb',
|
||||||
|
where setting to a value diffent than 0 enables the madvise call.
|
||||||
|
|
||||||
|
This patch issues the madvise(MADV_HUGEPAGE) call after a successful
|
||||||
|
mmap() call at sysmalloc() with sizes larger than the default huge
|
||||||
|
page size. The madvise() call is disable is system does not support
|
||||||
|
THP or if it has the mode set to "never" and on Linux only support
|
||||||
|
one page size for THP, even if the architecture supports multiple
|
||||||
|
sizes.
|
||||||
|
|
||||||
|
To test is a new rule is added tests-malloc-hugetlb1, which run the
|
||||||
|
addes tests with the required GLIBC_TUNABLE setting.
|
||||||
|
|
||||||
|
Checked on x86_64-linux-gnu.
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
NEWS | 5 ++
|
||||||
|
Rules | 19 ++++++
|
||||||
|
elf/dl-tunables.list | 5 ++
|
||||||
|
elf/tst-rtld-list-tunables.exp | 1 +
|
||||||
|
malloc/Makefile | 16 +++++
|
||||||
|
malloc/arena.c | 5 ++
|
||||||
|
malloc/malloc-internal.h | 1 +
|
||||||
|
malloc/malloc.c | 47 ++++++++++++++
|
||||||
|
manual/tunables.texi | 10 +++
|
||||||
|
sysdeps/generic/Makefile | 8 +++
|
||||||
|
sysdeps/generic/malloc-hugepages.c | 31 +++++++++
|
||||||
|
sysdeps/generic/malloc-hugepages.h | 37 +++++++++++
|
||||||
|
sysdeps/unix/sysv/linux/malloc-hugepages.c | 74 ++++++++++++++++++++++
|
||||||
|
13 files changed, 259 insertions(+)
|
||||||
|
create mode 100644 sysdeps/generic/malloc-hugepages.c
|
||||||
|
create mode 100644 sysdeps/generic/malloc-hugepages.h
|
||||||
|
create mode 100644 sysdeps/unix/sysv/linux/malloc-hugepages.c
|
||||||
|
|
||||||
|
diff --git a/NEWS b/NEWS
|
||||||
|
index 2532565d77..3b94dd209c 100644
|
||||||
|
--- a/NEWS
|
||||||
|
+++ b/NEWS
|
||||||
|
@@ -92,6 +92,11 @@ Major new features:
|
||||||
|
variables. The GNU C Library manual has details on integration of
|
||||||
|
Restartable Sequences.
|
||||||
|
|
||||||
|
+* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
|
||||||
|
+ make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
|
||||||
|
+ Setting this might improve performance with Transparent Huge Pages madvise
|
||||||
|
+ mode depending of the workload.
|
||||||
|
+
|
||||||
|
Deprecated and removed features, and other changes affecting compatibility:
|
||||||
|
|
||||||
|
* The function pthread_mutex_consistent_np has been deprecated; programs
|
||||||
|
diff --git a/Rules b/Rules
|
||||||
|
index b1137afe71..5f5d9ba4cc 100644
|
||||||
|
--- a/Rules
|
||||||
|
+++ b/Rules
|
||||||
|
@@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
|
||||||
|
$(tests-container:%=$(objpfx)%.out) \
|
||||||
|
$(tests-mcheck:%=$(objpfx)%-mcheck.out) \
|
||||||
|
$(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
|
||||||
|
+ $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
|
||||||
|
$(tests-special) $(tests-printers-out)
|
||||||
|
xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
|
||||||
|
endif
|
||||||
|
@@ -168,6 +169,7 @@ tests-expected =
|
||||||
|
else
|
||||||
|
tests-expected = $(tests) $(tests-internal) $(tests-printers) \
|
||||||
|
$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
|
||||||
|
+ $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
|
||||||
|
$(tests-mcheck:%=%-mcheck)
|
||||||
|
endif
|
||||||
|
tests:
|
||||||
|
@@ -196,6 +198,7 @@ binaries-pie-notests =
|
||||||
|
endif
|
||||||
|
binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
|
||||||
|
binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
|
||||||
|
+binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
|
||||||
|
else
|
||||||
|
binaries-all-notests =
|
||||||
|
binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
|
||||||
|
@@ -207,6 +210,7 @@ binaries-pie-tests =
|
||||||
|
binaries-pie-notests =
|
||||||
|
binaries-mcheck-tests =
|
||||||
|
binaries-malloc-check-tests =
|
||||||
|
+binaries-malloc-hugetlb1-tests =
|
||||||
|
endif
|
||||||
|
|
||||||
|
binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
|
||||||
|
@@ -247,6 +251,14 @@ $(addprefix $(objpfx),$(binaries-malloc-check-tests)): %-malloc-check: %.o \
|
||||||
|
$(+link-tests)
|
||||||
|
endif
|
||||||
|
|
||||||
|
+ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" ""
|
||||||
|
+$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \
|
||||||
|
+ $(link-extra-libs-tests) \
|
||||||
|
+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
|
||||||
|
+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
|
||||||
|
+ $(+link-tests)
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
ifneq "$(strip $(binaries-pie-tests))" ""
|
||||||
|
$(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
|
||||||
|
$(link-extra-libs-tests) \
|
||||||
|
@@ -284,6 +296,13 @@ $(1)-malloc-check-ENV = MALLOC_CHECK_=3 \
|
||||||
|
endef
|
||||||
|
$(foreach t,$(tests-malloc-check),$(eval $(call malloc-check-ENVS,$(t))))
|
||||||
|
|
||||||
|
+# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=1
|
||||||
|
+define malloc-hugetlb1-ENVS
|
||||||
|
+$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
|
||||||
|
+endef
|
||||||
|
+$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
|
||||||
|
+
|
||||||
|
+
|
||||||
|
# mcheck tests need the debug DSO to support -lmcheck.
|
||||||
|
define mcheck-ENVS
|
||||||
|
$(1)-mcheck-ENV = LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
|
||||||
|
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
|
||||||
|
index ffcd7f18d4..d1fd3f3e91 100644
|
||||||
|
--- a/elf/dl-tunables.list
|
||||||
|
+++ b/elf/dl-tunables.list
|
||||||
|
@@ -92,6 +92,11 @@ glibc {
|
||||||
|
minval: 0
|
||||||
|
security_level: SXID_IGNORE
|
||||||
|
}
|
||||||
|
+ hugetlb {
|
||||||
|
+ type: INT_32
|
||||||
|
+ minval: 0
|
||||||
|
+ maxval: 1
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
cpu {
|
||||||
|
hwcap_mask {
|
||||||
|
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
|
||||||
|
index 44e4834cfb..d8e363f2c5 100644
|
||||||
|
--- a/elf/tst-rtld-list-tunables.exp
|
||||||
|
+++ b/elf/tst-rtld-list-tunables.exp
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
|
||||||
|
glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
|
||||||
|
glibc.malloc.check: 0 (min: 0, max: 3)
|
||||||
|
+glibc.malloc.hugetlb: 0 (min: 0, max: 1)
|
||||||
|
glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
|
||||||
|
glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
|
||||||
|
glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
|
||||||
|
diff --git a/malloc/Makefile b/malloc/Makefile
|
||||||
|
index 63cd7c0734..0137595e17 100644
|
||||||
|
--- a/malloc/Makefile
|
||||||
|
+++ b/malloc/Makefile
|
||||||
|
@@ -78,6 +78,22 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
|
||||||
|
tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
|
||||||
|
$(tests-static),$(tests))
|
||||||
|
|
||||||
|
+# Run all testes with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the
|
||||||
|
+# Transparent Huge Pages support. We need exclude some tests that define
|
||||||
|
+# the ENV vars.
|
||||||
|
+tests-exclude-hugetlb1 = \
|
||||||
|
+ tst-compathooks-off \
|
||||||
|
+ tst-compathooks-on \
|
||||||
|
+ tst-interpose-nothread \
|
||||||
|
+ tst-interpose-thread \
|
||||||
|
+ tst-interpose-static-nothread \
|
||||||
|
+ tst-interpose-static-thread \
|
||||||
|
+ tst-malloc-usable \
|
||||||
|
+ tst-malloc-usable-tunables \
|
||||||
|
+ tst-mallocstate
|
||||||
|
+tests-malloc-hugetlb1 = \
|
||||||
|
+ $(filter-out $(tests-exclude-hugetlb1), $(tests))
|
||||||
|
+
|
||||||
|
# -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
|
||||||
|
ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
|
||||||
|
# Tests that don't play well with mcheck. They are either bugs in mcheck or
|
||||||
|
diff --git a/malloc/arena.c b/malloc/arena.c
|
||||||
|
index 78ef4cf18c..cd00c7bef4 100644
|
||||||
|
--- a/malloc/arena.c
|
||||||
|
+++ b/malloc/arena.c
|
||||||
|
@@ -230,6 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
|
||||||
|
TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
|
||||||
|
#endif
|
||||||
|
TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
|
||||||
|
+TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
|
||||||
|
#else
|
||||||
|
/* Initialization routine. */
|
||||||
|
#include <string.h>
|
||||||
|
@@ -330,6 +331,7 @@ ptmalloc_init (void)
|
||||||
|
TUNABLE_CALLBACK (set_tcache_unsorted_limit));
|
||||||
|
# endif
|
||||||
|
TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
|
||||||
|
+ TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
|
||||||
|
#else
|
||||||
|
if (__glibc_likely (_environ != NULL))
|
||||||
|
{
|
||||||
|
@@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad)
|
||||||
|
__munmap (p2, HEAP_MAX_SIZE);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ madvise_thp (p2, size);
|
||||||
|
+
|
||||||
|
h = (heap_info *) p2;
|
||||||
|
h->size = size;
|
||||||
|
h->mprotect_size = size;
|
||||||
|
diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
|
||||||
|
index 0c7b5a183c..7493e34d86 100644
|
||||||
|
--- a/malloc/malloc-internal.h
|
||||||
|
+++ b/malloc/malloc-internal.h
|
||||||
|
@@ -22,6 +22,7 @@
|
||||||
|
#include <malloc-machine.h>
|
||||||
|
#include <malloc-sysdep.h>
|
||||||
|
#include <malloc-size.h>
|
||||||
|
+#include <malloc-hugepages.h>
|
||||||
|
|
||||||
|
/* Called in the parent process before a fork. */
|
||||||
|
void __malloc_fork_lock_parent (void) attribute_hidden;
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index 095d97a3be..c75841b841 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -1880,6 +1880,11 @@ struct malloc_par
|
||||||
|
INTERNAL_SIZE_T arena_test;
|
||||||
|
INTERNAL_SIZE_T arena_max;
|
||||||
|
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+ /* Transparent Large Page support. */
|
||||||
|
+ INTERNAL_SIZE_T thp_pagesize;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* Memory map support */
|
||||||
|
int n_mmaps;
|
||||||
|
int n_mmaps_max;
|
||||||
|
@@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n)
|
||||||
|
|
||||||
|
#include <stap-probe.h>
|
||||||
|
|
||||||
|
+/* ----------- Routines dealing with transparent huge pages ----------- */
|
||||||
|
+
|
||||||
|
+static inline void
|
||||||
|
+madvise_thp (void *p, INTERNAL_SIZE_T size)
|
||||||
|
+{
|
||||||
|
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
|
||||||
|
+ /* Do not consider areas smaller than a huge page or if the tunable is
|
||||||
|
+ not active. */
|
||||||
|
+ if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
|
||||||
|
+ return;
|
||||||
|
+ __madvise (p, size, MADV_HUGEPAGE);
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* ------------------- Support for multiple arenas -------------------- */
|
||||||
|
#include "arena.c"
|
||||||
|
|
||||||
|
@@ -2445,6 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
|
||||||
|
if (mm != MAP_FAILED)
|
||||||
|
{
|
||||||
|
+ madvise_thp (mm, size);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
The offset to the start of the mmapped region is stored
|
||||||
|
in the prev_size field of the chunk. This allows us to adjust
|
||||||
|
@@ -2606,6 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
if (size > 0)
|
||||||
|
{
|
||||||
|
brk = (char *) (MORECORE (size));
|
||||||
|
+ if (brk != (char *) (MORECORE_FAILURE))
|
||||||
|
+ madvise_thp (brk, size);
|
||||||
|
LIBC_PROBE (memory_sbrk_more, 2, brk, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2637,6 +2660,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
|
||||||
|
if (mbrk != MAP_FAILED)
|
||||||
|
{
|
||||||
|
+ madvise_thp (mbrk, size);
|
||||||
|
+
|
||||||
|
/* We do not need, and cannot use, another sbrk call to find end */
|
||||||
|
brk = mbrk;
|
||||||
|
snd_brk = brk + size;
|
||||||
|
@@ -2748,6 +2773,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
correction = 0;
|
||||||
|
snd_brk = (char *) (MORECORE (0));
|
||||||
|
}
|
||||||
|
+ else
|
||||||
|
+ madvise_thp (snd_brk, correction);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* handle non-contiguous cases */
|
||||||
|
@@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
|
||||||
|
if (cp == MAP_FAILED)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
+ madvise_thp (cp, new_size);
|
||||||
|
+
|
||||||
|
p = (mchunkptr) (cp + offset);
|
||||||
|
|
||||||
|
assert (aligned_OK (chunk2mem (p)));
|
||||||
|
@@ -5316,6 +5345,24 @@ do_set_mxfast (size_t value)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+static __always_inline int
|
||||||
|
+do_set_hugetlb (int32_t value)
|
||||||
|
+{
|
||||||
|
+ if (value == 1)
|
||||||
|
+ {
|
||||||
|
+ enum malloc_thp_mode_t thp_mode = __malloc_thp_mode ();
|
||||||
|
+ /*
|
||||||
|
+ Only enable THP madvise usage if system does support it and
|
||||||
|
+ has 'madvise' mode. Otherwise the madvise() call is wasteful.
|
||||||
|
+ */
|
||||||
|
+ if (thp_mode == malloc_thp_mode_madvise)
|
||||||
|
+ mp_.thp_pagesize = __malloc_default_thp_pagesize ();
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
int
|
||||||
|
__libc_mallopt (int param_number, int value)
|
||||||
|
{
|
||||||
|
diff --git a/manual/tunables.texi b/manual/tunables.texi
|
||||||
|
index 28ff502990..9ca6e3f603 100644
|
||||||
|
--- a/manual/tunables.texi
|
||||||
|
+++ b/manual/tunables.texi
|
||||||
|
@@ -270,6 +270,16 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
|
||||||
|
passed to @code{malloc} for the largest bin size to enable.
|
||||||
|
@end deftp
|
||||||
|
|
||||||
|
+@deftp Tunable glibc.malloc.hugetlb
|
||||||
|
+This tunable controls the usage of Huge Pages on @code{malloc} calls. The
|
||||||
|
+default value is @code{0}, which disables any additional support on
|
||||||
|
+@code{malloc}.
|
||||||
|
+
|
||||||
|
+Setting its value to @code{1} enables the use of @code{madvise} with
|
||||||
|
+@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled
|
||||||
|
+only if the system supports Transparent Huge Page (currently only on Linux).
|
||||||
|
+@end deftp
|
||||||
|
+
|
||||||
|
@node Dynamic Linking Tunables
|
||||||
|
@section Dynamic Linking Tunables
|
||||||
|
@cindex dynamic linking tunables
|
||||||
|
diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile
|
||||||
|
index a209e85cc4..8eef83c94d 100644
|
||||||
|
--- a/sysdeps/generic/Makefile
|
||||||
|
+++ b/sysdeps/generic/Makefile
|
||||||
|
@@ -27,3 +27,11 @@ sysdep_routines += framestate unwind-pe
|
||||||
|
shared-only-routines += framestate unwind-pe
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
+
|
||||||
|
+ifeq ($(subdir),malloc)
|
||||||
|
+sysdep_malloc_debug_routines += malloc-hugepages
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
+ifeq ($(subdir),misc)
|
||||||
|
+sysdep_routines += malloc-hugepages
|
||||||
|
+endif
|
||||||
|
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..8fb459a263
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/sysdeps/generic/malloc-hugepages.c
|
||||||
|
@@ -0,0 +1,31 @@
|
||||||
|
+/* Huge Page support. Generic implementation.
|
||||||
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
|
+ This file is part of the GNU C Library.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
+ modify it under the terms of the GNU Lesser General Public License as
|
||||||
|
+ published by the Free Software Foundation; either version 2.1 of the
|
||||||
|
+ License, or (at your option) any later version.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ Lesser General Public License for more details.
|
||||||
|
+
|
||||||
|
+ You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ License along with the GNU C Library; see the file COPYING.LIB. If
|
||||||
|
+ not, see <https://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#include <malloc-hugepages.h>
|
||||||
|
+
|
||||||
|
+unsigned long int
|
||||||
|
+__malloc_default_thp_pagesize (void)
|
||||||
|
+{
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+enum malloc_thp_mode_t
|
||||||
|
+__malloc_thp_mode (void)
|
||||||
|
+{
|
||||||
|
+ return malloc_thp_mode_not_supported;
|
||||||
|
+}
|
||||||
|
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..f5a442e328
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/sysdeps/generic/malloc-hugepages.h
|
||||||
|
@@ -0,0 +1,37 @@
|
||||||
|
+/* Malloc huge page support. Generic implementation.
|
||||||
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
|
+ This file is part of the GNU C Library.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
+ modify it under the terms of the GNU Lesser General Public License as
|
||||||
|
+ published by the Free Software Foundation; either version 2.1 of the
|
||||||
|
+ License, or (at your option) any later version.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ Lesser General Public License for more details.
|
||||||
|
+
|
||||||
|
+ You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ License along with the GNU C Library; see the file COPYING.LIB. If
|
||||||
|
+ not, see <https://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#ifndef _MALLOC_HUGEPAGES_H
|
||||||
|
+#define _MALLOC_HUGEPAGES_H
|
||||||
|
+
|
||||||
|
+#include <stddef.h>
|
||||||
|
+
|
||||||
|
+/* Return the default transparent huge page size. */
|
||||||
|
+unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden;
|
||||||
|
+
|
||||||
|
+enum malloc_thp_mode_t
|
||||||
|
+{
|
||||||
|
+ malloc_thp_mode_always,
|
||||||
|
+ malloc_thp_mode_madvise,
|
||||||
|
+ malloc_thp_mode_never,
|
||||||
|
+ malloc_thp_mode_not_supported
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
|
||||||
|
+
|
||||||
|
+#endif /* _MALLOC_HUGEPAGES_H */
|
||||||
|
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..7497e07260
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
|
||||||
|
@@ -0,0 +1,74 @@
|
||||||
|
+/* Huge Page support. Linux implementation.
|
||||||
|
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
|
+ This file is part of the GNU C Library.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
+ modify it under the terms of the GNU Lesser General Public License as
|
||||||
|
+ published by the Free Software Foundation; either version 2.1 of the
|
||||||
|
+ License, or (at your option) any later version.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ Lesser General Public License for more details.
|
||||||
|
+
|
||||||
|
+ You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ License along with the GNU C Library; see the file COPYING.LIB. If
|
||||||
|
+ not, see <https://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#include <intprops.h>
|
||||||
|
+#include <malloc-hugepages.h>
|
||||||
|
+#include <not-cancel.h>
|
||||||
|
+
|
||||||
|
+unsigned long int
|
||||||
|
+__malloc_default_thp_pagesize (void)
|
||||||
|
+{
|
||||||
|
+ int fd = __open64_nocancel (
|
||||||
|
+ "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY);
|
||||||
|
+ if (fd == -1)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ char str[INT_BUFSIZE_BOUND (unsigned long int)];
|
||||||
|
+ ssize_t s = __read_nocancel (fd, str, sizeof (str));
|
||||||
|
+ __close_nocancel (fd);
|
||||||
|
+ if (s < 0)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ unsigned long int r = 0;
|
||||||
|
+ for (ssize_t i = 0; i < s; i++)
|
||||||
|
+ {
|
||||||
|
+ if (str[i] == '\n')
|
||||||
|
+ break;
|
||||||
|
+ r *= 10;
|
||||||
|
+ r += str[i] - '0';
|
||||||
|
+ }
|
||||||
|
+ return r;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+enum malloc_thp_mode_t
|
||||||
|
+__malloc_thp_mode (void)
|
||||||
|
+{
|
||||||
|
+ int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled",
|
||||||
|
+ O_RDONLY);
|
||||||
|
+ if (fd == -1)
|
||||||
|
+ return malloc_thp_mode_not_supported;
|
||||||
|
+
|
||||||
|
+ static const char mode_always[] = "[always] madvise never\n";
|
||||||
|
+ static const char mode_madvise[] = "always [madvise] never\n";
|
||||||
|
+ static const char mode_never[] = "always madvise [never]\n";
|
||||||
|
+
|
||||||
|
+ char str[sizeof(mode_always)];
|
||||||
|
+ ssize_t s = __read_nocancel (fd, str, sizeof (str));
|
||||||
|
+ __close_nocancel (fd);
|
||||||
|
+
|
||||||
|
+ if (s == sizeof (mode_always) - 1)
|
||||||
|
+ {
|
||||||
|
+ if (strcmp (str, mode_always) == 0)
|
||||||
|
+ return malloc_thp_mode_always;
|
||||||
|
+ else if (strcmp (str, mode_madvise) == 0)
|
||||||
|
+ return malloc_thp_mode_madvise;
|
||||||
|
+ else if (strcmp (str, mode_never) == 0)
|
||||||
|
+ return malloc_thp_mode_never;
|
||||||
|
+ }
|
||||||
|
+ return malloc_thp_mode_not_supported;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -0,0 +1,111 @@
|
|||||||
|
From 7478c9959ae409f7b3d63146943575d6ee745352 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Fri, 13 Aug 2021 10:06:04 -0300
|
||||||
|
Subject: [PATCH 2/7] malloc: Add THP/madvise support for sbrk
|
||||||
|
|
||||||
|
To increase effectiveness with Transparent Huge Page with madvise, the
|
||||||
|
large page size is use instead page size for sbrk increment for the
|
||||||
|
main arena.
|
||||||
|
|
||||||
|
Checked on x86_64-linux-gnu.
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
include/libc-pointer-arith.h | 8 ++++++++
|
||||||
|
malloc/malloc.c | 34 +++++++++++++++++++++++++++++-----
|
||||||
|
2 files changed, 37 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/libc-pointer-arith.h b/include/libc-pointer-arith.h
|
||||||
|
index 04ba537617..55dccc10ac 100644
|
||||||
|
--- a/include/libc-pointer-arith.h
|
||||||
|
+++ b/include/libc-pointer-arith.h
|
||||||
|
@@ -60,4 +60,12 @@
|
||||||
|
#define PTR_ALIGN_UP(base, size) \
|
||||||
|
((__typeof__ (base)) ALIGN_UP ((uintptr_t) (base), (size)))
|
||||||
|
|
||||||
|
+/* Check if BASE is aligned on SIZE */
|
||||||
|
+#define PTR_IS_ALIGNED(base, size) \
|
||||||
|
+ ((((uintptr_t) (base)) & (size - 1)) == 0)
|
||||||
|
+
|
||||||
|
+/* Returns the ptrdiff_t diference between P1 and P2. */
|
||||||
|
+#define PTR_DIFF(p1, p2) \
|
||||||
|
+ ((ptrdiff_t)((uintptr_t)(p1) - (uintptr_t)(p2)))
|
||||||
|
+
|
||||||
|
#endif
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index c75841b841..57db4dd9a5 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -2023,6 +2023,16 @@ madvise_thp (void *p, INTERNAL_SIZE_T size)
|
||||||
|
not active. */
|
||||||
|
if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
|
||||||
|
return;
|
||||||
|
+
|
||||||
|
+ /* Linux requires the input address to be page-aligned, and unaligned
|
||||||
|
+ inputs happens only for initial data segment. */
|
||||||
|
+ if (__glibc_unlikely (!PTR_IS_ALIGNED (p, GLRO (dl_pagesize))))
|
||||||
|
+ {
|
||||||
|
+ void *q = PTR_ALIGN_DOWN (p, GLRO (dl_pagesize));
|
||||||
|
+ size += PTR_DIFF (p, q);
|
||||||
|
+ p = q;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
__madvise (p, size, MADV_HUGEPAGE);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
@@ -2609,14 +2619,25 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
size -= old_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
- Round to a multiple of page size.
|
||||||
|
+ Round to a multiple of page size or huge page size.
|
||||||
|
If MORECORE is not contiguous, this ensures that we only call it
|
||||||
|
with whole-page arguments. And if MORECORE is contiguous and
|
||||||
|
this is not first time through, this preserves page-alignment of
|
||||||
|
previous calls. Otherwise, we correct to page-align below.
|
||||||
|
*/
|
||||||
|
|
||||||
|
- size = ALIGN_UP (size, pagesize);
|
||||||
|
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
|
||||||
|
+ /* Defined in brk.c. */
|
||||||
|
+ extern void *__curbrk;
|
||||||
|
+ if (__glibc_unlikely (mp_.thp_pagesize != 0))
|
||||||
|
+ {
|
||||||
|
+ uintptr_t top = ALIGN_UP ((uintptr_t) __curbrk + size,
|
||||||
|
+ mp_.thp_pagesize);
|
||||||
|
+ size = top - (uintptr_t) __curbrk;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+#endif
|
||||||
|
+ size = ALIGN_UP (size, GLRO(dl_pagesize));
|
||||||
|
|
||||||
|
/*
|
||||||
|
Don't try to call MORECORE if argument is so big as to appear
|
||||||
|
@@ -2899,10 +2920,8 @@ systrim (size_t pad, mstate av)
|
||||||
|
long released; /* Amount actually released */
|
||||||
|
char *current_brk; /* address returned by pre-check sbrk call */
|
||||||
|
char *new_brk; /* address returned by post-check sbrk call */
|
||||||
|
- size_t pagesize;
|
||||||
|
long top_area;
|
||||||
|
|
||||||
|
- pagesize = GLRO (dl_pagesize);
|
||||||
|
top_size = chunksize (av->top);
|
||||||
|
|
||||||
|
top_area = top_size - MINSIZE - 1;
|
||||||
|
@@ -2910,7 +2929,12 @@ systrim (size_t pad, mstate av)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Release in pagesize units and round down to the nearest page. */
|
||||||
|
- extra = ALIGN_DOWN(top_area - pad, pagesize);
|
||||||
|
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
|
||||||
|
+ if (__glibc_unlikely (mp_.thp_pagesize != 0))
|
||||||
|
+ extra = ALIGN_DOWN (top_area - pad, mp_.thp_pagesize);
|
||||||
|
+ else
|
||||||
|
+#endif
|
||||||
|
+ extra = ALIGN_DOWN (top_area - pad, GLRO(dl_pagesize));
|
||||||
|
|
||||||
|
if (extra == 0)
|
||||||
|
return 0;
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -0,0 +1,205 @@
|
|||||||
|
From 6cc3ccc67e0dda654fc839377af2818a296f0007 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Mon, 16 Aug 2021 11:14:20 -0300
|
||||||
|
Subject: [PATCH 3/7] malloc: Move mmap logic to its own function
|
||||||
|
|
||||||
|
So it can be used with different pagesize and flags.
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
malloc/malloc.c | 164 ++++++++++++++++++++++++++----------------------
|
||||||
|
1 file changed, 88 insertions(+), 76 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index 57db4dd9a5..6b6ec53db1 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -2412,6 +2412,85 @@ do_check_malloc_state (mstate av)
|
||||||
|
be extended or replaced.
|
||||||
|
*/
|
||||||
|
|
||||||
|
+static void *
|
||||||
|
+sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
|
||||||
|
+{
|
||||||
|
+ long int size;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ Round up size to nearest page. For mmapped chunks, the overhead is one
|
||||||
|
+ SIZE_SZ unit larger than for normal chunks, because there is no
|
||||||
|
+ following chunk whose prev_size field could be used.
|
||||||
|
+
|
||||||
|
+ See the front_misalign handling below, for glibc there is no need for
|
||||||
|
+ further alignments unless we have have high alignment.
|
||||||
|
+ */
|
||||||
|
+ if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
|
||||||
|
+ size = ALIGN_UP (nb + SIZE_SZ, pagesize);
|
||||||
|
+ else
|
||||||
|
+ size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
|
||||||
|
+
|
||||||
|
+ /* Don't try if size wraps around 0. */
|
||||||
|
+ if ((unsigned long) (size) <= (unsigned long) (nb))
|
||||||
|
+ return MAP_FAILED;
|
||||||
|
+
|
||||||
|
+ char *mm = (char *) MMAP (0, size,
|
||||||
|
+ mtag_mmap_flags | PROT_READ | PROT_WRITE,
|
||||||
|
+ extra_flags);
|
||||||
|
+ if (mm == MAP_FAILED)
|
||||||
|
+ return mm;
|
||||||
|
+
|
||||||
|
+ madvise_thp (mm, size);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ The offset to the start of the mmapped region is stored in the prev_size
|
||||||
|
+ field of the chunk. This allows us to adjust returned start address to
|
||||||
|
+ meet alignment requirements here and in memalign(), and still be able to
|
||||||
|
+ compute proper address argument for later munmap in free() and realloc().
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+ INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
|
||||||
|
+
|
||||||
|
+ if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
|
||||||
|
+ {
|
||||||
|
+ /* For glibc, chunk2mem increases the address by CHUNK_HDR_SZ and
|
||||||
|
+ MALLOC_ALIGN_MASK is CHUNK_HDR_SZ-1. Each mmap'ed area is page
|
||||||
|
+ aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */
|
||||||
|
+ assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
|
||||||
|
+ front_misalign = 0;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
|
||||||
|
+
|
||||||
|
+ mchunkptr p; /* the allocated/returned chunk */
|
||||||
|
+
|
||||||
|
+ if (front_misalign > 0)
|
||||||
|
+ {
|
||||||
|
+ ptrdiff_t correction = MALLOC_ALIGNMENT - front_misalign;
|
||||||
|
+ p = (mchunkptr) (mm + correction);
|
||||||
|
+ set_prev_size (p, correction);
|
||||||
|
+ set_head (p, (size - correction) | IS_MMAPPED);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ p = (mchunkptr) mm;
|
||||||
|
+ set_prev_size (p, 0);
|
||||||
|
+ set_head (p, size | IS_MMAPPED);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* update statistics */
|
||||||
|
+ int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
|
||||||
|
+ atomic_max (&mp_.max_n_mmaps, new);
|
||||||
|
+
|
||||||
|
+ unsigned long sum;
|
||||||
|
+ sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
|
||||||
|
+ atomic_max (&mp_.max_mmapped_mem, sum);
|
||||||
|
+
|
||||||
|
+ check_chunk (av, p);
|
||||||
|
+
|
||||||
|
+ return chunk2mem (p);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void *
|
||||||
|
sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
{
|
||||||
|
@@ -2449,81 +2528,10 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
|| ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
|
||||||
|
&& (mp_.n_mmaps < mp_.n_mmaps_max)))
|
||||||
|
{
|
||||||
|
- char *mm; /* return value from mmap call*/
|
||||||
|
-
|
||||||
|
- try_mmap:
|
||||||
|
- /*
|
||||||
|
- Round up size to nearest page. For mmapped chunks, the overhead
|
||||||
|
- is one SIZE_SZ unit larger than for normal chunks, because there
|
||||||
|
- is no following chunk whose prev_size field could be used.
|
||||||
|
-
|
||||||
|
- See the front_misalign handling below, for glibc there is no
|
||||||
|
- need for further alignments unless we have have high alignment.
|
||||||
|
- */
|
||||||
|
- if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
|
||||||
|
- size = ALIGN_UP (nb + SIZE_SZ, pagesize);
|
||||||
|
- else
|
||||||
|
- size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
|
||||||
|
+ char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
|
||||||
|
+ if (mm != MAP_FAILED)
|
||||||
|
+ return mm;
|
||||||
|
tried_mmap = true;
|
||||||
|
-
|
||||||
|
- /* Don't try if size wraps around 0 */
|
||||||
|
- if ((unsigned long) (size) > (unsigned long) (nb))
|
||||||
|
- {
|
||||||
|
- mm = (char *) (MMAP (0, size,
|
||||||
|
- mtag_mmap_flags | PROT_READ | PROT_WRITE, 0));
|
||||||
|
-
|
||||||
|
- if (mm != MAP_FAILED)
|
||||||
|
- {
|
||||||
|
- madvise_thp (mm, size);
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- The offset to the start of the mmapped region is stored
|
||||||
|
- in the prev_size field of the chunk. This allows us to adjust
|
||||||
|
- returned start address to meet alignment requirements here
|
||||||
|
- and in memalign(), and still be able to compute proper
|
||||||
|
- address argument for later munmap in free() and realloc().
|
||||||
|
- */
|
||||||
|
-
|
||||||
|
- if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
|
||||||
|
- {
|
||||||
|
- /* For glibc, chunk2mem increases the address by
|
||||||
|
- CHUNK_HDR_SZ and MALLOC_ALIGN_MASK is
|
||||||
|
- CHUNK_HDR_SZ-1. Each mmap'ed area is page
|
||||||
|
- aligned and therefore definitely
|
||||||
|
- MALLOC_ALIGN_MASK-aligned. */
|
||||||
|
- assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
|
||||||
|
- front_misalign = 0;
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
|
||||||
|
- if (front_misalign > 0)
|
||||||
|
- {
|
||||||
|
- correction = MALLOC_ALIGNMENT - front_misalign;
|
||||||
|
- p = (mchunkptr) (mm + correction);
|
||||||
|
- set_prev_size (p, correction);
|
||||||
|
- set_head (p, (size - correction) | IS_MMAPPED);
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- {
|
||||||
|
- p = (mchunkptr) mm;
|
||||||
|
- set_prev_size (p, 0);
|
||||||
|
- set_head (p, size | IS_MMAPPED);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* update statistics */
|
||||||
|
-
|
||||||
|
- int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
|
||||||
|
- atomic_max (&mp_.max_n_mmaps, new);
|
||||||
|
-
|
||||||
|
- unsigned long sum;
|
||||||
|
- sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
|
||||||
|
- atomic_max (&mp_.max_mmapped_mem, sum);
|
||||||
|
-
|
||||||
|
- check_chunk (av, p);
|
||||||
|
-
|
||||||
|
- return chunk2mem (p);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* There are no usable arenas and mmap also failed. */
|
||||||
|
@@ -2600,8 +2608,12 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (!tried_mmap)
|
||||||
|
- /* We can at least try to use to mmap memory. */
|
||||||
|
- goto try_mmap;
|
||||||
|
+ {
|
||||||
|
+ /* We can at least try to use to mmap memory. */
|
||||||
|
+ char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
|
||||||
|
+ if (mm != MAP_FAILED)
|
||||||
|
+ return mm;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
else /* av == main_arena */
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
476
malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch
Normal file
476
malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch
Normal file
@ -0,0 +1,476 @@
|
|||||||
|
From 98d5fcb8d099a1a868e032c89891c395a2f365c5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Mon, 16 Aug 2021 15:08:27 -0300
|
||||||
|
Subject: [PATCH 4/7] malloc: Add Huge Page support for mmap
|
||||||
|
|
||||||
|
With the morecore hook removed, there is not easy way to provide huge
|
||||||
|
pages support on with glibc allocator without resorting to transparent
|
||||||
|
huge pages. And some users and programs do prefer to use the huge pages
|
||||||
|
directly instead of THP for multiple reasons: no splitting, re-merging
|
||||||
|
by the VM, no TLB shootdowns for running processes, fast allocation
|
||||||
|
from the reserve pool, no competition with the rest of the processes
|
||||||
|
unlike THP, no swapping all, etc.
|
||||||
|
|
||||||
|
This patch extends the 'glibc.malloc.hugetlb' tunable: the value
|
||||||
|
'2' means to use huge pages directly with the system default size,
|
||||||
|
while a positive value means and specific page size that is matched
|
||||||
|
against the supported ones by the system.
|
||||||
|
|
||||||
|
Currently only memory allocated on sysmalloc() is handled, the arenas
|
||||||
|
still uses the default system page size.
|
||||||
|
|
||||||
|
To test is a new rule is added tests-malloc-hugetlb2, which run the
|
||||||
|
addes tests with the required GLIBC_TUNABLE setting. On systems without
|
||||||
|
a reserved huge pages pool, is just stress the mmap(MAP_HUGETLB)
|
||||||
|
allocation failure. To improve test coverage it is required to create
|
||||||
|
a pool with some allocated pages.
|
||||||
|
|
||||||
|
Checked on x86_64-linux-gnu.
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
NEWS | 8 +-
|
||||||
|
Rules | 17 +++
|
||||||
|
elf/dl-tunables.list | 3 +-
|
||||||
|
elf/tst-rtld-list-tunables.exp | 2 +-
|
||||||
|
malloc/Makefile | 8 +-
|
||||||
|
malloc/arena.c | 4 +-
|
||||||
|
malloc/malloc.c | 31 ++++-
|
||||||
|
manual/tunables.texi | 7 ++
|
||||||
|
sysdeps/generic/malloc-hugepages.c | 8 ++
|
||||||
|
sysdeps/generic/malloc-hugepages.h | 7 ++
|
||||||
|
sysdeps/unix/sysv/linux/malloc-hugepages.c | 127 +++++++++++++++++++++
|
||||||
|
11 files changed, 207 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/NEWS b/NEWS
|
||||||
|
index 3b94dd209c..c7200cd4e8 100644
|
||||||
|
--- a/NEWS
|
||||||
|
+++ b/NEWS
|
||||||
|
@@ -93,9 +93,11 @@ Major new features:
|
||||||
|
Restartable Sequences.
|
||||||
|
|
||||||
|
* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
|
||||||
|
- make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
|
||||||
|
- Setting this might improve performance with Transparent Huge Pages madvise
|
||||||
|
- mode depending of the workload.
|
||||||
|
+ either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk
|
||||||
|
+ or to use huge pages directly with mmap calls with the MAP_HUGETLB
|
||||||
|
+ flags). The former can improve performance when Transparent Huge Pages
|
||||||
|
+ is set to 'madvise' mode while the latter uses the system reserved
|
||||||
|
+ huge pages.
|
||||||
|
|
||||||
|
Deprecated and removed features, and other changes affecting compatibility:
|
||||||
|
|
||||||
|
diff --git a/Rules b/Rules
|
||||||
|
index 5f5d9ba4cc..be34982daa 100644
|
||||||
|
--- a/Rules
|
||||||
|
+++ b/Rules
|
||||||
|
@@ -158,6 +158,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
|
||||||
|
$(tests-mcheck:%=$(objpfx)%-mcheck.out) \
|
||||||
|
$(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
|
||||||
|
$(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
|
||||||
|
+ $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \
|
||||||
|
$(tests-special) $(tests-printers-out)
|
||||||
|
xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
|
||||||
|
endif
|
||||||
|
@@ -170,6 +171,7 @@ else
|
||||||
|
tests-expected = $(tests) $(tests-internal) $(tests-printers) \
|
||||||
|
$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
|
||||||
|
$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
|
||||||
|
+ $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \
|
||||||
|
$(tests-mcheck:%=%-mcheck)
|
||||||
|
endif
|
||||||
|
tests:
|
||||||
|
@@ -199,6 +201,7 @@ endif
|
||||||
|
binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
|
||||||
|
binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
|
||||||
|
binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
|
||||||
|
+binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2)
|
||||||
|
else
|
||||||
|
binaries-all-notests =
|
||||||
|
binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
|
||||||
|
@@ -211,6 +214,7 @@ binaries-pie-notests =
|
||||||
|
binaries-mcheck-tests =
|
||||||
|
binaries-malloc-check-tests =
|
||||||
|
binaries-malloc-hugetlb1-tests =
|
||||||
|
+binaries-malloc-hugetlb2-tests =
|
||||||
|
endif
|
||||||
|
|
||||||
|
binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
|
||||||
|
@@ -259,6 +263,14 @@ $(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o
|
||||||
|
$(+link-tests)
|
||||||
|
endif
|
||||||
|
|
||||||
|
+ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" ""
|
||||||
|
+$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \
|
||||||
|
+ $(link-extra-libs-tests) \
|
||||||
|
+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
|
||||||
|
+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
|
||||||
|
+ $(+link-tests)
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
ifneq "$(strip $(binaries-pie-tests))" ""
|
||||||
|
$(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
|
||||||
|
$(link-extra-libs-tests) \
|
||||||
|
@@ -302,6 +314,11 @@ $(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
|
||||||
|
endef
|
||||||
|
$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
|
||||||
|
|
||||||
|
+# All malloc-hugetlb2 tests will be run with GLIBC_TUNABLE=glibc.malloc.hugetlb=2
|
||||||
|
+define malloc-hugetlb2-ENVS
|
||||||
|
+$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2
|
||||||
|
+endef
|
||||||
|
+$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t))))
|
||||||
|
|
||||||
|
# mcheck tests need the debug DSO to support -lmcheck.
|
||||||
|
define mcheck-ENVS
|
||||||
|
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
|
||||||
|
index d1fd3f3e91..845d521a43 100644
|
||||||
|
--- a/elf/dl-tunables.list
|
||||||
|
+++ b/elf/dl-tunables.list
|
||||||
|
@@ -93,9 +93,8 @@ glibc {
|
||||||
|
security_level: SXID_IGNORE
|
||||||
|
}
|
||||||
|
hugetlb {
|
||||||
|
- type: INT_32
|
||||||
|
+ type: SIZE_T
|
||||||
|
minval: 0
|
||||||
|
- maxval: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cpu {
|
||||||
|
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
|
||||||
|
index d8e363f2c5..cdfdb56a94 100644
|
||||||
|
--- a/elf/tst-rtld-list-tunables.exp
|
||||||
|
+++ b/elf/tst-rtld-list-tunables.exp
|
||||||
|
@@ -1,7 +1,7 @@
|
||||||
|
glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
|
||||||
|
glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
|
||||||
|
glibc.malloc.check: 0 (min: 0, max: 3)
|
||||||
|
-glibc.malloc.hugetlb: 0 (min: 0, max: 1)
|
||||||
|
+glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
|
||||||
|
glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
|
||||||
|
glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
|
||||||
|
glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
|
||||||
|
diff --git a/malloc/Makefile b/malloc/Makefile
|
||||||
|
index 0137595e17..e9a6666d22 100644
|
||||||
|
--- a/malloc/Makefile
|
||||||
|
+++ b/malloc/Makefile
|
||||||
|
@@ -78,9 +78,9 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
|
||||||
|
tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
|
||||||
|
$(tests-static),$(tests))
|
||||||
|
|
||||||
|
-# Run all testes with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the
|
||||||
|
-# Transparent Huge Pages support. We need exclude some tests that define
|
||||||
|
-# the ENV vars.
|
||||||
|
+# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb={1,2} which check
|
||||||
|
+# the Transparent Huge Pages support (1) or automatic huge page support (2).
|
||||||
|
+# We need exclude some tests that define the ENV vars.
|
||||||
|
tests-exclude-hugetlb1 = \
|
||||||
|
tst-compathooks-off \
|
||||||
|
tst-compathooks-on \
|
||||||
|
@@ -93,6 +93,8 @@ tests-exclude-hugetlb1 = \
|
||||||
|
tst-mallocstate
|
||||||
|
tests-malloc-hugetlb1 = \
|
||||||
|
$(filter-out $(tests-exclude-hugetlb1), $(tests))
|
||||||
|
+tests-malloc-hugetlb2 = \
|
||||||
|
+ $(filter-out $(tests-exclude-hugetlb1), $(tests))
|
||||||
|
|
||||||
|
# -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
|
||||||
|
ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
|
||||||
|
diff --git a/malloc/arena.c b/malloc/arena.c
|
||||||
|
index cd00c7bef4..9a6e1af2bd 100644
|
||||||
|
--- a/malloc/arena.c
|
||||||
|
+++ b/malloc/arena.c
|
||||||
|
@@ -230,7 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
|
||||||
|
TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
|
||||||
|
#endif
|
||||||
|
TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
|
||||||
|
-TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
|
||||||
|
+TUNABLE_CALLBACK_FNDECL (set_hugetlb, size_t)
|
||||||
|
#else
|
||||||
|
/* Initialization routine. */
|
||||||
|
#include <string.h>
|
||||||
|
@@ -331,7 +331,7 @@ ptmalloc_init (void)
|
||||||
|
TUNABLE_CALLBACK (set_tcache_unsorted_limit));
|
||||||
|
# endif
|
||||||
|
TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
|
||||||
|
- TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
|
||||||
|
+ TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb));
|
||||||
|
#else
|
||||||
|
if (__glibc_likely (_environ != NULL))
|
||||||
|
{
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index 6b6ec53db1..75efdc2ee7 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -1883,6 +1883,10 @@ struct malloc_par
|
||||||
|
#if HAVE_TUNABLES
|
||||||
|
/* Transparent Large Page support. */
|
||||||
|
INTERNAL_SIZE_T thp_pagesize;
|
||||||
|
+ /* A value different than 0 means to align mmap allocation to hp_pagesize
|
||||||
|
+ add hp_flags on flags. */
|
||||||
|
+ INTERNAL_SIZE_T hp_pagesize;
|
||||||
|
+ int hp_flags;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Memory map support */
|
||||||
|
@@ -2440,7 +2444,10 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
|
||||||
|
if (mm == MAP_FAILED)
|
||||||
|
return mm;
|
||||||
|
|
||||||
|
- madvise_thp (mm, size);
|
||||||
|
+#ifdef MAP_HUGETLB
|
||||||
|
+ if (!(extra_flags & MAP_HUGETLB))
|
||||||
|
+ madvise_thp (mm, size);
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
The offset to the start of the mmapped region is stored in the prev_size
|
||||||
|
@@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
|| ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
|
||||||
|
&& (mp_.n_mmaps < mp_.n_mmaps_max)))
|
||||||
|
{
|
||||||
|
- char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
|
||||||
|
+ char *mm;
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+ if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
|
||||||
|
+ {
|
||||||
|
+ /* There is no need to isse the THP madvise call if Huge Pages are
|
||||||
|
+ used directly. */
|
||||||
|
+ mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
|
||||||
|
+ if (mm != MAP_FAILED)
|
||||||
|
+ return mm;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+ mm = sysmalloc_mmap (nb, pagesize, 0, av);
|
||||||
|
if (mm != MAP_FAILED)
|
||||||
|
return mm;
|
||||||
|
tried_mmap = true;
|
||||||
|
@@ -2609,7 +2627,9 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
}
|
||||||
|
else if (!tried_mmap)
|
||||||
|
{
|
||||||
|
- /* We can at least try to use to mmap memory. */
|
||||||
|
+ /* We can at least try to use to mmap memory. If new_heap fails
|
||||||
|
+ it is unlikely that trying to allocate huge pages will
|
||||||
|
+ succeed. */
|
||||||
|
char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
|
||||||
|
if (mm != MAP_FAILED)
|
||||||
|
return mm;
|
||||||
|
@@ -5383,7 +5403,7 @@ do_set_mxfast (size_t value)
|
||||||
|
|
||||||
|
#if HAVE_TUNABLES
|
||||||
|
static __always_inline int
|
||||||
|
-do_set_hugetlb (int32_t value)
|
||||||
|
+do_set_hugetlb (size_t value)
|
||||||
|
{
|
||||||
|
if (value == 1)
|
||||||
|
{
|
||||||
|
@@ -5395,6 +5415,9 @@ do_set_hugetlb (int32_t value)
|
||||||
|
if (thp_mode == malloc_thp_mode_madvise)
|
||||||
|
mp_.thp_pagesize = __malloc_default_thp_pagesize ();
|
||||||
|
}
|
||||||
|
+ else if (value >= 2)
|
||||||
|
+ __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize,
|
||||||
|
+ &mp_.hp_flags);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
diff --git a/manual/tunables.texi b/manual/tunables.texi
|
||||||
|
index 9ca6e3f603..58a47b2e9b 100644
|
||||||
|
--- a/manual/tunables.texi
|
||||||
|
+++ b/manual/tunables.texi
|
||||||
|
@@ -278,6 +278,13 @@ default value is @code{0}, which disables any additional support on
|
||||||
|
Setting its value to @code{1} enables the use of @code{madvise} with
|
||||||
|
@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled
|
||||||
|
only if the system supports Transparent Huge Page (currently only on Linux).
|
||||||
|
+
|
||||||
|
+Setting its value to @code{2} enables the use of Huge Page directly with
|
||||||
|
+@code{mmap} with the use of @code{MAP_HUGETLB} flag. The huge page size
|
||||||
|
+to use will be the default one provided by the system. A value larger than
|
||||||
|
+@code{2} specifies huge page size, which will be matched against the system
|
||||||
|
+supported ones. If provided value is invalid, @code{MAP_HUGETLB} will not
|
||||||
|
+be used.
|
||||||
|
@end deftp
|
||||||
|
|
||||||
|
@node Dynamic Linking Tunables
|
||||||
|
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
|
||||||
|
index 8fb459a263..946284a33c 100644
|
||||||
|
--- a/sysdeps/generic/malloc-hugepages.c
|
||||||
|
+++ b/sysdeps/generic/malloc-hugepages.c
|
||||||
|
@@ -29,3 +29,11 @@ __malloc_thp_mode (void)
|
||||||
|
{
|
||||||
|
return malloc_thp_mode_not_supported;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+/* Return the default transparent huge page size. */
|
||||||
|
+void
|
||||||
|
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
|
||||||
|
+{
|
||||||
|
+ *pagesize = 0;
|
||||||
|
+ *flags = 0;
|
||||||
|
+}
|
||||||
|
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
|
||||||
|
index f5a442e328..75cda3796a 100644
|
||||||
|
--- a/sysdeps/generic/malloc-hugepages.h
|
||||||
|
+++ b/sysdeps/generic/malloc-hugepages.h
|
||||||
|
@@ -34,4 +34,11 @@ enum malloc_thp_mode_t
|
||||||
|
|
||||||
|
enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
|
||||||
|
|
||||||
|
+/* Return the supported huge page size from the REQUESTED sizes on PAGESIZE
|
||||||
|
+ along with the required extra mmap flags on FLAGS, Requesting the value
|
||||||
|
+ of 0 returns the default huge page size, otherwise the value will be
|
||||||
|
+ matched against the sizes supported by the system. */
|
||||||
|
+void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
|
||||||
|
+ attribute_hidden;
|
||||||
|
+
|
||||||
|
#endif /* _MALLOC_HUGEPAGES_H */
|
||||||
|
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
|
||||||
|
index 7497e07260..0e05291d61 100644
|
||||||
|
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
|
||||||
|
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
|
||||||
|
@@ -17,8 +17,10 @@
|
||||||
|
not, see <https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <intprops.h>
|
||||||
|
+#include <dirent.h>
|
||||||
|
#include <malloc-hugepages.h>
|
||||||
|
#include <not-cancel.h>
|
||||||
|
+#include <sys/mman.h>
|
||||||
|
|
||||||
|
unsigned long int
|
||||||
|
__malloc_default_thp_pagesize (void)
|
||||||
|
@@ -72,3 +74,128 @@ __malloc_thp_mode (void)
|
||||||
|
}
|
||||||
|
return malloc_thp_mode_not_supported;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+static size_t
|
||||||
|
+malloc_default_hugepage_size (void)
|
||||||
|
+{
|
||||||
|
+ int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
|
||||||
|
+ if (fd == -1)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ size_t hpsize = 0;
|
||||||
|
+
|
||||||
|
+ char buf[512];
|
||||||
|
+ off64_t off = 0;
|
||||||
|
+ while (1)
|
||||||
|
+ {
|
||||||
|
+ ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
|
||||||
|
+ if (r < 0)
|
||||||
|
+ break;
|
||||||
|
+ buf[r] = '\0';
|
||||||
|
+
|
||||||
|
+ /* If the tag is not found, read the last line again. */
|
||||||
|
+ const char *s = strstr (buf, "Hugepagesize:");
|
||||||
|
+ if (s == NULL)
|
||||||
|
+ {
|
||||||
|
+ char *nl = strrchr (buf, '\n');
|
||||||
|
+ if (nl == NULL)
|
||||||
|
+ break;
|
||||||
|
+ off += (nl + 1) - buf;
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* The default huge page size is in the form:
|
||||||
|
+ Hugepagesize: NUMBER kB */
|
||||||
|
+ s += sizeof ("Hugepagesize: ") - 1;
|
||||||
|
+ for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
|
||||||
|
+ {
|
||||||
|
+ if (s[i] == ' ')
|
||||||
|
+ continue;
|
||||||
|
+ hpsize *= 10;
|
||||||
|
+ hpsize += s[i] - '0';
|
||||||
|
+ }
|
||||||
|
+ hpsize *= 1024;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ __close_nocancel (fd);
|
||||||
|
+
|
||||||
|
+ return hpsize;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline int
|
||||||
|
+hugepage_flags (size_t pagesize)
|
||||||
|
+{
|
||||||
|
+ return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
|
||||||
|
+{
|
||||||
|
+ *pagesize = 0;
|
||||||
|
+ *flags = 0;
|
||||||
|
+
|
||||||
|
+ if (requested == 0)
|
||||||
|
+ {
|
||||||
|
+ *pagesize = malloc_default_hugepage_size ();
|
||||||
|
+ if (*pagesize != 0)
|
||||||
|
+ *flags = hugepage_flags (*pagesize);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Each entry represents a supported huge page in the form of:
|
||||||
|
+ hugepages-<size>kB. */
|
||||||
|
+ int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
|
||||||
|
+ O_RDONLY | O_DIRECTORY, 0);
|
||||||
|
+ if (dirfd == -1)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ char buffer[1024];
|
||||||
|
+ while (true)
|
||||||
|
+ {
|
||||||
|
+#if !IS_IN(libc)
|
||||||
|
+# define __getdents64 getdents64
|
||||||
|
+#endif
|
||||||
|
+ ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
|
||||||
|
+ if (ret == -1)
|
||||||
|
+ break;
|
||||||
|
+ else if (ret == 0)
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ bool found = false;
|
||||||
|
+ char *begin = buffer, *end = buffer + ret;
|
||||||
|
+ while (begin != end)
|
||||||
|
+ {
|
||||||
|
+ unsigned short int d_reclen;
|
||||||
|
+ memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
|
||||||
|
+ sizeof (d_reclen));
|
||||||
|
+ const char *dname = begin + offsetof (struct dirent64, d_name);
|
||||||
|
+ begin += d_reclen;
|
||||||
|
+
|
||||||
|
+ if (dname[0] == '.'
|
||||||
|
+ || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ size_t hpsize = 0;
|
||||||
|
+ const char *sizestr = dname + sizeof ("hugepages-") - 1;
|
||||||
|
+ for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
|
||||||
|
+ {
|
||||||
|
+ hpsize *= 10;
|
||||||
|
+ hpsize += sizestr[i] - '0';
|
||||||
|
+ }
|
||||||
|
+ hpsize *= 1024;
|
||||||
|
+
|
||||||
|
+ if (hpsize == requested)
|
||||||
|
+ {
|
||||||
|
+ *pagesize = hpsize;
|
||||||
|
+ *flags = hugepage_flags (*pagesize);
|
||||||
|
+ found = true;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (found)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ __close_nocancel (dirfd);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -0,0 +1,338 @@
|
|||||||
|
From c1beb51d08d3d7ec935b0a2419b4c6fad91d1969 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Fri, 20 Aug 2021 13:22:35 -0300
|
||||||
|
Subject: [PATCH 5/7] malloc: Add Huge Page support to arenas
|
||||||
|
|
||||||
|
It is enabled as default for glibc.malloc.hugetlb set to 2 or higher.
|
||||||
|
It also uses a non configurable minimum value and maximum value,
|
||||||
|
currently set respectively to 1 and 4 selected huge page size.
|
||||||
|
|
||||||
|
The arena allocation with huge pages does not use MAP_NORESERVE. As
|
||||||
|
indicate by kernel internal documentation [1], the flag might trigger
|
||||||
|
a SIGBUS on soft page faults if at memory access there is no left
|
||||||
|
pages in the pool.
|
||||||
|
|
||||||
|
On systems without a reserved huge pages pool, is just stress the
|
||||||
|
mmap(MAP_HUGETLB) allocation failure. To improve test coverage it is
|
||||||
|
required to create a pool with some allocated pages.
|
||||||
|
|
||||||
|
Checked on x86_64-linux-gnu with no reserved pages, 10 reserved pages
|
||||||
|
(which trigger mmap(MAP_HUGETBL) failures) and with 256 reserved pages
|
||||||
|
(which does not trigger mmap(MAP_HUGETLB) failures).
|
||||||
|
|
||||||
|
[1] https://www.kernel.org/doc/html/v4.18/vm/hugetlbfs_reserv.html#resv-map-modifications
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
malloc/Makefile | 7 ++-
|
||||||
|
malloc/arena.c | 134 +++++++++++++++++++++++++++++++++---------------
|
||||||
|
malloc/malloc.c | 2 +-
|
||||||
|
3 files changed, 99 insertions(+), 44 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/malloc/Makefile b/malloc/Makefile
|
||||||
|
index e9a6666d22..451eb84612 100644
|
||||||
|
--- a/malloc/Makefile
|
||||||
|
+++ b/malloc/Makefile
|
||||||
|
@@ -91,10 +91,15 @@ tests-exclude-hugetlb1 = \
|
||||||
|
tst-malloc-usable \
|
||||||
|
tst-malloc-usable-tunables \
|
||||||
|
tst-mallocstate
|
||||||
|
+# The tst-free-errno relies on the used malloc page size to mmap an
|
||||||
|
+# overlapping region.
|
||||||
|
+tests-exclude-hugetlb2 = \
|
||||||
|
+ $(tests-exclude-hugetlb1) \
|
||||||
|
+ tst-free-errno
|
||||||
|
tests-malloc-hugetlb1 = \
|
||||||
|
$(filter-out $(tests-exclude-hugetlb1), $(tests))
|
||||||
|
tests-malloc-hugetlb2 = \
|
||||||
|
- $(filter-out $(tests-exclude-hugetlb1), $(tests))
|
||||||
|
+ $(filter-out $(tests-exclude-hugetlb2), $(tests))
|
||||||
|
|
||||||
|
# -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
|
||||||
|
ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
|
||||||
|
diff --git a/malloc/arena.c b/malloc/arena.c
|
||||||
|
index 9a6e1af2bd..e1852f8597 100644
|
||||||
|
--- a/malloc/arena.c
|
||||||
|
+++ b/malloc/arena.c
|
||||||
|
@@ -41,6 +41,29 @@
|
||||||
|
mmap threshold, so that requests with a size just below that
|
||||||
|
threshold can be fulfilled without creating too many heaps. */
|
||||||
|
|
||||||
|
+/* When huge pages are used to create new arenas, the maximum and minumum
|
||||||
|
+ size are based on the runtime defined huge page size. */
|
||||||
|
+
|
||||||
|
+static inline size_t
|
||||||
|
+heap_min_size (void)
|
||||||
|
+{
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+ return mp_.hp_pagesize == 0 ? HEAP_MIN_SIZE : mp_.hp_pagesize;
|
||||||
|
+#else
|
||||||
|
+ return HEAP_MIN_SIZE;
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline size_t
|
||||||
|
+heap_max_size (void)
|
||||||
|
+{
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+ return mp_.hp_pagesize == 0 ? HEAP_MAX_SIZE : mp_.hp_pagesize * 4;
|
||||||
|
+#else
|
||||||
|
+ return HEAP_MAX_SIZE;
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/***************************************************************************/
|
||||||
|
|
||||||
|
#define top(ar_ptr) ((ar_ptr)->top)
|
||||||
|
@@ -56,10 +79,11 @@ typedef struct _heap_info
|
||||||
|
size_t size; /* Current size in bytes. */
|
||||||
|
size_t mprotect_size; /* Size in bytes that has been mprotected
|
||||||
|
PROT_READ|PROT_WRITE. */
|
||||||
|
+ size_t pagesize; /* Page size used when allocating the arena. */
|
||||||
|
/* Make sure the following data is properly aligned, particularly
|
||||||
|
that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
|
||||||
|
MALLOC_ALIGNMENT. */
|
||||||
|
- char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
|
||||||
|
+ char pad[-3 * SIZE_SZ & MALLOC_ALIGN_MASK];
|
||||||
|
} heap_info;
|
||||||
|
|
||||||
|
/* Get a compile-time error if the heap_info padding is not correct
|
||||||
|
@@ -125,10 +149,18 @@ static bool __malloc_initialized = false;
|
||||||
|
|
||||||
|
/* find the heap and corresponding arena for a given ptr */
|
||||||
|
|
||||||
|
-#define heap_for_ptr(ptr) \
|
||||||
|
- ((heap_info *) ((unsigned long) (ptr) & ~(HEAP_MAX_SIZE - 1)))
|
||||||
|
-#define arena_for_chunk(ptr) \
|
||||||
|
- (chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr)
|
||||||
|
+static inline heap_info *
|
||||||
|
+heap_for_ptr (void *ptr)
|
||||||
|
+{
|
||||||
|
+ size_t max_size = heap_max_size ();
|
||||||
|
+ return PTR_ALIGN_DOWN (ptr, max_size);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline struct malloc_state *
|
||||||
|
+arena_for_chunk (mchunkptr ptr)
|
||||||
|
+{
|
||||||
|
+ return chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr;
|
||||||
|
+}
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************/
|
||||||
|
@@ -443,71 +475,72 @@ static char *aligned_heap_area;
|
||||||
|
of the page size. */
|
||||||
|
|
||||||
|
static heap_info *
|
||||||
|
-new_heap (size_t size, size_t top_pad)
|
||||||
|
+alloc_new_heap (size_t size, size_t top_pad, size_t pagesize,
|
||||||
|
+ int mmap_flags)
|
||||||
|
{
|
||||||
|
- size_t pagesize = GLRO (dl_pagesize);
|
||||||
|
char *p1, *p2;
|
||||||
|
unsigned long ul;
|
||||||
|
heap_info *h;
|
||||||
|
+ size_t min_size = heap_min_size ();
|
||||||
|
+ size_t max_size = heap_max_size ();
|
||||||
|
|
||||||
|
- if (size + top_pad < HEAP_MIN_SIZE)
|
||||||
|
- size = HEAP_MIN_SIZE;
|
||||||
|
- else if (size + top_pad <= HEAP_MAX_SIZE)
|
||||||
|
+ if (size + top_pad < min_size)
|
||||||
|
+ size = min_size;
|
||||||
|
+ else if (size + top_pad <= max_size)
|
||||||
|
size += top_pad;
|
||||||
|
- else if (size > HEAP_MAX_SIZE)
|
||||||
|
+ else if (size > max_size)
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
- size = HEAP_MAX_SIZE;
|
||||||
|
+ size = max_size;
|
||||||
|
size = ALIGN_UP (size, pagesize);
|
||||||
|
|
||||||
|
- /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
|
||||||
|
+ /* A memory region aligned to a multiple of max_size is needed.
|
||||||
|
No swap space needs to be reserved for the following large
|
||||||
|
mapping (on Linux, this is the case for all non-writable mappings
|
||||||
|
anyway). */
|
||||||
|
p2 = MAP_FAILED;
|
||||||
|
if (aligned_heap_area)
|
||||||
|
{
|
||||||
|
- p2 = (char *) MMAP (aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE,
|
||||||
|
- MAP_NORESERVE);
|
||||||
|
+ p2 = (char *) MMAP (aligned_heap_area, max_size, PROT_NONE, mmap_flags);
|
||||||
|
aligned_heap_area = NULL;
|
||||||
|
- if (p2 != MAP_FAILED && ((unsigned long) p2 & (HEAP_MAX_SIZE - 1)))
|
||||||
|
+ if (p2 != MAP_FAILED && ((unsigned long) p2 & (max_size - 1)))
|
||||||
|
{
|
||||||
|
- __munmap (p2, HEAP_MAX_SIZE);
|
||||||
|
+ __munmap (p2, max_size);
|
||||||
|
p2 = MAP_FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (p2 == MAP_FAILED)
|
||||||
|
{
|
||||||
|
- p1 = (char *) MMAP (0, HEAP_MAX_SIZE << 1, PROT_NONE, MAP_NORESERVE);
|
||||||
|
+ p1 = (char *) MMAP (0, max_size << 1, PROT_NONE, mmap_flags);
|
||||||
|
if (p1 != MAP_FAILED)
|
||||||
|
{
|
||||||
|
- p2 = (char *) (((unsigned long) p1 + (HEAP_MAX_SIZE - 1))
|
||||||
|
- & ~(HEAP_MAX_SIZE - 1));
|
||||||
|
+ p2 = (char *) (((unsigned long) p1 + (max_size - 1))
|
||||||
|
+ & ~(max_size - 1));
|
||||||
|
ul = p2 - p1;
|
||||||
|
if (ul)
|
||||||
|
__munmap (p1, ul);
|
||||||
|
else
|
||||||
|
- aligned_heap_area = p2 + HEAP_MAX_SIZE;
|
||||||
|
- __munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
|
||||||
|
+ aligned_heap_area = p2 + max_size;
|
||||||
|
+ __munmap (p2 + max_size, max_size - ul);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
- /* Try to take the chance that an allocation of only HEAP_MAX_SIZE
|
||||||
|
+ /* Try to take the chance that an allocation of only max_size
|
||||||
|
is already aligned. */
|
||||||
|
- p2 = (char *) MMAP (0, HEAP_MAX_SIZE, PROT_NONE, MAP_NORESERVE);
|
||||||
|
+ p2 = (char *) MMAP (0, max_size, PROT_NONE, mmap_flags);
|
||||||
|
if (p2 == MAP_FAILED)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
- if ((unsigned long) p2 & (HEAP_MAX_SIZE - 1))
|
||||||
|
+ if ((unsigned long) p2 & (max_size - 1))
|
||||||
|
{
|
||||||
|
- __munmap (p2, HEAP_MAX_SIZE);
|
||||||
|
+ __munmap (p2, max_size);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (__mprotect (p2, size, mtag_mmap_flags | PROT_READ | PROT_WRITE) != 0)
|
||||||
|
{
|
||||||
|
- __munmap (p2, HEAP_MAX_SIZE);
|
||||||
|
+ __munmap (p2, max_size);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -516,22 +549,42 @@ new_heap (size_t size, size_t top_pad)
|
||||||
|
h = (heap_info *) p2;
|
||||||
|
h->size = size;
|
||||||
|
h->mprotect_size = size;
|
||||||
|
+ h->pagesize = pagesize;
|
||||||
|
LIBC_PROBE (memory_heap_new, 2, h, h->size);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static heap_info *
|
||||||
|
+new_heap (size_t size, size_t top_pad)
|
||||||
|
+{
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+ if (__glibc_unlikely (mp_.hp_pagesize != 0))
|
||||||
|
+ {
|
||||||
|
+ /* MAP_NORESERVE is not used for huge pages because some kernel may
|
||||||
|
+ not reserve the mmap region and a subsequent access may trigger
|
||||||
|
+ a SIGBUS if there is no free pages in the pool. */
|
||||||
|
+ heap_info *h = alloc_new_heap (size, top_pad, mp_.hp_pagesize,
|
||||||
|
+ mp_.hp_flags);
|
||||||
|
+ if (h != NULL)
|
||||||
|
+ return h;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+ return alloc_new_heap (size, top_pad, GLRO (dl_pagesize), MAP_NORESERVE);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Grow a heap. size is automatically rounded up to a
|
||||||
|
multiple of the page size. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
grow_heap (heap_info *h, long diff)
|
||||||
|
{
|
||||||
|
- size_t pagesize = GLRO (dl_pagesize);
|
||||||
|
+ size_t pagesize = h->pagesize;
|
||||||
|
+ size_t max_size = heap_max_size ();
|
||||||
|
long new_size;
|
||||||
|
|
||||||
|
diff = ALIGN_UP (diff, pagesize);
|
||||||
|
new_size = (long) h->size + diff;
|
||||||
|
- if ((unsigned long) new_size > (unsigned long) HEAP_MAX_SIZE)
|
||||||
|
+ if ((unsigned long) new_size > (unsigned long) max_size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if ((unsigned long) new_size > h->mprotect_size)
|
||||||
|
@@ -581,21 +634,14 @@ shrink_heap (heap_info *h, long diff)
|
||||||
|
|
||||||
|
/* Delete a heap. */
|
||||||
|
|
||||||
|
-#define delete_heap(heap) \
|
||||||
|
- do { \
|
||||||
|
- if ((char *) (heap) + HEAP_MAX_SIZE == aligned_heap_area) \
|
||||||
|
- aligned_heap_area = NULL; \
|
||||||
|
- __munmap ((char *) (heap), HEAP_MAX_SIZE); \
|
||||||
|
- } while (0)
|
||||||
|
-
|
||||||
|
static int
|
||||||
|
heap_trim (heap_info *heap, size_t pad)
|
||||||
|
{
|
||||||
|
mstate ar_ptr = heap->ar_ptr;
|
||||||
|
- unsigned long pagesz = GLRO (dl_pagesize);
|
||||||
|
mchunkptr top_chunk = top (ar_ptr), p;
|
||||||
|
heap_info *prev_heap;
|
||||||
|
long new_size, top_size, top_area, extra, prev_size, misalign;
|
||||||
|
+ size_t max_size = heap_max_size ();
|
||||||
|
|
||||||
|
/* Can this heap go away completely? */
|
||||||
|
while (top_chunk == chunk_at_offset (heap, sizeof (*heap)))
|
||||||
|
@@ -612,19 +658,23 @@ heap_trim (heap_info *heap, size_t pad)
|
||||||
|
assert (new_size > 0 && new_size < (long) (2 * MINSIZE));
|
||||||
|
if (!prev_inuse (p))
|
||||||
|
new_size += prev_size (p);
|
||||||
|
- assert (new_size > 0 && new_size < HEAP_MAX_SIZE);
|
||||||
|
- if (new_size + (HEAP_MAX_SIZE - prev_heap->size) < pad + MINSIZE + pagesz)
|
||||||
|
+ assert (new_size > 0 && new_size < max_size);
|
||||||
|
+ if (new_size + (max_size - prev_heap->size) < pad + MINSIZE
|
||||||
|
+ + heap->pagesize)
|
||||||
|
break;
|
||||||
|
ar_ptr->system_mem -= heap->size;
|
||||||
|
LIBC_PROBE (memory_heap_free, 2, heap, heap->size);
|
||||||
|
- delete_heap (heap);
|
||||||
|
+ if ((char *) heap + max_size == aligned_heap_area)
|
||||||
|
+ aligned_heap_area = NULL;
|
||||||
|
+ __munmap (heap, max_size);
|
||||||
|
heap = prev_heap;
|
||||||
|
if (!prev_inuse (p)) /* consolidate backward */
|
||||||
|
{
|
||||||
|
p = prev_chunk (p);
|
||||||
|
unlink_chunk (ar_ptr, p);
|
||||||
|
}
|
||||||
|
- assert (((unsigned long) ((char *) p + new_size) & (pagesz - 1)) == 0);
|
||||||
|
+ assert (((unsigned long) ((char *) p + new_size) & (heap->pagesize - 1))
|
||||||
|
+ == 0);
|
||||||
|
assert (((char *) p + new_size) == ((char *) heap + heap->size));
|
||||||
|
top (ar_ptr) = top_chunk = p;
|
||||||
|
set_head (top_chunk, new_size | PREV_INUSE);
|
||||||
|
@@ -644,7 +694,7 @@ heap_trim (heap_info *heap, size_t pad)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Release in pagesize units and round down to the nearest page. */
|
||||||
|
- extra = ALIGN_DOWN(top_area - pad, pagesz);
|
||||||
|
+ extra = ALIGN_DOWN(top_area - pad, heap->pagesize);
|
||||||
|
if (extra == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index 75efdc2ee7..1698d45d1e 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -5302,7 +5302,7 @@ static __always_inline int
|
||||||
|
do_set_mmap_threshold (size_t value)
|
||||||
|
{
|
||||||
|
/* Forbid setting the threshold too high. */
|
||||||
|
- if (value <= HEAP_MAX_SIZE / 2)
|
||||||
|
+ if (value <= heap_max_size () / 2)
|
||||||
|
{
|
||||||
|
LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
|
||||||
|
mp_.no_dyn_threshold);
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -0,0 +1,119 @@
|
|||||||
|
From 0849eed45daabf30a02c153695041597d6d43b2d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Mon, 30 Aug 2021 10:56:55 -0300
|
||||||
|
Subject: [PATCH 6/7] malloc: Move MORECORE fallback mmap to
|
||||||
|
sysmalloc_mmap_fallback
|
||||||
|
|
||||||
|
So it can be used on hugepage code as well.
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
malloc/malloc.c | 85 ++++++++++++++++++++++++++++++-------------------
|
||||||
|
1 file changed, 53 insertions(+), 32 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index 1698d45d1e..32050be4cc 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -2498,6 +2498,51 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
|
||||||
|
return chunk2mem (p);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ Allocate memory using mmap() based on S and NB requested size, aligning to
|
||||||
|
+ PAGESIZE if required. The EXTRA_FLAGS is used on mmap() call. If the call
|
||||||
|
+ succeedes S is updated with the allocated size. This is used as a fallback
|
||||||
|
+ if MORECORE fails.
|
||||||
|
+ */
|
||||||
|
+static void *
|
||||||
|
+sysmalloc_mmap_fallback (long int *s, INTERNAL_SIZE_T nb,
|
||||||
|
+ INTERNAL_SIZE_T old_size, size_t minsize,
|
||||||
|
+ size_t pagesize, int extra_flags, mstate av)
|
||||||
|
+{
|
||||||
|
+ long int size = *s;
|
||||||
|
+
|
||||||
|
+ /* Cannot merge with old top, so add its size back in */
|
||||||
|
+ if (contiguous (av))
|
||||||
|
+ size = ALIGN_UP (size + old_size, pagesize);
|
||||||
|
+
|
||||||
|
+ /* If we are relying on mmap as backup, then use larger units */
|
||||||
|
+ if ((unsigned long) (size) < minsize)
|
||||||
|
+ size = minsize;
|
||||||
|
+
|
||||||
|
+ /* Don't try if size wraps around 0 */
|
||||||
|
+ if ((unsigned long) (size) <= (unsigned long) (nb))
|
||||||
|
+ return MORECORE_FAILURE;
|
||||||
|
+
|
||||||
|
+ char *mbrk = (char *) (MMAP (0, size,
|
||||||
|
+ mtag_mmap_flags | PROT_READ | PROT_WRITE,
|
||||||
|
+ extra_flags));
|
||||||
|
+ if (mbrk == MAP_FAILED)
|
||||||
|
+ return MAP_FAILED;
|
||||||
|
+
|
||||||
|
+#ifdef MAP_HUGETLB
|
||||||
|
+ if (!(extra_flags & MAP_HUGETLB))
|
||||||
|
+ madvise_thp (mbrk, size);
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+ /* Record that we no longer have a contiguous sbrk region. After the first
|
||||||
|
+ time mmap is used as backup, we do not ever rely on contiguous space
|
||||||
|
+ since this could incorrectly bridge regions. */
|
||||||
|
+ set_noncontiguous (av);
|
||||||
|
+
|
||||||
|
+ *s = size;
|
||||||
|
+ return mbrk;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void *
|
||||||
|
sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
{
|
||||||
|
@@ -2696,38 +2741,14 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
segregated mmap region.
|
||||||
|
*/
|
||||||
|
|
||||||
|
- /* Cannot merge with old top, so add its size back in */
|
||||||
|
- if (contiguous (av))
|
||||||
|
- size = ALIGN_UP (size + old_size, pagesize);
|
||||||
|
-
|
||||||
|
- /* If we are relying on mmap as backup, then use larger units */
|
||||||
|
- if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
|
||||||
|
- size = MMAP_AS_MORECORE_SIZE;
|
||||||
|
-
|
||||||
|
- /* Don't try if size wraps around 0 */
|
||||||
|
- if ((unsigned long) (size) > (unsigned long) (nb))
|
||||||
|
- {
|
||||||
|
- char *mbrk = (char *) (MMAP (0, size,
|
||||||
|
- mtag_mmap_flags | PROT_READ | PROT_WRITE,
|
||||||
|
- 0));
|
||||||
|
-
|
||||||
|
- if (mbrk != MAP_FAILED)
|
||||||
|
- {
|
||||||
|
- madvise_thp (mbrk, size);
|
||||||
|
-
|
||||||
|
- /* We do not need, and cannot use, another sbrk call to find end */
|
||||||
|
- brk = mbrk;
|
||||||
|
- snd_brk = brk + size;
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- Record that we no longer have a contiguous sbrk region.
|
||||||
|
- After the first time mmap is used as backup, we do not
|
||||||
|
- ever rely on contiguous space since this could incorrectly
|
||||||
|
- bridge regions.
|
||||||
|
- */
|
||||||
|
- set_noncontiguous (av);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
+ char *mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize,
|
||||||
|
+ MMAP_AS_MORECORE_SIZE, 0, av);
|
||||||
|
+ if (mbrk != MAP_FAILED)
|
||||||
|
+ {
|
||||||
|
+ /* We do not need, and cannot use, another sbrk call to find end */
|
||||||
|
+ brk = mbrk;
|
||||||
|
+ snd_brk = brk + size;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (brk != (char *) (MORECORE_FAILURE))
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -0,0 +1,86 @@
|
|||||||
|
From 0f982c182760bd7689769ee7590df592d0a132c0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
Date: Mon, 30 Aug 2021 14:01:00 -0300
|
||||||
|
Subject: [PATCH 7/7] malloc: Enable huge page support on main arena
|
||||||
|
|
||||||
|
This patch adds support huge page support on main arena allocation,
|
||||||
|
enable with tunable glibc.malloc.hugetlb=2. The patch essentially
|
||||||
|
disable the __glibc_morecore() sbrk() call (similar when memory
|
||||||
|
tag does when sbrk() call does not support it) and fallback to
|
||||||
|
default page size if the memory allocation fails.
|
||||||
|
|
||||||
|
Checked on x86_64-linux-gnu.
|
||||||
|
|
||||||
|
Reviewed-by: DJ Delorie <dj@redhat.com>
|
||||||
|
---
|
||||||
|
malloc/arena.c | 4 ++++
|
||||||
|
malloc/malloc.c | 12 ++++++++++--
|
||||||
|
malloc/morecore.c | 4 ----
|
||||||
|
3 files changed, 14 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/malloc/arena.c b/malloc/arena.c
|
||||||
|
index e1852f8597..3ed4ef3f05 100644
|
||||||
|
--- a/malloc/arena.c
|
||||||
|
+++ b/malloc/arena.c
|
||||||
|
@@ -364,6 +364,10 @@ ptmalloc_init (void)
|
||||||
|
# endif
|
||||||
|
TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
|
||||||
|
TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb));
|
||||||
|
+ if (mp_.hp_pagesize > 0)
|
||||||
|
+ /* Force mmap for main arena instead of sbrk, so hugepages are explicitly
|
||||||
|
+ used. */
|
||||||
|
+ __always_fail_morecore = true;
|
||||||
|
#else
|
||||||
|
if (__glibc_likely (_environ != NULL))
|
||||||
|
{
|
||||||
|
diff --git a/malloc/malloc.c b/malloc/malloc.c
|
||||||
|
index 32050be4cc..b67f2c84ee 100644
|
||||||
|
--- a/malloc/malloc.c
|
||||||
|
+++ b/malloc/malloc.c
|
||||||
|
@@ -2741,8 +2741,16 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|
||||||
|
segregated mmap region.
|
||||||
|
*/
|
||||||
|
|
||||||
|
- char *mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize,
|
||||||
|
- MMAP_AS_MORECORE_SIZE, 0, av);
|
||||||
|
+ char *mbrk = MAP_FAILED;
|
||||||
|
+#if HAVE_TUNABLES
|
||||||
|
+ if (mp_.hp_pagesize > 0)
|
||||||
|
+ mbrk = sysmalloc_mmap_fallback (&size, nb, old_size,
|
||||||
|
+ mp_.hp_pagesize, mp_.hp_pagesize,
|
||||||
|
+ mp_.hp_flags, av);
|
||||||
|
+#endif
|
||||||
|
+ if (mbrk == MAP_FAILED)
|
||||||
|
+ mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize,
|
||||||
|
+ MMAP_AS_MORECORE_SIZE, 0, av);
|
||||||
|
if (mbrk != MAP_FAILED)
|
||||||
|
{
|
||||||
|
/* We do not need, and cannot use, another sbrk call to find end */
|
||||||
|
diff --git a/malloc/morecore.c b/malloc/morecore.c
|
||||||
|
index 8168ef158c..004cd3ead4 100644
|
||||||
|
--- a/malloc/morecore.c
|
||||||
|
+++ b/malloc/morecore.c
|
||||||
|
@@ -15,9 +15,7 @@
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
-#if defined(SHARED) || defined(USE_MTAG)
|
||||||
|
static bool __always_fail_morecore = false;
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
/* Allocate INCREMENT more bytes of data space,
|
||||||
|
and return the start of data space, or NULL on errors.
|
||||||
|
@@ -25,10 +23,8 @@ static bool __always_fail_morecore = false;
|
||||||
|
void *
|
||||||
|
__glibc_morecore (ptrdiff_t increment)
|
||||||
|
{
|
||||||
|
-#if defined(SHARED) || defined(USE_MTAG)
|
||||||
|
if (__always_fail_morecore)
|
||||||
|
return NULL;
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
void *result = (void *) __sbrk (increment);
|
||||||
|
if (result == (void *) -1)
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user