add libtcmalloc_2m.so in gperftools-libs rpm package
In the Ceph scenario, enabling tcmalloc huge pages can reduce the TLB miss rate and improve performance. However, tcmalloc does not support releasing huge pages, so release logic needs to be added. With that logic in place, memory backed by tcmalloc huge pages can be released back to the OS, preventing memory overuse. The libtcmalloc_2m.so library is added to gperftools to support hugetlb and services that require a 2 MB tcmalloc page size. The native libtcmalloc.so library is not affected. Signed-off-by: liubo <liubo254@huawei.com>
This commit is contained in:
parent
d9488bf44f
commit
736cafc9f0
114
add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch
Normal file
114
add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch
Normal file
@ -0,0 +1,114 @@
|
||||
From 92ae2027b9e9985f9f3ac90a007c9df452ea9cad Mon Sep 17 00:00:00 2001
|
||||
From: liubo <liubo254@huawei.com>
|
||||
Date: Sat, 13 Apr 2024 03:23:10 +0800
|
||||
Subject: [PATCH] add libtcmalloc_2m.so in gperftools-libs rpm package
|
||||
|
||||
In the Ceph scenario, enabling tcmalloc huge pages can
|
||||
reduce the TLB miss rate and improve performance.
|
||||
|
||||
However, tcmalloc does not support huge page release. Therefore,
|
||||
release logic needs to be added. In this way, when
|
||||
the tcmalloc huge page is used, the memory can be
|
||||
released back to the OS, preventing memory overuse.
|
||||
|
||||
The libtcmalloc_2m.so file is added to tcmalloc to support
|
||||
hugetlb and services that require 2 MB tcmalloc.
|
||||
|
||||
The native tcmalloc.so file is not affected.
|
||||
|
||||
Signed-off-by: liubo <liubo254@huawei.com>
|
||||
---
|
||||
Makefile.am | 10 ++++++++++
|
||||
src/common.h | 13 +++++++++++--
|
||||
src/span.h | 4 ++--
|
||||
src/system-alloc.cc | 2 +-
|
||||
4 files changed, 24 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/Makefile.am b/Makefile.am
|
||||
index 82be544..2443e80 100644
|
||||
--- a/Makefile.am
|
||||
+++ b/Makefile.am
|
||||
@@ -967,6 +967,16 @@ libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
|
||||
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
|
||||
libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS)
|
||||
|
||||
+# Also build libtcmalloc_2m.so: tcmalloc compiled with -DTCMALLOC_PAGE_SIZE_2M (2 MB pages) for hugetlb use.
|
||||
+lib_LTLIBRARIES += libtcmalloc_2m.la
|
||||
+libtcmalloc_2m_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES) \
|
||||
+ $(HEAP_CHECKER_SOURCES) $(libtcmalloc_internal_la_SOURCES)
|
||||
+libtcmalloc_2m_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
|
||||
+ $(MAYBE_NO_HEAP_CHECK) $(EMERGENCY_MALLOC_DEFINE) -DTCMALLOC_PAGE_SIZE_2M
|
||||
+libtcmalloc_2m_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
|
||||
+libtcmalloc_2m_la_LIBADD = libstacktrace.la libmaybe_threads.la $(PTHREAD_LIBS)
|
||||
+
|
||||
+
|
||||
# same as above with without -DNDEBUG
|
||||
noinst_LTLIBRARIES += libtcmalloc_internal_with_asserts.la
|
||||
libtcmalloc_internal_with_asserts_la_SOURCES = $(libtcmalloc_internal_la_SOURCES)
|
||||
diff --git a/src/common.h b/src/common.h
|
||||
index caa3e4a..687b2c6 100644
|
||||
--- a/src/common.h
|
||||
+++ b/src/common.h
|
||||
@@ -72,8 +72,10 @@ static const size_t kMinAlign = 16;
|
||||
// the thread cache allowance to avoid passing more free ranges to and from
|
||||
// central lists. Also, larger pages are less likely to get freed.
|
||||
// These two factors cause a bounded increase in memory use.
|
||||
-#if defined(TCMALLOC_PAGE_SIZE_SHIFT)
|
||||
+#if defined(TCMALLOC_PAGE_SIZE_SHIFT) && !defined(TCMALLOC_PAGE_SIZE_2M)
|
||||
static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT;
|
||||
+#elif defined(TCMALLOC_PAGE_SIZE_2M)
|
||||
+static const size_t kPageShift = 21;
|
||||
#else
|
||||
static const size_t kPageShift = 13;
|
||||
#endif
|
||||
@@ -83,11 +85,18 @@ static const size_t kClassSizesMax = 128;
|
||||
static const size_t kMaxThreadCacheSize = 4 << 20;
|
||||
|
||||
static const size_t kPageSize = 1 << kPageShift;
|
||||
+#if defined(TCMALLOC_PAGE_SIZE_2M)
|
||||
+static const size_t kMaxSize = 2 * 1024 * 1024;
|
||||
+#else
|
||||
static const size_t kMaxSize = 256 * 1024;
|
||||
+#endif
|
||||
static const size_t kAlignment = 8;
|
||||
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
|
||||
+#if defined(TCMALLOC_PAGE_SIZE_2M)
|
||||
+static const size_t kMaxPages = 1 << (21 - kPageShift);
|
||||
+#else
|
||||
static const size_t kMaxPages = 1 << (20 - kPageShift);
|
||||
-
|
||||
+#endif
|
||||
// Default bound on the total amount of thread caches.
|
||||
#ifdef TCMALLOC_SMALL_BUT_SLOW
|
||||
// Make the overall thread cache no bigger than that of a single thread
|
||||
diff --git a/src/span.h b/src/span.h
|
||||
index 7068893..9c89edc 100644
|
||||
--- a/src/span.h
|
||||
+++ b/src/span.h
|
||||
@@ -80,8 +80,8 @@ struct Span {
|
||||
// iterator which lifetime is controlled explicitly.
|
||||
char span_iter_space[sizeof(SpanSet::iterator)];
|
||||
};
|
||||
- unsigned int refcount : 16; // Number of non-free objects
|
||||
- unsigned int sizeclass : 8; // Size-class for small objects (or 0)
|
||||
+ unsigned int refcount; // Number of non-free objects
|
||||
+ unsigned int sizeclass; // Size-class for small objects (or 0)
|
||||
unsigned int location : 2; // Is the span on a freelist, and if so, which?
|
||||
unsigned int sample : 1; // Sampled object?
|
||||
bool has_span_iter : 1; // Iff span_iter_space has valid
|
||||
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
|
||||
index 439ec69..b1bb7c9 100644
|
||||
--- a/src/system-alloc.cc
|
||||
+++ b/src/system-alloc.cc
|
||||
@@ -548,7 +548,7 @@ bool TCMalloc_SystemRelease(void* start, size_t length) {
|
||||
result = ret != MAP_FAILED;
|
||||
#else
|
||||
int ret = madvise(reinterpret_cast<char*>(new_start),
|
||||
- new_end - new_start, MADV_FREE);
|
||||
+ new_end - new_start, MADV_DONTNEED);
|
||||
|
||||
result = ret != -1;
|
||||
#endif
|
||||
--
|
||||
2.23.0
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: gperftools
|
||||
Version: 2.10
|
||||
Release: 2
|
||||
Release: 3
|
||||
Summary: high-performance malloc and performance analysis tools
|
||||
|
||||
License: BSD-3-Clause
|
||||
@ -16,6 +16,7 @@ Patch9003: avoid-exceed-int-range.patch
|
||||
Patch9004: skip-tcm_asserts_unittest.patch
|
||||
Patch9005: Continue-to-release-span-until-the-end-of-one-round.patch
|
||||
Patch9006: gperftools-2.10-sw.patch
|
||||
Patch9007: add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch
|
||||
|
||||
|
||||
BuildRequires: autoconf automake gcc-c++
|
||||
@ -118,6 +119,9 @@ LD_LIBRARY_PATH=./.libs make check
|
||||
%{_mandir}/man1/*.1.gz
|
||||
|
||||
%changelog
|
||||
* Fri Apr 12 2024 liubo <liubo254@huawei.com> - 2.10-3
|
||||
- add libtcmalloc_2m.so in gperftools-libs rpm package
|
||||
|
||||
* Mon May 15 2023 yangchenguang <yangchenguang@kylinsec.com.cn> - 2.10-2
|
||||
- fix loongarch64 build error and sw_64 build error
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user