add libtcmalloc_2m.so in gperftools-libs rpm package

In the Ceph scenario, enabling tcmalloc huge pages can
 reduce the TLB miss rate and improve performance.

 However, tcmalloc does not support releasing huge pages. Therefore,
 release logic needs to be added. This way, when tcmalloc huge
 pages are in use, the memory can be released back to the OS,
 preventing memory overuse.

 The libtcmalloc_2m.so library is added to gperftools to support
 hugetlb and services that require a 2 MB tcmalloc page size.

 The native tcmalloc.so file is not affected.

Signed-off-by: liubo <liubo254@huawei.com>
This commit is contained in:
liubo 2024-04-12 11:05:21 +08:00
parent d9488bf44f
commit 736cafc9f0
2 changed files with 119 additions and 1 deletions

View File

@ -0,0 +1,114 @@
From 92ae2027b9e9985f9f3ac90a007c9df452ea9cad Mon Sep 17 00:00:00 2001
From: liubo <liubo254@huawei.com>
Date: Sat, 13 Apr 2024 03:23:10 +0800
Subject: [PATCH] add libtcmalloc_2m.so in gperftools-libs rpm package
In the Ceph scenario, enabling tcmalloc huge pages can
reduce the TLB miss rate and improve performance.
However, tcmalloc does not support releasing huge pages. Therefore,
release logic needs to be added. This way, when tcmalloc huge
pages are in use, the memory can be released back to the OS,
preventing memory overuse.
The libtcmalloc_2m.so library is added to gperftools to support
hugetlb and services that require a 2 MB tcmalloc page size.
The native tcmalloc.so file is not affected.
Signed-off-by: liubo <liubo254@huawei.com>
---
Makefile.am | 10 ++++++++++
src/common.h | 13 +++++++++++--
src/span.h | 4 ++--
src/system-alloc.cc | 2 +-
4 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 82be544..2443e80 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -967,6 +967,16 @@ libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS)
+# add libtcmalloc_2m.so, use 2m hugetlb for tcmalloc page alloc.
+lib_LTLIBRARIES += libtcmalloc_2m.la
+libtcmalloc_2m_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES) \
+ $(HEAP_CHECKER_SOURCES) $(libtcmalloc_internal_la_SOURCES)
+libtcmalloc_2m_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
+ $(MAYBE_NO_HEAP_CHECK) $(EMERGENCY_MALLOC_DEFINE) -DTCMALLOC_PAGE_SIZE_2M
+libtcmalloc_2m_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
+libtcmalloc_2m_la_LIBADD = libstacktrace.la libmaybe_threads.la $(PTHREAD_LIBS)
+
+
# same as above with without -DNDEBUG
noinst_LTLIBRARIES += libtcmalloc_internal_with_asserts.la
libtcmalloc_internal_with_asserts_la_SOURCES = $(libtcmalloc_internal_la_SOURCES)
diff --git a/src/common.h b/src/common.h
index caa3e4a..687b2c6 100644
--- a/src/common.h
+++ b/src/common.h
@@ -72,8 +72,10 @@ static const size_t kMinAlign = 16;
// the thread cache allowance to avoid passing more free ranges to and from
// central lists. Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
-#if defined(TCMALLOC_PAGE_SIZE_SHIFT)
+#if defined(TCMALLOC_PAGE_SIZE_SHIFT) && !defined(TCMALLOC_PAGE_SIZE_2M)
static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT;
+#elif defined(TCMALLOC_PAGE_SIZE_2M)
+static const size_t kPageShift = 21;
#else
static const size_t kPageShift = 13;
#endif
@@ -83,11 +85,18 @@ static const size_t kClassSizesMax = 128;
static const size_t kMaxThreadCacheSize = 4 << 20;
static const size_t kPageSize = 1 << kPageShift;
+#if defined(TCMALLOC_PAGE_SIZE_2M)
+static const size_t kMaxSize = 2 * 1024 * 1024;
+#else
static const size_t kMaxSize = 256 * 1024;
+#endif
static const size_t kAlignment = 8;
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
+#if defined(TCMALLOC_PAGE_SIZE_2M)
+static const size_t kMaxPages = 1 << (21 - kPageShift);
+#else
static const size_t kMaxPages = 1 << (20 - kPageShift);
-
+#endif
// Default bound on the total amount of thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
diff --git a/src/span.h b/src/span.h
index 7068893..9c89edc 100644
--- a/src/span.h
+++ b/src/span.h
@@ -80,8 +80,8 @@ struct Span {
// iterator which lifetime is controlled explicitly.
char span_iter_space[sizeof(SpanSet::iterator)];
};
- unsigned int refcount : 16; // Number of non-free objects
- unsigned int sizeclass : 8; // Size-class for small objects (or 0)
+ unsigned int refcount; // Number of non-free objects
+ unsigned int sizeclass; // Size-class for small objects (or 0)
unsigned int location : 2; // Is the span on a freelist, and if so, which?
unsigned int sample : 1; // Sampled object?
bool has_span_iter : 1; // Iff span_iter_space has valid
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 439ec69..b1bb7c9 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -548,7 +548,7 @@ bool TCMalloc_SystemRelease(void* start, size_t length) {
result = ret != MAP_FAILED;
#else
int ret = madvise(reinterpret_cast<char*>(new_start),
- new_end - new_start, MADV_FREE);
+ new_end - new_start, MADV_DONTNEED);
result = ret != -1;
#endif
--
2.23.0

View File

@ -1,6 +1,6 @@
Name: gperftools
Version: 2.10
Release: 2
Release: 3
Summary: high-performance malloc and performance analysis tools
License: BSD-3-Clause
@ -16,6 +16,7 @@ Patch9003: avoid-exceed-int-range.patch
Patch9004: skip-tcm_asserts_unittest.patch
Patch9005: Continue-to-release-span-until-the-end-of-one-round.patch
Patch9006: gperftools-2.10-sw.patch
Patch9007: add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch
BuildRequires: autoconf automake gcc-c++
@ -118,6 +119,9 @@ LD_LIBRARY_PATH=./.libs make check
%{_mandir}/man1/*.1.gz
%changelog
* Fri Apr 12 2024 liubo <liubo254@huawei.com> - 2.10-3
- add libtcmalloc_2m.so in gperftools-libs rpm package
* Mon May 15 2023 yangchenguang <yangchenguang@kylinsec.com.cn> - 2.10-2
- fix loongarch64 build error and sw_64 build error