Compare commits

...

10 Commits

Author SHA1 Message Date
openeuler-ci-bot
f23fadb39c
!54 add libtcmalloc_2m.so in gperftools-libs rpm package
From: @liubo254 
Reviewed-by: @wangbin224 
Signed-off-by: @wangbin224
2024-04-18 01:19:22 +00:00
liubo
736cafc9f0 add libtcmalloc_2m.so in gperftools-libs rpm package
In the Ceph scenario, enabling tcmalloc huge pages can
 reduce the TLB miss rate and improve performance.

 However, tcmalloc does not support huge page release. Therefore,
 release logic needs to be added. In this way, when
 the tcmalloc huge page is used, the memory can be
 released from the OS, preventing memory overuse.

 The libtcmalloc_2m.so file is added to tcmalloc to support
 hugetlb and services that require 2 MB tcmalloc.

 The native tcmalloc.so file is not affected.

Signed-off-by: liubo <liubo254@huawei.com>
2024-04-12 11:05:21 +08:00
openeuler-ci-bot
d9488bf44f
!49 [sync] PR-47: fix loongarch64 build error.
From: @openeuler-sync-bot 
Reviewed-by: @liqingqing_1229 
Signed-off-by: @liqingqing_1229
2023-08-30 08:23:24 +00:00
lanruo
47c21a73a7 fix loongarch64 build error and sw_64 build error
(cherry picked from commit 6d6edb6bebc30538865479c815b2b3ed19fae822)
2023-08-30 16:22:53 +08:00
openeuler-ci-bot
532ac0827b
!41 Update 22.03-LTS-Next to 2.10
From: @sdlzx 
Reviewed-by: @liqingqing_1229 
Signed-off-by: @liqingqing_1229
2022-11-10 13:07:11 +00:00
Liu Zixian
37d34c839a Update to 2.10 2022-11-10 20:49:09 +08:00
openeuler-ci-bot
cb478302a1
!38 [sync] PR-37: 添加sw架构
From: @openeuler-sync-bot 
Reviewed-by: @liqingqing_1229 
Signed-off-by: @liqingqing_1229
2022-10-21 00:57:32 +00:00
wzx
1723f96288 Add sw64 architecture
Signed-off-by: wzx <wuzx1226@qq.com>
(cherry picked from commit af79d6a8c47de429693e21eff798cfee02131ce7)
2022-10-21 08:56:34 +08:00
openeuler-ci-bot
ac4a15b4aa
!32 [sync] PR-30: fix spec changelog date
From: @openeuler-sync-bot 
Reviewed-by: @liqingqing_1229 
Signed-off-by: @liqingqing_1229
2022-05-23 00:49:44 +00:00
loong_C
57caa999b8 fix spec changelog date
(cherry picked from commit c1ec9527d4db180476b88530fe4daf90da920b08)
2022-05-23 08:49:24 +08:00
6 changed files with 335 additions and 8 deletions

View File

@ -0,0 +1,114 @@
From 92ae2027b9e9985f9f3ac90a007c9df452ea9cad Mon Sep 17 00:00:00 2001
From: liubo <liubo254@huawei.com>
Date: Sat, 13 Apr 2024 03:23:10 +0800
Subject: [PATCH] add libtcmalloc_2m.so in gperftools-libs rpm package
In the Ceph scenario, enabling tcmalloc huge pages can
reduce the TLB miss rate and improve performance.
However, tcmalloc does not support huge page release. Therefore,
release logic needs to be added. In this way, when
the tcmalloc huge page is used, the memory can be
released from the OS, preventing memory overuse.
The libtcmalloc_2m.so file is added to tcmalloc to support
hugetlb and services that require 2 MB tcmalloc.
The native tcmalloc.so file is not affected.
Signed-off-by: liubo <liubo254@huawei.com>
---
Makefile.am | 10 ++++++++++
src/common.h | 13 +++++++++++--
src/span.h | 4 ++--
src/system-alloc.cc | 2 +-
4 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 82be544..2443e80 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -967,6 +967,16 @@ libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS)
+# add libtcmalloc_2m.so, use 2m hugetlb for tcmalloc page alloc.
+lib_LTLIBRARIES += libtcmalloc_2m.la
+libtcmalloc_2m_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES) \
+ $(HEAP_CHECKER_SOURCES) $(libtcmalloc_internal_la_SOURCES)
+libtcmalloc_2m_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \
+ $(MAYBE_NO_HEAP_CHECK) $(EMERGENCY_MALLOC_DEFINE) -DTCMALLOC_PAGE_SIZE_2M
+libtcmalloc_2m_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
+libtcmalloc_2m_la_LIBADD = libstacktrace.la libmaybe_threads.la $(PTHREAD_LIBS)
+
+
# same as above with without -DNDEBUG
noinst_LTLIBRARIES += libtcmalloc_internal_with_asserts.la
libtcmalloc_internal_with_asserts_la_SOURCES = $(libtcmalloc_internal_la_SOURCES)
diff --git a/src/common.h b/src/common.h
index caa3e4a..687b2c6 100644
--- a/src/common.h
+++ b/src/common.h
@@ -72,8 +72,10 @@ static const size_t kMinAlign = 16;
// the thread cache allowance to avoid passing more free ranges to and from
// central lists. Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
-#if defined(TCMALLOC_PAGE_SIZE_SHIFT)
+#if defined(TCMALLOC_PAGE_SIZE_SHIFT) && !defined(TCMALLOC_PAGE_SIZE_2M)
static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT;
+#elif defined(TCMALLOC_PAGE_SIZE_2M)
+static const size_t kPageShift = 21;
#else
static const size_t kPageShift = 13;
#endif
@@ -83,11 +85,18 @@ static const size_t kClassSizesMax = 128;
static const size_t kMaxThreadCacheSize = 4 << 20;
static const size_t kPageSize = 1 << kPageShift;
+#if defined(TCMALLOC_PAGE_SIZE_2M)
+static const size_t kMaxSize = 2 * 1024 * 1024;
+#else
static const size_t kMaxSize = 256 * 1024;
+#endif
static const size_t kAlignment = 8;
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
+#if defined(TCMALLOC_PAGE_SIZE_2M)
+static const size_t kMaxPages = 1 << (21 - kPageShift);
+#else
static const size_t kMaxPages = 1 << (20 - kPageShift);
-
+#endif
// Default bound on the total amount of thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
diff --git a/src/span.h b/src/span.h
index 7068893..9c89edc 100644
--- a/src/span.h
+++ b/src/span.h
@@ -80,8 +80,8 @@ struct Span {
// iterator which lifetime is controlled explicitly.
char span_iter_space[sizeof(SpanSet::iterator)];
};
- unsigned int refcount : 16; // Number of non-free objects
- unsigned int sizeclass : 8; // Size-class for small objects (or 0)
+ unsigned int refcount; // Number of non-free objects
+ unsigned int sizeclass; // Size-class for small objects (or 0)
unsigned int location : 2; // Is the span on a freelist, and if so, which?
unsigned int sample : 1; // Sampled object?
bool has_span_iter : 1; // Iff span_iter_space has valid
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 439ec69..b1bb7c9 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -548,7 +548,7 @@ bool TCMalloc_SystemRelease(void* start, size_t length) {
result = ret != MAP_FAILED;
#else
int ret = madvise(reinterpret_cast<char*>(new_start),
- new_end - new_start, MADV_FREE);
+ new_end - new_start, MADV_DONTNEED);
result = ret != -1;
#endif
--
2.23.0

View File

@ -0,0 +1,53 @@
From 1f9b13b0c08e0682fab3fbb962b504c2b85dc845 Mon Sep 17 00:00:00 2001
From: yangchenguang <yangchenguang@kylinsec.com.cn>
Date: Mon, 15 May 2023 11:14:21 +0800
Subject: [PATCH] fix loongarch64 build failed
Signed-off-by: yangchenguang <yangchenguang@kylinsec.com.cn>
---
src/base/linux_syscall_support.h | 5 +++++
src/base/linuxthreads.cc | 4 ++++
2 files changed, 9 insertions(+)
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
index b807b11..a1afd87 100644
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@@ -2773,6 +2773,7 @@ LSS_RETURN(int, __res);
LSS_INLINE _syscall3(int, fcntl, int, f,
int, c, long, a)
#endif
+#ifndef __loongarch__
#if defined(__aarch64__) && defined (__ILP32__)
/* aarch64_ilp32 uses fstat64 for sys_fstat() */
LSS_INLINE _syscall2_long(int, fstat, fstat64, int, f,
@@ -2780,6 +2781,10 @@ LSS_RETURN(int, __res);
#else
LSS_INLINE _syscall2(int, fstat, int, f,
struct kernel_stat*, b)
+#endif
+#else
+ LSS_INLINE _syscall2(int, statx, int, f,
+ struct kernel_stat*, b)
#endif
LSS_INLINE _syscall6(int, futex, int*, a,
int, o, int, v,
diff --git a/src/base/linuxthreads.cc b/src/base/linuxthreads.cc
index c28b1a9..2eb2816 100644
--- a/src/base/linuxthreads.cc
+++ b/src/base/linuxthreads.cc
@@ -350,7 +350,11 @@ static void ListerThread(struct ListerParams *args) {
continue;
goto failure;
}
+#ifndef __loongarch__
if (sys_fstat(proc, &proc_sb) < 0)
+#else
+ if (sys_statx(proc, &proc_sb) < 0)
+#endif
goto failure;
/* Since we are suspending threads, we cannot call any libc
--
2.33.0

135
gperftools-2.10-sw.patch Normal file
View File

@ -0,0 +1,135 @@
From 4ca466e1bb19587ff954ce5508e355ed5a12383e Mon Sep 17 00:00:00 2001
From: Liu Zixian <liuzixian4@huawei.com>
Date: Thu, 10 Nov 2022 19:10:37 +0800
Subject: [PATCH] Support for sw arch
Based on version 2.9.1 from wuzx <wuzx1226@qq.com>
Signed-off-by: Liu Zixian <liuzixian4@huawei.com>
---
config.guess | 40 ++++++++++++++++++++++++++++++++++++
config.sub | 1 +
configure | 1 +
src/base/basictypes.h | 2 ++
src/malloc_hook_mmap_linux.h | 3 ++-
5 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/config.guess b/config.guess
index 7f76b62..abf78f4 100755
--- a/config.guess
+++ b/config.guess
@@ -313,6 +313,36 @@ case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in
mips:OSF1:*.*)
GUESS=mips-dec-osf1
;;
+ sw_64:OSF1:*:*)
+ case $UNAME_RELEASE in
+ *4.0)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+ ;;
+ *5.*)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+ ;;
+ esac
+ # According to Compaq, /usr/sbin/psrinfo has been available on
+ # OSF/1 and Tru64 systems produced since 1995. I hope that
+ # covers most systems running today. This code pipes the CPU
+ # types through head -n 1, so we only detect the type of CPU 0.
+ SW_64_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The sw_64 \(.*\) processor.*$/\1/p' | head -n 1`
+ case "$SW_64_CPU_TYPE" in
+ "SW6A (1621)")
+ UNAME_MACHINE=sw_64sw6a ;;
+ "SW6B (3231)")
+ UNAME_MACHINE=sw_64sw6b ;;
+ esac
+ # A Pn.n version is a patched version.
+ # A Vn.n version is a released version.
+ # A Tn.n version is a released field test version.
+ # A Xn.n version is an unreleased experimental baselevel.
+ # 1.2 uses "1.2" for uname -r.
+ GUESS="$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`"
+ # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+ exitcode=$?
+ trap '' 0
+ ;;
alpha:OSF1:*:*)
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
trap '' 0
@@ -976,6 +1006,15 @@ EOF
UNAME_MACHINE=aarch64_be
GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
;;
+ sw_64:Linux:*:*)
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ SW6A) UNAME_MACHINE=sw_64sw6a ;;
+ SW6B) UNAME_MACHINE=sw_64sw6b ;;
+ esac
+ objdump --private-headers /bin/sh | grep -q ld.so.1
+ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
+ GUESS="$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ ;;
alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in
EV5) UNAME_MACHINE=alphaev5 ;;
@@ -1512,6 +1551,7 @@ EOF
UNAME_MACHINE=`(uname -p) 2>/dev/null`
case $UNAME_MACHINE in
A*) GUESS=alpha-dec-vms ;;
+ S*) GUESS=sw_64-dec-vms ;;
I*) GUESS=ia64-dec-vms ;;
V*) GUESS=vax-dec-vms ;;
esac ;;
diff --git a/config.sub b/config.sub
index dba16e8..ef3f539 100755
--- a/config.sub
+++ b/config.sub
@@ -1185,6 +1185,7 @@ case $cpu-$vendor in
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
| alphapca5[67] | alpha64pca5[67] \
+ | sw_64 | sw_64sw6a | sw_64sw6b \
| am33_2.0 \
| amdgcn \
| arc | arceb | arc32 | arc64 \
diff --git a/configure b/configure
index 4e0684d..2953fe4 100755
--- a/configure
+++ b/configure
@@ -19496,6 +19496,7 @@ printf %s "checking how to access the program counter from a struct ucontext...
pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64)
pc_fields="$pc_fields uc_mcontext.__pc" # Linux (loongarch64)
pc_fields="$pc_fields uc_mcontext.pc" # Linux (mips)
+ pc_fields="$pc_fields uc_mcontext.sc_pc" # Linux (sw_64)
pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[PT_NIP]" # Linux (ppc)
pc_fields="$pc_fields uc_mcontext.__gregs[REG_PC]" # Linux (riscv64)
pc_fields="$pc_fields uc_mcontext.psw.addr" # Linux (s390)
diff --git a/src/base/basictypes.h b/src/base/basictypes.h
index a8c9e1c..0ceec52 100644
--- a/src/base/basictypes.h
+++ b/src/base/basictypes.h
@@ -378,6 +378,8 @@ class AssignAttributeStartEnd {
// some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
# elif (defined(__mips__))
# define CACHELINE_ALIGNED __attribute__((aligned(128)))
+# elif (defined(__sw_64__))
+# define CACHELINE_ALIGNED __attribute__((aligned(128)))
# elif (defined(__aarch64__))
# define CACHELINE_ALIGNED __attribute__((aligned(64)))
// implementation specific, Cortex-A53 and 57 should have 64 bytes
diff --git a/src/malloc_hook_mmap_linux.h b/src/malloc_hook_mmap_linux.h
index c7d8b4b..6de699d 100644
--- a/src/malloc_hook_mmap_linux.h
+++ b/src/malloc_hook_mmap_linux.h
@@ -56,7 +56,8 @@
|| defined(__aarch64__) \
|| defined(__loongarch64) \
|| (defined(_MIPS_SIM) && (_MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32)) \
- || defined(__s390__) || (defined(__riscv) && __riscv_xlen == 64)
+ || defined(__s390__) || (defined(__riscv) && __riscv_xlen == 64) \
+ || defined(__sw_64__)
static inline void* do_mmap64(void *start, size_t length,
int prot, int flags,
--
2.36.1

BIN
gperftools-2.10.tar.gz Normal file

Binary file not shown.

Binary file not shown.

View File

@ -1,12 +1,13 @@
Name: gperftools
Version: 2.9.1
Release: 4
Version: 2.10
Release: 3
Summary: high-performance malloc and performance analysis tools
License: BSD
License: BSD-3-Clause
URL: https://github.com/gperftools/gperftools
Source0: https://github.com/gperftools/gperftools/releases/download/%{name}-%{version}/%{name}-%{version}.tar.gz
Patch1: gperftools-generic-dynamic-tls.patch
Patch2: fix-loongarch64-build-failed.patch
Patch9000: issue-1122-fix-bus-error-on-aarch64.patch
Patch9001: skip-arm-in-stacktrace_unittest.patch
@ -14,6 +15,8 @@ Patch9002: skip-heapchecker-in-arm-arch.patch
Patch9003: avoid-exceed-int-range.patch
Patch9004: skip-tcm_asserts_unittest.patch
Patch9005: Continue-to-release-span-until-the-end-of-one-round.patch
Patch9006: gperftools-2.10-sw.patch
Patch9007: add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch
BuildRequires: autoconf automake gcc-c++
@ -72,6 +75,9 @@ CXXFLAGS=`echo $RPM_OPT_FLAGS -fno-strict-aliasing -Wno-unused-local-typedefs -D
%configure \
%ifarch s390x aarch64
--disable-general-dynamic-tls \
%endif
%ifarch sw_64
--disable-cpu-profiler \
%endif
--disable-dynamic-sized-delete-support \
@ -91,17 +97,21 @@ LD_LIBRARY_PATH=./.libs make check
#nothing to do
%files libs
%ifnarch sw_64
%{_libdir}/libprofiler.so.*
%endif
%{_libdir}/libtcmalloc*.so.*
%files devel
%{_includedir}/google/*.h
%{_includedir}/gperftools/*.h
%ifnarch sw_64
%{_libdir}/libprofiler.so
%{_libdir}/libprofiler.*a
%endif
%{_libdir}/libtcmalloc*.so
%{_libdir}/pkgconfig/*.pc
%{_docdir}/%{name}/*
%{_libdir}/libprofiler.*a
%{_libdir}/libtcmalloc*.*a
%files -n pprof
@ -109,14 +119,29 @@ LD_LIBRARY_PATH=./.libs make check
%{_mandir}/man1/*.1.gz
%changelog
* Fri Apr 12 2024 liubo <liubo254@huawei.com> - 2.10-3
- add libtcmalloc_2m.so in gperftools-libs rpm package
* Mon May 15 2023 yangchenguang <yangchenguang@kylinsec.com.cn> - 2.10-2
- fix loongarch64 build error and sw_64 build error
* Thu Nov 10 2022 Liu Zixian <liuzixian4@huawei.com> - 2.10-1
- Update to 2.10
* Thu Oct 20 2022 wuzx <wuzx1226@qq.com> - 2.9.1-6
- add sw64 patch
* Fri May 20 2022 loong_C <loong_c@yeah.net> - 2.9.1-5
- fix spec changelog date
* Mon Feb 28 2022 liusirui <liusirui@huawei.com> - 2.9.1-4
- remove the dependency of the main package and pprof
* Tue Jan 11 2021 QingqingLi <liqingqing3@huawei.com> - 2.9.1-3
* Tue Jan 11 2022 QingqingLi <liqingqing3@huawei.com> - 2.9.1-3
- continue to release span until the end of one round
- use %{?_smp_mflags} to build
* Mon Jan 10 2021 zhangyiru <zhangyiru3@huawei.com> - 2.9.1-2
* Mon Jan 10 2022 zhangyiru <zhangyiru3@huawei.com> - 2.9.1-2
- skip stacktrace_unittest & tcm_asserts_unittest
* Sun Dec 5 2021 zhouwenpei <zhouwenpei1@huawei.com> - 2.9.1-1
@ -126,8 +151,8 @@ LD_LIBRARY_PATH=./.libs make check
- avoid exceed int range when use heapchecker
* Wed Oct 20 2021 zhangyiru <zhangyiru3@huawei.com> - 2.8.1-3
- enable make check && skip four arm testcases.
the reason is that arm do not have fully functional heap checker and
- enable make check && skip four arm testcases.
the reason is that arm do not have fully functional heap checker and
the calling of unw_step in arm stacktrace_unittest is incorrect, but the function is not affected
* Thu Jul 22 2021 zhangyiru <zhangyiru3@huawei.com> - 2.8.1-2