From b7ce39c71900c356b4161b5ef4d6c8a5ffa12732 Mon Sep 17 00:00:00 2001 From: herengui Date: Tue, 29 Aug 2023 11:12:32 +0800 Subject: [PATCH 1000/1001] add loongarch support not upstream modified Signed-off-by: herengui --- meson.build | 14 ++++++ src/gallium/auxiliary/gallivm/lp_bld.h | 4 ++ src/gallium/auxiliary/gallivm/lp_bld_arit.c | 17 ++++++- src/gallium/auxiliary/gallivm/lp_bld_debug.h | 8 ++++ src/gallium/auxiliary/gallivm/lp_bld_init.c | 34 ++++++++++++-- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 44 ++++++++++++++++++- src/gallium/auxiliary/gallivm/lp_bld_misc.h | 3 ++ src/gallium/drivers/llvmpipe/lp_screen.c | 4 +- src/gallium/drivers/llvmpipe/lp_test_arit.c | 2 +- src/gallium/include/pipe/p_config.h | 6 +++ src/gallium/targets/dri/meson.build | 1 + src/gallium/targets/dri/target.c | 1 + src/loader/pci_id_driver_map.h | 7 +++ src/util/u_cpu_detect.c | 30 +++++++++++++ src/util/u_cpu_detect.h | 2 + 15 files changed, 167 insertions(+), 10 deletions(-) diff --git a/meson.build b/meson.build index bd54e78..1a00889 100644 --- a/meson.build +++ b/meson.build @@ -176,6 +176,8 @@ if dri_drivers.contains('auto') dri_drivers = [] elif ['mips', 'mips64', 'riscv32', 'riscv64'].contains(host_machine.cpu_family()) dri_drivers = ['r100', 'r200', 'nouveau'] + elif ['loongarch64'].contains(host_machine.cpu_family()) + dri_drivers = [] else error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format( host_machine.cpu_family())) @@ -215,6 +217,11 @@ if gallium_drivers.contains('auto') gallium_drivers = [ 'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'swrast' ] + elif ['loongarch64'].contains(host_machine.cpu_family()) + gallium_drivers = [ + 'r300', 'r600', 'radeonsi', 'nouveau', 'etnaviv', 'kmsro', 'swrast', + 'virgl' + ] else error('Unknown architecture @0@. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.'.format( host_machine.cpu_family())) @@ -268,6 +275,8 @@ if _vulkan_drivers.contains('auto') _vulkan_drivers = ['swrast'] elif ['mips', 'mips64', 'riscv32', 'riscv64'].contains(host_machine.cpu_family()) _vulkan_drivers = ['amd', 'swrast'] + elif ['loongarch64'].contains(host_machine.cpu_family()) + _vulkan_drivers = ['amd'] else error('Unknown architecture @0@. Please pass -Dvulkan-drivers to set driver options. Patches gladly accepted to fix this.'.format( host_machine.cpu_family())) @@ -1349,6 +1358,11 @@ elif host_machine.cpu_family() == 'mips64' and host_machine.endian() == 'little' with_asm_arch = 'mips64el' pre_args += ['-DUSE_MIPS64EL_ASM'] endif +elif host_machine.cpu_family() == 'loongarch64' + if system_has_kms_drm + with_asm_arch = 'loongarch64' + pre_args += ['-DUSE_LOONGARCH64_ASM'] + endif endif # Check for standard headers and functions diff --git a/src/gallium/auxiliary/gallivm/lp_bld.h b/src/gallium/auxiliary/gallivm/lp_bld.h index 9144428..2fd50dd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld.h +++ b/src/gallium/auxiliary/gallivm/lp_bld.h @@ -82,7 +82,11 @@ #define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION #if LLVM_VERSION_MAJOR >= 8 +#if defined(__loongarch__) || defined(__mips__) +#define GALLIVM_HAVE_CORO 0 +#else #define GALLIVM_HAVE_CORO 1 +#endif #else #define GALLIVM_HAVE_CORO 0 #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 1c71c05..99abcae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1887,6 +1887,15 @@ arch_rounding_available(const struct lp_type type) return TRUE; else if (util_get_cpu_caps()->has_neon) return TRUE; + else if (util_get_cpu_caps()->has_msa && + (type.width * type.length == 128)) + return ((gallivm_perf & GALLIVM_PERF_USE_ARCH_ROUNDING) ? TRUE : FALSE); + else if (util_get_cpu_caps()->has_lsx && + (type.width * type.length == 128)) + return ((gallivm_perf & GALLIVM_PERF_USE_ARCH_ROUNDING) ? TRUE : FALSE); + else if (util_get_cpu_caps()->has_lasx && + (type.width * type.length == 256)) + return ((gallivm_perf & GALLIVM_PERF_USE_ARCH_ROUNDING) ? TRUE : FALSE); return FALSE; } @@ -1994,7 +2003,8 @@ lp_build_round_arch(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon) { + if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon || util_get_cpu_caps()->has_msa + || util_get_cpu_caps()->has_lsx || util_get_cpu_caps()->has_lasx) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic_root; @@ -2403,7 +2413,10 @@ lp_build_iround(struct lp_build_context *bld, res = LLVMBuildFAdd(builder, a, half, ""); } - res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + if (type.sign) + res = LLVMBuildFPToSI(builder, res, int_vec_type, ""); + else + res = LLVMBuildFPToUI(builder, res, int_vec_type, ""); return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index a5dd7b8..6c22dc2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -49,6 +49,14 @@ #define GALLIVM_PERF_NO_OPT (1 << 3) #define GALLIVM_PERF_NO_AOS_SAMPLING (1 << 4) +#if defined(PIPE_ARCH_MIPS64) || defined(PIPE_ARCH_LOONGARCH64) +#define GALLIVM_PERF_OPT_O1 (1 << 5) +#define GALLIVM_PERF_OPT_O2 (1 << 6) +#define GALLIVM_PERF_OPT_O3 (1 << 7) +#endif + +#define GALLIVM_PERF_USE_ARCH_ROUNDING (1 << 8) + #ifdef __cplusplus extern "C" { #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 3f040ac..64740ec 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -46,7 +46,7 @@ #endif #include #if GALLIVM_HAVE_CORO -#if LLVM_VERSION_MAJOR <= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64)) +#if LLVM_VERSION_MAJOR <= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64) || defined(PIPE_ARCH_LOONGARCH64)) #include #endif #include @@ -60,6 +60,12 @@ static const struct debug_named_value lp_bld_perf_flags[] = { { "no_quad_lod", GALLIVM_PERF_NO_QUAD_LOD, "disable quad_lod optimization" }, { "no_aos_sampling", GALLIVM_PERF_NO_AOS_SAMPLING, "disable aos sampling optimization" }, { "nopt", GALLIVM_PERF_NO_OPT, "disable optimization passes to speed up shader compilation" }, +#if defined(PIPE_ARCH_MIPS64) || defined(PIPE_ARCH_LOONGARCH64) + { "o3", GALLIVM_PERF_OPT_O3, "enable aggressive optimization passes" }, + { "o2", GALLIVM_PERF_OPT_O2, "enable medium optimization passes" }, + { "o1", GALLIVM_PERF_OPT_O1, "enable less optimization passes" }, +#endif + { "use_arch_rounding", GALLIVM_PERF_USE_ARCH_ROUNDING, "use poor arch rounding function provided by glibc" }, DEBUG_NAMED_VALUE_END }; @@ -135,7 +141,7 @@ create_pass_manager(struct gallivm_state *gallivm) } #if GALLIVM_HAVE_CORO -#if LLVM_VERSION_MAJOR <= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64)) +#if LLVM_VERSION_MAJOR <= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64) || defined(PIPE_ARCH_LOONGARCH64)) LLVMAddArgumentPromotionPass(gallivm->cgpassmgr); LLVMAddFunctionAttrsPass(gallivm->cgpassmgr); #endif @@ -267,7 +273,16 @@ init_gallivm_engine(struct gallivm_state *gallivm) optlevel = None; } else { - optlevel = Default; +#if defined(PIPE_ARCH_MIPS64) || defined(PIPE_ARCH_LOONGARCH64) + if (gallivm_perf & GALLIVM_PERF_OPT_O3) + optlevel = Aggressive; + else if (gallivm_perf & GALLIVM_PERF_OPT_O2) + optlevel = Default; + else if (gallivm_perf & GALLIVM_PERF_OPT_O1) + optlevel = Less; + else +#endif + optlevel = Default; } ret = lp_build_create_jit_compiler_for_module(&gallivm->engine, @@ -450,7 +465,14 @@ lp_build_init(void) if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) { lp_native_vector_width = 256; - } else { + } else if (util_get_cpu_caps()->has_lasx) { +#if defined(PIPE_ARCH_LOONGARCH64) + if (lp_probe_lasx()) + lp_native_vector_width = 256; + else +#endif + lp_native_vector_width = 128; + } else { /* Leave it at 128, even when no SIMD extensions are available. * Really needs to be a multiple of 128 so can fit 4 floats. */ @@ -622,6 +644,10 @@ gallivm_compile_module(struct gallivm_state *gallivm) LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim-non-leaf", "true"); #endif +#if defined(PIPE_ARCH_MIPS64) + LLVMAddTargetDependentFunctionAttr(func, "target-features", "+nomadd4"); +#endif + LLVMRunFunctionPassManager(gallivm->passmgr, func); func = LLVMGetNextFunction(func); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index be288ab..675de06 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -376,7 +376,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, llvm::SmallVector MAttrs; -#if LLVM_VERSION_MAJOR >= 4 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM)) +#if LLVM_VERSION_MAJOR >= 4 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM) || defined(PIPE_ARCH_LOONGARCH64)) /* llvm-3.3+ implements sys::getHostCPUFeatures for Arm * and llvm-3.7+ for x86, which allows us to enable/disable * code generation based on the results of cpuid on these @@ -470,6 +470,17 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, MAttrs.push_back("+fp64"); #endif +#if defined(PIPE_ARCH_MIPS64) + /* MSA requires a 64-bit FPU register file */ + MAttrs.push_back(util_cpu_caps.has_msa ? "+msa" : "-msa"); + /* Support 64-bit FP registers. */ + MAttrs.push_back("+fp64"); + /* General Purpose Registers are 64-bit wide */ + MAttrs.push_back("+gp64"); + /* Pointers are 64-bit wide */ + MAttrs.push_back("+ptr64"); +#endif + builder.setMAttrs(MAttrs); if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) { @@ -533,6 +544,13 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, */ if (MCPU == "generic") MCPU = util_get_cpu_caps()->has_msa ? "mips64r5" : "mips64r2"; + else if (MCPU == "loongson3a") + MCPU = util_get_cpu_caps()->has_msa ? "mips64r5" : "mips64r2"; +#endif + +#if defined(PIPE_ARCH_LOONGARCH64) + if (MCPU == "generic") + MCPU = "gs464v"; #endif builder.setMCPU(MCPU); @@ -573,6 +591,30 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, return 1; } +#if defined(PIPE_ARCH_LOONGARCH64) +extern "C" +LLVMBool +lp_probe_lasx(void) +{ + using namespace llvm; + /* our llvm-8+ implements sys::getHostCPUFeatures for loongarch, + * which allows us to enable/disable code generation based + * on the results of cpucfg. + */ + llvm::StringMap features; + llvm::sys::getHostCPUFeatures(features); + + for (StringMapIterator f = features.begin(); + f != features.end(); + ++f) { + + if ((*f).first() == "lasx" && (*f).second) + return 1; + } + + return 0; +} +#endif extern "C" void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h index fa0ce90..f9eb530 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h @@ -64,6 +64,9 @@ gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info); extern void lp_set_target_options(void); +#if defined(PIPE_ARCH_LOONGARCH64) +extern LLVMBool lp_probe_lasx(void); +#endif extern int lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 839902b..c1ce1cf 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -897,8 +897,8 @@ static void update_cache_sha1_cpu(struct mesa_sha1 *ctx) * Don't need the cpu cache affinity stuff. The rest * is contained in first 5 dwords. */ - STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t)); - _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t)); + STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 6 * sizeof(uint32_t)); + _mesa_sha1_update(ctx, cpu_caps, 6 * sizeof(uint32_t)); } static void lp_disk_cache_create(struct llvmpipe_screen *screen) diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c index cbea1e2..5d64132 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -479,7 +479,7 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned continue; } - if (!util_get_cpu_caps()->has_neon && + if (!util_get_cpu_caps()->has_neon && !util_get_cpu_caps()->has_msa && test->ref == &nearbyintf && length == 2 && ref != roundf(testval)) { /* FIXME: The generic (non SSE) path in lp_build_iround, which is diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 978aa45..cc55351 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -130,6 +130,12 @@ #define PIPE_ARCH_MIPS #endif +#if defined(__loongarch64__) || defined(__loongarch64) +#define PIPE_ARCH_LOONGARCH64 +#elif defined(__loongarch__) +#define PIPE_ARCH_LOONGARCH +#endif + /* * Endian detection. */ diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index 86f66a6..310d839 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -77,6 +77,7 @@ foreach d : [[with_gallium_kmsro, [ 'ingenic-drm_dri.so', 'kirin_dri.so', 'mali-dp_dri.so', + 'loongson-drm_dri.so', 'mcde_dri.so', 'mediatek_dri.so', 'meson_dri.so', diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c index 30c9ee9..aa9f768 100644 --- a/src/gallium/targets/dri/target.c +++ b/src/gallium/targets/dri/target.c @@ -106,6 +106,7 @@ DEFINE_LOADER_DRM_ENTRYPOINT(imx_dcss) DEFINE_LOADER_DRM_ENTRYPOINT(ingenic_drm) DEFINE_LOADER_DRM_ENTRYPOINT(kirin) DEFINE_LOADER_DRM_ENTRYPOINT(mali_dp) +DEFINE_LOADER_DRM_ENTRYPOINT(loongson_drm) DEFINE_LOADER_DRM_ENTRYPOINT(mcde) DEFINE_LOADER_DRM_ENTRYPOINT(mediatek) DEFINE_LOADER_DRM_ENTRYPOINT(meson) diff --git a/src/loader/pci_id_driver_map.h b/src/loader/pci_id_driver_map.h index d75b2cc..232e1b7 100644 --- a/src/loader/pci_id_driver_map.h +++ b/src/loader/pci_id_driver_map.h @@ -68,6 +68,12 @@ static const int vmwgfx_chip_ids[] = { #undef CHIPSET }; +static const int ls7a_chip_ids[] = { +#define CHIPSET(chip, name, family) chip, +#include "pci_ids/ls7a1000_pci_ids.h" +#undef CHIPSET +}; + bool is_nouveau_vieux(int fd); bool is_kernel_i915(int fd); @@ -90,6 +96,7 @@ static const struct { { 0x1002, "radeonsi", NULL, -1 }, { 0x10de, "nouveau_vieux", NULL, -1, is_nouveau_vieux }, { 0x10de, "nouveau", NULL, -1, }, + { 0x0014, "loongson-drm", ls7a_chip_ids, ARRAY_SIZE(ls7a_chip_ids) }, { 0x1af4, "virtio_gpu", virtio_gpu_chip_ids, ARRAY_SIZE(virtio_gpu_chip_ids) }, { 0x15ad, "vmwgfx", vmwgfx_chip_ids, ARRAY_SIZE(vmwgfx_chip_ids) }, }; diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c index 955d087..43675d2 100644 --- a/src/util/u_cpu_detect.c +++ b/src/util/u_cpu_detect.c @@ -456,6 +456,30 @@ check_os_mips64_support(void) } #endif /* PIPE_ARCH_MIPS64 */ +#if defined(PIPE_ARCH_LOONGARCH64) +static void +check_os_loongarch64_support(void) +{ +#if defined(PIPE_OS_LINUX) + Elf64_auxv_t aux; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); + if (fd >= 0) { + while (read(fd, &aux, sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { + if (aux.a_type == AT_HWCAP) { + uint64_t hwcap = aux.a_un.a_val; + + util_cpu_caps.has_lsx = (hwcap >> 2) & 1; + util_cpu_caps.has_lasx = (hwcap >> 3) & 1; + break; + } + } + close (fd); + } +#endif /* PIPE_OS_LINUX */ +} +#endif static void get_cpu_topology(void) @@ -813,6 +837,10 @@ util_cpu_detect_once(void) check_os_mips64_support(); #endif /* PIPE_ARCH_MIPS64 */ +#if defined(PIPE_ARCH_LOONGARCH64) + check_os_loongarch64_support(); +#endif + get_cpu_topology(); if (debug_get_option_dump_cpu()) { @@ -842,6 +870,8 @@ util_cpu_detect_once(void) printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon); printf("util_cpu_caps.has_msa = %u\n", util_cpu_caps.has_msa); printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz); + printf("util_cpu_caps.has_lsx = %u\n", util_cpu_caps.has_lsx); + printf("util_cpu_caps.has_lasx = %u\n", util_cpu_caps.has_lasx); printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f); printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq); printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma); diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h index 59dd230..cd4319e 100644 --- a/src/util/u_cpu_detect.h +++ b/src/util/u_cpu_detect.h @@ -103,6 +103,8 @@ struct util_cpu_caps_t { unsigned has_daz:1; unsigned has_neon:1; unsigned has_msa:1; + unsigned has_lsx:1; + unsigned has_lasx:1; unsigned has_avx512f:1; unsigned has_avx512dq:1; -- 2.41.0