From cc2a9973d5082ff996ff80bc16cc3db4982341dd Mon Sep 17 00:00:00 2001 From: tongxueting Date: Fri, 17 Jan 2025 16:08:27 +0800 Subject: [PATCH 1/9] Add limited support for LoongArch This add basic support for LoongArch. Signed-off-by: Jinyang He # Rebase #92 --- CMakeLists.txt | 16 +- configure.py | 13 + include/cpuinfo-mock.h | 3 + include/cpuinfo.h | 154 +++++++ src/api.c | 10 +- src/cpuinfo/internal-api.h | 3 +- src/init.c | 6 + src/linux/processors.c | 4 +- src/loongarch/api.h | 67 +++ src/loongarch/cache.c | 135 ++++++ src/loongarch/cpucfg.h | 61 +++ src/loongarch/linux/api.h | 118 +++++ src/loongarch/linux/chipset.c | 202 +++++++++ src/loongarch/linux/clusters.c | 48 ++ src/loongarch/linux/cpucfg.c | 264 +++++++++++ src/loongarch/linux/cpuinfo.c | 566 ++++++++++++++++++++++++ src/loongarch/linux/hwcap.c | 45 ++ src/loongarch/linux/init.c | 602 ++++++++++++++++++++++++++ src/loongarch/linux/loongarch64-isa.c | 50 +++ src/loongarch/uarch.c | 28 ++ tools/cpu-info.c | 4 + tools/isa-info.c | 22 + 22 files changed, 2412 insertions(+), 9 deletions(-) create mode 100644 src/loongarch/api.h create mode 100644 src/loongarch/cache.c create mode 100644 src/loongarch/cpucfg.h create mode 100644 src/loongarch/linux/api.h create mode 100644 src/loongarch/linux/chipset.c create mode 100644 src/loongarch/linux/clusters.c create mode 100644 src/loongarch/linux/cpucfg.c create mode 100644 src/loongarch/linux/cpuinfo.c create mode 100644 src/loongarch/linux/hwcap.c create mode 100644 src/loongarch/linux/init.c create mode 100644 src/loongarch/linux/loongarch64-isa.c create mode 100644 src/loongarch/uarch.c diff --git a/CMakeLists.txt b/CMakeLists.txt index bd9f77f8..00df8907 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ IF(NOT CMAKE_SYSTEM_PROCESSOR) "cpuinfo will compile, but cpuinfo_initialize() will always fail.") SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ENDIF() -ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES 
"^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64.*|ARM64.*|riscv(32|64))$") +ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64.*|ARM64.*|riscv(32|64)|loongarch64)$") MESSAGE(WARNING "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. " "cpuinfo will compile, but cpuinfo_initialize() will always fail.") @@ -224,6 +224,20 @@ IF(CPUINFO_SUPPORTED_PLATFORM) src/riscv/linux/riscv-hw.c src/riscv/linux/riscv-isa.c) ENDIF() + ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(loongarch64)$") + LIST(APPEND CPUINFO_SRCS + src/loongarch/uarch.c + src/loongarch/cache.c) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + LIST(APPEND CPUINFO_SRCS + src/loongarch/linux/init.c + src/loongarch/linux/cpuinfo.c + src/loongarch/linux/clusters.c + src/loongarch/linux/chipset.c + src/loongarch/linux/cpucfg.c + src/loongarch/linux/hwcap.c + src/loongarch/linux/loongarch64-isa.c) + ENDIF() ENDIF() IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") diff --git a/configure.py b/configure.py index 00bba24b..67762d62 100755 --- a/configure.py +++ b/configure.py @@ -63,6 +63,19 @@ def main(args): "riscv/linux/riscv-isa.c", ] + if build.target.is_loongarch64: + sources += ["loongarch/uarch.c", "loongarch/cache.c"] + if build.target.is_linux: + sources += [ + "loongarch/linux/init.c", + "loongarch/linux/cpuinfo.c", + "loongarch/linux/clusters.c", + "loongarch/linux/cpucfg.c", + "loongarch/linux/chipset.c", + "loongarch/linux/hwcap.c", + "loongarch/linux/loongarch64-isa.c", + ] + if build.target.is_macos: sources += ["mach/topology.c"] if build.target.is_linux or build.target.is_android: diff --git a/include/cpuinfo-mock.h b/include/cpuinfo-mock.h index 7bb6d1ee..cfba1055 100644 --- a/include/cpuinfo-mock.h +++ b/include/cpuinfo-mock.h @@ -62,6 +62,9 @@ void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); #if CPUINFO_ARCH_ARM void CPUINFO_ABI cpuinfo_set_hwcap2(uint64_t hwcap2); #endif +#if CPUINFO_ARCH_LOONGARCH64 +void 
CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); +#endif #endif #if defined(__ANDROID__) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 6eb4b8c3..61c5c1fe 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -54,6 +54,10 @@ #endif #endif +#if defined(__loongarch64) +#define CPUINFO_ARCH_LOONGARCH64 1 +#endif + /* Define other architecture-specific macros as 0 */ #ifndef CPUINFO_ARCH_X86 @@ -96,6 +100,10 @@ #define CPUINFO_ARCH_RISCV64 0 #endif +#ifndef CPUINFO_ARCH_LOONGARCH64 +#define CPUINFO_ARCH_LOONGARCH64 0 +#endif + #if CPUINFO_ARCH_X86 && defined(_MSC_VER) #define CPUINFO_ABI __cdecl #elif CPUINFO_ARCH_X86 && defined(__GNUC__) @@ -304,6 +312,10 @@ enum cpuinfo_vendor { * in 1997. */ cpuinfo_vendor_dec = 57, + /** + * Loongson. Vendor of LOONGARCH processor microarchitecture. + */ + cpuinfo_vendor_loongson = 58, }; /** @@ -601,6 +613,9 @@ enum cpuinfo_uarch { /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */ cpuinfo_uarch_taishan_v110 = 0x00C00100, + + /** Loongson LA4 64 (Loongarch3 series processors). 
*/ + cpuinfo_uarch_LA464 = 0x00D00100, }; struct cpuinfo_processor { @@ -635,6 +650,10 @@ struct cpuinfo_processor { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 /** APIC ID (unique x86-specific ID of the logical processor) */ uint32_t apic_id; +#endif +#if CPUINFO_ARCH_LOONGARCH64 + /** CPUCFG ID (unique loongarch-specific ID of the logical processor) */ + uint32_t cpucfg_id; #endif struct { /** Level 1 instruction cache */ @@ -671,6 +690,9 @@ struct cpuinfo_core { #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 /** Value of Main ID Register (MIDR) for this core */ uint32_t midr; +#elif CPUINFO_ARCH_LOONGARCH64 + /** Value of CPUCFG for this core */ + uint32_t cpucfg; #endif /** Clock rate (non-Turbo) of the core, in Hz */ uint64_t frequency; @@ -699,6 +721,9 @@ struct cpuinfo_cluster { #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 /** Value of Main ID Register (MIDR) of the cores in the cluster */ uint32_t midr; +#elif CPUINFO_ARCH_LOONGARCH64 + /** Value of CPUCFG for this cores in the cluster */ + uint32_t cpucfg; #endif /** Clock rate (non-Turbo) of the cores in the cluster, in Hz */ uint64_t frequency; @@ -732,6 +757,9 @@ struct cpuinfo_uarch_info { #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 /** Value of Main ID Register (MIDR) for the microarchitecture */ uint32_t midr; +#elif CPUINFO_ARCH_LOONGARCH64 + /** Value of CPUCFG for the microarchitecture */ + uint32_t cpucfg; #endif /** Number of logical processors with the microarchitecture */ uint32_t processor_count; @@ -2227,6 +2255,132 @@ static inline bool cpuinfo_has_riscv_v(void) { #endif } +#if CPUINFO_ARCH_LOONGARCH64 +/* This structure is not a part of stable API. Use cpuinfo_has_loongarch_* functions instead. 
*/ +struct cpuinfo_loongarch_isa { + bool cpucfg; + bool lam; + bool ual; + bool fpu; + bool lsx; + bool lasx; + + bool crc32; + bool complex; + bool crypto; + bool lvz; + bool lbt_x86; + bool lbt_arm; + bool lbt_mips; +}; + +extern struct cpuinfo_loongarch_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_loongarch_cpucfg(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.cpucfg; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lam(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lam; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_ual(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.ual; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_fpu(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.fpu; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lsx(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lsx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lasx(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lasx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_crc32(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.crc32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_complex(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.complex; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_crypto(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.crypto; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lvz(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lvz; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lbt_x86(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lbt_x86; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lbt_arm(void) { +#if 
CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lbt_arm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lbt_mips(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lbt_mips; +#else + return false; +#endif +} + const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); diff --git a/src/api.c b/src/api.c index b8c999f3..ec60d9a1 100644 --- a/src/api.c +++ b/src/api.c @@ -30,7 +30,7 @@ uint32_t cpuinfo_packages_count = 0; uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = {0}; uint32_t cpuinfo_max_cache_size = 0; -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 || CPUINFO_ARCH_LOONGARCH64 struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL; uint32_t cpuinfo_uarchs_count = 0; #else @@ -41,7 +41,7 @@ struct cpuinfo_uarch_info cpuinfo_global_uarch = {cpuinfo_uarch_unknown}; uint32_t cpuinfo_linux_cpu_max = 0; const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 || CPUINFO_ARCH_LOONGARCH64 const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL; #endif #endif @@ -78,7 +78,7 @@ const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() { if (!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs"); } -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 || CPUINFO_ARCH_LOONGARCH64 return cpuinfo_uarchs; #else 
return &cpuinfo_global_uarch; @@ -129,7 +129,7 @@ const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) { if (!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch"); } -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 || CPUINFO_ARCH_LOONGARCH64 if CPUINFO_UNLIKELY (index >= cpuinfo_uarchs_count) { return NULL; } @@ -174,7 +174,7 @@ uint32_t cpuinfo_get_uarchs_count(void) { if (!cpuinfo_is_initialized) { cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count"); } -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 || CPUINFO_ARCH_LOONGARCH64 return cpuinfo_uarchs_count; #else return 1; diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h index d84b26a8..3273cd8b 100644 --- a/src/cpuinfo/internal-api.h +++ b/src/cpuinfo/internal-api.h @@ -34,7 +34,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 || CPUINFO_ARCH_LOONGARCH64 extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs; extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count; #else @@ -61,6 +61,7 @@ CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void); CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void); CPUINFO_PRIVATE void cpuinfo_riscv_linux_init(void); CPUINFO_PRIVATE void cpuinfo_emscripten_init(void); +CPUINFO_PRIVATE void cpuinfo_loongarch_linux_init(void); CPUINFO_PRIVATE uint32_t 
cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor); diff --git a/src/init.c b/src/init.c index 81d5721c..c9e068ba 100644 --- a/src/init.c +++ b/src/init.c @@ -58,6 +58,12 @@ bool CPUINFO_ABI cpuinfo_initialize(void) { } init_guard = true; #endif +#elif CPUINFO_ARCH_LOONGARCH64 +#if defined(__linux__) + pthread_once(&init_guard, &cpuinfo_loongarch_linux_init); +#else + cpuinfo_log_error("loongarch operating system is not supported in cpuinfo"); +#endif #else cpuinfo_log_error("processor architecture is not supported in cpuinfo"); #endif diff --git a/src/linux/processors.c b/src/linux/processors.c index b68cd1cc..f33fff97 100644 --- a/src/linux/processors.c +++ b/src/linux/processors.c @@ -293,7 +293,7 @@ uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count) uint32_t max_possible_processor = 0; if (!cpuinfo_linux_parse_cpulist( POSSIBLE_CPULIST_FILENAME, max_processor_number_parser, &max_possible_processor)) { -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_LOONGARCH64 cpuinfo_log_error("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME); #else cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME); @@ -315,7 +315,7 @@ uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count) uint32_t max_present_processor = 0; if (!cpuinfo_linux_parse_cpulist( PRESENT_CPULIST_FILENAME, max_processor_number_parser, &max_present_processor)) { -#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_LOONGARCH64 cpuinfo_log_error("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME); #else cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME); diff --git a/src/loongarch/api.h b/src/loongarch/api.h new file mode 100644 index 00000000..896a2686 --- /dev/null +++ 
b/src/loongarch/api.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include + +#include +#include + + +enum cpuinfo_loongarch_chipset_vendor { + cpuinfo_loongarch_chipset_vendor_unknown = 0, + cpuinfo_loongarch_chipset_vendor_Loongson, + cpuinfo_loongarch_chipset_vendor_max, +}; + + +enum cpuinfo_loongarch_chipset_series { + cpuinfo_loongarch_chipset_series_unknown = 0, + cpuinfo_loongarch_chipset_series_3, + cpuinfo_loongarch_chipset_series_max, +}; + + +#define CPUINFO_LOONGARCH_CHIPSET_SUFFIX_MAX 8 + +struct cpuinfo_loongarch_chipset { + enum cpuinfo_loongarch_chipset_vendor vendor; + enum cpuinfo_loongarch_chipset_series series; +}; + + +#define CPUINFO_LOONGARCH_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX + +#ifndef __cplusplus + CPUINFO_INTERNAL void cpuinfo_loongarch_chipset_to_string( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + char name[restrict static CPUINFO_LOONGARCH_CHIPSET_NAME_MAX]); + + CPUINFO_INTERNAL void cpuinfo_loongarch_fixup_chipset( + struct cpuinfo_loongarch_chipset chipset[restrict static 1], uint32_t cores); + + CPUINFO_INTERNAL void cpuinfo_loongarch_decode_vendor_uarch( + uint32_t cpucfg, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]); + + CPUINFO_INTERNAL void cpuinfo_loongarch_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t arch_version, + struct cpuinfo_cache l1i[restrict static 1], + struct cpuinfo_cache l1d[restrict static 1], + struct cpuinfo_cache l2[restrict static 1], + struct cpuinfo_cache l3[restrict static 1]); + + CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_compute_max_cache_size( + const struct cpuinfo_processor processor[restrict static 1]); +#else /* defined(__cplusplus) */ + CPUINFO_INTERNAL void cpuinfo_loongarch_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t arch_version, + struct cpuinfo_cache l1i[1], + struct cpuinfo_cache l1d[1], + struct cpuinfo_cache l2[1], + struct cpuinfo_cache 
l3[1]); +#endif diff --git a/src/loongarch/cache.c b/src/loongarch/cache.c new file mode 100644 index 00000000..7f4117e7 --- /dev/null +++ b/src/loongarch/cache.c @@ -0,0 +1,135 @@ +#include + +#include +#include +#include +#include +#include + + +void cpuinfo_loongarch_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t arch_version, + struct cpuinfo_cache l1i[restrict static 1], + struct cpuinfo_cache l1d[restrict static 1], + struct cpuinfo_cache l2[restrict static 1], + struct cpuinfo_cache l3[restrict static 1]) +{ + switch (uarch) { + /* + * Loongarch 3A5000 Core Technical Reference Manual + * Loongarch 3A5000. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * Loongarch 3A5000 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache. + * - Fixed cache line length of 64 bytes. + * + * Loongarch 3A5000 L1 data-side memory system + * The L1 data memory system has the following features: + * - Physically Indexed, Physically Tagged (PIPT), 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-random cache replacement policy. + * + * Loongarch 3A5000 About the L2 memory system + * The L2 memory subsystem consist of: + * - An 16-way set associative L2 cache with a configurable size of 256KB. + * Cache lines have a fixed length of 64 bytes. 
+         *
+         * +--------------------+-------+-----------+-----------+-----------+----------+
+         * | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | L3 cache |
+         * +--------------------+-------+-----------+-----------+-----------+----------+
+         * | Loongarch 3A5000   | 4(+4) |    64K    |    64K    |    256K   |   16M    |
+         * +--------------------+-------+-----------+-----------+-----------+----------+
+         *
+         */
+        case cpuinfo_uarch_LA464:
+            *l1i = (struct cpuinfo_cache) {
+                .size = 64 * 1024,
+                .associativity = 4,
+                .line_size = 64
+            };
+            *l1d = (struct cpuinfo_cache) {
+                .size = 64 * 1024,
+                .associativity = 4,
+                .line_size = 64
+            };
+            *l2 = (struct cpuinfo_cache) {
+                .size = 256 * 1024,
+                .associativity = 16,
+                .line_size = 64
+            };
+            *l3 = (struct cpuinfo_cache) {
+                .size = 16 * 1024 * 1024,
+                .associativity = 16,
+                .line_size = 64
+            };
+            /*
+             * Stop here: without this break control fell through into `default`, which logged
+             * "uarch not recognized" and replaced the L1I/L1D/L2 values above with generic ones.
+             */
+            break;
+        default:
+            cpuinfo_log_warning("loongarch uarch not recognized; using generic cache parameters");
+            /*
+             * NOTE(review): this branch never assigns *l3, and assigns *l2 only when arch_version >= 7.
+             * The size checks after the switch then read whatever the caller passed in - presumably
+             * zero-initialized structures; confirm at the call site.
+             */
+            /* Follow OpenBLAS */
+            if (arch_version >= 8) {
+                *l1i = (struct cpuinfo_cache) {
+                    .size = 32 * 1024,
+                    .associativity = 4,
+                    .line_size = 64
+                };
+                *l1d = (struct cpuinfo_cache) {
+                    .size = 32 * 1024,
+                    .associativity = 4,
+                    .line_size = 64
+                };
+                *l2 = (struct cpuinfo_cache) {
+                    .size = cluster_cores * 256 * 1024,
+                    .associativity = 8,
+                    .line_size = 64
+                };
+            } else {
+                *l1i = (struct cpuinfo_cache) {
+                    .size = 16 * 1024,
+                    .associativity = 4,
+                    .line_size = 32
+                };
+                *l1d = (struct cpuinfo_cache) {
+                    .size = 16 * 1024,
+                    .associativity = 4,
+                    .line_size = 32
+                };
+                if (arch_version >= 7) {
+                    *l2 = (struct cpuinfo_cache) {
+                        .size = cluster_cores * 128 * 1024,
+                        .associativity = 8,
+                        .line_size = 32
+                    };
+                }
+            }
+            break;
+    }
+    /* Derive the number of sets from size / (ways * line size); every cache is a single partition */
+    l1i->sets = l1i->size / (l1i->associativity * l1i->line_size);
+    l1i->partitions = 1;
+    l1d->sets = l1d->size / (l1d->associativity * l1d->line_size);
+    l1d->partitions = 1;
+    if (l2->size != 0) {
+        l2->sets = l2->size / (l2->associativity * l2->line_size);
+        l2->partitions = 1;
+        if (l3->size != 0) {
+            l3->sets = l3->size
/ (l3->associativity * l3->line_size); + l3->partitions = 1; + } + } +} + +uint32_t cpuinfo_loongarch_compute_max_cache_size(const struct cpuinfo_processor* processor) { + /* + * There is no precise way to detect cache size on LOONGARCH64, and cache size reported by cpuinfo + * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum. + */ + switch (processor->core->uarch) { + + case cpuinfo_uarch_LA464: + return 16 * 1024 * 1024; + default: + return 4 * 1024 * 1024; + } +} diff --git a/src/loongarch/cpucfg.h b/src/loongarch/cpucfg.h new file mode 100644 index 00000000..afc8ee2a --- /dev/null +++ b/src/loongarch/cpucfg.h @@ -0,0 +1,61 @@ +#pragma once +#include + + + +#define CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK UINT32_C(0x00FF0000) +#define CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK UINT32_C(0x0000FF00) +#define CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK UINT32_C(0x000000FF) + + +#define CPUINFO_LOONGARCH_CPUCFG_COMPANYID_OFFSET 16 +#define CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_OFFSET 8 +#define CPUINFO_LOONGARCH_CPUCFG_REVISION_OFFSET 0 + + + +inline static uint32_t cpucfg_set_companyID(uint32_t cpucfg, uint32_t companyID) { + return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK) | + ((companyID << CPUINFO_LOONGARCH_CPUCFG_COMPANYID_OFFSET) & CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK); +} + +inline static uint32_t cpucfg_set_processorID(uint32_t cpucfg, uint32_t processorID) { + return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK) | + ((processorID << CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_OFFSET) & CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK); +} + +inline static uint32_t cpucfg_set_revision(uint32_t cpucfg, uint32_t revision) { + return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK) | + ((revision << CPUINFO_LOONGARCH_CPUCFG_REVISION_OFFSET) & CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK); +} + + + + +inline static uint32_t cpucfg_get_companyID(uint32_t cpucfg) { + return (cpucfg & CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK) >> 
CPUINFO_LOONGARCH_CPUCFG_COMPANYID_OFFSET; +} + +inline static uint32_t cpucfg_get_processorID(uint32_t cpucfg) { + return (cpucfg & CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK) >> CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_OFFSET; +} + +inline static uint32_t cpucfg_get_revision(uint32_t cpucfg) { + return (cpucfg & CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK) >> CPUINFO_LOONGARCH_CPUCFG_REVISION_OFFSET; +} + + + +inline static uint32_t cpucfg_copy_companyID(uint32_t cpucfg, uint32_t other_cpucfg) { + return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK) | (other_cpucfg & CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK); +} + +inline static uint32_t cpucfg_copy_processorID(uint32_t cpucfg, uint32_t other_cpucfg) { + return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK) | (other_cpucfg & CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK); +} + + +inline static uint32_t cpucfg_copy_revision(uint32_t cpucfg, uint32_t other_cpucfg) { + return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK) | (other_cpucfg & CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK); +} + diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h new file mode 100644 index 00000000..387a2c57 --- /dev/null +++ b/src/loongarch/linux/api.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +/* No hard limit in the kernel, maximum length observed on non-rogue kernels is 64 */ +#define CPUINFO_HARDWARE_VALUE_MAX 64 +/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. 
Add 1 symbol to detect overly large revision strings */ +#define CPUINFO_REVISION_VALUE_MAX 9 + + +#if CPUINFO_ARCH_LOONGARCH64 + /* /usr/include/loongarch64-linux-gnu/asm/hwcap.h */ + #define CPUINFO_LOONGARCH_LINUX_FEATURE_CPUCFG UINT32_C(0x00000001) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LAM UINT32_C(0x00000002) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_UAL UINT32_C(0x00000004) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_FPU UINT32_C(0x00000008) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LSX UINT32_C(0x00000010) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LASX UINT32_C(0x00000020) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_CRC32 UINT32_C(0x00000040) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_COMPLEX UINT32_C(0x00000080) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_CRYPTO UINT32_C(0x00000100) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ UINT32_C(0x00000200) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86 UINT32_C(0x00000400) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_ARM UINT32_C(0x00000800) + #define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS UINT32_C(0x00001000) +#endif + + +#define CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID UINT32_C(0x00010000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID UINT32_C(0x00020000) +#define CPUINFO_LOONGARCH_LINUX_VALID_REVISION UINT32_C(0x00040000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) +#define CPUINFO_LOONGARCH_LINUX_VALID_FEATURES UINT32_C(0x00400000) +#define CPUINFO_LOONGARCH_LINUX_VALID_INFO UINT32_C(0x007F0000) +#define CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG UINT32_C(0x003F0000) + +struct cpuinfo_loongarch_linux_processor { + uint32_t architecture_version; + uint32_t features; + uint32_t cpucfg_id; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; + uint32_t uarch_index; + /** + * ID of the physical package which includes this logical processor. 
+ * The value is parsed from /sys/devices/system/cpu/cpu/topology/physical_package_id + */ + uint32_t package_id; + /** + * Minimum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + uint32_t package_leader_id; + /** + * Number of logical processors in the package. + */ + uint32_t package_processor_count; + + /** Linux processor ID */ + uint32_t system_processor_id; + uint32_t flags; +}; + + +CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_parse_proc_cpuinfo( + char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + uint32_t max_processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]); + +#if CPUINFO_ARCH_LOONGARCH64 + CPUINFO_INTERNAL void cpuinfo_loongarch_linux_hwcap_from_getauxval( + uint32_t hwcap[restrict static 1]); + + CPUINFO_INTERNAL void cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( + uint32_t features, + struct cpuinfo_loongarch_isa isa[restrict static 1]); +#endif + +CPUINFO_INTERNAL struct cpuinfo_loongarch_chipset + cpuinfo_loongarch_linux_decode_chipset( + const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX]); + + +CPUINFO_INTERNAL struct cpuinfo_loongarch_chipset + cpuinfo_loongarch_linux_decode_chipset_from_proc_cpuinfo_hardware( + const char proc_cpuinfo_hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + bool is_loongson); + + +CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_detect_core_clusters_by_heuristic( + uint32_t usable_processors, + uint32_t max_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); + +CPUINFO_INTERNAL void cpuinfo_loongarch_linux_detect_core_clusters_by_sequential_scan( + uint32_t max_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); + +CPUINFO_INTERNAL void 
cpuinfo_loongarch_linux_count_cluster_processors( + uint32_t max_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); + +CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_linux_detect_cluster_cpucfg( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + uint32_t max_processors, + uint32_t usable_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); + +extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; +extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; diff --git a/src/loongarch/linux/chipset.c b/src/loongarch/linux/chipset.c new file mode 100644 index 00000000..3a66066a --- /dev/null +++ b/src/loongarch/linux/chipset.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include + +#include +#include +#include + + +static inline bool is_ascii_whitespace(char c) { + switch (c) { + case ' ': + case '\t': + case '\r': + case '\n': + return true; + default: + return false; + } +} + +static inline bool is_ascii_alphabetic(char c) { + const char lower_c = c | '\x20'; + return (uint8_t) (lower_c - 'a') <= (uint8_t) ('z' - 'a'); +} + +static inline bool is_ascii_alphabetic_uppercase(char c) { + return (uint8_t) (c - 'A') <= (uint8_t) ('Z' - 'A'); +} + +static inline bool is_ascii_numeric(char c) { + return (uint8_t) (c - '0') < 10; +} + +static inline uint16_t load_u16le(const void* ptr) { +#if defined(__loongarch64) + return *((const uint16_t*) ptr); +#else + const uint8_t* byte_ptr = (const uint8_t*) ptr; + return ((uint16_t) byte_ptr[1] << 8) | (uint16_t) byte_ptr[0]; +#endif +} + +static inline uint32_t load_u24le(const void* ptr) { +#if defined(__loongarch64) + return ((uint32_t) ((const uint8_t*) ptr)[2] << 16) | ((uint32_t) *((const uint16_t*) ptr)); +#else + const uint8_t* byte_ptr = (const uint8_t*) ptr; + return ((uint32_t) byte_ptr[2] << 16) | ((uint32_t) byte_ptr[1] << 8) | (uint32_t) byte_ptr[0]; +#endif 
+}
+
+static inline uint32_t load_u32le(const void* ptr) {
+#if defined(__loongarch64)
+    return *((const uint32_t*) ptr);
+#else
+    return ((uint32_t) ((const uint8_t*) ptr)[3] << 24) | load_u24le(ptr);
+#endif
+}
+
+/*
+ * Map from Loongarch chipset series ID to Loongarch chipset vendor ID.
+ * This map is used to avoid storing vendor IDs in tables.
+ */
+
+
+static enum cpuinfo_loongarch_chipset_vendor chipset_series_vendor[cpuinfo_loongarch_chipset_series_max] = {
+    [cpuinfo_loongarch_chipset_series_unknown] = cpuinfo_loongarch_chipset_vendor_unknown,
+    [cpuinfo_loongarch_chipset_series_3] = cpuinfo_loongarch_chipset_vendor_Loongson,
+};
+
+
+
+struct loongson_map_entry {
+    const char* platform;
+    uint8_t series;
+};
+
+
+/*
+ * ASCII case-insensitive comparison of two NUL-terminated strings.
+ *
+ * Characters are read as unsigned char and 'A'-'Z' are folded to 'a'-'z' before comparing.
+ * The previous implementation subtracted the two pointers instead of the characters they
+ * point to, so it reported a mismatch for any two distinct buffers regardless of content.
+ *
+ * @param[in] a - first string.
+ * @param[in] b - second string.
+ *
+ * @returns 0 if the strings are equal ignoring ASCII letter case; otherwise the difference between
+ *          the first pair of mismatching folded characters (negative if @p a sorts first).
+ */
+int strcicmp(char const *a, char const *b)
+{
+    for (;; a++, b++) {
+        int ca = (unsigned char) *a;
+        int cb = (unsigned char) *b;
+        if (ca >= 'A' && ca <= 'Z') {
+            ca += 'a' - 'A';
+        }
+        if (cb >= 'A' && cb <= 'Z') {
+            cb += 'a' - 'A';
+        }
+        const int d = ca - cb;
+        /* d == 0 with ca == 0 means both strings ended together: equal */
+        if (d != 0 || ca == 0) {
+            return d;
+        }
+    }
+}
+
+
+static const struct loongson_map_entry loongson_hardware_map_entries[] = {
+    {
+        /* "3A5000" -> Loongson 3a5000 */
+        .platform = "3A5000",
+        .series = cpuinfo_loongarch_chipset_series_3,
+    },
+};
+
+
+
+/*
+ * Decodes chipset name from /proc/cpuinfo Hardware string.
+ * The string is compared against the table of known Loongson Hardware values.
+ *
+ * @param[in] hardware - /proc/cpuinfo Hardware string.
+ * @param is_loongson - when false, the table lookup is skipped and the chipset is reported as unknown.
+ *
+ * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor
+ * and series identifiers.
+ */
+struct cpuinfo_loongarch_chipset cpuinfo_loongarch_linux_decode_chipset_from_proc_cpuinfo_hardware(
+    const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+    bool is_loongson)
+{
+    /* Length of the Hardware string, capped at the buffer size in case it is not NUL-terminated */
+    const size_t hardware_length = strnlen(hardware, CPUINFO_HARDWARE_VALUE_MAX);
+
+    if (is_loongson) {
+        /* Compare to tabulated Hardware values for popular chipsets/devices which can't be otherwise detected */
+        for (size_t i = 0; i < CPUINFO_COUNT_OF(loongson_hardware_map_entries); i++) {
+
+            /*
+             * Exact match: the first hardware_length characters agree and the table entry ends
+             * right there. An empty Hardware string passes strncmp (n == 0) but fails the
+             * terminator check, so it never matches a non-empty entry.
+             */
+            if (strncmp(loongson_hardware_map_entries[i].platform, hardware, hardware_length) == 0 &&
+                    loongson_hardware_map_entries[i].platform[hardware_length] == 0)
+            {
+                cpuinfo_log_debug(
+                    "found /proc/cpuinfo Hardware string \"%.*s\" in special chipset table",
+                    (int) hardware_length, hardware);
+                /* Create chipset name from entry */
+                return (struct cpuinfo_loongarch_chipset) {
+                    .vendor = chipset_series_vendor[loongson_hardware_map_entries[i].series],
+                    .series = (enum cpuinfo_loongarch_chipset_series) loongson_hardware_map_entries[i].series,
+                };
+            }
+        }
+    }
+
+    /* Not a Loongson lookup, or no table entry matched */
+    return (struct cpuinfo_loongarch_chipset) {
+        .vendor = cpuinfo_loongarch_chipset_vendor_unknown,
+        .series = cpuinfo_loongarch_chipset_series_unknown,
+    };
+}
+
+
+/* Map from Loongarch chipset vendor ID to its string representation */
+static const char* chipset_vendor_string[cpuinfo_loongarch_chipset_vendor_max] = {
+    [cpuinfo_loongarch_chipset_vendor_unknown] = "Unknown",
+    [cpuinfo_loongarch_chipset_vendor_Loongson] = "Loongson",
+};
+
+/* Map from Loongarch chipset series ID to its string representation */
+static const char* chipset_series_string[cpuinfo_loongarch_chipset_series_max] = {
+    [cpuinfo_loongarch_chipset_series_unknown] = NULL,
+    [cpuinfo_loongarch_chipset_series_3] = "3",
+};
+
+/* Convert chipset name represented by cpuinfo_loongarch_chipset structure to a string representation */
+void
cpuinfo_loongarch_chipset_to_string( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + char name[restrict static CPUINFO_LOONGARCH_CHIPSET_NAME_MAX]) +{ + enum cpuinfo_loongarch_chipset_vendor vendor = chipset->vendor; + + if (vendor >= cpuinfo_loongarch_chipset_vendor_max) { + vendor = cpuinfo_loongarch_chipset_vendor_unknown; + } + enum cpuinfo_loongarch_chipset_series series = chipset->series; + if (series >= cpuinfo_loongarch_chipset_series_max) { + series = cpuinfo_loongarch_chipset_series_unknown; + } + + const char* vendor_string = chipset_vendor_string[vendor]; + const char* series_string = chipset_series_string[series]; + + if (series == cpuinfo_loongarch_chipset_series_unknown) { + strncpy(name, vendor_string, CPUINFO_LOONGARCH_CHIPSET_NAME_MAX); + } else { + snprintf(name, CPUINFO_LOONGARCH_CHIPSET_NAME_MAX, + "%s %s", vendor_string, series_string); + } +} + + +/* + * Decodes chipset name from /proc/cpuinfo Hardware string. + * For some chipsets, the function relies frequency and on number of cores for chipset detection. + * + * @param[in] hardware - /proc/cpuinfo Hardware string. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. 
+ */ +struct cpuinfo_loongarch_chipset cpuinfo_loongarch_linux_decode_chipset( + const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX]) +{ + struct cpuinfo_loongarch_chipset chipset = + cpuinfo_loongarch_linux_decode_chipset_from_proc_cpuinfo_hardware( + hardware, true); + + return chipset; +} + diff --git a/src/loongarch/linux/clusters.c b/src/loongarch/linux/clusters.c new file mode 100644 index 00000000..8be642d4 --- /dev/null +++ b/src/loongarch/linux/clusters.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + + +/* + * Counts the number of logical processors in each core cluster. + * This function should be called after all processors are assigned to core clusters. + * + * @param max_processors - number of elements in the @p processors array. + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, + * and decoded core cluster (package_leader_id) information. + * The function expects the value of processors[i].package_processor_count to be zero. + * Upon return, processors[i].package_processor_count will contain the number of logical + * processors in the respective core cluster. 
+ */ +void cpuinfo_loongarch_linux_count_cluster_processors( + uint32_t max_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]) +{ + /* First pass: accumulate the number of processors at the group leader's package_processor_count */ + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t package_leader_id = processors[i].package_leader_id; + processors[package_leader_id].package_processor_count += 1; + } + } + /* Second pass: copy the package_processor_count from the group leader processor */ + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t package_leader_id = processors[i].package_leader_id; + processors[i].package_processor_count = processors[package_leader_id].package_processor_count; + } + } +} diff --git a/src/loongarch/linux/cpucfg.c b/src/loongarch/linux/cpucfg.c new file mode 100644 index 00000000..437371d7 --- /dev/null +++ b/src/loongarch/linux/cpucfg.c @@ -0,0 +1,264 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define CLUSTERS_MAX 3 + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + +/* Description of core clusters configuration in a chipset (identified by series) */ +struct cluster_config { + /* Number of cores (logical processors) */ + uint8_t cores; + /* Loongarch chipset series (see cpuinfo_loongarch_chipset_series enum) */ + uint8_t series; + /* Number of heterogenous clusters in the CPU package */ + uint8_t clusters; + /* Number of cores in each cluster */ + uint8_t cluster_cores[CLUSTERS_MAX]; + /* CPUCFG of cores in each cluster */ + uint32_t cluster_cpucfg[CLUSTERS_MAX]; +}; + + +static const struct cluster_config cluster_configs[] = { + { + .cores = 4, + .series = 
cpuinfo_loongarch_chipset_series_3, + }, +}; + +/* + * Searches chipset name in mapping of chipset name to cores' CPUCFG values. If match is successful, initializes CPUCFG + * for all clusters' leaders with tabulated values. + * + * @param[in] chipset - chipset (SoC) name information. + * @param clusters_count - number of CPU core clusters detected in the SoC. + * @param cluster_leaders - indices of core clusters' leaders in the @p processors array. + * @param processors_count - number of usable logical processors in the system. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed CPUCFG, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon successful return, processors[i].cpucfg for all clusters' leaders contains the + * tabulated CPUCFG values. + * @param verify_cpucfg - indicated whether the function should check that the CPUCFG values to be assigned to leaders of + * core clusters are consistent with known parts of their parsed values. + * Set if to false if the only CPUCFG value parsed from /proc/cpuinfo is for the last processor + * reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor. + * + * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with CPUCFG values. + * @retval false if the chipset was not found in the mapping, or any consistency check failed. 
+ */ +static bool cpuinfo_loongarch_linux_detect_cluster_cpucfg_by_chipset( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + uint32_t clusters_count, + const uint32_t cluster_leaders[restrict static CLUSTERS_MAX], + uint32_t processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count], + bool verify_cpucfg) +{ + if (clusters_count <= CLUSTERS_MAX) { + for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) { + if (cluster_configs[c].series == chipset->series) { + /* Verify that the total number of cores and clusters of cores matches expectation */ + if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) { + return false; + } + + /* Verify that core cluster configuration matches expectation */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) { + return false; + } + } + + if (verify_cpucfg) { + /* Verify known parts of CPUCFG */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + + /* Create a mask of known cpucfg bits */ + uint32_t cpucfg_mask = 0; + + if (processors[cluster_leader].flags & CPUINFO_LOONGARCH_LINUX_VALID_REVISION) { + cpucfg_mask |= CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK; + } + + /* Verify the bits under the mask */ + if ((processors[cluster_leader].cpucfg_id ^ cluster_configs[c].cluster_cpucfg[cluster]) & cpucfg_mask) { + cpuinfo_log_debug("parsed CPUCFG of cluster %08"PRIu32" does not match tabulated value %08"PRIu32, + processors[cluster_leader].cpucfg_id, cluster_configs[c].cluster_cpucfg[cluster]); + return false; + } + } + } + + /* Assign CPUCFGs according to tabulated configurations */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t 
cluster_leader = cluster_leaders[cluster]; + processors[cluster_leader].cpucfg_id = cluster_configs[c].cluster_cpucfg[cluster]; + processors[cluster_leader].flags |= CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG; + cpuinfo_log_debug("cluster %"PRIu32" CPUCFG = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_cpucfg[cluster]); + } + return true; + } + } + } + return false; +} + + +/* + * Initializes CPUCFG for leaders of core clusters in a single sequential scan: + * - Clusters preceding the first reported CPUCFG value are assumed to have default CPUCFG value. + * - Clusters following any reported CPUCFG value to have that CPUCFG value. + * + * @param default_cpucfg - CPUCFG value that will be assigned to cluster leaders preceding any reported CPUCFG value. + * @param processors_count - number of logical processor descriptions in the @p processors array. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed CPUCFG, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon successful return, processors[i].cpucfg for all core clusters' leaders contains + * the assigned CPUCFG value. 
+ */ +static void cpuinfo_loongarch_linux_detect_cluster_cpucfg_by_sequential_scan( + uint32_t default_cpucfg, + uint32_t processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count]) +{ + uint32_t cpucfg = default_cpucfg; + for (uint32_t i = 0; i < processors_count; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (processors[i].package_leader_id == i) { + if (bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) { + cpucfg = processors[i].cpucfg_id; + } else { + cpuinfo_log_info("assume processor %"PRIu32" to have CPUCFG %08"PRIx32, i, cpucfg); + /* To be consistent, we copy the CPUCFG entirely, rather than by parts */ + processors[i].cpucfg_id = cpucfg; + processors[i].flags |=CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG; + } + } + } + } +} + +/* + * Detects CPUCFG of each CPU core clusters' leader. + * + * @param[in] chipset - chipset (SoC) name information. + * @param max_processors - number of processor descriptions in the @p processors array. + * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and + * PRESENT flags. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed CPUCFG, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon return, processors[i].cpucfg for all clusters' leaders contains the CPUCFG value. 
+ * + * @returns The number of core clusters + */ +uint32_t cpuinfo_loongarch_linux_detect_cluster_cpucfg( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + uint32_t max_processors, + uint32_t usable_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]) +{ + uint32_t clusters_count = 0; + uint32_t cluster_leaders[CLUSTERS_MAX]; + uint32_t last_processor_in_cpuinfo = max_processors; + uint32_t last_processor_with_cpucfg = max_processors; + uint32_t processors_with_cpucfg_count = 0; + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR) { + last_processor_in_cpuinfo = i; + } + + const uint32_t group_leader = processors[i].package_leader_id; + if (group_leader == i) { + if (clusters_count < CLUSTERS_MAX) { + cluster_leaders[clusters_count] = i; + } + clusters_count += 1; + } else { + /* Copy known bits of information to cluster leader */ + if (!bitmask_all(processors[group_leader].flags,CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG) && + bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) + { + processors[group_leader].cpucfg_id = processors[i].cpucfg_id; + processors[group_leader].flags |=CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG; + } + } + } + } + cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count); + + /* + * Two relations between reported /proc/cpuinfo information, and cores is possible: + * - /proc/cpuinfo reports information for all or some of the cores below the corresponding + * "processor : " lines. Information on offline cores may be missing. + * - /proc/cpuinfo reports information only once, after all "processor : " lines. + * The reported information may relate to processor #0 or to the processor which + * executed the system calls to read /proc/cpuinfo. 
It is also indistinguishable + * from /proc/cpuinfo reporting information only for the last core (e.g. if all other + * cores are offline). + * + * We detect the second case by checking if /proc/cpuinfo contains valid CPUCFG only for one, + * last reported, processor. Note, that the last reported core may be not the last + * present & possible processor, as /proc/cpuinfo may non-report high-index offline cores. + */ + + if (processors_with_cpucfg_count < usable_processors) { + /* + * /proc/cpuinfo reported CPUCFG only for some processors, and probably some core clusters do not have CPUCFG + * for any of the cores. Check if this is the case. + */ + uint32_t clusters_with_cpucfg_count = 0; + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID |CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) { + if (processors[i].package_leader_id == i) { + clusters_with_cpucfg_count += 1; + } + } + } + + if (clusters_with_cpucfg_count < clusters_count) { + /* + * /proc/cpuinfo reported CPUCFG only for some clusters, need to reconstruct others. + * We make three attempts to detect CPUCFG for clusters without it: + * 1. Search tabulated CPUCFG values for chipsets which have heterogeneous clusters and ship with Linux + * kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values. + * 2. For systems with 2 clusters and CPUCFG known for one cluster, assume big.LITTLE configuration, + * and estimate CPUCFG for the other cluster under assumption that CPUCFG for the big cluster is known. + * 3. Initialize CPUCFGs for core clusters in a single sequential scan: + * - Clusters preceding the first reported CPUCFG value are assumed to have the last reported CPUCFG value. + * - Clusters following any reported CPUCFG value to have that CPUCFG value. 
/*
 * Parses the decimal value of a /proc/cpuinfo "processor" line.
 *
 * @param[in] processor_start - first character of the value.
 * @param[in] processor_end - one past the last character of the value.
 *
 * @returns The number formed by the leading decimal digits. Parsing stops at the first non-digit character
 *          (a warning is logged); an empty value yields 0 (a warning is logged).
 */
static uint32_t parse_processor_number(
	const char* processor_start,
	const char* processor_end)
{
	const size_t processor_length = (size_t) (processor_end - processor_start);

	if (processor_length == 0) {
		cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty");
		return 0;
	}

	uint32_t processor_number = 0;
	for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) {
		/* Characters below '0' wrap around to large unsigned values, so one comparison rejects both sides */
		const uint32_t digit = (uint32_t) (*digit_ptr - '0');
		if (digit >= 10) {
			cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored",
				(int) (processor_end - digit_ptr), digit_ptr);
			break;
		}

		processor_number = processor_number * 10 + digit;
	}

	return processor_number;
}
digit; + } + + return processor_number; +} + +/* + * Full list of Loongarch features reported in /proc/cpuinfo: + */ +static void parse_features( + const char* features_start, + const char* features_end, + struct cpuinfo_loongarch_linux_processor processor[restrict static 1]) +{ + const char* feature_start = features_start; + const char* feature_end; + + /* Mark the features as valid */ + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_FEATURES | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + + do { + feature_end = feature_start + 1; + for (; feature_end != features_end; feature_end++) { + if (*feature_end == ' ') { + break; + } + } + const size_t feature_length = (size_t) (feature_end - feature_start); + + switch (feature_length) { + case 3: + if (memcmp(feature_start, "lam", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LAM; + #endif + } else if (memcmp(feature_start, "ual", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_UAL; + #endif + } else if (memcmp(feature_start, "lsx", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LSX; + #endif + } else if (memcmp(feature_start, "fpu", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_FPU; + #endif + } else if (memcmp(feature_start, "lvz", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ; + #endif + } else { + goto unexpected; + } + break; + case 4: + if (memcmp(feature_start, "lasx", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LASX; + #endif + } else { + goto unexpected; + } + break; + case 5: + if (memcmp(feature_start, "crc32", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= 
/*
 * Checks whether a (not necessarily NUL-terminated) character range spells the vendor name
 * "Loongson" or "loongson". Only the case of the first letter is flexible.
 *
 * @param[in] name_start - first character of the range.
 * @param length - number of characters in the range.
 *
 * @returns true if the range is exactly "Loongson" or "loongson", false otherwise.
 */
static bool parse_loongson(const char* name_start, size_t length){
	static const char name_tail[] = "oongson";
	const size_t tail_length = sizeof(name_tail) - 1;

	/* One leading 'l'/'L' plus the fixed tail: anything of another length cannot match */
	if (length != tail_length + 1) {
		return false;
	}
	if (name_start[0] != 'l' && name_start[0] != 'L') {
		return false;
	}
	return memcmp(name_start + 1, name_tail, tail_length) == 0;
}
parse_processorID(const char* name_start, size_t length, int* processorID){ + /* expected 3A5000 or 3C5000L or other , its length is 6 or 7 */ + if(length != 6 && length != 7) return ; + char cpy[] = ""; + for (size_t i = 0; i < CPUINFO_COUNT_OF(loongson_name_map_processorID); i++) { + + if (strncmp(loongson_name_map_processorID[i].name, strncpy(cpy, name_start,length), length) == 0) + { + cpuinfo_log_debug( + "found /proc/cpuinfo model name second string \"%.*s\" in loongson processorID table", + (int) length, name_start); + /* Create chipset name from entry */ + *processorID = loongson_name_map_processorID[i].processorID; + } + } +} + +static void parse_model_name( + const char* model_name_start, + const char* model_name_end, + char* hardware, + struct cpuinfo_loongarch_linux_processor processor[restrict static 1]) +{ + const char* separator = model_name_start; + for (; separator != model_name_end; separator++) { + if (*separator == '-') { + break; + } + } + + const size_t model_length = (size_t) (separator - model_name_start); + const size_t name_length = (size_t) (model_name_end - (separator+1)); + + size_t value_length = name_length; + + if (value_length > CPUINFO_HARDWARE_VALUE_MAX) { + cpuinfo_log_info( + "length of model name value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit", + (int) value_length, separator+1, CPUINFO_HARDWARE_VALUE_MAX); + value_length = CPUINFO_HARDWARE_VALUE_MAX; + } else { + hardware[value_length] = '\0'; + } + memcpy(hardware, separator+1, value_length); + cpuinfo_log_debug("parsed /proc/cpuinfo model name second value = \"%.*s\"", (int) value_length, separator+1); + + if (model_length != 8) { + cpuinfo_log_warning("Model %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) model_length, model_name_start, separator - 1); + return; + } + if (name_length < 6 || name_length > 7) { + cpuinfo_log_warning("Model %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) 
name_length, separator + 1, model_name_end); + return; + } + uint32_t cpucfg_companyID = 0; + uint32_t cpucfg_processorID = 0; + + /* Verify the presence of hex prefix */ + bool is_loongson = parse_loongson(model_name_start, model_length); + if (is_loongson) { + cpucfg_companyID = 0x14; + processor->cpucfg_id = cpucfg_set_companyID(processor->cpucfg_id, cpucfg_companyID); + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + }else{ + cpuinfo_log_warning("Model %.*s in /proc/cpuinfo is ignored due to unexpected words", + (int) model_length, model_name_start); + return; + } + parse_processorID(separator + 1, name_length, &cpucfg_processorID); + processor->cpucfg_id = cpucfg_set_processorID(processor->cpucfg_id, cpucfg_processorID); + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + +} +static void parse_cpu_revision( + const char* cpu_revision_start, + const char* cpu_revision_end, + struct cpuinfo_loongarch_linux_processor processor[restrict static 1]) +{ + const size_t cpu_revision_length = cpu_revision_end - cpu_revision_start; + + if (cpu_revision_length != 4) { + cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_revision_length, cpu_revision_start, cpu_revision_length); + return; + } + + /* Skip if there is no hex prefix (0x) */ + if (cpu_revision_start[0] != '0' || cpu_revision_start[1] != 'x') { + cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_revision_length, cpu_revision_start); + return; + } + + /* Check if the value after hex prefix is indeed a hex digit and decode it. 
*/ + char digit_char = cpu_revision_start[2]; + uint32_t cpu_revision = 0; + if ((uint32_t) (digit_char - '0') < 10) { + cpu_revision = (uint32_t) (digit_char - '0'); + } else if ((uint32_t) (digit_char - 'A') < 6) { + cpu_revision = 10 + (uint32_t) (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + cpu_revision = 10 + (uint32_t) (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'", + (int) cpu_revision_length, cpu_revision_start, digit_char); + return; + } + cpu_revision = cpu_revision * 16; + + digit_char = cpu_revision_start[3]; + if ((uint32_t) (digit_char - '0') < 10) { + cpu_revision = (uint32_t) (digit_char - '0'); + } else if ((uint32_t) (digit_char - 'A') < 6) { + cpu_revision = 10 + (uint32_t) (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + cpu_revision = 10 + (uint32_t) (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'", + (int) cpu_revision_length, cpu_revision_start, digit_char); + return; + } + + processor->cpucfg_id = cpucfg_set_revision(processor->cpucfg_id, cpu_revision); + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_REVISION | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; +} + +static void parse_package( + const char* cpu_package_start, + const char* cpu_package_end, + struct cpuinfo_loongarch_linux_processor processor[restrict static 1]) +{ + uint32_t cpu_package = 0; + for (const char* digit_ptr = cpu_package_start; digit_ptr != cpu_package_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); + + /* Verify that the character in package is a decimal digit */ + if (digit >= 10) { + cpuinfo_log_warning("package %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + (int) (cpu_package_end - cpu_package_start), cpu_package_start, + *digit_ptr, (size_t) (digit_ptr - 
cpu_package_start)); + return; + } + + cpu_package = cpu_package * 10 + digit; + } + + processor->package_id = cpu_package; +} + +struct proc_cpuinfo_parser_state { + char* hardware; + uint32_t processor_index; + uint32_t max_processors_count; + struct cpuinfo_loongarch_linux_processor* processors; + struct cpuinfo_loongarch_linux_processor dummy_processor; +}; + +/* + * Decode a single line of /proc/cpuinfo information. + * Lines have format [ ]*:[ ] + * An example of /proc/cpuinfo (from Loongarch-3a5000): + * + * system type : generic-loongson-machine + * processor : 0 + * package : 0 + * core : 0 + * cpu family : Loongson-64bit + * model name : Loongson-3A5000 + * CPU Revision : 0x10 + * FPU Revision : 0x00 + * CPU MHz : 2300.00 + * BogoMIPS : 4600.00 + * TLB entries : 2112 + * Address sizes : 48 bits physical, 48 bits virtual + * isa : loongarch32 loongarch64 + * features : cpucfg lam ual fpu lsx lasx complex crypto lvz lbt_x86 lbt_arm lbt_mips + * hardware watchpoint : yes, iwatch count: 8, dwatch count: 8 + */ +static bool parse_line( + const char* line_start, + const char* line_end, + struct proc_cpuinfo_parser_state state[restrict static 1], + uint64_t line_number) +{ + /* Empty line. Skip. */ + if (line_start == line_end) { + return true; + } + + /* Search for ':' on the line. */ + const char* separator = line_start; + for (; separator != line_end; separator++) { + if (*separator == ':') { + break; + } + } + /* Skip line if no ':' separator was found. */ + if (separator == line_end) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip trailing spaces in key part. */ + const char* key_end = separator; + for (; key_end != line_start; key_end--) { + if (key_end[-1] != ' ' && key_end[-1] != '\t') { + break; + } + } + /* Skip line if key contains nothing but spaces. 
*/ + if (key_end == line_start) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip leading spaces in value part. */ + const char* value_start = separator + 1; + for (; value_start != line_end; value_start++) { + if (*value_start != ' ') { + break; + } + } + /* Value part contains nothing but spaces. Skip line. */ + if (value_start == line_end) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip trailing spaces in value part (if any) */ + const char* value_end = line_end; + for (; value_end != value_start; value_end--) { + if (value_end[-1] != ' ') { + break; + } + } + + const uint32_t processor_index = state->processor_index; + const uint32_t max_processors_count = state->max_processors_count; + struct cpuinfo_loongarch_linux_processor* processors = state->processors; + struct cpuinfo_loongarch_linux_processor* processor = &state->dummy_processor; + if (processor_index < max_processors_count) { + processor = &processors[processor_index]; + } + + const size_t key_length = key_end - line_start; + switch (key_length) { + case 3: + if (memcmp(line_start, "isa", key_length) == 0) { + /* isa Revision is presently useless, don't parse */ + } else { + goto unknown; + } + break; + case 4: + if (memcmp(line_start, "core", key_length) == 0) { + /* core is presently useless, don't parse */ + } else { + goto unknown; + } + break; + case 7: + if (memcmp(line_start, "package", key_length) == 0) { + parse_package(value_start, value_end, processor); + } else if (memcmp(line_start, "CPU MHz", key_length) == 0) { + /* CPU MHz is presently useless, don't parse */ + } else { + goto unknown; + } + break; + case 8: + if (memcmp(line_start, "features", key_length) == 0) { + parse_features(value_start, value_end, processor); + } else if (memcmp(line_start, 
"BogoMIPS", key_length) == 0) { + /* BogoMIPS is useless, don't parse */ + } else { + goto unknown; + } + break; + case 9: + if (memcmp(line_start, "processor", key_length) == 0) { + const uint32_t new_processor_index = parse_processor_number(value_start, value_end); + if (new_processor_index < processor_index) { + /* Strange: decreasing processor number */ + cpuinfo_log_warning( + "unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } else if (new_processor_index > processor_index + 1) { + /* Strange, but common: skipped processor $(processor_index + 1) */ + cpuinfo_log_info( + "unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } + if (new_processor_index < max_processors_count) { + /* Record that the processor was mentioned in /proc/cpuinfo */ + processors[new_processor_index].flags |= CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + } else { + /* Log and ignore processor */ + cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32, + new_processor_index, max_processors_count - 1); + } + state->processor_index = new_processor_index; + processors[new_processor_index].cpucfg_id = new_processor_index; + return true; + } else{ + goto unknown; + } + break; + case 10: + if (memcmp(line_start, "cpu family", key_length) == 0) { + /* cpu family is presently useless, don't parse */ + } else if (memcmp(line_start, "model name", key_length) == 0) { + parse_model_name(value_start,value_end,state->hardware,processor); + } else { + goto unknown; + } + break; + case 11: + if (memcmp(line_start, "system type", key_length) == 0) { + /* system type is presently useless, don't parse */ + } else if (memcmp(line_start, "TLB entries", key_length) == 0) { + /* TLB entries is presently useless, don't parse */ + } else { + goto unknown; + } + break; + case 12: + if 
(memcmp(line_start, "CPU Revision", key_length) == 0) { + /* CPU Revision is presently useless, don't parse */ + } else if (memcmp(line_start, "FPU Revision", key_length) == 0) { + /* FPU Revision is presently useless, don't parse */ + } else { + goto unknown; + } + break; + case 13: + if (memcmp(line_start, "Address sizes", key_length) == 0) { + /* Address sizes is presently useless, don't parse */ + } else { + goto unknown; + } + break; + case 18: + if (memcmp(line_start, "hardware watchpoint", key_length) == 0) { + /* Address sizes is presently useless, don't parse */ + } else { + goto unknown; + } + break; + default: + unknown: + cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start); + + } + return true; +} + +bool cpuinfo_loongarch_linux_parse_proc_cpuinfo( + char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + uint32_t max_processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]) +{ + struct proc_cpuinfo_parser_state state = { + .hardware = hardware, + .processor_index = 0, + .max_processors_count = max_processors_count, + .processors = processors, + }; + cpuinfo_log_debug(""); + return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, + (cpuinfo_line_callback) parse_line, &state); +} diff --git a/src/loongarch/linux/hwcap.c b/src/loongarch/linux/hwcap.c new file mode 100644 index 00000000..be71a2e5 --- /dev/null +++ b/src/loongarch/linux/hwcap.c @@ -0,0 +1,45 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#if CPUINFO_MOCK + #include +#endif +#include +#include +#include + +#if CPUINFO_ARCH_LOONGARCH64 + #include +#else + #define AT_HWCAP 16 +#endif + + +#if CPUINFO_MOCK + static uint32_t mock_hwcap = 0; + void cpuinfo_set_hwcap(uint32_t hwcap) { + mock_hwcap = hwcap; + } +#endif + + +#if CPUINFO_ARCH_LOONGARCH64 + void cpuinfo_loongarch_linux_hwcap_from_getauxval( + uint32_t hwcap[restrict static 1] + ) + { + #if 
/* True when every bit set in `mask` is also set in `bitfield`. */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
	const uint32_t missing_bits = ~bitfield & mask;
	return missing_bits == 0;
}

/* Smaller of two unsigned 32-bit values. */
static inline uint32_t min(uint32_t a, uint32_t b) {
	if (b < a) {
		return b;
	}
	return a;
}

/* Three-way comparison: -1 when a < b, 0 when equal, +1 when a > b. */
static inline int cmp(uint32_t a, uint32_t b) {
	if (a < b) {
		return -1;
	}
	return a > b ? 1 : 0;
}
cpuinfo_loongarch_linux_processor*) ptr_a; + const struct cpuinfo_loongarch_linux_processor* processor_b = (const struct cpuinfo_loongarch_linux_processor*) ptr_b; + + /* Move usable processors towards the start of the array */ + const bool usable_a = bitmask_all(processor_a->flags, CPUINFO_LINUX_FLAG_VALID); + const bool usable_b = bitmask_all(processor_b->flags, CPUINFO_LINUX_FLAG_VALID); + if (usable_a != usable_b) { + return (int) usable_b - (int) usable_a; + } + + /* Compare based on processsor ID (i.e. processor 0 < processor 1) */ + const uint32_t pro_a = processor_a->system_processor_id; + const uint32_t pro_b = processor_b->system_processor_id; + + return cmp(pro_a,pro_b); + + +} + +void cpuinfo_loongarch_linux_init(void) { + + struct cpuinfo_loongarch_linux_processor* loongarch_linux_processors = NULL; + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; + const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + uint32_t* linux_cpu_to_uarch_index_map = NULL; + + const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); + cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); + + const uint32_t max_possible_processors_count = 1 + + cpuinfo_linux_get_max_possible_processor(max_processors_count); + cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count); + const uint32_t max_present_processors_count = 1 + + cpuinfo_linux_get_max_present_processor(max_processors_count); + cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count); + + uint32_t valid_processor_mask = 0; + uint32_t 
loongarch_linux_processors_count = max_processors_count; + if (max_present_processors_count != 0) { + loongarch_linux_processors_count = min(loongarch_linux_processors_count, max_present_processors_count); + valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT; + } + if (max_possible_processors_count != 0) { + loongarch_linux_processors_count = min(loongarch_linux_processors_count, max_possible_processors_count); + valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE; + } + if ((max_present_processors_count | max_possible_processors_count) == 0) { + cpuinfo_log_error("failed to parse both lists of possible and present processors"); + return; + } + + loongarch_linux_processors = calloc(loongarch_linux_processors_count, sizeof(struct cpuinfo_loongarch_linux_processor)); + if (loongarch_linux_processors == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" Loongarch logical processors", + loongarch_linux_processors_count * sizeof(struct cpuinfo_loongarch_linux_processor), + loongarch_linux_processors_count); + return; + } + + if (max_possible_processors_count) { + cpuinfo_linux_detect_possible_processors( + loongarch_linux_processors_count, &loongarch_linux_processors->flags, + sizeof(struct cpuinfo_loongarch_linux_processor), + CPUINFO_LINUX_FLAG_POSSIBLE); + } + + if (max_present_processors_count) { + cpuinfo_linux_detect_present_processors( + loongarch_linux_processors_count, &loongarch_linux_processors->flags, + sizeof(struct cpuinfo_loongarch_linux_processor), + CPUINFO_LINUX_FLAG_PRESENT); + } + + char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; + + if (!cpuinfo_loongarch_linux_parse_proc_cpuinfo( + proc_cpuinfo_hardware, + loongarch_linux_processors_count, + loongarch_linux_processors)) { + cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo"); + return; + } + + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (bitmask_all(loongarch_linux_processors[i].flags, 
valid_processor_mask)) { + loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; + } + } + + + uint32_t valid_processors = 0, last_cpucfg = 0; + + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + loongarch_linux_processors[i].system_processor_id = i; + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + valid_processors += 1; + + if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { + /* + * Processor is in possible and present lists, but not reported in /proc/cpuinfo. + * This is fairly common: high-index processors can be not reported if they are offline. + */ + cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); + } + + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) { + last_cpucfg = loongarch_linux_processors[i].cpucfg_id; + } + + } else { + /* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */ + if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { + cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i); + } + } + } + + const struct cpuinfo_loongarch_chipset chipset = + cpuinfo_loongarch_linux_decode_chipset(proc_cpuinfo_hardware); + + + #if CPUINFO_ARCH_LOONGARCH64 + uint32_t isa_features = 0; + cpuinfo_loongarch_linux_hwcap_from_getauxval(&isa_features); + cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( + isa_features, &cpuinfo_isa); + #endif + + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (cpuinfo_linux_get_processor_package_id(i, &loongarch_linux_processors[i].package_id)) { + loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID; + } + } + } + + /* Initialize topology group IDs */ + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + 
loongarch_linux_processors[i].package_leader_id = i; + } + + /* Propagate topology group IDs among siblings */ + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + + if (loongarch_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { + cpuinfo_linux_detect_core_siblings( + loongarch_linux_processors_count, i, + (cpuinfo_siblings_callback) cluster_siblings_parser, + loongarch_linux_processors); + } + } + + /* Propagate all cluster IDs */ + uint32_t clustered_processors = 0; + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) { + clustered_processors += 1; + + const uint32_t package_leader_id = loongarch_linux_processors[i].package_leader_id; + if (package_leader_id < i) { + loongarch_linux_processors[i].package_leader_id = loongarch_linux_processors[package_leader_id].package_leader_id; + } + + cpuinfo_log_debug("processor %"PRIu32" clustered with processor %"PRIu32" as inferred from system siblings lists", + i, loongarch_linux_processors[i].package_leader_id); + } + } + + cpuinfo_loongarch_linux_count_cluster_processors(loongarch_linux_processors_count, loongarch_linux_processors); + + const uint32_t cluster_count = cpuinfo_loongarch_linux_detect_cluster_cpucfg( + &chipset, + loongarch_linux_processors_count, valid_processors, loongarch_linux_processors); + + /* Initialize core vendor, uarch, and cpucfg for every logical processor */ + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t cluster_leader = loongarch_linux_processors[i].package_leader_id; + if (cluster_leader == i) { + /* Cluster leader: decode core vendor and uarch */ + cpuinfo_loongarch_decode_vendor_uarch( + 
loongarch_linux_processors[cluster_leader].cpucfg_id, + &loongarch_linux_processors[cluster_leader].vendor, + &loongarch_linux_processors[cluster_leader].uarch); + } else { + /* Cluster non-leader: copy vendor, uarch, and cpucfg from cluster leader */ + loongarch_linux_processors[i].flags = loongarch_linux_processors[cluster_leader].flags; + loongarch_linux_processors[i].cpucfg_id = loongarch_linux_processors[cluster_leader].cpucfg_id; + loongarch_linux_processors[i].vendor = loongarch_linux_processors[cluster_leader].vendor; + loongarch_linux_processors[i].uarch = loongarch_linux_processors[cluster_leader].uarch; + } + } + } + + + qsort(loongarch_linux_processors, loongarch_linux_processors_count, + sizeof(struct cpuinfo_loongarch_linux_processor), cmp_loongarch_linux_processor); + + + uint32_t uarchs_count = 0; + enum cpuinfo_uarch last_uarch; + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_count == 0 || loongarch_linux_processors[i].uarch != last_uarch) { + last_uarch = loongarch_linux_processors[i].uarch; + uarchs_count += 1; + } + loongarch_linux_processors[i].uarch_index = uarchs_count - 1; + } + } + + /* + * Assumptions: + * - No SMP (i.e. each core supports only one hardware thread). + * - Level 1 instruction and data caches are private to the core clusters. + * - Level 2 and level 3 cache is shared between cores in the same cluster. 
+ */ + cpuinfo_loongarch_chipset_to_string(&chipset, package.name); + + package.processor_count = valid_processors; + package.core_count = valid_processors; + package.cluster_count = cluster_count; + + processors = calloc(valid_processors, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + valid_processors * sizeof(struct cpuinfo_processor), valid_processors); + goto cleanup; + } + + cores = calloc(valid_processors, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + valid_processors * sizeof(struct cpuinfo_core), valid_processors); + goto cleanup; + } + + clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + cluster_count * sizeof(struct cpuinfo_cluster), cluster_count); + goto cleanup; + } + + uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", + uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count); + goto cleanup; + } + + linux_cpu_to_processor_map = calloc(loongarch_linux_processors_count, sizeof(struct cpuinfo_processor*)); + if (linux_cpu_to_processor_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries", + loongarch_linux_processors_count * sizeof(struct cpuinfo_processor*), loongarch_linux_processors_count); + goto cleanup; + } + + linux_cpu_to_core_map = calloc(loongarch_linux_processors_count, sizeof(struct cpuinfo_core*)); + if (linux_cpu_to_core_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" core mapping entries", + loongarch_linux_processors_count * sizeof(struct cpuinfo_core*), 
loongarch_linux_processors_count); + goto cleanup; + } + + if (uarchs_count > 1) { + linux_cpu_to_uarch_index_map = calloc(loongarch_linux_processors_count, sizeof(uint32_t)); + if (linux_cpu_to_uarch_index_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries", + loongarch_linux_processors_count * sizeof(uint32_t), loongarch_linux_processors_count); + goto cleanup; + } + } + + l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + valid_processors * sizeof(struct cpuinfo_cache), valid_processors); + goto cleanup; + } + + l1d = calloc(valid_processors, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + valid_processors * sizeof(struct cpuinfo_cache), valid_processors); + goto cleanup; + } + + uint32_t uarchs_index = 0; + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_index == 0 || loongarch_linux_processors[i].uarch != last_uarch) { + last_uarch = loongarch_linux_processors[i].uarch; + uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { + .uarch = loongarch_linux_processors[i].uarch, + }; + uarchs_index += 1; + } + uarchs[uarchs_index - 1].processor_count += 1; + uarchs[uarchs_index - 1].core_count += 1; + } + } + + + uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; + /* Indication whether L3 (if it exists) is shared between all cores */ + bool shared_l3 = true; + /* Populate cache information structures in l1i, l1d */ + for (uint32_t i = 0; i < valid_processors; i++) { + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { + cluster_id += 1; + clusters[cluster_id] = (struct cpuinfo_cluster) { + 
.processor_start = i, + .processor_count = loongarch_linux_processors[i].package_processor_count, + .core_start = i, + .core_count = loongarch_linux_processors[i].package_processor_count, + .cluster_id = cluster_id, + .package = &package, + .vendor = loongarch_linux_processors[i].vendor, + .uarch = loongarch_linux_processors[i].uarch, + }; + } + + processors[i].smt_id = 0; + processors[i].core = cores + i; + processors[i].cluster = clusters + cluster_id; + processors[i].package = &package; + processors[i].linux_id = (int) loongarch_linux_processors[i].system_processor_id; + processors[i].cache.l1i = l1i + i; + processors[i].cache.l1d = l1d + i; + linux_cpu_to_processor_map[loongarch_linux_processors[i].system_processor_id] = &processors[i]; + + cores[i].processor_start = i; + cores[i].processor_count = 1; + cores[i].core_id = i; + cores[i].cluster = clusters + cluster_id; + cores[i].package = &package; + cores[i].vendor = loongarch_linux_processors[i].vendor; + cores[i].uarch = loongarch_linux_processors[i].uarch; + cores[i].cpucfg = loongarch_linux_processors[i].cpucfg_id; + linux_cpu_to_core_map[loongarch_linux_processors[i].system_processor_id] = &cores[i]; + + if (linux_cpu_to_uarch_index_map != NULL) { + linux_cpu_to_uarch_index_map[loongarch_linux_processors[i].system_processor_id] = + loongarch_linux_processors[i].uarch_index; + } + + struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; + cpuinfo_loongarch_decode_cache( + loongarch_linux_processors[i].uarch, + loongarch_linux_processors[i].package_processor_count, + loongarch_linux_processors[i].architecture_version, + &l1i[i], &l1d[i], &temp_l2, &temp_l3); + l1i[i].processor_start = l1d[i].processor_start = i; + l1i[i].processor_count = l1d[i].processor_count = 1; + + + if (temp_l3.size != 0) { + /* + * Assumptions: + * - L2 is private to each core + * - L3 is shared by cores in the same cluster + * - If cores in different clusters report the same L3, it is shared between all cores. 
+ */ + l2_count += 1; + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { + if (cluster_id == 0) { + big_l3_size = temp_l3.size; + l3_count = 1; + } else if (temp_l3.size != big_l3_size) { + /* If some cores have different L3 size, L3 is not shared between all cores */ + shared_l3 = false; + l3_count += 1; + } + } + } else { + /* If some cores don't have L3 cache, L3 is not shared between all cores */ + shared_l3 = false; + if (temp_l2.size != 0) { + /* Assume L2 is shared by cores in the same cluster */ + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { + l2_count += 1; + } + } + } + } + + if (l2_count != 0) { + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + + if (l3_count != 0) { + l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + goto cleanup; + } + } + } + + cluster_id = UINT32_MAX; + uint32_t l2_index = UINT32_MAX, l3_index = UINT32_MAX; + for (uint32_t i = 0; i < valid_processors; i++) { + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { + cluster_id++; + } + + struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = { 0 }, temp_l3 = { 0 }; + cpuinfo_loongarch_decode_cache( + loongarch_linux_processors[i].uarch, + loongarch_linux_processors[i].package_processor_count, + loongarch_linux_processors[i].architecture_version, + &dummy_l1i, &dummy_l1d, &temp_l2, &temp_l3); + + if (temp_l3.size != 0) { + /* + * Assumptions: + * - L2 is private to each core + * - L3 is shared by cores in the same cluster + * - If cores in different clusters 
report the same L3, it is shared between all cores. + */ + l2_index += 1; + l2[l2_index] = (struct cpuinfo_cache) { + .size = temp_l2.size, + .associativity = temp_l2.associativity, + .sets = temp_l2.sets, + .partitions = 1, + .line_size = temp_l2.line_size, + .flags = temp_l2.flags, + .processor_start = i, + .processor_count = 1, + }; + processors[i].cache.l2 = l2 + l2_index; + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { + l3_index += 1; + if (l3_index < l3_count) { + l3[l3_index] = (struct cpuinfo_cache) { + .size = temp_l3.size, + .associativity = temp_l3.associativity, + .sets = temp_l3.sets, + .partitions = 1, + .line_size = temp_l3.line_size, + .flags = temp_l3.flags, + .processor_start = i, + .processor_count = + shared_l3 ? valid_processors : loongarch_linux_processors[i].package_processor_count, + }; + } + } + if (shared_l3) { + processors[i].cache.l3 = l3; + } else if (l3_index < l3_count) { + processors[i].cache.l3 = l3 + l3_index; + } + } else if (temp_l2.size != 0) { + /* Assume L2 is shared by cores in the same cluster */ + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { + l2_index += 1; + l2[l2_index] = (struct cpuinfo_cache) { + .size = temp_l2.size, + .associativity = temp_l2.associativity, + .sets = temp_l2.sets, + .partitions = 1, + .line_size = temp_l2.line_size, + .flags = temp_l2.flags, + .processor_start = i, + .processor_count = loongarch_linux_processors[i].package_processor_count, + }; + } + processors[i].cache.l2 = l2 + l2_index; + } + } + + /* Commit */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = &package; + cpuinfo_uarchs = uarchs; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; + + cpuinfo_processors_count = 
valid_processors; + cpuinfo_cores_count = valid_processors; + cpuinfo_clusters_count = cluster_count; + cpuinfo_packages_count = 1; + cpuinfo_uarchs_count = uarchs_count; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_max_cache_size = cpuinfo_loongarch_compute_max_cache_size(&processors[0]); + + cpuinfo_linux_cpu_max = loongarch_linux_processors_count; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map; + + __sync_synchronize(); + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + uarchs = NULL; + l1i = l1d = l2 = l3 = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; + linux_cpu_to_uarch_index_map = NULL; + +cleanup: + free(loongarch_linux_processors); + free(processors); + free(cores); + free(clusters); + free(uarchs); + free(l1i); + free(l1d); + free(l2); + free(l3); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); + free(linux_cpu_to_uarch_index_map); +} diff --git a/src/loongarch/linux/loongarch64-isa.c b/src/loongarch/linux/loongarch64-isa.c new file mode 100644 index 00000000..71c75687 --- /dev/null +++ b/src/loongarch/linux/loongarch64-isa.c @@ -0,0 +1,50 @@ +#include + +#include +#include + + +void cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( + uint32_t features, + struct cpuinfo_loongarch_isa isa[restrict static 1]) +{ + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_CPUCFG) { + isa->cpucfg = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LAM) { + isa->lam = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_UAL) { + isa->ual = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_FPU) { + 
isa->fpu = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LSX) { + isa->lsx = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LASX) { + isa->lasx = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_CRC32) { + isa->crc32 = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_COMPLEX) { + isa->complex = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_CRYPTO) { + isa->crypto = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ) { + isa->lvz = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86) { + isa->lbt_x86 = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_ARM) { + isa->lbt_arm = true; + } + if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS) { + isa->lbt_mips = true; + } +} diff --git a/src/loongarch/uarch.c b/src/loongarch/uarch.c new file mode 100644 index 00000000..7bfad3af --- /dev/null +++ b/src/loongarch/uarch.c @@ -0,0 +1,28 @@ +#include + +#include +#include +#include + + +void cpuinfo_loongarch_decode_vendor_uarch( + uint32_t cpucfg, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]) +{ + + switch (cpucfg_get_companyID(cpucfg)) { + case 0x14: + *vendor = cpuinfo_vendor_loongson; + switch(cpucfg_get_processorID(cpucfg)){ + case 0xc0: + *uarch = cpuinfo_uarch_LA464; + break; + } + break; + default: + //not match verify vendor and uarch + *vendor = cpuinfo_vendor_unknown; + *uarch = cpuinfo_uarch_unknown; + } +} diff --git a/tools/cpu-info.c b/tools/cpu-info.c index b896b270..252dad8a 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -41,6 +41,8 @@ static const char* vendor_to_string(enum cpuinfo_vendor vendor) { return "Broadcom"; case cpuinfo_vendor_apm: return "Applied Micro"; + case cpuinfo_vendor_loongson: + return "Loongson"; default: return NULL; } @@ -284,6 +286,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Dhyana"; case cpuinfo_uarch_taishan_v110: return "TaiShan v110"; + 
case cpuinfo_uarch_LA464: + return "LA464"; default: return NULL; } diff --git a/tools/isa-info.c b/tools/isa-info.c index 96bcdd7a..c4b6ca67 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -193,4 +193,26 @@ int main(int argc, char** argv) { printf("\tCompressed: %s\n", cpuinfo_has_riscv_c() ? "yes" : "no"); printf("\tVector: %s\n", cpuinfo_has_riscv_v() ? "yes" : "no"); #endif +#if CPUINFO_ARCH_LOONGARCH64 + printf("Loongarch:\n"); + printf("\tCPUCFG: %s\n", cpuinfo_has_loongarch_cpucfg() ? "yes" : "no"); + printf("\tLAM: %s\n", cpuinfo_has_loongarch_lam() ? "yes" : "no"); + printf("\tUAL: %s\n", cpuinfo_has_loongarch_ual() ? "yes" : "no"); + printf("\tCOMPLEX: %s\n", cpuinfo_has_loongarch_complex() ? "yes" : "no"); + printf("\tLVZ: %s\n", cpuinfo_has_loongarch_lvz() ? "yes" : "no"); + printf("\tLBT_X86: %s\n", cpuinfo_has_loongarch_lbt_x86() ? "yes" : "no"); + printf("\tLBT_arm: %s\n", cpuinfo_has_loongarch_lbt_arm() ? "yes" : "no"); + printf("\tLBT_mips: %s\n", cpuinfo_has_loongarch_lbt_mips() ? "yes" : "no"); + + printf("Scalar instructions:\n"); + printf("\tFPU: %s\n", cpuinfo_has_loongarch_fpu() ? "yes" : "no"); + + printf("SIMD extensions:\n"); + printf("\tLSX: %s\n", cpuinfo_has_loongarch_lsx() ? "yes" : "no"); + printf("\tLASX: %s\n", cpuinfo_has_loongarch_lasx() ? "yes" : "no"); + + printf("Cryptography extensions:\n"); + printf("\tCRYPTO: %s\n", cpuinfo_has_loongarch_crypto() ? "yes" : "no"); + printf("\tCRC32: %s\n", cpuinfo_has_loongarch_crc32() ? "yes" : "no"); +#endif } From 04b5542b021974ee478b3fd4db4b28faf7a9e2ab Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Mon, 20 Jan 2025 10:02:59 +0800 Subject: [PATCH 2/9] Make case-insensitive on /proc/cpuinfo for LoongArch Cpuinfo shows inconsistent letter casing due to differences between locally maintained and upstream Linux kernel versions. Made the check case-insensitive to ensure compatibility. 
--- src/loongarch/linux/cpuinfo.c | 62 +++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/loongarch/linux/cpuinfo.c b/src/loongarch/linux/cpuinfo.c index 34cde570..89ba72f7 100644 --- a/src/loongarch/linux/cpuinfo.c +++ b/src/loongarch/linux/cpuinfo.c @@ -350,23 +350,41 @@ struct proc_cpuinfo_parser_state { /* * Decode a single line of /proc/cpuinfo information. * Lines have format [ ]*:[ ] - * An example of /proc/cpuinfo (from Loongarch-3a5000): + * An example of /proc/cpuinfo (from Loongson-3A5000 Loongnix20.6): * * system type : generic-loongson-machine * processor : 0 * package : 0 * core : 0 * cpu family : Loongson-64bit - * model name : Loongson-3A5000 - * CPU Revision : 0x10 + * model name : Loongson-3A5000-HV + * CPU Revision : 0x11 * FPU Revision : 0x00 - * CPU MHz : 2300.00 - * BogoMIPS : 4600.00 + * CPU MHz : 2500.00 + * BogoMIPS : 5000.00 * TLB entries : 2112 * Address sizes : 48 bits physical, 48 bits virtual * isa : loongarch32 loongarch64 - * features : cpucfg lam ual fpu lsx lasx complex crypto lvz lbt_x86 lbt_arm lbt_mips + * features : cpucfg lam ual fpu lsx lasx crc32 lvz lbt_x86 lbt_arm lbt_mips * hardware watchpoint : yes, iwatch count: 8, dwatch count: 8 + * + * An example of /proc/cpuinfo (from Loongson-3A6000 AOSC OS): + * system type : generic-loongson-machine + * processor : 0 + * package : 0 + * core : 1 + * global_id : 0 + * CPU Family : Loongson-64bit + * Model Name : Loongson-3A6000 + * CPU Revision : 0x00 + * FPU Revision : 0x00 + * CPU MHz : 2500.00 + * BogoMIPS : 5000.00 + * TLB Entries : 2112 + * Address Sizes : 48 bits physical, 48 bits virtual + * ISA : loongarch32r loongarch32s loongarch64 + * Features : cpucfg lam ual fpu lsx lasx crc32 complex crypto lspw lvz lbt_x86 lbt_arm lbt_mips + * Hardware Watchpoint : yes, iwatch count: 8, dwatch count: 4 */ static bool parse_line( const char* line_start, @@ -440,39 +458,39 @@ static bool parse_line( const size_t key_length = key_end 
- line_start; switch (key_length) { case 3: - if (memcmp(line_start, "isa", key_length) == 0) { + if (strncasecmp(line_start, "isa", key_length) == 0) { /* isa Revision is presently useless, don't parse */ } else { goto unknown; } break; case 4: - if (memcmp(line_start, "core", key_length) == 0) { + if (strncasecmp(line_start, "core", key_length) == 0) { /* core is presently useless, don't parse */ } else { goto unknown; } break; case 7: - if (memcmp(line_start, "package", key_length) == 0) { + if (strncasecmp(line_start, "package", key_length) == 0) { parse_package(value_start, value_end, processor); - } else if (memcmp(line_start, "CPU MHz", key_length) == 0) { + } else if (strncasecmp(line_start, "CPU MHz", key_length) == 0) { /* CPU MHz is presently useless, don't parse */ } else { goto unknown; } break; case 8: - if (memcmp(line_start, "features", key_length) == 0) { + if (strncasecmp(line_start, "features", key_length) == 0) { parse_features(value_start, value_end, processor); - } else if (memcmp(line_start, "BogoMIPS", key_length) == 0) { + } else if (strncasecmp(line_start, "BogoMIPS", key_length) == 0) { /* BogoMIPS is useless, don't parse */ } else { goto unknown; } break; case 9: - if (memcmp(line_start, "processor", key_length) == 0) { + if (strncasecmp(line_start, "processor", key_length) == 0) { const uint32_t new_processor_index = parse_processor_number(value_start, value_end); if (new_processor_index < processor_index) { /* Strange: decreasing processor number */ @@ -496,46 +514,48 @@ static bool parse_line( state->processor_index = new_processor_index; processors[new_processor_index].cpucfg_id = new_processor_index; return true; + } else if (strncasecmp(line_start, "global_id", key_length) == 0) { + /* global_id is useless, don't parse */ } else{ goto unknown; } break; case 10: - if (memcmp(line_start, "cpu family", key_length) == 0) { + if (strncasecmp(line_start, "cpu family", key_length) == 0) { /* cpu family is presently useless, don't parse */ 
- } else if (memcmp(line_start, "model name", key_length) == 0) { + } else if (strncasecmp(line_start, "model name", key_length) == 0) { parse_model_name(value_start,value_end,state->hardware,processor); } else { goto unknown; } break; case 11: - if (memcmp(line_start, "system type", key_length) == 0) { + if (strncasecmp(line_start, "system type", key_length) == 0) { /* system type is presently useless, don't parse */ - } else if (memcmp(line_start, "TLB entries", key_length) == 0) { + } else if (strncasecmp(line_start, "TLB entries", key_length) == 0) { /* TLB entries is presently useless, don't parse */ } else { goto unknown; } break; case 12: - if (memcmp(line_start, "CPU Revision", key_length) == 0) { + if (strncasecmp(line_start, "CPU Revision", key_length) == 0) { /* CPU Revision is presently useless, don't parse */ - } else if (memcmp(line_start, "FPU Revision", key_length) == 0) { + } else if (strncasecmp(line_start, "FPU Revision", key_length) == 0) { /* FPU Revision is presently useless, don't parse */ } else { goto unknown; } break; case 13: - if (memcmp(line_start, "Address sizes", key_length) == 0) { + if (strncasecmp(line_start, "Address sizes", key_length) == 0) { /* Address sizes is presently useless, don't parse */ } else { goto unknown; } break; case 18: - if (memcmp(line_start, "hardware watchpoint", key_length) == 0) { + if (strncasecmp(line_start, "hardware watchpoint", key_length) == 0) { /* Address sizes is presently useless, don't parse */ } else { goto unknown; From 9efd45b3d958e365440a776c316f84dc7eeccfbb Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Mon, 20 Jan 2025 11:14:31 +0800 Subject: [PATCH 3/9] Add PTW and LSPW isa feature support --- include/cpuinfo.h | 18 +++++++++ src/loongarch/linux/api.h | 32 +++++++-------- src/loongarch/linux/cpuinfo.c | 8 ++++ src/loongarch/linux/loongarch64-isa.c | 57 ++++++++------------------- tools/isa-info.c | 6 +-- 5 files changed, 61 insertions(+), 60 deletions(-) diff --git a/include/cpuinfo.h 
b/include/cpuinfo.h index 61c5c1fe..44a151af 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -2272,6 +2272,8 @@ struct cpuinfo_loongarch_isa { bool lbt_x86; bool lbt_arm; bool lbt_mips; + bool ptw; + bool lspw; }; extern struct cpuinfo_loongarch_isa cpuinfo_isa; @@ -2381,6 +2383,22 @@ static inline bool cpuinfo_has_loongarch_lbt_mips(void) { #endif } +static inline bool cpuinfo_has_loongarch_ptw(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.ptw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_loongarch_lspw(void) { +#if CPUINFO_ARCH_LOONGARCH64 + return cpuinfo_isa.lspw; +#else + return false; +#endif +} + const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 387a2c57..24ec2ea7 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -14,25 +14,25 @@ /* No hard limit in the kernel, maximum length on Raspberry Pi is 8. 
Add 1 symbol to detect overly large revision strings */ #define CPUINFO_REVISION_VALUE_MAX 9 - #if CPUINFO_ARCH_LOONGARCH64 - /* /usr/include/loongarch64-linux-gnu/asm/hwcap.h */ - #define CPUINFO_LOONGARCH_LINUX_FEATURE_CPUCFG UINT32_C(0x00000001) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LAM UINT32_C(0x00000002) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_UAL UINT32_C(0x00000004) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_FPU UINT32_C(0x00000008) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LSX UINT32_C(0x00000010) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LASX UINT32_C(0x00000020) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_CRC32 UINT32_C(0x00000040) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_COMPLEX UINT32_C(0x00000080) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_CRYPTO UINT32_C(0x00000100) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ UINT32_C(0x00000200) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86 UINT32_C(0x00000400) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_ARM UINT32_C(0x00000800) - #define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS UINT32_C(0x00001000) +/* Linux: arch/loongarch/include/uapi/asm/hwcap.h */ +#define CPUINFO_LOONGARCH_LINUX_FEATURE_CPUCFG UINT32_C(0x00000001) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LAM UINT32_C(0x00000002) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_UAL UINT32_C(0x00000004) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_FPU UINT32_C(0x00000008) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LSX UINT32_C(0x00000010) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LASX UINT32_C(0x00000020) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_CRC32 UINT32_C(0x00000040) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_COMPLEX UINT32_C(0x00000080) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_CRYPTO UINT32_C(0x00000100) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ UINT32_C(0x00000200) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86 UINT32_C(0x00000400) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_ARM UINT32_C(0x00000800) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS 
UINT32_C(0x00001000) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_PTW UINT32_C(0x00002000) +#define CPUINFO_LOONGARCH_LINUX_FEATURE_LSPW UINT32_C(0x00004000) #endif - #define CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID UINT32_C(0x00010000) #define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID UINT32_C(0x00020000) #define CPUINFO_LOONGARCH_LINUX_VALID_REVISION UINT32_C(0x00040000) diff --git a/src/loongarch/linux/cpuinfo.c b/src/loongarch/linux/cpuinfo.c index 89ba72f7..4c09de5f 100644 --- a/src/loongarch/linux/cpuinfo.c +++ b/src/loongarch/linux/cpuinfo.c @@ -102,6 +102,10 @@ static void parse_features( #if CPUINFO_ARCH_LOONGARCH64 processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ; #endif + } else if (memcmp(feature_start, "ptw", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_PTW; + #endif } else { goto unexpected; } @@ -111,6 +115,10 @@ static void parse_features( #if CPUINFO_ARCH_LOONGARCH64 processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LASX; #endif + } else if (memcmp(feature_start, "lspw", feature_length) == 0) { + #if CPUINFO_ARCH_LOONGARCH64 + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LSPW; + #endif } else { goto unexpected; } diff --git a/src/loongarch/linux/loongarch64-isa.c b/src/loongarch/linux/loongarch64-isa.c index 71c75687..a0e8b115 100644 --- a/src/loongarch/linux/loongarch64-isa.c +++ b/src/loongarch/linux/loongarch64-isa.c @@ -1,50 +1,25 @@ #include #include -#include - void cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( uint32_t features, struct cpuinfo_loongarch_isa isa[restrict static 1]) { - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_CPUCFG) { - isa->cpucfg = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LAM) { - isa->lam = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_UAL) { - isa->ual = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_FPU) { - isa->fpu = true; - } - if (features & 
CPUINFO_LOONGARCH_LINUX_FEATURE_LSX) { - isa->lsx = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LASX) { - isa->lasx = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_CRC32) { - isa->crc32 = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_COMPLEX) { - isa->complex = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_CRYPTO) { - isa->crypto = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LVZ) { - isa->lvz = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86) { - isa->lbt_x86 = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_ARM) { - isa->lbt_arm = true; - } - if (features & CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS) { - isa->lbt_mips = true; - } +#define ISA_ENABLE(BIT, FLAG) isa->BIT = !!(features & CPUINFO_LOONGARCH_LINUX_FEATURE_##FLAG) + ISA_ENABLE(cpucfg, CPUCFG); + ISA_ENABLE(lam, LAM); + ISA_ENABLE(ual, UAL); + ISA_ENABLE(fpu, FPU); + ISA_ENABLE(lsx, LSX); + ISA_ENABLE(lasx, LASX); + ISA_ENABLE(crc32, CRC32); + ISA_ENABLE(complex, COMPLEX); + ISA_ENABLE(crypto, CRYPTO); + ISA_ENABLE(lvz, LVZ); + ISA_ENABLE(lbt_x86, LBT_X86); + ISA_ENABLE(lbt_arm, LBT_ARM); + ISA_ENABLE(lbt_mips, LBT_MIPS); + ISA_ENABLE(ptw, PTW); + ISA_ENABLE(lspw, LSPW); } diff --git a/tools/isa-info.c b/tools/isa-info.c index c4b6ca67..722f382e 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -198,14 +198,14 @@ int main(int argc, char** argv) { printf("\tCPUCFG: %s\n", cpuinfo_has_loongarch_cpucfg() ? "yes" : "no"); printf("\tLAM: %s\n", cpuinfo_has_loongarch_lam() ? "yes" : "no"); printf("\tUAL: %s\n", cpuinfo_has_loongarch_ual() ? "yes" : "no"); + printf("\tFPU: %s\n", cpuinfo_has_loongarch_fpu() ? "yes" : "no"); printf("\tCOMPLEX: %s\n", cpuinfo_has_loongarch_complex() ? "yes" : "no"); printf("\tLVZ: %s\n", cpuinfo_has_loongarch_lvz() ? "yes" : "no"); printf("\tLBT_X86: %s\n", cpuinfo_has_loongarch_lbt_x86() ? "yes" : "no"); printf("\tLBT_arm: %s\n", cpuinfo_has_loongarch_lbt_arm() ? 
"yes" : "no"); printf("\tLBT_mips: %s\n", cpuinfo_has_loongarch_lbt_mips() ? "yes" : "no"); - - printf("Scalar instructions:\n"); - printf("\tFPU: %s\n", cpuinfo_has_loongarch_fpu() ? "yes" : "no"); + printf("\tPTW: %s\n", cpuinfo_has_loongarch_ptw() ? "yes" : "no"); + printf("\tLSPW: %s\n", cpuinfo_has_loongarch_lspw() ? "yes" : "no"); printf("SIMD extensions:\n"); printf("\tLSX: %s\n", cpuinfo_has_loongarch_lsx() ? "yes" : "no"); From effc358addb27b47407d78dd349fcd7397c76f95 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Mon, 20 Jan 2025 12:02:38 +0800 Subject: [PATCH 4/9] LoongArch: Merge hwcap.c into loongarch64-isa.c for simplicity Rename the function "*decode_isa_from_proc_cpuinfo" to "*decode_isa_from_hwcap". LoongArch no longer has a CPUINFO_MOCK API, so remove the related mock code. --- CMakeLists.txt | 1 - configure.py | 1 - include/cpuinfo-mock.h | 3 -- src/loongarch/linux/api.h | 6 +--- src/loongarch/linux/hwcap.c | 45 --------------------------- src/loongarch/linux/init.c | 5 +-- src/loongarch/linux/loongarch64-isa.c | 10 ++++-- 7 files changed, 10 insertions(+), 61 deletions(-) delete mode 100644 src/loongarch/linux/hwcap.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 00df8907..e322ebb3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -235,7 +235,6 @@ IF(CPUINFO_SUPPORTED_PLATFORM) src/loongarch/linux/clusters.c src/loongarch/linux/chipset.c src/loongarch/linux/cpucfg.c - src/loongarch/linux/hwcap.c src/loongarch/linux/loongarch64-isa.c) ENDIF() ENDIF() diff --git a/configure.py b/configure.py index 67762d62..0db9a166 100755 --- a/configure.py +++ b/configure.py @@ -72,7 +72,6 @@ def main(args): "loongarch/linux/clusters.c", "loongarch/linux/cpucfg.c", "loongarch/linux/chipset.c", - "loongarch/linux/hwcap.c", "loongarch/linux/loongarch64-isa.c", ] diff --git a/include/cpuinfo-mock.h b/include/cpuinfo-mock.h index cfba1055..7bb6d1ee 100644 --- a/include/cpuinfo-mock.h +++ b/include/cpuinfo-mock.h @@ -62,9 +62,6 @@ void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); #if
CPUINFO_ARCH_ARM void CPUINFO_ABI cpuinfo_set_hwcap2(uint64_t hwcap2); #endif -#if CPUINFO_ARCH_LOONGARCH64 -void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); -#endif #endif #if defined(__ANDROID__) diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 24ec2ea7..2f4ab70b 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -76,11 +76,7 @@ CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_parse_proc_cpuinfo( struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]); #if CPUINFO_ARCH_LOONGARCH64 - CPUINFO_INTERNAL void cpuinfo_loongarch_linux_hwcap_from_getauxval( - uint32_t hwcap[restrict static 1]); - - CPUINFO_INTERNAL void cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( - uint32_t features, + CPUINFO_INTERNAL void cpuinfo_loongarch64_linux_decode_isa_from_hwcap( struct cpuinfo_loongarch_isa isa[restrict static 1]); #endif diff --git a/src/loongarch/linux/hwcap.c b/src/loongarch/linux/hwcap.c deleted file mode 100644 index be71a2e5..00000000 --- a/src/loongarch/linux/hwcap.c +++ /dev/null @@ -1,45 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#if CPUINFO_MOCK - #include -#endif -#include -#include -#include - -#if CPUINFO_ARCH_LOONGARCH64 - #include -#else - #define AT_HWCAP 16 -#endif - - -#if CPUINFO_MOCK - static uint32_t mock_hwcap = 0; - void cpuinfo_set_hwcap(uint32_t hwcap) { - mock_hwcap = hwcap; - } -#endif - - -#if CPUINFO_ARCH_LOONGARCH64 - void cpuinfo_loongarch_linux_hwcap_from_getauxval( - uint32_t hwcap[restrict static 1] - ) - { - #if CPUINFO_MOCK - *hwcap = mock_hwcap; - #else - *hwcap = (uint32_t) getauxval(AT_HWCAP); - return ; - #endif - } -#endif diff --git a/src/loongarch/linux/init.c b/src/loongarch/linux/init.c index 0aac9136..d4b00d1f 100644 --- a/src/loongarch/linux/init.c +++ b/src/loongarch/linux/init.c @@ -188,10 +188,7 @@ void cpuinfo_loongarch_linux_init(void) { #if CPUINFO_ARCH_LOONGARCH64 - uint32_t 
isa_features = 0; - cpuinfo_loongarch_linux_hwcap_from_getauxval(&isa_features); - cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( - isa_features, &cpuinfo_isa); + cpuinfo_loongarch64_linux_decode_isa_from_hwcap(&cpuinfo_isa); #endif for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { diff --git a/src/loongarch/linux/loongarch64-isa.c b/src/loongarch/linux/loongarch64-isa.c index a0e8b115..a89e5f52 100644 --- a/src/loongarch/linux/loongarch64-isa.c +++ b/src/loongarch/linux/loongarch64-isa.c @@ -1,11 +1,17 @@ #include +#include #include -void cpuinfo_loongarch64_linux_decode_isa_from_proc_cpuinfo( - uint32_t features, +static inline uint32_t hwcap_from_getauxval() +{ + return (uint32_t) getauxval(AT_HWCAP); +} + +void cpuinfo_loongarch64_linux_decode_isa_from_hwcap( struct cpuinfo_loongarch_isa isa[restrict static 1]) { + uint32_t features = hwcap_from_getauxval(); #define ISA_ENABLE(BIT, FLAG) isa->BIT = !!(features & CPUINFO_LOONGARCH_LINUX_FEATURE_##FLAG) ISA_ENABLE(cpucfg, CPUCFG); ISA_ENABLE(lam, LAM); From 6097f0c6b91d05ffbf2a5f9b91c2d41cf71d86f9 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Mon, 20 Jan 2025 16:16:20 +0800 Subject: [PATCH 5/9] Rename cpucfg to prid --- CMakeLists.txt | 2 +- configure.py | 2 +- include/cpuinfo.h | 18 ++- src/loongarch/api.h | 2 +- src/loongarch/cache.c | 2 +- src/loongarch/cpucfg.h | 61 -------- src/loongarch/linux/api.h | 20 +-- src/loongarch/linux/clusters.c | 2 +- src/loongarch/linux/cpucfg.c | 264 -------------------------------- src/loongarch/linux/cpuinfo.c | 18 +-- src/loongarch/linux/init.c | 20 +-- src/loongarch/linux/prid.c | 266 +++++++++++++++++++++++++++++++++ src/loongarch/prid.h | 65 ++++++++ src/loongarch/uarch.c | 55 +++++-- 14 files changed, 415 insertions(+), 382 deletions(-) delete mode 100644 src/loongarch/cpucfg.h delete mode 100644 src/loongarch/linux/cpucfg.c create mode 100644 src/loongarch/linux/prid.c create mode 100644 src/loongarch/prid.h diff --git a/CMakeLists.txt 
b/CMakeLists.txt index e322ebb3..d67e93f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,7 +234,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM) src/loongarch/linux/cpuinfo.c src/loongarch/linux/clusters.c src/loongarch/linux/chipset.c - src/loongarch/linux/cpucfg.c + src/loongarch/linux/prid.c src/loongarch/linux/loongarch64-isa.c) ENDIF() ENDIF() diff --git a/configure.py b/configure.py index 0db9a166..e46283b0 100755 --- a/configure.py +++ b/configure.py @@ -70,7 +70,7 @@ def main(args): "loongarch/linux/init.c", "loongarch/linux/cpuinfo.c", "loongarch/linux/clusters.c", - "loongarch/linux/cpucfg.c", + "loongarch/linux/prid.c", "loongarch/linux/chipset.c", "loongarch/linux/loongarch64-isa.c", ] diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 44a151af..e7842638 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -614,8 +614,14 @@ enum cpuinfo_uarch { /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */ cpuinfo_uarch_taishan_v110 = 0x00C00100, - /** Loongson LA4 64 (Loongarch3 series processors). */ - cpuinfo_uarch_LA464 = 0x00D00100, + /** Loongson 64bit, 2-issue. */ + cpuinfo_uarch_LA264 = 0x00D00100, + /** Loongson 64bit, 3-issue. */ + cpuinfo_uarch_LA364 = 0x00D00101, + /** Loongson 64bit, 4-issue. */ + cpuinfo_uarch_LA464 = 0x00D00102, + /** Loongson 64bit, 6-issue. 
*/ + cpuinfo_uarch_LA664 = 0x00D00103, }; struct cpuinfo_processor { @@ -650,10 +656,6 @@ struct cpuinfo_processor { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 /** APIC ID (unique x86-specific ID of the logical processor) */ uint32_t apic_id; -#endif -#if CPUINFO_ARCH_LOONGARCH64 - /** CPUCFG ID (unique loongarch-specific ID of the logical processor) */ - uint32_t cpucfg_id; #endif struct { /** Level 1 instruction cache */ @@ -691,8 +693,8 @@ struct cpuinfo_core { /** Value of Main ID Register (MIDR) for this core */ uint32_t midr; #elif CPUINFO_ARCH_LOONGARCH64 - /** Value of CPUCFG for this core */ - uint32_t cpucfg; + /** Value of PRocessorID (PRID) for this core */ + uint32_t prid; #endif /** Clock rate (non-Turbo) of the core, in Hz */ uint64_t frequency; diff --git a/src/loongarch/api.h b/src/loongarch/api.h index 896a2686..66ccfc49 100644 --- a/src/loongarch/api.h +++ b/src/loongarch/api.h @@ -40,7 +40,7 @@ struct cpuinfo_loongarch_chipset { struct cpuinfo_loongarch_chipset chipset[restrict static 1], uint32_t cores); CPUINFO_INTERNAL void cpuinfo_loongarch_decode_vendor_uarch( - uint32_t cpucfg, + uint32_t prid, enum cpuinfo_vendor vendor[restrict static 1], enum cpuinfo_uarch uarch[restrict static 1]); diff --git a/src/loongarch/cache.c b/src/loongarch/cache.c index 7f4117e7..04ca9e1c 100644 --- a/src/loongarch/cache.c +++ b/src/loongarch/cache.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include void cpuinfo_loongarch_decode_cache( diff --git a/src/loongarch/cpucfg.h b/src/loongarch/cpucfg.h deleted file mode 100644 index afc8ee2a..00000000 --- a/src/loongarch/cpucfg.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once -#include - - - -#define CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK UINT32_C(0x00FF0000) -#define CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK UINT32_C(0x0000FF00) -#define CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK UINT32_C(0x000000FF) - - -#define CPUINFO_LOONGARCH_CPUCFG_COMPANYID_OFFSET 16 -#define 
CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_OFFSET 8 -#define CPUINFO_LOONGARCH_CPUCFG_REVISION_OFFSET 0 - - - -inline static uint32_t cpucfg_set_companyID(uint32_t cpucfg, uint32_t companyID) { - return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK) | - ((companyID << CPUINFO_LOONGARCH_CPUCFG_COMPANYID_OFFSET) & CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK); -} - -inline static uint32_t cpucfg_set_processorID(uint32_t cpucfg, uint32_t processorID) { - return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK) | - ((processorID << CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_OFFSET) & CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK); -} - -inline static uint32_t cpucfg_set_revision(uint32_t cpucfg, uint32_t revision) { - return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK) | - ((revision << CPUINFO_LOONGARCH_CPUCFG_REVISION_OFFSET) & CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK); -} - - - - -inline static uint32_t cpucfg_get_companyID(uint32_t cpucfg) { - return (cpucfg & CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK) >> CPUINFO_LOONGARCH_CPUCFG_COMPANYID_OFFSET; -} - -inline static uint32_t cpucfg_get_processorID(uint32_t cpucfg) { - return (cpucfg & CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK) >> CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_OFFSET; -} - -inline static uint32_t cpucfg_get_revision(uint32_t cpucfg) { - return (cpucfg & CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK) >> CPUINFO_LOONGARCH_CPUCFG_REVISION_OFFSET; -} - - - -inline static uint32_t cpucfg_copy_companyID(uint32_t cpucfg, uint32_t other_cpucfg) { - return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK) | (other_cpucfg & CPUINFO_LOONGARCH_CPUCFG_COMPANYID_MASK); -} - -inline static uint32_t cpucfg_copy_processorID(uint32_t cpucfg, uint32_t other_cpucfg) { - return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK) | (other_cpucfg & CPUINFO_LOONGARCH_CPUCFG_PROCESSORID_MASK); -} - - -inline static uint32_t cpucfg_copy_revision(uint32_t cpucfg, uint32_t other_cpucfg) { - return (cpucfg & ~CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK) 
| (other_cpucfg & CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK); -} - diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 2f4ab70b..1f39b82d 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -33,18 +33,18 @@ #define CPUINFO_LOONGARCH_LINUX_FEATURE_LSPW UINT32_C(0x00004000) #endif -#define CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID UINT32_C(0x00010000) -#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID UINT32_C(0x00020000) -#define CPUINFO_LOONGARCH_LINUX_VALID_REVISION UINT32_C(0x00040000) -#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) -#define CPUINFO_LOONGARCH_LINUX_VALID_FEATURES UINT32_C(0x00400000) -#define CPUINFO_LOONGARCH_LINUX_VALID_INFO UINT32_C(0x007F0000) -#define CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG UINT32_C(0x003F0000) +#define CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID UINT32_C(0x00010000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID UINT32_C(0x00020000) +#define CPUINFO_LOONGARCH_LINUX_VALID_REVISION UINT32_C(0x00040000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) +#define CPUINFO_LOONGARCH_LINUX_VALID_FEATURES UINT32_C(0x00400000) +#define CPUINFO_LOONGARCH_LINUX_VALID_INFO UINT32_C(0x007F0000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PRID UINT32_C(0x003F0000) struct cpuinfo_loongarch_linux_processor { uint32_t architecture_version; uint32_t features; - uint32_t cpucfg_id; + uint32_t prid; enum cpuinfo_vendor vendor; enum cpuinfo_uarch uarch; uint32_t uarch_index; @@ -104,7 +104,7 @@ CPUINFO_INTERNAL void cpuinfo_loongarch_linux_count_cluster_processors( uint32_t max_processors, struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); -CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_linux_detect_cluster_cpucfg( +CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_linux_detect_cluster_prid( const struct cpuinfo_loongarch_chipset chipset[restrict static 1], uint32_t 
max_processors, uint32_t usable_processors, diff --git a/src/loongarch/linux/clusters.c b/src/loongarch/linux/clusters.c index 8be642d4..381a36fd 100644 --- a/src/loongarch/linux/clusters.c +++ b/src/loongarch/linux/clusters.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/loongarch/linux/cpucfg.c b/src/loongarch/linux/cpucfg.c deleted file mode 100644 index 437371d7..00000000 --- a/src/loongarch/linux/cpucfg.c +++ /dev/null @@ -1,264 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -#define CLUSTERS_MAX 3 - -static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { - return (bitfield & mask) == mask; -} - -/* Description of core clusters configuration in a chipset (identified by series) */ -struct cluster_config { - /* Number of cores (logical processors) */ - uint8_t cores; - /* Loongarch chipset series (see cpuinfo_loongarch_chipset_series enum) */ - uint8_t series; - /* Number of heterogenous clusters in the CPU package */ - uint8_t clusters; - /* Number of cores in each cluster */ - uint8_t cluster_cores[CLUSTERS_MAX]; - /* CPUCFG of cores in each cluster */ - uint32_t cluster_cpucfg[CLUSTERS_MAX]; -}; - - -static const struct cluster_config cluster_configs[] = { - { - .cores = 4, - .series = cpuinfo_loongarch_chipset_series_3, - }, -}; - -/* - * Searches chipset name in mapping of chipset name to cores' CPUCFG values. If match is successful, initializes CPUCFG - * for all clusters' leaders with tabulated values. - * - * @param[in] chipset - chipset (SoC) name information. - * @param clusters_count - number of CPU core clusters detected in the SoC. - * @param cluster_leaders - indices of core clusters' leaders in the @p processors array. - * @param processors_count - number of usable logical processors in the system. 
- * @param[in,out] processors - array of logical processor descriptions with pre-parsed CPUCFG, maximum frequency, - * and decoded core cluster (package_leader_id) information. - * Upon successful return, processors[i].cpucfg for all clusters' leaders contains the - * tabulated CPUCFG values. - * @param verify_cpucfg - indicated whether the function should check that the CPUCFG values to be assigned to leaders of - * core clusters are consistent with known parts of their parsed values. - * Set if to false if the only CPUCFG value parsed from /proc/cpuinfo is for the last processor - * reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor. - * - * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with CPUCFG values. - * @retval false if the chipset was not found in the mapping, or any consistency check failed. - */ -static bool cpuinfo_loongarch_linux_detect_cluster_cpucfg_by_chipset( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - uint32_t clusters_count, - const uint32_t cluster_leaders[restrict static CLUSTERS_MAX], - uint32_t processors_count, - struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count], - bool verify_cpucfg) -{ - if (clusters_count <= CLUSTERS_MAX) { - for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) { - if (cluster_configs[c].series == chipset->series) { - /* Verify that the total number of cores and clusters of cores matches expectation */ - if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) { - return false; - } - - /* Verify that core cluster configuration matches expectation */ - for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { - const uint32_t cluster_leader = cluster_leaders[cluster]; - if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) { - return false; - } - } - - if (verify_cpucfg) { 
- /* Verify known parts of CPUCFG */ - for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { - const uint32_t cluster_leader = cluster_leaders[cluster]; - - /* Create a mask of known cpucfg bits */ - uint32_t cpucfg_mask = 0; - - if (processors[cluster_leader].flags & CPUINFO_LOONGARCH_LINUX_VALID_REVISION) { - cpucfg_mask |= CPUINFO_LOONGARCH_CPUCFG_REVISION_MASK; - } - - /* Verify the bits under the mask */ - if ((processors[cluster_leader].cpucfg_id ^ cluster_configs[c].cluster_cpucfg[cluster]) & cpucfg_mask) { - cpuinfo_log_debug("parsed CPUCFG of cluster %08"PRIu32" does not match tabulated value %08"PRIu32, - processors[cluster_leader].cpucfg_id, cluster_configs[c].cluster_cpucfg[cluster]); - return false; - } - } - } - - /* Assign CPUCFGs according to tabulated configurations */ - for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { - const uint32_t cluster_leader = cluster_leaders[cluster]; - processors[cluster_leader].cpucfg_id = cluster_configs[c].cluster_cpucfg[cluster]; - processors[cluster_leader].flags |= CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG; - cpuinfo_log_debug("cluster %"PRIu32" CPUCFG = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_cpucfg[cluster]); - } - return true; - } - } - } - return false; -} - - -/* - * Initializes CPUCFG for leaders of core clusters in a single sequential scan: - * - Clusters preceding the first reported CPUCFG value are assumed to have default CPUCFG value. - * - Clusters following any reported CPUCFG value to have that CPUCFG value. - * - * @param default_cpucfg - CPUCFG value that will be assigned to cluster leaders preceding any reported CPUCFG value. - * @param processors_count - number of logical processor descriptions in the @p processors array. - * @param[in,out] processors - array of logical processor descriptions with pre-parsed CPUCFG, maximum frequency, - * and decoded core cluster (package_leader_id) information. 
- * Upon successful return, processors[i].cpucfg for all core clusters' leaders contains - * the assigned CPUCFG value. - */ -static void cpuinfo_loongarch_linux_detect_cluster_cpucfg_by_sequential_scan( - uint32_t default_cpucfg, - uint32_t processors_count, - struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count]) -{ - uint32_t cpucfg = default_cpucfg; - for (uint32_t i = 0; i < processors_count; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - if (processors[i].package_leader_id == i) { - if (bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) { - cpucfg = processors[i].cpucfg_id; - } else { - cpuinfo_log_info("assume processor %"PRIu32" to have CPUCFG %08"PRIx32, i, cpucfg); - /* To be consistent, we copy the CPUCFG entirely, rather than by parts */ - processors[i].cpucfg_id = cpucfg; - processors[i].flags |=CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG; - } - } - } - } -} - -/* - * Detects CPUCFG of each CPU core clusters' leader. - * - * @param[in] chipset - chipset (SoC) name information. - * @param max_processors - number of processor descriptions in the @p processors array. - * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and - * PRESENT flags. - * @param[in,out] processors - array of logical processor descriptions with pre-parsed CPUCFG, maximum frequency, - * and decoded core cluster (package_leader_id) information. - * Upon return, processors[i].cpucfg for all clusters' leaders contains the CPUCFG value. 
- * - * @returns The number of core clusters - */ -uint32_t cpuinfo_loongarch_linux_detect_cluster_cpucfg( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - uint32_t max_processors, - uint32_t usable_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]) -{ - uint32_t clusters_count = 0; - uint32_t cluster_leaders[CLUSTERS_MAX]; - uint32_t last_processor_in_cpuinfo = max_processors; - uint32_t last_processor_with_cpucfg = max_processors; - uint32_t processors_with_cpucfg_count = 0; - for (uint32_t i = 0; i < max_processors; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - if (processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR) { - last_processor_in_cpuinfo = i; - } - - const uint32_t group_leader = processors[i].package_leader_id; - if (group_leader == i) { - if (clusters_count < CLUSTERS_MAX) { - cluster_leaders[clusters_count] = i; - } - clusters_count += 1; - } else { - /* Copy known bits of information to cluster leader */ - if (!bitmask_all(processors[group_leader].flags,CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG) && - bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) - { - processors[group_leader].cpucfg_id = processors[i].cpucfg_id; - processors[group_leader].flags |=CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG; - } - } - } - } - cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count); - - /* - * Two relations between reported /proc/cpuinfo information, and cores is possible: - * - /proc/cpuinfo reports information for all or some of the cores below the corresponding - * "processor : " lines. Information on offline cores may be missing. - * - /proc/cpuinfo reports information only once, after all "processor : " lines. - * The reported information may relate to processor #0 or to the processor which - * executed the system calls to read /proc/cpuinfo. 
It is also indistinguishable - * from /proc/cpuinfo reporting information only for the last core (e.g. if all other - * cores are offline). - * - * We detect the second case by checking if /proc/cpuinfo contains valid CPUCFG only for one, - * last reported, processor. Note, that the last reported core may be not the last - * present & possible processor, as /proc/cpuinfo may non-report high-index offline cores. - */ - - if (processors_with_cpucfg_count < usable_processors) { - /* - * /proc/cpuinfo reported CPUCFG only for some processors, and probably some core clusters do not have CPUCFG - * for any of the cores. Check if this is the case. - */ - uint32_t clusters_with_cpucfg_count = 0; - for (uint32_t i = 0; i < max_processors; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID |CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) { - if (processors[i].package_leader_id == i) { - clusters_with_cpucfg_count += 1; - } - } - } - - if (clusters_with_cpucfg_count < clusters_count) { - /* - * /proc/cpuinfo reported CPUCFG only for some clusters, need to reconstruct others. - * We make three attempts to detect CPUCFG for clusters without it: - * 1. Search tabulated CPUCFG values for chipsets which have heterogeneous clusters and ship with Linux - * kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values. - * 2. For systems with 2 clusters and CPUCFG known for one cluster, assume big.LITTLE configuration, - * and estimate CPUCFG for the other cluster under assumption that CPUCFG for the big cluster is known. - * 3. Initialize CPUCFGs for core clusters in a single sequential scan: - * - Clusters preceding the first reported CPUCFG value are assumed to have the last reported CPUCFG value. - * - Clusters following any reported CPUCFG value to have that CPUCFG value. 
- */ - - if (cpuinfo_loongarch_linux_detect_cluster_cpucfg_by_chipset( - chipset, clusters_count, cluster_leaders, usable_processors, processors, true)) - { - return clusters_count; - } - - if (last_processor_with_cpucfg != max_processors) { - /* Fall back to sequential initialization of CPUCFG values for core clusters */ - cpuinfo_loongarch_linux_detect_cluster_cpucfg_by_sequential_scan( - processors[processors[last_processor_with_cpucfg].package_leader_id].cpucfg_id, - max_processors, processors); - } - } - } - return clusters_count; -} diff --git a/src/loongarch/linux/cpuinfo.c b/src/loongarch/linux/cpuinfo.c index 4c09de5f..be327494 100644 --- a/src/loongarch/linux/cpuinfo.c +++ b/src/loongarch/linux/cpuinfo.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include /* @@ -252,22 +252,22 @@ static void parse_model_name( (int) name_length, separator + 1, model_name_end); return; } - uint32_t cpucfg_companyID = 0; - uint32_t cpucfg_processorID = 0; + uint32_t prid_companyID = 0; + uint32_t prid_processorID = 0; /* Verify the presence of hex prefix */ bool is_loongson = parse_loongson(model_name_start, model_length); if (is_loongson) { - cpucfg_companyID = 0x14; - processor->cpucfg_id = cpucfg_set_companyID(processor->cpucfg_id, cpucfg_companyID); + prid_companyID = 0x14; + processor->prid = prid_set_companyID(processor->prid, prid_companyID); processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; }else{ cpuinfo_log_warning("Model %.*s in /proc/cpuinfo is ignored due to unexpected words", (int) model_length, model_name_start); return; } - parse_processorID(separator + 1, name_length, &cpucfg_processorID); - processor->cpucfg_id = cpucfg_set_processorID(processor->cpucfg_id, cpucfg_processorID); + parse_processorID(separator + 1, name_length, &prid_processorID); + processor->prid = prid_set_seriesID(processor->prid, prid_processorID); processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID | 
CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; } @@ -320,7 +320,7 @@ static void parse_cpu_revision( return; } - processor->cpucfg_id = cpucfg_set_revision(processor->cpucfg_id, cpu_revision); + processor->prid = prid_set_productID(processor->prid, cpu_revision); processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_REVISION | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; } @@ -520,7 +520,7 @@ static bool parse_line( new_processor_index, max_processors_count - 1); } state->processor_index = new_processor_index; - processors[new_processor_index].cpucfg_id = new_processor_index; + processors[new_processor_index].prid = new_processor_index; return true; } else if (strncasecmp(line_start, "global_id", key_length) == 0) { /* global_id is useless, don't parse */ diff --git a/src/loongarch/linux/init.c b/src/loongarch/linux/init.c index d4b00d1f..f36cf4d4 100644 --- a/src/loongarch/linux/init.c +++ b/src/loongarch/linux/init.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -156,7 +156,7 @@ void cpuinfo_loongarch_linux_init(void) { } - uint32_t valid_processors = 0, last_cpucfg = 0; + uint32_t valid_processors = 0, last_prid = 0; for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { loongarch_linux_processors[i].system_processor_id = i; @@ -171,8 +171,8 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); } - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LOONGARCH_LINUX_VALID_CPUCFG)) { - last_cpucfg = loongarch_linux_processors[i].cpucfg_id; + if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LOONGARCH_LINUX_VALID_PRID)) { + last_prid = loongarch_linux_processors[i].prid; } } else { @@ -236,24 +236,24 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_loongarch_linux_count_cluster_processors(loongarch_linux_processors_count, loongarch_linux_processors); - const uint32_t cluster_count = cpuinfo_loongarch_linux_detect_cluster_cpucfg( + 
const uint32_t cluster_count = cpuinfo_loongarch_linux_detect_cluster_prid( &chipset, loongarch_linux_processors_count, valid_processors, loongarch_linux_processors); - /* Initialize core vendor, uarch, and cpucfg for every logical processor */ + /* Initialize core vendor, uarch, and prid for every logical processor */ for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { const uint32_t cluster_leader = loongarch_linux_processors[i].package_leader_id; if (cluster_leader == i) { /* Cluster leader: decode core vendor and uarch */ cpuinfo_loongarch_decode_vendor_uarch( - loongarch_linux_processors[cluster_leader].cpucfg_id, + loongarch_linux_processors[cluster_leader].prid, &loongarch_linux_processors[cluster_leader].vendor, &loongarch_linux_processors[cluster_leader].uarch); } else { - /* Cluster non-leader: copy vendor, uarch, and cpucfg from cluster leader */ + /* Cluster non-leader: copy vendor, uarch, and prid from cluster leader */ loongarch_linux_processors[i].flags = loongarch_linux_processors[cluster_leader].flags; - loongarch_linux_processors[i].cpucfg_id = loongarch_linux_processors[cluster_leader].cpucfg_id; + loongarch_linux_processors[i].prid = loongarch_linux_processors[cluster_leader].prid; loongarch_linux_processors[i].vendor = loongarch_linux_processors[cluster_leader].vendor; loongarch_linux_processors[i].uarch = loongarch_linux_processors[cluster_leader].uarch; } @@ -405,7 +405,7 @@ void cpuinfo_loongarch_linux_init(void) { cores[i].package = &package; cores[i].vendor = loongarch_linux_processors[i].vendor; cores[i].uarch = loongarch_linux_processors[i].uarch; - cores[i].cpucfg = loongarch_linux_processors[i].cpucfg_id; + cores[i].prid = loongarch_linux_processors[i].prid; linux_cpu_to_core_map[loongarch_linux_processors[i].system_processor_id] = &cores[i]; if (linux_cpu_to_uarch_index_map != NULL) { diff --git a/src/loongarch/linux/prid.c 
b/src/loongarch/linux/prid.c new file mode 100644 index 00000000..4e051862 --- /dev/null +++ b/src/loongarch/linux/prid.c @@ -0,0 +1,266 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define CLUSTERS_MAX 3 + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + +/* Description of core clusters configuration in a chipset (identified by series) */ +struct cluster_config { + /* Number of cores (logical processors) */ + uint8_t cores; + /* Loongarch chipset series (see cpuinfo_loongarch_chipset_series enum) */ + uint8_t series; + /* Number of heterogenous clusters in the CPU package */ + uint8_t clusters; + /* Number of cores in each cluster */ + uint8_t cluster_cores[CLUSTERS_MAX]; + /* PRID of cores in each cluster */ + uint32_t cluster_prid[CLUSTERS_MAX]; +}; + + +static const struct cluster_config cluster_configs[] = { + { + .cores = 4, + .series = cpuinfo_loongarch_chipset_series_3, + }, +}; + +/* + * Searches chipset name in mapping of chipset name to cores' PRID values. If match is successful, initializes PRID + * for all clusters' leaders with tabulated values. + * + * @param[in] chipset - chipset (SoC) name information. + * @param clusters_count - number of CPU core clusters detected in the SoC. + * @param cluster_leaders - indices of core clusters' leaders in the @p processors array. + * @param processors_count - number of usable logical processors in the system. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed PRID, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon successful return, processors[i].prid for all clusters' leaders contains the + * tabulated PRID values. 
+ * @param verify_prid - indicated whether the function should check that the PRID values to be assigned to leaders of + * core clusters are consistent with known parts of their parsed values. + * Set if to false if the only PRID value parsed from /proc/cpuinfo is for the last processor + * reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor. + * + * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with PRID values. + * @retval false if the chipset was not found in the mapping, or any consistency check failed. + */ +static bool cpuinfo_loongarch_linux_detect_cluster_prid_by_chipset( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + uint32_t clusters_count, + const uint32_t cluster_leaders[restrict static CLUSTERS_MAX], + uint32_t processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count], + bool verify_prid) +{ + if (clusters_count > CLUSTERS_MAX) { + return false; + } + for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) { + if (cluster_configs[c].series != chipset->series) { + continue; + } + /* Verify that the total number of cores and clusters of cores matches expectation */ + if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) { + return false; + } + + /* Verify that core cluster configuration matches expectation */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) { + return false; + } + } + + if (verify_prid) { + /* Verify known parts of PRID */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + + /* Create a mask of known prid bits */ + uint32_t prid_mask = 0; + if (processors[cluster_leader].flags & 
CPUINFO_LOONGARCH_LINUX_VALID_REVISION) { + prid_mask |= CPUINFO_LOONGARCH_PRID_PRODUCT_MASK; + } + + /* Verify the bits under the mask */ + if ((processors[cluster_leader].prid ^ cluster_configs[c].cluster_prid[cluster]) & prid_mask) { + cpuinfo_log_debug("parsed PRID of cluster %08"PRIu32" does not match tabulated value %08"PRIu32, + processors[cluster_leader].prid, cluster_configs[c].cluster_prid[cluster]); + return false; + } + } + } + + /* Assign PRIDs according to tabulated configurations */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + processors[cluster_leader].prid = cluster_configs[c].cluster_prid[cluster]; + processors[cluster_leader].flags |= CPUINFO_LOONGARCH_LINUX_VALID_PRID; + cpuinfo_log_debug("cluster %"PRIu32" PRID = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_prid[cluster]); + } + return true; + } + return false; +} + + +/* + * Initializes PRID for leaders of core clusters in a single sequential scan: + * - Clusters preceding the first reported PRID value are assumed to have default PRID value. + * - Clusters following any reported PRID value to have that PRID value. + * + * @param default_prid - PRID value that will be assigned to cluster leaders preceding any reported PRID value. + * @param processors_count - number of logical processor descriptions in the @p processors array. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed PRID, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon successful return, processors[i].prid for all core clusters' leaders contains + * the assigned PRID value. 
+ */ +static void cpuinfo_loongarch_linux_detect_cluster_prid_by_sequential_scan( + uint32_t default_prid, + uint32_t processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count]) +{ + uint32_t prid = default_prid; + for (uint32_t i = 0; i < processors_count; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (processors[i].package_leader_id == i) { + if (bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_PRID)) { + prid = processors[i].prid; + } else { + cpuinfo_log_info("assume processor %"PRIu32" to have PRID %08"PRIx32, i, prid); + /* To be consistent, we copy the PRID entirely, rather than by parts */ + processors[i].prid = prid; + processors[i].flags |=CPUINFO_LOONGARCH_LINUX_VALID_PRID; + } + } + } + } +} + +/* + * Detects PRID of each CPU core clusters' leader. + * + * @param[in] chipset - chipset (SoC) name information. + * @param max_processors - number of processor descriptions in the @p processors array. + * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and + * PRESENT flags. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed PRID, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon return, processors[i].prid for all clusters' leaders contains the PRID value. 
+ * + * @returns The number of core clusters + */ +uint32_t cpuinfo_loongarch_linux_detect_cluster_prid( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + uint32_t max_processors, + uint32_t usable_processors, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]) +{ + uint32_t clusters_count = 0; + uint32_t cluster_leaders[CLUSTERS_MAX]; + uint32_t last_processor_in_cpuinfo = max_processors; + uint32_t last_processor_with_prid = max_processors; + uint32_t processors_with_prid_count = 0; + for (uint32_t i = 0; i < max_processors; i++) { + if (!bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + if (processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR) { + last_processor_in_cpuinfo = i; + } + + const uint32_t group_leader = processors[i].package_leader_id; + if (group_leader == i) { + if (clusters_count < CLUSTERS_MAX) { + cluster_leaders[clusters_count] = i; + } + clusters_count += 1; + } else { + /* Copy known bits of information to cluster leader */ + if (!bitmask_all(processors[group_leader].flags,CPUINFO_LOONGARCH_LINUX_VALID_PRID) && + bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_PRID)) + { + processors[group_leader].prid = processors[i].prid; + processors[group_leader].flags |=CPUINFO_LOONGARCH_LINUX_VALID_PRID; + } + } + } + cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count); + + /* + * Two relations between reported /proc/cpuinfo information, and cores is possible: + * - /proc/cpuinfo reports information for all or some of the cores below the corresponding + * "processor : " lines. Information on offline cores may be missing. + * - /proc/cpuinfo reports information only once, after all "processor : " lines. + * The reported information may relate to processor #0 or to the processor which + * executed the system calls to read /proc/cpuinfo. 
It is also indistinguishable + * from /proc/cpuinfo reporting information only for the last core (e.g. if all other + * cores are offline). + * + * We detect the second case by checking if /proc/cpuinfo contains valid PRID only for one, + * last reported, processor. Note, that the last reported core may be not the last + * present & possible processor, as /proc/cpuinfo may non-report high-index offline cores. + */ + + if (processors_with_prid_count < usable_processors) { + /* + * /proc/cpuinfo reported PRID only for some processors, and probably some core clusters do not have PRID + * for any of the cores. Check if this is the case. + */ + uint32_t clusters_with_prid_count = 0; + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID |CPUINFO_LOONGARCH_LINUX_VALID_PRID)) { + if (processors[i].package_leader_id == i) { + clusters_with_prid_count += 1; + } + } + } + + if (clusters_with_prid_count < clusters_count) { + /* + * /proc/cpuinfo reported PRID only for some clusters, need to reconstruct others. + * We make three attempts to detect PRID for clusters without it: + * 1. Search tabulated PRID values for chipsets which have heterogeneous clusters and ship with Linux + * kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values. + * 2. For systems with 2 clusters and PRID known for one cluster, assume big.LITTLE configuration, + * and estimate PRID for the other cluster under assumption that PRID for the big cluster is known. + * 3. Initialize PRIDs for core clusters in a single sequential scan: + * - Clusters preceding the first reported PRID value are assumed to have the last reported PRID value. + * - Clusters following any reported PRID value to have that PRID value. 
+ */ + + if (cpuinfo_loongarch_linux_detect_cluster_prid_by_chipset( + chipset, clusters_count, cluster_leaders, usable_processors, processors, true)) + { + return clusters_count; + } + + if (last_processor_with_prid != max_processors) { + /* Fall back to sequential initialization of PRID values for core clusters */ + cpuinfo_loongarch_linux_detect_cluster_prid_by_sequential_scan( + processors[processors[last_processor_with_prid].package_leader_id].prid, + max_processors, processors); + } + } + } + return clusters_count; +} diff --git a/src/loongarch/prid.h b/src/loongarch/prid.h new file mode 100644 index 00000000..57841ea7 --- /dev/null +++ b/src/loongarch/prid.h @@ -0,0 +1,65 @@ +#pragma once +#include + +/* + * LoongArch can get PRID by `cpucfg 0` on LoongArch expect LA32R. + * This file support interface to construct PRID. + * + * Linux: arch/loongarch/include/asm/cpu.h + * As described in LoongArch specs from Loongson Technology, the PRID register + * (CPUCFG.00) has the following layout: + * + * +---------------+----------------+------------+--------------------+ + * | Reserved | Company ID | Series ID | Product ID | + * +---------------+----------------+------------+--------------------+ + * 31 24 23 16 15 12 11 0 + */ + +#define CPUINFO_LOONGARCH_PRID_COMPANY_MASK UINT32_C(0x00FF0000) +#define CPUINFO_LOONGARCH_PRID_SERIES_MASK UINT32_C(0x0000F000) +#define CPUINFO_LOONGARCH_PRID_PRODUCT_MASK UINT32_C(0x00000FFF) + +#define CPUINFO_LOONGARCH_PRID_COMPANY_OFFSET 16 +#define CPUINFO_LOONGARCH_PRID_SERIES_OFFSET 12 +#define CPUINFO_LOONGARCH_PRID_PRODUCT_OFFSET 0 + +#define PRID_COMP_LOONGSON 0x140000 + +enum prid_company { + prid_company_loongson = 0x14, /* Loongson Technology */ +}; + +enum prid_series { + prid_series_la132 = 0x8, /* Loongson 32bit */ + prid_series_la264 = 0xa, /* Loongson 64bit, 2-issue */ + prid_series_la364 = 0xb, /* Loongson 64bit, 3-issue */ + prid_series_la464 = 0xc, /* Loongson 64bit, 4-issue */ + prid_series_la664 = 0xd, /* Loongson 
64bit, 6-issue */ +}; + +inline static uint32_t prid_set_companyID(uint32_t prid, uint32_t companyID) { + return (prid & ~CPUINFO_LOONGARCH_PRID_COMPANY_MASK) | + ((companyID << CPUINFO_LOONGARCH_PRID_COMPANY_OFFSET) & CPUINFO_LOONGARCH_PRID_COMPANY_MASK); +} + +inline static uint32_t prid_set_seriesID(uint32_t prid, uint32_t seriesID) { + return (prid & ~CPUINFO_LOONGARCH_PRID_SERIES_MASK) | + ((seriesID << CPUINFO_LOONGARCH_PRID_SERIES_OFFSET) & CPUINFO_LOONGARCH_PRID_SERIES_MASK); +} + +inline static uint32_t prid_set_productID(uint32_t prid, uint32_t productID) { + return (prid & ~CPUINFO_LOONGARCH_PRID_PRODUCT_MASK) | + ((productID << CPUINFO_LOONGARCH_PRID_PRODUCT_OFFSET) & CPUINFO_LOONGARCH_PRID_PRODUCT_MASK); +} + +inline static uint32_t prid_get_companyID(uint32_t prid) { + return (prid & CPUINFO_LOONGARCH_PRID_COMPANY_MASK) >> CPUINFO_LOONGARCH_PRID_COMPANY_OFFSET; +} + +inline static uint32_t prid_get_seriesID(uint32_t prid) { + return (prid & CPUINFO_LOONGARCH_PRID_SERIES_MASK) >> CPUINFO_LOONGARCH_PRID_SERIES_OFFSET; +} + +inline static uint32_t prid_get_productID(uint32_t prid) { + return (prid & CPUINFO_LOONGARCH_PRID_PRODUCT_MASK) >> CPUINFO_LOONGARCH_PRID_PRODUCT_OFFSET; +} diff --git a/src/loongarch/uarch.c b/src/loongarch/uarch.c index 7bfad3af..6cd7382e 100644 --- a/src/loongarch/uarch.c +++ b/src/loongarch/uarch.c @@ -1,28 +1,53 @@ #include #include -#include +#include #include - -void cpuinfo_loongarch_decode_vendor_uarch( - uint32_t cpucfg, - enum cpuinfo_vendor vendor[restrict static 1], - enum cpuinfo_uarch uarch[restrict static 1]) +static void cpuinfo_loongarch_decode_vendor( + uint32_t companyID, + enum cpuinfo_vendor vendor[restrict static 1]) { - - switch (cpucfg_get_companyID(cpucfg)) { - case 0x14: + switch (companyID) { + case prid_company_loongson: *vendor = cpuinfo_vendor_loongson; - switch(cpucfg_get_processorID(cpucfg)){ - case 0xc0: - *uarch = cpuinfo_uarch_LA464; - break; - } break; default: - //not match verify vendor and 
uarch *vendor = cpuinfo_vendor_unknown; + break; + } +} + +static void cpuinfo_loongarch_decode_uarch( + uint32_t seriesID, + enum cpuinfo_uarch uarch[restrict static 1]) +{ + switch (seriesID) { + case prid_series_la264: + *uarch = cpuinfo_uarch_LA264; + break; + case prid_series_la364: + *uarch = cpuinfo_uarch_LA364; + break; + case prid_series_la464: + *uarch = cpuinfo_uarch_LA464; + break; + case prid_series_la664: + *uarch = cpuinfo_uarch_LA664; + break; + default: *uarch = cpuinfo_uarch_unknown; + break; } } + +void cpuinfo_loongarch_decode_vendor_uarch( + uint32_t prid, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]) +{ + cpuinfo_loongarch_decode_vendor( + prid_get_companyID(prid), vendor); + cpuinfo_loongarch_decode_uarch( + prid_get_seriesID(prid), uarch); +} From 77a18b8f72b0f028eff3f7118e32ee0e7a96f678 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Tue, 21 Jan 2025 14:09:45 +0800 Subject: [PATCH 6/9] Add try_set_prid_by_cpucfg to set prid for processor --- src/loongarch/cpucfg.h | 19 +++++++++ src/loongarch/linux/api.h | 6 +-- src/loongarch/linux/cpuinfo.c | 79 +++++++++++++++++------------------ src/loongarch/linux/init.c | 38 +++++++++++------ 4 files changed, 85 insertions(+), 57 deletions(-) create mode 100644 src/loongarch/cpucfg.h diff --git a/src/loongarch/cpucfg.h b/src/loongarch/cpucfg.h new file mode 100644 index 00000000..1f81f4ca --- /dev/null +++ b/src/loongarch/cpucfg.h @@ -0,0 +1,19 @@ +#pragma once +#include + +#if defined(__GNUC__) +#include +#endif + +enum { + CPUCFG_REG_PRID = 0, +}; + +static inline bool cpucfg(uint32_t reg, uint32_t *value) { +#if defined(__GNUC__) + *value = __cpucfg(reg); + return true; +#else + return false; +#endif +} diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 1f39b82d..9c29aee9 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -11,8 +11,6 @@ /* No hard limit in the kernel, maximum length observed on non-rogue 
kernels is 64 */ #define CPUINFO_HARDWARE_VALUE_MAX 64 -/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. Add 1 symbol to detect overly large revision strings */ -#define CPUINFO_REVISION_VALUE_MAX 9 #if CPUINFO_ARCH_LOONGARCH64 /* Linux: arch/loongarch/include/uapi/asm/hwcap.h */ @@ -34,12 +32,12 @@ #endif #define CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID UINT32_C(0x00010000) -#define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID UINT32_C(0x00020000) +#define CPUINFO_LOONGARCH_LINUX_VALID_SERIESID UINT32_C(0x00020000) #define CPUINFO_LOONGARCH_LINUX_VALID_REVISION UINT32_C(0x00040000) #define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) #define CPUINFO_LOONGARCH_LINUX_VALID_FEATURES UINT32_C(0x00400000) #define CPUINFO_LOONGARCH_LINUX_VALID_INFO UINT32_C(0x007F0000) -#define CPUINFO_LOONGARCH_LINUX_VALID_PRID UINT32_C(0x003F0000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PRID CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_SERIESID | CPUINFO_LOONGARCH_LINUX_VALID_REVISION struct cpuinfo_loongarch_linux_processor { uint32_t architecture_version; diff --git a/src/loongarch/linux/cpuinfo.c b/src/loongarch/linux/cpuinfo.c index be327494..1f613400 100644 --- a/src/loongarch/linux/cpuinfo.c +++ b/src/loongarch/linux/cpuinfo.c @@ -15,18 +15,17 @@ */ #define BUFFER_SIZE 1024 -struct cpuinfo_loongarch_processorID{ - const char* name; - uint32_t processorID; +struct cpuinfo_loongarch_seriesID { + const char* prefix; + enum prid_series seriesID; }; - -static const struct cpuinfo_loongarch_processorID loongson_name_map_processorID[] = { - { - /* "3A5000" -> 0xc0 */ - .name = "3A5000", - .processorID = 0xc0, - }, +static const struct cpuinfo_loongarch_seriesID loongson_name_map_seriesID[] = { + { .prefix = "3A5000", .seriesID = prid_series_la464, }, + { .prefix = "3C5000", .seriesID = prid_series_la464, }, + { .prefix = "3D5000", .seriesID = prid_series_la464, }, + { .prefix = "3A6000", .seriesID = prid_series_la664, }, + { 
.prefix = "3C6000", .seriesID = prid_series_la664, }, }; @@ -69,7 +68,7 @@ static void parse_features( const char* feature_end; /* Mark the features as valid */ - processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_FEATURES | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_FEATURES; do { feature_end = feature_start + 1; @@ -150,7 +149,7 @@ static void parse_features( processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_COMPLEX; } else if (memcmp(feature_start, "lbt_x86", feature_length) == 0) { #if CPUINFO_ARCH_LOONGARCH64 - processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS; + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86; #endif } else if (memcmp(feature_start, "lbt_arm", feature_length) == 0) { #if CPUINFO_ARCH_LOONGARCH64 @@ -163,11 +162,12 @@ static void parse_features( case 8: if (memcmp(feature_start, "lbt_mips", feature_length) == 0) { #if CPUINFO_ARCH_LOONGARCH64 - processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_X86; + processor->features |= CPUINFO_LOONGARCH_LINUX_FEATURE_LBT_MIPS; #endif } else { goto unexpected; } + break; default: unexpected: cpuinfo_log_warning("unexpected /proc/cpuinfo feature \"%.*s\" is ignored", @@ -196,20 +196,17 @@ static bool parse_loongson(const char* name_start, size_t length){ return true; } -static void parse_processorID(const char* name_start, size_t length, int* processorID){ - /* expected 3A5000 or 3C5000L or other , its length is 6 or 7 */ - if(length != 6 && length != 7) return ; - char cpy[] = ""; - for (size_t i = 0; i < CPUINFO_COUNT_OF(loongson_name_map_processorID); i++) { - - if (strncmp(loongson_name_map_processorID[i].name, strncpy(cpy, name_start,length), length) == 0) - { - cpuinfo_log_debug( - "found /proc/cpuinfo model name second string \"%.*s\" in loongson processorID table", - (int) length, name_start); - /* Create chipset name from entry */ - *processorID = loongson_name_map_processorID[i].processorID; - } +static void 
parse_seriesID(const char* name_start, size_t length, int* seriesID){ + for (size_t i = 0; i < CPUINFO_COUNT_OF(loongson_name_map_seriesID); i++) { + const struct cpuinfo_loongarch_seriesID *cur = &loongson_name_map_seriesID[i]; + if (strncmp(cur->prefix, name_start, strlen(cur->prefix))) + continue; + cpuinfo_log_debug( + "found /proc/cpuinfo model name second string \"%.*s\" in loongson seriesID table", + (int) length, name_start); + /* Create chipset name from entry */ + *seriesID = cur->seriesID; + break; } } @@ -252,25 +249,26 @@ static void parse_model_name( (int) name_length, separator + 1, model_name_end); return; } - uint32_t prid_companyID = 0; - uint32_t prid_processorID = 0; /* Verify the presence of hex prefix */ bool is_loongson = parse_loongson(model_name_start, model_length); if (is_loongson) { - prid_companyID = 0x14; - processor->prid = prid_set_companyID(processor->prid, prid_companyID); - processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; - }else{ + processor->prid = prid_set_companyID(processor->prid, prid_company_loongson); + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID; + } else { cpuinfo_log_warning("Model %.*s in /proc/cpuinfo is ignored due to unexpected words", (int) model_length, model_name_start); return; } - parse_processorID(separator + 1, name_length, &prid_processorID); - processor->prid = prid_set_seriesID(processor->prid, prid_processorID); - processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_PROCESSORID | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + + uint32_t prid_seriesID = 0; + + parse_seriesID(separator + 1, name_length, &prid_seriesID); + processor->prid = prid_set_seriesID(processor->prid, prid_seriesID); + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_SERIESID; } + static void parse_cpu_revision( const char* cpu_revision_start, const char* cpu_revision_end, @@ -321,7 +319,7 @@ static void parse_cpu_revision( } processor->prid = 
prid_set_productID(processor->prid, cpu_revision); - processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_REVISION | CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; + processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_REVISION; } static void parse_package( @@ -520,7 +518,6 @@ static bool parse_line( new_processor_index, max_processors_count - 1); } state->processor_index = new_processor_index; - processors[new_processor_index].prid = new_processor_index; return true; } else if (strncasecmp(line_start, "global_id", key_length) == 0) { /* global_id is useless, don't parse */ @@ -532,7 +529,8 @@ static bool parse_line( if (strncasecmp(line_start, "cpu family", key_length) == 0) { /* cpu family is presently useless, don't parse */ } else if (strncasecmp(line_start, "model name", key_length) == 0) { - parse_model_name(value_start,value_end,state->hardware,processor); + if (!(processor->flags & (CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_SERIESID))) + parse_model_name(value_start,value_end,state->hardware,processor); } else { goto unknown; } @@ -548,7 +546,8 @@ static bool parse_line( break; case 12: if (strncasecmp(line_start, "CPU Revision", key_length) == 0) { - /* CPU Revision is presently useless, don't parse */ + if (!(processor->flags & CPUINFO_LOONGARCH_LINUX_VALID_REVISION)) + parse_cpu_revision(value_start, value_end, processor); } else if (strncasecmp(line_start, "FPU Revision", key_length) == 0) { /* FPU Revision is presently useless, don't parse */ } else { diff --git a/src/loongarch/linux/init.c b/src/loongarch/linux/init.c index f36cf4d4..d3ad031a 100644 --- a/src/loongarch/linux/init.c +++ b/src/loongarch/linux/init.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -76,8 +77,21 @@ static int cmp_loongarch_linux_processor(const void* ptr_a, const void* ptr_b) { } -void cpuinfo_loongarch_linux_init(void) { +static void try_set_prid_by_cpucfg( + struct cpuinfo_loongarch_linux_processor *processors, + 
uint32_t count) +{ + uint32_t prid; + if (!cpucfg(CPUCFG_REG_PRID, &prid)) { + return; + } + for (uint32_t i = 0; i < count; i++) { + processors[i].prid = prid; + processors[i].flags |= CPUINFO_LOONGARCH_LINUX_VALID_PRID; + } +} +void cpuinfo_loongarch_linux_init(void) { struct cpuinfo_loongarch_linux_processor* loongarch_linux_processors = NULL; struct cpuinfo_processor* processors = NULL; struct cpuinfo_core* cores = NULL; @@ -139,7 +153,17 @@ void cpuinfo_loongarch_linux_init(void) { CPUINFO_LINUX_FLAG_PRESENT); } + #if CPUINFO_ARCH_LOONGARCH64 + /* Populate ISA structure with hwcap information. */ + cpuinfo_loongarch64_linux_decode_isa_from_hwcap(&cpuinfo_isa); + if (cpuinfo_isa.cpucfg) { + try_set_prid_by_cpucfg(loongarch_linux_processors, loongarch_linux_processors_count); + } + #endif + + /* Populate processor information. */ char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; + uint32_t valid_processors = 0; if (!cpuinfo_loongarch_linux_parse_proc_cpuinfo( proc_cpuinfo_hardware, @@ -154,9 +178,6 @@ void cpuinfo_loongarch_linux_init(void) { loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; } } - - - uint32_t valid_processors = 0, last_prid = 0; for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { loongarch_linux_processors[i].system_processor_id = i; @@ -171,10 +192,6 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); } - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LOONGARCH_LINUX_VALID_PRID)) { - last_prid = loongarch_linux_processors[i].prid; - } - } else { /* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */ if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { @@ -186,11 +203,6 @@ void cpuinfo_loongarch_linux_init(void) { const struct cpuinfo_loongarch_chipset chipset = cpuinfo_loongarch_linux_decode_chipset(proc_cpuinfo_hardware); - - #if 
CPUINFO_ARCH_LOONGARCH64 - cpuinfo_loongarch64_linux_decode_isa_from_hwcap(&cpuinfo_isa); - #endif - for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { if (cpuinfo_linux_get_processor_package_id(i, &loongarch_linux_processors[i].package_id)) { From b495795b16e75775a88f0660af1eae8fabbcecd3 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Tue, 21 Jan 2025 16:07:30 +0800 Subject: [PATCH 7/9] Add core parse and set smt_id --- src/loongarch/linux/api.h | 2 + src/loongarch/linux/cpuinfo.c | 25 ++++++++- src/loongarch/linux/init.c | 96 ++++++++++++++++++++--------------- 3 files changed, 81 insertions(+), 42 deletions(-) diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 9c29aee9..54e8b204 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -64,6 +64,8 @@ struct cpuinfo_loongarch_linux_processor { /** Linux processor ID */ uint32_t system_processor_id; + /** CoreID */ + uint32_t core_id; uint32_t flags; }; diff --git a/src/loongarch/linux/cpuinfo.c b/src/loongarch/linux/cpuinfo.c index 1f613400..62e274dd 100644 --- a/src/loongarch/linux/cpuinfo.c +++ b/src/loongarch/linux/cpuinfo.c @@ -322,6 +322,29 @@ static void parse_cpu_revision( processor->flags |= CPUINFO_LOONGARCH_LINUX_VALID_REVISION; } +static void parse_core( + const char* cpu_core_start, + const char* cpu_core_end, + struct cpuinfo_loongarch_linux_processor processor[restrict static 1]) +{ + uint32_t cpu_core = 0; + for (const char* digit_ptr = cpu_core_start; digit_ptr != cpu_core_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); + + /* Verify that the character in core is a decimal digit */ + if (digit >= 10) { + cpuinfo_log_warning("core %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + (int) (cpu_core_end - cpu_core_start), cpu_core_start, + *digit_ptr, (size_t) (digit_ptr - cpu_core_start)); + 
return; + } + + cpu_core = cpu_core * 10 + digit; + } + + processor->core_id = cpu_core; +} + static void parse_package( const char* cpu_package_start, const char* cpu_package_end, @@ -472,7 +495,7 @@ static bool parse_line( break; case 4: if (strncasecmp(line_start, "core", key_length) == 0) { - /* core is presently useless, don't parse */ + parse_core(value_start, value_end, processor); } else { goto unknown; } diff --git a/src/loongarch/linux/init.c b/src/loongarch/linux/init.c index d3ad031a..381011c9 100644 --- a/src/loongarch/linux/init.c +++ b/src/loongarch/linux/init.c @@ -67,14 +67,12 @@ static int cmp_loongarch_linux_processor(const void* ptr_a, const void* ptr_b) { if (usable_a != usable_b) { return (int) usable_b - (int) usable_a; } - + /* Compare based on processsor ID (i.e. processor 0 < processor 1) */ const uint32_t pro_a = processor_a->system_processor_id; const uint32_t pro_b = processor_b->system_processor_id; - - return cmp(pro_a,pro_b); - + return cmp(pro_a, pro_b); } static void try_set_prid_by_cpucfg( @@ -115,7 +113,7 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_linux_get_max_present_processor(max_processors_count); cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count); - uint32_t valid_processor_mask = 0; + uint32_t valid_processor_mask = CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR; uint32_t loongarch_linux_processors_count = max_processors_count; if (max_present_processors_count != 0) { loongarch_linux_processors_count = min(loongarch_linux_processors_count, max_present_processors_count); @@ -163,7 +161,7 @@ void cpuinfo_loongarch_linux_init(void) { /* Populate processor information. 
*/ char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; - uint32_t valid_processors = 0; + uint32_t valid_processors = 0, core_count = 0, last_core_id_1 = UINT32_MAX; if (!cpuinfo_loongarch_linux_parse_proc_cpuinfo( proc_cpuinfo_hardware, @@ -172,32 +170,28 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo"); return; } - - for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if (bitmask_all(loongarch_linux_processors[i].flags, valid_processor_mask)) { - loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; - } - } for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - loongarch_linux_processors[i].system_processor_id = i; - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (bitmask_all(loongarch_linux_processors[i].flags, valid_processor_mask)) { valid_processors += 1; - - if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { - /* - * Processor is in possible and present lists, but not reported in /proc/cpuinfo. - * This is fairly common: high-index processors can be not reported if they are offline. 
- */ - cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); - } - - } else { - /* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */ - if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { - cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i); + loongarch_linux_processors[i].system_processor_id = i; + loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; + if (loongarch_linux_processors[i].core_id != last_core_id_1) { + core_count += 1; + last_core_id_1 = loongarch_linux_processors[i].core_id; } + continue; + } + if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { + /* + * Processor is in possible and present lists, but not reported in /proc/cpuinfo. + * This is fairly common: high-index processors can be not reported if they are offline. + */ + cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); + continue; } + /* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */ + cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i); } const struct cpuinfo_loongarch_chipset chipset = @@ -298,7 +292,7 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_loongarch_chipset_to_string(&chipset, package.name); package.processor_count = valid_processors; - package.core_count = valid_processors; + package.core_count = core_count; package.cluster_count = cluster_count; processors = calloc(valid_processors, sizeof(struct cpuinfo_processor)); @@ -383,17 +377,28 @@ void cpuinfo_loongarch_linux_init(void) { uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; + uint32_t smt_id = 0, core_index = UINT32_MAX, last_core_id = UINT32_MAX; /* Indication whether L3 (if it exists) is shared between all cores */ bool shared_l3 = true; /* Populate cache information structures in l1i, l1d */ - for (uint32_t i 
= 0; i < valid_processors; i++) { + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + + const uint32_t core_id = loongarch_linux_processors[i].core_id; + smt_id++; + if (last_core_id != core_id) { + core_index++; + smt_id = 0; + } + if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { cluster_id += 1; clusters[cluster_id] = (struct cpuinfo_cluster) { .processor_start = i, .processor_count = loongarch_linux_processors[i].package_processor_count, .core_start = i, - .core_count = loongarch_linux_processors[i].package_processor_count, .cluster_id = cluster_id, .package = &package, .vendor = loongarch_linux_processors[i].vendor, @@ -401,8 +406,8 @@ void cpuinfo_loongarch_linux_init(void) { }; } - processors[i].smt_id = 0; - processors[i].core = cores + i; + processors[i].smt_id = smt_id; + processors[i].core = cores + core_index; processors[i].cluster = clusters + cluster_id; processors[i].package = &package; processors[i].linux_id = (int) loongarch_linux_processors[i].system_processor_id; @@ -410,15 +415,24 @@ void cpuinfo_loongarch_linux_init(void) { processors[i].cache.l1d = l1d + i; linux_cpu_to_processor_map[loongarch_linux_processors[i].system_processor_id] = &processors[i]; - cores[i].processor_start = i; - cores[i].processor_count = 1; - cores[i].core_id = i; - cores[i].cluster = clusters + cluster_id; - cores[i].package = &package; - cores[i].vendor = loongarch_linux_processors[i].vendor; - cores[i].uarch = loongarch_linux_processors[i].uarch; - cores[i].prid = loongarch_linux_processors[i].prid; - linux_cpu_to_core_map[loongarch_linux_processors[i].system_processor_id] = &cores[i]; + if (last_core_id != core_id) { + cores[core_index] = (struct cpuinfo_core){ + .processor_start = i, + .processor_count = 1, + .core_id = core_id, + .cluster = clusters + cluster_id, + .package = &package, + 
.vendor = loongarch_linux_processors[i].vendor, + .uarch = loongarch_linux_processors[i].uarch, + .prid = loongarch_linux_processors[i].prid, + }; + last_core_id = core_id; + clusters[cluster_id].core_count += 1; + } else { + /* another logical processor on the same core */ + cores[core_index].processor_count++; + } + linux_cpu_to_core_map[loongarch_linux_processors[i].system_processor_id] = &cores[core_index]; if (linux_cpu_to_uarch_index_map != NULL) { linux_cpu_to_uarch_index_map[loongarch_linux_processors[i].system_processor_id] = From 40659b816ebadf17d703716df40a51d3f766583c Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Wed, 22 Jan 2025 12:44:33 +0800 Subject: [PATCH 8/9] Fix cache stage 1 --- CMakeLists.txt | 4 +- configure.py | 3 +- src/loongarch/api.h | 41 ++------ src/loongarch/cache.c | 135 ------------------------ src/loongarch/cpucfg.h | 34 +++++- src/loongarch/linux/api.h | 19 +++- src/loongarch/linux/cache.c | 203 ++++++++++++++++++++++++++++++++++++ src/loongarch/linux/init.c | 99 ++++++++++++++++-- 8 files changed, 354 insertions(+), 184 deletions(-) delete mode 100644 src/loongarch/cache.c create mode 100644 src/loongarch/linux/cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index d67e93f8..a8a1999c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -226,8 +226,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM) ENDIF() ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(loongarch64)$") LIST(APPEND CPUINFO_SRCS - src/loongarch/uarch.c - src/loongarch/cache.c) + src/loongarch/uarch.c) IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") LIST(APPEND CPUINFO_SRCS src/loongarch/linux/init.c @@ -235,6 +234,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM) src/loongarch/linux/clusters.c src/loongarch/linux/chipset.c src/loongarch/linux/prid.c + src/loongarch/linux/cache.c src/loongarch/linux/loongarch64-isa.c) ENDIF() ENDIF() diff --git a/configure.py b/configure.py index e46283b0..858f0588 100755 --- a/configure.py +++ b/configure.py @@ -64,13 +64,14 @@ def main(args): ] if 
build.target.is_loongarch64: - sources += ["loongarch/uarch.c", "loongarch/cache.c"] + sources += ["loongarch/uarch.c"] if build.target.is_linux: sources += [ "loongarch/linux/init.c", "loongarch/linux/cpuinfo.c", "loongarch/linux/clusters.c", "loongarch/linux/prid.c", + "loongarch/linux/cache.c", "loongarch/linux/chipset.c", "loongarch/linux/loongarch64-isa.c", ] diff --git a/src/loongarch/api.h b/src/loongarch/api.h index 66ccfc49..03796b6a 100644 --- a/src/loongarch/api.h +++ b/src/loongarch/api.h @@ -31,37 +31,14 @@ struct cpuinfo_loongarch_chipset { #define CPUINFO_LOONGARCH_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX -#ifndef __cplusplus - CPUINFO_INTERNAL void cpuinfo_loongarch_chipset_to_string( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - char name[restrict static CPUINFO_LOONGARCH_CHIPSET_NAME_MAX]); +CPUINFO_INTERNAL void cpuinfo_loongarch_chipset_to_string( + const struct cpuinfo_loongarch_chipset chipset[restrict static 1], + char name[restrict static CPUINFO_LOONGARCH_CHIPSET_NAME_MAX]); - CPUINFO_INTERNAL void cpuinfo_loongarch_fixup_chipset( - struct cpuinfo_loongarch_chipset chipset[restrict static 1], uint32_t cores); +CPUINFO_INTERNAL void cpuinfo_loongarch_fixup_chipset( + struct cpuinfo_loongarch_chipset chipset[restrict static 1], uint32_t cores); - CPUINFO_INTERNAL void cpuinfo_loongarch_decode_vendor_uarch( - uint32_t prid, - enum cpuinfo_vendor vendor[restrict static 1], - enum cpuinfo_uarch uarch[restrict static 1]); - - CPUINFO_INTERNAL void cpuinfo_loongarch_decode_cache( - enum cpuinfo_uarch uarch, - uint32_t cluster_cores, - uint32_t arch_version, - struct cpuinfo_cache l1i[restrict static 1], - struct cpuinfo_cache l1d[restrict static 1], - struct cpuinfo_cache l2[restrict static 1], - struct cpuinfo_cache l3[restrict static 1]); - - CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_compute_max_cache_size( - const struct cpuinfo_processor processor[restrict static 1]); -#else /* defined(__cplusplus) */ - 
CPUINFO_INTERNAL void cpuinfo_loongarch_decode_cache( - enum cpuinfo_uarch uarch, - uint32_t cluster_cores, - uint32_t arch_version, - struct cpuinfo_cache l1i[1], - struct cpuinfo_cache l1d[1], - struct cpuinfo_cache l2[1], - struct cpuinfo_cache l3[1]); -#endif +CPUINFO_INTERNAL void cpuinfo_loongarch_decode_vendor_uarch( + uint32_t prid, + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]); diff --git a/src/loongarch/cache.c b/src/loongarch/cache.c deleted file mode 100644 index 04ca9e1c..00000000 --- a/src/loongarch/cache.c +++ /dev/null @@ -1,135 +0,0 @@ -#include - -#include -#include -#include -#include -#include - - -void cpuinfo_loongarch_decode_cache( - enum cpuinfo_uarch uarch, - uint32_t cluster_cores, - uint32_t arch_version, - struct cpuinfo_cache l1i[restrict static 1], - struct cpuinfo_cache l1d[restrict static 1], - struct cpuinfo_cache l2[restrict static 1], - struct cpuinfo_cache l3[restrict static 1]) -{ - switch (uarch) { - /* - * Loongarch 3A5000 Core Technical Reference Manual - * Loongarch 3A5000. About the L1 memory system - * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. - * - * Loongarch 3A5000 L1 instruction-side memory system - * The L1 instruction memory system has the following key features: - * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache. - * - Fixed cache line length of 64 bytes. - * - * Loongarch 3A5000 L1 data-side memory system - * The L1 data memory system has the following features: - * - Physically Indexed, Physically Tagged (PIPT), 4-way set-associative L1 data cache. - * - Fixed cache line length of 64 bytes. - * - Pseudo-random cache replacement policy. - * - * Loongarch 3A5000 About the L2 memory system - * The L2 memory subsystem consist of: - * - An 16-way set associative L2 cache with a configurable size of 256KB. - * Cache lines have a fixed length of 64 bytes. 
- * - * +--------------------+-------+-----------+-----------+-----------+----------+ - * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | - * +--------------------+-------+-----------+-----------+-----------+----------+ - * | Loongarch 3A5000 | 4(+4) | 64K | 64K | 256K | 16 | - * +--------------------+-------+-----------+-----------+-----------+----------+ - * - */ - case cpuinfo_uarch_LA464: - *l1i = (struct cpuinfo_cache) { - .size = 64 * 1024, - .associativity = 4, - .line_size = 64 - }; - *l1d = (struct cpuinfo_cache) { - .size = 64 * 1024, - .associativity = 4, - .line_size = 64 - }; - *l2 = (struct cpuinfo_cache) { - .size = 256 * 1024, - .associativity = 16, - .line_size = 64 - }; - *l3 = (struct cpuinfo_cache) { - .size = 16 * 1024 * 1024, - .associativity = 16, - .line_size = 64 - }; - default: - cpuinfo_log_warning("loongarch uarch not recognized; using generic cache parameters"); - /* Follow OpenBLAS */ - if (arch_version >= 8) { - *l1i = (struct cpuinfo_cache) { - .size = 32 * 1024, - .associativity = 4, - .line_size = 64 - }; - *l1d = (struct cpuinfo_cache) { - .size = 32 * 1024, - .associativity = 4, - .line_size = 64 - }; - *l2 = (struct cpuinfo_cache) { - .size = cluster_cores * 256 * 1024, - .associativity = 8, - .line_size = 64 - }; - } else { - *l1i = (struct cpuinfo_cache) { - .size = 16 * 1024, - .associativity = 4, - .line_size = 32 - }; - *l1d = (struct cpuinfo_cache) { - .size = 16 * 1024, - .associativity = 4, - .line_size = 32 - }; - if (arch_version >= 7) { - *l2 = (struct cpuinfo_cache) { - .size = cluster_cores * 128 * 1024, - .associativity = 8, - .line_size = 32 - }; - } - } - break; - } - l1i->sets = l1i->size / (l1i->associativity * l1i->line_size); - l1i->partitions = 1; - l1d->sets = l1d->size / (l1d->associativity * l1d->line_size); - l1d->partitions = 1; - if (l2->size != 0) { - l2->sets = l2->size / (l2->associativity * l2->line_size); - l2->partitions = 1; - if (l3->size != 0) { - l3->sets = l3->size 
/ (l3->associativity * l3->line_size); - l3->partitions = 1; - } - } -} - -uint32_t cpuinfo_loongarch_compute_max_cache_size(const struct cpuinfo_processor* processor) { - /* - * There is no precise way to detect cache size on LOONGARCH64, and cache size reported by cpuinfo - * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum. - */ - switch (processor->core->uarch) { - - case cpuinfo_uarch_LA464: - return 16 * 1024 * 1024; - default: - return 4 * 1024 * 1024; - } -} diff --git a/src/loongarch/cpucfg.h b/src/loongarch/cpucfg.h index 1f81f4ca..2004fe3f 100644 --- a/src/loongarch/cpucfg.h +++ b/src/loongarch/cpucfg.h @@ -5,8 +5,13 @@ #include #endif -enum { +enum cpucfg_regs { CPUCFG_REG_PRID = 0, + CPUCFG_REG_CACHE_BASEINFO = 0x10, + CPUCFG_REG_CACHE_L1_IU, + CPUCFG_REG_CACHE_L1_D, + CPUCFG_REG_CACHE_L2_IU, + CPUCFG_REG_CACHE_L3_IU, }; static inline bool cpucfg(uint32_t reg, uint32_t *value) { @@ -17,3 +22,30 @@ static inline bool cpucfg(uint32_t reg, uint32_t *value) { return false; #endif } + +enum cache_baseinfo_bit { + L1_IU_Present = 0, + L1_IU_Unify, + L1_D_Present, + L2_IU_Present, + L2_IU_Unify, + L2_IU_Private, + L2_IU_Inclusive, + L2_D_Present, + L2_D_Private, + L2_D_Inclusive, + L3_IU_Present, + L3_IU_Unify, + L3_IU_Private, + L3_IU_Inclusive, + L3_D_Present, + L3_D_Private, + L3_D_Inclusive, +}; + +#define CACHE_WAYS_OFFSET 0 +#define CACHE_SETS_OFFSET 16 +#define CACHE_LSIZE_OFFSET 24 +#define CACHE_WAYS_MASK UINT32_C(0x0000FFFF) +#define CACHE_SETS_MASK UINT32_C(0x00FF0000) +#define CACHE_LSIZE_MASK UINT32_C(0x7F000000) diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 54e8b204..77287c59 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -37,10 +37,14 @@ #define CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) #define CPUINFO_LOONGARCH_LINUX_VALID_FEATURES UINT32_C(0x00400000) #define CPUINFO_LOONGARCH_LINUX_VALID_INFO UINT32_C(0x007F0000) -#define 
CPUINFO_LOONGARCH_LINUX_VALID_PRID CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_SERIESID | CPUINFO_LOONGARCH_LINUX_VALID_REVISION +#define CPUINFO_LOONGARCH_LINUX_VALID_L1I UINT32_C(0x01000000) +#define CPUINFO_LOONGARCH_LINUX_VALID_L1D UINT32_C(0x02000000) +#define CPUINFO_LOONGARCH_LINUX_VALID_L2 UINT32_C(0x04000000) +#define CPUINFO_LOONGARCH_LINUX_VALID_L3 UINT32_C(0x08000000) +#define CPUINFO_LOONGARCH_LINUX_VALID_PRID (CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_SERIESID | CPUINFO_LOONGARCH_LINUX_VALID_REVISION) +#define CPUINFO_LOONGARCH_LINUX_VALID_CACHE (CPUINFO_LOONGARCH_LINUX_VALID_L1I | CPUINFO_LOONGARCH_LINUX_VALID_L1D | CPUINFO_LOONGARCH_LINUX_VALID_L2 | CPUINFO_LOONGARCH_LINUX_VALID_L3) struct cpuinfo_loongarch_linux_processor { - uint32_t architecture_version; uint32_t features; uint32_t prid; enum cpuinfo_vendor vendor; @@ -66,6 +70,13 @@ struct cpuinfo_loongarch_linux_processor { uint32_t system_processor_id; /** CoreID */ uint32_t core_id; + + /** Cache info */ + struct cpuinfo_cache l1i; + struct cpuinfo_cache l1d; + struct cpuinfo_cache l2; + struct cpuinfo_cache l3; + uint32_t flags; }; @@ -110,5 +121,9 @@ CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_linux_detect_cluster_prid( uint32_t usable_processors, struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); +CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_parse_cpu_cache( + uint32_t max_processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]); + extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; diff --git a/src/loongarch/linux/cache.c b/src/loongarch/linux/cache.c new file mode 100644 index 00000000..225def9d --- /dev/null +++ b/src/loongarch/linux/cache.c @@ -0,0 +1,203 @@ +#include +#include +#include +#include + +#include +#include +#include + 
+#define BUFFER_SIZE 64 +#define STRINGIFY(token) #token +#define CACHE_INDEX_SIZE \ + (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/cache/index" STRINGIFY(UINT32_MAX) "/") + 32) +#define CACHE_INDEX_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cache/index%" PRIu32 "/%s" +#define FILENAME_SETS STRINGIFY(number_of_sets) +#define FILENAME_WAYS STRINGIFY(ways_of_associativity) +#define FILENAME_LSIZE STRINGIFY(coherency_line_size) +#define FILENAME_LEVEL STRINGIFY(level) +#define FILENAME_TYPE STRINGIFY(type) +#define FILENAME_SIZE STRINGIFY(size) + +enum cache_type { + Unknown, + Instruction, + Data, + Unified, +}; + +static bool parse_number( + const char* line_start, + const char* line_end, + uint32_t number_ptr[restrict static 1], + uint64_t line_number) +{ + uint32_t number = 0; + const char* parsed = line_start; + if (line_number != 1) { + return true; + } + for (; parsed != line_end && *parsed != '\0'; parsed++) { + const uint32_t digit = (uint32_t)(uint8_t)(*parsed) - (uint32_t)'0'; + if (digit >= 10) { + return false; + } + number = number * UINT32_C(10) + digit; + } + *number_ptr = number; + return true; +} + +static bool parse_size( + const char* line_start, + const char* line_end, + uint32_t number_ptr[restrict static 1], + uint64_t line_number) +{ + uint32_t number = 0; + const char* parsed = line_start; + if (line_number != 1) { + return true; + } + for (; parsed != line_end && *parsed != 'K'; parsed++) { + const uint32_t digit = (uint32_t)(uint8_t)(*parsed) - (uint32_t)'0'; + if (digit >= 10) { + return false; + } + number = number * UINT32_C(10) + digit; + } + if (*parsed != 'K') { + return false; + } + *number_ptr = number; + return true; +} + +static inline size_t min(size_t a, size_t b) { + return a < b ? 
a : b; +} + +static bool parse_type( + const char* line_start, + const char* line_end, + enum cache_type type_ptr[restrict static 1], + uint64_t line_number) +{ + size_t line_length = line_end - line_start; + + if (line_number != 1) { + return true; + } + if (0 == strncmp("Instruction", line_start, min(line_length, sizeof("Instruction")))) { + *type_ptr = Instruction; + return true; + } + if (0 == strncmp("Data", line_start, min(line_length, sizeof("Data")))) { + *type_ptr = Data; + return true; + } + if (0 == strncmp("Unified", line_start, min(line_length, sizeof("Unified")))) { + *type_ptr = Unified; + return true; + } + return false; +} + +static bool parse( + const char* suffix, uint32_t x, uint32_t y, void *data_ptr, + cpuinfo_line_callback parse_func) +{ + char filename[CACHE_INDEX_SIZE]; + const int chars_formatted = + snprintf(filename, CACHE_INDEX_SIZE, CACHE_INDEX_FORMAT, x, y, suffix); + if ((unsigned int)chars_formatted >= CACHE_INDEX_SIZE) { + cpuinfo_log_warning("failed to format filename for cache index %s", suffix); + return true; + } + if (!cpuinfo_linux_parse_multiline_file(filename, BUFFER_SIZE, parse_func, data_ptr)) { + cpuinfo_log_error("failed parse cache index %s", suffix); + return false; + } + return true; +} + +static bool cpuinfo_loongarch_linux_parse_cpuX_cache_indexY( + uint32_t x, uint32_t y, + struct cpuinfo_loongarch_linux_processor processors[restrict static 1]) +{ + uint32_t sets, ways, level, line_size, size; + enum cache_type type = Unknown; + char filename[CACHE_INDEX_SIZE]; + struct cpuinfo_cache *cache; + + if (!parse(FILENAME_SETS, x, y, &sets, (cpuinfo_line_callback) parse_number)) { + return false; + } + if (!parse(FILENAME_WAYS, x, y, &ways, (cpuinfo_line_callback) parse_number)) { + return false; + } + if (!parse(FILENAME_LSIZE, x, y, &line_size, (cpuinfo_line_callback) parse_number)) { + return false; + } + if (!parse(FILENAME_LEVEL, x, y, &level, (cpuinfo_line_callback) parse_number)) { + return false; + } + if 
(!parse(FILENAME_TYPE, x, y, &type, (cpuinfo_line_callback) parse_type)) { + return false; + } + if (!parse(FILENAME_SIZE, x, y, &size, (cpuinfo_line_callback) parse_size)) { + return false; + } + + if (level == 1 && type == Instruction) { + cache = &processors->l1i; + processors->flags |= CPUINFO_LOONGARCH_LINUX_VALID_L1I; + } else if (level == 1 && type == Data) { + cache = &processors->l1d; + processors->flags |= CPUINFO_LOONGARCH_LINUX_VALID_L1D; + } else if (level == 2 && type == Unified) { + cache = &processors->l2; + processors->flags |= CPUINFO_LOONGARCH_LINUX_VALID_L2; + } else if (level == 3 && type == Unified) { + cache = &processors->l3; + processors->flags |= CPUINFO_LOONGARCH_LINUX_VALID_L3; + } else { + return false; + } + + *cache = (struct cpuinfo_cache) { + .associativity = ways, + .sets = sets, + .line_size = line_size, + .size = size * 1024, + .partitions = 1, + }; + return true; +} + +bool cpuinfo_loongarch_linux_parse_cpu_cache( + uint32_t max_processors_count, + struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]) +{ + + for (uint32_t i = 0; i < max_processors_count; i++) { + // TODO dynamic detect + for (uint32_t j = 0; j < 4; j++) { + if ((processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_L1D) && j == 0) { + continue; + } + if ((processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_L1I) && j == 1) { + continue; + } + if ((processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_L2) && j == 2) { + continue; + } + if ((processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_L3) && j == 3) { + continue; + } + if (!cpuinfo_loongarch_linux_parse_cpuX_cache_indexY(i, j, &processors[i])) + return false; + } + } + return true; +} diff --git a/src/loongarch/linux/init.c b/src/loongarch/linux/init.c index 381011c9..26813a91 100644 --- a/src/loongarch/linux/init.c +++ b/src/loongarch/linux/init.c @@ -75,6 +75,79 @@ static int cmp_loongarch_linux_processor(const void* ptr_a, const void* ptr_b) { return cmp(pro_a, pro_b); } 
+static inline bool is_cache_bit_set(enum cache_baseinfo_bit bit, uint32_t info) { + return !!((info >> bit) & 1); +} + +static inline bool set_cpuinfo_cache_by_cpucfg( + enum cpucfg_regs reg, + struct cpuinfo_cache cache[restrict static 1]) +{ + uint32_t data; + + if (!cpucfg(reg, &data)) { + return false; + } + + *cache = (struct cpuinfo_cache) { + .associativity = ((data & CACHE_WAYS_MASK) >> CACHE_WAYS_OFFSET) + 1, + .sets = 1 << ((data & CACHE_SETS_MASK) >> CACHE_SETS_OFFSET), + .line_size = 1 << ((data & CACHE_LSIZE_MASK) >> CACHE_LSIZE_OFFSET), + .partitions = 1, + }; + cache->size = cache->associativity * cache->sets * cache->line_size; + return true; +} + +static void try_set_cache_by_cpucfg( + struct cpuinfo_loongarch_linux_processor *processors, + uint32_t count) +{ + uint32_t info, flags = 0; + struct cpuinfo_cache l1i, l1d, l2, l3; + + if (!cpucfg(CPUCFG_REG_CACHE_BASEINFO, &info)) { + return; + } + + if (is_cache_bit_set(L1_IU_Present, info) && !is_cache_bit_set(L1_IU_Unify, info)) { + if (set_cpuinfo_cache_by_cpucfg(CPUCFG_REG_CACHE_L1_IU, &l1i)) { + flags |= CPUINFO_LOONGARCH_LINUX_VALID_L1I; + } + } + if (is_cache_bit_set(L1_D_Present, info)) { + if (set_cpuinfo_cache_by_cpucfg(CPUCFG_REG_CACHE_L1_D, &l1d)) { + flags |= CPUINFO_LOONGARCH_LINUX_VALID_L1D; + } + } + if (is_cache_bit_set(L2_IU_Present, info) && is_cache_bit_set(L2_IU_Unify, info)) { + if (set_cpuinfo_cache_by_cpucfg(CPUCFG_REG_CACHE_L2_IU, &l2)) { + flags |= CPUINFO_LOONGARCH_LINUX_VALID_L2; + } + } + if (is_cache_bit_set(L3_IU_Present, info) && is_cache_bit_set(L3_IU_Unify, info)) { + if (set_cpuinfo_cache_by_cpucfg(CPUCFG_REG_CACHE_L3_IU, &l3)) { + flags |= CPUINFO_LOONGARCH_LINUX_VALID_L3; + } + } + + for (uint32_t i = 0; i < count; i++) { + if (flags & CPUINFO_LOONGARCH_LINUX_VALID_L1I) { + processors[i].l1i = l1i; + } + if (flags & CPUINFO_LOONGARCH_LINUX_VALID_L1D) { + processors[i].l1d = l1d; + } + if (flags & CPUINFO_LOONGARCH_LINUX_VALID_L2) { + processors[i].l2 = l2; + } + 
if (flags & CPUINFO_LOONGARCH_LINUX_VALID_L3) { + processors[i].l3 = l3; + } + processors[i].flags |= flags; + } +} + static void try_set_prid_by_cpucfg( struct cpuinfo_loongarch_linux_processor *processors, uint32_t count) @@ -156,6 +229,7 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_loongarch64_linux_decode_isa_from_hwcap(&cpuinfo_isa); if (cpuinfo_isa.cpucfg) { try_set_prid_by_cpucfg(loongarch_linux_processors, loongarch_linux_processors_count); + try_set_cache_by_cpucfg(loongarch_linux_processors, loongarch_linux_processors_count); } #endif @@ -171,6 +245,13 @@ void cpuinfo_loongarch_linux_init(void) { return; } + if (!cpuinfo_loongarch_linux_parse_cpu_cache( + loongarch_linux_processors_count, + loongarch_linux_processors)) { + cpuinfo_log_error("failed to parse processor information from /sys/devices/system/cpu/cpuX/cache/indexY/*"); + return; + } + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { if (bitmask_all(loongarch_linux_processors[i].flags, valid_processor_mask)) { valid_processors += 1; @@ -440,11 +521,10 @@ void cpuinfo_loongarch_linux_init(void) { } struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; - cpuinfo_loongarch_decode_cache( - loongarch_linux_processors[i].uarch, - loongarch_linux_processors[i].package_processor_count, - loongarch_linux_processors[i].architecture_version, - &l1i[i], &l1d[i], &temp_l2, &temp_l3); + memcpy(&l1i[i], &loongarch_linux_processors[i].l1i, sizeof(struct cpuinfo_cache)); + memcpy(&l1d[i], &loongarch_linux_processors[i].l1d, sizeof(struct cpuinfo_cache)); + memcpy(&temp_l2, &loongarch_linux_processors[i].l2, sizeof(struct cpuinfo_cache)); + memcpy(&temp_l3, &loongarch_linux_processors[i].l3, sizeof(struct cpuinfo_cache)); l1i[i].processor_start = l1d[i].processor_start = i; l1i[i].processor_count = l1d[i].processor_count = 1; @@ -505,11 +585,8 @@ void cpuinfo_loongarch_linux_init(void) { } struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = { 0 }, temp_l3 = { 0 }; - 
cpuinfo_loongarch_decode_cache( - loongarch_linux_processors[i].uarch, - loongarch_linux_processors[i].package_processor_count, - loongarch_linux_processors[i].architecture_version, - &dummy_l1i, &dummy_l1d, &temp_l2, &temp_l3); + memcpy(&temp_l2, &loongarch_linux_processors[i].l2, sizeof(struct cpuinfo_cache)); + memcpy(&temp_l3, &loongarch_linux_processors[i].l3, sizeof(struct cpuinfo_cache)); if (temp_l3.size != 0) { /* @@ -590,7 +667,7 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; - cpuinfo_max_cache_size = cpuinfo_loongarch_compute_max_cache_size(&processors[0]); + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); cpuinfo_linux_cpu_max = loongarch_linux_processors_count; cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; From 795450b6179730d01b28f044242c45b5c24b5ea9 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Thu, 23 Jan 2025 16:38:37 +0800 Subject: [PATCH 9/9] Refactor detect LoongArch cores,clusters,packages --- CMakeLists.txt | 3 - configure.py | 3 - include/cpuinfo.h | 8 +- src/loongarch/api.h | 32 --- src/loongarch/linux/api.h | 36 +-- src/loongarch/linux/chipset.c | 202 -------------- src/loongarch/linux/clusters.c | 48 ---- src/loongarch/linux/cpuinfo.c | 18 +- src/loongarch/linux/init.c | 476 ++++++++++++--------------------- src/loongarch/linux/prid.c | 266 ------------------ 10 files changed, 190 insertions(+), 902 deletions(-) delete mode 100644 src/loongarch/linux/chipset.c delete mode 100644 src/loongarch/linux/clusters.c delete mode 100644 src/loongarch/linux/prid.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a8a1999c..16e869b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,9 +231,6 @@ IF(CPUINFO_SUPPORTED_PLATFORM) LIST(APPEND CPUINFO_SRCS src/loongarch/linux/init.c src/loongarch/linux/cpuinfo.c - 
src/loongarch/linux/clusters.c - src/loongarch/linux/chipset.c - src/loongarch/linux/prid.c src/loongarch/linux/cache.c src/loongarch/linux/loongarch64-isa.c) ENDIF() diff --git a/configure.py b/configure.py index 858f0588..f8a4b3d1 100755 --- a/configure.py +++ b/configure.py @@ -69,10 +69,7 @@ def main(args): sources += [ "loongarch/linux/init.c", "loongarch/linux/cpuinfo.c", - "loongarch/linux/clusters.c", - "loongarch/linux/prid.c", "loongarch/linux/cache.c", - "loongarch/linux/chipset.c", "loongarch/linux/loongarch64-isa.c", ] diff --git a/include/cpuinfo.h b/include/cpuinfo.h index e7842638..049bc321 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -724,8 +724,8 @@ struct cpuinfo_cluster { /** Value of Main ID Register (MIDR) of the cores in the cluster */ uint32_t midr; #elif CPUINFO_ARCH_LOONGARCH64 - /** Value of CPUCFG for this cores in the cluster */ - uint32_t cpucfg; + /** Value of PRID for this cores in the cluster */ + uint32_t prid; #endif /** Clock rate (non-Turbo) of the cores in the cluster, in Hz */ uint64_t frequency; @@ -760,8 +760,8 @@ struct cpuinfo_uarch_info { /** Value of Main ID Register (MIDR) for the microarchitecture */ uint32_t midr; #elif CPUINFO_ARCH_LOONGARCH64 - /** Value of CPUCFG for the microarchitecture */ - uint32_t cpucfg; + /** Value of PRID for the microarchitecture */ + uint32_t prid; #endif /** Number of logical processors with the microarchitecture */ uint32_t processor_count; diff --git a/src/loongarch/api.h b/src/loongarch/api.h index 03796b6a..11610e93 100644 --- a/src/loongarch/api.h +++ b/src/loongarch/api.h @@ -6,38 +6,6 @@ #include #include - -enum cpuinfo_loongarch_chipset_vendor { - cpuinfo_loongarch_chipset_vendor_unknown = 0, - cpuinfo_loongarch_chipset_vendor_Loongson, - cpuinfo_loongarch_chipset_vendor_max, -}; - - -enum cpuinfo_loongarch_chipset_series { - cpuinfo_loongarch_chipset_series_unknown = 0, - cpuinfo_loongarch_chipset_series_3, - cpuinfo_loongarch_chipset_series_max, -}; - - -#define 
CPUINFO_LOONGARCH_CHIPSET_SUFFIX_MAX 8 - -struct cpuinfo_loongarch_chipset { - enum cpuinfo_loongarch_chipset_vendor vendor; - enum cpuinfo_loongarch_chipset_series series; -}; - - -#define CPUINFO_LOONGARCH_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX - -CPUINFO_INTERNAL void cpuinfo_loongarch_chipset_to_string( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - char name[restrict static CPUINFO_LOONGARCH_CHIPSET_NAME_MAX]); - -CPUINFO_INTERNAL void cpuinfo_loongarch_fixup_chipset( - struct cpuinfo_loongarch_chipset chipset[restrict static 1], uint32_t cores); - CPUINFO_INTERNAL void cpuinfo_loongarch_decode_vendor_uarch( uint32_t prid, enum cpuinfo_vendor vendor[restrict static 1], diff --git a/src/loongarch/linux/api.h b/src/loongarch/linux/api.h index 77287c59..f919789e 100644 --- a/src/loongarch/linux/api.h +++ b/src/loongarch/linux/api.h @@ -70,6 +70,8 @@ struct cpuinfo_loongarch_linux_processor { uint32_t system_processor_id; /** CoreID */ uint32_t core_id; + /** SmtID */ + uint32_t smt_id; /** Cache info */ struct cpuinfo_cache l1i; @@ -77,12 +79,14 @@ struct cpuinfo_loongarch_linux_processor { struct cpuinfo_cache l2; struct cpuinfo_cache l3; + /** Hardware name */ + char hardware_name[CPUINFO_HARDWARE_VALUE_MAX]; + uint32_t flags; }; CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_parse_proc_cpuinfo( - char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], uint32_t max_processors_count, struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]); @@ -91,36 +95,6 @@ CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_parse_proc_cpuinfo( struct cpuinfo_loongarch_isa isa[restrict static 1]); #endif -CPUINFO_INTERNAL struct cpuinfo_loongarch_chipset - cpuinfo_loongarch_linux_decode_chipset( - const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX]); - - -CPUINFO_INTERNAL struct cpuinfo_loongarch_chipset - cpuinfo_loongarch_linux_decode_chipset_from_proc_cpuinfo_hardware( - const char 
proc_cpuinfo_hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], - bool is_loongson); - - -CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_detect_core_clusters_by_heuristic( - uint32_t usable_processors, - uint32_t max_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); - -CPUINFO_INTERNAL void cpuinfo_loongarch_linux_detect_core_clusters_by_sequential_scan( - uint32_t max_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); - -CPUINFO_INTERNAL void cpuinfo_loongarch_linux_count_cluster_processors( - uint32_t max_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); - -CPUINFO_INTERNAL uint32_t cpuinfo_loongarch_linux_detect_cluster_prid( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - uint32_t max_processors, - uint32_t usable_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]); - CPUINFO_INTERNAL bool cpuinfo_loongarch_linux_parse_cpu_cache( uint32_t max_processors_count, struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]); diff --git a/src/loongarch/linux/chipset.c b/src/loongarch/linux/chipset.c deleted file mode 100644 index 3a66066a..00000000 --- a/src/loongarch/linux/chipset.c +++ /dev/null @@ -1,202 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include - - -static inline bool is_ascii_whitespace(char c) { - switch (c) { - case ' ': - case '\t': - case '\r': - case '\n': - return true; - default: - return false; - } -} - -static inline bool is_ascii_alphabetic(char c) { - const char lower_c = c | '\x20'; - return (uint8_t) (lower_c - 'a') <= (uint8_t) ('z' - 'a'); -} - -static inline bool is_ascii_alphabetic_uppercase(char c) { - return (uint8_t) (c - 'A') <= (uint8_t) ('Z' - 'A'); -} - -static inline bool is_ascii_numeric(char c) { - return (uint8_t) (c - '0') < 10; -} - -static inline 
uint16_t load_u16le(const void* ptr) { -#if defined(__loongarch64) - return *((const uint16_t*) ptr); -#else - const uint8_t* byte_ptr = (const uint8_t*) ptr; - return ((uint16_t) byte_ptr[1] << 8) | (uint16_t) byte_ptr[0]; -#endif -} - -static inline uint32_t load_u24le(const void* ptr) { -#if defined(__loongarch64) - return ((uint32_t) ((const uint8_t*) ptr)[2] << 16) | ((uint32_t) *((const uint16_t*) ptr)); -#else - const uint8_t* byte_ptr = (const uint8_t*) ptr; - return ((uint32_t) byte_ptr[2] << 16) | ((uint32_t) byte_ptr[1] << 8) | (uint32_t) byte_ptr[0]; -#endif -} - -static inline uint32_t load_u32le(const void* ptr) { -#if defined(__loongarch64) - return *((const uint32_t*) ptr); -#else - return ((uint32_t) ((const uint8_t*) ptr)[3] << 24) | load_u24le(ptr); -#endif -} - -/* - * Map from Loongarch chipset series ID to Loongarch chipset vendor ID. - * This map is used to avoid storing vendor IDs in tables. - */ - - -static enum cpuinfo_loongarch_chipset_vendor chipset_series_vendor[cpuinfo_loongarch_chipset_series_max] = { - [cpuinfo_loongarch_chipset_series_unknown] = cpuinfo_loongarch_chipset_vendor_unknown, - [cpuinfo_loongarch_chipset_series_3] = cpuinfo_loongarch_chipset_vendor_Loongson, -}; - - - -struct loongson_map_entry { - const char* platform; - uint8_t series; -}; - - -int strcicmp(char const *a, char const *b) -{ - for (;; a++, b++) { - int d = ((int)(a-b)); - if (d != 0 || !*a) - return d; - } -} - - -static const struct loongson_map_entry loongson_hardware_map_entries[] = { - { - /* "3A5000" -> Loongson 3a5000 */ - .platform = "3A5000", - .series = cpuinfo_loongarch_chipset_series_3, - }, -}; - - - -/* - * Decodes chipset name from /proc/cpuinfo Hardware string. - * For some chipsets, the function relies frequency and on number of cores for chipset detection. - * - * @param[in] platform - /proc/cpuinfo Hardware string. - //* @param cores - number of cores in the chipset. - * - * @returns Decoded chipset name. 
If chipset could not be decoded, the resulting structure would use `unknown` vendor - * and series identifiers. - */ -struct cpuinfo_loongarch_chipset cpuinfo_loongarch_linux_decode_chipset_from_proc_cpuinfo_hardware( - const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], - bool is_loongson) -{ - struct cpuinfo_loongarch_chipset chipset; - const size_t hardware_length = strnlen(hardware, CPUINFO_HARDWARE_VALUE_MAX); - const char* hardware_end = hardware + hardware_length; - - if (is_loongson) { - /* Compare to tabulated Hardware values for popular chipsets/devices which can't be otherwise detected */ - for (size_t i = 0; i < CPUINFO_COUNT_OF(loongson_hardware_map_entries); i++) { - - if (strncmp(loongson_hardware_map_entries[i].platform, hardware, hardware_length) == 0 && - loongson_hardware_map_entries[i].platform[hardware_length] == 0) - { - cpuinfo_log_debug( - "found /proc/cpuinfo Hardware string \"%.*s\" in special chipset table", - (int) hardware_length, hardware); - /* Create chipset name from entry */ - return (struct cpuinfo_loongarch_chipset) { - .vendor = chipset_series_vendor[loongson_hardware_map_entries[i].series], - .series = (enum cpuinfo_loongarch_chipset_series) loongson_hardware_map_entries[i].series, - }; - } - } - } - - return (struct cpuinfo_loongarch_chipset) { - .vendor = cpuinfo_loongarch_chipset_vendor_unknown, - .series = cpuinfo_loongarch_chipset_series_unknown, - }; -} - - -/* Map from Loongarch chipset vendor ID to its string representation */ -static const char* chipset_vendor_string[cpuinfo_loongarch_chipset_vendor_max] = { - [cpuinfo_loongarch_chipset_vendor_unknown] = "Unknown", - [cpuinfo_loongarch_chipset_vendor_Loongson] = "Loongson", -}; - -/* Map from Loongarch chipset series ID to its string representation */ -static const char* chipset_series_string[cpuinfo_loongarch_chipset_series_max] = { - [cpuinfo_loongarch_chipset_series_unknown] = NULL, - [cpuinfo_loongarch_chipset_series_3] = "3", -}; - -/* Convert chipset 
name represented by cpuinfo_loongarch_chipset structure to a string representation */ -void cpuinfo_loongarch_chipset_to_string( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - char name[restrict static CPUINFO_LOONGARCH_CHIPSET_NAME_MAX]) -{ - enum cpuinfo_loongarch_chipset_vendor vendor = chipset->vendor; - - if (vendor >= cpuinfo_loongarch_chipset_vendor_max) { - vendor = cpuinfo_loongarch_chipset_vendor_unknown; - } - enum cpuinfo_loongarch_chipset_series series = chipset->series; - if (series >= cpuinfo_loongarch_chipset_series_max) { - series = cpuinfo_loongarch_chipset_series_unknown; - } - - const char* vendor_string = chipset_vendor_string[vendor]; - const char* series_string = chipset_series_string[series]; - - if (series == cpuinfo_loongarch_chipset_series_unknown) { - strncpy(name, vendor_string, CPUINFO_LOONGARCH_CHIPSET_NAME_MAX); - } else { - snprintf(name, CPUINFO_LOONGARCH_CHIPSET_NAME_MAX, - "%s %s", vendor_string, series_string); - } -} - - -/* - * Decodes chipset name from /proc/cpuinfo Hardware string. - * For some chipsets, the function relies frequency and on number of cores for chipset detection. - * - * @param[in] hardware - /proc/cpuinfo Hardware string. - * - * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor - * and series identifiers. 
- */ -struct cpuinfo_loongarch_chipset cpuinfo_loongarch_linux_decode_chipset( - const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX]) -{ - struct cpuinfo_loongarch_chipset chipset = - cpuinfo_loongarch_linux_decode_chipset_from_proc_cpuinfo_hardware( - hardware, true); - - return chipset; -} - diff --git a/src/loongarch/linux/clusters.c b/src/loongarch/linux/clusters.c deleted file mode 100644 index 381a36fd..00000000 --- a/src/loongarch/linux/clusters.c +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { - return (bitfield & mask) == mask; -} - - -/* - * Counts the number of logical processors in each core cluster. - * This function should be called after all processors are assigned to core clusters. - * - * @param max_processors - number of elements in the @p processors array. - * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, - * and decoded core cluster (package_leader_id) information. - * The function expects the value of processors[i].package_processor_count to be zero. - * Upon return, processors[i].package_processor_count will contain the number of logical - * processors in the respective core cluster. 
- */ -void cpuinfo_loongarch_linux_count_cluster_processors( - uint32_t max_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]) -{ - /* First pass: accumulate the number of processors at the group leader's package_processor_count */ - for (uint32_t i = 0; i < max_processors; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - const uint32_t package_leader_id = processors[i].package_leader_id; - processors[package_leader_id].package_processor_count += 1; - } - } - /* Second pass: copy the package_processor_count from the group leader processor */ - for (uint32_t i = 0; i < max_processors; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - const uint32_t package_leader_id = processors[i].package_leader_id; - processors[i].package_processor_count = processors[package_leader_id].package_processor_count; - } - } -} diff --git a/src/loongarch/linux/cpuinfo.c b/src/loongarch/linux/cpuinfo.c index 62e274dd..58913def 100644 --- a/src/loongarch/linux/cpuinfo.c +++ b/src/loongarch/linux/cpuinfo.c @@ -28,7 +28,9 @@ static const struct cpuinfo_loongarch_seriesID loongson_name_map_seriesID[] = { { .prefix = "3C6000", .seriesID = prid_series_la664, }, }; - +static inline size_t min(size_t a, size_t b) { + return a < b ? 
a : b; +} static uint32_t parse_processor_number( const char* processor_start, @@ -213,7 +215,6 @@ static void parse_seriesID(const char* name_start, size_t length, int* seriesID) static void parse_model_name( const char* model_name_start, const char* model_name_end, - char* hardware, struct cpuinfo_loongarch_linux_processor processor[restrict static 1]) { const char* separator = model_name_start; @@ -233,10 +234,8 @@ static void parse_model_name( "length of model name value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit", (int) value_length, separator+1, CPUINFO_HARDWARE_VALUE_MAX); value_length = CPUINFO_HARDWARE_VALUE_MAX; - } else { - hardware[value_length] = '\0'; } - memcpy(hardware, separator+1, value_length); + cpuinfo_log_debug("parsed /proc/cpuinfo model name second value = \"%.*s\"", (int) value_length, separator+1); if (model_length != 8) { @@ -343,6 +342,7 @@ static void parse_core( } processor->core_id = cpu_core; + processor->flags |= CPUINFO_LINUX_FLAG_CORE_ID; } static void parse_package( @@ -366,10 +366,10 @@ static void parse_package( } processor->package_id = cpu_package; + processor->flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID; } struct proc_cpuinfo_parser_state { - char* hardware; uint32_t processor_index; uint32_t max_processors_count; struct cpuinfo_loongarch_linux_processor* processors; @@ -552,8 +552,10 @@ static bool parse_line( if (strncasecmp(line_start, "cpu family", key_length) == 0) { /* cpu family is presently useless, don't parse */ } else if (strncasecmp(line_start, "model name", key_length) == 0) { + memcpy(processor->hardware_name, value_start, min(value_end - value_start, CPUINFO_HARDWARE_VALUE_MAX)); + processor->hardware_name[min(value_end - value_start, CPUINFO_HARDWARE_VALUE_MAX)] = '\0'; if (!(processor->flags & (CPUINFO_LOONGARCH_LINUX_VALID_COMPANYID | CPUINFO_LOONGARCH_LINUX_VALID_SERIESID))) - parse_model_name(value_start,value_end,state->hardware,processor); + parse_model_name(value_start, value_end, 
processor); } else { goto unknown; } @@ -600,12 +602,10 @@ static bool parse_line( } bool cpuinfo_loongarch_linux_parse_proc_cpuinfo( - char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], uint32_t max_processors_count, struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors_count]) { struct proc_cpuinfo_parser_state state = { - .hardware = hardware, .processor_index = 0, .max_processors_count = max_processors_count, .processors = processors, diff --git a/src/loongarch/linux/init.c b/src/loongarch/linux/init.c index 26813a91..1e5ff5ce 100644 --- a/src/loongarch/linux/init.c +++ b/src/loongarch/linux/init.c @@ -15,8 +15,6 @@ struct cpuinfo_loongarch_isa cpuinfo_isa = { 0 }; -static struct cpuinfo_package package = { { 0 } }; - static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { return (bitfield & mask) == mask; } @@ -29,34 +27,6 @@ static inline int cmp(uint32_t a, uint32_t b) { return (a > b) - (a < b); } -static bool cluster_siblings_parser( - uint32_t processor, uint32_t siblings_start, uint32_t siblings_end, - struct cpuinfo_loongarch_linux_processor* processors) -{ - processors[processor].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; - uint32_t package_leader_id = processors[processor].package_leader_id; - - for (uint32_t sibling = siblings_start; sibling < siblings_end; sibling++) { - if (!bitmask_all(processors[sibling].flags, CPUINFO_LINUX_FLAG_VALID)) { - cpuinfo_log_info("invalid processor %"PRIu32" reported as a sibling for processor %"PRIu32, - sibling, processor); - continue; - } - - const uint32_t sibling_package_leader_id = processors[sibling].package_leader_id; - if (sibling_package_leader_id < package_leader_id) { - package_leader_id = sibling_package_leader_id; - } - - processors[sibling].package_leader_id = package_leader_id; - processors[sibling].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; - } - - processors[processor].package_leader_id = package_leader_id; - - return true; -} - static int 
cmp_loongarch_linux_processor(const void* ptr_a, const void* ptr_b) { const struct cpuinfo_loongarch_linux_processor* processor_a = (const struct cpuinfo_loongarch_linux_processor*) ptr_a; const struct cpuinfo_loongarch_linux_processor* processor_b = (const struct cpuinfo_loongarch_linux_processor*) ptr_b; @@ -167,6 +137,7 @@ void cpuinfo_loongarch_linux_init(void) { struct cpuinfo_processor* processors = NULL; struct cpuinfo_core* cores = NULL; struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_package* packages = NULL; struct cpuinfo_uarch_info* uarchs = NULL; const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; const struct cpuinfo_core** linux_cpu_to_core_map = NULL; @@ -234,11 +205,9 @@ void cpuinfo_loongarch_linux_init(void) { #endif /* Populate processor information. */ - char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; - uint32_t valid_processors = 0, core_count = 0, last_core_id_1 = UINT32_MAX; + uint32_t valid_processors = 0; if (!cpuinfo_loongarch_linux_parse_proc_cpuinfo( - proc_cpuinfo_hardware, loongarch_linux_processors_count, loongarch_linux_processors)) { cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo"); @@ -257,10 +226,6 @@ void cpuinfo_loongarch_linux_init(void) { valid_processors += 1; loongarch_linux_processors[i].system_processor_id = i; loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; - if (loongarch_linux_processors[i].core_id != last_core_id_1) { - core_count += 1; - last_core_id_1 = loongarch_linux_processors[i].core_id; - } continue; } if (!(loongarch_linux_processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR)) { @@ -275,107 +240,84 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i); } - const struct cpuinfo_loongarch_chipset chipset = - cpuinfo_loongarch_linux_decode_chipset(proc_cpuinfo_hardware); - - for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if 
(bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - if (cpuinfo_linux_get_processor_package_id(i, &loongarch_linux_processors[i].package_id)) { - loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID; - } - } - } - - /* Initialize topology group IDs */ - for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - loongarch_linux_processors[i].package_leader_id = i; - } - - /* Propagate topology group IDs among siblings */ + /* Populate core information. */ + uint32_t last_core_id = UINT32_MAX, core_count = 0, smt_id = UINT32_MAX; for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) continue; + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_CORE_ID)) { + cpuinfo_log_warning("Not set core id for processor %"PRIu32" from /proc/cpuinfo", i); + loongarch_linux_processors[i].core_id = last_core_id; } - - if (loongarch_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { - cpuinfo_linux_detect_core_siblings( - loongarch_linux_processors_count, i, - (cpuinfo_siblings_callback) cluster_siblings_parser, - loongarch_linux_processors); + smt_id += 1; + if (loongarch_linux_processors[i].core_id != last_core_id) { + core_count += 1; + smt_id = 0; + last_core_id = loongarch_linux_processors[i].core_id; } + loongarch_linux_processors[i].smt_id = smt_id; + loongarch_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_SMT_ID; } - /* Propagate all cluster IDs */ - uint32_t clustered_processors = 0; - for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) { - clustered_processors += 1; + /* Not populate cluster information. Thought a package as a cluster. 
*/ + uint32_t cluster_count; - const uint32_t package_leader_id = loongarch_linux_processors[i].package_leader_id; - if (package_leader_id < i) { - loongarch_linux_processors[i].package_leader_id = loongarch_linux_processors[package_leader_id].package_leader_id; + /* Populate package information. */ + uint32_t last_package_id = UINT32_MAX, package_count = 0, package_leader_id = UINT32_MAX; + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) + continue; + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_PACKAGE_ID)) { + cpuinfo_log_warning("Not set package id for processor %"PRIu32" from /proc/cpuinfo", i); + loongarch_linux_processors[i].package_id = last_package_id; + if (package_leader_id == UINT32_MAX) { + cpuinfo_log_warning("Set default package leader id 0 for processor %"PRIu32, i); + package_leader_id = 0; } - - cpuinfo_log_debug("processor %"PRIu32" clustered with processor %"PRIu32" as inferred from system siblings lists", - i, loongarch_linux_processors[i].package_leader_id); } + if (loongarch_linux_processors[i].package_id != last_package_id) { + package_count += 1; + last_package_id = loongarch_linux_processors[i].package_id; + package_leader_id = i; + } + loongarch_linux_processors[i].package_leader_id = package_leader_id; } + cluster_count = package_count; - cpuinfo_loongarch_linux_count_cluster_processors(loongarch_linux_processors_count, loongarch_linux_processors); - - const uint32_t cluster_count = cpuinfo_loongarch_linux_detect_cluster_prid( - &chipset, - loongarch_linux_processors_count, valid_processors, loongarch_linux_processors); - - /* Initialize core vendor, uarch, and prid for every logical processor */ + /* Initialize core vendor, uarch for every logical processor */ for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - 
const uint32_t cluster_leader = loongarch_linux_processors[i].package_leader_id; - if (cluster_leader == i) { - /* Cluster leader: decode core vendor and uarch */ - cpuinfo_loongarch_decode_vendor_uarch( - loongarch_linux_processors[cluster_leader].prid, - &loongarch_linux_processors[cluster_leader].vendor, - &loongarch_linux_processors[cluster_leader].uarch); - } else { - /* Cluster non-leader: copy vendor, uarch, and prid from cluster leader */ - loongarch_linux_processors[i].flags = loongarch_linux_processors[cluster_leader].flags; - loongarch_linux_processors[i].prid = loongarch_linux_processors[cluster_leader].prid; - loongarch_linux_processors[i].vendor = loongarch_linux_processors[cluster_leader].vendor; - loongarch_linux_processors[i].uarch = loongarch_linux_processors[cluster_leader].uarch; - } + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_ID)) + continue; + const uint32_t package_leader = loongarch_linux_processors[i].package_leader_id; + if (package_leader == i) { + /* Package leader: decode core vendor and uarch */ + cpuinfo_loongarch_decode_vendor_uarch( + loongarch_linux_processors[package_leader].prid, + &loongarch_linux_processors[package_leader].vendor, + &loongarch_linux_processors[package_leader].uarch); + } else { + /* Package non-leader: copy vendor, uarch from package leader */ + loongarch_linux_processors[i].vendor = loongarch_linux_processors[package_leader].vendor; + loongarch_linux_processors[i].uarch = loongarch_linux_processors[package_leader].uarch; } } - qsort(loongarch_linux_processors, loongarch_linux_processors_count, sizeof(struct cpuinfo_loongarch_linux_processor), cmp_loongarch_linux_processor); uint32_t uarchs_count = 0; - enum cpuinfo_uarch last_uarch; + enum cpuinfo_uarch last_uarch = cpuinfo_uarch_unknown; for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - if 
(uarchs_count == 0 || loongarch_linux_processors[i].uarch != last_uarch) { - last_uarch = loongarch_linux_processors[i].uarch; - uarchs_count += 1; - } - loongarch_linux_processors[i].uarch_index = uarchs_count - 1; + if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) + continue; + if (uarchs_count == 0 || loongarch_linux_processors[i].uarch != last_uarch) { + last_uarch = loongarch_linux_processors[i].uarch; + uarchs_count += 1; } + loongarch_linux_processors[i].uarch_index = uarchs_count - 1; } - /* - * Assumptions: - * - No SMP (i.e. each core supports only one hardware thread). - * - Level 1 instruction and data caches are private to the core clusters. - * - Level 2 and level 3 cache is shared between cores in the same cluster. - */ - cpuinfo_loongarch_chipset_to_string(&chipset, package.name); - - package.processor_count = valid_processors; - package.core_count = core_count; - package.cluster_count = cluster_count; - processors = calloc(valid_processors, sizeof(struct cpuinfo_processor)); if (processors == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", @@ -383,10 +325,10 @@ void cpuinfo_loongarch_linux_init(void) { goto cleanup; } - cores = calloc(valid_processors, sizeof(struct cpuinfo_core)); + cores = calloc(core_count, sizeof(struct cpuinfo_core)); if (cores == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", - valid_processors * sizeof(struct cpuinfo_core), valid_processors); + core_count * sizeof(struct cpuinfo_core), core_count); goto cleanup; } @@ -397,6 +339,13 @@ void cpuinfo_loongarch_linux_init(void) { goto cleanup; } + packages = calloc(cluster_count, sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core packages", + cluster_count * sizeof(struct cpuinfo_package), package_count); + goto cleanup; + } + uarchs = 
calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); if (uarchs == NULL) { cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", @@ -441,209 +390,128 @@ void cpuinfo_loongarch_linux_init(void) { goto cleanup; } - uint32_t uarchs_index = 0; - for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { - if (bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - if (uarchs_index == 0 || loongarch_linux_processors[i].uarch != last_uarch) { - last_uarch = loongarch_linux_processors[i].uarch; - uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { - .uarch = loongarch_linux_processors[i].uarch, - }; - uarchs_index += 1; - } - uarchs[uarchs_index - 1].processor_count += 1; - uarchs[uarchs_index - 1].core_count += 1; - } + // Victim Cache is private. TODO dynamic detect + l2 = calloc(valid_processors, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + valid_processors * sizeof(struct cpuinfo_cache), valid_processors); + goto cleanup; } + // Shared in package. 
+ l3 = calloc(package_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + valid_processors * sizeof(struct cpuinfo_cache), valid_processors); + goto cleanup; + } - uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; - uint32_t smt_id = 0, core_index = UINT32_MAX, last_core_id = UINT32_MAX; - /* Indication whether L3 (if it exists) is shared between all cores */ - bool shared_l3 = true; - /* Populate cache information structures in l1i, l1d */ + uint32_t uarchs_index = 0; + last_uarch = cpuinfo_uarch_unknown; for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { if (!bitmask_all(loongarch_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { continue; } - - const uint32_t core_id = loongarch_linux_processors[i].core_id; - smt_id++; - if (last_core_id != core_id) { - core_index++; - smt_id = 0; - } - - if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { - cluster_id += 1; - clusters[cluster_id] = (struct cpuinfo_cluster) { - .processor_start = i, - .processor_count = loongarch_linux_processors[i].package_processor_count, - .core_start = i, - .cluster_id = cluster_id, - .package = &package, - .vendor = loongarch_linux_processors[i].vendor, - .uarch = loongarch_linux_processors[i].uarch, - }; - } - - processors[i].smt_id = smt_id; - processors[i].core = cores + core_index; - processors[i].cluster = clusters + cluster_id; - processors[i].package = &package; - processors[i].linux_id = (int) loongarch_linux_processors[i].system_processor_id; - processors[i].cache.l1i = l1i + i; - processors[i].cache.l1d = l1d + i; - linux_cpu_to_processor_map[loongarch_linux_processors[i].system_processor_id] = &processors[i]; - - if (last_core_id != core_id) { - cores[core_index] = (struct cpuinfo_core){ - .processor_start = i, - .processor_count = 1, - .core_id = core_id, - .cluster = clusters 
+ cluster_id, - .package = &package, - .vendor = loongarch_linux_processors[i].vendor, + if (uarchs_index == 0 || loongarch_linux_processors[i].uarch != last_uarch) { + last_uarch = loongarch_linux_processors[i].uarch; + uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { .uarch = loongarch_linux_processors[i].uarch, .prid = loongarch_linux_processors[i].prid, }; - last_core_id = core_id; - clusters[cluster_id].core_count += 1; - } else { - /* another logical processor on the same core */ - cores[core_index].processor_count++; - } - linux_cpu_to_core_map[loongarch_linux_processors[i].system_processor_id] = &cores[core_index]; - - if (linux_cpu_to_uarch_index_map != NULL) { - linux_cpu_to_uarch_index_map[loongarch_linux_processors[i].system_processor_id] = - loongarch_linux_processors[i].uarch_index; - } - - struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; - memcpy(&l1i[i], &loongarch_linux_processors[i].l1i, sizeof(struct cpuinfo_cache)); - memcpy(&l1d[i], &loongarch_linux_processors[i].l1d, sizeof(struct cpuinfo_cache)); - memcpy(&temp_l2, &loongarch_linux_processors[i].l2, sizeof(struct cpuinfo_cache)); - memcpy(&temp_l3, &loongarch_linux_processors[i].l3, sizeof(struct cpuinfo_cache)); - l1i[i].processor_start = l1d[i].processor_start = i; - l1i[i].processor_count = l1d[i].processor_count = 1; - - - if (temp_l3.size != 0) { - /* - * Assumptions: - * - L2 is private to each core - * - L3 is shared by cores in the same cluster - * - If cores in different clusters report the same L3, it is shared between all cores. 
- */ - l2_count += 1; - if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { - if (cluster_id == 0) { - big_l3_size = temp_l3.size; - l3_count = 1; - } else if (temp_l3.size != big_l3_size) { - /* If some cores have different L3 size, L3 is not shared between all cores */ - shared_l3 = false; - l3_count += 1; - } - } - } else { - /* If some cores don't have L3 cache, L3 is not shared between all cores */ - shared_l3 = false; - if (temp_l2.size != 0) { - /* Assume L2 is shared by cores in the same cluster */ - if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { - l2_count += 1; - } - } + uarchs_index += 1; } + uarchs[uarchs_index - 1].processor_count += 1; + uarchs[uarchs_index - 1].core_count += loongarch_linux_processors[i].smt_id == 0 ? 1 : 0; } - if (l2_count != 0) { - l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); - if (l2 == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", - l2_count * sizeof(struct cpuinfo_cache), l2_count); - goto cleanup; - } + /* Transfer contents of processor list to ABI structures. 
*/ + uint32_t processor_index = UINT32_MAX, core_index = UINT32_MAX, cluster_index = UINT32_MAX, package_index = UINT32_MAX; + last_core_id = last_package_id = UINT32_MAX; + for (uint32_t i = 0; i < loongarch_linux_processors_count; i++) { + struct cpuinfo_loongarch_linux_processor *cur = &loongarch_linux_processors[i]; - if (l3_count != 0) { - l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); - if (l3 == NULL) { - cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", - l3_count * sizeof(struct cpuinfo_cache), l3_count); - goto cleanup; - } + if (!bitmask_all(cur->flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; } - } - cluster_id = UINT32_MAX; - uint32_t l2_index = UINT32_MAX, l3_index = UINT32_MAX; - for (uint32_t i = 0; i < valid_processors; i++) { - if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { - cluster_id++; + processor_index += 1; + core_index += last_core_id != cur->core_id ? 1 : 0; + cluster_index += last_package_id != cur->package_id ? 1 : 0; + package_index += last_package_id != cur->package_id ? 
1 : 0; + processors[processor_index] = (struct cpuinfo_processor) { + .smt_id = cur->smt_id, + .core = &cores[core_index], + .cluster = &clusters[cluster_index], + .package = &packages[package_index], + .linux_id = (int) cur->system_processor_id, + .cache.l1i = &l1i[processor_index], + .cache.l1d = &l1d[processor_index], + .cache.l2 = &l2[processor_index], + .cache.l3 = &l3[package_index], + }; + + memcpy(&l1i[processor_index], &cur->l1i, sizeof(struct cpuinfo_cache)); + memcpy(&l1d[processor_index], &cur->l1d, sizeof(struct cpuinfo_cache)); + memcpy(&l2[processor_index], &cur->l2, sizeof(struct cpuinfo_cache)); + l1i[processor_index].processor_start = processor_index; + l1i[processor_index].processor_count = 1; + l1d[processor_index].processor_start = processor_index; + l1d[processor_index].processor_count = 1; + l2[processor_index].processor_start = processor_index; + l2[processor_index].processor_count = 1; + + if (cur->smt_id == 0) { + cores[core_index] = (struct cpuinfo_core) { + .processor_start = processor_index, + .processor_count = 1, + .core_id = cur->core_id, + .cluster = &clusters[cluster_index], + .package = &packages[package_index], + .vendor = cur->vendor, + .uarch = cur->uarch, + .prid = cur->prid, + }; + last_core_id = cur->core_id; + } else { + cores[core_index].processor_count += 1; } - struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = { 0 }, temp_l3 = { 0 }; - memcpy(&temp_l2, &loongarch_linux_processors[i].l2, sizeof(struct cpuinfo_cache)); - memcpy(&temp_l3, &loongarch_linux_processors[i].l3, sizeof(struct cpuinfo_cache)); - - if (temp_l3.size != 0) { - /* - * Assumptions: - * - L2 is private to each core - * - L3 is shared by cores in the same cluster - * - If cores in different clusters report the same L3, it is shared between all cores. 
- */ - l2_index += 1; - l2[l2_index] = (struct cpuinfo_cache) { - .size = temp_l2.size, - .associativity = temp_l2.associativity, - .sets = temp_l2.sets, - .partitions = 1, - .line_size = temp_l2.line_size, - .flags = temp_l2.flags, - .processor_start = i, + if (cur->package_leader_id == cur->system_processor_id) { + clusters[cluster_index] = (struct cpuinfo_cluster) { + .processor_start = processor_index, .processor_count = 1, + .core_start = core_index, + .core_count = 1, + .cluster_id = 0, // Treat a package as a cluster + .package = &packages[package_index], + .vendor = cur->vendor, + .uarch = cur->uarch, + .prid = cur->prid, }; - processors[i].cache.l2 = l2 + l2_index; - if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { - l3_index += 1; - if (l3_index < l3_count) { - l3[l3_index] = (struct cpuinfo_cache) { - .size = temp_l3.size, - .associativity = temp_l3.associativity, - .sets = temp_l3.sets, - .partitions = 1, - .line_size = temp_l3.line_size, - .flags = temp_l3.flags, - .processor_start = i, - .processor_count = - shared_l3 ?
valid_processors : loongarch_linux_processors[i].package_processor_count, - }; - } - } - if (shared_l3) { - processors[i].cache.l3 = l3; - } else if (l3_index < l3_count) { - processors[i].cache.l3 = l3 + l3_index; - } - } else if (temp_l2.size != 0) { - /* Assume L2 is shared by cores in the same cluster */ - if (loongarch_linux_processors[i].package_leader_id == loongarch_linux_processors[i].system_processor_id) { - l2_index += 1; - l2[l2_index] = (struct cpuinfo_cache) { - .size = temp_l2.size, - .associativity = temp_l2.associativity, - .sets = temp_l2.sets, - .partitions = 1, - .line_size = temp_l2.line_size, - .flags = temp_l2.flags, - .processor_start = i, - .processor_count = loongarch_linux_processors[i].package_processor_count, - }; - } - processors[i].cache.l2 = l2 + l2_index; + packages[package_index] = (struct cpuinfo_package) { + .processor_start = processor_index, + .processor_count = 1, + .core_start = core_index, + .core_count = 1, + .cluster_start = cluster_index, + .cluster_count = 1, + }; + memcpy(&l3[package_index], &cur->l3, sizeof(struct cpuinfo_cache)); + l3[package_index].processor_start = processor_index; + l3[package_index].processor_count = 1; + last_package_id = cur->package_id; + } else { + clusters[cluster_index].processor_count += 1; + clusters[cluster_index].core_count += cur->smt_id == 0 ? 1 : 0; + packages[package_index].processor_count += 1; + packages[package_index].core_count += cur->smt_id == 0 ? 
1 : 0; + l3[package_index].processor_count += 1; + } + + linux_cpu_to_processor_map[cur->system_processor_id] = &processors[processor_index]; + linux_cpu_to_core_map[cur->system_processor_id] = &cores[core_index]; + if (linux_cpu_to_uarch_index_map != NULL) { + linux_cpu_to_uarch_index_map[cur->system_processor_id] = cur->uarch_index; } } @@ -651,7 +519,7 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_processors = processors; cpuinfo_cores = cores; cpuinfo_clusters = clusters; - cpuinfo_packages = &package; + cpuinfo_packages = packages; cpuinfo_uarchs = uarchs; cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; @@ -659,14 +527,14 @@ void cpuinfo_loongarch_linux_init(void) { cpuinfo_cache[cpuinfo_cache_level_3] = l3; cpuinfo_processors_count = valid_processors; - cpuinfo_cores_count = valid_processors; + cpuinfo_cores_count = core_count; cpuinfo_clusters_count = cluster_count; - cpuinfo_packages_count = 1; + cpuinfo_packages_count = package_count; cpuinfo_uarchs_count = uarchs_count; cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; - cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; - cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = valid_processors; + cpuinfo_cache_count[cpuinfo_cache_level_3] = package_count; cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); cpuinfo_linux_cpu_max = loongarch_linux_processors_count; diff --git a/src/loongarch/linux/prid.c b/src/loongarch/linux/prid.c deleted file mode 100644 index 4e051862..00000000 --- a/src/loongarch/linux/prid.c +++ /dev/null @@ -1,266 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -#define CLUSTERS_MAX 3 - -static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { - return (bitfield & mask) == mask; -} - -/* 
Description of core clusters configuration in a chipset (identified by series) */ -struct cluster_config { - /* Number of cores (logical processors) */ - uint8_t cores; - /* Loongarch chipset series (see cpuinfo_loongarch_chipset_series enum) */ - uint8_t series; - /* Number of heterogenous clusters in the CPU package */ - uint8_t clusters; - /* Number of cores in each cluster */ - uint8_t cluster_cores[CLUSTERS_MAX]; - /* PRID of cores in each cluster */ - uint32_t cluster_prid[CLUSTERS_MAX]; -}; - - -static const struct cluster_config cluster_configs[] = { - { - .cores = 4, - .series = cpuinfo_loongarch_chipset_series_3, - }, -}; - -/* - * Searches chipset name in mapping of chipset name to cores' PRID values. If match is successful, initializes PRID - * for all clusters' leaders with tabulated values. - * - * @param[in] chipset - chipset (SoC) name information. - * @param clusters_count - number of CPU core clusters detected in the SoC. - * @param cluster_leaders - indices of core clusters' leaders in the @p processors array. - * @param processors_count - number of usable logical processors in the system. - * @param[in,out] processors - array of logical processor descriptions with pre-parsed PRID, maximum frequency, - * and decoded core cluster (package_leader_id) information. - * Upon successful return, processors[i].prid for all clusters' leaders contains the - * tabulated PRID values. - * @param verify_prid - indicated whether the function should check that the PRID values to be assigned to leaders of - * core clusters are consistent with known parts of their parsed values. - * Set if to false if the only PRID value parsed from /proc/cpuinfo is for the last processor - * reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor. - * - * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with PRID values. 
- * @retval false if the chipset was not found in the mapping, or any consistency check failed. - */ -static bool cpuinfo_loongarch_linux_detect_cluster_prid_by_chipset( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - uint32_t clusters_count, - const uint32_t cluster_leaders[restrict static CLUSTERS_MAX], - uint32_t processors_count, - struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count], - bool verify_prid) -{ - if (clusters_count > CLUSTERS_MAX) { - return false; - } - for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) { - if (cluster_configs[c].series != chipset->series) { - continue; - } - /* Verify that the total number of cores and clusters of cores matches expectation */ - if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) { - return false; - } - - /* Verify that core cluster configuration matches expectation */ - for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { - const uint32_t cluster_leader = cluster_leaders[cluster]; - if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) { - return false; - } - } - - if (verify_prid) { - /* Verify known parts of PRID */ - for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { - const uint32_t cluster_leader = cluster_leaders[cluster]; - - /* Create a mask of known prid bits */ - uint32_t prid_mask = 0; - if (processors[cluster_leader].flags & CPUINFO_LOONGARCH_LINUX_VALID_REVISION) { - prid_mask |= CPUINFO_LOONGARCH_PRID_PRODUCT_MASK; - } - - /* Verify the bits under the mask */ - if ((processors[cluster_leader].prid ^ cluster_configs[c].cluster_prid[cluster]) & prid_mask) { - cpuinfo_log_debug("parsed PRID of cluster %08"PRIu32" does not match tabulated value %08"PRIu32, - processors[cluster_leader].prid, cluster_configs[c].cluster_prid[cluster]); - return false; - } - } - } - - /* Assign PRIDs according to tabulated 
configurations */ - for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { - const uint32_t cluster_leader = cluster_leaders[cluster]; - processors[cluster_leader].prid = cluster_configs[c].cluster_prid[cluster]; - processors[cluster_leader].flags |= CPUINFO_LOONGARCH_LINUX_VALID_PRID; - cpuinfo_log_debug("cluster %"PRIu32" PRID = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_prid[cluster]); - } - return true; - } - return false; -} - - -/* - * Initializes PRID for leaders of core clusters in a single sequential scan: - * - Clusters preceding the first reported PRID value are assumed to have default PRID value. - * - Clusters following any reported PRID value to have that PRID value. - * - * @param default_prid - PRID value that will be assigned to cluster leaders preceding any reported PRID value. - * @param processors_count - number of logical processor descriptions in the @p processors array. - * @param[in,out] processors - array of logical processor descriptions with pre-parsed PRID, maximum frequency, - * and decoded core cluster (package_leader_id) information. - * Upon successful return, processors[i].prid for all core clusters' leaders contains - * the assigned PRID value. 
- */ -static void cpuinfo_loongarch_linux_detect_cluster_prid_by_sequential_scan( - uint32_t default_prid, - uint32_t processors_count, - struct cpuinfo_loongarch_linux_processor processors[restrict static processors_count]) -{ - uint32_t prid = default_prid; - for (uint32_t i = 0; i < processors_count; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - if (processors[i].package_leader_id == i) { - if (bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_PRID)) { - prid = processors[i].prid; - } else { - cpuinfo_log_info("assume processor %"PRIu32" to have PRID %08"PRIx32, i, prid); - /* To be consistent, we copy the PRID entirely, rather than by parts */ - processors[i].prid = prid; - processors[i].flags |=CPUINFO_LOONGARCH_LINUX_VALID_PRID; - } - } - } - } -} - -/* - * Detects PRID of each CPU core clusters' leader. - * - * @param[in] chipset - chipset (SoC) name information. - * @param max_processors - number of processor descriptions in the @p processors array. - * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and - * PRESENT flags. - * @param[in,out] processors - array of logical processor descriptions with pre-parsed PRID, maximum frequency, - * and decoded core cluster (package_leader_id) information. - * Upon return, processors[i].prid for all clusters' leaders contains the PRID value. 
- * - * @returns The number of core clusters - */ -uint32_t cpuinfo_loongarch_linux_detect_cluster_prid( - const struct cpuinfo_loongarch_chipset chipset[restrict static 1], - uint32_t max_processors, - uint32_t usable_processors, - struct cpuinfo_loongarch_linux_processor processors[restrict static max_processors]) -{ - uint32_t clusters_count = 0; - uint32_t cluster_leaders[CLUSTERS_MAX]; - uint32_t last_processor_in_cpuinfo = max_processors; - uint32_t last_processor_with_prid = max_processors; - uint32_t processors_with_prid_count = 0; - for (uint32_t i = 0; i < max_processors; i++) { - if (!bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { - continue; - } - if (processors[i].flags & CPUINFO_LOONGARCH_LINUX_VALID_PROCESSOR) { - last_processor_in_cpuinfo = i; - } - - const uint32_t group_leader = processors[i].package_leader_id; - if (group_leader == i) { - if (clusters_count < CLUSTERS_MAX) { - cluster_leaders[clusters_count] = i; - } - clusters_count += 1; - } else { - /* Copy known bits of information to cluster leader */ - if (!bitmask_all(processors[group_leader].flags,CPUINFO_LOONGARCH_LINUX_VALID_PRID) && - bitmask_all(processors[i].flags,CPUINFO_LOONGARCH_LINUX_VALID_PRID)) - { - processors[group_leader].prid = processors[i].prid; - processors[group_leader].flags |=CPUINFO_LOONGARCH_LINUX_VALID_PRID; - } - } - } - cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count); - - /* - * Two relations between reported /proc/cpuinfo information, and cores is possible: - * - /proc/cpuinfo reports information for all or some of the cores below the corresponding - * "processor : " lines. Information on offline cores may be missing. - * - /proc/cpuinfo reports information only once, after all "processor : " lines. - * The reported information may relate to processor #0 or to the processor which - * executed the system calls to read /proc/cpuinfo. 
It is also indistinguishable - * from /proc/cpuinfo reporting information only for the last core (e.g. if all other - * cores are offline). - * - * We detect the second case by checking if /proc/cpuinfo contains valid PRID only for one, - * last reported, processor. Note, that the last reported core may be not the last - * present & possible processor, as /proc/cpuinfo may non-report high-index offline cores. - */ - - if (processors_with_prid_count < usable_processors) { - /* - * /proc/cpuinfo reported PRID only for some processors, and probably some core clusters do not have PRID - * for any of the cores. Check if this is the case. - */ - uint32_t clusters_with_prid_count = 0; - for (uint32_t i = 0; i < max_processors; i++) { - if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID |CPUINFO_LOONGARCH_LINUX_VALID_PRID)) { - if (processors[i].package_leader_id == i) { - clusters_with_prid_count += 1; - } - } - } - - if (clusters_with_prid_count < clusters_count) { - /* - * /proc/cpuinfo reported PRID only for some clusters, need to reconstruct others. - * We make three attempts to detect PRID for clusters without it: - * 1. Search tabulated PRID values for chipsets which have heterogeneous clusters and ship with Linux - * kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values. - * 2. For systems with 2 clusters and PRID known for one cluster, assume big.LITTLE configuration, - * and estimate PRID for the other cluster under assumption that PRID for the big cluster is known. - * 3. Initialize PRIDs for core clusters in a single sequential scan: - * - Clusters preceding the first reported PRID value are assumed to have the last reported PRID value. - * - Clusters following any reported PRID value to have that PRID value. 
- */ - - if (cpuinfo_loongarch_linux_detect_cluster_prid_by_chipset( - chipset, clusters_count, cluster_leaders, usable_processors, processors, true)) - { - return clusters_count; - } - - if (last_processor_with_prid != max_processors) { - /* Fall back to sequential initialization of PRID values for core clusters */ - cpuinfo_loongarch_linux_detect_cluster_prid_by_sequential_scan( - processors[processors[last_processor_with_prid].package_leader_id].prid, - max_processors, processors); - } - } - } - return clusters_count; -}